diff --git a/docs/ClangFormatStyleOptions.rst b/docs/ClangFormatStyleOptions.rst
index 1a783fd4306..0ddc313d27a 100644
--- a/docs/ClangFormatStyleOptions.rst
+++ b/docs/ClangFormatStyleOptions.rst
@@ -277,6 +277,41 @@ the configuration (without a prefix: ``Auto``).
     int a;     // My comment a      vs.     int a; // My comment a
     int b = 2; // comment  b                int b = 2; // comment about b
 
+**AllowAllArgumentsOnNextLine** (``bool``)
+  If a function call or braced initializer list doesn't fit on a
+  line, allow putting all arguments onto the next line, even if
+  ``BinPackArguments`` is ``false``.
+
+  .. code-block:: c++
+
+    true:
+    callFunction(
+        a, b, c, d);
+
+    false:
+    callFunction(a,
+                 b,
+                 c,
+                 d);
+
+**AllowAllConstructorInitializersOnNextLine** (``bool``)
+  If a constructor definition with a member initializer list doesn't
+  fit on a single line, allow putting all member initializers onto the next
+  line, if ```ConstructorInitializerAllOnOneLineOrOnePerLine``` is true.
+  Note that this parameter has no effect if
+  ```ConstructorInitializerAllOnOneLineOrOnePerLine``` is false.
+
+  .. code-block:: c++
+
+    true:
+    MyClass::MyClass() :
+        member0(0), member1(2) {}
+
+    false:
+    MyClass::MyClass() :
+        member0(0),
+        member1(2) {}
+
 **AllowAllParametersOfDeclarationOnNextLine** (``bool``)
   If the function declaration doesn't fit on a line,
   allow putting all parameters of a function declaration onto
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 7664a2c878e..a1c39b42cda 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -165,12 +165,13 @@ release of Clang. Users of the build system should adjust accordingly.
 AST Matchers
 ------------
 
-- Add language support for clang-formatting C# files
-- Add Microsoft coding style to encapsulate default C# formatting style
+- ...
 
 clang-format
 ------------
 
+- Add language support for clang-formatting C# files
+- Add Microsoft coding style to encapsulate default C# formatting style
 - Added new option `PPDIS_BeforeHash` (in configuration: `BeforeHash`) to
   `IndentPPDirectives` which indents preprocessor directives before the hash.
 
diff --git a/include/clang/AST/GlobalDecl.h b/include/clang/AST/GlobalDecl.h
index a5937c239ea..b40099f5090 100644
--- a/include/clang/AST/GlobalDecl.h
+++ b/include/clang/AST/GlobalDecl.h
@@ -104,6 +104,20 @@ class GlobalDecl {
     return Result;
   }
 
+  GlobalDecl getWithCtorType(CXXCtorType Type) {
+    assert(isa<CXXConstructorDecl>(getDecl()));
+    GlobalDecl Result(*this);
+    Result.Value.setInt(Type);
+    return Result;
+  }
+
+  GlobalDecl getWithDtorType(CXXDtorType Type) {
+    assert(isa<CXXDestructorDecl>(getDecl()));
+    GlobalDecl Result(*this);
+    Result.Value.setInt(Type);
+    return Result;
+  }
+
   GlobalDecl getWithMultiVersionIndex(unsigned Index) {
     assert(isa<FunctionDecl>(getDecl()) &&
            !isa<CXXConstructorDecl>(getDecl()) &&
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td
index 45e50c9a8cd..48d9551d686 100644
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -268,12 +268,14 @@ def err_pp_hash_error : Error<"%0">;
 }
 
 def pp_include_next_in_primary : Warning<
-  "#include_next in primary source file">,
+  "#include_next in primary source file; "
+  "will search from start of include path">,
   InGroup<DiagGroup<"include-next-outside-header">>;
 def pp_include_macros_out_of_predefines : Error<
   "the #__include_macros directive is only for internal use by -imacros">;
 def pp_include_next_absolute_path : Warning<
-  "#include_next with absolute path">,
+  "#include_next in file found relative to primary source file or found by "
+  "absolute path; will search from start of include path">,
   InGroup<DiagGroup<"include-next-absolute-path">>;
 def ext_c99_whitespace_required_after_macro_name : ExtWarn<
   "ISO C99 requires whitespace after the macro name">, InGroup<C99>;
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 7ab9d75609f..0f8d19ca4fc 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7200,8 +7200,9 @@ def err_cuda_device_exceptions : Error<
 def err_dynamic_var_init : Error<
     "dynamic initialization is not supported for "
     "__device__, __constant__, and __shared__ variables.">;
-def err_shared_var_init : Error<
-    "initialization is not supported for __shared__ variables.">;
+def warn_shared_var_init : Warning<
+    "initialization is not supported for __shared__ variables.">,
+    InGroup<DiagGroup<"cuda-shared-init">>, DefaultError;
 def err_device_static_local_var : Error<
     "within a %select{__device__|__global__|__host__|__host__ __device__}0 "
     "function, only __shared__ variables or const variables without device "
@@ -9232,6 +9233,9 @@ def warn_omp_used_different_allocator : Warning<
   InGroup<OpenMPClauses>;
 def note_omp_previous_allocator : Note<
   "previous allocator is specified here">;
+def err_expected_allocator_clause : Error<"expected an 'allocator' clause "
+  "inside of the target region; provide an 'allocator' clause or use 'requires'"
+  " directive with the 'dynamic_allocators' clause">;
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {
diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h
index 34a511f6543..fd84c924ff8 100644
--- a/include/clang/Format/Format.h
+++ b/include/clang/Format/Format.h
@@ -154,6 +154,38 @@ struct FormatStyle {
   /// \endcode
   bool AlignTrailingComments;
 
+  /// \brief If a function call or braced initializer list doesn't fit on a
+  /// line, allow putting all arguments onto the next line, even if
+  /// ``BinPackArguments`` is ``false``.
+  /// \code
+  ///   true:
+  ///   callFunction(
+  ///       a, b, c, d);
+  ///
+  ///   false:
+  ///   callFunction(a,
+  ///                b,
+  ///                c,
+  ///                d);
+  /// \endcode
+  bool AllowAllArgumentsOnNextLine;
+
+  /// \brief If a constructor definition with a member initializer list doesn't
+  /// fit on a single line, allow putting all member initializers onto the next
+  /// line, if ```ConstructorInitializerAllOnOneLineOrOnePerLine``` is true.
+  /// Note that this parameter has no effect if
+  /// ```ConstructorInitializerAllOnOneLineOrOnePerLine``` is false.
+  /// \code
+  ///   true:
+  ///   MyClass::MyClass() :
+  ///       member0(0), member1(2) {}
+  ///
+  ///   false:
+  ///   MyClass::MyClass() :
+  ///       member0(0),
+  ///       member1(2) {}
+  bool AllowAllConstructorInitializersOnNextLine;
+
   /// If the function declaration doesn't fit on a line,
   /// allow putting all parameters of a function declaration onto
   /// the next line even if ``BinPackParameters`` is ``false``.
@@ -1169,7 +1201,7 @@ struct FormatStyle {
 
   /// A vector of prefixes ordered by the desired groups for Java imports.
   ///
-  /// Each group is seperated by a newline. Static imports will also follow the
+  /// Each group is separated by a newline. Static imports will also follow the
   /// same grouping convention above all non-static imports. One group's prefix
   /// can be a subset of another - the longest prefix is always matched. Within
   /// a group, the imports are ordered lexicographically.
@@ -1761,6 +1793,9 @@ struct FormatStyle {
            AlignEscapedNewlines == R.AlignEscapedNewlines &&
            AlignOperands == R.AlignOperands &&
            AlignTrailingComments == R.AlignTrailingComments &&
+           AllowAllArgumentsOnNextLine == R.AllowAllArgumentsOnNextLine &&
+           AllowAllConstructorInitializersOnNextLine ==
+               R.AllowAllConstructorInitializersOnNextLine &&
            AllowAllParametersOfDeclarationOnNextLine ==
                R.AllowAllParametersOfDeclarationOnNextLine &&
            AllowShortBlocksOnASingleLine == R.AllowShortBlocksOnASingleLine &&
diff --git a/include/clang/Tooling/Inclusions/IncludeStyle.h b/include/clang/Tooling/Inclusions/IncludeStyle.h
index 7191380d929..a0f236e6fc4 100644
--- a/include/clang/Tooling/Inclusions/IncludeStyle.h
+++ b/include/clang/Tooling/Inclusions/IncludeStyle.h
@@ -67,7 +67,7 @@ struct IncludeStyle {
   /// used for ordering ``#includes``.
   ///
   /// `POSIX extended
-  /// <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html>`_
+  /// <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html>`_
   /// regular expressions are supported.
   ///
   /// These regular expressions are matched against the filename of an include
@@ -79,7 +79,7 @@ struct IncludeStyle {
   /// If none of the regular expressions match, INT_MAX is assigned as
   /// category. The main header for a source file automatically gets category 0.
   /// so that it is generally kept at the beginning of the ``#includes``
-  /// (http://llvm.org/docs/CodingStandards.html#include-style). However, you
+  /// (https://llvm.org/docs/CodingStandards.html#include-style). However, you
   /// can also assign negative priorities if you have certain headers that
   /// always need to be first.
   ///
diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp
index 87a954fcfd3..87535824adf 100644
--- a/lib/Basic/Targets/X86.cpp
+++ b/lib/Basic/Targets/X86.cpp
@@ -115,6 +115,11 @@ bool X86TargetInfo::initFeatureMap(
   if (Kind != CK_Lakemont)
     setFeatureEnabledImpl(Features, "x87", true);
 
+  // Enable cmpxchg8 for i586 and greater CPUs. Include generic for backwards
+  // compatibility.
+  if (Kind >= CK_i586 || Kind == CK_Generic)
+    setFeatureEnabledImpl(Features, "cx8", true);
+
   switch (Kind) {
   case CK_Generic:
   case CK_i386:
@@ -777,6 +782,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasMOVBE = true;
     } else if (Feature == "+sgx") {
       HasSGX = true;
+    } else if (Feature == "+cx8") {
+      HasCX8 = true;
     } else if (Feature == "+cx16") {
       HasCX16 = true;
     } else if (Feature == "+fxsr") {
@@ -1275,12 +1282,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     break;
   }
 
-  if (CPU >= CK_i486) {
+  if (CPU >= CK_i486 || CPU == CK_Generic) {
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
   }
-  if (CPU >= CK_i586)
+  if (HasCX8)
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
   if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64)
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
@@ -1394,6 +1401,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("clflushopt", HasCLFLUSHOPT)
       .Case("clwb", HasCLWB)
       .Case("clzero", HasCLZERO)
+      .Case("cx8", HasCX8)
       .Case("cx16", HasCX16)
       .Case("f16c", HasF16C)
       .Case("fma", HasFMA)
@@ -1819,10 +1827,9 @@ void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
 #define PROC(ENUM, STRING, IS64BIT)                                            \
   if (IS64BIT || getTriple().getArch() == llvm::Triple::x86)                   \
     Values.emplace_back(STRING);
-  // Go through CPUKind checking to ensure that the alias is de-aliased and
-  // 64 bit-ness is checked.
+  // For aliases we need to lookup the CPUKind to check get the 64-bit ness.
 #define PROC_ALIAS(ENUM, ALIAS)                                                \
-  if (checkCPUKind(getCPUKind(ALIAS)))                                         \
+  if (checkCPUKind(CK_##ENUM))                                                      \
     Values.emplace_back(ALIAS);
 #include "clang/Basic/X86Target.def"
 }
diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h
index 35b9b45862b..a26bbe64e84 100644
--- a/lib/Basic/Targets/X86.h
+++ b/lib/Basic/Targets/X86.h
@@ -81,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasMPX = false;
   bool HasSHSTK = false;
   bool HasSGX = false;
+  bool HasCX8 = false;
   bool HasCX16 = false;
   bool HasFXSR = false;
   bool HasXSAVE = false;
@@ -345,9 +346,8 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo {
          (1 << TargetInfo::LongDouble));
 
     // x86-32 has atomics up to 8 bytes
-    // FIXME: Check that we actually have cmpxchg8b before setting
-    // MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.)
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+    MaxAtomicPromoteWidth = 64;
+    MaxAtomicInlineWidth = 32;
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
@@ -383,6 +383,11 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo {
     return X86TargetInfo::validateOperandSize(Constraint, Size);
   }
 
+  void setMaxAtomicWidth() override {
+    if (hasFeature("cx8"))
+      MaxAtomicInlineWidth = 64;
+  }
+
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;
 };
 
diff --git a/lib/CodeGen/CGAMPRuntime.cpp b/lib/CodeGen/CGAMPRuntime.cpp
index a83d52a1ac1..1a02daad366 100644
--- a/lib/CodeGen/CGAMPRuntime.cpp
+++ b/lib/CodeGen/CGAMPRuntime.cpp
@@ -208,14 +208,13 @@ void CGAMPRuntime::EmitCXXAMPDeserializer(CodeGenFunction &CGF,
   }
 
   // Emit code to call the deserializing constructor
-  llvm::Constant *Callee = CGM.getAddrOfCXXStructor(DeserializeConstructor, 
-                                                    StructorType::Complete);
+  llvm::Constant *Callee = CGM.getAddrOfCXXStructor(GlobalDecl(DeserializeConstructor,Dtor_Complete));
 
   const FunctionProtoType *FPT =
       DeserializeConstructor->getType()->castAs<FunctionProtoType>();
 
   const CGFunctionInfo &DesFnInfo = 
-      CGM.getTypes().arrangeCXXStructorDeclaration(DeserializeConstructor, StructorType::Complete);
+      CGM.getTypes().arrangeCXXStructorDeclaration(GlobalDecl(DeserializeConstructor, Dtor_Complete));
 
   for (unsigned I = 1, E = DeserializerArgs.size(); I != E; ++I) {
     auto T = FPT->getParamType(I-1);
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index a87cfcf2be5..4455d0ce19b 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -5097,6 +5097,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
 
   switch (BuiltinID) {
   default: break;
+  case NEON::BI__builtin_neon_vpadd_v:
+  case NEON::BI__builtin_neon_vpaddq_v:
+    // We don't allow fp/int overloading of intrinsics.
+    if (VTy->getElementType()->isFloatingPointTy() &&
+        Int == Intrinsic::aarch64_neon_addp)
+      Int = Intrinsic::aarch64_neon_faddp;
+    break;
   case NEON::BI__builtin_neon_vabs_v:
   case NEON::BI__builtin_neon_vabsq_v:
     if (VTy->getElementType()->isFloatingPointTy())
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 576b60da583..adaeacfe868 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -203,50 +203,37 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
   return false;
 }
 
-llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD,
-                                                  StructorType Type) {
-  const CGFunctionInfo &FnInfo =
-      getTypes().arrangeCXXStructorDeclaration(MD, Type);
+llvm::Function *CodeGenModule::codegenCXXStructor(GlobalDecl GD) {
+  const CGFunctionInfo &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD);
   auto *Fn = cast<llvm::Function>(
-      getAddrOfCXXStructor(MD, Type, &FnInfo, /*FnType=*/nullptr,
+      getAddrOfCXXStructor(GD, &FnInfo, /*FnType=*/nullptr,
                            /*DontDefer=*/true, ForDefinition));
 
-  GlobalDecl GD;
-  if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) {
-    GD = GlobalDecl(DD, toCXXDtorType(Type));
-  } else {
-    const auto *CD = cast<CXXConstructorDecl>(MD);
-    GD = GlobalDecl(CD, toCXXCtorType(Type));
-  }
-
   setFunctionLinkage(GD, Fn);
 
   CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo);
   setNonAliasAttributes(GD, Fn);
-  SetLLVMFunctionAttributesForDefinition(MD, Fn);
+  SetLLVMFunctionAttributesForDefinition(cast<CXXMethodDecl>(GD.getDecl()), Fn);
   return Fn;
 }
 
 llvm::FunctionCallee CodeGenModule::getAddrAndTypeOfCXXStructor(
-    const CXXMethodDecl *MD, StructorType Type, const CGFunctionInfo *FnInfo,
-    llvm::FunctionType *FnType, bool DontDefer,
-    ForDefinition_t IsForDefinition) {
+    GlobalDecl GD, const CGFunctionInfo *FnInfo, llvm::FunctionType *FnType,
+    bool DontDefer, ForDefinition_t IsForDefinition) {
+  auto *MD = cast<CXXMethodDecl>(GD.getDecl());
 
-  GlobalDecl GD;
-  if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
-    GD = GlobalDecl(CD, toCXXCtorType(Type));
-  } else {
+  if (isa<CXXDestructorDecl>(MD)) {
     // Always alias equivalent complete destructors to base destructors in the
     // MS ABI.
     if (getTarget().getCXXABI().isMicrosoft() &&
-        Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0)
-      Type = StructorType::Base;
-    GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type));
+        GD.getDtorType() == Dtor_Complete &&
+        MD->getParent()->getNumVBases() == 0)
+      GD = GD.getWithDtorType(Dtor_Base);
   }
 
   if (!FnType) {
     if (!FnInfo)
-      FnInfo = &getTypes().arrangeCXXStructorDeclaration(MD, Type);
+      FnInfo = &getTypes().arrangeCXXStructorDeclaration(GD);
     FnType = getTypes().GetFunctionType(*FnInfo);
   }
 
@@ -313,7 +300,7 @@ CodeGenFunction::BuildAppleKextVirtualDestructorCall(
   assert(DD->isVirtual() && Type != Dtor_Base);
   // Compute the function type we're calling.
   const CGFunctionInfo &FInfo = CGM.getTypes().arrangeCXXStructorDeclaration(
-      DD, StructorType::Complete);
+      GlobalDecl(DD, Dtor_Complete));
   llvm::Type *Ty = CGM.getTypes().GetFunctionType(FInfo);
   return ::BuildAppleKextVirtualCall(*this, GlobalDecl(DD, Type), Ty, RD);
 }
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index edec5db7638..183a4f93c03 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -309,7 +309,7 @@ class CGCXXABI {
   /// adding any required parameters.  For convenience, ArgTys has been
   /// initialized with the type of 'this'.
   virtual AddedStructorArgs
-  buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+  buildStructorSignature(GlobalDecl GD,
                          SmallVectorImpl<CanQualType> &ArgTys) = 0;
 
   /// Returns true if the given destructor type should be emitted as a linkonce
@@ -588,7 +588,7 @@ class CGCXXABI {
 
   /// Emit a single constructor/destructor with the given type from a C++
   /// constructor Decl.
-  virtual void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) = 0;
+  virtual void emitCXXStructor(GlobalDecl GD) = 0;
 
   /// Load a vtable from This, an object of polymorphic type RD, or from one of
   /// its virtual bases if it does not have its own vtable. Returns the vtable
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 5cade7c54d1..239bd38b039 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -315,11 +315,11 @@ bool CodeGenTypes::inheritingCtorHasParams(
   return Type == Ctor_Complete ||
          !Inherited.getShadowDecl()->constructsVirtualBase() ||
          !Target.getCXXABI().hasConstructorVariants();
-  }
+}
 
 const CGFunctionInfo &
-CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
-                                            StructorType Type) {
+CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
+  auto *MD = cast<CXXMethodDecl>(GD.getDecl());
 
   SmallVector<CanQualType, 16> argTypes;
   SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
@@ -327,17 +327,11 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
 
   bool PassParams = true;
 
-  GlobalDecl GD;
   if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
-    GD = GlobalDecl(CD, toCXXCtorType(Type));
-
     // A base class inheriting constructor doesn't get forwarded arguments
     // needed to construct a virtual base (or base class thereof).
     if (auto Inherited = CD->getInheritedConstructor())
-      PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type));
-  } else {
-    auto *DD = dyn_cast<CXXDestructorDecl>(MD);
-    GD = GlobalDecl(DD, toCXXDtorType(Type));
+      PassParams = inheritingCtorHasParams(Inherited, GD.getCtorType());
   }
 
   CanQual<FunctionProtoType> FTP = GetFormalType(MD);
@@ -347,7 +341,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
     appendParameterTypes(*this, argTypes, paramInfos, FTP);
 
   CGCXXABI::AddedStructorArgs AddedArgs =
-      TheCXXABI.buildStructorSignature(MD, Type, argTypes);
+      TheCXXABI.buildStructorSignature(GD, argTypes);
   if (!paramInfos.empty()) {
     // Note: prefix implies after the first param.
     if (AddedArgs.Prefix)
@@ -535,11 +529,9 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
   // FIXME: Do we need to handle ObjCMethodDecl?
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
 
-  if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD))
-    return arrangeCXXStructorDeclaration(CD, getFromCtorType(GD.getCtorType()));
-
-  if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(FD))
-    return arrangeCXXStructorDeclaration(DD, getFromDtorType(GD.getDtorType()));
+  if (isa<CXXConstructorDecl>(GD.getDecl()) ||
+      isa<CXXDestructorDecl>(GD.getDecl()))
+    return arrangeCXXStructorDeclaration(GD);
 
   return arrangeFunctionDeclaration(FD);
 }
@@ -1701,13 +1693,7 @@ llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) {
   if (!isFuncTypeConvertible(FPT))
     return llvm::StructType::get(getLLVMContext());
 
-  const CGFunctionInfo *Info;
-  if (isa<CXXDestructorDecl>(MD))
-    Info =
-        &arrangeCXXStructorDeclaration(MD, getFromDtorType(GD.getDtorType()));
-  else
-    Info = &arrangeCXXMethodDeclaration(MD);
-  return GetFunctionType(*Info);
+  return GetFunctionType(GD);
 }
 
 static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 644ed45a2ae..8534b4138f1 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -2138,8 +2138,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
                                                  Delegating, Args);
 
   // Emit the call.
-  llvm::Constant *CalleePtr =
-    CGM.getAddrOfCXXStructor(D, getFromCtorType(Type));
+  llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(GlobalDecl(D, Type));
   const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
       Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
   CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index beaffa0074b..8c649cf8356 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -117,7 +117,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
     assert(!Record->hasTrivialDestructor());
     CXXDestructorDecl *Dtor = Record->getDestructor();
 
-    Func = CGM.getAddrAndTypeOfCXXStructor(Dtor, StructorType::Complete);
+    Func = CGM.getAddrAndTypeOfCXXStructor(GlobalDecl(Dtor, Dtor_Complete));
     Argument = llvm::ConstantExpr::getBitCast(
         Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo());
 
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index bf27e896ef2..00036882948 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -353,8 +353,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M,
           dyn_cast_or_null<VarDecl>(M->getExtendingDecl()));
       CleanupArg = llvm::Constant::getNullValue(CGF.Int8PtrTy);
     } else {
-      CleanupFn = CGF.CGM.getAddrAndTypeOfCXXStructor(ReferenceTemporaryDtor,
-                                                      StructorType::Complete);
+      CleanupFn = CGF.CGM.getAddrAndTypeOfCXXStructor(
+          GlobalDecl(ReferenceTemporaryDtor, Dtor_Complete));
       CleanupArg = cast<llvm::Constant>(ReferenceTemporary.getPointer());
     }
     CGF.CGM.getCXXABI().registerGlobalDtor(
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index e5cc22a1196..10ee829f655 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -90,14 +90,14 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
 }
 
 RValue CodeGenFunction::EmitCXXDestructorCall(
-    const CXXDestructorDecl *DD, const CGCallee &Callee, llvm::Value *This,
-    llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE,
-    StructorType Type) {
+    GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This,
+    llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) {
   CallArgList Args;
-  commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam,
-                                    ImplicitParamTy, CE, Args, nullptr);
-  return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type),
-                  Callee, ReturnValueSlot(), Args);
+  commonEmitCXXMemberOrOperatorCall(*this, cast<CXXMethodDecl>(Dtor.getDecl()),
+                                    This, ImplicitParam, ImplicitParamTy, CE,
+                                    Args, nullptr);
+  return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee,
+                  ReturnValueSlot(), Args);
 }
 
 RValue CodeGenFunction::EmitCXXPseudoDestructorExpr(
@@ -290,7 +290,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
   const CGFunctionInfo *FInfo = nullptr;
   if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl))
     FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
-        Dtor, StructorType::Complete);
+        GlobalDecl(Dtor, Dtor_Complete));
   else
     FInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(CalleeDecl);
 
@@ -334,23 +334,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
           *this, Dtor, Dtor_Complete, This.getAddress(),
           cast<CXXMemberCallExpr>(CE));
     } else {
+      GlobalDecl GD(Dtor, Dtor_Complete);
       CGCallee Callee;
       if (getLangOpts().AppleKext && Dtor->isVirtual() && HasQualifier)
         Callee = BuildAppleKextVirtualCall(Dtor, Qualifier, Ty);
       else if (!DevirtualizedMethod)
-        Callee = CGCallee::forDirect(
-            CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty),
-            GlobalDecl(Dtor, Dtor_Complete));
+        Callee =
+            CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD, FInfo, Ty), GD);
       else {
-        Callee = CGCallee::forDirect(
-            CGM.GetAddrOfFunction(GlobalDecl(Dtor, Dtor_Complete), Ty),
-            GlobalDecl(Dtor, Dtor_Complete));
+        Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(GD, Ty), GD);
       }
 
-      EmitCXXDestructorCall(Dtor, Callee, This.getPointer(),
+      EmitCXXDestructorCall(GD, Callee, This.getPointer(),
                             /*ImplicitParam=*/nullptr,
-                            /*ImplicitParamTy=*/QualType(), nullptr,
-                            getFromDtorType(Dtor_Complete));
+                            /*ImplicitParamTy=*/QualType(), nullptr);
     }
     return RValue::get(nullptr);
   }
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 561f21afdb1..69ced587957 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -1958,10 +1958,10 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM,
 /// Perform an operation having the signature
 ///   i8* (i8*)
 /// where a null input causes a no-op and returns null.
-static llvm::Value *
-emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value,
-                      llvm::Type *returnType, llvm::Function *&fn,
-                      llvm::Intrinsic::ID IntID, bool isTailCall = false) {
+static llvm::Value *emitARCValueOperation(
+    CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType,
+    llvm::Function *&fn, llvm::Intrinsic::ID IntID,
+    llvm::CallInst::TailCallKind tailKind = llvm::CallInst::TCK_None) {
   if (isa<llvm::ConstantPointerNull>(value))
     return value;
 
@@ -1976,8 +1976,7 @@ emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value,
 
   // Call the function.
   llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value);
-  if (isTailCall)
-    call->setTailCall();
+  call->setTailCallKind(tailKind);
 
   // Cast the result back to the original type.
   return CGF.Builder.CreateBitCast(call, origType);
@@ -2187,9 +2186,15 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
 llvm::Value *
 CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
   emitAutoreleasedReturnValueMarker(*this);
-  return emitARCValueOperation(*this, value, nullptr,
-              CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
-                           llvm::Intrinsic::objc_retainAutoreleasedReturnValue);
+  llvm::CallInst::TailCallKind tailKind =
+      CGM.getTargetCodeGenInfo()
+              .shouldSuppressTailCallsOfRetainAutoreleasedReturnValue()
+          ? llvm::CallInst::TCK_NoTail
+          : llvm::CallInst::TCK_None;
+  return emitARCValueOperation(
+      *this, value, nullptr,
+      CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
+      llvm::Intrinsic::objc_retainAutoreleasedReturnValue, tailKind);
 }
 
 /// Claim a possibly-autoreleased return value at +0.  This is only
@@ -2326,7 +2331,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
   return emitARCValueOperation(*this, value, nullptr,
                             CGM.getObjCEntrypoints().objc_autoreleaseReturnValue,
                                llvm::Intrinsic::objc_autoreleaseReturnValue,
-                               /*isTailCall*/ true);
+                               llvm::CallInst::TCK_Tail);
 }
 
 /// Do a fused retain/autorelease of the given object.
@@ -2336,7 +2341,7 @@ CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) {
   return emitARCValueOperation(*this, value, nullptr,
                      CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue,
                              llvm::Intrinsic::objc_retainAutoreleaseReturnValue,
-                               /*isTailCall*/ true);
+                               llvm::CallInst::TCK_Tail);
 }
 
 /// Do a fused retain/autorelease of the given object.
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index c8e34533097..20f06b76db7 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8928,6 +8928,30 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
          " Expected target-based directive.");
 }
 
+bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
+                                                       LangAS &AS) {
+  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
+    return false;
+  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+  switch(A->getAllocatorType()) {
+  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+  // Not supported, fallback to the default mem space.
+  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+  case OMPAllocateDeclAttr::OMPConstMemAlloc:
+  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    AS = LangAS::Default;
+    return true;
+  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+    llvm_unreachable("Expected predefined allocator for the variables with the "
+                     "static storage.");
+  }
+  return false;
+}
+
 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
     CodeGenModule &CGM)
     : CGM(CGM) {
@@ -9721,54 +9745,50 @@ class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
 
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
+  if (!VD)
+    return Address::invalid();
   const VarDecl *CVD = VD->getCanonicalDecl();
   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
     return Address::invalid();
-  for (const Attr *A: CVD->getAttrs()) {
-    if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
-      auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-      if (!Elem.second.ServiceInsertPt)
-        setLocThreadIdInsertPt(CGF);
-      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
-      CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
-      llvm::Value *Size;
-      CharUnits Align = CGM.getContext().getDeclAlign(CVD);
-      if (CVD->getType()->isVariablyModifiedType()) {
-        Size = CGF.getTypeSize(CVD->getType());
-        Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
-      } else {
-        CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
-        Align = CGM.getContext().getDeclAlign(CVD);
-        Size = CGM.getSize(Sz.alignTo(Align));
-      }
-      llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
-      llvm::Value *Allocator;
-      if (const Expr *AllocExpr = AA->getAllocator()) {
-        Allocator = CGF.EmitScalarExpr(AllocExpr);
-      } else {
-        // Default allocator in libomp is nullptr.
-        Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
-      }
-      llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
-      llvm::Value *Addr =
-          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
-                              CVD->getName() + ".void.addr");
-      llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
-          ThreadID, Addr, Allocator};
-      llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
-
-      CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
-          NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
-      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-          Addr,
-          CGF.ConvertTypeForMem(
-              CGM.getContext().getPointerType(CVD->getType())),
-          CVD->getName() + ".addr");
-      return Address(Addr, Align);
-    }
-  }
-  return Address::invalid();
+  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+  // Use the default allocation.
+  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc)
+    return Address::invalid();
+  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+  if (!Elem.second.ServiceInsertPt)
+    setLocThreadIdInsertPt(CGF);
+  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+  llvm::Value *Size;
+  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+  if (CVD->getType()->isVariablyModifiedType()) {
+    Size = CGF.getTypeSize(CVD->getType());
+    Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
+  } else {
+    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+    Align = CGM.getContext().getDeclAlign(CVD);
+    Size = CGM.getSize(Sz.alignTo(Align));
+  }
+  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+  assert(AA->getAllocator() &&
+         "Expected allocator expression for non-default allocator.");
+  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+  llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+  llvm::Value *Addr =
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
+                          CVD->getName() + ".void.addr");
+  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
+                                                              Allocator};
+  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+
+  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
+                                                llvm::makeArrayRef(FiniArgs));
+  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      Addr,
+      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+      CVD->getName() + ".addr");
+  return Address(Addr, Align);
 }
 
 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 2896a659b98..7b2c0f1b914 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -1598,6 +1598,11 @@ class CGOpenMPRuntime {
   /// Perform check on requires decl to ensure that target architecture
   /// supports unified addressing
   virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {}
+
+  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
+  /// the predefined allocator and translates it into the corresponding address
+  /// space.
+  virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS);
 };
 
 /// Class supports emissionof SIMD-only code.
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 59066e8813d..46b1b0faaee 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4725,6 +4725,28 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
 
 Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                         const VarDecl *VD) {
+  bool UseDefaultAllocator = true;
+  if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
+    const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+    switch (A->getAllocatorType()) {
+      // Use the default allocator here as by default local vars are
+      // threadlocal.
+    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+      // Just pass-through to check if the globalization is required.
+      break;
+    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+    case OMPAllocateDeclAttr::OMPConstMemAlloc:
+    case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+      UseDefaultAllocator = false;
+      break;
+    }
+  }
+
   if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
     return Address::invalid();
 
@@ -4746,9 +4768,12 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
         return VDI->second.PrivateAddr;
     }
   }
+
   // TODO: replace it with return
+  // UseDefaultAllocator ? Address::invalid :
   // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
   // supports __kmpc_alloc|__kmpc_free.
+  (void)UseDefaultAllocator; // Prevent a warning.
   return Address::invalid();
 }
 
@@ -4840,6 +4865,34 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const {
   return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant);
 }
 
+bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
+                                                            LangAS &AS) {
+  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
+    return false;
+  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+  switch(A->getAllocatorType()) {
+  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+  // Not supported, fallback to the default mem space.
+  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
+  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+    AS = LangAS::Default;
+    return true;
+  case OMPAllocateDeclAttr::OMPConstMemAlloc:
+    AS = LangAS::cuda_constant;
+    return true;
+  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
+    AS = LangAS::cuda_shared;
+    return true;
+  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+    llvm_unreachable("Expected predefined allocator for the variables with the "
+                     "static storage.");
+  }
+  return false;
+}
+
 // Get current CudaArch and ignore any unknown values
 static CudaArch getCudaArch(CodeGenModule &CGM) {
   if (!CGM.getTarget().hasFeature("ptx"))
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 8a92c500b8f..6709ae322a6 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -389,6 +389,11 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
   /// address space by default.
   unsigned getDefaultFirstprivateAddressSpace() const override;
 
+  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
+  /// the predefined allocator and translates it into the corresponding address
+  /// space.
+  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
+
 private:
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 39ab41cced3..9a1da7970be 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -3673,11 +3673,10 @@ class CodeGenFunction : public CodeGenTypeCache {
                               llvm::Value *ImplicitParam,
                               QualType ImplicitParamTy, const CallExpr *E,
                               CallArgList *RtlArgs);
-  RValue EmitCXXDestructorCall(const CXXDestructorDecl *DD,
+  RValue EmitCXXDestructorCall(GlobalDecl Dtor,
                                const CGCallee &Callee,
                                llvm::Value *This, llvm::Value *ImplicitParam,
-                               QualType ImplicitParamTy, const CallExpr *E,
-                               StructorType Type);
+                               QualType ImplicitParamTy, const CallExpr *E);
   RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
                                ReturnValueSlot ReturnValue);
   RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE,
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 96df1e61336..97a3af928d0 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -2637,10 +2637,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
     if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
       // Make sure to emit the definition(s) before we emit the thunks.
       // This is necessary for the generation of certain thunks.
-      if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method))
-        ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType()));
-      else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method))
-        ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType()));
+      if (isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method))
+        ABI->emitCXXStructor(GD);
       else if (FD->isMultiVersion())
         EmitMultiVersionFunctionDefinition(GD, GV);
       else
@@ -3386,15 +3384,8 @@ llvm::Constant *
 CodeGenModule::GetAddrOfGlobal(GlobalDecl GD,
                                ForDefinition_t IsForDefinition) {
   const Decl *D = GD.getDecl();
-  if (isa<CXXConstructorDecl>(D))
-    return getAddrOfCXXStructor(cast<CXXConstructorDecl>(D),
-                                getFromCtorType(GD.getCtorType()),
-                                /*FnInfo=*/nullptr, /*FnType=*/nullptr,
-                                /*DontDefer=*/false, IsForDefinition);
-  else if (isa<CXXDestructorDecl>(D))
-    return getAddrOfCXXStructor(cast<CXXDestructorDecl>(D),
-                                getFromDtorType(GD.getDtorType()),
-                                /*FnInfo=*/nullptr, /*FnType=*/nullptr,
+  if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D))
+    return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr,
                                 /*DontDefer=*/false, IsForDefinition);
   else if (isa<CXXMethodDecl>(D)) {
     auto FInfo = &getTypes().arrangeCXXMethodDeclaration(
@@ -3542,6 +3533,11 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
       D && D->hasAttr<HCCTileStaticAttr>())
     return LangAS::hcc_tilestatic;
 
+  if (LangOpts.OpenMP) {
+    LangAS AS;
+    if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS))
+      return AS;
+  }
   return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
 }
 
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index effa21c06c5..fda9a6f32a2 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -963,25 +963,22 @@ class CodeGenModule : public CodeGenTypeCache {
   // Produce code for this constructor/destructor. This method doesn't try
   // to apply any ABI rules about which other constructors/destructors
   // are needed or if they are alias to each other.
-  llvm::Function *codegenCXXStructor(const CXXMethodDecl *MD,
-                                     StructorType Type);
+  llvm::Function *codegenCXXStructor(GlobalDecl GD);
 
   /// Return the address of the constructor/destructor of the given type.
   llvm::Constant *
-  getAddrOfCXXStructor(const CXXMethodDecl *MD, StructorType Type,
-                       const CGFunctionInfo *FnInfo = nullptr,
+  getAddrOfCXXStructor(GlobalDecl GD, const CGFunctionInfo *FnInfo = nullptr,
                        llvm::FunctionType *FnType = nullptr,
                        bool DontDefer = false,
                        ForDefinition_t IsForDefinition = NotForDefinition) {
-    return cast<llvm::Constant>(getAddrAndTypeOfCXXStructor(MD, Type, FnInfo,
-                                                            FnType, DontDefer,
+    return cast<llvm::Constant>(getAddrAndTypeOfCXXStructor(GD, FnInfo, FnType,
+                                                            DontDefer,
                                                             IsForDefinition)
                                     .getCallee());
   }
 
   llvm::FunctionCallee getAddrAndTypeOfCXXStructor(
-      const CXXMethodDecl *MD, StructorType Type,
-      const CGFunctionInfo *FnInfo = nullptr,
+      GlobalDecl GD, const CGFunctionInfo *FnInfo = nullptr,
       llvm::FunctionType *FnType = nullptr, bool DontDefer = false,
       ForDefinition_t IsForDefinition = NotForDefinition);
 
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index a12615fdde2..7ef45d4fb69 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -54,65 +54,6 @@ class CGRecordLayout;
 class CodeGenModule;
 class RequiredArgs;
 
-enum class StructorType {
-  Complete, // constructor or destructor
-  Base,     // constructor or destructor
-  Deleting  // destructor only
-};
-
-inline CXXCtorType toCXXCtorType(StructorType T) {
-  switch (T) {
-  case StructorType::Complete:
-    return Ctor_Complete;
-  case StructorType::Base:
-    return Ctor_Base;
-  case StructorType::Deleting:
-    llvm_unreachable("cannot have a deleting ctor");
-  }
-  llvm_unreachable("not a StructorType");
-}
-
-inline StructorType getFromCtorType(CXXCtorType T) {
-  switch (T) {
-  case Ctor_Complete:
-    return StructorType::Complete;
-  case Ctor_Base:
-    return StructorType::Base;
-  case Ctor_Comdat:
-    llvm_unreachable("not expecting a COMDAT");
-  case Ctor_CopyingClosure:
-  case Ctor_DefaultClosure:
-    llvm_unreachable("not expecting a closure");
-  }
-  llvm_unreachable("not a CXXCtorType");
-}
-
-inline CXXDtorType toCXXDtorType(StructorType T) {
-  switch (T) {
-  case StructorType::Complete:
-    return Dtor_Complete;
-  case StructorType::Base:
-    return Dtor_Base;
-  case StructorType::Deleting:
-    return Dtor_Deleting;
-  }
-  llvm_unreachable("not a StructorType");
-}
-
-inline StructorType getFromDtorType(CXXDtorType T) {
-  switch (T) {
-  case Dtor_Deleting:
-    return StructorType::Deleting;
-  case Dtor_Complete:
-    return StructorType::Complete;
-  case Dtor_Base:
-    return StructorType::Base;
-  case Dtor_Comdat:
-    llvm_unreachable("not expecting a COMDAT");
-  }
-  llvm_unreachable("not a CXXDtorType");
-}
-
 /// This class organizes the cross-module state that is used while lowering
 /// AST types to LLVM types.
 class CodeGenTypes {
@@ -314,8 +255,7 @@ class CodeGenTypes {
 
   /// C++ methods have some special rules and also have implicit parameters.
   const CGFunctionInfo &arrangeCXXMethodDeclaration(const CXXMethodDecl *MD);
-  const CGFunctionInfo &arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
-                                                      StructorType Type);
+  const CGFunctionInfo &arrangeCXXStructorDeclaration(GlobalDecl GD);
   const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args,
                                                   const CXXConstructorDecl *D,
                                                   CXXCtorType CtorKind,
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index cb90ce3fd70..e18839f78b6 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -216,7 +216,7 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
   void EmitCXXConstructors(const CXXConstructorDecl *D) override;
 
   AddedStructorArgs
-  buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+  buildStructorSignature(GlobalDecl GD,
                          SmallVectorImpl<CanQualType> &ArgTys) override;
 
   bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
@@ -376,7 +376,7 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
                          llvm::GlobalValue::LinkageTypes Linkage) const;
   friend class ItaniumRTTIBuilder;
 
-  void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
+  void emitCXXStructor(GlobalDecl GD) override;
 
   std::pair<llvm::Value *, const CXXRecordDecl *>
   LoadVTablePtr(CodeGenFunction &CGF, Address This,
@@ -1209,8 +1209,8 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
     CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl());
     if (!Record->hasTrivialDestructor()) {
       CXXDestructorDecl *DtorD = Record->getDestructor();
-      Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete);
-      Dtor = llvm::ConstantExpr::getPointerCast(Dtor, CGM.Int8PtrTy);
+      Dtor = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete));
+      Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy);
     }
   }
   if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
@@ -1459,7 +1459,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
 }
 
 CGCXXABI::AddedStructorArgs
-ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+ItaniumCXXABI::buildStructorSignature(GlobalDecl GD,
                                       SmallVectorImpl<CanQualType> &ArgTys) {
   ASTContext &Context = getContext();
 
@@ -1467,7 +1467,9 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
   // These are Clang types, so we don't need to worry about sret yet.
 
   // Check if we need to add a VTT parameter (which has type void **).
-  if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) {
+  if ((isa<CXXConstructorDecl>(GD.getDecl()) ? GD.getCtorType() == Ctor_Base
+                                             : GD.getDtorType() == Dtor_Base) &&
+      cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) {
     ArgTys.insert(ArgTys.begin() + 1,
                   Context.getPointerType(Context.VoidPtrTy));
     return AddedStructorArgs::prefix(1);
@@ -1565,11 +1567,9 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
       Type != Dtor_Base && DD->isVirtual())
     Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
   else
-    Callee = CGCallee::forDirect(
-        CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD);
+    Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD);
 
-  CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(), VTT, VTTTy, nullptr,
-                            getFromDtorType(Type));
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), VTT, VTTTy, nullptr);
 }
 
 void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -1761,14 +1761,14 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
   assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
   assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
 
-  const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
-      Dtor, getFromDtorType(DtorType));
+  GlobalDecl GD(Dtor, DtorType);
+  const CGFunctionInfo *FInfo =
+      &CGM.getTypes().arrangeCXXStructorDeclaration(GD);
   llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
-  CGCallee Callee =
-      CGCallee::forVirtual(CE, GlobalDecl(Dtor, DtorType), This, Ty);
+  CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
 
-  CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), nullptr,
-                            QualType(), nullptr, getFromDtorType(DtorType));
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), nullptr, QualType(),
+                            nullptr);
   return nullptr;
 }
 
@@ -3845,31 +3845,28 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM,
   CGM.SetCommonAttributes(AliasDecl, Alias);
 }
 
-void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
-                                    StructorType Type) {
+void ItaniumCXXABI::emitCXXStructor(GlobalDecl GD) {
+  auto *MD = cast<CXXMethodDecl>(GD.getDecl());
   auto *CD = dyn_cast<CXXConstructorDecl>(MD);
   const CXXDestructorDecl *DD = CD ? nullptr : cast<CXXDestructorDecl>(MD);
 
   StructorCodegen CGType = getCodegenToUse(CGM, MD);
 
-  if (Type == StructorType::Complete) {
-    GlobalDecl CompleteDecl;
+  if (CD ? GD.getCtorType() == Ctor_Complete
+         : GD.getDtorType() == Dtor_Complete) {
     GlobalDecl BaseDecl;
-    if (CD) {
-      CompleteDecl = GlobalDecl(CD, Ctor_Complete);
-      BaseDecl = GlobalDecl(CD, Ctor_Base);
-    } else {
-      CompleteDecl = GlobalDecl(DD, Dtor_Complete);
-      BaseDecl = GlobalDecl(DD, Dtor_Base);
-    }
+    if (CD)
+      BaseDecl = GD.getWithCtorType(Ctor_Base);
+    else
+      BaseDecl = GD.getWithDtorType(Dtor_Base);
 
     if (CGType == StructorCodegen::Alias || CGType == StructorCodegen::COMDAT) {
-      emitConstructorDestructorAlias(CGM, CompleteDecl, BaseDecl);
+      emitConstructorDestructorAlias(CGM, GD, BaseDecl);
       return;
     }
 
     if (CGType == StructorCodegen::RAUW) {
-      StringRef MangledName = CGM.getMangledName(CompleteDecl);
+      StringRef MangledName = CGM.getMangledName(GD);
       auto *Aliasee = CGM.GetAddrOfGlobal(BaseDecl);
       CGM.addReplacement(MangledName, Aliasee);
       return;
@@ -3880,7 +3877,8 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
   // base class if there is exactly one non-virtual base class with a
   // non-trivial destructor, there are no fields with a non-trivial
   // destructor, and the body of the destructor is trivial.
-  if (DD && Type == StructorType::Base && CGType != StructorCodegen::COMDAT &&
+  if (DD && GD.getDtorType() == Dtor_Base &&
+      CGType != StructorCodegen::COMDAT &&
       !CGM.TryEmitBaseDestructorAsAlias(DD))
     return;
 
@@ -3896,7 +3894,7 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
   // In such cases we should try to emit the deleting dtor as an alias to the
   // selected 'operator delete'.
 
-  llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type);
+  llvm::Function *Fn = CGM.codegenCXXStructor(GD);
 
   if (CGType == StructorCodegen::COMDAT) {
     SmallString<256> Buffer;
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 7a5cdf6865f..a736e39158e 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -205,7 +205,7 @@ class MicrosoftCXXABI : public CGCXXABI {
   // delegate to or alias the base destructor.
 
   AddedStructorArgs
-  buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+  buildStructorSignature(GlobalDecl GD,
                          SmallVectorImpl<CanQualType> &ArgTys) override;
 
   /// Non-base dtors should be emitted as delegating thunks in this ABI.
@@ -673,7 +673,7 @@ class MicrosoftCXXABI : public CGCXXABI {
                                   llvm::Value *MemPtr,
                                   const MemberPointerType *MPT) override;
 
-  void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
+  void emitCXXStructor(GlobalDecl GD) override;
 
   llvm::StructType *getCatchableTypeType() {
     if (CatchableTypeType)
@@ -1234,16 +1234,17 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
 }
 
 CGCXXABI::AddedStructorArgs
-MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD,
                                         SmallVectorImpl<CanQualType> &ArgTys) {
   AddedStructorArgs Added;
   // TODO: 'for base' flag
-  if (T == StructorType::Deleting) {
+  if (isa<CXXDestructorDecl>(GD.getDecl()) &&
+      GD.getDtorType() == Dtor_Deleting) {
     // The scalar deleting destructor takes an implicit int parameter.
     ArgTys.push_back(getContext().IntTy);
     ++Added.Suffix;
   }
-  auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+  auto *CD = dyn_cast<CXXConstructorDecl>(GD.getDecl());
   if (!CD)
     return Added;
 
@@ -1553,9 +1554,8 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
   if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0)
     Type = Dtor_Base;
 
-  CGCallee Callee =
-      CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
-                          GlobalDecl(DD, Type));
+  GlobalDecl GD(DD, Type);
+  CGCallee Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD);
 
   if (DD->isVirtual()) {
     assert(Type != CXXDtorType::Dtor_Deleting &&
@@ -1569,10 +1569,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
     BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
   }
 
-  CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(),
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(),
                             /*ImplicitParam=*/nullptr,
-                            /*ImplicitParamTy=*/QualType(), nullptr,
-                            getFromDtorType(Type));
+                            /*ImplicitParamTy=*/QualType(), nullptr);
   if (BaseDtorEndBB) {
     // Complete object handler should continue to be the remaining
     CGF.Builder.CreateBr(BaseDtorEndBB);
@@ -1886,8 +1885,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
   // We have only one destructor in the vftable but can get both behaviors
   // by passing an implicit int parameter.
   GlobalDecl GD(Dtor, Dtor_Deleting);
-  const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
-      Dtor, StructorType::Deleting);
+  const CGFunctionInfo *FInfo =
+      &CGM.getTypes().arrangeCXXStructorDeclaration(GD);
   llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
   CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
 
@@ -1897,9 +1896,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
       DtorType == Dtor_Deleting);
 
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
-  RValue RV =
-      CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam,
-                                Context.IntTy, CE, StructorType::Deleting);
+  RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(),
+                                        ImplicitParam, Context.IntTy, CE);
   return RV.getScalarVal();
 }
 
@@ -3818,44 +3816,36 @@ MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD,
   return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info);
 }
 
-static void emitCXXConstructor(CodeGenModule &CGM,
-                               const CXXConstructorDecl *ctor,
-                               StructorType ctorType) {
-  // There are no constructor variants, always emit the complete destructor.
-  llvm::Function *Fn = CGM.codegenCXXStructor(ctor, StructorType::Complete);
-  CGM.maybeSetTrivialComdat(*ctor, *Fn);
-}
+void MicrosoftCXXABI::emitCXXStructor(GlobalDecl GD) {
+  if (auto *ctor = dyn_cast<CXXConstructorDecl>(GD.getDecl())) {
+    // There are no constructor variants, always emit the complete destructor.
+    llvm::Function *Fn =
+        CGM.codegenCXXStructor(GD.getWithCtorType(Ctor_Complete));
+    CGM.maybeSetTrivialComdat(*ctor, *Fn);
+    return;
+  }
+
+  auto *dtor = cast<CXXDestructorDecl>(GD.getDecl());
 
-static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor,
-                              StructorType dtorType) {
   // Emit the base destructor if the base and complete (vbase) destructors are
   // equivalent. This effectively implements -mconstructor-aliases as part of
   // the ABI.
-  if (dtorType == StructorType::Complete &&
+  if (GD.getDtorType() == Dtor_Complete &&
       dtor->getParent()->getNumVBases() == 0)
-    dtorType = StructorType::Base;
+    GD = GD.getWithDtorType(Dtor_Base);
 
   // The base destructor is equivalent to the base destructor of its
   // base class if there is exactly one non-virtual base class with a
   // non-trivial destructor, there are no fields with a non-trivial
   // destructor, and the body of the destructor is trivial.
-  if (dtorType == StructorType::Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
+  if (GD.getDtorType() == Dtor_Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
     return;
 
-  llvm::Function *Fn = CGM.codegenCXXStructor(dtor, dtorType);
+  llvm::Function *Fn = CGM.codegenCXXStructor(GD);
   if (Fn->isWeakForLinker())
     Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
 }
 
-void MicrosoftCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
-                                      StructorType Type) {
-  if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
-    emitCXXConstructor(CGM, CD, Type);
-    return;
-  }
-  emitCXXDestructor(CGM, cast<CXXDestructorDecl>(MD), Type);
-}
-
 llvm::Function *
 MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
                                          CXXCtorType CT) {
@@ -3957,7 +3947,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
                                  /*Delegating=*/false, Args);
   // Call the destructor with our arguments.
   llvm::Constant *CalleePtr =
-    CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
+      CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete));
   CGCallee Callee =
       CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete));
   const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
@@ -4008,7 +3998,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
     if (CT == Ctor_CopyingClosure)
       CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure);
     else
-      CopyCtor = CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
+      CopyCtor = CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete));
 
     CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy);
   } else {
@@ -4221,7 +4211,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
     if (CXXDestructorDecl *DtorD = RD->getDestructor())
       if (!DtorD->isTrivial())
         CleanupFn = llvm::ConstantExpr::getBitCast(
-            CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete),
+            CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)),
             CGM.Int8PtrTy);
   // This is unused as far as we can tell, initialize it to null.
   llvm::Constant *ForwardCompat =
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 61283125dd3..17f726d357d 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -2268,6 +2268,12 @@ class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
     return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
   }
 
+  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
+  /// the autoreleaseRV/retainRV optimization.
+  bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const override {
+    return true;
+  }
+
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
     return 7;
   }
@@ -5591,8 +5597,10 @@ class ARMABIInfo : public SwiftABIInfo {
   ABIKind getABIKind() const { return Kind; }
 
 private:
-  ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const;
+  ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
+                                unsigned functionCallConv) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
+                                  unsigned functionCallConv) const;
   ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base,
                                           uint64_t Members) const;
   ABIArgInfo coerceIllegalVector(QualType Ty) const;
@@ -5602,6 +5610,8 @@ class ARMABIInfo : public SwiftABIInfo {
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;
 
+  bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const;
+
   void computeInfo(CGFunctionInfo &FI) const override;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -5722,11 +5732,13 @@ void WindowsARMTargetCodeGenInfo::setTargetAttributes(
 
 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (!::classifyReturnType(getCXXABI(), FI, *this))
-    FI.getReturnInfo() =
-        classifyReturnType(FI.getReturnType(), FI.isVariadic());
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
+                                            FI.getCallingConvention());
 
   for (auto &I : FI.arguments())
-    I.info = classifyArgumentType(I.type, FI.isVariadic());
+    I.info = classifyArgumentType(I.type, FI.isVariadic(),
+                                  FI.getCallingConvention());
+
 
   // Always honor user-specified calling convention.
   if (FI.getCallingConvention() != llvm::CallingConv::C)
@@ -5805,8 +5817,8 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
   return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
 }
 
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
-                                            bool isVariadic) const {
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
+                                            unsigned functionCallConv) const {
   // 6.1.2.1 The following argument types are VFP CPRCs:
   //   A single-precision floating-point type (including promoted
   //   half-precision types); A double-precision floating-point type;
@@ -5814,7 +5826,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   //   with a Base Type of a single- or double-precision floating-point type,
   //   64-bit containerized vectors or 128-bit containerized vectors with one
   //   to four Elements.
-  bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic;
+  // Variadic functions should always marshal to the base standard.
+  bool IsAAPCS_VFP =
+      !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false);
 
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
@@ -5827,7 +5841,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   // half type natively, and does not need to interwork with AAPCS code.
   if ((Ty->isFloat16Type() || Ty->isHalfType()) &&
       !getContext().getLangOpts().NativeHalfArgsAndReturns) {
-    llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
+    llvm::Type *ResType = IsAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
     return ABIArgInfo::getDirect(ResType);
@@ -5851,7 +5865,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   if (isEmptyRecord(getContext(), Ty, true))
     return ABIArgInfo::getIgnore();
 
-  if (IsEffectivelyAAPCS_VFP) {
+  if (IsAAPCS_VFP) {
     // Homogeneous Aggregates need to be expanded when we can fit the aggregate
     // into VFP registers.
     const Type *Base = nullptr;
@@ -6008,10 +6022,12 @@ static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
   return true;
 }
 
-ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
-                                          bool isVariadic) const {
-  bool IsEffectivelyAAPCS_VFP =
-      (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic;
+ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
+                                          unsigned functionCallConv) const {
+
+  // Variadic functions should always marshal to the base standard.
+  bool IsAAPCS_VFP =
+      !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true);
 
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
@@ -6032,7 +6048,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
   // half type natively, and does not need to interwork with AAPCS code.
   if ((RetTy->isFloat16Type() || RetTy->isHalfType()) &&
       !getContext().getLangOpts().NativeHalfArgsAndReturns) {
-    llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
+    llvm::Type *ResType = IsAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
     return ABIArgInfo::getDirect(ResType);
@@ -6081,7 +6097,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
     return ABIArgInfo::getIgnore();
 
   // Check for homogeneous aggregates with AAPCS-VFP.
-  if (IsEffectivelyAAPCS_VFP) {
+  if (IsAAPCS_VFP) {
     const Type *Base = nullptr;
     uint64_t Members = 0;
     if (isHomogeneousAggregate(RetTy, Base, Members))
@@ -6186,6 +6202,16 @@ bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
   return Members <= 4;
 }
 
+bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention,
+                                        bool acceptHalf) const {
+  // Give precedence to user-specified calling conventions.
+  if (callConvention != llvm::CallingConv::C)
+    return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP);
+  else
+    return (getABIKind() == AAPCS_VFP) ||
+           (acceptHalf && (getABIKind() == AAPCS16_VFP));
+}
+
 Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty) const {
   CharUnits SlotSize = CharUnits::fromQuantity(4);
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index b5d5c1fb300..8a4154030ce 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -156,6 +156,12 @@ class TargetCodeGenInfo {
     return "";
   }
 
+  /// Determine whether a call to objc_retainAutoreleasedReturnValue should be
+  /// marked as 'notail'.
+  virtual bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const {
+    return false;
+  }
+
   /// Return a constant used by UBSan as a signature to identify functions
   /// possessing type information, or 0 if the platform is unsupported.
   virtual llvm::Constant *
diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp
index 1917f211785..ae8720cdee4 100644
--- a/lib/Driver/ToolChains/Clang.cpp
+++ b/lib/Driver/ToolChains/Clang.cpp
@@ -4624,6 +4624,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits);
   Args.AddLastArg(CmdArgs, options::OPT_ftime_report);
   Args.AddLastArg(CmdArgs, options::OPT_ftrapv);
+  Args.AddLastArg(CmdArgs, options::OPT_malign_double);
 
   if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {
     CmdArgs.push_back("-ftrapv-handler");
diff --git a/lib/Driver/ToolChains/WebAssembly.cpp b/lib/Driver/ToolChains/WebAssembly.cpp
index 0104cbbbf4d..b69588d4c1e 100644
--- a/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/lib/Driver/ToolChains/WebAssembly.cpp
@@ -62,8 +62,10 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     if (ToolChain.ShouldLinkCXXStdlib(Args))
       ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
 
-    if (Args.hasArg(options::OPT_pthread))
+    if (Args.hasArg(options::OPT_pthread)) {
       CmdArgs.push_back("-lpthread");
+      CmdArgs.push_back("--shared-memory");
+    }
 
     CmdArgs.push_back("-lc");
     AddRunTimeLibs(ToolChain, ToolChain.getDriver(), CmdArgs, Args);
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index a2a13d4d465..b64fdd7c0ba 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -881,14 +881,30 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
     State.Stack.back().BreakBeforeClosingBrace = true;
 
   if (State.Stack.back().AvoidBinPacking) {
-    // If we are breaking after '(', '{', '<', this is not bin packing
-    // unless AllowAllParametersOfDeclarationOnNextLine is false or this is a
-    // dict/object literal.
-    if (!Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) ||
+    // If we are breaking after '(', '{', '<', or this is the break after a ':'
+    // to start a member initializater list in a constructor, this should not
+    // be considered bin packing unless the relevant AllowAll option is false or
+    // this is a dict/object literal.
+    bool PreviousIsBreakingCtorInitializerColon =
+        Previous.is(TT_CtorInitializerColon) &&
+        Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
+    if (!(Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) ||
+          PreviousIsBreakingCtorInitializerColon) ||
         (!Style.AllowAllParametersOfDeclarationOnNextLine &&
          State.Line->MustBeDeclaration) ||
+        (!Style.AllowAllArgumentsOnNextLine &&
+         !State.Line->MustBeDeclaration) ||
+        (!Style.AllowAllConstructorInitializersOnNextLine &&
+         PreviousIsBreakingCtorInitializerColon) ||
         Previous.is(TT_DictLiteral))
       State.Stack.back().BreakBeforeParameter = true;
+
+    // If we are breaking after a ':' to start a member initializer list,
+    // and we allow all arguments on the next line, we should not break
+    // before the next parameter.
+    if (PreviousIsBreakingCtorInitializerColon &&
+        Style.AllowAllConstructorInitializersOnNextLine)
+      State.Stack.back().BreakBeforeParameter = false;
   }
 
   return Penalty;
@@ -1102,9 +1118,13 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
              ? 0
              : 2);
     State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
-    if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+    if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
       State.Stack.back().AvoidBinPacking = true;
-    State.Stack.back().BreakBeforeParameter = false;
+      State.Stack.back().BreakBeforeParameter =
+          !Style.AllowAllConstructorInitializersOnNextLine;
+    } else {
+      State.Stack.back().BreakBeforeParameter = false;
+    }
   }
   if (Current.is(TT_CtorInitializerColon) &&
       Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) {
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 5fe118a7161..2d1adb58e4a 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -335,6 +335,10 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines);
     IO.mapOptional("AlignOperands", Style.AlignOperands);
     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
+    IO.mapOptional("AllowAllArgumentsOnNextLine",
+                   Style.AllowAllArgumentsOnNextLine);
+    IO.mapOptional("AllowAllConstructorInitializersOnNextLine",
+                   Style.AllowAllConstructorInitializersOnNextLine);
     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
                    Style.AllowAllParametersOfDeclarationOnNextLine);
     IO.mapOptional("AllowShortBlocksOnASingleLine",
@@ -351,6 +355,7 @@ template <> struct MappingTraits<FormatStyle> {
                    Style.AlwaysBreakAfterDefinitionReturnType);
     IO.mapOptional("AlwaysBreakAfterReturnType",
                    Style.AlwaysBreakAfterReturnType);
+
     // If AlwaysBreakAfterDefinitionReturnType was specified but
     // AlwaysBreakAfterReturnType was not, initialize the latter from the
     // former for backwards compatibility.
@@ -641,6 +646,8 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.AlignTrailingComments = true;
   LLVMStyle.AlignConsecutiveAssignments = false;
   LLVMStyle.AlignConsecutiveDeclarations = false;
+  LLVMStyle.AllowAllArgumentsOnNextLine = true;
+  LLVMStyle.AllowAllConstructorInitializersOnNextLine = true;
   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
   LLVMStyle.AllowShortBlocksOnASingleLine = false;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index ccf5e51576d..a3000a8fdeb 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -1119,10 +1119,10 @@ class AnnotatingParser {
       return LT_ImportStatement;
     }
 
-    // In .proto files, top-level options are very similar to import statements
-    // and should not be line-wrapped.
+    // In .proto files, top-level options and package statements are very
+    // similar to import statements and should not be line-wrapped.
     if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
-        CurrentToken->is(Keywords.kw_option)) {
+        CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
       next();
       if (CurrentToken && CurrentToken->is(tok::identifier))
         return LT_ImportStatement;
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 814e8143e17..5c7ab1240c4 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -2018,6 +2018,10 @@ bool UnwrappedLineParser::parseEnum() {
       FormatTok->isOneOf(tok::colon, tok::question))
     return false;
 
+  // In protobuf, "enum" can be used as a field name.
+  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
+    return false;
+
   // Eat up enum class ...
   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
     nextToken();
diff --git a/lib/Headers/ia32intrin.h b/lib/Headers/ia32intrin.h
index dcbb1e0ab51..64ead34796c 100644
--- a/lib/Headers/ia32intrin.h
+++ b/lib/Headers/ia32intrin.h
@@ -28,6 +28,160 @@
 #ifndef __IA32INTRIN_H
 #define __IA32INTRIN_H
 
+/** Find the first set bit starting from the lsb. Result is undefined if
+ *  input is 0.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> BSF </c> instruction or the
+ *  <c> TZCNT </c> instruction.
+ *
+ *  \param __A
+ *     A 32-bit integer operand.
+ *  \returns A 32-bit integer containing the bit number.
+ */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__bsfd(int __A) {
+  return __builtin_ctz(__A);
+}
+
+/** Find the first set bit starting from the msb. Result is undefined if
+ *  input is 0.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> BSR </c> instruction or the
+ *  <c> LZCNT </c> instruction and an <c> XOR </c>.
+ *
+ *  \param __A
+ *     A 32-bit integer operand.
+ *  \returns A 32-bit integer containing the bit number.
+ */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__bsrd(int __A) {
+  return 31 - __builtin_clz(__A);
+}
+
+/** Swaps the bytes in the input. Converting little endian to big endian or
+ *  vice versa.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
+ *
+ *  \param __A
+ *     A 32-bit integer operand.
+ *  \returns A 32-bit integer containing the swapped bytes.
+ */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__bswapd(int __A) {
+  return __builtin_bswap32(__A);
+}
+
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_bswap(int __A) {
+  return __builtin_bswap32(__A);
+}
+
+#define _bit_scan_forward(A) __bsfd((A))
+#define _bit_scan_reverse(A) __bsrd((A))
+
+#ifdef __x86_64__
+/** Find the first set bit starting from the lsb. Result is undefined if
+ *  input is 0.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> BSF </c> instruction or the
+ *  <c> TZCNT </c> instruction.
+ *
+ *  \param __A
+ *     A 64-bit integer operand.
+ *  \returns A 32-bit integer containing the bit number.
+ */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__bsfq(long long __A) {
+  return __builtin_ctzll(__A);
+}
+
+/** Find the first set bit starting from the msb. Result is undefined if
+ *  input is 0.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> BSR </c> instruction or the
+ *  <c> LZCNT </c> instruction and an <c> XOR </c>.
+ *
+ *  \param __A
+ *     A 64-bit integer operand.
+ *  \returns A 32-bit integer containing the bit number.
+ */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__bsrq(long long __A) {
+  return 63 - __builtin_clzll(__A);
+}
+
+/** Swaps the bytes in the input. Converting little endian to big endian or
+ *  vice versa.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
+ *
+ *  \param __A
+ *     A 64-bit integer operand.
+ *  \returns A 64-bit integer containing the swapped bytes.
+ */
+static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+__bswapq(long long __A) {
+  return __builtin_bswap64(__A);
+}
+
+#define _bswap64(A) __bswapq((A))
+#endif
+
+/** Counts the number of bits in the source operand having a value of 1.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
+ *  a sequence of arithmetic and logic ops to calculate it.
+ *
+ *  \param __A
+ *     An unsigned 32-bit integer operand.
+ *  \returns A 32-bit integer containing the number of bits with value 1 in the
+ *     source operand.
+ */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__popcntd(unsigned int __A)
+{
+  return __builtin_popcount(__A);
+}
+
+#define _popcnt32(A) __popcntd((A))
+
+#ifdef __x86_64__
+/** Counts the number of bits in the source operand having a value of 1.
+ *
+ *  \headerfile <x86intrin.h>
+ *
+ *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
+ *  a sequence of arithmetic and logic ops to calculate it.
+ *
+ *  \param __A
+ *     An unsigned 64-bit integer operand.
+ *  \returns A 64-bit integer containing the number of bits with value 1 in the
+ *     source operand.
+ */
+static __inline__ long long __attribute__((__always_inline__, __nodebug__))
+__popcntq(unsigned long long __A)
+{
+  return __builtin_popcountll(__A);
+}
+
+#define _popcnt64(A) __popcntq((A))
+#endif /* __x86_64__ */
+
 #ifdef __x86_64__
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
 __readeflags(void)
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index 19edd4ac4a4..634db600b05 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -241,18 +241,6 @@ _rdrand64_step(unsigned long long *__p)
 #endif
 #endif /* __RDRND__ */
 
-/* __bit_scan_forward */
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
-_bit_scan_forward(int __A) {
-  return __builtin_ctz(__A);
-}
-
-/* __bit_scan_reverse */
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
-_bit_scan_reverse(int __A) {
-  return 31 - __builtin_clz(__A);
-}
-
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
 #ifdef __x86_64__
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
diff --git a/lib/Headers/popcntintrin.h b/lib/Headers/popcntintrin.h
index 75ceab9e150..dc4a8bd260d 100644
--- a/lib/Headers/popcntintrin.h
+++ b/lib/Headers/popcntintrin.h
@@ -43,22 +43,6 @@ _mm_popcnt_u32(unsigned int __A)
   return __builtin_popcount(__A);
 }
 
-/// Counts the number of bits in the source operand having a value of 1.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> POPCNT </c> instruction.
-///
-/// \param __A
-///    A signed 32-bit integer operand.
-/// \returns A 32-bit integer containing the number of bits with value 1 in the
-///    source operand.
-static __inline__ int __DEFAULT_FN_ATTRS
-_popcnt32(int __A)
-{
-  return __builtin_popcount(__A);
-}
-
 #ifdef __x86_64__
 /// Counts the number of bits in the source operand having a value of 1.
 ///
@@ -75,22 +59,6 @@ _mm_popcnt_u64(unsigned long long __A)
 {
   return __builtin_popcountll(__A);
 }
-
-/// Counts the number of bits in the source operand having a value of 1.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> POPCNT </c> instruction.
-///
-/// \param __A
-///    A signed 64-bit integer operand.
-/// \returns A 64-bit integer containing the number of bits with value 1 in the
-///    source operand.
-static __inline__ long long __DEFAULT_FN_ATTRS
-_popcnt64(long long __A)
-{
-  return __builtin_popcountll(__A);
-}
 #endif /* __x86_64__ */
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 8d4b9f06569..1850af9cfa4 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -1813,26 +1813,26 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     return;
   }
 
-  // Should we enter the source file? Set to false if either the source file is
+  // Should we enter the source file? Set to Skip if either the source file is
   // known to have no effect beyond its effect on module visibility -- that is,
-  // if it's got an include guard that is already defined or is a modular header
-  // we've imported or already built.
-  bool ShouldEnter = true;
+  // if it's got an include guard that is already defined, set to Import if it
+  // is a modular header we've already built and should import.
+  enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
 
   if (PPOpts->SingleFileParseMode)
-    ShouldEnter = false;
+    Action = IncludeLimitReached;
 
   // If we've reached the max allowed include depth, it is usually due to an
   // include cycle. Don't enter already processed files again as it can lead to
   // reaching the max allowed include depth again.
-  if (ShouldEnter && HasReachedMaxIncludeDepth && File &&
+  if (Action == Enter && HasReachedMaxIncludeDepth && File &&
       HeaderInfo.getFileInfo(File).NumIncludes)
-    ShouldEnter = false;
+    Action = IncludeLimitReached;
 
   // Determine whether we should try to import the module for this #include, if
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
-  if (ShouldEnter && File && SuggestedModule && getLangOpts().Modules &&
+  if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
       !isForModuleBuilding(SuggestedModule.getModule(),
                            getLangOpts().CurrentModule,
                            getLangOpts().ModuleName)) {
@@ -1872,9 +1872,9 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
            "the imported module is different than the suggested one");
 
-    if (Imported)
-      ShouldEnter = false;
-    else if (Imported.isMissingExpected()) {
+    if (Imported) {
+      Action = Import;
+    } else if (Imported.isMissingExpected()) {
       // We failed to find a submodule that we assumed would exist (because it
       // was in the directory of an umbrella header, for instance), but no
       // actual module containing it exists (because the umbrella header is
@@ -1907,13 +1907,18 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
 
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
-  bool SkipHeader = false;
-  if (ShouldEnter && File &&
+  if (Action == Enter && File &&
       !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
                                          getLangOpts().Modules,
                                          SuggestedModule.getModule())) {
-    ShouldEnter = false;
-    SkipHeader = true;
+    // Even if we've already preprocessed this header once and know that we
+    // don't need to see its contents again, we still need to import it if it's
+    // modular because we might not have imported it from this submodule before.
+    //
+    // FIXME: We don't do this when compiling a PCH because the AST
+    // serialization layer can't cope with it. This means we get local
+    // submodule visibility semantics wrong in that case.
+    Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
   }
 
   if (Callbacks) {
@@ -1922,8 +1927,9 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
         HashLoc, IncludeTok,
         LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled,
         FilenameRange, File, SearchPath, RelativePath,
-        ShouldEnter ? nullptr : SuggestedModule.getModule(), FileCharacter);
-    if (SkipHeader && !SuggestedModule.getModule())
+        Action == Import ? SuggestedModule.getModule() : nullptr,
+        FileCharacter);
+    if (Action == Skip)
       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
   }
 
@@ -1968,28 +1974,33 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     }
   }
 
-  // If we don't need to enter the file, stop now.
-  if (!ShouldEnter) {
+  switch (Action) {
+  case Skip:
+    // If we don't need to enter the file, stop now.
+    return;
+
+  case IncludeLimitReached:
+    // If we reached our include limit and don't want to enter any more files,
+    // don't go any further.
+    return;
+
+  case Import: {
     // If this is a module import, make it visible if needed.
-    if (auto *M = SuggestedModule.getModule()) {
-      // When building a pch, -fmodule-name tells the compiler to textually
-      // include headers in the specified module. But it is possible that
-      // ShouldEnter is false because we are skipping the header. In that
-      // case, We are not importing the specified module.
-      if (SkipHeader && getLangOpts().CompilingPCH &&
-          isForModuleBuilding(M, getLangOpts().CurrentModule,
-                              getLangOpts().ModuleName))
-        return;
+    Module *M = SuggestedModule.getModule();
+    assert(M && "no module to import");
 
-      makeModuleVisible(M, HashLoc);
+    makeModuleVisible(M, HashLoc);
 
-      if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
-          tok::pp___include_macros)
-        EnterAnnotationToken(DirectiveRange, tok::annot_module_include, M);
-    }
+    if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
+        tok::pp___include_macros)
+      EnterAnnotationToken(DirectiveRange, tok::annot_module_include, M);
     return;
   }
 
+  case Enter:
+    break;
+  }
+
   // Check that we don't have infinite #include recursion.
   if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
     Diag(FilenameTok, diag::err_pp_include_too_deep);
@@ -2024,6 +2035,11 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     // When building a pch, -fmodule-name tells the compiler to textually
     // include headers in the specified module. We are not building the
     // specified module.
+    //
+    // FIXME: This is the wrong way to handle this. We should produce a PCH
+    // that behaves the same as the header would behave in a compilation using
+    // that PCH, which means we should enter the submodule. We need to teach
+    // the AST serialization layer to deal with the resulting AST.
     if (getLangOpts().CompilingPCH &&
         isForModuleBuilding(M, getLangOpts().CurrentModule,
                             getLangOpts().ModuleName))
@@ -2069,6 +2085,10 @@ void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
     LookupFromFile = CurPPLexer->getFileEntry();
     Lookup = nullptr;
   } else if (!Lookup) {
+    // The current file was not found by walking the include path. Either it
+    // is the primary file (handled above), or it was found by absolute path,
+    // or it was found relative to such a file.
+    // FIXME: Track enough information so we know which case we're in.
     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
   } else {
     // Start looking up in the next directory.
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index f3a1a83c5fd..28ac50f8385 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -2281,10 +2281,9 @@ StmtResult Parser::ParseCXXCatchBlock(bool FnCatch) {
   // C++ 3.3.2p3:
   // The name in a catch exception-declaration is local to the handler and
   // shall not be redeclared in the outermost block of the handler.
-  unsigned ScopeFlags = Scope::DeclScope | Scope::ControlScope |
-                        Scope::CatchScope |
-                        (FnCatch ? Scope::FnTryCatchScope : 0);
-  ParseScope CatchScope(this, ScopeFlags);
+  ParseScope CatchScope(this, Scope::DeclScope | Scope::ControlScope |
+                                  Scope::CatchScope |
+                                  (FnCatch ? Scope::FnTryCatchScope : 0));
 
   // exception-declaration is equivalent to '...' or a parameter-declaration
   // without default arguments.
@@ -2313,7 +2312,7 @@ StmtResult Parser::ParseCXXCatchBlock(bool FnCatch) {
     return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace);
 
   // FIXME: Possible draft standard bug: attribute-specifier should be allowed?
-  StmtResult Block(ParseCompoundStatement(/*isStmtExpr=*/false, ScopeFlags));
+  StmtResult Block(ParseCompoundStatement());
   if (Block.isInvalid())
     return Block;
 
diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index d062e8b201a..4c7626f27d8 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -499,7 +499,7 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
 
     if (!AllowedInit) {
       Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
-                                  ? diag::err_shared_var_init
+                                  ? diag::warn_shared_var_init
                                   : diag::err_dynamic_var_init)
           << Init->getSourceRange();
       VD->setInvalidDecl();
diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp
index 5d7ad8c5be6..f541d75ce5e 100644
--- a/lib/Sema/SemaOpenMP.cpp
+++ b/lib/Sema/SemaOpenMP.cpp
@@ -422,6 +422,16 @@ class DSAStackTy {
     RequiresDecls.push_back(RD);
   }
 
+  /// Checks if the defined 'requires' directive has specified type of clause.
+  template <typename ClauseType>
+  bool hasRequiresDeclWithClause() {
+    return llvm::any_of(RequiresDecls, [](const OMPRequiresDecl *D) {
+      return llvm::any_of(D->clauselists(), [](const OMPClause *C) {
+        return isa<ClauseType>(C);
+      });
+    });
+  }
+
   /// Checks for a duplicate clause amongst previously declared requires
   /// directives
   bool hasDuplicateRequiresClause(ArrayRef<OMPClause *> ClauseList) const {
@@ -2219,35 +2229,22 @@ getAllocatorKind(Sema &S, DSAStackTy *Stack, Expr *Allocator) {
     return OMPAllocateDeclAttr::OMPDefaultMemAlloc;
   if (Allocator->isTypeDependent() || Allocator->isValueDependent() ||
       Allocator->isInstantiationDependent() ||
-      Allocator->containsUnexpandedParameterPack() ||
-      !Allocator->isEvaluatable(S.getASTContext()))
+      Allocator->containsUnexpandedParameterPack())
     return OMPAllocateDeclAttr::OMPUserDefinedMemAlloc;
-  bool Suppress = S.getDiagnostics().getSuppressAllDiagnostics();
-  S.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
   auto AllocatorKindRes = OMPAllocateDeclAttr::OMPUserDefinedMemAlloc;
   for (int I = OMPAllocateDeclAttr::OMPDefaultMemAlloc;
        I < OMPAllocateDeclAttr::OMPUserDefinedMemAlloc; ++I) {
     auto AllocatorKind = static_cast<OMPAllocateDeclAttr::AllocatorTypeTy>(I);
     Expr *DefAllocator = Stack->getAllocator(AllocatorKind);
-    // Compare allocator with the predefined allocator and if true - return
-    // predefined allocator kind.
-    ExprResult DefAllocRes = S.DefaultLvalueConversion(DefAllocator);
-    ExprResult AllocRes = S.DefaultLvalueConversion(Allocator);
-    ExprResult CompareRes = S.CreateBuiltinBinOp(
-        Allocator->getExprLoc(), BO_EQ, DefAllocRes.get(), AllocRes.get());
-    if (!CompareRes.isUsable())
-      continue;
-    bool Result;
-    if (!CompareRes.get()->EvaluateAsBooleanCondition(Result,
-                                                      S.getASTContext()))
-      continue;
-    if (Result) {
+    const Expr *AE = Allocator->IgnoreParenImpCasts();
+    llvm::FoldingSetNodeID AEId, DAEId;
+    AE->Profile(AEId, S.getASTContext(), /*Canonical=*/true);
+    DefAllocator->Profile(DAEId, S.getASTContext(), /*Canonical=*/true);
+    if (AEId == DAEId) {
       AllocatorKindRes = AllocatorKind;
       break;
     }
-
   }
-  S.getDiagnostics().setSuppressAllDiagnostics(Suppress);
   return AllocatorKindRes;
 }
 
@@ -2256,8 +2253,17 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective(
     ArrayRef<OMPClause *> Clauses, DeclContext *Owner) {
   assert(Clauses.size() <= 1 && "Expected at most one clause.");
   Expr *Allocator = nullptr;
-  if (!Clauses.empty())
+  if (Clauses.empty()) {
+    // OpenMP 5.0, 2.11.3 allocate Directive, Restrictions.
+    // allocate directives that appear in a target region must specify an
+    // allocator clause unless a requires directive with the dynamic_allocators
+    // clause is present in the same compilation unit.
+    if (LangOpts.OpenMPIsDevice &&
+        !DSAStack->hasRequiresDeclWithClause<OMPDynamicAllocatorsClause>())
+      targetDiag(Loc, diag::err_expected_allocator_clause);
+  } else {
     Allocator = cast<OMPAllocatorClause>(Clauses.back())->getAllocator();
+  }
   OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind =
       getAllocatorKind(*this, DSAStack, Allocator);
   SmallVector<Expr *, 8> Vars;
@@ -2325,26 +2331,7 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective(
     // allocator clause must be a constant expression that evaluates to one of
     // the predefined memory allocator values.
     if (Allocator && VD->hasGlobalStorage()) {
-      bool IsPredefinedAllocator = false;
-      if (const auto *DRE =
-              dyn_cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())) {
-        if (DRE->getType().isConstant(getASTContext())) {
-          DeclarationName DN = DRE->getDecl()->getDeclName();
-          if (DN.isIdentifier()) {
-            StringRef PredefinedAllocators[] = {
-                "omp_default_mem_alloc", "omp_large_cap_mem_alloc",
-                "omp_const_mem_alloc",   "omp_high_bw_mem_alloc",
-                "omp_low_lat_mem_alloc", "omp_cgroup_mem_alloc",
-                "omp_pteam_mem_alloc",   "omp_thread_mem_alloc",
-            };
-            IsPredefinedAllocator =
-                llvm::any_of(PredefinedAllocators, [&DN](StringRef S) {
-                  return DN.getAsIdentifierInfo()->isStr(S);
-                });
-          }
-        }
-      }
-      if (!IsPredefinedAllocator) {
+      if (AllocatorKind == OMPAllocateDeclAttr::OMPUserDefinedMemAlloc) {
         Diag(Allocator->getExprLoc(),
              diag::err_omp_expected_predefined_allocator)
             << Allocator->getSourceRange();
diff --git a/lib/Tooling/AllTUsExecution.cpp b/lib/Tooling/AllTUsExecution.cpp
index bc50412cf4f..ca9db7a561b 100644
--- a/lib/Tooling/AllTUsExecution.cpp
+++ b/lib/Tooling/AllTUsExecution.cpp
@@ -9,6 +9,7 @@
 #include "clang/Tooling/AllTUsExecution.h"
 #include "clang/Tooling/ToolExecutorPluginRegistry.h"
 #include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/VirtualFileSystem.h"
 
 namespace clang {
 namespace tooling {
@@ -114,25 +115,22 @@ llvm::Error AllTUsToolExecutor::execute(
   {
     llvm::ThreadPool Pool(ThreadCount == 0 ? llvm::hardware_concurrency()
                                            : ThreadCount);
-    llvm::SmallString<128> InitialWorkingDir;
-    if (auto EC = llvm::sys::fs::current_path(InitialWorkingDir)) {
-      InitialWorkingDir = "";
-      llvm::errs() << "Error while getting current working directory: "
-                   << EC.message() << "\n";
-    }
     for (std::string File : Files) {
       Pool.async(
           [&](std::string Path) {
             Log("[" + std::to_string(Count()) + "/" + TotalNumStr +
                 "] Processing file " + Path);
-            ClangTool Tool(Compilations, {Path});
+            // Each thread gets an indepent copy of a VFS to allow different
+            // concurrent working directories.
+            IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS =
+                llvm::vfs::createPhysicalFileSystem().release();
+            ClangTool Tool(Compilations, {Path},
+                           std::make_shared<PCHContainerOperations>(), FS);
             Tool.appendArgumentsAdjuster(Action.second);
             Tool.appendArgumentsAdjuster(getDefaultArgumentsAdjusters());
             for (const auto &FileAndContent : OverlayFiles)
               Tool.mapVirtualFile(FileAndContent.first(),
                                   FileAndContent.second);
-            // Do not restore working dir from multiple threads to avoid races.
-            Tool.setRestoreWorkingDir(false);
             if (Tool.run(Action.first.get()))
               AppendError(llvm::Twine("Failed to run action on ") + Path +
                           "\n");
@@ -141,11 +139,6 @@ llvm::Error AllTUsToolExecutor::execute(
     }
     // Make sure all tasks have finished before resetting the working directory.
     Pool.wait();
-    if (!InitialWorkingDir.empty()) {
-      if (auto EC = llvm::sys::fs::set_current_path(InitialWorkingDir))
-        llvm::errs() << "Error while restoring working directory: "
-                     << EC.message() << "\n";
-    }
   }
 
   if (!ErrorMsg.empty())
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index 40e39912be9..9a5b3a9f18b 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -4411,7 +4411,7 @@ uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
 // CHECK-LABEL: @test_vpadd_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %a, <2 x float> %b)
+// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
 // CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
 // CHECK:   ret <2 x float> [[VPADD_V2_I]]
 float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
@@ -4475,7 +4475,7 @@ uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
 // CHECK-LABEL: @test_vpaddq_f32(
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
+// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
 // CHECK:   ret <4 x float> [[VPADDQ_V2_I]]
 float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
@@ -4485,7 +4485,7 @@ float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
 // CHECK-LABEL: @test_vpaddq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %a, <2 x double> %b)
+// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
 // CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
 // CHECK:   ret <2 x double> [[VPADDQ_V2_I]]
 float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
diff --git a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index e1a2e3fb92d..a4bf8753363 100644
--- a/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -736,14 +736,14 @@ float16x8_t test_vmulxq_f16(float16x8_t a, float16x8_t b) {
 }
 
 // CHECK-LABEL: test_vpadd_f16
-// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.addp.v4f16(<4 x half> %a, <4 x half> %b)
+// CHECK:  [[ADD:%.*]] = call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %a, <4 x half> %b)
 // CHECK:  ret <4 x half> [[ADD]]
 float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
   return vpadd_f16(a, b);
 }
 
 // CHECK-LABEL: test_vpaddq_f16
-// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.addp.v8f16(<8 x half> %a, <8 x half> %b)
+// CHECK:  [[ADD:%.*]] = call <8 x half> @llvm.aarch64.neon.faddp.v8f16(<8 x half> %a, <8 x half> %b)
 // CHECK:  ret <8 x half> [[ADD]]
 float16x8_t test_vpaddq_f16(float16x8_t a, float16x8_t b) {
   return vpaddq_f16(a, b);
diff --git a/test/CodeGen/arm-target-features.c b/test/CodeGen/arm-target-features.c
index ca574cc05d5..f58d37824aa 100644
--- a/test/CodeGen/arm-target-features.c
+++ b/test/CodeGen/arm-target-features.c
@@ -31,6 +31,7 @@
 // CHECK-BASIC-V8: "target-features"="+armv8-a,+crc,+crypto,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,+thumb-mode"
 
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
 // CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+dotprod,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode"
 
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM
diff --git a/test/CodeGen/attr-cpuspecific.c b/test/CodeGen/attr-cpuspecific.c
index d6c99648cb7..2c5e411ce3a 100644
--- a/test/CodeGen/attr-cpuspecific.c
+++ b/test/CodeGen/attr-cpuspecific.c
@@ -254,6 +254,6 @@ int DispatchFirst(void) {return 1;}
 // WINDOWS: define dso_local i32 @DispatchFirst.B
 // WINDOWS: ret i32 1
 
-// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
+// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
diff --git a/test/CodeGen/attr-target-x86-mmx.c b/test/CodeGen/attr-target-x86-mmx.c
index 412e8e93af9..01663766d98 100644
--- a/test/CodeGen/attr-target-x86-mmx.c
+++ b/test/CodeGen/attr-target-x86-mmx.c
@@ -19,4 +19,4 @@ void __attribute__((target("sse"))) shift(__m64 a, __m64 b, int c) {
   _mm_srai_pi32(a, c);
 }
 
-// CHECK: "target-features"="+mmx,+sse,+x87"
+// CHECK: "target-features"="+cx8,+mmx,+sse,+x87"
diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c
index 153cdb3e94c..56ccaf98ea4 100644
--- a/test/CodeGen/attr-target-x86.c
+++ b/test/CodeGen/attr-target-x86.c
@@ -48,11 +48,11 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
 // CHECK: qq{{.*}} #6
 // CHECK: lake{{.*}} #7
 // CHECK: use_before_def{{.*}} #7
-// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
-// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
-// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx"
-// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx"
+// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
+// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
+// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
+// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
diff --git a/test/CodeGen/attr-target-x87-softfp.c b/test/CodeGen/attr-target-x87-softfp.c
index 16b7cfe8277..0d26dab74ec 100644
--- a/test/CodeGen/attr-target-x87-softfp.c
+++ b/test/CodeGen/attr-target-x87-softfp.c
@@ -7,10 +7,10 @@ int __attribute__((target("no-x87"))) bar(int a) { return 4; }
 // CHECK: foo{{.*}} #0
 // CHECK: bar{{.*}} #1
 
-// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
+// CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
 // HARD: "use-soft-float"="false"
 // SOFT: "use-soft-float"="true"
 
-// CHECK: #1 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,-x87"
+// CHECK: #1 = {{.*}}"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,-x87"
 // HARD: "use-soft-float"="false"
 // SOFT: "use-soft-float"="true"
diff --git a/test/CodeGen/bitscan-builtins.c b/test/CodeGen/bitscan-builtins.c
index 25dfa404620..176d829127b 100644
--- a/test/CodeGen/bitscan-builtins.c
+++ b/test/CodeGen/bitscan-builtins.c
@@ -3,18 +3,45 @@
 // PR33722
 // RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -fms-extensions -fms-compatibility-version=19.00 -emit-llvm -o - %s | FileCheck %s
 
-#include <immintrin.h>
+#include <x86intrin.h>
 
 int test_bit_scan_forward(int a) {
   return _bit_scan_forward(a);
 // CHECK: @test_bit_scan_forward
-// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(
+// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
 // CHECK: ret i32 %[[call]]
 }
 
 int test_bit_scan_reverse(int a) {
   return _bit_scan_reverse(a);
-// CHECK:  %[[call:.*]] = call i32 @llvm.ctlz.i32(
+// CHECK:  %[[call:.*]] = call i32 @llvm.ctlz.i32(i32 %{{.*}}, i1 true)
 // CHECK:  %[[sub:.*]] = sub nsw i32 31, %[[call]]
 // CHECK: ret i32 %[[sub]]
 }
+
+int test__bsfd(int X) {
+// CHECK: @test__bsfd
+// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(i32 %{{.*}}, i1 true)
+  return __bsfd(X);
+}
+
+int test__bsfq(long long X) {
+// CHECK: @test__bsfq
+// CHECK: %[[call:.*]] = call i64 @llvm.cttz.i64(i64 %{{.*}}, i1 true)
+  return __bsfq(X);
+}
+
+int test__bsrd(int X) {
+// CHECK: @test__bsrd
+// CHECK:  %[[call:.*]] = call i32 @llvm.ctlz.i32(i32 %{{.*}}, i1 true)
+// CHECK:  %[[sub:.*]] = sub nsw i32 31, %[[call]]
+  return __bsrd(X);
+}
+
+int test__bsrq(long long X) {
+// CHECK: @test__bsrq
+// CHECK:  %[[call:.*]] = call i64 @llvm.ctlz.i64(i64 %{{.*}}, i1 true)
+// CHECK:  %[[cast:.*]] = trunc i64 %[[call]] to i32
+// CHECK:  %[[sub:.*]] = sub nsw i32 63, %[[cast]]
+  return __bsrq(X);
+}
diff --git a/test/CodeGen/popcnt-builtins.c b/test/CodeGen/popcnt-builtins.c
index 1fdb43339a8..800e759bba1 100644
--- a/test/CodeGen/popcnt-builtins.c
+++ b/test/CodeGen/popcnt-builtins.c
@@ -1,24 +1,39 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +popcnt -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-POPCNT
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
 
 
-#include <immintrin.h>
+#include <x86intrin.h>
 
-unsigned int test_mm_popcnt_u32(unsigned int __X) {
-  //CHECK: call i32 @llvm.ctpop.i32
+#ifdef __POPCNT__
+int test_mm_popcnt_u32(unsigned int __X) {
+  //CHECK-POPCNT: call i32 @llvm.ctpop.i32
   return _mm_popcnt_u32(__X);
 }
+#endif
 
-unsigned int test_popcnt_32(int __X) {
+int test_popcnt32(unsigned int __X) {
   //CHECK: call i32 @llvm.ctpop.i32
   return _popcnt32(__X);
 }
 
-unsigned long long test_mm_popcnt_u64(unsigned long long __X) {
-  //CHECK: call i64 @llvm.ctpop.i64
+int test__popcntd(unsigned int __X) {
+  //CHECK: call i32 @llvm.ctpop.i32
+  return __popcntd(__X);
+}
+
+#ifdef __POPCNT__
+long long test_mm_popcnt_u64(unsigned long long __X) {
+  //CHECK-POPCNT: call i64 @llvm.ctpop.i64
   return _mm_popcnt_u64(__X);
 }
+#endif
 
-unsigned long long test_popcnt_64(long long __X) {
+long long test_popcnt64(unsigned long long __X) {
   //CHECK: call i64 @llvm.ctpop.i64
   return _popcnt64(__X);
 }
+
+long long test__popcntq(unsigned long long __X) {
+  //CHECK: call i64 @llvm.ctpop.i64
+  return __popcntq(__X);
+}
diff --git a/test/CodeGen/x86-bswap.c b/test/CodeGen/x86-bswap.c
new file mode 100644
index 00000000000..adf8b7846a0
--- /dev/null
+++ b/test/CodeGen/x86-bswap.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
+
+#include <x86intrin.h>
+
+int test__bswapd(int X) {
+// CHECK-LABEL: @test__bswapd
+// CHECK: call i32 @llvm.bswap.i32
+  return __bswapd(X);
+}
+
+int test_bswap(int X) {
+// CHECK-LABEL: @test_bswap
+// CHECK: call i32 @llvm.bswap.i32
+  return _bswap(X);
+}
+
+long test__bswapq(long long X) {
+// CHECK-LABEL: @test__bswapq
+// CHECK: call i64 @llvm.bswap.i64
+  return __bswapq(X);
+}
+
+long test_bswap64(long long X) {
+// CHECK-LABEL: @test_bswap64
+// CHECK: call i64 @llvm.bswap.i64
+  return _bswap64(X);
+}
+
+
diff --git a/test/CodeGenCXX/arm-pcs.cpp b/test/CodeGenCXX/arm-pcs.cpp
new file mode 100644
index 00000000000..1d327d794b7
--- /dev/null
+++ b/test/CodeGenCXX/arm-pcs.cpp
@@ -0,0 +1,51 @@
+// Covers a bug fix for ABI selection with homogenous aggregates:
+//  See: https://bugs.llvm.org/show_bug.cgi?id=39982
+
+// REQUIRES: arm-registered-target
+// RUN: %clang -mfloat-abi=hard --target=armv7-unknown-linux-gnueabi -O3 -S -o - %s | FileCheck %s -check-prefixes=HARD,CHECK
+// RUN: %clang -mfloat-abi=softfp --target=armv7-unknown-linux-gnueabi -O3 -S -o - %s | FileCheck %s -check-prefixes=SOFTFP,CHECK
+// RUN: %clang -mfloat-abi=soft --target=armv7-unknown-linux-gnueabi -O3 -S -o - %s | FileCheck %s -check-prefixes=SOFT,CHECK
+
+struct S {
+  float f;
+  float d;
+  float c;
+  float t;
+};
+
+// Variadic functions should always marshal for the base standard.
+// See section 5.5 (Parameter Passing) of the AAPCS.
+float __attribute__((pcs("aapcs-vfp"))) variadic(S s, ...) {
+  // CHECK-NOT: vmov s{{[0-9]+}}, s{{[0-9]+}}
+  // CHECK: mov r{{[0-9]+}}, r{{[0-9]+}}
+  return s.d;
+}
+
+float no_attribute(S s) {
+  // SOFT: mov r{{[0-9]+}}, r{{[0-9]+}}
+  // SOFTFP: mov r{{[0-9]+}}, r{{[0-9]+}}
+  // HARD: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  return s.d;
+}
+
+float __attribute__((pcs("aapcs-vfp"))) baz(float x, float y) {
+  // CHECK-NOT: mov s{{[0-9]+}}, r{{[0-9]+}}
+  // SOFT: mov r{{[0-9]+}}, r{{[0-9]+}}
+  // SOFTFP: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  // HARD: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  return y;
+}
+
+float __attribute__((pcs("aapcs-vfp"))) foo(S s) {
+  // CHECK-NOT: mov s{{[0-9]+}}, r{{[0-9]+}}
+  // SOFT: mov r{{[0-9]+}}, r{{[0-9]+}}
+  // SOFTFP: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  // HARD: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  return s.d;
+}
+
+float __attribute__((pcs("aapcs"))) bar(S s) {
+  // CHECK-NOT: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
+  // CHECK: mov r{{[0-9]+}}, r{{[0-9]+}}
+  return s.d;
+}
diff --git a/test/CodeGenCXX/discard-name-values.cpp b/test/CodeGenCXX/discard-name-values.cpp
index d4d7527c285..aa30dae7501 100644
--- a/test/CodeGenCXX/discard-name-values.cpp
+++ b/test/CodeGenCXX/discard-name-values.cpp
@@ -10,7 +10,7 @@ bool test(bool pred) {
   // CHECK: br i1 %pred, label %if.then, label %if.end
 
   if (pred) {
-    // DISCARDVALUE: ; <label>:2:
+    // DISCARDVALUE: 2:
     // DISCARDVALUE-NEXT: tail call void @branch()
     // DISCARDVALUE-NEXT: br label %3
 
@@ -20,7 +20,7 @@ bool test(bool pred) {
     branch();
   }
 
-  // DISCARDVALUE: ; <label>:3:
+  // DISCARDVALUE: 3:
   // DISCARDVALUE-NEXT: ret i1 %0
 
   // CHECK: if.end:
diff --git a/test/CodeGenObjC/arc-blocks.m b/test/CodeGenObjC/arc-blocks.m
index 47e2723b511..e64a7e4c2fe 100644
--- a/test/CodeGenObjC/arc-blocks.m
+++ b/test/CodeGenObjC/arc-blocks.m
@@ -127,7 +127,7 @@ void test4(void) {
   // CHECK-NEXT: store i32 838860800, i32* [[T0]]
   // CHECK:      [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6
   // CHECK-NEXT: [[T0:%.*]] = call i8* @test4_source()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: store i8* [[T1]], i8** [[SLOT]]
   // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6
   // 0x42800000 - has signature, copy/dispose helpers, as well as BLOCK_HAS_EXTENDED_LAYOUT
@@ -181,7 +181,7 @@ void test5(void) {
   // CHECK-NEXT: [[VARPTR1:%.*]] = bitcast i8** [[VAR]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[VARPTR1]])
   // CHECK: [[T0:%.*]] = call i8* @test5_source()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: store i8* [[T1]], i8** [[VAR]],
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
   // 0x40800000 - has signature but no copy/dispose, as well as BLOCK_HAS_EXTENDED_LAYOUT
@@ -212,7 +212,7 @@ void test6(void) {
   // CHECK-NEXT: store i32 1107296256, i32* [[T0]]
   // CHECK:      [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6
   // CHECK-NEXT: [[T0:%.*]] = call i8* @test6_source()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: call i8* @llvm.objc.initWeak(i8** [[SLOT]], i8* [[T1]])
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
   // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[VAR]], i32 0, i32 6
@@ -258,7 +258,7 @@ void test7(void) {
   // CHECK:      [[VAR:%.*]] = alloca i8*,
   // CHECK-NEXT: [[BLOCK:%.*]] = alloca [[BLOCK_T:<{.*}>]],
   // CHECK:      [[T0:%.*]] = call i8* @test7_source()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: call i8* @llvm.objc.initWeak(i8** [[VAR]], i8* [[T1]])
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
   // 0x42800000 - has signature, copy/dispose helpers, as well as BLOCK_HAS_EXTENDED_LAYOUT
diff --git a/test/CodeGenObjC/arc-foreach.m b/test/CodeGenObjC/arc-foreach.m
index c8c7120bae7..575bb0b711e 100644
--- a/test/CodeGenObjC/arc-foreach.m
+++ b/test/CodeGenObjC/arc-foreach.m
@@ -139,7 +139,7 @@ void test2(Test2 *a) {
 // CHECK-LP64-LABEL:    define void @test2(
 // CHECK-LP64:      [[T0:%.*]] = call [[ARRAY_T]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to [[ARRAY_T]]* (i8*, i8*)*)(
 // CHECK-LP64-NEXT: [[T1:%.*]] = bitcast [[ARRAY_T]]* [[T0]] to i8*
-// CHECK-LP64-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-LP64-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-LP64-NEXT: [[COLL:%.*]] = bitcast i8* [[T2]] to [[ARRAY_T]]*
 
 // Make sure it's not immediately released before starting the iteration.
diff --git a/test/CodeGenObjC/arc-literals.m b/test/CodeGenObjC/arc-literals.m
index a9cb7691380..2e613401d1c 100644
--- a/test/CodeGenObjC/arc-literals.m
+++ b/test/CodeGenObjC/arc-literals.m
@@ -59,7 +59,7 @@ void test_array(id a, id b) {
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[CLASS_T]]* [[T0]] to i8*
   // CHECK-NEXT: [[T2:%.*]] = bitcast [2 x i8*]* [[OBJECTS]] to i8**
   // CHECK-NEXT: [[T3:%.*]] = call i8* bitcast ({{.*@objc_msgSend.*}})(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i64 2)
-  // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]])
+  // CHECK-NEXT: [[T4:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]])
   // CHECK: call void (...) @llvm.objc.clang.arc.use(i8* [[V0]], i8* [[V1]])
   id arr = @[a, b];
 
@@ -103,7 +103,7 @@ void test_dictionary(id k1, id o1, id k2, id o2) {
   // CHECK-NEXT: [[T2:%.*]] = bitcast [2 x i8*]* [[OBJECTS]] to i8**
   // CHECK-NEXT: [[T3:%.*]] = bitcast [2 x i8*]* [[KEYS]] to i8**
   // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast ({{.*@objc_msgSend.*}})(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i8** [[T3]], i64 2)
-  // CHECK-NEXT: [[T5:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]])
+  // CHECK-NEXT: [[T5:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]])
   // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.use(i8* [[V0]], i8* [[V1]], i8* [[V2]], i8* [[V3]])
 
   id dict = @{ k1 : o1, k2 : o2 };
@@ -135,7 +135,7 @@ void test_property(B *b) {
   // CHECK-NEXT: [[T1:%.*]] = bitcast
   // CHECK-NEXT: [[T2:%.*]] = call [[B:%.*]]* bitcast ({{.*}} @objc_msgSend to {{.*}})(i8* [[T1]], i8* [[SEL]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast [[B]]* [[T2]] to i8*
-  // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]])
+  // CHECK-NEXT: [[T4:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T3]])
   // CHECK-NEXT: [[V0:%.*]] = bitcast i8* [[T4]] to [[B]]*
   // CHECK-NEXT: [[V1:%.*]] = bitcast [[B]]* [[V0]] to i8*
 
diff --git a/test/CodeGenObjC/arc-precise-lifetime.m b/test/CodeGenObjC/arc-precise-lifetime.m
index 4c563c5b844..bbde18ee6ca 100644
--- a/test/CodeGenObjC/arc-precise-lifetime.m
+++ b/test/CodeGenObjC/arc-precise-lifetime.m
@@ -43,7 +43,7 @@ void test1a_message(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8*
@@ -77,7 +77,7 @@ void test1a_property(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8*
@@ -111,7 +111,7 @@ void test1b_message(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8*
@@ -142,7 +142,7 @@ void test1b_property(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8*
@@ -173,7 +173,7 @@ void test1c_message(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8*
@@ -206,7 +206,7 @@ void test1c_property(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8*
@@ -239,7 +239,7 @@ void test1d_message(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8*
@@ -269,7 +269,7 @@ void test1d_property(void) {
   // CHECK:      call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PTRPTR1]])
   // CHECK:      [[T0:%.*]] = call [[TEST1:%.*]]* @test1_helper()
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
   // CHECK-NEXT: store [[TEST1]]* [[T3]]
   // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8*
diff --git a/test/CodeGenObjC/arc-property.m b/test/CodeGenObjC/arc-property.m
index b417f9505e1..6e4cbb5944e 100644
--- a/test/CodeGenObjC/arc-property.m
+++ b/test/CodeGenObjC/arc-property.m
@@ -126,7 +126,7 @@ - (id) copyMachine {
 }
 // CHECK:    define internal i8* @"\01-[Test3 copyMachine]"(
 // CHECK:      [[T0:%.*]] = call i8* @test3_helper()
-// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
 // CHECK-NEXT: ret i8* [[T1]]
 - (void) setCopyMachine: (id) x {}
 @end
diff --git a/test/CodeGenObjC/arc-related-result-type.m b/test/CodeGenObjC/arc-related-result-type.m
index ac69e0d9f64..1d3805bf9bb 100644
--- a/test/CodeGenObjC/arc-related-result-type.m
+++ b/test/CodeGenObjC/arc-related-result-type.m
@@ -17,7 +17,7 @@ void test0(Test0 *val) {
 // CHECK-NEXT: load
 // CHECK-NEXT: bitcast
 // CHECK-NEXT: [[T0:%.*]] = call i8* bitcast (
-// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
 // CHECK-NEXT: [[T2:%.*]] = bitcast i8* [[T1]] to [[TEST0]]*
 // CHECK-NEXT: store [[TEST0]]* [[T2]], [[TEST0]]** [[X]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[TEST0]]** [[X]] to i8**
diff --git a/test/CodeGenObjC/arc-ternary-op.m b/test/CodeGenObjC/arc-ternary-op.m
index 2be04628464..4883143791a 100644
--- a/test/CodeGenObjC/arc-ternary-op.m
+++ b/test/CodeGenObjC/arc-ternary-op.m
@@ -130,7 +130,7 @@ void test2(int cond) {
   // CHECK-NEXT: br i1
   //   Within true branch, cleanup enabled.
   // CHECK:      [[T0:%.*]] = call i8* @test2_producer()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: store i8* [[T1]], i8** [[CLEANUP_SAVE]]
   // CHECK-NEXT: store i1 true, i1* [[RUN_CLEANUP]]
   // CHECK-NEXT: br label
diff --git a/test/CodeGenObjC/arc-unsafeclaim.m b/test/CodeGenObjC/arc-unsafeclaim.m
index fd063bfccf9..f5982ddc04d 100644
--- a/test/CodeGenObjC/arc-unsafeclaim.m
+++ b/test/CodeGenObjC/arc-unsafeclaim.m
@@ -41,7 +41,7 @@ void test_assign() {
 // DISABLED:             [[T0:%.*]] = call [[A:.*]]* @makeA()
 // DISABLED-MARKED-NEXT: call void asm sideeffect
 // DISABLED-NEXT:        [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8*
-// DISABLED-NEXT:        [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// DISABLED-NEXT:        [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 
 void test_assign_assign() {
   __unsafe_unretained id x, y;
@@ -75,7 +75,7 @@ void test_strong_assign_assign() {
 // CHECK:                [[T0:%.*]] = call [[A]]* @makeA()
 // CHECK-MARKED-NEXT:    call void asm sideeffect
 // CHECK-NEXT:           [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8*
-// CHECK-NEXT:           [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-NEXT:           [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-NEXT:           [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]*
 // CHECK-NEXT:           [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8*
 // CHECK-NEXT:           store i8* [[T4]], i8** [[Y]]
@@ -102,7 +102,7 @@ void test_assign_strong_assign() {
 // CHECK:                [[T0:%.*]] = call [[A]]* @makeA()
 // CHECK-MARKED-NEXT:    call void asm sideeffect
 // CHECK-NEXT:           [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8*
-// CHECK-NEXT:           [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-NEXT:           [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-NEXT:           [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]*
 // CHECK-NEXT:           [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8*
 // CHECK-NEXT:           [[OLD:%.*]] = load i8*, i8** [[Y]]
@@ -165,7 +165,7 @@ void test_strong_init_assignment() {
 // CHECK:                [[T0:%.*]] = call [[A]]* @makeA()
 // CHECK-MARKED-NEXT:    call void asm sideeffect
 // CHECK-NEXT:           [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8*
-// CHECK-NEXT:           [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-NEXT:           [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-NEXT:           [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]*
 // CHECK-NEXT:           [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8*
 // CHECK-NEXT:           store i8* [[T4]], i8** [[X]]
@@ -189,7 +189,7 @@ void test_init_strong_assignment() {
 // CHECK:                [[T0:%.*]] = call [[A]]* @makeA()
 // CHECK-MARKED-NEXT:    call void asm sideeffect
 // CHECK-NEXT:           [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8*
-// CHECK-NEXT:           [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-NEXT:           [[T2:%.*]] = {{.*}}call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-NEXT:           [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]*
 // CHECK-NEXT:           [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8*
 // CHECK-NEXT:           [[OLD:%.*]] = load i8*, i8** [[X]]
diff --git a/test/CodeGenObjC/arc-with-atthrow.m b/test/CodeGenObjC/arc-with-atthrow.m
index 93fa228c8b0..c5e9dc92beb 100644
--- a/test/CodeGenObjC/arc-with-atthrow.m
+++ b/test/CodeGenObjC/arc-with-atthrow.m
@@ -11,7 +11,7 @@ void test() {
 
 // CHECK-LABEL:    define void @test()
 // CHECK:      [[T0:%.*]] = call i8* @make()
-// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
 // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.autorelease(i8* [[T1]])
 // CHECK-NEXT: call void @objc_exception_throw(i8* [[T2]]) [[NR:#[0-9]+]]
 // CHECK-NEXT: unreachable
diff --git a/test/CodeGenObjC/arc.m b/test/CodeGenObjC/arc.m
index cbdb03205a6..bfabfb9349a 100644
--- a/test/CodeGenObjC/arc.m
+++ b/test/CodeGenObjC/arc.m
@@ -318,13 +318,13 @@ void test10() {
   // CHECK-NEXT: bitcast
   // CHECK-NEXT: [[T0:%.*]] = call [[TEST10]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST10]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[V:%.*]] = bitcast i8* [[T2]] to [[TEST10]]*
   // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_{{[0-9]*}}
   // CHECK-NEXT: bitcast
   // CHECK-NEXT: [[T0:%.*]] = call [[TEST10]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST10]]* [[T0]] to i8*
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST10]]*
   // CHECK-NEXT: [[T4:%.*]] = bitcast [[TEST10]]* [[T3]] to i8*
   // CHECK-NEXT: store i8* [[T4]], i8** [[Y]]
@@ -371,13 +371,13 @@ void test12(void) {
   // CHECK-NEXT: [[XPTR1:%.*]] = bitcast i8** [[X]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = call i8* @test12_helper()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: call i8* @llvm.objc.initWeak(i8** [[X]], i8* [[T1]])
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
 
   x = test12_helper();
   // CHECK-NEXT: [[T0:%.*]] = call i8* @test12_helper()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: call i8* @llvm.objc.storeWeak(i8** [[X]], i8* [[T1]])
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
 
@@ -514,7 +514,7 @@ void test19() {
   x[2] = test19_helper();
 
   // CHECK-NEXT: [[CALL:%.*]] = call i8* @test19_helper()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) [[NUW]]
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]]) [[NUW]]
   // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[X]], i64 0, i64 2
   // CHECK-NEXT: [[T0:%.*]] = load i8*, i8** [[SLOT]]
   // CHECK-NEXT: store i8* [[T1]], i8** [[SLOT]]
@@ -876,7 +876,7 @@ - (Test30_helper*) initHelper {
 __attribute__((ns_returns_retained)) id test32(void) {
 // CHECK-LABEL:    define i8* @test32()
 // CHECK:      [[CALL:%.*]] = call i8* @test32_helper()
-// CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]])
+// CHECK-NEXT: [[T0:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]])
 // CHECK-NEXT: ret i8* [[T0]]
   extern id test32_helper(void);
   return test32_helper();
@@ -1045,7 +1045,7 @@ - (id) test __attribute__((ns_returns_retained)) {
   extern id test43_produce(void);
   return test43_produce();
   // CHECK:      call i8* @test43_produce()
-  // CHECK-NEXT: call i8* @llvm.objc.retainAutoreleasedReturnValue(
+  // CHECK-NEXT: notail call i8* @llvm.objc.retainAutoreleasedReturnValue(
   // CHECK-NEXT: ret 
 }
 @end
@@ -1067,7 +1067,7 @@ void test46(__weak id *wp, __weak volatile id *wvp) {
   // TODO: this is sub-optimal, we should retain at the actual call site.
 
   // CHECK:      [[T0:%.*]] = call i8* @test46_helper()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8
   // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]])
   // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retain(i8* [[T3]])
@@ -1076,7 +1076,7 @@ void test46(__weak id *wp, __weak volatile id *wvp) {
   id x = *wp = test46_helper();
 
   // CHECK:      [[T0:%.*]] = call i8* @test46_helper()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8
   // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]])
   // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retain(i8* [[T3]])
@@ -1096,7 +1096,7 @@ void test47(void) {
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]])
   // CHECK-NEXT: store i8* null, i8** [[X]]
   // CHECK-NEXT: [[CALL:%.*]] = call i8* @test47_helper()
-  // CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]])
+  // CHECK-NEXT: [[T0:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]])
   // CHECK-NEXT: [[T1:%.*]] = load i8*, i8** [[X]]
   // CHECK-NEXT: store i8* [[T0]], i8** [[X]]
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
@@ -1120,7 +1120,7 @@ void test48(void) {
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.initWeak(i8** [[X]], i8* null)
   // CHECK-NEXT: [[T1:%.*]] = call i8* @test48_helper()
-  // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+  // CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[X]], i8* [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[X]], i8* [[T3]])
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T2]])
@@ -1139,7 +1139,7 @@ void test49(void) {
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[XPTR1]])
   // CHECK-NEXT: store i8* null, i8** [[X]]
   // CHECK-NEXT: [[CALL:%.*]] = call i8* @test49_helper()
-  // CHECK-NEXT: [[T0:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]])
+  // CHECK-NEXT: [[T0:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[CALL]])
   // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.autorelease(i8* [[T0]])
   // CHECK-NEXT: store i8* [[T2]], i8** [[X]]
   // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.retainAutorelease(i8* [[T1]])
@@ -1208,7 +1208,7 @@ void test53(void) {
 // CHECK-NEXT: [[YPTR1:%.*]] = bitcast i8** [[Y]] to i8*
 // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[YPTR1]])
 // CHECK-NEXT: [[T0:%.*]] = call i8* @test53_helper()
-// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
 // CHECK-NEXT: store i8* [[T1]], i8** [[Y]],
 // CHECK-NEXT: [[T0:%.*]] = load i8*, i8** [[Y]],
 // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retain(i8* [[T0]])
@@ -1261,7 +1261,7 @@ @implementation Test56
 + (id) make {
   extern id test56_helper(void);
   // CHECK:      [[T0:%.*]] = call i8* @test56_helper()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: ret i8* [[T1]]
   return test56_helper();
 }
@@ -1328,7 +1328,7 @@ void test59(void) {
 
   // CHECK-LABEL:    define void @test59()
   // CHECK:      [[T0:%.*]] = call i8* @test59_getlock()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: call i32 @objc_sync_enter(i8* [[T1]])
   // CHECK-NEXT: call void @test59_body()
   // CHECK-NEXT: call i32 @objc_sync_exit(i8* [[T1]])
@@ -1350,7 +1350,7 @@ void test61(void) {
   extern id test61_make(void);
 
   // CHECK-NEXT: [[T0:%.*]] = call i8* @test61_make()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* [[T1]], i8* [[T3]], i8* [[T2]])
@@ -1360,11 +1360,11 @@ void test61(void) {
   // CHECK-NEXT: [[YPTR1:%.*]] = bitcast i8** [[Y]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[YPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = call i8* @test61_make()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* [[T1]], i8* [[T3]], i8* [[T2]])
-  // CHECK-NEXT: [[T5:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]])
+  // CHECK-NEXT: [[T5:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]])
   // CHECK-NEXT: store i8* [[T5]], i8** [[Y]]
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T1]])
   id y = [test61_make() performSelector: @selector(test61_id)];
@@ -1400,7 +1400,7 @@ void test62(void) {
     // CHECK-NEXT: store i1 false, i1* [[CLEANUP_REQUIRED]]
     // CHECK-NEXT: br i1 [[T1]],
     // CHECK:      [[T0:%.*]] = call i8* @test62_make()
-    // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+    // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
     // CHECK-NEXT: store i8* [[T1]], i8** [[CLEANUP_VALUE]]
     // CHECK-NEXT: store i1 true, i1* [[CLEANUP_REQUIRED]]
     // CHECK-NEXT: [[T2:%.*]] = icmp ne i8* [[T1]], null
@@ -1455,10 +1455,10 @@ void test66(void) {
 // CHECK-LABEL:    define void @test66()
 // CHECK:      [[T0:%.*]] = call [[TEST66:%.*]]* @test66_receiver()
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST66]]* [[T0]] to i8*
-// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST66]]*
 // CHECK-NEXT: [[T4:%.*]] = call i8* @test66_arg()
-// CHECK-NEXT: [[T5:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]])
+// CHECK-NEXT: [[T5:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T4]])
 // CHECK-NEXT: [[T6:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T7:%.*]] = bitcast [[TEST66]]* [[T3]] to i8*
 // CHECK-NEXT: [[SIX:%.*]] = icmp eq i8* [[T7]], null
@@ -1495,7 +1495,7 @@ void test68(void) {
 // CHECK-NEXT: [[CLPTR1:%.*]] = bitcast i8** [[CL]] to i8*
 // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[CLPTR1]])
 // CHECK-NEXT: [[T0:%.*]] = call i8* @test67_helper()
-// CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+// CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
 // CHECK-NEXT: store i8* [[T1]], i8** [[CL]], align 8
 // CHECK-NEXT: [[T2:%.*]] = load i8*, i8** [[CL]]
 // CHECK-NEXT: call void @llvm.objc.release(i8* [[T2]])
diff --git a/test/CodeGenObjC/objc-arc-container-subscripting.m b/test/CodeGenObjC/objc-arc-container-subscripting.m
index 339415e3c05..2f6062d7ce6 100644
--- a/test/CodeGenObjC/objc-arc-container-subscripting.m
+++ b/test/CodeGenObjC/objc-arc-container-subscripting.m
@@ -12,7 +12,7 @@ id func() {
 }
 
 // CHECK: [[call:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
-// CHECK: [[SIX:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[call]]) [[NUW:#[0-9]+]]
+// CHECK: [[SIX:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[call]]) [[NUW:#[0-9]+]]
 // CHECK: [[ARRAY_CASTED:%.*]] = bitcast %0** {{%.*}} to i8**
 // CHECK: call void @llvm.objc.storeStrong(i8** [[ARRAY_CASTED]], i8* null)
 // CHECK: [[EIGHT:%.*]] = tail call i8* @llvm.objc.autoreleaseReturnValue(i8* [[SIX]]) [[NUW]]
diff --git a/test/CodeGenObjC/os_log.m b/test/CodeGenObjC/os_log.m
index 150987b0820..15999c6315d 100644
--- a/test/CodeGenObjC/os_log.m
+++ b/test/CodeGenObjC/os_log.m
@@ -21,7 +21,7 @@
 
   // CHECK: %[[CALL:.*]] = tail call %[[TY0:.*]]* (...) @GenString()
   // CHECK: %[[V0:.*]] = bitcast %[[TY0]]* %[[CALL]] to i8*
-  // CHECK: %[[V1:.*]] = tail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V0]])
+  // CHECK: %[[V1:.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V0]])
   // CHECK: %[[V2:.*]] = ptrtoint %[[TY0]]* %[[CALL]] to i64
   // CHECK: store i8 2, i8* %[[BUF]], align 1
   // CHECK: %[[NUMARGS_I:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
@@ -45,7 +45,7 @@
   // CHECK-O0: %[[V0:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
   // CHECK-O0: %[[CALL:.*]] = call %[[TY0:.*]]* (...) @GenString()
   // CHECK-O0: %[[V1:.*]] = bitcast %[[TY0]]* %[[CALL]] to i8*
-  // CHECK-O0: %[[V2:.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V1]])
+  // CHECK-O0: %[[V2:.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[V1]])
   // CHECK-O0: %[[V3:.*]] = bitcast i8* %[[V2]] to %[[TY0]]*
   // CHECK-O0: %[[V4:.*]] = ptrtoint %[[TY0]]* %[[V3]] to i64
   // CHECK-O0: call void @__os_log_helper_1_2_1_8_64(i8* %[[V0]], i64 %[[V4]])
diff --git a/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm b/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm
index 5a5cb42067d..37a68136dd5 100644
--- a/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm
+++ b/test/CodeGenObjCXX/arc-forwarded-lambda-call.mm
@@ -5,7 +5,7 @@ void test0(id x) {
   test0_helper([=]() { return x; });
   // CHECK-LABEL: define internal i8* @___Z5test0P11objc_object_block_invoke
   // CHECK: [[T0:%.*]] = call i8* @"_ZZ5test0P11objc_objectENK3$_0clEv"
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = tail call i8* @llvm.objc.autoreleaseReturnValue(i8* [[T1]])
   // CHECK-NEXT: ret i8* [[T2]]
 }
@@ -28,7 +28,7 @@ void test1() {
   test1_helper([](){ return test1_rv; });
   // CHECK-LABEL: define internal i8* @"_ZZ5test1vEN3$_18__invokeEv"
   // CHECK: [[T0:%.*]] = call i8* @"_ZZ5test1vENK3$_1clEv"
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = tail call i8* @llvm.objc.autoreleaseReturnValue(i8* [[T1]])
   // CHECK-NEXT: ret i8* [[T2]]
 }
diff --git a/test/CodeGenObjCXX/arc.mm b/test/CodeGenObjCXX/arc.mm
index e32c1f89215..f351ff6089c 100644
--- a/test/CodeGenObjCXX/arc.mm
+++ b/test/CodeGenObjCXX/arc.mm
@@ -20,7 +20,7 @@ void test0(__weak id *wp, __weak volatile id *wvp) {
   // TODO: in the non-volatile case, we do not need to be reloading.
 
   // CHECK:      [[T0:%.*]] = call i8* @_Z12test0_helperv()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8
   // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]])
   // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.retain(i8* [[T3]])
@@ -29,7 +29,7 @@ void test0(__weak id *wp, __weak volatile id *wvp) {
   id x = *wp = test0_helper();
 
   // CHECK:      [[T0:%.*]] = call i8* @_Z12test0_helperv()
-  // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
+  // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
   // CHECK-NEXT: [[T2:%.*]] = load i8**, i8*** {{%.*}}, align 8
   // CHECK-NEXT: [[T3:%.*]] = call i8* @llvm.objc.storeWeak(i8** [[T2]], i8* [[T1]])
   // CHECK-NEXT: [[T4:%.*]] = call i8* @llvm.objc.loadWeakRetained(i8** [[T2]])
@@ -224,7 +224,7 @@ - (NSArray *) array;
 // CHECK-LABEL: define weak_odr void @_Z6test37I6Test37EvPT_(
 // CHECK:      [[T0:%.*]] = call [[NSARRAY]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to [[NSARRAY]]* (i8*, i8*)*)(
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[NSARRAY]]* [[T0]] to i8*
-// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
+// CHECK-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T1]])
 // CHECK-NEXT: [[COLL:%.*]] = bitcast i8* [[T2]] to [[NSARRAY]]*
 
 // Make sure it's not immediately released before starting the iteration.
diff --git a/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm b/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm
index 229e84a0f56..a7770e07e42 100644
--- a/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm
+++ b/test/CodeGenObjCXX/inheriting-constructor-cleanup.mm
@@ -23,7 +23,7 @@ void f() {
 }
 // CHECK-LABEL: define void @_Z1fv
 // CHECK:       %[[TMP:.*]] = call i8* @_Z1gv()
-// CHECK:       {{.*}} = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[TMP]])
+// CHECK:       {{.*}} = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* %[[TMP]])
 // CHECK:       call void (%struct.Base*, i8*, ...) @_ZN4BaseC2E6Strongz(%struct.Base* {{.*}}, i8* {{.*}})
 // CHECK-NEXT:  call void @_ZN9InheritorD1Ev(%struct.Inheritor* {{.*}})
 
diff --git a/test/CodeGenObjCXX/literals.mm b/test/CodeGenObjCXX/literals.mm
index 0a14d330bdb..612d12dd135 100644
--- a/test/CodeGenObjCXX/literals.mm
+++ b/test/CodeGenObjCXX/literals.mm
@@ -29,7 +29,7 @@ void test_array() {
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP_CAST]])
   // CHECK-NEXT: call void @_ZN1XC1Ev({{.*}} [[TMPX]])
   // CHECK-NEXT: [[OBJECT0:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1XcvP11objc_objectEv
-  // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]])
+  // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]])
   // CHECK: store i8* [[RET0]], i8** [[ELEMENT0]]
   
   // Initializing the second element
@@ -38,7 +38,7 @@ void test_array() {
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP_CAST]])
   // CHECK-NEXT: invoke void @_ZN1YC1Ev({{.*}} [[TMPY]])
   // CHECK: [[OBJECT1:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1YcvP11objc_objectEv
-  // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]])
+  // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]])
   // CHECK: store i8* [[RET1]], i8** [[ELEMENT1]]
 
   // Build the array
@@ -83,14 +83,14 @@ void test_array_instantiation() {
   // CHECK: [[ELEMENT0:%[a-zA-Z0-9.]+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OBJECTS]], i64 0, i64 0
   // CHECK: call void @_ZN1XC1Ev
   // CHECK-NEXT: [[OBJECT0:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1XcvP11objc_objectEv
-  // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]])
+  // CHECK: [[RET0:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT0]])
   // CHECK: store i8* [[RET0]], i8** [[ELEMENT0]]
   
   // Initializing the second element
   // CHECK: [[ELEMENT1:%[a-zA-Z0-9.]+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OBJECTS]], i64 0, i64 1
   // CHECK: invoke void @_ZN1YC1Ev
   // CHECK: [[OBJECT1:%[a-zA-Z0-9.]+]] = invoke i8* @_ZNK1YcvP11objc_objectEv
-  // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]])
+  // CHECK: [[RET1:%[a-zA-Z0-9.]+]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[OBJECT1]])
   // CHECK: store i8* [[RET1]], i8** [[ELEMENT1]]
 
   // Build the array
diff --git a/test/Driver/aarch64-cpus.c b/test/Driver/aarch64-cpus.c
index 8621112c0ad..32920ea2edd 100644
--- a/test/Driver/aarch64-cpus.c
+++ b/test/Driver/aarch64-cpus.c
@@ -189,6 +189,16 @@
 // M4-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
 // M4-TUNE-NOT: "+v8.2a"
 
+// RUN: %clang -target aarch64_be -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5 %s
+// RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5 %s
+// RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5 %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-TUNE %s
+// M5: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m5" "-target-feature" "+v8.2a"
+// M5-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
+// M5-TUNE-NOT: "+v8.2a"
+
 // RUN: %clang -target arm64 -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
 // RUN: %clang -target arm64 -mlittle-endian -mcpu=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1 %s
 // RUN: %clang -target arm64 -mtune=exynos-m1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M1-TUNE %s
@@ -218,6 +228,14 @@
 // ARM64-M4-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
 // ARM64-M4-TUNE-NOT: "+v8.2a"
 
+// RUN: %clang -target arm64 -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M5 %s
+// RUN: %clang -target arm64 -mlittle-endian -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M5 %s
+// RUN: %clang -target arm64 -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M5-TUNE %s
+// RUN: %clang -target arm64 -mlittle-endian -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-M5-TUNE %s
+// ARM64-M5: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "exynos-m5" "-target-feature" "+v8.2a"
+// ARM64-M5-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
+// ARM64-M5-TUNE-NOT: "+v8.2a"
+
 // RUN: %clang -target aarch64 -mcpu=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR %s
 // RUN: %clang -target aarch64 -mlittle-endian -mcpu=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR %s
 // RUN: %clang -target aarch64 -mtune=falkor -### -c %s 2>&1 | FileCheck -check-prefix=FALKOR-TUNE %s
@@ -360,6 +378,16 @@
 // M4-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
 // M4-BE-TUNE-NOT: "+v8.2a"
 
+// RUN: %clang -target aarch64_be -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-BE %s
+// RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-BE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-BE %s
+// RUN: %clang -target aarch64_be -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-BE-TUNE %s
+// RUN: %clang -target aarch64 -mbig-endian -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-BE-TUNE %s
+// RUN: %clang -target aarch64_be -mbig-endian -mtune=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=M5-BE-TUNE %s
+// M5-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "exynos-m5" "-target-feature" "+v8.2a"
+// M5-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic"
+// M5-BE-TUNE-NOT: "+v8.2a"
+
 // RUN: %clang -target aarch64_be -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
 // RUN: %clang -target aarch64 -mbig-endian -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
 // RUN: %clang -target aarch64_be -mbig-endian -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=THUNDERX2T99-BE %s
diff --git a/test/Driver/arm-cortex-cpus.c b/test/Driver/arm-cortex-cpus.c
index cfb569312e0..7303c9a6847 100644
--- a/test/Driver/arm-cortex-cpus.c
+++ b/test/Driver/arm-cortex-cpus.c
@@ -676,6 +676,8 @@
 //
 // RUN: %clang -target arm -mcpu=exynos-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
 // RUN: %clang -target arm -mcpu=exynos-m4 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
+// RUN: %clang -target arm -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
+// RUN: %clang -target arm -mcpu=exynos-m5 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s
 // CHECK-CPUV82A: "-cc1"{{.*}} "-triple" "armv8.2a-{{.*}}
 
 // RUN: %clang -target armeb -mcpu=cortex-a32 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A %s
@@ -710,6 +712,8 @@
 //
 // RUN: %clang -target armeb -mcpu=exynos-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
 // RUN: %clang -target arm -mcpu=exynos-m4 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
+// RUN: %clang -target armeb -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
+// RUN: %clang -target arm -mcpu=exynos-m5 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s
 // CHECK-BE-CPUV82A: "-cc1"{{.*}} "-triple" "armebv8.2a-{{.*}}
 
 // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-r52 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8R %s
@@ -747,6 +751,8 @@
 //
 // RUN: %clang -target arm -mcpu=exynos-m4 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m4 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
+// RUN: %clang -target arm -mcpu=exynos-m5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
+// RUN: %clang -target arm -mcpu=exynos-m5 -mlittle-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A-THUMB %s
 // CHECK-CPUV82A-THUMB: "-cc1"{{.*}} "-triple" "thumbv8.2a-{{.*}}
 
 // RUN: %clang -target armeb -mcpu=cortex-a32 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV8A-THUMB %s
@@ -781,6 +787,8 @@
 //
 // RUN: %clang -target armeb -mcpu=exynos-m4 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
 // RUN: %clang -target arm -mcpu=exynos-m4 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
+// RUN: %clang -target armeb -mcpu=exynos-m5 -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
+// RUN: %clang -target arm -mcpu=exynos-m5 -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A-THUMB %s
 // CHECK-BE-CPUV82A-THUMB: "-cc1"{{.*}} "-triple" "thumbebv8.2a-{{.*}}
 
 // RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a73 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A73 %s
@@ -822,8 +830,10 @@
 // RUN: %clang -target arm -mcpu=cortex-m23 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8MBASE %s
 // CHECK-CPUV8MBASE:  "-cc1"{{.*}} "-triple" "thumbv8m.base-
 
-// RUN: %clang -target arm -mcpu=cortex-m33 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8MMAIN %s
-// CHECK-CPUV8MMAIN:  "-cc1"{{.*}} "-triple" "thumbv8m.main-
+// RUN: %clang -target arm -mcpu=cortex-m33 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-M33 %s
+// RUN: %clang -target arm -mcpu=cortex-m35p -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-M35P %s
+// CHECK-CORTEX-M33:  "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "cortex-m33"
+// CHECK-CORTEX-M35P:  "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "cortex-m35p"
 
 // ================== Check whether -mcpu accepts mixed-case values.
 // RUN: %clang -target arm-linux-gnueabi -mcpu=Cortex-a5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s
diff --git a/test/Driver/compiler-rt-unwind.c b/test/Driver/compiler-rt-unwind.c
index 00024dfa7ed..0ec067cbfc6 100644
--- a/test/Driver/compiler-rt-unwind.c
+++ b/test/Driver/compiler-rt-unwind.c
@@ -2,13 +2,6 @@
 // --unwindlib=XXX properly.
 //
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
-// RUN:     --target=x86_64-unknown-linux \
-// RUN:     --gcc-toolchain="" \
-// RUN:   | FileCheck --check-prefix=RTLIB-EMPTY %s
-// RTLIB-EMPTY: "{{.*}}lgcc"
-// RTLIB-EMPTY: "{{.*}}-lgcc_s"
-//
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     --target=x86_64-unknown-linux -rtlib=libgcc \
 // RUN:     --gcc-toolchain="" \
 // RUN:   | FileCheck --check-prefix=RTLIB-GCC %s
diff --git a/test/Driver/malign_double.c b/test/Driver/malign_double.c
new file mode 100644
index 00000000000..2c5cc35ea19
--- /dev/null
+++ b/test/Driver/malign_double.c
@@ -0,0 +1,5 @@
+// RUN: %clang -### -malign-double %s  2>&1 | FileCheck %s
+
+// Make sure -malign-double is passed through the driver.
+
+// CHECK: "-malign-double"
diff --git a/test/Driver/wasm-toolchain.c b/test/Driver/wasm-toolchain.c
index 9c217ef2e94..893fd2510d3 100644
--- a/test/Driver/wasm-toolchain.c
+++ b/test/Driver/wasm-toolchain.c
@@ -41,9 +41,10 @@
 
 // Thread-related command line tests.
 
-// '-pthread' sets '-target-feature +atomics'
-// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s -pthread 2>&1 | FileCheck -check-prefix=PTHREAD %s
+// '-pthread' sets '-target-feature +atomics' and '--shared-memory'
+// RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s -fuse-ld=wasm-ld -pthread 2>&1 | FileCheck -check-prefix=PTHREAD %s
 // PTHREAD: clang{{.*}}" "-cc1" {{.*}} "-target-feature" "+atomics"
+// PTHREAD: wasm-ld{{.*}}" "-lpthread" "--shared-memory"
 
 // '-pthread' not allowed with '-mno-atomics'
 // RUN: %clang -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s -pthread -mno-atomics 2>&1 | FileCheck -check-prefix=PTHREAD_NO_ATOMICS %s
diff --git a/test/OpenMP/allocate_codegen.cpp b/test/OpenMP/allocate_codegen.cpp
index 6239ded455d..daad9353368 100644
--- a/test/OpenMP/allocate_codegen.cpp
+++ b/test/OpenMP/allocate_codegen.cpp
@@ -67,25 +67,15 @@ int main () {
   static int a;
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
   a=2;
-  // CHECK:      [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
-  // CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
-  // CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
   // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
-  // CHECK:      store double 3.000000e+00, double* [[B_ADDR]],
-  // CHECK:      [[RES:%.+]] = call i32 [[FOO:@.+]]()
-  // CHECK:      store i32 [[RES]], i32* [[RET:%.+]],
-  // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
+  // CHECK:      alloca double,
   // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
   double b = 3;
 #pragma omp allocate(b)
-  // CHECK:      [[RETVAL:%.+]] = load i32, i32* [[RET]],
-  // CHECK:      ret i32 [[RETVAL]]
   return (foo<int>());
 }
 
-// CHECK-NOT:  call {{.+}} {{__kmpc_alloc|__kmpc_free}}
-
-// CHECK: define {{.*}}i32 [[FOO]]()
+// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
 // CHECK:      [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
 // CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
 // CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])
diff --git a/test/OpenMP/nvptx_allocate_codegen.cpp b/test/OpenMP/nvptx_allocate_codegen.cpp
new file mode 100644
index 00000000000..ec1faff4265
--- /dev/null
+++ b/test/OpenMP/nvptx_allocate_codegen.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda  -emit-llvm-bc -o %t-host.bc %s
+// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+#pragma omp declare target
+typedef void **omp_allocator_handle_t;
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+// CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32,
+// CHECK-DAG: @a = global i32 0,
+// CHECK-DAG: @b = addrspace(4) global i32 0,
+// CHECK-DAG: @c = global i32 0,
+// CHECK-DAG: @d = global %struct.St1 zeroinitializer,
+// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,
+// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,
+// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32,
+struct St{
+ int a;
+};
+
+struct St1{
+ int a;
+ static int b;
+#pragma omp allocate(b) allocator(omp_default_mem_alloc)
+} d;
+
+int a, b, c;
+#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
+#pragma omp allocate(b) allocator(omp_const_mem_alloc)
+#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc)
+
+template <class T>
+struct ST {
+  static T m;
+  #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
+};
+
+template <class T> T foo() {
+  T v;
+  #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
+  v = ST<T>::m;
+  return v;
+}
+
+namespace ns{
+  int a;
+}
+#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
+
+// CHECK-LABEL: @main
+int main () {
+  // CHECK: alloca double,
+  static int a;
+#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
+  a=2;
+  double b = 3;
+#pragma omp allocate(b) allocator(omp_default_mem_alloc)
+  return (foo<int>());
+}
+
+// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
+// CHECK: alloca i32,
+
+extern template int ST<int>::m;
+#pragma omp end declare target
+#endif
diff --git a/test/OpenMP/nvptx_allocate_messages.cpp b/test/OpenMP/nvptx_allocate_messages.cpp
new file mode 100644
index 00000000000..5bb94222270
--- /dev/null
+++ b/test/OpenMP/nvptx_allocate_messages.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda  -emit-llvm-bc -o %t-host.bc %s
+// RUN: %clang_cc1 -verify -DDEVICE -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -fsyntax-only %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc
+// RUN: %clang_cc1 -verify -DDEVICE -DREQUIRES -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -fsyntax-only %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc
+#if !defined(DEVICE) || defined(REQUIRES)
+// expected-no-diagnostics
+#endif // DEVICE
+
+#ifndef HEADER
+#define HEADER
+
+#if defined(REQUIRES) && defined(DEVICE)
+#pragma omp requires dynamic_allocators
+#endif // REQUIRES && DEVICE
+
+int bar() {
+  int res = 0;
+#if defined(DEVICE) && !defined(REQUIRES)
+// expected-error@+2 {{expected an 'allocator' clause inside of the target region; provide an 'allocator' clause or use 'requires' directive with the 'dynamic_allocators' clause}}
+#endif // DEVICE && !REQUIRES
+#pragma omp allocate(res)
+  return 0;
+}
+
+#pragma omp declare target
+typedef void **omp_allocator_handle_t;
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+struct St{
+ int a;
+};
+
+struct St1{
+ int a;
+ static int b;
+#pragma omp allocate(b) allocator(omp_default_mem_alloc)
+} d;
+
+int a, b, c;
+#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
+#pragma omp allocate(b) allocator(omp_const_mem_alloc)
+#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc)
+
+template <class T>
+struct ST {
+  static T m;
+  #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
+};
+
+template <class T> T foo() {
+  T v;
+  #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
+  v = ST<T>::m;
+  return v;
+}
+
+namespace ns{
+  int a;
+}
+#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
+
+int main () {
+  static int a;
+#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
+  a=2;
+  double b = 3;
+#if defined(DEVICE) && !defined(REQUIRES)
+// expected-error@+2 {{expected an 'allocator' clause inside of the target region; provide an 'allocator' clause or use 'requires' directive with the 'dynamic_allocators' clause}}
+#endif // DEVICE && !REQUIRES
+#pragma omp allocate(b)
+#if defined(DEVICE) && !defined(REQUIRES)
+// expected-note@+2 {{called by 'main'}}
+#endif // DEVICE && !REQUIRES
+  return (foo<int>() + bar());
+}
+
+extern template int ST<int>::m;
+#pragma omp end declare target
+#endif
diff --git a/test/Preprocessor/Inputs/include-next-1/bar.h b/test/Preprocessor/Inputs/include-next-1/bar.h
new file mode 100644
index 00000000000..1cf97aeb50c
--- /dev/null
+++ b/test/Preprocessor/Inputs/include-next-1/bar.h
@@ -0,0 +1 @@
+#define BAR 1
diff --git a/test/Preprocessor/Inputs/include-next-1/foo.h b/test/Preprocessor/Inputs/include-next-1/foo.h
new file mode 100644
index 00000000000..7d2753c4e4d
--- /dev/null
+++ b/test/Preprocessor/Inputs/include-next-1/foo.h
@@ -0,0 +1 @@
+#include_next "bar.h"
diff --git a/test/Preprocessor/Inputs/include-next-2/bar.h b/test/Preprocessor/Inputs/include-next-2/bar.h
new file mode 100644
index 00000000000..3ac8411a18d
--- /dev/null
+++ b/test/Preprocessor/Inputs/include-next-2/bar.h
@@ -0,0 +1 @@
+#define BAR 2
diff --git a/test/Preprocessor/aarch64-target-features.c b/test/Preprocessor/aarch64-target-features.c
index 5ab43313468..add3ff45eb0 100644
--- a/test/Preprocessor/aarch64-target-features.c
+++ b/test/Preprocessor/aarch64-target-features.c
@@ -152,6 +152,7 @@
 // RUN: %clang -target aarch64 -mcpu=exynos-m2 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M1 %s
 // RUN: %clang -target aarch64 -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M1 %s
 // RUN: %clang -target aarch64 -mcpu=exynos-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M4 %s
+// RUN: %clang -target aarch64 -mcpu=exynos-m5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-M4 %s
 // RUN: %clang -target aarch64 -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-KRYO %s
 // RUN: %clang -target aarch64 -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-THUNDERX2T99 %s
 // CHECK-MCPU-CYCLONE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+neon" "-target-feature" "+crypto" "-target-feature" "+zcm" "-target-feature" "+zcz"
diff --git a/test/Preprocessor/arm-target-features.c b/test/Preprocessor/arm-target-features.c
index 004aaac5c3c..891093650e7 100644
--- a/test/Preprocessor/arm-target-features.c
+++ b/test/Preprocessor/arm-target-features.c
@@ -534,6 +534,8 @@
 // RUN: %clang -target armv8 -mthumb -mcpu=exynos-m3 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
 // RUN: %clang -target armv8 -mcpu=exynos-m4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
 // RUN: %clang -target armv8 -mthumb -mcpu=exynos-m4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
+// RUN: %clang -target armv8 -mcpu=exynos-m5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
+// RUN: %clang -target armv8 -mthumb -mcpu=exynos-m5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s
 // ARMV8:#define __ARM_ARCH_EXT_IDIV__ 1
 // ARMV8:#define __ARM_FEATURE_DSP 1
 // ARMV8-NOT:#define __ARM_FP 0x
@@ -560,6 +562,8 @@
 // RUN: %clang -target armv8-eabi -mthumb -mcpu=exynos-m3 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
 // RUN: %clang -target armv8-eabi -mcpu=exynos-m4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
 // RUN: %clang -target armv8-eabi -mthumb -mcpu=exynos-m4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mcpu=exynos-m5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
+// RUN: %clang -target armv8-eabi -mthumb -mcpu=exynos-m5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8-ALLOW-FP-INSTR %s
 // ARMV8-ALLOW-FP-INSTR:#define __ARM_ARCH_EXT_IDIV__ 1
 // ARMV8-ALLOW-FP-INSTR:#define __ARM_FEATURE_DSP 1
 // ARMV8-ALLOW-FP-INSTR:#define __ARM_FP 0xe
diff --git a/test/Preprocessor/include-next.c b/test/Preprocessor/include-next.c
new file mode 100644
index 00000000000..4b9a0e870ce
--- /dev/null
+++ b/test/Preprocessor/include-next.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -verify %s -E -o /dev/null -I%S/Inputs/include-next-1 -I%S/Inputs/include-next-2 -DTEST=1
+// RUN: %clang_cc1 -verify %s -E -o /dev/null -I%S/Inputs/include-next-1 -I%S/Inputs/include-next-2 -DTEST=2
+// RUN: %clang_cc1 -verify %s -E -o /dev/null -I%S/Inputs/include-next-1 -I%S/Inputs/include-next-2 -DTEST=3
+
+#if TEST == 1
+// expected-warning@+1 {{#include_next in primary source file}}
+#include_next "bar.h"
+#if BAR != 1
+#error wrong bar
+#endif
+
+#elif TEST == 2
+// expected-no-diagnostics
+#include "foo.h"
+#if BAR != 2
+#error wrong bar
+#endif
+
+#elif TEST == 3
+// expected-warning@foo.h:1 {{#include_next in file found relative to primary source file or found by absolute path}}
+#include "Inputs/include-next-1/foo.h"
+#if BAR != 1
+#error wrong bar
+#endif
+#undef BAR
+
+#else
+#error unknown test
+#endif
diff --git a/test/Preprocessor/init.c b/test/Preprocessor/init.c
index dffcdf7f4d8..641d1006499 100644
--- a/test/Preprocessor/init.c
+++ b/test/Preprocessor/init.c
@@ -2845,8 +2845,9 @@
 // I386:#define __i386__ 1
 // I386:#define i386 1
 //
-// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX %s
-// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-CXX %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN32 %s
+// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-CXX -check-prefix I386-LINUX-ALIGN32 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN64 %s
 //
 // I386-LINUX-NOT:#define _LP64
 // I386-LINUX:#define __BIGGEST_ALIGNMENT__ 16
@@ -2883,6 +2884,18 @@
 // I386-LINUX:#define __FLT_MIN_EXP__ (-125)
 // I386-LINUX:#define __FLT_MIN__ 1.17549435e-38F
 // I386-LINUX:#define __FLT_RADIX__ 2
+// I386-LINUX:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_INT_LOCK_FREE 2
+// I386-LINUX-ALIGN32:#define __GCC_ATOMIC_LLONG_LOCK_FREE 1
+// I386-LINUX-ALIGN64:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
+// I386-LINUX:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// I386-LINUX:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
 // I386-LINUX:#define __INT16_C_SUFFIX__
 // I386-LINUX:#define __INT16_FMTd__ "hd"
 // I386-LINUX:#define __INT16_FMTi__ "hi"
@@ -3034,8 +3047,10 @@
 // I386-LINUX:#define __i386__ 1
 // I386-LINUX:#define i386 1
 //
-// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s
-// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD -check-prefix I386-NETBSD-CXX %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s
+// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD -check-prefix I386-NETBSD-CXX %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s
+//
 //
 // I386-NETBSD-NOT:#define _LP64
 // I386-NETBSD:#define __BIGGEST_ALIGNMENT__ 16
@@ -3072,6 +3087,17 @@
 // I386-NETBSD:#define __FLT_MIN_EXP__ (-125)
 // I386-NETBSD:#define __FLT_MIN__ 1.17549435e-38F
 // I386-NETBSD:#define __FLT_RADIX__ 2
+// I386-NETBSD:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_INT_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_LLONG_LOCK_FREE 1
+// I386-NETBSD:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
+// I386-NETBSD:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// I386-NETBSD:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
 // I386-NETBSD:#define __INT16_C_SUFFIX__
 // I386-NETBSD:#define __INT16_FMTd__ "hd"
 // I386-NETBSD:#define __INT16_FMTi__ "hi"
@@ -8947,6 +8973,17 @@
 // X86_64-LINUX:#define __FLT_MIN_EXP__ (-125)
 // X86_64-LINUX:#define __FLT_MIN__ 1.17549435e-38F
 // X86_64-LINUX:#define __FLT_RADIX__ 2
+// X86_64-LINUX:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_INT_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
+// X86_64-LINUX:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// X86_64-LINUX:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
 // X86_64-LINUX:#define __INT16_C_SUFFIX__
 // X86_64-LINUX:#define __INT16_FMTd__ "hd"
 // X86_64-LINUX:#define __INT16_FMTi__ "hi"
@@ -9149,6 +9186,17 @@
 // X86_64-NETBSD:#define __FLT_MIN_EXP__ (-125)
 // X86_64-NETBSD:#define __FLT_MIN__ 1.17549435e-38F
 // X86_64-NETBSD:#define __FLT_RADIX__ 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_INT_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_LONG_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
+// X86_64-NETBSD:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
+// X86_64-NETBSD:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
 // X86_64-NETBSD:#define __INT16_C_SUFFIX__
 // X86_64-NETBSD:#define __INT16_FMTd__ "hd"
 // X86_64-NETBSD:#define __INT16_FMTi__ "hi"
diff --git a/test/Preprocessor/predefined-win-macros.c b/test/Preprocessor/predefined-win-macros.c
index 5e5f1e82f9b..6034c085024 100644
--- a/test/Preprocessor/predefined-win-macros.c
+++ b/test/Preprocessor/predefined-win-macros.c
@@ -3,7 +3,7 @@
 // RUN: %clang_cc1 %s -x c++ -E -dM -triple x86_64-pc-win32 -fms-extensions -fms-compatibility \
 // RUN:     -fms-compatibility-version=19.00 -std=c++14 -o - | FileCheck -match-full-lines %s --check-prefix=CHECK-MS64
 // RUN: %clang_cc1 %s -x c++ -E -dM -triple x86_64-pc-win32 -fms-extensions -fms-compatibility \
-// RUN:     -fms-compatibility-version=19.00 -std=c++14 -o - | grep GCC | count 1
+// RUN:     -fms-compatibility-version=19.00 -std=c++14 -o - | grep GCC | count 5
 // CHECK-MS64: #define _INTEGRAL_MAX_BITS 64
 // CHECK-MS64: #define _MSC_EXTENSIONS 1
 // CHECK-MS64: #define _MSC_VER 1900
@@ -15,13 +15,17 @@
 // CHECK-MS64-NOT: GNU
 // CHECK-MS64-NOT: GXX
 // CHECK-MS64: #define __GCC_ASM_FLAG_OUTPUTS__ 1
+// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
+// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
+// CHECK-MS64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
 // CHECK-MS64-NOT: GNU
 // CHECK-MS64-NOT: GXX
 
 // RUN: %clang_cc1 %s -x c++ -E -dM -triple i686-pc-win32 -fms-extensions -fms-compatibility \
 // RUN:     -fms-compatibility-version=19.00 -std=c++17 -o - | FileCheck -match-full-lines %s --check-prefix=CHECK-MS
 // RUN: %clang_cc1 %s -x c++ -E -dM -triple i686-pc-win32 -fms-extensions -fms-compatibility \
-// RUN:     -fms-compatibility-version=19.00 -std=c++17 -o - | grep GCC | count 1
+// RUN:     -fms-compatibility-version=19.00 -std=c++17 -o - | grep GCC | count 5
 // CHECK-MS: #define _INTEGRAL_MAX_BITS 64
 // CHECK-MS: #define _MSC_EXTENSIONS 1
 // CHECK-MS: #define _MSC_VER 1900
@@ -33,6 +37,10 @@
 // CHECK-MS-NOT: GNU
 // CHECK-MS-NOT: GXX
 // CHECK-MS: #define __GCC_ASM_FLAG_OUTPUTS__ 1
+// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
+// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
+// CHECK-MS: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
 // CHECK-MS-NOT: GNU
 // CHECK-MS-NOT: GXX
 
diff --git a/test/SemaCXX/exceptions.cpp b/test/SemaCXX/exceptions.cpp
index 9e76783ca8a..1e786adaa1b 100644
--- a/test/SemaCXX/exceptions.cpp
+++ b/test/SemaCXX/exceptions.cpp
@@ -7,6 +7,7 @@ struct A; // expected-note 4 {{forward declaration of 'A'}}
 struct Abstract { virtual void f() = 0; }; // expected-note {{unimplemented pure virtual method 'f'}}
 
 void trys() {
+  int k = 42;
   try {
   } catch(int i) { // expected-note {{previous definition}}
     int j = i;
@@ -18,6 +19,10 @@ void trys() {
   } catch(A &a) { // expected-error {{cannot catch reference to incomplete type 'A'}}
   } catch(Abstract) { // expected-error {{variable type 'Abstract' is an abstract class}}
   } catch(...) {
+    int ref = k;
+    {
+      int ref = k;
+    }
     int j = i; // expected-error {{use of undeclared identifier 'i'}}
   }
 
diff --git a/tools/libclang/CIndexer.cpp b/tools/libclang/CIndexer.cpp
index 4c06b63b5a2..0054c15bc72 100644
--- a/tools/libclang/CIndexer.cpp
+++ b/tools/libclang/CIndexer.cpp
@@ -32,12 +32,69 @@
 
 #ifdef _WIN32
 #include <windows.h>
+#elif defined(_AIX)
+#include <errno.h>
+#include <sys/ldr.h>
 #else
 #include <dlfcn.h>
 #endif
 
 using namespace clang;
 
+#ifdef _AIX
+namespace clang {
+namespace {
+
+template <typename LibClangPathType>
+void getClangResourcesPathImplAIX(LibClangPathType &LibClangPath) {
+  int PrevErrno = errno;
+
+  size_t BufSize = 2048u;
+  std::unique_ptr<char[]> Buf;
+  while (true) {
+    Buf = llvm::make_unique<char []>(BufSize);
+    errno = 0;
+    int Ret = loadquery(L_GETXINFO, Buf.get(), (unsigned int)BufSize);
+    if (Ret != -1)
+      break; // loadquery() was successful.
+    if (errno != ENOMEM)
+      llvm_unreachable("Encountered an unexpected loadquery() failure");
+
+    // errno == ENOMEM; try to allocate more memory.
+    if ((BufSize & ~((-1u) >> 1u)) != 0u)
+      llvm::report_fatal_error("BufSize needed for loadquery() too large");
+
+    Buf.release();
+    BufSize <<= 1u;
+  }
+
+  // Extract the function entry point from the function descriptor.
+  uint64_t EntryAddr =
+      reinterpret_cast<uintptr_t &>(clang_createTranslationUnit);
+
+  // Loop to locate the function entry point in the loadquery() results.
+  ld_xinfo *CurInfo = reinterpret_cast<ld_xinfo *>(Buf.get());
+  while (true) {
+    uint64_t CurTextStart = (uint64_t)CurInfo->ldinfo_textorg;
+    uint64_t CurTextEnd = CurTextStart + CurInfo->ldinfo_textsize;
+    if (CurTextStart <= EntryAddr && EntryAddr < CurTextEnd)
+      break; // Successfully located.
+
+    if (CurInfo->ldinfo_next == 0u)
+      llvm::report_fatal_error("Cannot locate entry point in "
+                               "the loadquery() results");
+    CurInfo = reinterpret_cast<ld_xinfo *>(reinterpret_cast<char *>(CurInfo) +
+                                           CurInfo->ldinfo_next);
+  }
+
+  LibClangPath += reinterpret_cast<char *>(CurInfo) + CurInfo->ldinfo_filename;
+  errno = PrevErrno;
+}
+
+} // end anonymous namespace
+} // end namespace clang
+#endif
+
 const std::string &CIndexer::getClangResourcesPath() {
   // Did we already compute the path?
   if (!ResourcesPath.empty())
@@ -64,6 +121,8 @@ const std::string &CIndexer::getClangResourcesPath() {
 #endif
 
   LibClangPath += path;
+#elif defined(_AIX)
+  getClangResourcesPathImplAIX(LibClangPath);
 #else
   // This silly cast below avoids a C++ warning.
   Dl_info info;
diff --git a/unittests/AST/OMPStructuredBlockTest.cpp b/unittests/AST/OMPStructuredBlockTest.cpp
index 623bed388c0..f4a3fad4a1b 100644
--- a/unittests/AST/OMPStructuredBlockTest.cpp
+++ b/unittests/AST/OMPStructuredBlockTest.cpp
@@ -102,11 +102,12 @@ void test() {
     #pragma omp cancel parallel
 }
 })";
-  ASSERT_TRUE(
-      PrintedOMPStmtMatches(Source, OMPInnermostStructuredBlockMatcher(), R"({
+  const char *Expected = R"({
     #pragma omp cancel parallel
 }
-)"));
+)";
+  ASSERT_TRUE(PrintedOMPStmtMatches(
+      Source, OMPInnermostStructuredBlockMatcher(), Expected));
   ASSERT_TRUE(PrintedOMPStmtMatches(Source, OMPStandaloneDirectiveMatcher(),
                                     "#pragma omp cancel parallel\n"));
 }
@@ -117,14 +118,15 @@ TEST(OMPStructuredBlock, TestCancellationPoint) {
 void test() {
 #pragma omp parallel
 {
-#pragma omp cancellation point parallel
+    #pragma omp cancellation point parallel
 }
 })";
-  ASSERT_TRUE(
-      PrintedOMPStmtMatches(Source, OMPInnermostStructuredBlockMatcher(), R"({
+  const char *Expected = R"({
     #pragma omp cancellation point parallel
 }
-)"));
+)";
+  ASSERT_TRUE(PrintedOMPStmtMatches(
+      Source, OMPInnermostStructuredBlockMatcher(), Expected));
   ASSERT_TRUE(
       PrintedOMPStmtMatches(Source, OMPStandaloneDirectiveMatcher(),
                             "#pragma omp cancellation point parallel\n"));
diff --git a/unittests/ASTMatchers/ASTMatchersTest.h b/unittests/ASTMatchers/ASTMatchersTest.h
index 3f3118c2ef4..78c551f806f 100644
--- a/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/unittests/ASTMatchers/ASTMatchersTest.h
@@ -237,13 +237,13 @@ testing::AssertionResult notMatchesWithCuda(const std::string &Code,
 template <typename T>
 testing::AssertionResult matchesWithOpenMP(const std::string &Code,
                                            const T &AMatcher) {
-  return matchesConditionally(Code, AMatcher, true, "-fopenmp");
+  return matchesConditionally(Code, AMatcher, true, "-fopenmp=libomp");
 }
 
 template <typename T>
 testing::AssertionResult notMatchesWithOpenMP(const std::string &Code,
                                               const T &AMatcher) {
-  return matchesConditionally(Code, AMatcher, false, "-fopenmp");
+  return matchesConditionally(Code, AMatcher, false, "-fopenmp=libomp");
 }
 
 template <typename T>
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index eaf818c16c6..f9b2fe2e9f6 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -3956,6 +3956,191 @@ TEST_F(FormatTest, ConstructorInitializers) {
                    "    aaaa(aaaa) {}"));
 }
 
+TEST_F(FormatTest, AllowAllConstructorInitializersOnNextLine) {
+  FormatStyle Style = getLLVMStyle();
+  Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+  Style.ColumnLimit = 60;
+  Style.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
+  Style.AllowAllConstructorInitializersOnNextLine = true;
+  Style.BinPackParameters = false;
+
+  for (int i = 0; i < 4; ++i) {
+    // Test all combinations of parameters that should not have an effect.
+    Style.AllowAllParametersOfDeclarationOnNextLine = i & 1;
+    Style.AllowAllArgumentsOnNextLine = i & 2;
+
+    Style.AllowAllConstructorInitializersOnNextLine = true;
+    Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+    verifyFormat("Constructor()\n"
+                 "    : aaaaaaaaaaaaaaaaaaaa(a), bbbbbbbbbbbbbbbbbbbbb(b) {}",
+                 Style);
+    verifyFormat("Constructor() : a(a), b(b) {}", Style);
+
+    Style.AllowAllConstructorInitializersOnNextLine = false;
+    verifyFormat("Constructor()\n"
+                 "    : aaaaaaaaaaaaaaaaaaaa(a)\n"
+                 "    , bbbbbbbbbbbbbbbbbbbbb(b) {}",
+                 Style);
+    verifyFormat("Constructor() : a(a), b(b) {}", Style);
+
+    Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;
+    Style.AllowAllConstructorInitializersOnNextLine = true;
+    verifyFormat("Constructor()\n"
+                 "    : aaaaaaaaaaaaaaaaaaaa(a), bbbbbbbbbbbbbbbbbbbbb(b) {}",
+                 Style);
+
+    Style.AllowAllConstructorInitializersOnNextLine = false;
+    verifyFormat("Constructor()\n"
+                 "    : aaaaaaaaaaaaaaaaaaaa(a),\n"
+                 "      bbbbbbbbbbbbbbbbbbbbb(b) {}",
+                 Style);
+
+    Style.BreakConstructorInitializers = FormatStyle::BCIS_AfterColon;
+    Style.AllowAllConstructorInitializersOnNextLine = true;
+    verifyFormat("Constructor() :\n"
+                 "    aaaaaaaaaaaaaaaaaa(a), bbbbbbbbbbbbbbbbbbbbb(b) {}",
+                 Style);
+
+    Style.AllowAllConstructorInitializersOnNextLine = false;
+    verifyFormat("Constructor() :\n"
+                 "    aaaaaaaaaaaaaaaaaa(a),\n"
+                 "    bbbbbbbbbbbbbbbbbbbbb(b) {}",
+                 Style);
+  }
+
+  // Test interactions between AllowAllParametersOfDeclarationOnNextLine and
+  // AllowAllConstructorInitializersOnNextLine in all
+  // BreakConstructorInitializers modes
+  Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+  Style.AllowAllParametersOfDeclarationOnNextLine = true;
+  Style.AllowAllConstructorInitializersOnNextLine = false;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa, int bbbbbbbbbbbbb)\n"
+               "    : aaaaaaaaaaaaaaaaaaaa(a)\n"
+               "    , bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.AllowAllConstructorInitializersOnNextLine = true;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbbb,\n"
+               "    int cccccccccccccccc)\n"
+               "    : aaaaaaaaaaaaaaaaaaaa(a), bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.AllowAllParametersOfDeclarationOnNextLine = false;
+  Style.AllowAllConstructorInitializersOnNextLine = false;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbbb)\n"
+               "    : aaaaaaaaaaaaaaaaaaaa(a)\n"
+               "    , bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;
+
+  Style.AllowAllParametersOfDeclarationOnNextLine = true;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa, int bbbbbbbbbbbbb)\n"
+               "    : aaaaaaaaaaaaaaaaaaaa(a),\n"
+               "      bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.AllowAllConstructorInitializersOnNextLine = true;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbbb,\n"
+               "    int cccccccccccccccc)\n"
+               "    : aaaaaaaaaaaaaaaaaaaa(a), bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.AllowAllParametersOfDeclarationOnNextLine = false;
+  Style.AllowAllConstructorInitializersOnNextLine = false;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbbb)\n"
+               "    : aaaaaaaaaaaaaaaaaaaa(a),\n"
+               "      bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.BreakConstructorInitializers = FormatStyle::BCIS_AfterColon;
+  Style.AllowAllParametersOfDeclarationOnNextLine = true;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa, int bbbbbbbbbbbbb) :\n"
+               "    aaaaaaaaaaaaaaaaaaaa(a),\n"
+               "    bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.AllowAllConstructorInitializersOnNextLine = true;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbbb,\n"
+               "    int cccccccccccccccc) :\n"
+               "    aaaaaaaaaaaaaaaaaaaa(a), bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+
+  Style.AllowAllParametersOfDeclarationOnNextLine = false;
+  Style.AllowAllConstructorInitializersOnNextLine = false;
+  verifyFormat("SomeClassWithALongName::Constructor(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbbb) :\n"
+               "    aaaaaaaaaaaaaaaaaaaa(a),\n"
+               "    bbbbbbbbbbbbbbbbbbbbb(b) {}",
+               Style);
+}
+
+TEST_F(FormatTest, AllowAllArgumentsOnNextLine) {
+  FormatStyle Style = getLLVMStyle();
+  Style.ColumnLimit = 60;
+  Style.BinPackArguments = false;
+  for (int i = 0; i < 4; ++i) {
+    // Test all combinations of parameters that should not have an effect.
+    Style.AllowAllParametersOfDeclarationOnNextLine = i & 1;
+    Style.AllowAllConstructorInitializersOnNextLine = i & 2;
+
+    Style.AllowAllArgumentsOnNextLine = true;
+    verifyFormat("void foo() {\n"
+                 "  FunctionCallWithReallyLongName(\n"
+                 "      aaaaaaaaaaaaaaaaaaaaaaaaaaa, bbbbbbbbbbbb);\n"
+                 "}",
+                 Style);
+    Style.AllowAllArgumentsOnNextLine = false;
+    verifyFormat("void foo() {\n"
+                 "  FunctionCallWithReallyLongName(\n"
+                 "      aaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
+                 "      bbbbbbbbbbbb);\n"
+                 "}",
+                 Style);
+
+    Style.AllowAllArgumentsOnNextLine = true;
+    verifyFormat("void foo() {\n"
+                 "  auto VariableWithReallyLongName = {\n"
+                 "      aaaaaaaaaaaaaaaaaaaaaaaaaaa, bbbbbbbbbbbb};\n"
+                 "}",
+                 Style);
+    Style.AllowAllArgumentsOnNextLine = false;
+    verifyFormat("void foo() {\n"
+                 "  auto VariableWithReallyLongName = {\n"
+                 "      aaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
+                 "      bbbbbbbbbbbb};\n"
+                 "}",
+                 Style);
+  }
+
+  // This parameter should not affect declarations.
+  Style.BinPackParameters = false;
+  Style.AllowAllArgumentsOnNextLine = false;
+  Style.AllowAllParametersOfDeclarationOnNextLine = true;
+  verifyFormat("void FunctionCallWithReallyLongName(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaa, int bbbbbbbbbbbb);",
+               Style);
+  Style.AllowAllParametersOfDeclarationOnNextLine = false;
+  verifyFormat("void FunctionCallWithReallyLongName(\n"
+               "    int aaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "    int bbbbbbbbbbbb);",
+               Style);
+}
+
 TEST_F(FormatTest, BreakConstructorInitializersAfterColon) {
   FormatStyle Style = getLLVMStyle();
   Style.BreakConstructorInitializers = FormatStyle::BCIS_AfterColon;
@@ -3973,17 +4158,23 @@ TEST_F(FormatTest, BreakConstructorInitializersAfterColon) {
   verifyFormat("template <typename T>\n"
                "Constructor() : Initializer(FitsOnTheLine) {}",
                getStyleWithColumns(Style, 50));
+  Style.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
+  verifyFormat(
+      "SomeClass::Constructor() :\n"
+      "    aaaaaaaaaaaaa(aaaaaaaaaaaaaa), aaaaaaaaaaaaaaa(aaaaaaaaaaaa) {}",
+      Style);
 
+  Style.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
   verifyFormat(
       "SomeClass::Constructor() :\n"
       "    aaaaaaaaaaaaa(aaaaaaaaaaaaaa), aaaaaaaaaaaaaaa(aaaaaaaaaaaa) {}",
-	  Style);
+      Style);
 
   verifyFormat(
       "SomeClass::Constructor() :\n"
       "    aaaaaaaaaaaaa(aaaaaaaaaaaaaa), aaaaaaaaaaaaa(aaaaaaaaaaaaaa),\n"
       "    aaaaaaaaaaaaa(aaaaaaaaaaaaaa) {}",
-	  Style);
+      Style);
   verifyFormat(
       "SomeClass::Constructor() :\n"
       "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa),\n"
@@ -4029,7 +4220,7 @@ TEST_F(FormatTest, BreakConstructorInitializersAfterColon) {
 
   FormatStyle OnePerLine = Style;
   OnePerLine.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
-  OnePerLine.AllowAllParametersOfDeclarationOnNextLine = false;
+  OnePerLine.AllowAllConstructorInitializersOnNextLine = false;
   verifyFormat("SomeClass::Constructor() :\n"
                "    aaaaaaaaaaaaa(aaaaaaaaaaaaaa),\n"
                "    aaaaaaaaaaaaa(aaaaaaaaaaaaaa),\n"
@@ -11025,6 +11216,8 @@ TEST_F(FormatTest, ParsesConfigurationBools) {
   CHECK_PARSE_BOOL(AlignTrailingComments);
   CHECK_PARSE_BOOL(AlignConsecutiveAssignments);
   CHECK_PARSE_BOOL(AlignConsecutiveDeclarations);
+  CHECK_PARSE_BOOL(AllowAllArgumentsOnNextLine);
+  CHECK_PARSE_BOOL(AllowAllConstructorInitializersOnNextLine);
   CHECK_PARSE_BOOL(AllowAllParametersOfDeclarationOnNextLine);
   CHECK_PARSE_BOOL(AllowShortBlocksOnASingleLine);
   CHECK_PARSE_BOOL(AllowShortCaseLabelsOnASingleLine);
diff --git a/unittests/Format/FormatTestProto.cpp b/unittests/Format/FormatTestProto.cpp
index f4196f731f6..fd4870c27f2 100644
--- a/unittests/Format/FormatTestProto.cpp
+++ b/unittests/Format/FormatTestProto.cpp
@@ -107,6 +107,12 @@ TEST_F(FormatTestProto, FormatsEnums) {
                "};");
 }
 
+TEST_F(FormatTestProto, EnumAsFieldName) {
+  verifyFormat("message SomeMessage {\n"
+               "  required int32 enum = 1;\n"
+               "}");
+}
+
 TEST_F(FormatTestProto, UnderstandsReturns) {
   verifyFormat("rpc Search(SearchRequest) returns (SearchResponse);");
 }
@@ -387,6 +393,12 @@ TEST_F(FormatTestProto, FormatsOptions) {
                "};");
 }
 
+TEST_F(FormatTestProto, DoesntWrapPackageStatements) {
+  verifyFormat(
+      "package"
+      " some.really.long.package.that.exceeds.the.column.limit00000000;");
+}
+
 TEST_F(FormatTestProto, FormatsService) {
   verifyFormat("service SearchService {\n"
                "  rpc Search(SearchRequest) returns (SearchResponse) {\n"