diff --git a/include/swift/Basic/Compiler.h b/include/swift/Basic/Compiler.h index 0eb07856d33ca..680f8096988b2 100644 --- a/include/swift/Basic/Compiler.h +++ b/include/swift/Basic/Compiler.h @@ -13,6 +13,8 @@ #ifndef SWIFT_BASIC_COMPILER_H #define SWIFT_BASIC_COMPILER_H +#include <cstddef> + #if defined(_MSC_VER) && !defined(__clang__) #define SWIFT_COMPILER_IS_MSVC 1 #else @@ -190,4 +192,21 @@ #define ENUM_EXTENSIBILITY_ATTR(arg) #endif +// The 'u8' string literal prefix creates `char` types on C++14/17 but +// `char8_t` types on C++20. To support compiling in both modes +// simultaneously, wrap Unicode literals in `SWIFT_UTF8("...")` to ensure +// that they are interpreted by the compiler as UTF-8 but always return +// `char` types. +#if defined(__cplusplus) +#if defined(__cpp_char8_t) +inline constexpr char operator""_swift_u8(char8_t c) { return c; } +inline const char *operator""_swift_u8(const char8_t *p, std::size_t) { + return reinterpret_cast<const char *>(p); +} +#define SWIFT_UTF8(literal) u8##literal##_swift_u8 +#else // !defined(__cpp_char8_t) +#define SWIFT_UTF8(literal) u8##literal +#endif // defined(__cpp_char8_t) +#endif // defined(__cplusplus) + #endif // SWIFT_BASIC_COMPILER_H diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp index 36f0a9f95f134..0fd8dd7e86611 100644 --- a/lib/AST/Type.cpp +++ b/lib/AST/Type.cpp @@ -36,6 +36,7 @@ #include "swift/AST/SubstitutionMap.h" #include "swift/AST/TypeLoc.h" #include "swift/AST/TypeRepr.h" +#include "swift/Basic/Compiler.h" #include "clang/AST/Type.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallPtrSet.h" @@ -2037,8 +2038,8 @@ Identifier GenericTypeParamType::getName() const { llvm::SmallString<10> nameBuf; llvm::raw_svector_ostream os(nameBuf); - static const char *tau = u8"\u03C4_"; - + static const char *tau = SWIFT_UTF8("\u03C4_"); + os << tau << getDepth() << '_' << getIndex(); Identifier name = C.getIdentifier(os.str()); names.insert({depthIndex, name}); diff --git a/lib/Basic/Unicode.cpp b/lib/Basic/Unicode.cpp 
index baff2738ec647..d9706fc933251 100644 --- a/lib/Basic/Unicode.cpp +++ b/lib/Basic/Unicode.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "swift/Basic/Unicode.h" +#include "swift/Basic/Compiler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ConvertUTF.h" @@ -135,7 +136,7 @@ std::string swift::unicode::sanitizeUTF8(StringRef Text) { Builder.reserve(Text.size()); const llvm::UTF8* Data = reinterpret_cast<const llvm::UTF8*>(Text.begin()); const llvm::UTF8* End = reinterpret_cast<const llvm::UTF8*>(Text.end()); - StringRef Replacement = u8"\ufffd"; + StringRef Replacement = SWIFT_UTF8("\ufffd"); while (Data < End) { auto Step = llvm::getNumBytesForUTF8(*Data); if (Data + Step > End) { diff --git a/lib/IRGen/IRGenDebugInfo.cpp b/lib/IRGen/IRGenDebugInfo.cpp index 2e21338f64392..3811f4a8ab799 100644 --- a/lib/IRGen/IRGenDebugInfo.cpp +++ b/lib/IRGen/IRGenDebugInfo.cpp @@ -28,6 +28,7 @@ #include "swift/AST/ModuleLoader.h" #include "swift/AST/Pattern.h" #include "swift/AST/TypeDifferenceVisitor.h" +#include "swift/Basic/Compiler.h" #include "swift/Basic/Dwarf.h" #include "swift/Basic/SourceManager.h" #include "swift/Basic/Version.h" @@ -3034,7 +3035,7 @@ void IRGenDebugInfoImpl::emitTypeMetadata(IRGenFunction &IGF, return; llvm::SmallString<8> Buf; - static const char *Tau = u8"\u03C4"; + static const char *Tau = SWIFT_UTF8("\u03C4"); llvm::raw_svector_ostream OS(Buf); OS << '$' << Tau << '_' << Depth << '_' << Index; uint64_t PtrWidthInBits = CI.getTargetInfo().getPointerWidth(0); diff --git a/unittests/IDE/FuzzyStringMatcherTest.cpp b/unittests/IDE/FuzzyStringMatcherTest.cpp index 18ea2e4dba315..5a00a778d570b 100644 --- a/unittests/IDE/FuzzyStringMatcherTest.cpp +++ b/unittests/IDE/FuzzyStringMatcherTest.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "swift/IDE/FuzzyStringMatcher.h" +#include "swift/Basic/Compiler.h" #include 
"gtest/gtest.h" using FuzzyStringMatcher = swift::ide::FuzzyStringMatcher; @@ -53,26 +54,31 @@ TEST(FuzzyStringMatcher, SingleCharacterMatching) { TEST(FuzzyStringMatcher, UnicodeMatching) { // Single code point matching. - EXPECT_TRUE(FuzzyStringMatcher(u8"\u2602a\U0002000Bz") - .matchesCandidate(u8"\u2602A\U0002000BZ")); + EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("\u2602a\U0002000Bz")) + .matchesCandidate(SWIFT_UTF8("\u2602A\U0002000BZ"))); // Same-order combining marks. - EXPECT_TRUE(FuzzyStringMatcher(u8"a\u0323\u0307") - .matchesCandidate(u8"A\u0323\u0307")); + EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("a\u0323\u0307")) + .matchesCandidate(SWIFT_UTF8("A\u0323\u0307"))); // FIXME: Canonical equivalence. These should be the same. - EXPECT_FALSE(FuzzyStringMatcher(u8"a\u0307\u0323") - .matchesCandidate(u8"A\u0323\u0307")); - EXPECT_FALSE(FuzzyStringMatcher(u8"a\u00C5").matchesCandidate(u8"A\u030A")); + EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u0307\u0323")) + .matchesCandidate(SWIFT_UTF8("A\u0323\u0307"))); + EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u00C5")) + .matchesCandidate(SWIFT_UTF8("A\u030A"))); // FIXME: Compatibility equivalence. It would be good to make these the same // too, since we're fuzzy matching. - EXPECT_FALSE(FuzzyStringMatcher(u8"fi").matchesCandidate(u8"\uFB01")); - EXPECT_FALSE(FuzzyStringMatcher(u8"25").matchesCandidate(u8"2\u2075")); + EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("fi")) + .matchesCandidate(SWIFT_UTF8("\uFB01"))); + EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("25")) + .matchesCandidate(SWIFT_UTF8("2\u2075"))); // FIXME: Case-insensitivity in non-ASCII characters. 
- EXPECT_FALSE(FuzzyStringMatcher(u8"\u00E0").matchesCandidate(u8"\u00C0")); - EXPECT_FALSE(FuzzyStringMatcher(u8"ss").matchesCandidate(u8"\u00DF")); + EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("\u00E0")) + .matchesCandidate(SWIFT_UTF8("\u00C0"))); + EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("ss")) + .matchesCandidate(SWIFT_UTF8("\u00DF"))); } TEST(FuzzyStringMatcher, BasicScoring) {