Skip to content

Commit b2fd3eb

Browse files
authored
Merge pull request #67707 from allevato/no-u8-prefixes
2 parents aa5436d + 300a952 commit b2fd3eb

File tree

5 files changed

+43
-15
lines changed

5 files changed

+43
-15
lines changed

include/swift/Basic/Compiler.h

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
#ifndef SWIFT_BASIC_COMPILER_H
1414
#define SWIFT_BASIC_COMPILER_H
1515

16+
#include <stddef.h>
17+
1618
#if defined(_MSC_VER) && !defined(__clang__)
1719
#define SWIFT_COMPILER_IS_MSVC 1
1820
#else
@@ -190,4 +192,21 @@
190192
#define ENUM_EXTENSIBILITY_ATTR(arg)
191193
#endif
192194

195+
// The `u8` literal prefix produces `char`-based literals in C++14/17 but
// `char8_t`-based literals in C++20 (whenever `__cpp_char8_t` is defined).
// To keep sources compiling in both modes, wrap Unicode literals in
// `SWIFT_UTF8("...")`: the compiler still encodes the literal as UTF-8, but
// the resulting expression is always `char`-based rather than `char8_t`-based.
//
// The argument must be a bare character or string literal token, because the
// macro pastes the `u8` prefix (and, in `char8_t` mode, the `_swift_u8`
// suffix) directly onto it.
#if defined(__cplusplus)
#if defined(__cpp_char8_t)
/// Converts a `u8'x'` character literal to a plain `char`.
inline constexpr char operator""_swift_u8(char8_t c) {
  return static_cast<char>(c);
}

/// Exposes the storage of a `u8"..."` string literal as `const char *`.
/// The length argument is unused.
///
/// `size_t` is deliberately spelled unqualified: this header includes
/// <stddef.h>, which is only guaranteed to declare the global name and not
/// `std::size_t`.
inline const char *operator""_swift_u8(const char8_t *p, size_t) {
  return reinterpret_cast<const char *>(p);
}
#define SWIFT_UTF8(literal) u8##literal##_swift_u8
#else // !defined(__cpp_char8_t)
#define SWIFT_UTF8(literal) u8##literal
#endif // defined(__cpp_char8_t)
#endif // defined(__cplusplus)
211+
193212
#endif // SWIFT_BASIC_COMPILER_H

lib/AST/Type.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
#include "swift/AST/SubstitutionMap.h"
3737
#include "swift/AST/TypeLoc.h"
3838
#include "swift/AST/TypeRepr.h"
39+
#include "swift/Basic/Compiler.h"
3940
#include "clang/AST/Type.h"
4041
#include "llvm/ADT/APFloat.h"
4142
#include "llvm/ADT/SmallPtrSet.h"
@@ -2036,8 +2037,8 @@ Identifier GenericTypeParamType::getName() const {
20362037
llvm::SmallString<10> nameBuf;
20372038
llvm::raw_svector_ostream os(nameBuf);
20382039

2039-
static const char *tau = u8"\u03C4_";
2040-
2040+
static const char *tau = SWIFT_UTF8("\u03C4_");
2041+
20412042
os << tau << getDepth() << '_' << getIndex();
20422043
Identifier name = C.getIdentifier(os.str());
20432044
names.insert({depthIndex, name});

lib/Basic/Unicode.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "swift/Basic/Unicode.h"
14+
#include "swift/Basic/Compiler.h"
1415
#include "llvm/ADT/SmallString.h"
1516
#include "llvm/ADT/SmallVector.h"
1617
#include "llvm/Support/ConvertUTF.h"
@@ -135,7 +136,7 @@ std::string swift::unicode::sanitizeUTF8(StringRef Text) {
135136
Builder.reserve(Text.size());
136137
const llvm::UTF8* Data = reinterpret_cast<const llvm::UTF8*>(Text.begin());
137138
const llvm::UTF8* End = reinterpret_cast<const llvm::UTF8*>(Text.end());
138-
StringRef Replacement = u8"\ufffd";
139+
StringRef Replacement = SWIFT_UTF8("\ufffd");
139140
while (Data < End) {
140141
auto Step = llvm::getNumBytesForUTF8(*Data);
141142
if (Data + Step > End) {

lib/IRGen/IRGenDebugInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
#include "swift/AST/ModuleLoader.h"
2929
#include "swift/AST/Pattern.h"
3030
#include "swift/AST/TypeDifferenceVisitor.h"
31+
#include "swift/Basic/Compiler.h"
3132
#include "swift/Basic/Dwarf.h"
3233
#include "swift/Basic/SourceManager.h"
3334
#include "swift/Basic/Version.h"
@@ -3034,7 +3035,7 @@ void IRGenDebugInfoImpl::emitTypeMetadata(IRGenFunction &IGF,
30343035
return;
30353036

30363037
llvm::SmallString<8> Buf;
3037-
static const char *Tau = u8"\u03C4";
3038+
static const char *Tau = SWIFT_UTF8("\u03C4");
30383039
llvm::raw_svector_ostream OS(Buf);
30393040
OS << '$' << Tau << '_' << Depth << '_' << Index;
30403041
uint64_t PtrWidthInBits = CI.getTargetInfo().getPointerWidth(0);

unittests/IDE/FuzzyStringMatcherTest.cpp

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "swift/IDE/FuzzyStringMatcher.h"
14+
#include "swift/Basic/Compiler.h"
1415
#include "gtest/gtest.h"
1516

1617
using FuzzyStringMatcher = swift::ide::FuzzyStringMatcher;
@@ -53,26 +54,31 @@ TEST(FuzzyStringMatcher, SingleCharacterMatching) {
5354

5455
TEST(FuzzyStringMatcher, UnicodeMatching) {
5556
// Single code point matching.
56-
EXPECT_TRUE(FuzzyStringMatcher(u8"\u2602a\U0002000Bz")
57-
.matchesCandidate(u8"\u2602A\U0002000BZ"));
57+
EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("\u2602a\U0002000Bz"))
58+
.matchesCandidate(SWIFT_UTF8("\u2602A\U0002000BZ")));
5859

5960
// Same-order combining marks.
60-
EXPECT_TRUE(FuzzyStringMatcher(u8"a\u0323\u0307")
61-
.matchesCandidate(u8"A\u0323\u0307"));
61+
EXPECT_TRUE(FuzzyStringMatcher(SWIFT_UTF8("a\u0323\u0307"))
62+
.matchesCandidate(SWIFT_UTF8("A\u0323\u0307")));
6263

6364
// FIXME: Canonical equivalence. These should be the same.
64-
EXPECT_FALSE(FuzzyStringMatcher(u8"a\u0307\u0323")
65-
.matchesCandidate(u8"A\u0323\u0307"));
66-
EXPECT_FALSE(FuzzyStringMatcher(u8"a\u00C5").matchesCandidate(u8"A\u030A"));
65+
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u0307\u0323"))
66+
.matchesCandidate(SWIFT_UTF8("A\u0323\u0307")));
67+
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("a\u00C5"))
68+
.matchesCandidate(SWIFT_UTF8("A\u030A")));
6769

6870
// FIXME: Compatibility equivalence. It would be good to make these the same
6971
// too, since we're fuzzy matching.
70-
EXPECT_FALSE(FuzzyStringMatcher(u8"fi").matchesCandidate(u8"\uFB01"));
71-
EXPECT_FALSE(FuzzyStringMatcher(u8"25").matchesCandidate(u8"2\u2075"));
72+
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("fi"))
73+
.matchesCandidate(SWIFT_UTF8("\uFB01")));
74+
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("25"))
75+
.matchesCandidate(SWIFT_UTF8("2\u2075")));
7276

7377
// FIXME: Case-insensitivity in non-ASCII characters.
74-
EXPECT_FALSE(FuzzyStringMatcher(u8"\u00E0").matchesCandidate(u8"\u00C0"));
75-
EXPECT_FALSE(FuzzyStringMatcher(u8"ss").matchesCandidate(u8"\u00DF"));
78+
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("\u00E0"))
79+
.matchesCandidate(SWIFT_UTF8("\u00C0")));
80+
EXPECT_FALSE(FuzzyStringMatcher(SWIFT_UTF8("ss"))
81+
.matchesCandidate(SWIFT_UTF8("\u00DF")));
7682
}
7783

7884
TEST(FuzzyStringMatcher, BasicScoring) {

0 commit comments

Comments
 (0)