diff --git a/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift b/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift new file mode 100644 index 000000000..0856361d8 --- /dev/null +++ b/Sources/_RegexParser/Regex/Parse/CompilerInterface.swift @@ -0,0 +1,115 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +// The version number for the regex. This gets emitted as an argument to the +// Regex(_regexString:version:) initializer and should be bumped if the format +// of the regex string needs to be changed in a way that requires the runtime +// to be updated. +public let currentRegexLiteralFormatVersion = 1 + +@_spi(CompilerInterface) +public struct CompilerLexError: Error { + public var message: String + public var location: UnsafeRawPointer + public var completelyErroneous: Bool +} + +/// Interface for the Swift compiler. +/// +/// Attempt to lex a regex literal string. +/// +/// - Parameters: +/// - start: The pointer at which to start lexing the literal. +/// - bufferEnd: A pointer to the end of the buffer, which should not be lexed +/// past. +/// - mustBeRegex: Whether we expect a regex literal to be lexed here. If +/// `false`, a regex literal will only be lexed if it does not +/// produce an error. +/// +/// - Returns: If a regex literal was lexed, `resumePtr` specifies where to +/// resume lexing and `error` specifies a lexing error to emit. If +/// a regex literal was not lexed, `nil` is returned. 
+/// +@_spi(CompilerInterface) +public func swiftCompilerLexRegexLiteral( + start: UnsafeRawPointer, bufferEnd: UnsafeRawPointer, mustBeRegex: Bool +) -> (resumePtr: UnsafeRawPointer, error: CompilerLexError?)? { + do { + let (_, _, endPtr) = try lexRegex(start: start, end: bufferEnd) + return (resumePtr: endPtr, error: nil) + } catch let error as DelimiterLexError { + if !mustBeRegex { + // This token can be something else. Let the client fall back. + return nil + } + let completelyErroneous: Bool + switch error.kind { + case .unterminated, .multilineClosingNotOnNewline: + // These can be recovered from. + completelyErroneous = false + case .unprintableASCII, .invalidUTF8: + // We don't currently have good recovery behavior for these. + completelyErroneous = true + case .unknownDelimiter: + // An unknown delimiter should be recovered from, as we may want to try + // lexing something else. + return nil + } + // For now every lexer error is emitted at the starting delimiter. + let compilerError = CompilerLexError( + message: "\(error)", location: start, + completelyErroneous: completelyErroneous + ) + return (error.resumePtr, compilerError) + } catch { + fatalError("Should be a DelimiterLexError") + } +} + +@_spi(CompilerInterface) +public struct CompilerParseError: Error { + public var message: String + public var location: String.Index? +} + +/// Interface for the Swift compiler. +/// +/// Attempt to parse a regex literal string. +/// +/// - Parameters: +/// - input: The regex input string, including delimiters. +/// - captureBufferOut: A buffer into which the captures of the regex will +/// be encoded upon a successful parse. +/// +/// - Returns: The string to emit along with its version number. +/// - Throws: `CompilerParseError` if there was a parsing error. 
+@_spi(CompilerInterface) +public func swiftCompilerParseRegexLiteral( + _ input: String, captureBufferOut: UnsafeMutableRawBufferPointer +) throws -> (regexToEmit: String, version: Int) { + do { + let ast = try parseWithDelimiters(input) + // Serialize the capture structure for later type inference. + assert(captureBufferOut.count >= input.utf8.count) + ast.captureStructure.encode(to: captureBufferOut) + + // For now we just return the input as the regex to emit. This could be + // changed in the future if we need to back-deploy syntax to something + // already known to the matching engine, or otherwise change the format. + // Note however that it will need plumbing through on the compiler side. + return (regexToEmit: input, version: currentRegexLiteralFormatVersion) + } catch { + throw CompilerParseError( + message: "cannot parse regular expression: \(String(describing: error))", + location: (error as? LocatedErrorProtocol)?.location.start + ) + } +} diff --git a/Sources/_RegexParser/Regex/Parse/Mocking.swift b/Sources/_RegexParser/Regex/Parse/Mocking.swift deleted file mode 100644 index 56294e2d3..000000000 --- a/Sources/_RegexParser/Regex/Parse/Mocking.swift +++ /dev/null @@ -1,128 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2021-2022 Apple Inc. 
and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -@available(*, deprecated, message: "moving to SwiftCompilerModules") -private func copyCString(_ str: String) -> UnsafePointer { - let count = str.utf8.count + 1 - return str.withCString { - assert($0[count-1] == 0) - let ptr = UnsafeMutablePointer.allocate(capacity: count) - ptr.initialize(from: $0, count: count) - return UnsafePointer(ptr) - } -} - -/// Interface for libswift. -/// -/// Attempt to lex a regex literal string. -/// -/// - Parameters: -/// - CurPtrPtr: A pointer to the current pointer of lexer, which should be -/// the start of the literal. This will be advanced to the point -/// at which the lexer should resume, or will remain the same if -/// this is not a regex literal. -/// - BufferEnd: A pointer to the end of the buffer, which should not be lexed -/// past. -/// - ErrorOut: If an error is encountered, this will be set to the error -/// string. -/// -/// - Returns: A bool indicating whether lexing was completely erroneous, and -/// cannot be recovered from, or false if there either was no error, -/// or there was a recoverable error. -@available(*, deprecated, message: "moving to SwiftCompilerModules") -func libswiftLexRegexLiteral( - _ curPtrPtr: UnsafeMutablePointer?>?, - _ bufferEndPtr: UnsafePointer?, - _ errOut: UnsafeMutablePointer?>? 
-) -> /*CompletelyErroneous*/ CBool { - guard let curPtrPtr = curPtrPtr, let inputPtr = curPtrPtr.pointee, - let bufferEndPtr = bufferEndPtr - else { - fatalError("Expected lexing pointers") - } - guard let errOut = errOut else { fatalError("Expected error out param") } - - do { - let (_, _, endPtr) = try lexRegex(start: inputPtr, end: bufferEndPtr) - curPtrPtr.pointee = endPtr.assumingMemoryBound(to: CChar.self) - return false - } catch let error as DelimiterLexError { - if error.kind == .unknownDelimiter { - // An unknown delimiter should be recovered from, as we may want to try - // lex something else. - return false - } - errOut.pointee = copyCString("\(error)") - curPtrPtr.pointee = error.resumePtr.assumingMemoryBound(to: CChar.self) - - switch error.kind { - case .unterminated, .multilineClosingNotOnNewline: - // These can be recovered from. - return false - case .unprintableASCII, .invalidUTF8: - // We don't currently have good recovery behavior for these. - return true - case .unknownDelimiter: - fatalError("Already handled") - } - } catch { - fatalError("Should be a DelimiterLexError") - } -} - -// The version number for the regex. This gets emitted as an argument to the -// Regex(_regexString:version:) initializer and should be bumped if the format -// of the regex string needs to be changed in such a that requires the runtime -// to updated. -public let currentRegexLiteralFormatVersion: CUnsignedInt = 1 - -/// Interface for libswift. -/// -/// - Parameters: -/// - inputPtr: A null-terminated C string. -/// - errOut: A buffer accepting an error string upon error. -/// - versionOut: A buffer accepting a regex literal format -/// version. -/// - captureStructureOut: A buffer accepting a byte sequence representing the -/// capture structure. -/// - captureStructureSize: The size of the capture structure buffer. Must be -/// greater than or equal to `strlen(inputPtr)`. 
-@available(*, deprecated, message: "moving to SwiftCompilerModules") -func libswiftParseRegexLiteral( - _ inputPtr: UnsafePointer?, - _ errOut: UnsafeMutablePointer?>?, - _ versionOut: UnsafeMutablePointer?, - _ captureStructureOut: UnsafeMutableRawPointer?, - _ captureStructureSize: CUnsignedInt -) { - guard let s = inputPtr else { fatalError("Expected input param") } - guard let errOut = errOut else { fatalError("Expected error out param") } - guard let versionOut = versionOut else { - fatalError("Expected version out param") - } - - versionOut.pointee = currentRegexLiteralFormatVersion - - let str = String(cString: s) - do { - let ast = try parseWithDelimiters(str) - // Serialize the capture structure for later type inference. - if let captureStructureOut = captureStructureOut { - assert(captureStructureSize >= str.utf8.count) - let buffer = UnsafeMutableRawBufferPointer( - start: captureStructureOut, count: Int(captureStructureSize)) - ast.captureStructure.encode(to: buffer) - } - } catch { - errOut.pointee = copyCString( - "cannot parse regular expression: \(String(describing: error))") - } -} diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 94c134853..e66078831 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -@testable import _RegexParser +@testable @_spi(CompilerInterface) import _RegexParser import XCTest @testable import _StringProcessing @@ -281,24 +281,22 @@ func delimiterLexingDiagnosticTest( } } -func libswiftDiagnosticMessageTest( - _ input: String, _ expectedErr: String, file: StaticString = #file, - line: UInt = #line +/// Checks that `swiftCompilerParseRegexLiteral` rejects `input`, throwing a +/// `CompilerParseError` whose message equals `expectedErr`. +func compilerInterfaceDiagnosticMessageTest( + _ input: String, _ expectedErr: String, + file: StaticString = #file, line: UInt = #line ) { - var errPtr: UnsafePointer? 
- var version: CUnsignedInt = 0 - - libswiftParseRegexLiteral( - input, &errPtr, &version, /*captureStructure*/ nil, - /*captureStructureSize*/ 0 - ) - - guard let errPtr = errPtr else { - XCTFail("Unexpected test pass", file: file, line: line) - return + do { + let captureBuffer = UnsafeMutableRawBufferPointer(start: nil, count: 0) + _ = try swiftCompilerParseRegexLiteral( + input, captureBufferOut: captureBuffer) + XCTFail("Expected parse error", file: file, line: line) + } catch let error as CompilerParseError { + XCTAssertEqual(expectedErr, error.message, file: file, line: line) + } catch { + XCTFail("Expected CompilerParseError, got \(error)", file: file, line: line) } - let err = String(cString: errPtr) - XCTAssertEqual(expectedErr, err, file: file, line: line) } extension RegexTests { @@ -2547,8 +2545,8 @@ extension RegexTests { delimiterLexingDiagnosticTest("#/\n#/#", .multilineClosingNotOnNewline) } - func testlibswiftDiagnostics() { - libswiftDiagnosticMessageTest( + func testCompilerInterfaceDiagnostics() { + compilerInterfaceDiagnosticMessageTest( "#/[x*/#", "cannot parse regular expression: expected ']'") } }