Add RemoveIndentation() to the lexer for multi-line strings.

leebyron · leebyron · commit 7c00820104d1 · 2017-11-28T19:04:01.000-08:00
diff --git a/src/jsutils/dedent.js b/src/jsutils/dedent.js
@@ -7,16 +7,7 @@
  * @flow
  */
 
- /**
-  * fixes identation by removing leading spaces from each line
-  */
-function fixIdent(str: string): string {
-  const indent = /^\n?( *)/.exec(str)[1]; // figure out ident
-  return str
-    .replace(RegExp('^' + indent, 'mg'), '') // remove ident
-    .replace(/^\n*/m, '') //  remove leading newline
-    .replace(/ *$/, ''); // remove trailing spaces
-}
+import removeIndentation from './removeIndentation';
 
 /**
  * An ES6 string tag that fixes identation. Also removes leading newlines
@@ -45,5 +36,5 @@ export default function dedent(
     }
   }
 
-  return fixIdent(res);
+  return removeIndentation(res) + '\n';
 }
diff --git a/src/jsutils/removeIndentation.js b/src/jsutils/removeIndentation.js
@@ -0,0 +1,66 @@
+/* @flow */
+/**
+ *  Copyright (c) 2017, Facebook, Inc.
+ *  All rights reserved.
+ *
+ *  This source code is licensed under the BSD-style license found in the
+ *  LICENSE file in the root directory of this source tree. An additional grant
+ *  of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/**
+ * Removes leading indentation from each line in a multi-line string.
+ *
+ * This implements RemoveIndentation() algorithm in the GraphQL spec.
+ *
+ * Note: this is similar to Python's docstring "trim" operation.
+ * https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
+ */
+export default function removeIndentation(rawString: string): string {
+  // Expand a multi-line string into independent lines.
+  const lines = rawString.split(/\r\n|[\n\r]/g);
+
+  // Determine minimum indentation, not including the first line.
+  let minIndent;
+  for (let i = 1; i < lines.length; i++) {
+    const line = lines[i];
+    const lineIndent = leadingWhitespace(line);
+    if (
+      lineIndent < line.length &&
+      (minIndent === undefined || lineIndent < minIndent)
+    ) {
+      minIndent = lineIndent;
+      if (minIndent === 0) {
+        break;
+      }
+    }
+  }
+
+  // Remove indentation, not including the first line.
+  if (minIndent) {
+    for (let i = 1; i < lines.length; i++) {
+      lines[i] = lines[i].slice(minIndent);
+    }
+  }
+
+  // Remove leading and trailing empty lines.
+  while (lines.length > 0 && lines[0].length === 0) {
+    lines.shift();
+  }
+  while (lines.length > 0 && lines[lines.length - 1].length === 0) {
+    lines.pop();
+  }
+
+  // Return a multi-line string joined with U+000A.
+  return lines.join('\n');
+}
+
+function leadingWhitespace(str) {
+  let i = 0;
+  for (; i < str.length; i++) {
+    if (str[i] !== ' ' && str[i] !== '\t') {
+      break;
+    }
+  }
+  return i;
+}
diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
@@ -301,11 +301,11 @@ describe('Lexer', () => {
     });
 
     expect(
-      lexOne('""" white space """')
+      lexOne('" white space "')
     ).to.containSubset({
-      kind: TokenKind.MULTI_LINE_STRING,
+      kind: TokenKind.STRING,
       start: 0,
-      end: 19,
+      end: 15,
       value: ' white space '
     });
 
@@ -337,12 +337,12 @@ describe('Lexer', () => {
     });
 
     expect(
-      lexOne('"""multi\rline"""')
+      lexOne('"""multi\rline\r\nnormalized"""')
     ).to.containSubset({
       kind: TokenKind.MULTI_LINE_STRING,
       start: 0,
-      end: 16,
-      value: 'multi\rline'
+      end: 28,
+      value: 'multi\nline\nnormalized'
     });
 
     expect(
@@ -363,6 +363,21 @@ describe('Lexer', () => {
       value: 'slashes \\\\ \\/'
     });
 
+    expect(
+      lexOne(`"""
+
+        spans
+          multiple
+            lines
+
+        """`)
+    ).to.containSubset({
+      kind: TokenKind.MULTI_LINE_STRING,
+      start: 0,
+      end: 68,
+      value: 'spans\n  multiple\n    lines'
+    });
+
   });
 
   it('lex reports useful multi-line string errors', () => {
diff --git a/src/language/lexer.js b/src/language/lexer.js
@@ -10,6 +10,7 @@
 import type { Token } from './ast';
 import type { Source } from './source';
 import { syntaxError } from '../error';
+import removeIndentation from '../jsutils/removeIndentation';
 
 /**
  * Given a Source object, this returns a Lexer for that source.
@@ -532,7 +533,7 @@ function readMultiLineString(source, start, line, col, prev): Token {
   let position = start + 3;
   let chunkStart = position;
   let code = 0;
-  let value = '';
+  let rawValue = '';
 
   while (
     position < body.length &&
@@ -544,15 +545,15 @@ function readMultiLineString(source, start, line, col, prev): Token {
       charCodeAt.call(body, position + 1) === 34 &&
       charCodeAt.call(body, position + 2) === 34
     ) {
-      value += slice.call(body, chunkStart, position);
+      rawValue += slice.call(body, chunkStart, position);
       return new Tok(
         MULTI_LINE_STRING,
         start,
         position + 3,
         line,
         col,
         prev,
-        value
+        removeIndentation(rawValue)
       );
     }
 
@@ -577,7 +578,7 @@ function readMultiLineString(source, start, line, col, prev): Token {
       charCodeAt.call(body, position + 2) === 34 &&
       charCodeAt.call(body, position + 3) === 34
     ) {
-      value += slice.call(body, chunkStart, position) + '"""';
+      rawValue += slice.call(body, chunkStart, position) + '"""';
       position += 4;
       chunkStart = position;
     } else {
diff --git a/src/language/printer.js b/src/language/printer.js
@@ -73,9 +73,7 @@ const printDocASTReducer = {
   IntValue: ({ value }) => value,
   FloatValue: ({ value }) => value,
   StringValue: ({ value, multiLine }) =>
-    multiLine ?
-      `"""${value.replace(/"""/g, '\\"""')}"""` :
-      JSON.stringify(value),
+    multiLine ? printMultiLineString(value) : JSON.stringify(value),
   BooleanValue: ({ value }) => JSON.stringify(value),
   NullValue: () => 'null',
   EnumValue: ({ value }) => value,
@@ -204,3 +202,18 @@ function wrap(start, maybeString, end) {
 function indent(maybeString) {
   return maybeString && maybeString.replace(/\n/g, '\n  ');
 }
+
+function printMultiLineString(value) {
+  const hasLineBreak = value.indexOf('\n') !== -1;
+  const hasLeadingSpace = value[0] === ' ' || value[0] === '\t';
+  let printed = '"""';
+  if (hasLineBreak && !hasLeadingSpace) {
+    printed += '\n';
+  }
+  printed += value.replace(/"""/g, '\\"""');
+  if (hasLineBreak) {
+    printed += '\n';
+  }
+  printed += '"""';
+  return printed;
+}