python · gvanrossum · Sep 19, 2016 · Sep 21, 2016 · Sep 30, 2016 · Sep 30, 2016
diff --git a/mypy/fastparse.py b/mypy/fastparse.py
@@ -219,6 +219,7 @@ def translate_module_id(self, id: str) -> str:
         if id == self.custom_typing_module:
             return 'typing'
         elif id == '__builtin__' and self.pyversion[0] == 2:
+            assert False  # Shouldn't get here
             # HACK: __builtin__ in Python 2 is aliases to builtins. However, the implementation
             #   is named __builtin__.py (there is another layer of translation elsewhere).
             return 'builtins'
@@ -744,6 +745,7 @@ def visit_Str(self, n: ast35.Str) -> Node:
             # unicode.
             return StrExpr(n.s)
         else:
+            assert False  # Shouldn't get here with an ast35.Str
             return UnicodeExpr(n.s)
 
     # Bytes(bytes s)
@@ -756,6 +758,7 @@ def visit_Bytes(self, n: ast35.Bytes) -> Node:
         if self.pyversion[0] >= 3:
             return BytesExpr(contents)
         else:
+            assert False  # Shouldn't get here with an ast35.Bytes
             return StrExpr(contents)
 
     # NameConstant(singleton value)

diff --git a/mypy/fastparse2.py b/mypy/fastparse2.py
@@ -236,7 +236,8 @@ def translate_module_id(self, id: str) -> str:
         """
         if id == self.custom_typing_module:
             return 'typing'
-        elif id == '__builtin__' and self.pyversion[0] == 2:
+        elif id == '__builtin__':
+            assert self.pyversion[0] == 2
             # HACK: __builtin__ in Python 2 is aliases to builtins. However, the implementation
             #   is named __builtin__.py (there is another layer of translation elsewhere).
             return 'builtins'
@@ -819,11 +820,16 @@ def visit_Str(self, s: ast27.Str) -> Node:
             contents = str(n)[2:-1]
 
             if self.pyversion[0] >= 3:
+                assert False  # Shouldn't get here with an ast27.Str
                 return BytesExpr(contents)
             else:
-                return StrExpr(contents)
+                if s.has_b:
+                    return BytesExpr(contents)
+                else:
+                    return StrExpr(contents)
         else:
             if self.pyversion[0] >= 3 or self.is_stub:
+                assert False  # Shouldn't get here with an ast27.Str
                 return StrExpr(s.s)
             else:
                 return UnicodeExpr(s.s)

diff --git a/mypy/nodes.py b/mypy/nodes.py
@@ -1007,9 +1007,12 @@ def accept(self, visitor: NodeVisitor[T]) -> T:
 
 # How mypy uses StrExpr, BytesExpr, and UnicodeExpr:
 # In Python 2 mode:
-# b'x', 'x' -> StrExpr
+# b'x' -> BytesExpr  [new!]
+# 'x' -> StrExpr
 # u'x' -> UnicodeExpr
-# BytesExpr is unused
+# However after `from __future__ import unicode_literals` [also new!]:
+# b'x' -> BytesExpr
+# 'x', u'x' -> UnicodeExpr
 #
 # In Python 3 mode:
 # b'x' -> BytesExpr
@@ -1033,7 +1036,7 @@ def accept(self, visitor: NodeVisitor[T]) -> T:
 class BytesExpr(Expression):
     """Bytes literal"""
 
-    value = ''  # TODO use bytes
+    value = ''
     literal = LITERAL_YES
 
     def __init__(self, value: str) -> None:
@@ -1047,7 +1050,7 @@ def accept(self, visitor: NodeVisitor[T]) -> T:
 class UnicodeExpr(Expression):
     """Unicode literal (Python 2.x)"""
 
-    value = ''  # TODO use bytes
+    value = ''
     literal = LITERAL_YES
 
     def __init__(self, value: str) -> None:

diff --git a/mypy/semanal.py b/mypy/semanal.py
@@ -122,7 +122,7 @@
 TYPE_PROMOTIONS_PYTHON2 = TYPE_PROMOTIONS.copy()
 TYPE_PROMOTIONS_PYTHON2.update({
     'builtins.str': 'builtins.unicode',
-    'builtins.bytearray': 'builtins.str',
+    'builtins.bytearray': 'builtins.bytes',
 })
 
 # When analyzing a function, should we analyze the whole function in one go, or

diff --git a/mypy/test/testcheck.py b/mypy/test/testcheck.py
@@ -28,6 +28,7 @@
 # List of files that contain test case descriptions.
 files = [
     'check-basic.test',
+    'check-bytes-str-unicode-python2.test',
     'check-classes.test',
     'check-expressions.test',
     'check-statements.test',

diff --git a/mypy/test/testpythoneval.py b/mypy/test/testpythoneval.py
@@ -62,6 +62,7 @@ def test_python_evaluation(testcase):
         interpreter = python3_path
         args = []
         py2 = False
+    args.append('--fast-parser')  # Some tests require this now.
     args.append('--tb')  # Show traceback on crash.
     # Write the program to a file.
     program = '_program.py'

diff --git a/test-data/unit/check-bytes-str-unicode-python2.test b/test-data/unit/check-bytes-str-unicode-python2.test
@@ -0,0 +1,49 @@
+-- Test cases for bytes/str/unicode in Python 2.x.
+
+[case testBytesStrUnicodeBasics]
+# flags: --hide-error-context --fast-parser
+def needs_bytes(b):
+    # type: (bytes) -> None
+    needs_bytes(b)
+    needs_str(b)  # E: Argument 1 to "needs_str" has incompatible type "bytes"; expected "str"
+    needs_unicode(b)  # E: Argument 1 to "needs_unicode" has incompatible type "bytes"; expected "unicode"
+
+def needs_str(s):
+    # type: (str) -> None
+    # TODO: The following line should not be an error
+    needs_bytes(s)  # E: Argument 1 to "needs_bytes" has incompatible type "str"; expected "bytes"
+    needs_str(s)
+    needs_unicode(s)
+
+def needs_unicode(u):
+    # type: (unicode) -> None
+    needs_bytes(u)  # E: Argument 1 to "needs_bytes" has incompatible type "unicode"; expected "bytes"
+    needs_str(u)  # E: Argument 1 to "needs_str" has incompatible type "unicode"; expected "str"
+    needs_unicode(u)
+[builtins_py2 fixtures/bytes.pyi]
+
+[case testBytesStrUnicodeLiterals]
+# flags: --fast-parser
+def needs_bytes(b):
+    # type: (bytes) -> None
+    pass
+def needs_str(s):
+    # type: (str) -> None
+    pass
+def needs_unicode(u):
+    # type: (unicode) -> None
+    pass
+
+needs_bytes(b'x')
+# TODO: The following line should not be an error
+needs_bytes('x')  # E: Argument 1 to "needs_bytes" has incompatible type "str"; expected "bytes"
+needs_bytes(u'x')  # E: Argument 1 to "needs_bytes" has incompatible type "unicode"; expected "bytes"
+
+needs_str(b'x')  # E: Argument 1 to "needs_str" has incompatible type "bytes"; expected "str"
+needs_str('x')
+needs_str(u'x')  # E: Argument 1 to "needs_str" has incompatible type "unicode"; expected "str"
+
+needs_unicode(b'x')  # E: Argument 1 to "needs_unicode" has incompatible type "bytes"; expected "unicode"
+needs_unicode('x')
+needs_unicode(u'x')
+[builtins_py2 fixtures/bytes.pyi]
diff --git a/test-data/unit/fixtures/bytes.pyi b/test-data/unit/fixtures/bytes.pyi
@@ -0,0 +1,20 @@
+class Any: pass
+
+class object:
+    def __init__(self) -> None: pass
+class type:
+    def __init__(self, x: Any) -> None: pass
+class bytes:
+    pass
+class str:
+    pass
+class unicode:
+    pass
+Text = unicode
+NativeStr = str
+
+class int: pass
+class float: pass
+class tuple: pass
+class function: pass
+class ellipsis: pass
diff --git a/test-data/unit/python2eval.test b/test-data/unit/python2eval.test
@@ -79,14 +79,18 @@ x = 1.5
 [case testAnyStr_python2]
 from typing import AnyStr
 def f(x): # type: (AnyStr) -> AnyStr
-    if isinstance(x, str):
+    if isinstance(x, bytes):
+        return b'foo'
+    elif isinstance(x, str):
         return 'foo'
     else:
         return u'zar'
 print f('')
+print f(b'')
 print f(u'')
 [out]
 foo
+foo
 zar
 
 [case testGenericPatterns_python2]
@@ -152,13 +156,16 @@ f(**params)
 
 [case testFromFutureImportUnicodeLiterals2_python2]
 from __future__ import unicode_literals
-def f(x: str) -> None: pass
+def f(x):
+    # type: (str) -> None
+    pass
 f(b'')
 f(u'')
 f('')
 [out]
-_program.py:4: error: Argument 1 to "f" has incompatible type "unicode"; expected "str"
-_program.py:5: error: Argument 1 to "f" has incompatible type "unicode"; expected "str"
+_program.py:5: error: Argument 1 to "f" has incompatible type "bytes"; expected "str"
+_program.py:6: error: Argument 1 to "f" has incompatible type "unicode"; expected "str"
+_program.py:7: error: Argument 1 to "f" has incompatible type "unicode"; expected "str"
 
 [case testStrUnicodeCompatibility_python2]
 import typing
@@ -233,7 +240,7 @@ u'\x89'
 import typing
 import io
 c = io.BytesIO()
-c.write('\x89')
+c.write(b'\x89')
 print(repr(c.getvalue()))
 [out]
 '\x89'
@@ -396,11 +403,12 @@ def f(x: unicode) -> int: pass
 def f(x: bytearray) -> int: pass
 [out]
 _program.py:2: error: No overload variant of "f" matches argument types [builtins.int]
+_program.py:5: error: No overload variant of "f" matches argument types [builtins.bytes]
 
 [case testByteArrayStrCompatibility_python2]
-def f(x): # type: (str) -> None
+def f(x): # type: (bytes) -> None
     pass
-f(bytearray('foo'))
+f(bytearray(b'foo'))
 
 [case testAbstractProperty_python2]
 from abc import abstractproperty, ABCMeta
@@ -465,7 +473,8 @@ re.subn(upat, lambda m: u'', u'')[0] + u''
 
 [case testYieldRegressionTypingAwaitable_python2]
 # Make sure we don't reference typing.Awaitable in Python 2 mode.
-def g() -> int:
+def g():
+    # type: () -> int
     yield
 [out]
 _program.py: note: In function "g":

diff --git a/test-requirements.txt b/test-requirements.txt
@@ -1,4 +1,4 @@
 flake8
-typed-ast
+typed-ast>=0.6.1
 pytest>=2.8
 pytest-xdist>=1.13
diff --git a/typeshed b/typeshed
+105 −15		stdlib/2.7/__builtin__.pyi
+11 −11		stdlib/2.7/io.pyi
+18 −18		stdlib/2.7/re.pyi
+2 −2		stdlib/2.7/typing.pyi
+2 −1		stdlib/2and3/logging/__init__.pyi
+2 −1		stdlib/3.4/enum.pyi
+68 −0		stdlib/3/http/__init__.pyi
+2 −1		third_party/3/typed_ast/ast35.pyi