diff --git a/mypy_self_check.ini b/mypy_self_check.ini index 0b49b3de862b..f4f8d2d0e08b 100644 --- a/mypy_self_check.ini +++ b/mypy_self_check.ini @@ -8,7 +8,7 @@ pretty = True always_false = MYPYC plugins = mypy.plugins.proper_plugin python_version = 3.9 -exclude = mypy/typeshed/|mypyc/test-data/|mypyc/lib-rt/ +exclude = mypy/typeshed/|mypyc/test-data/ enable_error_code = ignore-without-code,redundant-expr enable_incomplete_feature = PreciseTupleTypes show_error_code_links = True diff --git a/mypyc/build.py b/mypyc/build.py index 848892da669a..c9ddaa7dab1f 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -28,6 +28,7 @@ from collections.abc import Iterable from typing import TYPE_CHECKING, Any, NamedTuple, NoReturn, Union, cast +import mypyc.build_setup # noqa: F401 from mypy.build import BuildSource from mypy.errors import CompileError from mypy.fscache import FileSystemCache @@ -36,7 +37,7 @@ from mypy.util import write_junit_xml from mypyc.annotate import generate_annotated_html from mypyc.codegen import emitmodule -from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name +from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name from mypyc.errors import Errors from mypyc.ir.pprint import format_modules from mypyc.namegen import exported_name @@ -71,6 +72,13 @@ class ModDesc(NamedTuple): "base64/arch/neon64/codec.c", ], [ + "base64/arch/avx/enc_loop_asm.c", + "base64/arch/avx2/enc_loop.c", + "base64/arch/avx2/enc_loop_asm.c", + "base64/arch/avx2/enc_reshuffle.c", + "base64/arch/avx2/enc_translate.c", + "base64/arch/avx2/dec_loop.c", + "base64/arch/avx2/dec_reshuffle.c", "base64/arch/generic/32/enc_loop.c", "base64/arch/generic/64/enc_loop.c", "base64/arch/generic/32/dec_loop.c", @@ -662,9 +670,6 @@ def mypycify( # See https://github.com/mypyc/mypyc/issues/956 "-Wno-cpp", ] - if X86_64: - # Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2. 
def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Run a compiler command, adding per-directory SIMD flags on x86-64.

    Replacement for ``CCompiler.spawn``. If the command names a ``.c`` file
    whose path contains one of the path components registered for this
    compiler type in EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT, the
    matching flags are added to a copy of the command. The (possibly
    extended) copy is then passed to the original ``spawn``, which is saved
    on the class as ``__spawn``.

    Args:
        cmd: The command line as a sequence of arguments. It is not modified.
        **kwargs: Forwarded unchanged to the original ``spawn``.
    """
    compiler_type: str = self.compiler_type
    # Use .get(): a compiler type with no entry in the table must run its
    # command unmodified instead of failing with KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if X86_64 and extra_options:
        # File names are closer to the end of the command line, so scan
        # backwards. Iterate over a snapshot because new_cmd grows below.
        for argument in reversed(tuple(new_cmd)):
            source = str(argument)
            # Only arguments that name a C source file are of interest.
            if not source.endswith(".c"):
                continue

            for path, flags in extra_options.items():
                if path not in source:
                    continue
                if compiler_type == "bcpp":
                    # Borland accepts a source file name at the end,
                    # so insert the options before the last argument.
                    last_argument = new_cmd.pop()
                    new_cmd.extend(flags)
                    new_cmd.append(last_argument)
                else:
                    new_cmd.extend(flags)
                # The first matching path component wins. Table order
                # matters: ".../avx2" is listed before its prefix ".../avx".
                break
    self.__spawn(new_cmd, **kwargs)
diff --git a/mypyc/lib-rt/base64/arch/avx/codec.c b/mypyc/lib-rt/base64/arch/avx/codec.c index 8e2ef5c2e724..7a64a94be2af 100644 --- a/mypyc/lib-rt/base64/arch/avx/codec.c +++ b/mypyc/lib-rt/base64/arch/avx/codec.c @@ -24,7 +24,7 @@ #include "../ssse3/dec_loop.c" #if BASE64_AVX_USE_ASM -# include "enc_loop_asm.c" +# include "./enc_loop_asm.c" #else # include "../ssse3/enc_translate.c" # include "../ssse3/enc_reshuffle.c" diff --git a/mypyc/lib-rt/base64/arch/avx2/codec.c b/mypyc/lib-rt/base64/arch/avx2/codec.c index fe9200296914..a54385bf89be 100644 --- a/mypyc/lib-rt/base64/arch/avx2/codec.c +++ b/mypyc/lib-rt/base64/arch/avx2/codec.c @@ -20,15 +20,15 @@ # endif #endif -#include "dec_reshuffle.c" -#include "dec_loop.c" +#include "./dec_reshuffle.c" +#include "./dec_loop.c" #if BASE64_AVX2_USE_ASM -# include "enc_loop_asm.c" +# include "./enc_loop_asm.c" #else -# include "enc_translate.c" -# include "enc_reshuffle.c" -# include "enc_loop.c" +# include "./enc_translate.c" +# include "./enc_reshuffle.c" +# include "./enc_loop.c" #endif #endif // HAVE_AVX2 diff --git a/mypyc/lib-rt/base64/config.h b/mypyc/lib-rt/base64/config.h index b5e47fb04e75..467a722c2f11 100644 --- a/mypyc/lib-rt/base64/config.h +++ b/mypyc/lib-rt/base64/config.h @@ -1,29 +1,15 @@ #ifndef BASE64_CONFIG_H #define BASE64_CONFIG_H -#define BASE64_WITH_SSSE3 0 -#define HAVE_SSSE3 BASE64_WITH_SSSE3 - -#define BASE64_WITH_SSE41 0 -#define HAVE_SSE41 BASE64_WITH_SSE41 - -#if defined(__x86_64__) || defined(_M_X64) -#define BASE64_WITH_SSE42 1 -#else -#define BASE64_WITH_SSE42 0 +#if !defined(__APPLE__) && ((defined(__x86_64__) && defined(__LP64__)) || defined(_M_X64)) + #define HAVE_SSSE3 1 + #define HAVE_SSE41 1 + #define HAVE_SSE42 1 + #define HAVE_AVX 1 + #define HAVE_AVX2 1 + #define HAVE_AVX512 0 #endif -#define HAVE_SSE42 BASE64_WITH_SSE42 - -#define BASE64_WITH_AVX 0 -#define HAVE_AVX BASE64_WITH_AVX - -#define BASE64_WITH_AVX2 0 -#define HAVE_AVX2 BASE64_WITH_AVX2 - -#define BASE64_WITH_AVX512 0 
def spawn(self, cmd, **kwargs) -> None:  # type: ignore[no-untyped-def]
    """Spawn a compiler command with extra SIMD flags for matching sources.

    Installed in place of ``CCompiler.spawn``. On x86-64, each ``.c``
    argument is checked against the path components registered for the
    current compiler type in EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.
    The flags of the first matching component are added to a copy of the
    command, and the copy is run through the saved original ``spawn``
    (``__spawn``).

    Args:
        cmd: The command line as a sequence of arguments. It is left untouched.
        **kwargs: Passed straight through to the original ``spawn``.
    """
    compiler_type: str = self.compiler_type
    # Look the compiler type up with .get(): types that have no entry in the
    # table get no extra flags rather than a KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if X86_64 and extra_options:
        # Source file names sit near the end of the command line, so walk it
        # in reverse. A snapshot is used because new_cmd is extended below.
        for argument in reversed(tuple(new_cmd)):
            source = str(argument)
            # Skip everything that is not a C source file name.
            if not source.endswith(".c"):
                continue

            for path, flags in extra_options.items():
                if path not in source:
                    continue
                if compiler_type == "bcpp":
                    # Borland accepts a source file name at the end,
                    # so the options go in front of the last argument.
                    last_argument = new_cmd.pop()
                    new_cmd.extend(flags)
                    new_cmd.append(last_argument)
                else:
                    new_cmd.extend(flags)
                # Stop at the first matching path component. The table lists
                # ".../avx2" ahead of ".../avx", which is a prefix of it.
                break
    self.__spawn(new_cmd, **kwargs)