Skip to content

Commit f21a6bd

Browse files
committed
[Windows] Fix build issues using Clang-CL on Windows, add CI
1 parent 00d86aa commit f21a6bd

File tree

6 files changed

+53
-7
lines changed

6 files changed

+53
-7
lines changed

.github/workflows/pull.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,27 @@ jobs:
3434
3535
# Run tests
3636
pytest
37+
38+
windows:
39+
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
40+
with:
41+
submodules: 'recursive'
42+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
43+
script: |
44+
conda init powershell
45+
powershell -Command "& {
46+
Set-PSDebug -Trace 1
47+
\$ErrorActionPreference = 'Stop'
48+
\$PSNativeCommandUseErrorActionPreference = \$true
49+
50+
cmake -DCMAKE_BUILD_TYPE=Debug test -Bbuild/test
51+
cmake --build build/test -j9 --config Debug
52+
pushd build/test && ctest && popd
53+
54+
# Install tokenizers
55+
pip install . -v
56+
pip install pytest blobfile transformers>=4.53.1
57+
58+
# Run tests
59+
pytest
60+
}"

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,7 @@ pip-out/
3434
*~
3535
.~lock.*
3636
*.idea
37+
38+
*.so
39+
*.dylib
40+
*.pyd

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ include(CMakePackageConfigHelpers)
2828
include(Utils.cmake)
2929

3030
# Ignore weak attribute warning
31-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
31+
if(NOT MSVC)
32+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")
33+
endif()
3234

3335
set(ABSL_ENABLE_INSTALL ON)
3436
set(ABSL_PROPAGATE_CXX_STD ON)

include/pytorch/tokenizers/tiktoken.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
#include <pytorch/tokenizers/result.h>
2424
#include <pytorch/tokenizers/tokenizer.h>
2525

26+
#ifdef _WIN32
27+
// ssize_t isn't available on Windows. Alias it to the Windows SSIZE_T type.
28+
#include <BaseTsd.h>
29+
typedef SSIZE_T ssize_t;
30+
#endif
31+
2632
namespace tokenizers {
2733

2834
static constexpr int32_t kSpecialTokensSize = 256;

setup.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ class CMakeBuild(build_ext):
3030
def build_extension(self, ext): # noqa C901
3131
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
3232

33-
# Ensure the extension goes into the pytorch_tokenizers package directory
34-
extdir = os.path.join(extdir, "pytorch_tokenizers")
35-
3633
# Required for auto-detection & inclusion of auxiliary "native" libs
3734
if not extdir.endswith(os.path.sep):
3835
extdir += os.path.sep
@@ -55,6 +52,10 @@ def build_extension(self, ext): # noqa C901
5552
]
5653
build_args = ["--target", "pytorch_tokenizers_cpp"]
5754

55+
# Use Clang for Windows builds.
56+
if sys.platform == "win32":
57+
cmake_args += ["-T ClangCL"]
58+
5859
# Adding CMake arguments set as environment variable
5960
# (needed e.g. to build for ARM OSX on conda-forge)
6061
if "CMAKE_ARGS" in os.environ:
@@ -124,6 +125,15 @@ def build_extension(self, ext): # noqa C901
124125
["cmake", "--build", "."] + build_args, cwd=build_temp, check=True
125126
)
126127

128+
if sys.platform == "win32":
129+
# Setuptools seems to look for the artifact in a different location on
130+
# Windows (the reason is not yet understood), so move the artifact up one level.
131+
pyd_files = list(Path(extdir).glob("*.pyd"))
132+
artifact_dst_dir = Path(extdir).parent
133+
for f in pyd_files:
134+
dst_path = artifact_dst_dir / os.path.basename(f)
135+
os.replace(f, dst_path)
136+
127137

128138
setup(
129139
name="pytorch-tokenizers",
@@ -132,7 +142,7 @@ def build_extension(self, ext): # noqa C901
132142
long_description_content_type="text/markdown",
133143
url="https://github.com/meta-pytorch/tokenizers",
134144
packages=find_packages(),
135-
ext_modules=[CMakeExtension("pytorch_tokenizers_cpp")],
145+
ext_modules=[CMakeExtension("pytorch_tokenizers.pytorch_tokenizers_cpp")],
136146
cmdclass={"build_ext": CMakeBuild},
137147
zip_safe=False,
138148
python_requires=">=3.10",

src/hf_tokenizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ Error HFTokenizer::load(const std::string& path) {
3434
std::string model_config_json = "";
3535
if (fs::is_directory(path)) {
3636
const fs::path root(path);
37-
model_json = root / "tokenizer.json";
37+
model_json = (root / "tokenizer.json").string();
3838
if (!fs::exists(model_json)) {
3939
TK_LOG(Info, "no tokenizer.json found in %s", path.c_str());
4040
return Error::LoadFailure;
4141
}
4242
const auto model_config_json_path = root / "tokenizer_config.json";
4343
if (fs::exists(model_config_json_path)) {
44-
model_config_json = model_config_json_path;
44+
model_config_json = model_config_json_path.string();
4545
}
4646
}
4747

0 commit comments

Comments
 (0)