Commit 6e2e7d4

zheng-xq authored and lly-zero-one committed

A skeleton implementation for the expression, IR, and visitor dispatchers. (pytorch#33)
To run the test: `cmake . && make cpptest && ./expr_test`

* Refactor the RefHandle class. (pytorch#34)
* Add convenience operator for Expr.
* clang-format change (pytorch#35)
* Adding Var, Let and eval_context support. (pytorch#36)
* Add LLVM JIT class for online codegen
* Refactor llvm codegen
* fix caps of LlvmJit
* Generate code for integer arithmetic
* Test all arithmetic ops with LLVM
* Fix rtti
* Compat with llvm 7 and 8
* Add support for tensor expressions. (pytorch#38)
* Add Casting support so mixed dtypes are supported.
* Add basic dtype and logging support. This should be merged with PyTorch during integration.
* clang-format fix (pytorch#39)
* Extend dtypes to support vector types (pytorch#40)
* Support LLVM 9 too
* Disambiguate dependent type name with template keyword
* Remove empty scalar.h
* Add basic support for statements. (pytorch#41) Add support for For, Ramp, Block, Load, Store and Broadcast. Add support for Buffer.
* Adding Stmt evaluation support. (pytorch#42)
* Use third_party/googletest from pytorch
* Remove nnc/tests/googletest submodule
* Move nnc tld to torch/csrc/jit/compiler
* Add a README (probably temporary) for jit/compiler
* Move from namespace nnc to torch::jit::compiler
* Refactor JIT class to isolate no-rtti pieces
* Adding comparison operator to Var. (pytorch#43)
* Fix typo in README.md
* Use absolute imports and pragma once
* Use absolute includes in new llvm_jit.h
* Build non-LLVM compiler stuff with libtorch
* Minimal asmjit codegen from the tensor IR
* fix pessimizing moves
* IR printer
* fix printer bug
* Add printer to build system.
* Add data structure for schedule support and Split.
* clang-format using the new template
* Add IRMutator and basic support to substitute Var in Expr and Stmts.
* Change the default count of RefCounted as zero.
* Merge Expr(node) and Expr::make(node).
* Add basic lowering to the tensor expression trees.
* fix the schedule_test
* fixed lowering
* LLVM code generation for simple loops
* bugfixes
* refcount fixing self-assignment
* Make LOG(FATAL) non-returning
* Enable Werror
* Adding statement conversion for SplitWithTail
* Add reference tests for Split
* clang-format
* A functional reference check for schedule tests.
* clang-format
* Add support for Float immediates.
* Get absolute path for ASMJIT_DIR (pytorch#24)
* Silence deprecation warnings from LLVM
* Include legacy PassManager for debug printing
* Set code model to medium to avoid indirect jumps in generated asm
* Fix argument type of input float buffers
* Add support for Casts in LLVM codegen.
* Add a complete tensor+lower+llvm test
* Enable the failing test
* Enable export of compile_commands.json.
* Floating point arithmetic
* Test fp32 mul using compute expr
* Broadcast add test using compute expr
* Update to LLVM 9
* Implementation of Broadcast for LLVM.
* Add Buffer operator() overload, and some other minor features
* Cleanup use of ConstantInt API.
* fix accidental experimental changes
* Change the Compute interface to bring the dim sizes and names together
* clang-format
* refactor Buffer into its own files
* Add support for vector casts in LLVM CodeGen
* Implement masked loads and stores.
* Implement vector masked loads and stores.
* Add a PaddedBuffer test util
* Improve the user interface for SimpleIREvaluator
* Add a test for Block codegen.
* Fix gtest include path
* clang-format
* Add expressions and support for Max and Min. (pytorch#5)
* Rename compiler to tensorexpr and move files around to be more similar to other pytorch parts. (pytorch#6)
  Summary:
  1. Move compiler to tensorexpr folder
  2. Move files from src and include to the same folder (and remove src and include folders)
  3. Rename .cc to .cpp
* Add missing include <math.h> (pytorch#7)
* Change isnan to std::isnan. It breaks my clang builds. (pytorch#8)
* Change the SimpleIREvaluator frontend (pytorch#9)
* Add RefHandle for subclass
* Make LLVM dependency optional. (pytorch#10)
* [wip] Basic fuser pass to select texpr subgraphs
* Revert "[wip] Basic fuser pass to select texpr subgraphs". This reverts commit a9d9919.
* Revert changes to the main pytorch CMakeLists.txt (for now).
* Add a test for aten::_cast_Float lowering. (pytorch#12)
* Hook tensorexpr up to the main build, and switch to c10 logging
* More ATen op tests. (pytorch#16)
* Fix some missing returns
* Include tests back to the 'all' target. (pytorch#14)
* Even more ATen op tests. (pytorch#18)
* Test for relu ATen op. (pytorch#19)
* Add intrinsics function support. (pytorch#20)
* Remove fmax/fmin, as they are already covered by the Max/Min operators (pytorch#21)
* refactor CallNode and BaseCallNode, so we can have a common concrete base class for visitors. (pytorch#22) This is the first step to add other call types.
* Add FunctionCall to use existing tensors (pytorch#23)
* Add the ability to use an existing tensor expression in other compute functions. (pytorch#24)
* fixing broken compilation on mac/clang
* adding IRnode for Compare-Select Ops and their LLVM Codegen
* Fix Werror. (pytorch#26)
* Add tests for some transcendental ops. (pytorch#27)
* Add Allocate and Free support. (pytorch#29) Add Eval and test basic alloc support. Add Lowering support for buffer allocation for intermediate tensors.
* Tensor expr fuser pass for extremely simple expressions
* Make fusion work for arbitrary buffer/tensor combinations of inputs (pytorch#30)
* fix Let02 test
* Access inputs and intermediates uniformly through Tensors (pytorch#31)
* fix Let02 test (pytorch#32)
* adding LLVM Codegen for Let
* modifying CMakeLists.txt to enable ninja test && minor update for LLVM Codegen for Let (handling XQ's comment)
* Adding ComputeInline support. (pytorch#35)
* Fix broken tests (pytorch#36)
* Make tx fuser work with arbitrary ranks
* [fuser] Broadcast args
* Improve naming of arg broadcasting function
* Test cases for tensorexpr fusion (pytorch#37)
* CompareSelect Op: Addressing XQ and Owen's comments
* modifying CMakeLists.txt to enable ninja test && minor update for LLVM Codegen for Let (handling XQ's comment)
* CompareSelect Op: Addressing XQ and Owen's comments
* Sketch sufficient support for constants to get constant alpha working. (pytorch#40)
  * Refactor to use a switch statement over Node kinds.
  * Sketch sufficient support for constants to get constant alpha working.
* Fix indices when inlining non-leaf calls (pytorch#39)
* Fixing the inline ordering issue (pytorch#43)
* Solve more problems with the inliner
* Avoid creating redundant and/or improperly ordered Constants in fused subgraphs. (pytorch#42)
* Move fuser-styled tests to schedule_test (pytorch#44)
* Add aten::sub to the new fuser. (pytorch#46)
* Refactor CodeGen from SimpleIREval (pytorch#47)
* Inline all the things (pytorch#45)
* clang-format for aten_test.cpp
* Eliminate a ton of warnings for my own sanity. (pytorch#48)
* Add support for type promotion/demotion. (pytorch#50)
* Flesh out new fuser coverage to several more ops. (pytorch#51)
* Adding the first basic CudaCodeGen. (pytorch#52)
* aten tests for eq, ge, gt, le, lt
* support for aten ops: eq
* support for more aten ops: ge, gt, le, lt, ne
* Minimal CMake change to link LLVM to libtorch
* Fix issues causing assertion failures in llvm debug builds
* Fatal on unimplemented llvm codegen ops (Allocate, etc.)
* Optionally compile tx fuser kernels with llvm
* Test for 2D broadcasted with large dims to show vectorization
* Updated isSupported for increased op coverage. (pytorch#54)
* Refactor LLVMCodeGen to compile kernel in constructor
* Cmake integration to PT codebase (pytorch#28). With this change our code blends with the usual PyTorch code and is built the usual way. I added a cmake option to specify where to look for LLVM; if it's not specified, LLVM is not used. An example of invocation (from the root of the pytorch repo):
  ```
  USE_LLVM=/path/to/llvm9/install python setup.py develop
  ```
  This command will build libtorch.{a,so} and other libraries, and the tensorexpr code will be a part of it. The tests will be built in build/bin/test_tensorexpr (I've ported only one test so far). So, invocation of the tests will be:
  ```
  build/bin/test_tensorexpr
  ```
* Remove old padded_buffer.{cpp,h}. (pytorch#56)
* Add support for code generation of Log10 intrinsics with LLVM. (pytorch#57)
* Remove tests/test_utils.h: inline what's still used and nuke what's unused. (pytorch#58)
* Move Fuser tests (tests/tests.py) to test/test_tensorexpr.py. (pytorch#59)
* Remove old CMakeLists and README.txt
* Add support for vectorized and unmasked loads and stores with LLVM. (pytorch#62)
* Enable CodeGen-level optimizations in LLVM. (pytorch#63)
* Add Bind/GPUBlock/GPUThread support. (pytorch#64)
* Bind/run interface to CodeGen (pytorch#60)
  * Bind/run interface to CodeGen
  * Make LLVMCodeGen implement CodeGen interface
  * Allow bind/run to be unimplemented for the moment (CUDA)
  * Cache compilation result
  * Two nasty bugs: forgot virtual dtor, forgot to clear bindings after run()
* Fix ambiguity in CreateExtractElementCall (0ull can be a Value*, I guess?) (pytorch#65)
* Allow constants as lhs/rhs args (not just alpha) (pytorch#66)
* Use correct tensor type for fuser output (pytorch#67)
* clang-format
* Rename 'compiler' namespace to 'tensorexpr'.
* Include all built llvm targets (pytorch#68)
* Switch back to linking only the native LLVM target. (pytorch#69)
* Virtual dtors for IRVisitor/IRMutator (pytorch#70)
* Add semicolon to make nvcc compile (pytorch#71)
* Enable NVRTC for the GPU backend. (pytorch#74)
* Fix non-CUDA testing. (pytorch#75)
* Getting fused (a)Sin(h), (a)Cos(h), (a)Tan(h), abs working with the interpreter (pytorch#73)
  * Getting fused (a)Sin(h), (a)Cos(h), (a)Tan(h), abs working with the interpreter
  * take the interpreter path only when ENABLE_LLVM is not set
* remove the leak tests, as we will get rid of refcounting (pytorch#76)
* Implement aten::min, max, and clamp (pytorch#72)
  * Implement aten::min, max, and clamp
  * Propagate NaNs like std::max/min
  * Change NaN propagation in interpreter too
* clang-format tensorexpr/tests.h (pytorch#77)
* Refactor UniqueNameManager into its own files. (pytorch#79)
* refactor cuda_codegen (pytorch#80)
* simplify nvrtc major, minor versions (pytorch#81)
* Allow CodeGen to take Var args (interpreter support only) (pytorch#78)
  * Test demonstrating dynamic shape
  * Allow binding of Vars to args in interpreter
  * Pass BufferArgs to LLVMCodeGen
  * clang-format-diff
* [LLVMCodeGen] Refactor kernel constructor to be less sprawling (pytorch#82)
  * Member TM to TM_ in LLVMCodeGen
  * [LLVMCodeGen] Add helper for getContext
  * [LLVMCodeGen] Refactor type support
  * [LLVMCodeGen] Refactor kernel emission
1 parent 61a2b34 commit 6e2e7d4


56 files changed: +11599 −1 lines

caffe2/CMakeLists.txt

Lines changed: 41 additions & 1 deletion
```diff
@@ -418,6 +418,7 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
     ${TORCH_SRC_DIR}/csrc/jit/passes/requires_grad_analysis.cpp
     ${TORCH_SRC_DIR}/csrc/jit/passes/specialize_autogradzero.cpp
     ${TORCH_SRC_DIR}/csrc/jit/passes/subgraph_rewrite.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/passes/tensorexpr_fuser.cpp
     ${TORCH_SRC_DIR}/csrc/jit/passes/python_print.cpp
     ${TORCH_SRC_DIR}/csrc/jit/passes/utils/subgraph_utils.cpp
     ${TORCH_SRC_DIR}/csrc/jit/passes/utils/check_alias_annotation.cpp
@@ -461,8 +462,38 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
     ${TORCH_SRC_DIR}/csrc/jit/fuser/fallback.cpp
     ${TORCH_SRC_DIR}/csrc/jit/function.cpp
     ${TORCH_SRC_DIR}/csrc/jit/vararg_functions.cpp
+
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/expr.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/function.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/ir.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/ir_visitor.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/asmjit_codegen.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_codegen.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/types.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/ir_printer.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/ir_mutator.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/schedule.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/tensor.cpp
+    ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/unique_name_manager.cpp
   )

+  if (USE_LLVM)
+    message(STATUS "Looking for LLVM in ${USE_LLVM}")
+    find_package(LLVM QUIET PATHS ${USE_LLVM} NO_DEFAULT_PATH)
+
+    if (LLVM_FOUND)
+      message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
+      message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
+
+      include_directories(${LLVM_INCLUDE_DIRS})
+      add_definitions(-DENABLE_LLVM ${LLVM_DEFINITIONS})
+    endif (LLVM_FOUND)
+  endif (USE_LLVM)
+
+  set_source_files_properties(${TORCH_SRC_DIR}/csrc/jit/tensorexpr/llvm_jit.cpp PROPERTIES COMPILE_FLAGS -fno-rtti)
+
+
   if (NOT INTERN_BUILD_MOBILE)
     set (MOBILE_SRCS
       ${TORCH_SRC_DIR}/csrc/jit/mobile/function.cpp
@@ -525,10 +556,11 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)

   if (USE_CUDA)
     list(APPEND Caffe2_GPU_SRCS
-      ${TORCH_SRC_DIR}/csrc/jit/fuser/cuda/fused_kernel.cpp
       ${TORCH_SRC_DIR}/csrc/autograd/profiler_cuda.cpp
       ${TORCH_SRC_DIR}/csrc/autograd/functions/comm.cpp
       ${TORCH_SRC_DIR}/csrc/cuda/comm.cpp
+      ${TORCH_SRC_DIR}/csrc/jit/fuser/cuda/fused_kernel.cpp
+      ${TORCH_SRC_DIR}/csrc/jit/tensorexpr/cuda_codegen.cpp
     )
     add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
     target_link_libraries(caffe2_nvrtc ${CUDA_NVRTC} ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB})
@@ -626,6 +658,13 @@ endif()
 add_library(torch_cpu ${Caffe2_CPU_SRCS})
 torch_compile_options(torch_cpu) # see cmake/public/utils.cmake

+if (LLVM_FOUND)
+  llvm_map_components_to_libnames(LLVM_LINK_LIBS
+    support core analysis executionengine instcombine
+    scalaropts transformutils native orcjit)
+  target_link_libraries(torch_cpu PRIVATE ${LLVM_LINK_LIBS})
+endif (LLVM_FOUND)
+
 # This is required for older versions of CMake, which don't allow
 # specifying add_library() without a list of source files
 set(DUMMY_EMPTY_FILE ${CMAKE_BINARY_DIR}/empty.cpp)
@@ -759,6 +798,7 @@ ENDIF()

 if (BUILD_TEST AND NOT MSVC AND NOT USE_ROCM)
   add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit)
+  add_subdirectory(${TORCH_ROOT}/test/cpp/tensorexpr ${CMAKE_BINARY_DIR}/test_tensorexpr)
   if (USE_DISTRIBUTED)
     add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc)
   endif()
```
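Since `ENABLE_LLVM` is only defined when CMake actually finds LLVM, any LLVM-specific code paths have to be guarded on that define. A minimal sketch of such a guard (the header name and the fallback comment are illustrative assumptions, not part of this diff):

```cpp
// Sketch: guard LLVM-only code on the ENABLE_LLVM define added above.
// The header path is an assumption based on llvm_codegen.cpp in this diff.
#ifdef ENABLE_LLVM
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
#endif

void codegenSketch() {
#ifdef ENABLE_LLVM
  // LLVM-backed code generation path.
#else
  // Interpreter (SimpleIREvaluator) fallback path.
#endif
}
```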

test/cpp/tensorexpr/CMakeLists.txt

Lines changed: 40 additions & 0 deletions
```cmake
set(TENSOREXPR_TEST_ROOT ${TORCH_ROOT}/test/cpp/tensorexpr)

file(GLOB TENSOREXPR_TEST_SRCS ${TENSOREXPR_TEST_ROOT}/test_*.cpp)
set(TENSOREXPR_TEST_SRCS ${TENSOREXPR_TEST_SRCS} PARENT_SCOPE)

add_executable(test_tensorexpr
  ${TORCH_ROOT}/test/cpp/common/main.cpp
  ${TENSOREXPR_TEST_ROOT}/gtest.cpp
  ${TENSOREXPR_TEST_ROOT}/padded_buffer.cpp
  ${TENSOREXPR_TEST_SRCS})

target_link_libraries(test_tensorexpr PRIVATE torch gtest asmjit)
target_include_directories(test_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})

if (USE_CUDA)
  target_link_libraries(test_tensorexpr PRIVATE
    ${CUDA_LIBRARIES}
    ${CUDA_NVRTC_LIB}
    ${CUDA_CUDA_LIB}
    ${TORCH_CUDA_LIBRARIES})

  target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
elseif (USE_ROCM)
  target_link_libraries(test_tensorexpr PRIVATE
    ${ROCM_HIPRTC_LIB}
    ${PYTORCH_HIP_HCC_LIBRARIES}
    ${TORCH_CUDA_LIBRARIES})

  target_link_libraries(test_tensorexpr PRIVATE caffe2_gpu)

  target_compile_definitions(test_tensorexpr PRIVATE USE_ROCM)
endif()

if (INSTALL_TEST)
  install(TARGETS test_tensorexpr DESTINATION bin)
  # Install PDB files for MSVC builds
  if (MSVC AND BUILD_SHARED_LIBS)
    install(FILES $<TARGET_PDB_FILE:test_tensorexpr> DESTINATION bin OPTIONAL)
  endif()
endif()
```

test/cpp/tensorexpr/README.md

Lines changed: 69 additions & 0 deletions
# JIT C++ Tests

## How to add a new test
First, create a new test file. Test files should be placed in this
directory, with a name that starts with `test_`, like `test_foo.cpp`.

Here is an example test file you can copy-paste.
```cpp
#include <test/cpp/jit/test_base.h>

// Tests go in torch::jit
namespace torch {
namespace jit {

// 1. Test cases are void() functions.
// 2. They start with the prefix `test`
void testCaseOne() {
  // ...
}

void testCaseTwo() {
  // ...
}
}
}
```

Then, register your test in `tests.h`:
```cpp
// Add to TH_FORALL_TESTS_CUDA instead for CUDA-requiring tests
#define TH_FORALL_TESTS(_) \
  _(ADFormulas)            \
  _(Attributes)            \
  ...
  _(CaseOne)  // note that the `test` prefix is omitted.
  _(CaseTwo)
```

We glob all the test files together in `CMakeLists.txt` so that you don't
have to edit it every time you add a test. Unfortunately, this means that in
order to get the build to pick up your new test file, you need to re-run
cmake:
```
python setup.py build --cmake
```

## Why do we have two different test runners?
We have two different ways of running our cpp tests:
1. With `gtest`, from a standalone binary.
2. With Python, from `TestJit.test_cpp` and `TestJit.test_cpp_cuda` (in
   `test/test_jit.py`)

We want both because we need to test things from a pure-C++ environment and
with all our various Python patch-points enabled.

## How do I run the tests?
The following commands assume you are in PyTorch root.

1. With `gtest`:
   ```bash
   # (re)build the test binary
   ninja build/bin/test_jit
   # run
   build/bin/test_jit --gtest_filter='glob_style_filter*'
   ```
2. With Python:
   ```
   python test/test_jit.py TestJit.test_cpp TestJit.test_cpp_cuda
   ```

test/cpp/tensorexpr/__init__.py

Whitespace-only changes.

test/cpp/tensorexpr/gtest.cpp

Lines changed: 23 additions & 0 deletions
```cpp
#include <test/cpp/tensorexpr/tests.h>

#include <gtest/gtest.h>

namespace torch {
namespace jit {

#define TENSOREXPR_GTEST(name) \
  TEST(TensorExprTest, name) { \
    test##name();              \
  }
TH_FORALL_TESTS(TENSOREXPR_GTEST)
#undef TENSOREXPR_GTEST

#define TENSOREXPR_GTEST_CUDA(name)   \
  TEST(TensorExprTest, name##_CUDA) { \
    test##name();                     \
  }
TH_FORALL_TESTS_CUDA(TENSOREXPR_GTEST_CUDA)
#undef TENSOREXPR_GTEST_CUDA

} // namespace jit
} // namespace torch
```
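Each `_(Name)` entry in `TH_FORALL_TESTS` is expanded by `TENSOREXPR_GTEST` into an ordinary gtest case that forwards to the corresponding `test`-prefixed free function. For the hypothetical `CaseOne` entry from the README above, the expansion is:

```cpp
// Expansion of TENSOREXPR_GTEST(CaseOne): a plain gtest case that
// forwards to the free function testCaseOne() declared in tests.h.
TEST(TensorExprTest, CaseOne) {
  testCaseOne();
}
```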
test/cpp/tensorexpr/padded_buffer.cpp

Lines changed: 110 additions & 0 deletions
```cpp
#include "test/cpp/tensorexpr/padded_buffer.h"

#include <sstream>

#include <gtest/gtest.h>

#include <c10/util/Logging.h>

namespace torch {
namespace jit {
namespace tensorexpr {

int PaddedBufferBase::Index(const std::vector<int>& indices) const {
  DCHECK_EQ(dims_.size(), indices.size());
  int total_index = 0;
  for (int i = 0; i < dims_.size(); i++) {
    total_index += indices[i] * strides_[i];
  }
  return total_index;
}

PaddedBufferBase::PaddedBufferBase(
    const std::vector<int>& dims,
    const std::string& name)
    : dims_(dims), name_(name), strides_(dims.size()) {
  for (int i = dims.size() - 1; i >= 0; --i) {
    if (i == dims.size() - 1) {
      strides_[i] = 1;
    } else {
      strides_[i] = strides_[i + 1] * dims[i + 1];
    }
  }
  total_size_ = strides_[0] * dims[0];
}

template <typename T>
std::string CompareErrorMsg(
    const PaddedBuffer<T>& v1,
    const PaddedBuffer<T>& v2,
    int index) {
  std::ostringstream oss;
  oss << "index: " << index << ", names: " << v1.name() << ", " << v2.name();
  return oss.str();
}

template <typename T>
void PaddedBuffer<T>::ValidateWatermark() const {
  for (int i = 0; i < kPaddingSize; i++) {
    EXPECT_EQ(data_[i], kPaddingValue)
        << "left-side watermark broken: "
        << "index: " << i << ", name: " << name();
    EXPECT_EQ(data_[i + total_size_ + kPaddingSize], kPaddingValue)
        << "right-side watermark broken: "
        << "index: " << i << ", name: " << name();
  }
}

template <typename T>
void PaddedBuffer<T>::CheckBackup() const {
  ValidateWatermark();
  DCHECK(backup_data_.size() == data_.size())
      << "Please make sure you have called Backup() before calling CheckBackup()";
  for (int i = 0; i < total_size_; i++) {
    EXPECT_EQ(data_[i + kPaddingSize], backup_data_[i + kPaddingSize])
        << "mismatch against backup, "
        << "index: " << i << ", name: " << name();
  }
}

template <typename T>
void ExpectAllEqual(const PaddedBuffer<T>& f1, const PaddedBuffer<T>& f2) {
  const std::vector<T>& v1 = f1.data_;
  const std::vector<T>& v2 = f2.data_;
  const int kPaddingSize = f1.kPaddingSize;
  const int total_size = f1.total_size_;
  ASSERT_EQ(v1.size(), v2.size());
  f1.ValidateWatermark();
  f2.ValidateWatermark();
  for (int i = 0; i < total_size; i++) {
    EXPECT_EQ(v1[kPaddingSize + i], v2[kPaddingSize + i])
        << CompareErrorMsg(f1, f2, i);
  }
}

void ExpectAllNear(
    const PaddedBuffer<float>& f1,
    const PaddedBuffer<float>& f2,
    float abs_error) {
  const std::vector<float>& v1 = f1.data_;
  const std::vector<float>& v2 = f2.data_;
  const int kPaddingSize = f1.kPaddingSize;
  const int total_size = f1.total_size_;
  ASSERT_EQ(v1.size(), v2.size());
  f1.ValidateWatermark();
  f2.ValidateWatermark();
  for (int i = 0; i < total_size; i++) {
    EXPECT_NEAR(v1[kPaddingSize + i], v2[kPaddingSize + i], abs_error)
        << CompareErrorMsg(f1, f2, i);
  }
}

template class PaddedBuffer<int>;
template class PaddedBuffer<float>;
template void ExpectAllEqual(
    const PaddedBuffer<int>& f1,
    const PaddedBuffer<int>& f2);

} // namespace tensorexpr
} // namespace jit
} // namespace torch
```
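The `PaddedBufferBase` constructor above computes contiguous row-major strides (innermost stride 1), and `Index` folds a multi-dimensional index into a flat offset. A standalone sketch of that arithmetic, with made-up dimensions for illustration:

```cpp
#include <cassert>
#include <vector>

// Standalone illustration of PaddedBufferBase's stride/index math;
// the dims and indices here are made up for the example.
int main() {
  std::vector<int> dims = {3, 4, 5};
  std::vector<int> strides(dims.size());
  // Innermost dimension has stride 1; each outer stride is the product
  // of all inner dimensions.
  for (int i = static_cast<int>(dims.size()) - 1; i >= 0; --i) {
    strides[i] = (i == static_cast<int>(dims.size()) - 1)
        ? 1
        : strides[i + 1] * dims[i + 1];
  }
  // Row-major strides: {20, 5, 1}; total size: strides[0] * dims[0] == 60.
  std::vector<int> indices = {1, 2, 3};
  int total_index = 0;
  for (size_t i = 0; i < dims.size(); i++) {
    total_index += indices[i] * strides[i];
  }
  assert(total_index == 1 * 20 + 2 * 5 + 3 * 1); // flat offset 33
  return 0;
}
```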
