Skip to content

Commit dc45835

Browse files
authored
Test util cleanup (#2003)
Don't clear the memory allocator cache as it shouldn't be necessary
1 parent 3ca21eb commit dc45835

File tree

2 files changed

+56
-52
lines changed

2 files changed

+56
-52
lines changed

torch/csrc/jit/codegen/cuda/test/test_gpu_validator.h

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,22 @@
1-
#include <torch/csrc/jit/codegen/cuda/executor.h>
1+
#pragma once
2+
23
#include <torch/csrc/jit/codegen/cuda/executor_utils.h>
34
#include <torch/csrc/jit/codegen/cuda/expr_evaluator.h>
45
#include <torch/csrc/jit/codegen/cuda/fusion.h>
56
#include <torch/csrc/jit/codegen/cuda/ir_iostream.h>
67
#include <torch/csrc/jit/codegen/cuda/lower_utils.h>
78

89
#include <ATen/cuda/CUDAContext.h>
9-
#include <c10/cuda/CUDACachingAllocator.h>
10-
#include <torch/torch.h>
1110

1211
#include <unordered_map>
1312

13+
// Tests go in torch::jit
1414
namespace torch {
1515
namespace jit {
16-
namespace fuser {
17-
namespace cuda {
18-
19-
inline bool deviceMajorMinorCheck(int major, int minor = 0) {
20-
auto dev_prop = at::cuda::getCurrentDeviceProperties();
21-
if (dev_prop->major < major ||
22-
(dev_prop->major == major && dev_prop->minor < minor)) {
23-
return false;
24-
}
25-
return true;
26-
}
2716

28-
inline int deviceSMCount() {
29-
int sm_count = at::cuda::getCurrentDeviceProperties()->multiProcessorCount;
30-
return sm_count;
31-
}
17+
using namespace torch::jit::fuser::cuda;
3218

33-
class NVFuserTest : public ::testing::Test {
34-
protected:
35-
void SetUp() override {
36-
// requires PASCAL or newer
37-
if (!deviceMajorMinorCheck(6)) {
38-
GTEST_SKIP() << "skipping tests on pre-PASCAL GPUs";
39-
}
40-
setFillAllocationWithNan(true);
41-
}
42-
43-
void TearDown() override {
44-
c10::cuda::CUDACachingAllocator::emptyCache();
45-
}
46-
};
19+
namespace {
4720

4821
struct ValidationConstants {
4922
// Tolerances generated from randn + add + sum fusion
@@ -74,8 +47,6 @@ struct ValidationConstants {
7447
double base_float_rel_tol = -1;
7548
};
7649

77-
namespace {
78-
7950
// Returns abs and relative values to use for validation
8051
std::pair<double, double> getTolerance(
8152
DataType dtype,
@@ -338,15 +309,13 @@ ExpressionEvaluator bindInputsAndLaunchParams(
338309
return expr_eval;
339310
}
340311

341-
} // namespace
342-
343312
// Validation will look through the fusion and figure out how many elements were
344313
// reduced to create each output. It will then compute a tolerance to use for
345314
// allclose based on experimental results. The experimental results were based
346315
// on adding two tensors then summing them. This of course has an assumption
347316
// that we're always summing values between -2 and 2. If we start summing values
348317
// larger than that this approach might not hold.
349-
inline void testValidate(
318+
void testValidate(
350319
Fusion* fusion,
351320
const std::vector<at::Tensor>& fusion_outputs,
352321
const at::ArrayRef<IValue>& aten_inputs,
@@ -466,18 +435,6 @@ inline void testValidate(
466435
}
467436
}
468437

469-
inline void clearL2Cache() {
470-
torch::NoGradGuard no_grad;
471-
auto l2_cache_size = at::cuda::getCurrentDeviceProperties()->l2CacheSize;
472-
auto options =
473-
torch::TensorOptions().dtype(torch::kFloat32).device(at::kCUDA, 0);
474-
475-
auto l2_elems = l2_cache_size / 4;
476-
torch::Tensor t0 = torch::empty(l2_elems, options);
477-
torch::Tensor t1 = torch::clone(t0);
478-
};
479-
480-
} // namespace cuda
481-
} // namespace fuser
438+
} // namespace
482439
} // namespace jit
483440
} // namespace torch

torch/csrc/jit/codegen/cuda/test/test_utils.h

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
#pragma once
22

3-
#include <cstddef>
4-
3+
#include <torch/csrc/jit/codegen/cuda/executor.h>
4+
#include <torch/csrc/jit/codegen/cuda/expr_evaluator.h>
55
#include <torch/csrc/jit/codegen/cuda/ir_all_nodes.h>
66

7+
#include <ATen/cuda/CUDAContext.h>
8+
#include <c10/cuda/CUDACachingAllocator.h>
9+
#include <torch/torch.h>
10+
11+
#include <gtest/gtest.h>
12+
13+
#include <cstddef>
14+
715
// Tests go in torch::jit
816
namespace torch {
917
namespace jit {
@@ -84,6 +92,45 @@ int64_t prime_numbers[] = {
8492
1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
8593
1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223};
8694

95+
bool deviceMajorMinorCheck(int major, int minor = 0) {
96+
auto dev_prop = at::cuda::getCurrentDeviceProperties();
97+
if (dev_prop->major < major ||
98+
(dev_prop->major == major && dev_prop->minor < minor)) {
99+
return false;
100+
}
101+
return true;
102+
}
103+
104+
int deviceSMCount() {
105+
int sm_count = at::cuda::getCurrentDeviceProperties()->multiProcessorCount;
106+
return sm_count;
107+
}
108+
109+
void clearL2Cache() {
110+
torch::NoGradGuard no_grad;
111+
auto l2_cache_size = at::cuda::getCurrentDeviceProperties()->l2CacheSize;
112+
auto options =
113+
torch::TensorOptions().dtype(torch::kFloat32).device(at::kCUDA, 0);
114+
115+
auto l2_elems = l2_cache_size / 4;
116+
torch::Tensor t0 = torch::empty(l2_elems, options);
117+
torch::Tensor t1 = torch::clone(t0);
118+
};
119+
87120
} // namespace
121+
122+
// Fixture class must be uniquely identified, i.e., can't be in an
123+
// anonymous namespace
124+
class NVFuserTest : public ::testing::Test {
125+
protected:
126+
void SetUp() override {
127+
// requires PASCAL or newer
128+
if (!deviceMajorMinorCheck(6)) {
129+
GTEST_SKIP() << "skipping tests on pre-PASCAL GPUs";
130+
}
131+
setFillAllocationWithNan(true);
132+
}
133+
};
134+
88135
} // namespace jit
89136
} // namespace torch

0 commit comments

Comments
 (0)