diff --git a/BUILD b/BUILD
index 98792b2dc1..83e321c0b9 100644
--- a/BUILD
+++ b/BUILD
@@ -22,9 +22,9 @@ pkg_tar(
         "//core/lowering:include",
         "//core/lowering/passes:include",
         "//core/partitioning:include",
-        "//core/partitioning/segmentedblock:include",
-        "//core/partitioning/partitioninginfo:include",
         "//core/partitioning/partitioningctx:include",
+        "//core/partitioning/partitioninginfo:include",
+        "//core/partitioning/segmentedblock:include",
         "//core/plugins:impl_include",
         "//core/plugins:include",
         "//core/runtime:include",
diff --git a/tests/core/partitioning/test_conditionals.cpp b/tests/core/partitioning/test_conditionals.cpp
index ba336db663..e0f93dde59 100644
--- a/tests/core/partitioning/test_conditionals.cpp
+++ b/tests/core/partitioning/test_conditionals.cpp
@@ -71,5 +71,5 @@ TEST(Partitioning, FallbackInplaceOPInConditionalsCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
diff --git a/tests/core/partitioning/test_fallback_graph_output.cpp b/tests/core/partitioning/test_fallback_graph_output.cpp
index f6ce657ae3..29e1a1dd38 100644
--- a/tests/core/partitioning/test_fallback_graph_output.cpp
+++ b/tests/core/partitioning/test_fallback_graph_output.cpp
@@ -34,7 +34,7 @@ TEST(Partitioning, ComputeResNet50FallbackGraphCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 
 TEST(Partitioning, ComputeMobileNetFallbackGraphCorrectly) {
@@ -64,6 +64,6 @@ TEST(Partitioning, ComputeMobileNetFallbackGraphCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 #endif
diff --git a/tests/core/partitioning/test_loop_fallback.cpp b/tests/core/partitioning/test_loop_fallback.cpp
index 5f6bc2ae4d..1da56f1a8d 100644
--- a/tests/core/partitioning/test_loop_fallback.cpp
+++ b/tests/core/partitioning/test_loop_fallback.cpp
@@ -30,7 +30,7 @@ TEST(Partitioning, CheckLoopFallbackEvalCompilesCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 
 TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {
@@ -58,5 +58,5 @@ TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::core::CompileGraph(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
diff --git a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
index 950859e524..1c1a1631ea 100644
--- a/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
+++ b/tests/core/partitioning/test_resolve_nontensor_inputs.cpp
@@ -87,7 +87,7 @@ TEST(Partitioning, ResolveNonTensorInputsForIFBlockCorrectly) {
   auto jit_results = mod.forward({jit_in0, jit_in1});
   auto trt_results = new_mod.forward({trt_in0, trt_in1});
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results.toTensor(), trt_results.toTensor(), 2e-6));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results.toTensor(), trt_results.toTensor()));
 }
 
 TEST(Partitioning, ResolveNonTensorInputsCorrectly) {
diff --git a/tests/cpp/cpp_api_test.h b/tests/cpp/cpp_api_test.h
index 3addfbc2ed..15d848f1cc 100644
--- a/tests/cpp/cpp_api_test.h
+++ b/tests/cpp/cpp_api_test.h
@@ -6,7 +6,7 @@
 #include "torch/script.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
-using PathAndInput = std::tuple<std::string, std::vector<std::vector<int64_t>>, std::vector<c10::ScalarType>, float>;
+using PathAndInput = std::tuple<std::string, std::vector<std::vector<int64_t>>, std::vector<c10::ScalarType>>;
 
 class CppAPITests : public testing::TestWithParam<PathAndInput> {
  public:
@@ -22,7 +22,6 @@ class CppAPITests : public testing::TestWithParam<PathAndInput> {
     }
     input_shapes = std::get<1>(params);
     input_types = std::get<2>(params);
-    threshold = std::get<3>(params);
   }
 
   void TearDown() {
@@ -34,5 +33,4 @@ class CppAPITests : public testing::TestWithParam<PathAndInput> {
   torch::jit::script::Module mod;
   std::vector<std::vector<int64_t>> input_shapes;
   std::vector<c10::ScalarType> input_types;
-  float threshold;
 };
diff --git a/tests/cpp/test_collections.cpp b/tests/cpp/test_collections.cpp
index e3f0d91dfe..7fcc006980 100644
--- a/tests/cpp/test_collections.cpp
+++ b/tests/cpp/test_collections.cpp
@@ -42,7 +42,7 @@ TEST(CppAPITests, TestCollectionStandardTensorInput) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(inputs_);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionTupleInput) {
@@ -85,7 +85,7 @@ TEST(CppAPITests, TestCollectionTupleInput) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionListInput) {
@@ -144,7 +144,7 @@ TEST(CppAPITests, TestCollectionListInput) {
   LOG_DEBUG("Finish compile");
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor(), 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(out.toTensor(), trt_out.toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionTupleInputOutput) {
@@ -178,23 +178,20 @@ TEST(CppAPITests, TestCollectionTupleInputOutput) {
   torch::jit::IValue complex_input_shape(input_shape_tuple);
   std::tuple<torch::jit::IValue> input_tuple2(complex_input_shape);
   torch::jit::IValue complex_input_shape2(input_tuple2);
-  // torch::jit::IValue complex_input_shape(list);
 
   auto compile_settings = torch_tensorrt::ts::CompileSpec(complex_input_shape2);
   compile_settings.min_block_size = 1;
 
-  // compile_settings.torch_executed_ops.push_back("prim::TupleConstruct");
-
   // // FP16 execution
   compile_settings.enabled_precisions = {torch::kHalf};
   // // Compile module
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor()));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionListInputOutput) {
@@ -252,10 +249,10 @@ TEST(CppAPITests, TestCollectionListInputOutput) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor(), 1e-5));
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor(), 1e-5));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toList().vec()[0].toTensor(), trt_out.toList().vec()[0].toTensor()));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toList().vec()[1].toTensor(), trt_out.toList().vec()[1].toTensor()));
 }
 
 TEST(CppAPITests, TestCollectionComplexModel) {
@@ -313,8 +310,8 @@ TEST(CppAPITests, TestCollectionComplexModel) {
   auto trt_mod = torch_tensorrt::torchscript::compile(mod, compile_settings);
   auto trt_out = trt_mod.forward(complex_inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor(), 1e-5));
-  ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor(), 1e-5));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[0].toTensor(), trt_out.toTuple()->elements()[0].toTensor()));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+      out.toTuple()->elements()[1].toTensor(), trt_out.toTuple()->elements()[1].toTensor()));
 }
diff --git a/tests/cpp/test_compiled_modules.cpp b/tests/cpp/test_compiled_modules.cpp
index 3a81f0a531..d982c8ec6a 100644
--- a/tests/cpp/test_compiled_modules.cpp
+++ b/tests/cpp/test_compiled_modules.cpp
@@ -41,8 +41,7 @@ TEST_P(CppAPITests, CompiledModuleIsClose) {
   }
 
   for (size_t i = 0; i < trt_results.size(); i++) {
-    ASSERT_TRUE(
-        torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i]), 0.99));
+    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results[i], trt_results[i].reshape_as(jit_results[i])));
   }
 }
 
@@ -52,10 +51,10 @@ INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
     testing::Values(
-        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}),
-        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}),
-        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-3}),
-        PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}, 8e-2}),
-        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 8e-2})));
+        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/bert_base_uncased_traced.jit.pt", {{1, 14}, {1, 14}}, {at::kInt, at::kInt}}),
+        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
 
 #endif
diff --git a/tests/cpp/test_default_input_types.cpp b/tests/cpp/test_default_input_types.cpp
index a79ddafe0c..dd320017b8 100644
--- a/tests/cpp/test_default_input_types.cpp
+++ b/tests/cpp/test_default_input_types.cpp
@@ -116,5 +116,4 @@ TEST_P(CppAPITests, InputsRespectUserSettingFP32WeightsFP16In) {
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
-    testing::Values(
-        PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat} /*unused*/, 2e-5})));
+    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
diff --git a/tests/cpp/test_example_tensors.cpp b/tests/cpp/test_example_tensors.cpp
index 256e6f1b59..92f373add5 100644
--- a/tests/cpp/test_example_tensors.cpp
+++ b/tests/cpp/test_example_tensors.cpp
@@ -21,4 +21,4 @@ TEST_P(CppAPITests, InputsFromTensors) {
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
-    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5})));
+    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
diff --git a/tests/cpp/test_module_fallback.cpp b/tests/cpp/test_module_fallback.cpp
index bfdfc46b04..55022deff2 100644
--- a/tests/cpp/test_module_fallback.cpp
+++ b/tests/cpp/test_module_fallback.cpp
@@ -30,7 +30,7 @@ TEST(CppAPITest, ResNetModuleFallbacksCorrectly) {
   auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
   auto trt_mod = torch_tensorrt::ts::compile(mod, cfg);
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 
 TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) {
@@ -69,6 +69,6 @@ TEST(CppAPITest, MobileNetModuleFallbacksCorrectlyWithOneEngine) {
   ASSERT_TRUE(trt_count == 1);
 
   auto trt_results = trt_mod.forward(trt_inputs_ivalues).toTensor();
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results, 0.99));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results, trt_results));
 }
 #endif
diff --git a/tests/cpp/test_modules_as_engines.cpp b/tests/cpp/test_modules_as_engines.cpp
index 11b7a54fb0..b76bae7333 100644
--- a/tests/cpp/test_modules_as_engines.cpp
+++ b/tests/cpp/test_modules_as_engines.cpp
@@ -14,8 +14,7 @@ TEST_P(CppAPITests, ModuleAsEngineIsClose) {
   jit_results.push_back(jit_results_ivalues.toTensor());
   auto trt_results = torch_tensorrt::tests::util::RunModuleForwardAsEngine(mod, inputs);
 
-  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-      jit_results[0], trt_results[0].reshape_as(jit_results[0]), threshold));
+  ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(jit_results[0], trt_results[0].reshape_as(jit_results[0])));
 }
 
 #ifndef DISABLE_TEST_IN_CI
@@ -24,8 +23,8 @@ INSTANTIATE_TEST_SUITE_P(
     ModuleAsEngineForwardIsCloseSuite,
     CppAPITests,
     testing::Values(
-        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}),
-        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}),
-        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99}),
-        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99})));
+        PathAndInput({"tests/modules/resnet18_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/mobilenet_v2_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/efficientnet_b0_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/vit_scripted.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
 #endif
diff --git a/tests/cpp/test_multi_gpu_serde.cpp b/tests/cpp/test_multi_gpu_serde.cpp
index 0b3944125b..df5a3a9277 100644
--- a/tests/cpp/test_multi_gpu_serde.cpp
+++ b/tests/cpp/test_multi_gpu_serde.cpp
@@ -24,11 +24,11 @@ TEST_P(CppAPITests, CompiledModuleIsClose) {
 
   for (size_t i = 0; i < trt_results.size(); i++) {
     ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-        jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0")), threshold));
+        jit_results[i], trt_results[i].reshape_as(jit_results[i]).to(torch::Device("cuda:0"))));
   }
 }
 
 INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
-    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 0.99})));
+    testing::Values(PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}})));
diff --git a/tests/cpp/test_multiple_registered_engines.cpp b/tests/cpp/test_multiple_registered_engines.cpp
index 658f59ca74..23029bb377 100644
--- a/tests/cpp/test_multiple_registered_engines.cpp
+++ b/tests/cpp/test_multiple_registered_engines.cpp
@@ -56,13 +56,13 @@ TEST(CppAPITest, CanRunMultipleEngines) {
   trt2_results.push_back(trt2_results_ivalues.toTensor());
 
   for (size_t i = 0; i < trt1_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-        jit1_results[i], trt1_results[i].reshape_as(jit1_results[i]), 0.99));
+    ASSERT_TRUE(
+        torch_tensorrt::tests::util::cosineSimEqual(jit1_results[i], trt1_results[i].reshape_as(jit1_results[i])));
   }
 
   for (size_t i = 0; i < trt2_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
-        jit2_results[i], trt2_results[i].reshape_as(jit2_results[i]), 0.99));
+    ASSERT_TRUE(
+        torch_tensorrt::tests::util::cosineSimEqual(jit2_results[i], trt2_results[i].reshape_as(jit2_results[i])));
   }
 }
 #endif
diff --git a/tests/cpp/test_runtime_thread_safety.cpp b/tests/cpp/test_runtime_thread_safety.cpp
index 0e1352c8b3..414f31e43f 100644
--- a/tests/cpp/test_runtime_thread_safety.cpp
+++ b/tests/cpp/test_runtime_thread_safety.cpp
@@ -78,7 +78,7 @@ TEST(CppAPITests, RuntimeThreadSafety) {
 
   bool flag = true;
   for (int i = 0; i < num_threads; i++) {
-    bool f = torch_tensorrt::tests::util::almostEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor(), 1e-2);
+    bool f = torch_tensorrt::tests::util::cosineSimEqual(out_vec[i].toTensor(), trt_out_vec[i].toTensor());
     flag = flag && f;
   }
   ASSERT_TRUE(flag);
diff --git a/tests/cpp/test_serialization.cpp b/tests/cpp/test_serialization.cpp
index 0086500be5..fea4e467f0 100644
--- a/tests/cpp/test_serialization.cpp
+++ b/tests/cpp/test_serialization.cpp
@@ -42,8 +42,8 @@ TEST_P(CppAPITests, SerializedModuleIsStillCorrect) {
   post_serialized_results.push_back(post_serialized_results_ivalues.toTensor());
 
   for (size_t i = 0; i < pre_serialized_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i]), threshold));
+    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i])));
   }
 }
 
@@ -72,8 +72,8 @@ TEST_P(CppAPITests, SerializedDynamicModuleIsStillCorrect) {
   post_serialized_results.push_back(post_serialized_results_ivalues.toTensor());
 
   for (size_t i = 0; i < pre_serialized_results.size(); i++) {
-    ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(
-        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i]), threshold));
+    ASSERT_TRUE(torch_tensorrt::tests::util::cosineSimEqual(
+        post_serialized_results[i], pre_serialized_results[i].reshape_as(post_serialized_results[i])));
   }
 }
 
@@ -81,5 +81,5 @@ INSTANTIATE_TEST_SUITE_P(
     CompiledModuleForwardIsCloseSuite,
     CppAPITests,
     testing::Values(
-        PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}, 2e-5}),
-        PathAndInput({"tests/modules/pooling_traced.jit.pt", {{1, 3, 10, 10}}, {at::kFloat}, 2e-5})));
+        PathAndInput({"tests/modules/resnet18_traced.jit.pt", {{1, 3, 224, 224}}, {at::kFloat}}),
+        PathAndInput({"tests/modules/pooling_traced.jit.pt", {{1, 3, 10, 10}}, {at::kFloat}})));
diff --git a/tests/modules/hub.py b/tests/modules/hub.py
index 6b1b87d08d..f4f68ffa99 100644
--- a/tests/modules/hub.py
+++ b/tests/modules/hub.py
@@ -46,12 +46,6 @@
         "model": torch.hub.load("pytorch/vision:v0.9.0", "resnet50", pretrained=True),
         "path": "both",
     },
-    "ssd": {
-        "model": torch.hub.load(
-            "NVIDIA/DeepLearningExamples:torchhub", "nvidia_ssd", model_math="fp32"
-        ),
-        "path": "trace",
-    },
     "efficientnet_b0": {
         "model": timm.create_model("efficientnet_b0", pretrained=True),
         "path": "script",
diff --git a/tests/util/util.cpp b/tests/util/util.cpp
index 3bfec24c5a..39d1eb14b1 100644
--- a/tests/util/util.cpp
+++ b/tests/util/util.cpp
@@ -1,3 +1,4 @@
+#include "util.h"
 #include "core/util/prelude.h"
 #include "torch/script.h"
 #include "torch/torch.h"
@@ -6,19 +7,7 @@ namespace torch_tensorrt {
 namespace tests {
 namespace util {
 
-bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold = 0.99f) {
-  torch::Tensor cosine_sim = torch::nn::functional::cosine_similarity(
-      computed_tensor.flatten(), gt_tensor.flatten(), torch::nn::functional::CosineSimilarityFuncOptions().dim(0));
-  std::ostringstream ss;
-  ss << computed_tensor << std::endl << gt_tensor << std::endl;
-  LOG_DEBUG(ss.str());
-  LOG_DEBUG(std::string("Cosine Similarity score: ") + std::to_string(cosine_sim.item<float>()));
-  LOG_DEBUG(std::string("Acceptable Threshold: ") + std::to_string(threshold));
-
-  return cosine_sim.item<float>() >= threshold;
-}
-
-bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol = 1e-8, float rtol = 1e-5) {
+bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol, float rtol) {
   std::ostringstream ss;
   ss << computed_tensor << std::endl << gt_tensor << std::endl;
   ss << " atol: " << atol << " rtol: " << rtol << std::endl;
@@ -37,6 +26,44 @@ bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor,
   return result <= threshold;
 }
 
+/**
+ * Compares two tensors by the cosine similarity of their flattened values.
+ *
+ * Both tensors are flattened and their cosine similarity is compared against
+ * `threshold`. Cosine similarity carries no directional information when
+ * either tensor has no nonzero element, so that case is delegated to
+ * almostEqual() called with its default tolerances.
+ *
+ * @param computed_tensor Tensor produced by the implementation under test.
+ * @param gt_tensor Reference tensor to compare against.
+ * @param threshold Minimum cosine similarity that counts as equal.
+ * @return true if the tensors are considered equal, false otherwise.
+ */
+bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold) {
+  std::ostringstream ss;
+  ss << computed_tensor << std::endl << gt_tensor << std::endl;
+  LOG_DEBUG(ss.str());
+
+  // Test for "no nonzero element" rather than "sum == 0": a tensor such as
+  // {1, -1} sums to zero yet still has a well-defined direction, so it must
+  // be scored by cosine similarity instead of being routed to almostEqual().
+  const bool computed_is_zero = !computed_tensor.any().item<bool>();
+  const bool gt_is_zero = !gt_tensor.any().item<bool>();
+  if (computed_is_zero || gt_is_zero) {
+    return almostEqual(computed_tensor, gt_tensor);
+  }
+
+  // Evaluate the similarity once and reuse the scalar for logging and the verdict.
+  const float score = torch::nn::functional::cosine_similarity(
+                          computed_tensor.flatten(),
+                          gt_tensor.flatten(),
+                          torch::nn::functional::CosineSimilarityFuncOptions().dim(0))
+                          .item<float>();
+  LOG_DEBUG(std::string("Cosine Similarity score: ") + std::to_string(score));
+  LOG_DEBUG(std::string("Acceptable Threshold: ") + std::to_string(threshold));
+  return score >= threshold;
+}
+
 bool exactlyEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor) {
   LOG_DEBUG(computed_tensor << std::endl << gt_tensor << std::endl);
   std::cout << "Max Difference: " << (computed_tensor - gt_tensor).abs().max().item<float>() << std::endl;
diff --git a/tests/util/util.h b/tests/util/util.h
index 1ea62a16e0..23207ef02c 100644
--- a/tests/util/util.h
+++ b/tests/util/util.h
@@ -11,9 +11,13 @@ namespace torch_tensorrt {
 namespace tests {
 namespace util {
 
-bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold);
+const float ATOL = 1e-8;
+const float RTOL = 1e-5;
+const float COSINE_THRESHOLD = 0.99f;
 
-bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol = 1e-8, float rtol = 1e-5);
+bool cosineSimEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float threshold = COSINE_THRESHOLD);
+
+bool almostEqual(const at::Tensor& computed_tensor, const at::Tensor& gt_tensor, float atol = ATOL, float rtol = RTOL);
 
 bool exactlyEqual(const at::Tensor& a, const at::Tensor& b);