Skip to content

Commit bec6a69

Browse files
committed
fix symbol bug & rm comment
1 parent fb7f0a8 commit bec6a69

File tree

2 files changed

+10
-12
lines changed

2 files changed

+10
-12
lines changed

fbgemm_gpu/codegen/training/backward/embedding_backward_split_kernel_warp_template.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
{%- set locs_or_addrs_tensor = "ssd_row_addrs" if ssd else "lxu_cache_locations" %}
3434
{%- set locs_or_addrs_type = "int64_t" if ssd else "int32_t" %}
35-
{%- set is_optimized_hip_kernel_supported_mode_ori = is_rocm and
35+
{%- set is_optimized_hip_kernel_supported_mode = is_rocm and
3636
optimizer == "rowwise_adagrad" and
3737
not dense and
3838
not nobag and
@@ -934,7 +934,7 @@ hip_mixed_d_split_embedding{{ ndesc }}_backward_codegen_{{ optimizer }}_{{ wdesc
934934

935935
{%- endif %}
936936

937-
{%- if is_optimized_hip_kernel_supported_mode_ori %}
937+
{%- if is_optimized_hip_kernel_supported_mode %}
938938
#include <hip/hip_runtime.h>
939939
#include <hip/hip_fp16.h>
940940
#include "fbgemm_gpu/rocm/split_embeddings_common.h"
@@ -1150,10 +1150,10 @@ hip_split_embedding{{ ndesc }}_backward_codegen_{{ optimizer }}_{{ wdesc }}{{ vd
11501150

11511151
{%- macro hip_bulk_template_instantiations(kFixedMaxVecsPerThread, kThreadGroupSize, kUseVecBlocking) %}
11521152
{%- for grad_type in ['float', 'at::Half', 'at::BFloat16'] %}
1153-
{%- for emb_type in (['float', 'at::Half', 'at::BFloat16'] + (['at::Float8_e4m3fnuz'] if is_rocm else ['at::Float8_e4m3fn'])) %}
1154-
{%- for cache_type in ['float', 'at::Half', 'at::BFloat16'] %}
1155-
{%- for index_type in ['int32_t', 'int64_t', 'at::BFloat16'] %}
1156-
{%- for kEmbeddingDim in [64, 128, 160, 192, 256] %}
1153+
{%- for emb_type in (['float', 'at::Half'] + (['at::Float8_e4m3fnuz'] if is_rocm else ['at::Float8_e4m3fn'])) %}
1154+
{%- for cache_type in ['float', 'at::Half'] %}
1155+
{%- for index_type in ['int32_t', 'int64_t'] %}
1156+
{%- for kEmbeddingDim in [64, 128, 160, 192, 256, 320] %}
11571157
{%- for kWeighDecayMode in [0, 1, 2] %}
11581158
{{ hip_template_instantiation(
11591159
emb_type,

fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ using namespace fbgemm_gpu;
4848
has_global_weight_decay_support,
4949
ssd) %}
5050
{%- set desc_suffix = get_desc_suffix(is_gwd_kernel) %}
51-
{%- set is_optimized_hip_kernel_supported_mode_ori = is_rocm and
51+
{%- set is_optimized_hip_kernel_supported_mode = is_rocm and
5252
optimizer == "rowwise_adagrad" and
5353
not dense and
5454
not nobag and
@@ -244,7 +244,7 @@ batch_index_select_dim0_codegen_backward_kernel_warp_per_row(
244244
{%- endif %}
245245
);
246246

247-
{%- if is_optimized_hip_kernel_supported_mode_ori %}
247+
{%- if is_optimized_hip_kernel_supported_mode %}
248248
#include "fbgemm_gpu/rocm/split_embeddings_common.h"
249249
template <
250250
typename emb_t,
@@ -1019,7 +1019,7 @@ Tensor {{ embedding_cuda_op }}(
10191019
}
10201020
{%- endif %}
10211021

1022-
{%- if is_optimized_hip_kernel_supported_mode_ori %}
1022+
{%- if is_optimized_hip_kernel_supported_mode %}
10231023
{%- set hip_kernel = "hip_split_embedding{}_backward_codegen_{}_{}{}_kernel_warp_per_row_1".format(
10241024
ndesc,
10251025
optimizer,
@@ -1261,7 +1261,6 @@ Tensor {{ embedding_cuda_op }}(
12611261
auto cta_blockSize = dim3(kThreadGroupSize, num_cta_per_row_groups);
12621262
{%- endif %}
12631263

1264-
// printf("%s:%d %d\n", __FILE__, __LINE__, num_cta_per_row_groups);
12651264
// Compute shared memory size for cta_per_row
12661265
constexpr auto kCacheAccBytes = sizeof(at::acc_type<cache_t, true>);
12671266
const size_t cta_per_row_smem_bytes = compute_num_groups_and_dynamic_smem_bytes(
@@ -1426,7 +1425,6 @@ Tensor {{ embedding_cuda_op }}(
14261425
32,
14271426
false>;
14281427
blockSize = dim3(32, num_warp_per_row_groups);
1429-
// printf("%s:%d warp kernel %d\n", __FILE__, __LINE__, num_warp_per_row_groups);
14301428
}
14311429
}
14321430
{%- endif %}
@@ -1449,7 +1447,7 @@ Tensor {{ embedding_cuda_op }}(
14491447
get_max_thread_blocks_());
14501448

14511449
#ifdef USE_ROCM
1452-
{%- if is_optimized_hip_kernel_supported_mode_ori %}
1450+
{%- if is_optimized_hip_kernel_supported_mode %}
14531451

14541452
const static auto use_hip_kernel = fbgemm_gpu::config::is_feature_enabled(fbgemm_gpu::config::FeatureGateName::TBE_ROCM_HIP_BACKWARD_KERNEL);
14551453

0 commit comments

Comments
 (0)