@@ -48,7 +48,7 @@ using namespace fbgemm_gpu;
4848 has_global_weight_decay_support,
4949 ssd) %}
5050{%- set desc_suffix = get_desc_suffix (is_gwd_kernel) %}
51- {%- set is_optimized_hip_kernel_supported_mode_ori = is_rocm and
51+ {%- set is_optimized_hip_kernel_supported_mode = is_rocm and
5252 optimizer == " rowwise_adagrad" and
5353 not dense and
5454 not nobag and
@@ -244,7 +244,7 @@ batch_index_select_dim0_codegen_backward_kernel_warp_per_row(
244244 {%- endif %}
245245);
246246
247- {%- if is_optimized_hip_kernel_supported_mode_ori %}
247+ {%- if is_optimized_hip_kernel_supported_mode %}
248248#include " fbgemm_gpu/rocm/split_embeddings_common.h"
249249template <
250250 typename emb_t ,
@@ -1019,7 +1019,7 @@ Tensor {{ embedding_cuda_op }}(
10191019 }
10201020 {%- endif %}
10211021
1022- {%- if is_optimized_hip_kernel_supported_mode_ori %}
1022+ {%- if is_optimized_hip_kernel_supported_mode %}
10231023 {%- set hip_kernel = " hip_split_embedding{}_backward_codegen_{}_{}{}_kernel_warp_per_row_1" .format (
10241024 ndesc,
10251025 optimizer,
@@ -1261,7 +1261,6 @@ Tensor {{ embedding_cuda_op }}(
12611261 auto cta_blockSize = dim3 (kThreadGroupSize , num_cta_per_row_groups);
12621262 {%- endif %}
12631263
1264- // printf("%s:%d %d\n", __FILE__, __LINE__, num_cta_per_row_groups);
12651264 // Compute shared memory size for cta_per_row
12661265 constexpr auto kCacheAccBytes = sizeof (at::acc_type<cache_t , true >);
12671266 const size_t cta_per_row_smem_bytes = compute_num_groups_and_dynamic_smem_bytes (
@@ -1426,7 +1425,6 @@ Tensor {{ embedding_cuda_op }}(
14261425 32 ,
14271426 false >;
14281427 blockSize = dim3 (32 , num_warp_per_row_groups);
1429- // printf("%s:%d warp kernel %d\n", __FILE__, __LINE__, num_warp_per_row_groups);
14301428 }
14311429 }
14321430 {%- endif %}
@@ -1449,7 +1447,7 @@ Tensor {{ embedding_cuda_op }}(
14491447 get_max_thread_blocks_ ());
14501448
14511449#ifdef USE_ROCM
1452- {%- if is_optimized_hip_kernel_supported_mode_ori %}
1450+ {%- if is_optimized_hip_kernel_supported_mode %}
14531451
14541452 const static auto use_hip_kernel = fbgemm_gpu::config::is_feature_enabled (fbgemm_gpu::config::FeatureGateName::TBE_ROCM_HIP_BACKWARD_KERNEL);
14551453
0 commit comments