@@ -3154,23 +3154,27 @@ static __device__ float rope_ntkv2_ramp(const float low, const float high, const
3154
3154
return 1 .0f - min (1 .0f , max (0 .0f , y));
3155
3155
}
3156
3156
3157
// Bundle of the four NTKv2 correction factors.
//
// Wrapping the array in a struct lets the values be passed *by value* to
// device functions and to the rope_f32 kernel; a bare `float[4]` parameter
// would decay to a pointer instead of copying the four floats.
//
// Slot usage as consumed by rope_ntkv2:
//   v[0], v[1] - low/high bounds of the ramp that is scaled by ntk_factor
//   v[2], v[3] - low/high bounds of the ramp that is scaled by ext_factor
struct rope_corr_factors {
    float v[4];
};
3160
+
3157
3161
// NTKv2 algorithm based on LlamaPartNTKScaledRotaryEmbedding.py from https://github.com/jquesnelle/scaled-rope
3158
3162
// MIT licensed. Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng.
3159
3163
// Computes the blended rotary angle for element index i0 in two stages:
//   1. mix theta_linear with theta_ntk, weighted by the ramp over
//      corr_factors.v[0]..v[1] multiplied by ntk_factor;
//   2. mix that result with theta_base, weighted by the ramp over
//      corr_factors.v[2]..v[3] multiplied by ext_factor.
//
// corr_factors is taken by value, so each call works on its own copy of the
// four floats. A weight of 0 keeps the first operand of a stage unchanged and
// a weight of 1 replaces it entirely with the second operand.
static __device__ float rope_ntkv2(
    const float theta_base,
    const float theta_linear,
    const float theta_ntk,
    const rope_corr_factors corr_factors,
    const int64_t i0,
    const float ntk_factor,
    const float ext_factor) {
    // Stage 1: linear-scaled angle -> NTK-scaled angle.
    const float ntk_mix     = rope_ntkv2_ramp(corr_factors.v[0], corr_factors.v[1], i0) * ntk_factor;
    const float theta_stage = theta_linear * (1 - ntk_mix) + theta_ntk * ntk_mix;

    // Stage 2: stage-1 angle -> unscaled base angle.
    const float ext_mix = rope_ntkv2_ramp(corr_factors.v[2], corr_factors.v[3], i0) * ext_factor;
    return theta_stage * (1 - ext_mix) + theta_base * ext_mix;
}
@@ -3187,7 +3191,7 @@ static __global__ void rope_f32(
3187
3191
const float theta_ntk_scale,
3188
3192
const float p0,
3189
3193
const int p_delta_rows,
3190
- const float corr_factors[ 4 ] ) {
3194
+ const rope_corr_factors corr_factors) {
3191
3195
const int col = 2 *(blockDim .x *blockIdx .x + threadIdx .x );
3192
3196
3193
3197
if (col >= ncols) {
@@ -3817,7 +3821,7 @@ static void rope_f32_cuda(
3817
3821
const float theta_ntk_scale,
3818
3822
const float p0,
3819
3823
const int p_delta_rows,
3820
- const float corr_factors[ 4 ] ,
3824
+ const rope_corr_factors corr_factors,
3821
3825
cudaStream_t stream) {
3822
3826
GGML_ASSERT (nrows % 2 == 0 );
3823
3827
const dim3 block_dims (2 *CUDA_ROPE_BLOCK_SIZE, 1 , 1 );
@@ -4546,8 +4550,8 @@ inline void ggml_cuda_op_rope(
4546
4550
} else {
4547
4551
const float p0 = (mode & 1 ) == 0 ? n_past : 0 ;
4548
4552
const float theta_ntk_scale = powf (freq_base * powf (freq_scale, (n_dims / (n_dims - 2 .0f ))), -2 .0f /n_dims);
4549
- float corr_factors[ 4 ] ;
4550
- ggml_rope_ntkv2_corr_factors (n_dims, freq_base, corr_factors);
4553
+ rope_corr_factors corr_factors;
4554
+ ggml_rope_ntkv2_corr_factors (n_dims, freq_base, corr_factors. v );
4551
4555
4552
4556
rope_f32_cuda (src0_ddf_i, dst_ddf_i, ne00, i01_diff, freq_scale, ntk_factor, ext_factor, theta_scale,
4553
4557
theta_ntk_scale, p0, ne01, corr_factors, cudaStream_main);
0 commit comments