@@ -1909,8 +1909,31 @@ whisper_vocab::id whisper_sample_timestamp(
1909
1909
return probs_id[0 ].second ;
1910
1910
}
1911
1911
1912
// naive Discrete Fourier Transform - O(N^2), intended for small / odd sizes
// input is real-valued: in[n], n = 0..N-1 (N = in.size())
// output is complex-valued and interleaved:
//   out[2*k + 0] = Re(X[k]), out[2*k + 1] = Im(X[k]), k = 0..N-1
//   X[k] = sum_n in[n] * exp(-2*pi*i*k*n/N)
// out is resized to 2*N; an empty input yields an empty output
void dft(const std::vector<float> & in, std::vector<float> & out) {
    const int N = static_cast<int>(in.size());

    out.resize(in.size()*2);

    // spelled out locally because M_PI is not part of standard C++
    const double two_pi = 6.283185307179586476925286766559;

    for (int k = 0; k < N; k++) {
        // accumulate in double so rounding error does not grow with N
        double re = 0.0;
        double im = 0.0;

        for (int n = 0; n < N; n++) {
            // the twiddle factor is periodic in k*n with period N, so reduce
            // the product first: the angle stays in [0, 2*pi) and keeps full
            // precision instead of growing like 2*pi*N
            const long long idx = (static_cast<long long>(k)*n) % N;
            const double angle = two_pi*static_cast<double>(idx)/N;

            re += in[n]*cos(angle);
            im -= in[n]*sin(angle);
        }

        out[k*2 + 0] = static_cast<float>(re);
        out[k*2 + 1] = static_cast<float>(im);
    }
}
1934
+
1912
1935
// Cooley-Tukey FFT
1913
- // poor man's implmentation - use something better
1936
+ // poor man's implementation - use something better
1914
1937
// input is real-valued
1915
1938
// output is complex-valued
1916
1939
void fft (const std::vector<float > & in, std::vector<float > & out) {
@@ -1924,6 +1947,11 @@ void fft(const std::vector<float> & in, std::vector<float> & out) {
1924
1947
return ;
1925
1948
}
1926
1949
1950
+ if (N%2 == 1 ) {
1951
+ dft (in, out);
1952
+ return ;
1953
+ }
1954
+
1927
1955
std::vector<float > even;
1928
1956
std::vector<float > odd;
1929
1957
@@ -2014,9 +2042,20 @@ bool log_mel_spectrogram(
2014
2042
// FFT -> mag^2
2015
2043
fft (fft_in, fft_out);
2016
2044
2017
- for (int j = 0 ; j < n_fft ; j++) {
2045
+ for (int j = 0 ; j < fft_size ; j++) {
2018
2046
fft_out[j] = (fft_out[2 *j + 0 ]*fft_out[2 *j + 0 ] + fft_out[2 *j + 1 ]*fft_out[2 *j + 1 ]);
2019
2047
}
2048
+ for (int j = 1 ; j < fft_size/2 ; j++) {
2049
+ // if (i == 0) {
2050
+ // printf("%d: %f %f\n", j, fft_out[j], fft_out[fft_size - j]);
2051
+ // }
2052
+ fft_out[j] += fft_out[fft_size - j];
2053
+ }
2054
+ if (i == 0 ) {
2055
+ // for (int j = 0; j < fft_size; j++) {
2056
+ // printf("%d: %e\n", j, fft_out[j]);
2057
+ // }
2058
+ }
2020
2059
2021
2060
// mel spectrogram
2022
2061
for (int j = 0 ; j < mel.n_mel ; j++) {
@@ -2048,6 +2087,7 @@ bool log_mel_spectrogram(
2048
2087
mmax = mel.data [i];
2049
2088
}
2050
2089
}
2090
+ // printf("%s: max = %f\n", __func__, mmax);
2051
2091
2052
2092
mmax -= 8.0 ;
2053
2093
0 commit comments