Skip to content

Commit 0df8e3d

Browse files
authored
Explicit sample_rate in owhisper client (#1651)
1 parent 9f7b009 commit 0df8e3d

File tree

29 files changed

+268
-131
lines changed

29 files changed

+268
-131
lines changed

.cursor/rules/simple.mdc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ alwaysApply: true
99
# Typescript
1010

1111
- Avoid creating a bunch of types/interfaces if they are not shared. Especially for function props. Just inline them.
12+
- After some amount of TypeScript changes, run `pnpm -r typecheck`.
13+
14+
# Rust
15+
16+
- After some amount of Rust changes, run `cargo check`.
1217

1318
# Mutation
1419
- Never do manual state management for forms/mutations. Things like `setError` are an anti-pattern. Use `useForm` (from tanstack-form) and `useQuery`/`useMutation` (from tanstack-query) for 99% of cases.
@@ -19,7 +24,6 @@ alwaysApply: true
1924

2025
# Misc
2126
- Do not create summary docs or example code files unless requested. A plan is OK.
22-
- After a significant amount of TypeScript changes, run `pnpm -r typecheck`.
2327
- If there are many classNames and they have conditional logic, use `cn` (import it with `import { cn } from "@hypr/utils"`). It is similar to `clsx`. Always pass an array. Split by logical grouping.
2428
- Use `motion/react` instead of `framer-motion`.
2529

Cargo.lock

Lines changed: 24 additions & 31 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,8 @@ async-stream = "0.3.6"
132132
futures-channel = "0.3.31"
133133
futures-core = "0.3.31"
134134
futures-util = "0.3.31"
135-
ractor = "0.15"
135+
ractor = { version = "0.14.3" }
136+
ractor-supervisor = "0.1.9"
136137
reqwest = "0.12"
137138
reqwest-streams = "0.10.0"
138139
tokio = "1"

Taskfile.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,12 @@ tasks:
7272
cmds:
7373
- chmod +x ./apps/desktop/src-tauri/resources/stt-aarch64-apple-darwin
7474
- chmod +x ./apps/desktop/src-tauri/resources/passthrough-aarch64-apple-darwin
75+
76+
db:
77+
env:
78+
DB: /Users/yujonglee/Library/Application Support/com.hyprnote.nightly/db.sqlite
79+
cmds:
80+
- |
81+
sqlite3 -json "$DB" 'SELECT store FROM main LIMIT 1;' |
82+
jq -r '.[0].store' |
83+
jless

apps/desktop/src/components/main/body/sessions/floating/listen.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ function OptionsMenu({
200200
queryClient.invalidateQueries({ queryKey: ["audio", sessionId, "url"] });
201201
})
202202
),
203-
Effect.flatMap((importedPath) => Effect.promise(() => runBatch(importedPath, { channels: 1 }))),
203+
Effect.flatMap((importedPath) => Effect.promise(() => runBatch(importedPath))),
204204
);
205205
},
206206
[queryClient, runBatch, sessionId],

apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/hooks.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@ export function useFinalWords(transcriptId: string): (main.Word & { id: string }
1313
return [];
1414
}
1515

16-
return Object.entries(resultTable)
16+
const ret = Object.entries(resultTable)
1717
.map(([wordId, row]) => ({ ...(row as unknown as main.Word), id: wordId }))
1818
.sort((a, b) => a.start_ms - b.start_ms);
19+
20+
return ret;
1921
}, [resultTable]);
2022
}
2123

apps/desktop/src/hooks/useAutoEnhance.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,7 @@ export function useAutoEnhance(tab: Extract<Tab, { type: "sessions" }>) {
5959

6060
if (listenerJustStopped) {
6161
startEnhance();
62-
}
63-
}, [listenerStatus, prevListenerStatus, startEnhance]);
64-
65-
useEffect(() => {
66-
if (enhanceTask.status === "generating" && tab.state.editor !== "enhanced") {
6762
updateSessionTabState(tab, { editor: "enhanced" });
6863
}
69-
}, [enhanceTask.status, tab, updateSessionTabState]);
64+
}, [listenerStatus, prevListenerStatus, startEnhance]);
7065
}

apps/desktop/src/hooks/useRunBatch.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import { useSTTConnection } from "./useSTTConnection";
1313

1414
type RunOptions = {
1515
handlePersist?: HandlePersistCallback;
16-
channels?: number;
1716
model?: string;
1817
baseUrl?: string;
1918
apiKey?: string;
@@ -138,7 +137,6 @@ export const useRunBatch = (sessionId: string) => {
138137
api_key: options?.apiKey ?? conn.apiKey,
139138
keywords: options?.keywords ?? keywords ?? [],
140139
languages: options?.languages ?? languages ?? [],
141-
channels: options?.channels,
142140
};
143141

144142
await runBatch(params, { handlePersist: persist, sessionId });

crates/audio-utils/src/lib.rs

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::convert::TryFrom;
2+
13
use bytes::{BufMut, Bytes, BytesMut};
24
use futures_util::{Stream, StreamExt};
35
use kalosm_sound::AsyncSource;
@@ -11,6 +13,12 @@ pub use rodio::Source;
1113

1214
const I16_SCALE: f32 = 32768.0;
1315

16+
#[derive(Debug, Clone, Copy)]
17+
pub struct AudioMetadata {
18+
pub sample_rate: u32,
19+
pub channels: u8,
20+
}
21+
1422
impl<T: AsyncSource> AudioFormatExt for T {}
1523

1624
pub trait AudioFormatExt: AsyncSource {
@@ -81,6 +89,40 @@ pub fn source_from_path(
8189
Ok(decoder)
8290
}
8391

92+
fn metadata_from_source<S>(source: &S) -> Result<AudioMetadata, crate::Error>
93+
where
94+
S: Source,
95+
S::Item: rodio::Sample,
96+
{
97+
let sample_rate = source.sample_rate();
98+
if sample_rate == 0 {
99+
return Err(crate::Error::InvalidSampleRate(sample_rate));
100+
}
101+
102+
let channels_u16 = source.channels();
103+
if channels_u16 == 0 {
104+
return Err(crate::Error::UnsupportedChannelCount {
105+
count: channels_u16,
106+
});
107+
}
108+
let channels =
109+
u8::try_from(channels_u16).map_err(|_| crate::Error::UnsupportedChannelCount {
110+
count: channels_u16,
111+
})?;
112+
113+
Ok(AudioMetadata {
114+
sample_rate,
115+
channels,
116+
})
117+
}
118+
119+
pub fn audio_file_metadata(
120+
path: impl AsRef<std::path::Path>,
121+
) -> Result<AudioMetadata, crate::Error> {
122+
let source = source_from_path(path)?;
123+
metadata_from_source(&source)
124+
}
125+
84126
pub fn resample_audio<S, T>(source: S, to_rate: u32) -> Result<Vec<f32>, crate::Error>
85127
where
86128
S: rodio::Source<Item = T> + Iterator<Item = T>,
@@ -136,32 +178,48 @@ where
136178
pub struct ChunkedAudio {
137179
pub chunks: Vec<Bytes>,
138180
pub sample_count: usize,
181+
pub frame_count: usize,
182+
pub metadata: AudioMetadata,
139183
}
140184

141185
pub fn chunk_audio_file(
142186
path: impl AsRef<std::path::Path>,
143-
sample_rate: u32,
144-
chunk_size: usize,
187+
chunk_ms: u64,
145188
) -> Result<ChunkedAudio, crate::Error> {
146189
let source = source_from_path(path)?;
147-
let samples = resample_audio(source, sample_rate)?;
190+
let metadata = metadata_from_source(&source)?;
191+
let samples = resample_audio(source, metadata.sample_rate)?;
148192

149193
if samples.is_empty() {
150194
return Ok(ChunkedAudio {
151195
chunks: Vec::new(),
152196
sample_count: 0,
197+
frame_count: 0,
198+
metadata,
153199
});
154200
}
155201

156-
let chunk_size = chunk_size.max(1);
202+
let channels = metadata.channels.max(1) as usize;
203+
let frames_per_chunk = {
204+
let frames = ((chunk_ms as u128).saturating_mul(metadata.sample_rate as u128) + 999) / 1000;
205+
frames.max(1).min(usize::MAX as u128) as usize
206+
};
207+
let samples_per_chunk = frames_per_chunk
208+
.saturating_mul(channels)
209+
.max(1)
210+
.min(usize::MAX);
211+
157212
let sample_count = samples.len();
213+
let frame_count = sample_count / channels;
158214
let chunks = samples
159-
.chunks(chunk_size)
215+
.chunks(samples_per_chunk)
160216
.map(|chunk| f32_to_i16_bytes(chunk.iter().copied()))
161217
.collect();
162218

163219
Ok(ChunkedAudio {
164220
chunks,
165221
sample_count,
222+
frame_count,
223+
metadata,
166224
})
167225
}

crates/audio/src/mic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ impl MicInput {
6565
config,
6666
})
6767
}
68+
69+
pub fn sample_rate(&self) -> u32 {
70+
self.config.sample_rate().0
71+
}
6872
}
6973

7074
impl MicInput {

0 commit comments

Comments
 (0)