fastrepl · yujonglee · Apr 18, 2025
diff --git a/apps/app/server/src/native/listen/realtime.rs b/apps/app/server/src/native/listen/realtime.rs
@@ -47,10 +47,23 @@ async fn websocket(socket: WebSocket, state: STTState, params: ListenParams) {
             last_activity_receive.store(start_time.elapsed().as_secs(), Ordering::SeqCst);
 
-            let input: ListenInputChunk = serde_json::from_str(&data).unwrap();
-            let audio = Bytes::from(input.audio);
+            // Do not panic this task on a frame that fails to deserialize;
+            // stop receiving instead.
+            let Ok(input) = serde_json::from_str::<ListenInputChunk>(&data) else {
+                break;
+            };
+
+            match input {
+                // `End` carries no audio: leave the receive loop.
+                ListenInputChunk::End => {
+                    break;
+                }
+                ListenInputChunk::Audio { data } => {
+                    let audio = Bytes::from(data);
 
-            if tx.send(audio).is_err() {
-                break;
+                    if tx.send(audio).is_err() {
+                        break;
+                    }
+                }
             }
         }
 

diff --git a/crates/diart/src/lib.rs b/crates/diart/src/lib.rs
@@ -59,12 +59,16 @@ impl WebSocketIO for DiarizeClient {
     type Input = DiarizeInputChunk;
     type Output = DiarizeOutputChunk;
 
-    fn to_input(data: bytes::Bytes) -> Self::Input {
+    fn to_audio_input(data: bytes::Bytes) -> Self::Input {
         DiarizeInputChunk {
             audio: data.to_vec(),
         }
     }
 
+    fn to_flush_input() -> Self::Input {
+        DiarizeInputChunk { audio: Vec::new() }
+    }
+
     fn to_message(input: Self::Input) -> Message {
         Message::Text(serde_json::to_string(&input).unwrap().into())
     }

diff --git a/crates/whisper/src/cloud.rs b/crates/whisper/src/cloud.rs
@@ -92,10 +92,14 @@ impl WebSocketIO for WhisperClient {
     type Input = bytes::Bytes;
     type Output = WhisperOutput;
 
-    fn to_input(data: bytes::Bytes) -> Self::Input {
+    fn to_audio_input(data: bytes::Bytes) -> Self::Input {
         data
     }
 
+    fn to_flush_input() -> Self::Input {
+        bytes::Bytes::new()
+    }
+
     fn to_message(input: Self::Input) -> Message {
         Message::Binary(input)
     }

diff --git a/crates/whisper/src/local/stream.rs b/crates/whisper/src/local/stream.rs
@@ -1,3 +1,4 @@
+// https://github.com/floneum/floneum/blob/917fa37/models/rwhisper/src/lib.rs#L230-L231
 use std::{
     pin::Pin,
     task::{Context, Poll},

diff --git a/crates/ws/src/client.rs b/crates/ws/src/client.rs
@@ -7,10 +7,16 @@ use tokio_tungstenite::{connect_async, tungstenite::client::IntoClientRequest};
 pub use tokio_tungstenite::tungstenite::{protocol::Message, ClientRequestBuilder};
 
 pub trait WebSocketIO: Send + 'static {
-    type Input: Send + Default;
+    type Input: Send;
     type Output: DeserializeOwned;
 
-    fn to_input(data: bytes::Bytes) -> Self::Input;
+    /// Wraps one chunk from the audio stream in the client's input type.
+    fn to_audio_input(data: bytes::Bytes) -> Self::Input;
+    /// Builds the input that is sent once, after the audio stream has ended.
+    ///
+    /// It is sent in place of a WebSocket `Close` message, which would prevent
+    /// receiving the remaining output from the server.
+    fn to_flush_input() -> Self::Input;
     fn to_message(input: Self::Input) -> Message;
     fn from_message(msg: Message) -> Option<Self::Output>;
 }
@@ -52,7 +58,7 @@ impl WebSocketClient {
 
         let _send_task = tokio::spawn(async move {
             while let Some(data) = audio_stream.next().await {
-                let input = T::to_input(data);
+                let input = T::to_audio_input(data);
                 let msg = T::to_message(input);
 
                 if let Err(e) = ws_sender.send(msg).await {
@@ -62,7 +68,7 @@ impl WebSocketClient {
             }
 
             // We shouldn't send a 'Close' message, as it would prevent receiving remaining transcripts from the server.
-            let _ = ws_sender.send(T::to_message(T::Input::default())).await;
+            let _ = ws_sender.send(T::to_message(T::to_flush_input())).await;
         });
 
         let output_stream = async_stream::stream! {

diff --git a/packages/utils/src/stores/ongoing-session.ts b/packages/utils/src/stores/ongoing-session.ts
@@ -67,13 +67,19 @@ export const createOngoingSessionStore = (sessionsStore: ReturnType<typeof creat
     stop: () => {
       const { sessionId, channel } = get();
 
-      if (channel) {
-        listenerCommands.unsubscribe(channel);
-      }
+      set((state) =>
+        mutate(state, (draft) => {
+          draft.loading = true;
+        })
+      );
 
       listenerCommands.stopSession().then(() => {
         set(initialState);
 
+        if (channel) {
+          listenerCommands.unsubscribe(channel);
+        }
+
         // session stored in sessionStore become stale during ongoing-session. Refresh it here.
         if (sessionId) {
           const sessionStore = sessionsStore.getState().sessions[sessionId];

diff --git a/plugins/listener-interface/src/lib.rs b/plugins/listener-interface/src/lib.rs
@@ -39,10 +39,12 @@ common_derives! {
 }
 
 common_derives! {
-    #[derive(Default)]
-    pub struct ListenInputChunk {
-        #[serde(serialize_with = "serde_bytes::serialize")]
-        pub audio: Vec<u8>,
+    pub enum ListenInputChunk {
+        Audio {
+            #[serde(serialize_with = "serde_bytes::serialize")]
+            data: Vec<u8>,
+        },
+        End,
     }
 }
 

diff --git a/plugins/listener/src/client.rs b/plugins/listener/src/client.rs
@@ -72,12 +72,16 @@ impl WebSocketIO for ListenClient {
     type Input = ListenInputChunk;
     type Output = ListenOutputChunk;
 
-    fn to_input(data: bytes::Bytes) -> Self::Input {
-        ListenInputChunk {
-            audio: data.to_vec(),
+    fn to_audio_input(data: bytes::Bytes) -> Self::Input {
+        ListenInputChunk::Audio {
+            data: data.to_vec(),
         }
     }
 
+    fn to_flush_input() -> Self::Input {
+        ListenInputChunk::End
+    }
+
     fn to_message(input: Self::Input) -> Message {
         Message::Text(serde_json::to_string(&input).unwrap().into())
     }

diff --git a/plugins/listener/src/fsm.rs b/plugins/listener/src/fsm.rs
@@ -250,14 +250,11 @@ impl Session {
                         }
                     }
 
-                    Session::broadcast(
-                        &channels,
-                        SessionEvent::TimelineView(SessionEventTimelineView {
-                            timeline: timeline.view(TimelineFilter::default()),
-                        }),
-                    )
-                    .await
-                    .unwrap();
+                    let view = SessionEvent::TimelineView(SessionEventTimelineView {
+                        timeline: timeline.view(TimelineFilter::default()),
+                    });
+
+                    Session::broadcast(&channels, view).await.unwrap();
                 }
             }
         }));
@@ -284,8 +281,22 @@ impl Session {
         }
 
         if let Some(handle) = self.listen_stream_handle.take() {
-            handle.abort();
-            let _ = handle.await;
+            // `timeout` takes ownership of the JoinHandle, so keep a separate
+            // abort handle to cancel the task if the grace period runs out.
+            let abort_handle = handle.abort_handle();
+
+            match tokio::time::timeout(std::time::Duration::from_secs(2), handle).await {
+                Ok(Ok(_)) => {
+                    tracing::info!("listen_stream_completed");
+                }
+                // The task ended, but by panicking or being cancelled.
+                Ok(Err(e)) => {
+                    tracing::error!("listen_stream_failed: {:?}", e);
+                }
+                Err(_) => {
+                    tracing::warn!("listen_stream_timeout");
+                    abort_handle.abort();
+                }
+            }
         }
 
         let mut channels = self.channels.lock().await;

diff --git a/plugins/local-stt/src/server.rs b/plugins/local-stt/src/server.rs
@@ -208,20 +208,22 @@ impl kalosm_sound::AsyncSource for WebSocketAudioSource {
             match item {
                 Some(Ok(Message::Text(data))) => {
-                    let input: ListenInputChunk = serde_json::from_str(&data).unwrap();
-
-                    if input.audio.is_empty() {
-                        None
-                    } else {
-                        let samples: Vec<f32> = input
-                            .audio
-                            .chunks_exact(2)
-                            .map(|chunk| {
-                                let sample = i16::from_le_bytes([chunk[0], chunk[1]]);
-                                sample as f32 / 32767.0
-                            })
-                            .collect();
-
-                        Some((samples, receiver))
+                    // A frame that fails to deserialize yields `None`, like `Close`, instead of panicking.
+                    match serde_json::from_str::<ListenInputChunk>(&data) {
+                        Err(_) | Ok(ListenInputChunk::End) => None,
+                        // An empty audio payload also yields `None`.
+                        Ok(ListenInputChunk::Audio { data }) if data.is_empty() => None,
+                        Ok(ListenInputChunk::Audio { data }) => {
+                            // Decode little-endian i16 pairs into f32 samples.
+                            let samples: Vec<f32> = data
+                                .chunks_exact(2)
+                                .map(|chunk| {
+                                    let sample = i16::from_le_bytes([chunk[0], chunk[1]]);
+                                    sample as f32 / 32767.0
+                                })
+                                .collect();
+
+                            Some((samples, receiver))
+                        }
                     }
                 }
                 Some(Ok(Message::Close(_))) => None,