From 0c290e05ec6e488ac5db23b600d094ddf0a876bc Mon Sep 17 00:00:00 2001 From: Evan Liu Date: Mon, 23 Dec 2024 12:29:24 -0800 Subject: [PATCH 1/2] Add on-device speech recognition support to the Web Speech API --- index.bs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/index.bs b/index.bs index e62c62c..bc01ab3 100644 --- a/index.bs +++ b/index.bs @@ -1,8 +1,9 @@
 Title: Web Speech API
 Level:
-Status: CG-DRAFT
-Group: Audio Working Group
+Status: WD
+Group: audiowg
+TR:
 URL: https://webaudio.github.io/web-speech-api/
 Repository: WebAudio/web-speech-api
 Shortname: speech-api
@@ -155,12 +156,15 @@ interface SpeechRecognition : EventTarget {
     attribute boolean continuous;
     attribute boolean interimResults;
     attribute unsigned long maxAlternatives;
+    attribute SpeechRecognitionMode mode;
 
     // methods to drive the speech interaction
     undefined start();
     undefined start(MediaStreamTrack audioTrack);
     undefined stop();
     undefined abort();
+    boolean onDeviceWebSpeechAvailable(DOMString lang);
+    boolean installOnDeviceSpeechRecognition(DOMString lang);
 
     // event methods
     attribute EventHandler onaudiostart;
@@ -187,6 +191,12 @@ enum SpeechRecognitionErrorCode {
     "language-not-supported"
 };
 
+enum SpeechRecognitionMode {
+    "ondevice-preferred", // Use on-device speech recognition if available; otherwise fall back to cloud speech recognition.
+    "ondevice-only", // Use on-device speech recognition only. Returns an error if on-device speech recognition is not available.
+    "cloud-only", // Use cloud speech recognition only.
+};
+
 [Exposed=Window]
 interface SpeechRecognitionErrorEvent : Event {
     constructor(DOMString type, SpeechRecognitionErrorEventInit eventInitDict);
@@ -282,6 +292,9 @@ interface SpeechGrammarList {
   
maxAlternatives attribute
This attribute will set the maximum number of {{SpeechRecognitionAlternative}}s per result. The default value is 1.
+ +
mode attribute
+
A {{SpeechRecognitionMode}} value that determines where speech recognition takes place. The default value is "ondevice-preferred".

The group has discussed whether WebRTC might be used to specify selection of audio sources and remote recognizers. @@ -312,6 +325,13 @@ See SpeechRecognition Events From 8b5b9ecd46a2c33ac5925abc07502a29316f4396 Mon Sep 17 00:00:00 2001 From: Evan Liu Date: Mon, 23 Dec 2024 12:29:24 -0800 Subject: [PATCH 2/2] Add on-device speech recognition support to the Web Speech API --- index.bs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/index.bs b/index.bs index e62c62c..bbcbde3 100644 --- a/index.bs +++ b/index.bs @@ -1,8 +1,9 @@