diff --git a/packages/google-cloud-speech/CONTRIBUTING.rst b/packages/google-cloud-speech/CONTRIBUTING.rst
index 17535ece142c..de0818bb54f1 100644
--- a/packages/google-cloud-speech/CONTRIBUTING.rst
+++ b/packages/google-cloud-speech/CONTRIBUTING.rst
@@ -35,21 +35,21 @@ Using a Development Checkout
 You'll have to create a development environment using a Git checkout:
 
 - While logged into your GitHub account, navigate to the
-  ``python-speech`` `repo`_ on GitHub.
+  ``google-cloud-python`` `repo`_ on GitHub.
 
-- Fork and clone the ``python-speech`` repository to your GitHub account by
+- Fork and clone the ``google-cloud-python`` repository to your GitHub account by
   clicking the "Fork" button.
 
-- Clone your fork of ``python-speech`` from your GitHub account to your local
+- Clone your fork of ``google-cloud-python`` from your GitHub account to your local
   computer, substituting your account username and specifying the destination
-  as ``hack-on-python-speech``. E.g.::
+  as ``hack-on-google-cloud-python``. E.g.::
 
    $ cd ${HOME}
-   $ git clone git@github.com:USERNAME/python-speech.git hack-on-python-speech
-   $ cd hack-on-python-speech
-   # Configure remotes such that you can pull changes from the googleapis/python-speech
+   $ git clone git@github.com:USERNAME/google-cloud-python.git hack-on-google-cloud-python
+   $ cd hack-on-google-cloud-python
+   # Configure remotes such that you can pull changes from the googleapis/google-cloud-python
    # repository into your local repository.
-   $ git remote add upstream git@github.com:googleapis/python-speech.git
+   $ git remote add upstream git@github.com:googleapis/google-cloud-python.git
    # fetch and merge changes from upstream into main
    $ git fetch upstream
    $ git merge upstream/main
@@ -60,7 +60,7 @@ repo, from which you can submit a pull request.
 To work on the codebase and run the tests, we recommend using ``nox``,
 but you can also use a ``virtualenv`` of your own creation.
 
-.. _repo: https://github.com/googleapis/python-speech
+.. _repo: https://github.com/googleapis/google-cloud-python
 
 Using ``nox``
 =============
@@ -113,7 +113,7 @@ Coding Style
     export GOOGLE_CLOUD_TESTING_BRANCH="main"
 
   By doing this, you are specifying the location of the most up-to-date
-  version of ``python-speech``. The
+  version of ``google-cloud-python``. The
   remote name ``upstream`` should point to the official ``googleapis``
   checkout and the branch should be the default branch on that remote (``main``).
@@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the
 ``README``. Due to the reStructuredText (``rst``) parser used by PyPI,
 relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst`` instead
 of
-``https://github.com/googleapis/python-speech/blob/main/CONTRIBUTING.rst``)
+``https://github.com/googleapis/google-cloud-python/blob/main/CONTRIBUTING.rst``)
 may cause problems creating links or rendering the description.
 
 .. _description on PyPI: https://pypi.org/project/google-cloud-speech
@@ -236,7 +236,7 @@ We support:
 
 Supported versions can be found in our ``noxfile.py`` `config`_.
 
-.. _config: https://github.com/googleapis/python-speech/blob/main/packages/google-cloud-speech/noxfile.py
+.. _config: https://github.com/googleapis/google-cloud-python/blob/main/packages/google-cloud-speech/noxfile.py
diff --git a/packages/google-cloud-speech/docs/conf.py b/packages/google-cloud-speech/docs/conf.py
index 6cab9c625291..8618206826f4 100644
--- a/packages/google-cloud-speech/docs/conf.py
+++ b/packages/google-cloud-speech/docs/conf.py
@@ -156,7 +156,7 @@ html_theme_options = {
     "description": "Google Cloud Client Libraries for google-cloud-speech",
     "github_user": "googleapis",
-    "github_repo": "python-speech",
+    "github_repo": "google-cloud-python",
     "github_banner": True,
     "font_family": "'Roboto', Georgia, sans",
     "head_font_family": "'Roboto', Georgia, serif",
diff --git a/packages/google-cloud-speech/google/cloud/speech/__init__.py b/packages/google-cloud-speech/google/cloud/speech/__init__.py
index bdf659725804..8ff0f11a7cff 100644
--- a/packages/google-cloud-speech/google/cloud/speech/__init__.py
+++ b/packages/google-cloud-speech/google/cloud/speech/__init__.py
@@ -63,6 +63,7 @@
     CustomClass,
     PhraseSet,
     SpeechAdaptation,
+    TranscriptNormalization,
 )
 
 __all__ = (
@@ -104,4 +105,5 @@
     "CustomClass",
     "PhraseSet",
     "SpeechAdaptation",
+    "TranscriptNormalization",
 )
diff --git a/packages/google-cloud-speech/google/cloud/speech_v1/__init__.py b/packages/google-cloud-speech/google/cloud/speech_v1/__init__.py
index 72158ec0533a..f4a4b53f76da 100644
--- a/packages/google-cloud-speech/google/cloud/speech_v1/__init__.py
+++ b/packages/google-cloud-speech/google/cloud/speech_v1/__init__.py
@@ -55,7 +55,12 @@
     UpdateCustomClassRequest,
     UpdatePhraseSetRequest,
 )
-from .types.resource import CustomClass, PhraseSet, SpeechAdaptation
+from .types.resource import (
+    CustomClass,
+    PhraseSet,
+    SpeechAdaptation,
+    TranscriptNormalization,
+)
 
 from google.cloud.speech_v1.helpers import SpeechHelpers
 
@@ -99,6 +104,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):
     "StreamingRecognitionResult",
     "StreamingRecognizeRequest",
     "StreamingRecognizeResponse",
+    "TranscriptNormalization",
     "TranscriptOutputConfig",
     "UpdateCustomClassRequest",
     "UpdatePhraseSetRequest",
diff --git a/packages/google-cloud-speech/google/cloud/speech_v1/types/__init__.py b/packages/google-cloud-speech/google/cloud/speech_v1/types/__init__.py
index 999e4899fbca..1985f9a49ad0 100644
--- a/packages/google-cloud-speech/google/cloud/speech_v1/types/__init__.py
+++ b/packages/google-cloud-speech/google/cloud/speech_v1/types/__init__.py
@@ -48,7 +48,7 @@
     UpdateCustomClassRequest,
     UpdatePhraseSetRequest,
 )
-from .resource import CustomClass, PhraseSet, SpeechAdaptation
+from .resource import CustomClass, PhraseSet, SpeechAdaptation, TranscriptNormalization
 
 __all__ = (
     "LongRunningRecognizeMetadata",
@@ -85,4 +85,5 @@
     "CustomClass",
     "PhraseSet",
     "SpeechAdaptation",
+    "TranscriptNormalization",
 )
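The three ``__init__.py`` changes above are purely about the public re-export surface. A minimal smoke test, assuming a build of ``google-cloud-speech`` that contains this change is installed, is simply to confirm that the new message is reachable from each layer touched here:

```python
from google.cloud import speech, speech_v1
from google.cloud.speech_v1 import types

# The same proto-plus message class should now be visible from the
# top-level convenience package, the versioned package, and its types module.
print(speech.TranscriptNormalization)
print(speech_v1.TranscriptNormalization)
print(types.TranscriptNormalization)
```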
diff --git a/packages/google-cloud-speech/google/cloud/speech_v1/types/cloud_speech.py b/packages/google-cloud-speech/google/cloud/speech_v1/types/cloud_speech.py
index d0c92b694980..362b3ad5cda0 100644
--- a/packages/google-cloud-speech/google/cloud/speech_v1/types/cloud_speech.py
+++ b/packages/google-cloud-speech/google/cloud/speech_v1/types/cloud_speech.py
@@ -359,6 +359,13 @@ class RecognitionConfig(proto.Message):
             adaptation `__
             documentation. When speech adaptation is set it supersedes
             the ``speech_contexts`` field.
+        transcript_normalization (google.cloud.speech_v1.types.TranscriptNormalization):
+            Optional. Use transcription normalization to
+            automatically replace parts of the transcript
+            with phrases of your choosing. For
+            StreamingRecognize, this normalization only
+            applies to stable partial transcripts (stability
+            > 0.8) and final transcripts.
         speech_contexts (MutableSequence[google.cloud.speech_v1.types.SpeechContext]):
             Array of
             [SpeechContext][google.cloud.speech.v1.SpeechContext]. A
@@ -551,6 +558,12 @@ class AudioEncoding(proto.Enum):
                 5574. In other words, each RTP header is replaced with a
                 single byte containing the block length. Only Speex wideband
                 is supported. ``sample_rate_hertz`` must be 16000.
+            MP3 (8):
+                MP3 audio. MP3 encoding is a Beta feature and only available
+                in v1p1beta1. Support all standard MP3 bitrates (which range
+                from 32-320 kbps). When using this encoding,
+                ``sample_rate_hertz`` has to match the sample rate of the
+                file being used.
             WEBM_OPUS (9):
                 Opus encoded audio frames in WebM container
                 (`OggOpus `__).
                 ``sample_rate_hertz`` must be one of 8000, 12000, 16000,
@@ -565,6 +578,7 @@ class AudioEncoding(proto.Enum):
         AMR_WB = 5
         OGG_OPUS = 6
         SPEEX_WITH_HEADER_BYTE = 7
+        MP3 = 8
         WEBM_OPUS = 9
 
     encoding: AudioEncoding = proto.Field(
@@ -605,6 +619,11 @@ class AudioEncoding(proto.Enum):
         number=20,
         message=resource.SpeechAdaptation,
     )
+    transcript_normalization: resource.TranscriptNormalization = proto.Field(
+        proto.MESSAGE,
+        number=24,
+        message=resource.TranscriptNormalization,
+    )
     speech_contexts: MutableSequence["SpeechContext"] = proto.RepeatedField(
         proto.MESSAGE,
         number=6,
@@ -659,7 +678,7 @@ class SpeakerDiarizationConfig(proto.Message):
         enable_speaker_diarization (bool):
             If 'true', enables speaker detection for each recognized
             word in the top alternative of the recognition result using
-            a speaker_tag provided in the WordInfo.
+            a speaker_label provided in the WordInfo.
         min_speaker_count (int):
             Minimum number of speakers in the
             conversation. This range gives you more
@@ -1469,8 +1488,17 @@ class WordInfo(proto.Message):
             speaker within the audio. This field specifies which one of
             those speakers was detected to have spoken this word. Value
             ranges from '1' to diarization_speaker_count. speaker_tag is
-            set if enable_speaker_diarization = 'true' and only in the
-            top alternative.
+            set if enable_speaker_diarization = 'true' and only for the
+            top alternative. Note: Use speaker_label instead.
+        speaker_label (str):
+            Output only. A label value assigned for every unique speaker
+            within the audio. This field specifies which speaker was
+            detected to have spoken this word. For some models, like
+            medical_conversation this can be actual speaker role, for
+            example "patient" or "provider", but generally this would be
+            a number identifying a speaker. This field is only set if
+            enable_speaker_diarization = 'true' and only for the top
+            alternative.
     """
 
     start_time: duration_pb2.Duration = proto.Field(
@@ -1495,6 +1523,10 @@ class WordInfo(proto.Message):
         proto.INT32,
         number=5,
     )
+    speaker_label: str = proto.Field(
+        proto.STRING,
+        number=6,
+    )
 
 
 class SpeechAdaptationInfo(proto.Message):
diff --git a/packages/google-cloud-speech/google/cloud/speech_v1/types/resource.py b/packages/google-cloud-speech/google/cloud/speech_v1/types/resource.py
index 81b319c5d4b0..c826f1c48dc8 100644
--- a/packages/google-cloud-speech/google/cloud/speech_v1/types/resource.py
+++ b/packages/google-cloud-speech/google/cloud/speech_v1/types/resource.py
@@ -25,6 +25,7 @@
         "CustomClass",
         "PhraseSet",
         "SpeechAdaptation",
+        "TranscriptNormalization",
     },
 )
 
@@ -228,4 +229,54 @@ class ABNFGrammar(proto.Message):
     )
 
 
+class TranscriptNormalization(proto.Message):
+    r"""Transcription normalization configuration. Use transcription
+    normalization to automatically replace parts of the transcript
+    with phrases of your choosing. For StreamingRecognize, this
+    normalization only applies to stable partial transcripts
+    (stability > 0.8) and final transcripts.
+
+    Attributes:
+        entries (MutableSequence[google.cloud.speech_v1.types.TranscriptNormalization.Entry]):
+            A list of replacement entries. We will perform replacement
+            with one entry at a time. For example, the second entry in
+            ["cat" => "dog", "mountain cat" => "mountain dog"] will
+            never be applied because we will always process the first
+            entry before it. At most 100 entries.
+    """
+
+    class Entry(proto.Message):
+        r"""A single replacement configuration.
+
+        Attributes:
+            search (str):
+                What to replace. Max length is 100
+                characters.
+            replace (str):
+                What to replace with. Max length is 100
+                characters.
+            case_sensitive (bool):
+                Whether the search is case sensitive.
+        """
+
+        search: str = proto.Field(
+            proto.STRING,
+            number=1,
+        )
+        replace: str = proto.Field(
+            proto.STRING,
+            number=2,
+        )
+        case_sensitive: bool = proto.Field(
+            proto.BOOL,
+            number=3,
+        )
+
+    entries: MutableSequence[Entry] = proto.RepeatedField(
+        proto.MESSAGE,
+        number=1,
+        message=Entry,
+    )
+
+
 __all__ = tuple(sorted(__protobuf__.manifest))
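With the new ``RecognitionConfig.transcript_normalization`` field (number 24) and the ``TranscriptNormalization`` message above, the feature is usable end to end. The sketch below is illustrative only: it assumes a build containing this change is installed, and the GCS URI and replacement phrases are made up rather than taken from this PR.

```python
from google.cloud import speech_v1

client = speech_v1.SpeechClient()

# Entries are applied one at a time, in order, per the docstring above
# (at most 100 entries).
normalization = speech_v1.TranscriptNormalization(
    entries=[
        speech_v1.TranscriptNormalization.Entry(
            search="speach", replace="speech", case_sensitive=False
        ),
    ]
)

config = speech_v1.RecognitionConfig(
    encoding=speech_v1.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    transcript_normalization=normalization,
)
audio = speech_v1.RecognitionAudio(uri="gs://example-bucket/example-audio.wav")

response = client.recognize(config=config, audio=audio)
for result in response.results:
    print(result.alternatives[0].transcript)
```

Note that the ``MP3`` enum value added above is documented as only being available in ``v1p1beta1``, so an MP3 variant of this sketch would go through ``google.cloud.speech_v1p1beta1`` instead.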
diff --git a/scripts/client-post-processing/integrate-isolated-handwritten-code.yaml b/scripts/client-post-processing/integrate-isolated-handwritten-code.yaml
index f604ed9a5486..241a3dd8e1ee 100644
--- a/scripts/client-post-processing/integrate-isolated-handwritten-code.yaml
+++ b/scripts/client-post-processing/integrate-isolated-handwritten-code.yaml
@@ -102,10 +102,12 @@ replacements:
       packages/google-cloud-speech/google/cloud/speech_v1/__init__.py,
     ]
     before: |
-      from .types.resource import CustomClass, PhraseSet, SpeechAdaptation\n
+      \)
+      __all__ = \(
     after: |
-      from .types.resource import CustomClass, PhraseSet, SpeechAdaptation\n
+      )
+
       from google.cloud.speech_v1.helpers import SpeechHelpers\n\n
       class SpeechClient(SpeechHelpers, SpeechClient):
           __doc__ = SpeechClient.__doc__\n\n
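Beyond the transcript-normalization plumbing, the other user-visible addition in this diff is ``WordInfo.speaker_label``. A minimal sketch of reading it back, again assuming an installed build with this change; the audio URI is a placeholder, and per the docstring above the field is only populated when diarization is enabled and only on the top alternative.

```python
from google.cloud import speech_v1

client = speech_v1.SpeechClient()

config = speech_v1.RecognitionConfig(
    encoding=speech_v1.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    diarization_config=speech_v1.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=2,
        max_speaker_count=2,
    ),
)
audio = speech_v1.RecognitionAudio(uri="gs://example-bucket/two-speakers.wav")

response = client.recognize(config=config, audio=audio)

# With diarization enabled, the last result carries the per-word speaker
# information on its top alternative; speaker_label supersedes the older
# numeric speaker_tag.
for word in response.results[-1].alternatives[0].words:
    print(word.word, word.speaker_label)
```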