From 2d2947a83e8512c2e753f27beb8f0a43bb04f198 Mon Sep 17 00:00:00 2001 From: Aman Date: Mon, 12 May 2025 21:45:54 +0530 Subject: [PATCH] Adding code to transcribe long audio files using streaming analyze content --- dialogflow/analyze_content_stream.py | 114 +++++++++++++++++++++++++++ dialogflow/requirements.txt | 1 + 2 files changed, 115 insertions(+) create mode 100644 dialogflow/analyze_content_stream.py diff --git a/dialogflow/analyze_content_stream.py b/dialogflow/analyze_content_stream.py new file mode 100644 index 00000000000..80133683952 --- /dev/null +++ b/dialogflow/analyze_content_stream.py @@ -0,0 +1,114 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud Dialogflow API sample code using the StreamingAnalyzeContent +API. + +Also please contact Google to get credentials of this project and set up the +credential file json locations by running: +export GOOGLE_APPLICATION_CREDENTIALS= + +Example usage: + export GOOGLE_CLOUD_PROJECT='cloud-contact-center-ext-demo' + export CONVERSATION_PROFILE='FnuBYO8eTBWM8ep1i-eOng' + export GOOGLE_APPLICATION_CREDENTIALS='/Users/ruogu/Desktop/keys/cloud-contact-center-ext-demo-78798f9f9254.json' + export AUDIO_FILE_PATH='/Users/ruogu/Desktop/book_a_room.wav' + + python analyze_content_stream.py + +""" + +import os +import conversation_management +import participant_management +from pydub import AudioSegment +import math + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +AUDIO_FILE_PATH = os.getenv("AUDIO_FILE_PATH") +CONVERSATION_PROFILE_ID = os.getenv("CONVERSATION_PROFILE") + + +def split_audio(audio_file_path, max_length_ms=5_000): + """Splits audio into chunks under max_length_ms.""" + audio = AudioSegment.from_file(audio_file_path) + chunks = [] + num_chunks = math.ceil(len(audio) / max_length_ms) + base_filename = os.path.splitext(audio_file_path)[0] + + for i in range(num_chunks): + start = i * max_length_ms + end = min((i + 1) * max_length_ms, len(audio)) + chunk = audio[start:end] + chunk_path = f"{base_filename}_part{i}.wav" + chunk.export(chunk_path, format="wav", parameters=["-ac", "1"]) + chunks.append(chunk_path) + + return chunks + +def run_analyze_content_streaming(conversation_profile_id, audio_file_path, project_id): + # Create conversation + conversation = conversation_management.create_conversation( + project_id=project_id, + conversation_profile_id=conversation_profile_id + ) + conversation_id = conversation.name.split("conversations/")[1] + + # Create participant + participant = participant_management.create_participant( + project_id=project_id, + conversation_id=conversation_id, + role="END_USER" + ) + participant_id = participant.name.split("participants/")[1] + + class stream_generator: + def __init__(self, audio_file_path): + self.audio_file_path = audio_file_path + + def generator(self): + with open(self.audio_file_path, "rb") as audio_file: + while True: + chunk = audio_file.read(10000) + if not chunk: + break + # The later requests contains audio data. + yield chunk + + results = participant_management.analyze_content_audio_stream( + conversation_id=conversation_id, + participant_id=participant_id, + sample_rate_herz=48000, + stream=stream_generator(audio_file_path), + language_code="en-US", + timeout=300, + ) + + out = " ".join([result.message.content for result in results]) + + print(f"🔊 Transcription: {out}") + + # Complete conversation + conversation_management.complete_conversation(project_id, conversation_id) + return out + +if __name__ == "__main__": + audio_chunks = split_audio(AUDIO_FILE_PATH) + results = [] + for chunk_path in audio_chunks: + print(f"Processing {chunk_path}...") + partial_result = run_analyze_content_streaming(CONVERSATION_PROFILE_ID, chunk_path, PROJECT_ID) + results.extend(partial_result) + + print("Final Transcript: {}".format(''.join(results).strip())) \ No newline at end of file diff --git a/dialogflow/requirements.txt b/dialogflow/requirements.txt index 8b6f00fa3f7..b2ff9640838 100644 --- a/dialogflow/requirements.txt +++ b/dialogflow/requirements.txt @@ -4,3 +4,4 @@ pyaudio==0.2.14 termcolor==3.0.0 functions-framework==3.8.2 Werkzeug==3.0.6 +pydub==0.25.1