stlukey · Jimmys-Code · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
diff --git a/.gitmodules b/.gitmodules
diff --git a/Jimmys-Whisper b/Jimmys-Whisper
diff --git a/Jimmys_whisper.code-workspace b/Jimmys_whisper.code-workspace
@@ -0,0 +1,8 @@
+{
+	"folders": [
+		{
+			"path": "."
+		}
+	],
+	"settings": {}
+}
diff --git a/listen_rec.py b/listen_rec.py
diff --git a/main.py b/main.py
@@ -0,0 +1,73 @@
+import subprocess
+import sys
+import sounddevice as sd
+import soundfile as sf
+import numpy
+
+def transcribe_audio(file_path):
+    """Run transcribe.py on file_path in a child process and return the contents of output.txt."""
+    # sys.executable keeps the child on the same interpreter as this script.
+    completed = subprocess.run([sys.executable, 'transcribe.py', file_path],
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    # Surface a failed child instead of silently returning whatever output.txt already held.
+    if completed.returncode != 0:
+        print(f"transcribe.py exited with {completed.returncode}:", file=sys.stderr)
+        print(completed.stderr.decode(errors="replace"), file=sys.stderr)
+
+    # transcribe.py hands its result back through output.txt in the working directory.
+    with open("output.txt", "r") as f:
+        output = f.read()
+    return output
+
+# Start recording the input microphone and when the user presses enter, stop the recording and save it as an audio file.
+
+import threading
+
+import sounddevice as sd
+import soundfile as sf
+import threading
+import numpy
+
+def record_audio(file_path):
+    """Record mono microphone audio to file_path until the user presses Enter."""
+    print("Recording... Press Enter to stop.")
+
+    # Set by this thread once Enter is pressed; the recorder thread waits on it.
+    stop_recording = threading.Event()
+
+    def capture_audio(file_path, sample_rate=8000):
+        # The stream callback appends one block of samples per invocation.
+        frames = []
+
+        def callback(indata, _frame_count, _time_info, _status):
+            # Keep a copy of each block rather than a reference to indata.
+            frames.append(indata.copy())
+
+        # The input stream stays open, feeding callback, until the stop event is set.
+        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
+            stop_recording.wait()
+
+        # numpy.concatenate rejects an empty list, so write zero samples in that case.
+        empty = numpy.zeros((0, 1), dtype="float32")
+        audio_data = numpy.concatenate(frames, axis=0) if frames else empty
+        sf.write(file_path, audio_data, sample_rate)
+
+    recorder = threading.Thread(target=capture_audio, args=(file_path,), daemon=True)
+    recorder.start()
+
+    # Block until the user presses Enter, then tell the recorder to stop.
+    input()
+    stop_recording.set()
+    # Wait for the file to be written so the caller never reads a missing or stale recording.
+    recorder.join()
+
+# Record, transcribe, print -- repeated until the process is interrupted.
+while True:
+    # Every pass records to, and then transcribes, this same file.
+    file_path = "output.wav"
+    record_audio(file_path)
+
+    transcription_result = transcribe_audio(file_path)
+
+    print(transcription_result)
diff --git a/output.txt b/output.txt
@@ -0,0 +1,2 @@
+version 9: [' Hello, can you hear me?']
+Which is 0.00 seconds a sentence
diff --git a/output.wav b/output.wav
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,13 @@
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+ffmpeg-python==0.2.0
+future==1.0.0
+idna==3.6
+numpy==1.26.4
+pycparser==2.21
+requests==2.31.0
+sounddevice==0.4.6
+soundfile==0.12.1
+urllib3==2.2.1
+whispercpp @ git+https://github.com/stlukey/whispercpp.py@7af678159c29edb3bc2a51a72665073d58f2352f
diff --git a/transcribe.py b/transcribe.py
@@ -0,0 +1,51 @@
+import time
+import os
+import sys
+import contextlib
+from whispercpp import Whisper
+# Number of completed transcription passes; the loop below increments it.
+counter = 0
+# Create 'output.txt' only when it is missing -- an existing file is left untouched.
+if not os.path.exists("output.txt"):
+    with open("output.txt", "w"):
+        pass
+w = Whisper("base")  # loaded once at module level and shared by every transcribe_audio call
+
+def transcribe_audio(file_path):
+    """Transcribe the audio file at file_path using the module-level model `w`.
+
+    The model is created once at import time, so repeated calls reuse it.
+    Returns whatever `w.extract_text` yields for the transcription result.
+    """
+    segments = w.transcribe(file_path)
+    return w.extract_text(segments)
+
+# main.py passes the recording's path as argv[1]; fall back to the old default without it.
+run_once = len(sys.argv) > 1
+file_path = sys.argv[1] if run_once else "output.wav"
+
+while True:
+    start_time = time.time()
+    print("Transcribing audio...")
+    # Silence anything the transcription writes through sys.stdout / sys.stderr.
+    with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
+        transcription = transcribe_audio(file_path)
+    print(f"{counter}] Transcription complete!")
+    end_time = time.time()
+    time_taken = int(end_time - start_time)  # whole seconds
+
+    print(f"version {counter}: {transcription}\n")
+    # NOTE(review): the divisor 30 is unexplained -- confirm what "a sentence" is meant to measure.
+    print(f"Which is {time_taken/30:.2f} seconds a sentence")
+    # Audible completion cue; relies on ALSA's aplay and this sample file being installed.
+    os.system("aplay /usr/share/sounds/alsa/Front_Center.wav")
+    # The caller reads the result back from output.txt, so overwrite it on every pass.
+    with open("output.txt", "w") as f:
+        f.write(f"version {counter}: {transcription}\n")
+        f.write(f"Which is {time_taken/30:.2f} seconds a sentence\n")
+    counter+=1
+    if run_once:
+        break  # a caller waiting on this process would otherwise block forever
+
+
+
diff --git a/whisper.cpp b/whisper.cpp
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		version 9: [' Hello, can you hear me?']
		Which is 0.00 seconds a sentence