Skip to content

Transcribe main #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
40 changes: 0 additions & 40 deletions .github/workflows/build_wheels.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .gitmodules

This file was deleted.

1 change: 1 addition & 0 deletions Jimmys-Whisper
Submodule Jimmys-Whisper added at 1e7e6b
8 changes: 8 additions & 0 deletions Jimmys_whisper.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"folders": [
{
"path": "."
}
],
"settings": {}
}
Empty file added listen_rec.py
Empty file.
73 changes: 73 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import subprocess
import sys
import sounddevice as sd
import soundfile as sf
import numpy

def transcribe_audio(file_path):
    """Transcribe an audio file by running ``transcribe.py`` in a child process.

    The child is started with the interpreter running this script and is
    given ``file_path`` as its only argument. The transcription is then read
    back from ``output.txt`` in the current working directory.

    Args:
        file_path: Path of the audio file handed to ``transcribe.py``.

    Returns:
        The full text content of ``output.txt``.

    Raises:
        RuntimeError: If the child process exits with a non-zero status.
        OSError: If ``output.txt`` cannot be opened.
    """
    # sys.executable keeps the child on the same Python interpreter as this
    # process.
    completed = subprocess.run(
        [sys.executable, "transcribe.py", file_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Without this check a failed run would silently return whatever an
    # earlier run left behind in output.txt.
    if completed.returncode != 0:
        details = completed.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            f"transcribe.py exited with status {completed.returncode}: {details}"
        )

    # The child reports its result through this file rather than stdout.
    with open("output.txt", "r") as f:
        return f.read()

# Record from the input microphone until the user presses Enter, then save the recording as an audio file.

import threading

import sounddevice as sd
import soundfile as sf
import threading
import numpy

def record_audio(file_path):
    """Record audio from a ``sounddevice`` input stream until Enter is pressed.

    Audio is captured on a background thread while the calling thread blocks
    on ``input()``. The function returns only after the worker thread has
    finished, so the recording has been written to ``file_path`` by the time
    the caller continues (unless no audio was captured at all).

    Args:
        file_path: Destination path passed to ``soundfile.write``.
    """
    print("Recording... Press Enter to stop.")

    # Set by the calling thread once Enter is pressed; the worker waits on it.
    stop_recording = threading.Event()

    def capture_audio(file_path, sample_rate=8000):
        """Collect mono input blocks until ``stop_recording`` is set, then save them."""
        frames = []

        def callback(indata, _, time, status):
            # Keep a copy of each block rather than a reference to the
            # buffer handed to the callback (see sounddevice callback docs).
            frames.append(indata.copy())

        # The stream records for as long as this ``with`` block is open.
        with sd.InputStream(samplerate=sample_rate, channels=1, callback=callback):
            stop_recording.wait()

        # numpy.concatenate raises ValueError on an empty list, which happens
        # if Enter is pressed before the first block arrives.
        if not frames:
            print("No audio was captured; nothing written.")
            return

        audio_data = numpy.concatenate(frames, axis=0)
        sf.write(file_path, audio_data, sample_rate)

    recorder = threading.Thread(
        target=capture_audio, args=(file_path,), daemon=True
    )
    recorder.start()

    # Block until the user presses Enter.
    input()

    stop_recording.set()

    # Wait for the worker to close the stream and write the file; returning
    # earlier would let the caller read a missing or stale recording.
    recorder.join()

# Record-and-transcribe loop: each pass records one clip to `file_path`,
# transcribes it, and prints the result. Runs until interrupted.
file_path = "output.wav"

try:
    while True:
        record_audio(file_path)
        transcription_result = transcribe_audio(file_path)
        print(transcription_result)
except (KeyboardInterrupt, EOFError):
    # Ctrl-C or a closed stdin ends the session without a traceback.
    print("\nStopping.")
2 changes: 2 additions & 0 deletions output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
version 9: [' Hello, can you hear me?']
Which is 0.00 seconds a sentence
Binary file added output.wav
Binary file not shown.
13 changes: 13 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
ffmpeg-python==0.2.0
future==1.0.0
idna==3.6
numpy==1.26.4
pycparser==2.21
requests==2.31.0
sounddevice==0.4.6
soundfile==0.12.1
urllib3==2.2.1
whispercpp @ git+https://github.com/stlukey/whispercpp.py@7af678159c29edb3bc2a51a72665073d58f2352f
51 changes: 51 additions & 0 deletions transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import time
import os
import sys
import contextlib
from whispercpp import Whisper
# Number of completed transcription passes; used to label each result.
counter = 0

# Create 'output.txt' when it is missing. An existing file is left untouched.
if not os.path.exists("output.txt"):
    with open("output.txt", "w"):
        pass

# Build the Whisper instance once at module level so calls to
# transcribe_audio() reuse it instead of constructing a new one each time.
w = Whisper("base")

def transcribe_audio(file_path):
    """Run the module-level Whisper instance on ``file_path`` and return its text.

    Args:
        file_path: Path of the audio file passed to ``w.transcribe``.

    Returns:
        Whatever ``w.extract_text`` produces for the transcription result.
    """
    return w.extract_text(w.transcribe(file_path))

# Script entry: transcribe an audio file, report timing, and store the result
# in 'output.txt'.
#
# When a path is given on the command line it is transcribed exactly once and
# the script exits, so a caller waiting for this process can continue.
# Without an argument the script keeps its earlier behaviour: it transcribes
# "output.wav" repeatedly until interrupted.
run_once = len(sys.argv) > 1
file_path = sys.argv[1] if run_once else "output.wav"

while True:
    start_time = time.time()
    print("Transcribing audio...")
    # Silence anything written to Python's stdout/stderr during transcription.
    with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
        transcription = transcribe_audio(file_path)
    print(f"[{counter}] Transcription complete!")
    end_time = time.time()
    # Elapsed time is truncated to whole seconds.
    time_taken = int(end_time - start_time)

    print(f"version {counter}: {transcription}\n")
    # NOTE(review): 30 is presumably the sentence count of a benchmark clip;
    # confirm before relying on this figure.
    print(f"Which is {time_taken/30:.2f} seconds a sentence")
    # Play a sound to alert the user (relies on the ALSA `aplay` tool).
    os.system("aplay /usr/share/sounds/alsa/Front_Center.wav")
    # Overwrite 'output.txt' with the latest result.
    with open("output.txt", "w") as f:
        f.write(f"version {counter}: {transcription}\n")
        f.write(f"Which is {time_taken/30:.2f} seconds a sentence\n")
    counter += 1

    if run_once:
        break



2 changes: 1 addition & 1 deletion whisper.cpp
Submodule whisper.cpp updated from 0a2d12 to 962063
Loading