What problem are you facing?
What is the cause of the error (in your opinion)?
I have created a stream from the speakers (WASAPI loopback) and am trying to `read()` from it. However, when no sound is playing on the system, the code gets stuck on `stream.read(1024)` until I start music or another sound source.
It would be great if you could add a parameter such as `fill_silence=True`, so that the code is not blocked on `stream.read()` while nothing is playing.
Here is code:
import numpy as np
import pyaudiowpatch as pyaudio
from faster_whisper import WhisperModel
def get_stream(p: pyaudio.PyAudio, device='micro'):
    """Open an input stream on the microphone or on the default speakers' loopback.

    Args:
        p: The PyAudio instance used to query devices and open the stream.
        device: ``'micro'`` opens the default input device as 16 kHz mono
            int16. Any other value opens the WASAPI loopback device that
            mirrors the default output device, using that device's own
            channel count and default sample rate.

    Returns:
        The opened stream, or ``None`` when no loopback device matching the
        default output device is found (a message is printed in that case).
    """
    if device == 'micro':
        # The original code followed this with a bare `pyaudio.Stream()` call,
        # which cannot succeed without constructor arguments and served no
        # purpose; it has been removed.
        # NOTE(review): output=True also opens a playback side that nothing in
        # this script writes to — confirm it is actually needed.
        return p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=16000,
            input=True,
            output=True,
            frames_per_buffer=1024,
        )

    wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
    default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])

    if not default_speakers["isLoopbackDevice"]:
        # The default output is a regular playback device: look for the
        # loopback device whose name contains the speakers' name.
        for loopback in p.get_loopback_device_info_generator():
            if default_speakers["name"] in loopback["name"]:
                default_speakers = loopback
                break
        else:
            print("Default loopback output device not found.\n\nRun `python -m pyaudiowpatch` to check available devices.\nExiting...\n")
            return None

    print(f"Recording from: ({default_speakers['index']}){default_speakers['name']}")
    return p.open(
        format=pyaudio.paInt16,
        channels=default_speakers["maxInputChannels"],
        rate=int(default_speakers["defaultSampleRate"]),
        frames_per_buffer=1024,
        input=True,
        input_device_index=default_speakers["index"],
    )
def transcribe_chuck(p, stream, model: "WhisperModel", chunck_length=4, target_rate=16000):
    """Record roughly ``chunck_length`` seconds from ``stream`` and transcribe them.

    The captured int16 samples are scaled to float32 in [-1, 1), downmixed to
    mono and resampled to ``target_rate`` before being handed to the model.
    Without this, a stream opened at the device's native format (e.g. a
    multi-channel loopback at its default sample rate) would be passed on as
    if it were 16 kHz mono.

    Args:
        p: Unused; kept so existing callers keep working.
        stream: An open input stream exposing ``read(num_frames)`` plus the
            ``_rate`` and ``_frames_per_buffer`` attributes. ``_channels`` is
            read when present and treated as 1 otherwise.
        model: Object whose ``transcribe(audio, beam_size=...)`` returns
            ``(segments, info)``, where each segment has a ``text`` attribute.
        chunck_length: Length of audio to capture, in seconds.
        target_rate: Sample rate, in Hz, of the audio passed to the model.

    Returns:
        The segment texts joined with single spaces.
    """
    rate = stream._rate
    frames_per_buffer = stream._frames_per_buffer
    channels = getattr(stream, "_channels", 1) or 1

    # Each read() blocks until a full buffer of frames is available.
    n_reads = int(rate / frames_per_buffer * chunck_length)
    frames = [stream.read(frames_per_buffer) for _ in range(n_reads)]

    samples = np.frombuffer(b''.join(frames), dtype=np.int16).astype(np.float32) / 32768.0

    if channels > 1:
        # Samples are interleaved per frame; drop any trailing partial frame
        # and average the channels of each frame.
        usable = samples.size - samples.size % channels
        samples = samples[:usable].reshape(-1, channels).mean(axis=1)

    if rate != target_rate and samples.size:
        # Linear interpolation keeps this dependency-free. It applies no
        # anti-aliasing filter, so it is a pragmatic rather than
        # high-fidelity resampler.
        n_out = int(round(samples.size * target_rate / rate))
        src_t = np.arange(samples.size) / rate
        dst_t = np.arange(n_out) / target_rate
        samples = np.interp(dst_t, src_t, samples)

    np_audio = np.ascontiguousarray(samples, dtype=np.float32)
    segments, info = model.transcribe(np_audio, beam_size=7)
    return ' '.join(segment.text for segment in segments)
def main():
    """Continuously transcribe captured audio until interrupted with Ctrl+C.

    Each transcribed chunk is printed as it is produced. On
    ``KeyboardInterrupt`` the accumulated transcript is written to
    ``log.txt``. The stream (if one was opened) and the PyAudio instance are
    always released on exit.
    """
    model_size = "large"
    model = WhisperModel(model_size, compute_type='float16')  # device="cuda",
    p = pyaudio.PyAudio()
    stream = None
    transcriptions = []
    try:
        # Any value other than 'micro' selects the speakers' loopback device.
        stream = get_stream(p, device='micro1')
        if stream is None:
            # get_stream already reported that no loopback device was found.
            return
        while True:
            transcription = transcribe_chuck(p, stream, model)
            print(transcription)
            transcriptions.append(transcription)
    except KeyboardInterrupt:
        print('Stopping...')
        # Explicit UTF-8 so non-ASCII transcriptions do not depend on the
        # platform's default encoding.
        with open('log.txt', 'w', encoding='utf-8') as log_file:
            log_file.write(''.join(f'{text} ' for text in transcriptions))
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
# Start the transcription loop only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
What problem are you facing?
What is the cause of the error (in your opinion)?
I have created a stream from the speakers (WASAPI loopback) and am trying to `read()` from it. However, when no sound is playing on the system, the code gets stuck on `stream.read(1024)` until I start music or another sound source.
It would be great if you could add a parameter such as `fill_silence=True`, so that the code is not blocked on `stream.read()` while nothing is playing.
Here is code: