diff --git a/frontend/core.py b/frontend/core.py index d9cf9fb..5880126 100644 --- a/frontend/core.py +++ b/frontend/core.py @@ -37,10 +37,11 @@ class Core: self.payment_method = payment_method - def set_speech_recog_settings(self, speech_recog_timeout, audio_input_device_name, audio_output_device_name, window_size_sec, vad_threshold, min_silence_duration_ms, speech_pad_ms): + #def set_speech_recog_settings(self, speech_recog_timeout, audio_input_device_name, audio_output_device_name, window_size_sec, vad_threshold, min_silence_duration_ms, speech_pad_ms): + def set_speech_recog_settings(self, speech_recog_timeout, window_size_sec, vad_threshold, min_silence_duration_ms, speech_pad_ms): self.speech_recog_timeout = speech_recog_timeout - self.audio_input_device_name = audio_input_device_name - self.audio_output_device_name = audio_output_device_name + #self.audio_input_device_name = audio_input_device_name + #self.audio_output_device_name = audio_output_device_name self.window_size_sec = window_size_sec self.vad_threshold = vad_threshold self.min_silence_duration_ms = min_silence_duration_ms @@ -48,19 +49,22 @@ class Core: def set_tts_settings(self, speaker_wav): - self.speaker_wav = speaker_wav + #self.speaker_wav = speaker_wav + print("Setting TTS speaker... ", end="") + self.tts.change_speaker(speaker_wav) + print("Done!") def assistant_start(self): print("Starting assistant...") - print("Setting TTS speaker... ", end="") - self.tts.change_speaker(self.speaker_wav) - print("Done!") + #print("Setting TTS speaker... ", end="") + #self.tts.change_speaker(self.speaker_wav) + #print("Done!") print("Starting VAD recording thread... ", end="") self.vad_rec.start_vad_recorder( - target_device_name = self.audio_input_device_name, + #target_device_name = self.audio_input_device_name, window_size_sec = self.window_size_sec, vad_threshold = self.vad_threshold, min_silence_duration_ms = self.min_silence_duration_ms, diff --git a/frontend/core_test.ipynb b/frontend/core_test.ipynb index 9ec801e..bcad991 100644 --- a/frontend/core_test.ipynb +++ b/frontend/core_test.ipynb @@ -11,7 +11,7 @@ "core = Core(\n", " whisper_model_name = \"large-v3\",\n", " use_chatgpt_placeholder = False\n", - ")\n" + ")" ] }, { @@ -29,13 +29,13 @@ "\n", "core.set_speech_recog_settings(\n", " speech_recog_timeout = 2.5,\n", - " audio_input_device_name = \"Virtual\",\n", - " audio_output_device_name = \"placeholder\",\n", + " #audio_input_device_name = \"Virtual\",\n", + " #audio_output_device_name = \"placeholder\",\n", " window_size_sec = 0.1,\n", " vad_threshold = 0.6,\n", " min_silence_duration_ms = 150,\n", " speech_pad_ms = 0\n", - ")\n" + ")" ] }, { diff --git a/frontend/vad_recorder.py b/frontend/vad_recorder.py index 192b9cd..2ae7c30 100644 --- a/frontend/vad_recorder.py +++ b/frontend/vad_recorder.py @@ -101,7 +101,8 @@ class VADRecorder: - def start_vad_recorder(self, target_device_name, window_size_sec = 0.1, vad_threshold = 0.6, min_silence_duration_ms = 150, speech_pad_ms = 0): + #def start_vad_recorder(self, target_device_name, window_size_sec = 0.1, vad_threshold = 0.6, min_silence_duration_ms = 150, speech_pad_ms = 0): + def start_vad_recorder(self, window_size_sec = 0.1, vad_threshold = 0.6, min_silence_duration_ms = 150, speech_pad_ms = 0): self.window_size = int(window_size_sec * SAMPLERATE) @@ -112,22 +113,24 @@ class VADRecorder: self.p = pyaudio.PyAudio() - target_device_index = None - for i in range(self.p.get_device_count()): - device_info = self.p.get_device_info_by_index(i) - if device_info['maxInputChannels'] > 0 and target_device_name in device_info['name']: - target_device_index = i - break - - if target_device_index is None: - print(f"No target device found with \"{target_device_name}\" in its name.") - exit() + #target_device_index = None + #for i in range(self.p.get_device_count()): + # device_info = self.p.get_device_info_by_index(i) + # if device_info['maxInputChannels'] > 0 and target_device_name in device_info['name']: + # target_device_index = i + # break + # + #if target_device_index is None: + # print(f"No target device found with \"{target_device_name}\" in its name.") + # exit() + # + #try: + # self.stream_in = self.p.open(format=pyaudio.paFloat32, channels=1, rate=SAMPLERATE, input=True, frames_per_buffer=self.window_size, input_device_index=target_device_index) + #except OSError: + # print(f"An unexpected error occured when trying to open device stream with \"{target_device_name}\" in its name. That could be caused by the device being disabled or unplugged.") + # exit() - try: - self.stream_in = self.p.open(format=pyaudio.paFloat32, channels=1, rate=SAMPLERATE, input=True, frames_per_buffer=self.window_size, input_device_index=target_device_index) - except OSError: - print(f"An unexpected error occured when trying to open device stream with \"{target_device_name}\" in its name. That could be caused by the device being disabled or unplugged.") - exit() + self.stream_in = self.p.open(format=pyaudio.paFloat32, channels=1, rate=SAMPLERATE, input=True, frames_per_buffer=self.window_size) self.speech = False self.audios_for_whisper = []