From 20238a31bf46f21ebc205724220112b4e1540904 Mon Sep 17 00:00:00 2001 From: Looki2000 Date: Mon, 4 Dec 2023 23:07:43 +0100 Subject: [PATCH] small fixes --- frontend/core.py | 20 +++++++++++++--- frontend/core_test.ipynb | 41 ++++++++++++++++++++++++-------- frontend/system_instructions.txt | 2 +- frontend/tts_stream.py | 3 --- 4 files changed, 49 insertions(+), 17 deletions(-) diff --git a/frontend/core.py b/frontend/core.py index 61a3247..d9cf9fb 100644 --- a/frontend/core.py +++ b/frontend/core.py @@ -5,6 +5,7 @@ from faster_whisper import WhisperModel import torch import time import numpy as np +import re @@ -84,7 +85,7 @@ class Core: self.assistant_running = True - while self.assistant_running: + while True: if self.vad_rec.speech: last_recog_time = time.perf_counter() @@ -114,8 +115,6 @@ class Core: - - elif time.perf_counter() - last_recog_time > self.speech_recog_timeout and len(speech_recog_text) > 0: speech_recog_text = speech_recog_text.strip() @@ -130,6 +129,18 @@ class Core: gpt_response = self.gpt_wrap.get_response(speech_recog_text) + # separate long sequences of numbers in text string (for example 123456789) into packets of 3 (123 456 789) + gpt_response = re.sub(r"(\d{3})(?=\d)", r"\1 ", gpt_response) + + # Add space on the right side of numbers + gpt_response = re.sub(r'(\d)([^\d\s])', r'\1 \2', gpt_response) + # Add space on the left side of numbers + gpt_response = re.sub(r'([^\d\s])(\d)', r'\1 \2', gpt_response) + + # replace "ul." with "ulica" (non case sensitive) + gpt_response = re.sub(r"ul\.", "ulica", gpt_response, flags=re.IGNORECASE) + + print("-----------------------------------------") if self.use_chatgpt_placeholder: print("!!!!! CHATGPT PLACEHOLDER RESPONSE !!!!!!") @@ -159,6 +170,9 @@ class Core: time.sleep(0.01) + if not self.assistant_running: + break + # set assistant_running back to True to indicate that the loop has exited def assistant_stop(self): diff --git a/frontend/core_test.ipynb b/frontend/core_test.ipynb index 51e6931..9ec801e 100644 --- a/frontend/core_test.ipynb +++ b/frontend/core_test.ipynb @@ -10,14 +10,21 @@ "\n", "core = Core(\n", " whisper_model_name = \"large-v3\",\n", - " use_chatgpt_placeholder = True\n", - ")\n", - "\n", + " use_chatgpt_placeholder = False\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "core.set_order_settings(\n", " phone_number = \"123456789\",\n", - " order_items = \"1x margharitta\\n2x sos majonezowy\",\n", - " delivery_address = \"ul. Amogusowa 1337, Suski Małe\",\n", - " payment_method = \"karta\"\n", + " order_items = \"2x margherita\\n3x sos czosnkowy\\n2x pepsi 1.5l\",\n", + " delivery_address = \"ulica Kielecka 16A, Opole\",\n", + " payment_method = \"gotówka\"\n", ")\n", "\n", "core.set_speech_recog_settings(\n", @@ -28,12 +35,26 @@ " vad_threshold = 0.6,\n", " min_silence_duration_ms = 150,\n", " speech_pad_ms = 0\n", - ")\n", - "\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "core.set_tts_settings(\n", " speaker_wav = \"voices/lector.wav\"\n", - ")\n", - "\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "try:\n", " core.assistant_start()\n", "except (KeyboardInterrupt, SystemExit):\n", diff --git a/frontend/system_instructions.txt b/frontend/system_instructions.txt index 1a688e7..e99e30c 100644 --- a/frontend/system_instructions.txt +++ b/frontend/system_instructions.txt @@ -2,7 +2,7 @@ You are a bot that will act as a guy that orders a pizza you will be connected o DATA: phone: {phone_number} -delivery location: {delivery_location} +delivery location: {delivery_location} (PIZZA IS FOR DELIVERY. NOT for dine-in.) paymentMethod: {payment_method} OrderItems: {order_items} diff --git a/frontend/tts_stream.py b/frontend/tts_stream.py index 1dc78ef..65e11ea 100644 --- a/frontend/tts_stream.py +++ b/frontend/tts_stream.py @@ -78,9 +78,6 @@ class TTSStream: def tts_speak(self, text): self.play_buffer_size = 512 - # separate long sequences of numbers in text string (for example 123456789) into packets of 3 (123 456 789) - text = re.sub(r"(\d{3})(?=\d)", r"\1 ", text) - # open pyaudio stream p = pyaudio.PyAudio()