added chatgpt wrap, tts phone number splitting
This commit is contained in:
parent
f18e41476f
commit
24f909abff
72
frontend/chatgpt_wrap.py
Normal file
72
frontend/chatgpt_wrap.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
class ChatGPTWrap:
    """Wrapper around the chat backend that answers the pizza place.

    Two modes are selected at construction time:

    * placeholder mode (``use_chatgpt_placeholder=True``) returns canned
      text built from the order data, so the rest of the pipeline can be
      exercised without a real model;
    * real mode is not implemented yet: the constructor raises
      ``NotImplementedError`` after loading the system-instructions template.
    """

    # In placeholder mode the reply with this ordinal (counted per order
    # session) carries the " CALLEND" marker.
    _PLACEHOLDER_CALLEND_AFTER = 3

    def __init__(self, use_chatgpt_placeholder=False):
        """Set up the wrapper.

        Args:
            use_chatgpt_placeholder: when true, use canned responses instead
                of the real backend.

        Raises:
            NotImplementedError: when the real backend is requested.
            OSError: when ``system_instructions.txt`` cannot be read
                (real mode only; the path is relative to the working
                directory).
        """
        self.use_chatgpt_placeholder = use_chatgpt_placeholder

        # true chatgpt
        if not use_chatgpt_placeholder:
            print("Initializing ChatGPT... ", end="")

            with open("system_instructions.txt", "r", encoding="utf-8") as f:
                self.system_inst_template = f.read()

            #### true openai chat gpt initialization stuff below (everything that needs to be done only once) ####
            raise NotImplementedError("True ChatGPT is not implemented yet!")

            # Once the real backend exists, finish the progress line here
            # with: print("Done!")

        # placeholder chatgpt
        else:
            print("Using ChatGPT placeholder!")
            self.message_idx = 0

    def init_order(self, phone_number, order_items, delivery_address, payment_method):
        """Store the order data and start a fresh conversation session.

        Args:
            phone_number: customer phone number.
            order_items: ordered items.
            delivery_address: delivery address.
            payment_method: payment method.
        """
        self.phone_number = phone_number
        self.order_items = order_items
        self.delivery_address = delivery_address
        self.payment_method = payment_method

        # Every order is a new session: restart the reply counter, otherwise
        # only the very first session would ever reach the CALLEND reply.
        self.message_idx = 0

        # true chatgpt
        if not self.use_chatgpt_placeholder:
            # generate system instructions from template
            # (str.format only fills single-brace fields such as {phone_number};
            # doubled braces in the template are emitted literally)
            self.system_inst = self.system_inst_template.format(
                phone_number=self.phone_number,
                order_items=self.order_items,
                delivery_address=self.delivery_address,
                payment_method=self.payment_method,
            )

            #### true openai chat gpt system instructions initialization stuff below ####
            ##### (everything that needs to be done each assistant session like some chat gpt conversation cleanup) ####

    def get_response(self, input_message):
        """Return the reply to ``input_message`` as a string.

        In placeholder mode the message content is ignored: the reply is a
        fixed prefix followed by one randomly chosen piece of order data,
        and the third reply of a session ends with " CALLEND".

        Raises:
            NotImplementedError: when the real backend is selected.
        """
        # true chatgpt
        if not self.use_chatgpt_placeholder:
            #### true openai chat gpt response stuff below ####
            # Fail loudly instead of returning None, which callers would
            # otherwise trip over when treating the reply as a string.
            raise NotImplementedError("True ChatGPT is not implemented yet!")

        # placeholder chatgpt
        choices = (
            self.phone_number,
            self.order_items,
            self.delivery_address,
            self.payment_method,
        )

        self.message_idx += 1
        call_end = " CALLEND" if self.message_idx == self._PLACEHOLDER_CALLEND_AFTER else ""

        return f"czat dżi pi ti plejsholder {random.choice(choices)}{call_end}"
|
@ -1,5 +1,6 @@
|
|||||||
from vad_recorder import VADRecorder
|
from vad_recorder import VADRecorder
|
||||||
from tts_stream import TTSStream
|
from tts_stream import TTSStream
|
||||||
|
from chatgpt_wrap import ChatGPTWrap
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
import torch
|
import torch
|
||||||
import time
|
import time
|
||||||
@ -8,7 +9,9 @@ import numpy as np
|
|||||||
|
|
||||||
|
|
||||||
class Core:
|
class Core:
|
||||||
def __init__(self, whisper_model_name = "large-v3"):
|
def __init__(self, whisper_model_name = "large-v3", use_chatgpt_placeholder = False):
|
||||||
|
self.use_chatgpt_placeholder = use_chatgpt_placeholder
|
||||||
|
|
||||||
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
print("\n=======================================")
|
print("\n=======================================")
|
||||||
print(f"Using {self.device.capitalize()} for:")
|
print(f"Using {self.device.capitalize()} for:")
|
||||||
@ -16,19 +19,21 @@ class Core:
|
|||||||
print(" - TTS")
|
print(" - TTS")
|
||||||
print("=======================================\n")
|
print("=======================================\n")
|
||||||
|
|
||||||
print("Loading Whisper model... ", end="")
|
print("Loading Faster Whisper model... ", end="")
|
||||||
self.whisper_model = WhisperModel(whisper_model_name, device=self.device, compute_type="float16")
|
self.whisper_model = WhisperModel(whisper_model_name, device=self.device, compute_type="float16")
|
||||||
print("Done!")
|
print("Done!")
|
||||||
|
|
||||||
# VADRecorder and TTSStream have their own console loading messages
|
# VADRecorder, TTSStream and ChatGPTWrap have their own console loading messages
|
||||||
self.vad_rec = VADRecorder()
|
self.vad_rec = VADRecorder()
|
||||||
self.tts = TTSStream(device=self.device)
|
self.tts = TTSStream(device=self.device)
|
||||||
|
self.gpt_wrap = ChatGPTWrap(use_chatgpt_placeholder)
|
||||||
|
|
||||||
|
|
||||||
def set_order_settings(self, phone_number, order_items, delivery_address):
|
def set_order_settings(self, phone_number, order_items, delivery_address, payment_method):
|
||||||
self.phone_number = phone_number
|
self.phone_number = phone_number
|
||||||
self.order_items = order_items
|
self.order_items = order_items
|
||||||
self.delivery_address = delivery_address
|
self.delivery_address = delivery_address
|
||||||
|
self.payment_method = payment_method
|
||||||
|
|
||||||
|
|
||||||
def set_speech_recog_settings(self, speech_recog_timeout, audio_input_device_name, audio_output_device_name, window_size_sec, vad_threshold, min_silence_duration_ms, speech_pad_ms):
|
def set_speech_recog_settings(self, speech_recog_timeout, audio_input_device_name, audio_output_device_name, window_size_sec, vad_threshold, min_silence_duration_ms, speech_pad_ms):
|
||||||
@ -62,7 +67,12 @@ class Core:
|
|||||||
)
|
)
|
||||||
print("Done!")
|
print("Done!")
|
||||||
|
|
||||||
|
self.gpt_wrap.init_order(
|
||||||
|
self.phone_number,
|
||||||
|
self.order_items,
|
||||||
|
self.delivery_address,
|
||||||
|
self.payment_method
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -116,9 +126,19 @@ class Core:
|
|||||||
print(speech_recog_text)
|
print(speech_recog_text)
|
||||||
print("-----------------------------------------\n\n")
|
print("-----------------------------------------\n\n")
|
||||||
|
|
||||||
time.sleep(1) # fake chatgpt delay
|
|
||||||
|
|
||||||
gpt_response = "czat dżi pi ti plejsholder 1 2 3"
|
|
||||||
|
gpt_response = self.gpt_wrap.get_response(speech_recog_text)
|
||||||
|
|
||||||
|
print("-----------------------------------------")
|
||||||
|
if self.use_chatgpt_placeholder:
|
||||||
|
print("!!!!! CHATGPT PLACEHOLDER RESPONSE !!!!!!")
|
||||||
|
else:
|
||||||
|
print("!!!!!!!!!!! CHATGPT RESPONSE !!!!!!!!!!!!")
|
||||||
|
print("-----------------------------------------")
|
||||||
|
print(gpt_response)
|
||||||
|
print("-----------------------------------------\n\n")
|
||||||
|
|
||||||
if not self.assistant_running:
|
if not self.assistant_running:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -127,7 +147,11 @@ class Core:
|
|||||||
|
|
||||||
# tts
|
# tts
|
||||||
print("Speech synthesis stream started!")
|
print("Speech synthesis stream started!")
|
||||||
self.tts.tts_speak(gpt_response)
|
self.tts.tts_speak(gpt_response.replace(" CALLEND", ""))
|
||||||
|
|
||||||
|
|
||||||
|
if "CALLEND" in gpt_response:
|
||||||
|
self.assistant_stop()
|
||||||
|
|
||||||
|
|
||||||
#print(len(audios_for_whisper), time.perf_counter() - last_recog_time, len(speech_recog_text))
|
#print(len(audios_for_whisper), time.perf_counter() - last_recog_time, len(speech_recog_text))
|
||||||
|
@ -8,12 +8,16 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from core import Core\n",
|
"from core import Core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"core = Core(whisper_model_name = \"large-v3\")\n",
|
"core = Core(\n",
|
||||||
|
" whisper_model_name = \"large-v3\",\n",
|
||||||
|
" use_chatgpt_placeholder = True\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"core.set_order_settings(\n",
|
"core.set_order_settings(\n",
|
||||||
" phone_number = 123456789,\n",
|
" phone_number = \"123456789\",\n",
|
||||||
" order_items = \"1x margharitta\\n2x sos majonezowy\",\n",
|
" order_items = \"1x margharitta\\n2x sos majonezowy\",\n",
|
||||||
" delivery_address = \"ul. Amogusowa 1337, Suski Małe\"\n",
|
" delivery_address = \"ul. Amogusowa 1337, Suski Małe\",\n",
|
||||||
|
" payment_method = \"karta\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"core.set_speech_recog_settings(\n",
|
"core.set_speech_recog_settings(\n",
|
||||||
|
15
frontend/system_instructions.txt
Normal file
15
frontend/system_instructions.txt
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
You are a bot acting as a customer who is ordering a pizza. You will be connected by phone to a pizza place and asked questions, for example: "Where should the pizza be delivered?". Answer them using the data in the DATA section. You HAVE to respond in full sentences, because your response will be converted to audio by TTS software, so you cannot use lists; just make a sentence like: "I would like a margherita and a Coca-Cola, please." Basically, write it exactly as you would say it: do not use digits, spell numbers out as words, and do not use abbreviations, write the full word.
|
||||||
|
|
||||||
|
DATA:
|
||||||
|
phone: {phone_number}
|
||||||
|
delivery location: {delivery_address}
|
||||||
|
payment method: {payment_method}
|
||||||
|
order items: {order_items}
|
||||||
|
|
||||||
|
REMEMBER: DON'T USE DIGITS, USE WORDS. For example, don't say "1x", say "one time". Also REMEMBER to adapt the form of each word so that it fits the whole sentence. Example:
|
||||||
|
WRONG:
|
||||||
|
Chciałbym zamówić jedną Margherittę, dwie Colę, pięć Fant i jedną Sprite.
|
||||||
|
RIGHT:
|
||||||
|
Chciałbym zamówić jedną Margarittę, dwie Kole, pięć Fant i jednego Sprajta.
|
||||||
|
|
||||||
|
When the call is ending, put "CALLEND" at the very end of your final response.
|
@ -78,6 +78,9 @@ class TTSStream:
|
|||||||
def tts_speak(self, text):
|
def tts_speak(self, text):
|
||||||
self.play_buffer_size = 512
|
self.play_buffer_size = 512
|
||||||
|
|
||||||
|
# separate long sequences of numbers in text string (for example 123456789) into packets of 3 (123 456 789)
|
||||||
|
text = re.sub(r"(\d{3})(?=\d)", r"\1 ", text)
|
||||||
|
|
||||||
|
|
||||||
# open pyaudio stream
|
# open pyaudio stream
|
||||||
p = pyaudio.PyAudio()
|
p = pyaudio.PyAudio()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user