finetune dialogs tool close to working
This commit is contained in:
parent
e6da26dc45
commit
afaf21ad6d
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
finetune_dialogs_tool/temp/
|
||||
finetune_dialogs_tool/output_dialogs/
|
||||
finetune_dialogs_tool/__pycache__/
|
@ -1,11 +1,181 @@
|
||||
import gradio as gr
|
||||
|
||||
# I know this code sucks and there are many repeating parts but it's just a simple temporary tool and I didn't bother to make it better
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from order_items import possible_order_items
|
||||
|
||||
# Absolute path of the directory that contains this script.
# Every path used by the tool is built relative to it.
root = os.path.dirname(os.path.abspath(__file__))


def mkdir_if_not_exists_rel(path):
    """Create the directory ``path`` (relative to ``root``) if it is missing.

    Args:
        path: Directory path relative to ``root``.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check (no race between
    # checking and creating) and makedirs also creates missing parents.
    os.makedirs(os.path.join(root, path), exist_ok=True)
|
||||
|
||||
# Make sure the working directories exist before anything is read from or
# written to them: finished dialogs go to "output_dialogs", the file the user
# edits lives in "temp".
mkdir_if_not_exists_rel("output_dialogs")
mkdir_if_not_exists_rel("temp")
|
||||
|
||||
|
||||
# Load the dialog input template; its {{...}} placeholders are filled in
# further down with the randomized order data.
template_path = root + "/templates/dialog_input.txt"
with open(template_path, "r", encoding="utf-8") as template_file:
    dialog_input_template = template_file.read()
|
||||
|
||||
|
||||
# Random phone number: nine random digits written as three groups of three,
# for example "123 456 789".
random_digits = np.random.randint(0, 10, 9)
digit_groups = [
    "".join(str(digit) for digit in random_digits[start:start + 3])
    for start in range(0, 9, 3)
]
phone_number = " ".join(digit_groups)
|
||||
|
||||
# All spellings of the payment method the customer may use: English and
# Polish, each in lowercase and capitalized form.
PAYMENT_METHOD_VARIANTS = (
    "cash", "card", "Cash", "Card",
    "gotówka", "karta", "Gotówka", "Karta",
    "gotówką", "kartą", "Gotówką", "Kartą",
)

# One variant is picked at random for this dialog.
payment_method = np.random.choice(PAYMENT_METHOD_VARIANTS)
|
||||
|
||||
|
||||
|
||||
def _random_order_lines(available_items, min_count, max_count, name_prefix=""):
    """Pick a random number of distinct items and format them as order lines.

    Args:
        available_items: Names to choose from; the sequence is not modified.
        min_count: Smallest number of different items to pick (inclusive).
        max_count: Largest number of different items to pick (inclusive).
        name_prefix: Text put in front of every item name, e.g. "Pizza ".

    Returns:
        A list of strings such as ' "2x Pizza Salami"'. Every item appears at
        most once and gets a random quantity between 1 and 3.
    """
    remaining = list(available_items)
    # np.random.randint excludes the upper bound, hence the "+ 1".
    wanted = np.random.randint(min_count, max_count + 1)

    lines = []
    # Never ask for more distinct items than the category actually offers.
    for _ in range(min(wanted, len(remaining))):
        # Removing the picked name guarantees it cannot be drawn twice.
        picked = remaining.pop(np.random.randint(0, len(remaining)))
        quantity = np.random.randint(1, 4)
        lines.append(f" \"{quantity}x {name_prefix}{picked}\"")
    return lines


# An order always has 1-2 pizzas and optionally up to 2 drinks and 2 sauces.
order_lines = (
    _random_order_lines(possible_order_items["pizza"], 1, 2, "Pizza ")
    + _random_order_lines(possible_order_items["drink"], 0, 2)
    + _random_order_lines(possible_order_items["sauce"], 0, 2)
)

# The lines end up inside a Python list literal in the template, so they are
# separated by ",\n" with no separator after the last one.
order_items = ",\n".join(order_lines)
|
||||
|
||||
|
||||
|
||||
# Fill the template placeholders with the randomized values, one after another.
placeholder_values = (
    ("{{phone_number}}", phone_number),
    ("{{payment_method}}", payment_method),
    ("{{order_items}}", order_items),
)
for placeholder, value in placeholder_values:
    dialog_input_template = dialog_input_template.replace(placeholder, value)

# Store the filled-in template as temp/dialog_input.py; this is the file the
# user edits afterwards.
with open(root + "/temp/dialog_input.py", "w", encoding="utf-8") as output_file:
    output_file.write(dialog_input_template)
|
||||
|
||||
import shutil

# True when the VS Code command line launcher ("code") can be found on PATH.
# shutil.which does the PATH search itself: it does not raise when the PATH
# variable is unset and it also tries the PATHEXT extensions on Windows.
vscode_available = shutil.which("code") is not None
|
||||
|
||||
# Let the user edit temp/dialog_input.py and validate the result. The editor is
# re-opened until the file passes every check; afterwards the variables
# phone_number, delivery_address, payment_method, order_items and dialog hold
# the validated data.
#
# SECURITY NOTE: the upper part of the edited file is run through exec().
# That is acceptable only because the file is created by this tool and edited
# locally by the person running it. Never feed a file from an untrusted source
# through this code.
dialog_input_path = root + "/temp/dialog_input.py"
python_end_marker = "## PYTHON END ##"

while True:
    # Open the file in an editor and wait until the user closes it.
    # The path is quoted so that directories containing spaces keep working.
    if vscode_available:
        # --new-window: do not reuse an already open VS Code window.
        # --wait: the command only returns once the file has been closed.
        os.system(f'code "{dialog_input_path}" --new-window --wait')
    else:
        os.system(f'notepad "{dialog_input_path}"')

    with open(dialog_input_path, "r", encoding="utf-8") as dialog_input_f:
        dialog_input = dialog_input_f.read()

    # Everything above the marker is Python code, everything below it is the
    # conversation. Without the marker the two parts cannot be told apart.
    python_end_idx = dialog_input.find(python_end_marker)
    if python_end_idx == -1:
        print("The line \"## PYTHON END ##\" is missing. Please restore it.")
        continue

    # Run the Python part in its own namespace so that values left over from a
    # previous attempt can never be mistaken for freshly entered ones.
    edited_values = {}
    try:
        exec(dialog_input[:python_end_idx], edited_values)
    except Exception as error:
        # User-edited code can fail in any way; report it and let the user
        # fix the file instead of crashing the tool.
        print(f"Could not read the values above \"## PYTHON END ##\": {error}")
        continue

    phone_number = edited_values.get("phone_number", "")
    delivery_address = edited_values.get("delivery_address", "")
    payment_method = edited_values.get("payment_method", "")
    order_items = edited_values.get("order_items", "")

    # "not value" catches empty strings as well as an empty order_items list
    # and variables that were deleted from the file.
    if not all([phone_number, delivery_address, payment_method, order_items]):
        print("Some values are empty. Please fill them.")
        continue

    # Check the conversation: messages must alternate, pizzeria first.
    dialog = []
    dialog_ok = True
    expected_speaker = "piz"
    for line in dialog_input[python_end_idx:].splitlines():
        # Blank lines and comments (including the marker line) are skipped.
        if line == "" or line.startswith("#"):
            continue

        # Lines that are neither a "piz =" nor a "bot =" message are ignored.
        speaker_tag = line[:5]
        if speaker_tag not in ("piz =", "bot ="):
            continue

        if speaker_tag[:3] != expected_speaker:
            print("Wrong order of pizzeria (piz) and customer (bot) messages.")
            dialog_ok = False
            break

        message = line[5:].strip()
        if not message:
            print("Some messages are empty. Please fill them.")
            dialog_ok = False
            break

        dialog.append(message)
        expected_speaker = "bot" if expected_speaker == "piz" else "piz"

    if not dialog_ok:
        continue

    # An empty conversation would pass the even-length check below and then
    # break the code that appends to the last message.
    if not dialog:
        print("The conversation is empty. Please write it below the instructions.")
        continue

    # Pizzeria starts and customer ends, so the number of messages is even.
    if len(dialog) % 2 != 0:
        print("Dialog must start on pizzeria side and end on customer side.")
        continue

    break
|
||||
|
||||
|
||||
|
||||
|
||||
# The customer's final message carries the end-of-call marker.
dialog[-1] = dialog[-1] + " CALLEND"

# Echo the conversation; speakers alternate and the pizzeria speaks first,
# so even positions belong to "piz" and odd positions to "bot".
for message_idx, message in enumerate(dialog):
    speaker_label = "piz -" if message_idx % 2 == 0 else "bot -"
    print(speaker_label, message)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## find all json files consisting of number (0.json, 1.json .... 123.json)
|
||||
#files = [f for f in os.listdir("output_dialogs") if f.endswith(".json") and f[:-5].isdigit()]
|
||||
|
64
finetune_dialogs_tool/order_items.py
Normal file
64
finetune_dialogs_tool/order_items.py
Normal file
@ -0,0 +1,64 @@
|
||||
|
||||
# Our project is NOT associated with any of the brands listed below. They are just for experimental gpt fine-tuning purposes.
|
||||
# Below brands are registered trademarks of their respective owners.
|
||||
|
||||
# Pizza names: a first group of names followed by a second group of
# alternative spellings (several of them Polish). The spellings are kept
# exactly as they are, variants included.
_PIZZA_NAMES = [
    "Margheritta",
    "Capricciosa",
    "Salami",
    "Hawaiian",
    "Vegetariana",
    "Pepperoni",
    "Prosciutto",
    "Pesto",
    "Neapolitana",
    "Quattro Formaggi",

    "Margaritta",
    "Capriciosa",
    "Hawajska",
    "Wegetarianska",
    "Peperoni",
    "Prościutto",
    "Cztery sery",
]

# Drink names, including several spellings of the same drink.
_DRINK_NAMES = [
    "Coca Cola",
    "Cola",
    "Coca-Cola",
    "Kola",
    "Sprite",
    "Sprajt",
    "Fanta",
    "Pepsi",
    "Mirinda",
    "7up",
    "Seven up",
    "Woda",
    "Woda gazowana",
    "Woda niegazowana",
    "Sok pomarańczowy",
    "Sok jabłkowy",
    "Lemoniada",
    "Mountain Dew",
    "Red Bull",
    "Monster",
]

# Sauce names.
_SAUCE_NAMES = [
    "Sos Czosnkowy",
    "Sos Pomidorowy",
    "Ketchup",
    "Keczup",
    "Sos Pikantny",
    "Sos Ostry",
    "Sos Słodko-kwaśny",
    "Sos Łagodny",
    "Sos Śmietanowy",
    "Sos Musztardowy",
    "Sos Majonezowy",
    "Sos Barbecue",
]

# Items an order can be built from, grouped by category.
possible_order_items = {
    "pizza": _PIZZA_NAMES,
    "drink": _DRINK_NAMES,
    "sauce": _SAUCE_NAMES,
}
|
23
finetune_dialogs_tool/templates/dialog_input.txt
Normal file
23
finetune_dialogs_tool/templates/dialog_input.txt
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
# Phone number is already randomized. No need to change it.
|
||||
phone_number = "{{phone_number}}"
|
||||
|
||||
# Make up an address. It can be real or fake. Put it between the double quotes.
|
||||
delivery_address = ""
|
||||
|
||||
# Payment method is already randomized. No need to change it.
|
||||
payment_method = "{{payment_method}}"
|
||||
|
||||
# Order items are already randomized. No need to change them.
|
||||
order_items = [
|
||||
{{order_items}}
|
||||
]
|
||||
|
||||
## PYTHON END ##
|
||||
|
||||
# Write the conversation below.
|
||||
# The conversation has to start with "piz" (short for pizzeria) and has to end with "bot".
|
||||
# Do not use double quotes (") in the conversation. Just type the text next to the = sign.
|
||||
|
||||
piz =
|
||||
bot =
|
15
finetune_dialogs_tool/templates/system_instructions.txt
Normal file
15
finetune_dialogs_tool/templates/system_instructions.txt
Normal file
@ -0,0 +1,15 @@
|
||||
You are a bot that acts as a customer ordering a pizza. You will be connected by phone with a pizza place. You will be asked questions, for example where the pizza should be delivered, and you will answer using the data in the DATA section below. You HAVE to respond in full sentences, because your response will be converted into audio by TTS software, so you cannot use a list; just make a sentence like: i would like a margharitta and a cocacola please. Basically, write it the way you would say it: do not use digits, write numbers as words, and do not use abbreviations, write the full word.
|
||||
|
||||
DATA:
|
||||
phone: {{phone_number}}
|
||||
delivery location: {{delivery_location}}
|
||||
paymentMethod: {{payment_method}}
|
||||
{{order_items}}
|
||||
|
||||
REMEMBER: DON'T USE NUMBERS, USE WORDS. For example, don't say 1x, say one time. Also REMEMBER to replace a word with the form that fits the whole sentence. Example:
|
||||
WRONG:
|
||||
Chciałbym zamówić jedną Margherittę, dwie Colę, pięć Fant i jedną Sprite.
|
||||
RIGHT:
|
||||
Chciałbym zamówić jedną Margarittę, dwie Kole, pięć Fant i jednego Sprajta.
|
||||
|
||||
If the call ends, say at the end of your final response "CALLEND"
|
Loading…
Reference in New Issue
Block a user