250 lines
7.3 KiB
Python
250 lines
7.3 KiB
Python
|
|
# NOTE: This is a quick, temporary helper tool. The code is repetitive and was
# deliberately left unpolished; it is not how a serious project should be written.
|
|
|
|
import json
import os
import shutil
from datetime import datetime

import numpy as np

from order_items import possible_order_items
|
|
|
|
# Absolute path of the directory containing this script; every path the tool
# touches (templates/, temp/, output_dialogs/) is built relative to it.
root = os.path.dirname(os.path.abspath(__file__))

# True when a `code` executable (the VS Code command-line launcher) is found
# on PATH. shutil.which() replaces the hand-written PATH scan because it
# tolerates an unset PATH variable, does not mistake a directory named "code"
# for an executable, and honours PATHEXT on Windows (so `code.cmd` is found).
vscode_available = shutil.which("code") is not None
|
|
|
|
def mkdir_if_not_exists_rel(path):
    """Create the directory ``path`` (relative to ``root``) if it is missing.

    Args:
        path: Directory path relative to the script directory ``root``.

    ``os.makedirs(..., exist_ok=True)`` is used instead of an
    exists-then-mkdir check: there is no window in which the directory can
    appear between the check and the creation, and missing parent
    directories are created as well.
    """
    os.makedirs(os.path.join(root, path), exist_ok=True)
|
|
|
|
# Make sure the working folders exist next to the script:
#   output_dialogs/ - finished training samples (JSON files)
#   temp/           - the dialog draft that is currently being edited
for folder_name in ("output_dialogs", "temp"):
    mkdir_if_not_exists_rel(folder_name)
|
|
|
|
|
|
def _random_order_lines(pool, min_count, max_count, prefix=""):
    """Return formatted order lines for randomly chosen, distinct items.

    Args:
        pool: Sequence of item names to choose from (it is not modified).
        min_count: Smallest number of distinct items to pick.
        max_count: Largest number of distinct items to pick (inclusive).
        prefix: Text inserted between the quantity and the item name.

    Returns:
        A list of strings shaped like `` "2x Pizza <name>"`` (quoted, with a
        random quantity from 1 to 3), one per chosen item.
    """
    remaining = list(pool)
    # np.random.randint's upper bound is exclusive, hence the "+ 1".
    wanted = np.random.randint(min_count, max_count + 1)
    lines = []
    # Never draw more distinct items than the pool holds; without this cap a
    # too-small pool ends in np.random.randint(0, 0), which raises ValueError.
    for _ in range(min(wanted, len(remaining))):
        name = remaining.pop(np.random.randint(0, len(remaining)))
        lines.append(f' "{np.random.randint(1, 4)}x {prefix}{name}"')
    return lines


# Generate a fresh draft only when none exists, so that an unfinished draft
# from a previous run is re-opened instead of being overwritten.
if not os.path.exists(root + "/temp/dialog_input.py"):
    with open(root + "/templates/dialog_input.txt", "r", encoding="utf-8") as f:
        dialog_input_template = f.read()

    # Random 9-digit phone number in groups of three, for example "123 456 789".
    phone_number = " ".join(
        "".join(map(str, pack))
        for pack in np.random.randint(0, 10, 9).reshape(-1, 3)
    )

    # Payment method in English/Polish, mixed case and grammatical forms.
    payment_method = np.random.choice([
        "cash",
        "card",
        "Cash",
        "Card",
        "gotówka",
        "karta",
        "Gotówka",
        "Karta",
        "gotówką",
        "kartą",
        "Gotówką",
        "Kartą",
    ])

    # 1-2 pizzas, 0-2 drinks and 0-2 sauces, each kind without repetition.
    order_lines = (
        _random_order_lines(possible_order_items["pizza"], 1, 2, prefix="Pizza ")
        + _random_order_lines(possible_order_items["drink"], 0, 2)
        + _random_order_lines(possible_order_items["sauce"], 0, 2)
    )
    # Lines are separated by ",\n"; the last line carries no trailing comma.
    order_items = ",\n".join(order_lines)

    for placeholder, value in (
        ("{{phone_number}}", phone_number),
        ("{{payment_method}}", payment_method),
        ("{{order_items}}", order_items),
    ):
        dialog_input_template = dialog_input_template.replace(placeholder, value)

    # Save the filled-in template as the editable draft temp/dialog_input.py.
    with open(root + "/temp/dialog_input.py", "w", encoding="utf-8") as f:
        f.write(dialog_input_template)
|
|
|
|
# Path of the draft being edited. It is quoted on the command lines below so
# that a project directory containing spaces is passed as a single argument.
draft_path = root + "/temp/dialog_input.py"

# Keep re-opening the draft until it passes every check below.
while True:
    # Open the draft in an editor and wait for the user to close it
    # (VS Code in a separate window when available, Notepad otherwise).
    if vscode_available:
        os.system(f'code "{draft_path}" --new-window --wait')
    else:
        os.system(f'notepad "{draft_path}"')

    with open(draft_path, "r", encoding="utf-8") as draft_file:
        dialog_input = draft_file.read()

    # Everything before the marker is Python; everything after is the dialog.
    python_end_idx = dialog_input.find("## PYTHON END ##")
    if python_end_idx == -1:
        # Without this check find() == -1 would silently slice the draft at
        # its last character and execute/parse the wrong parts.
        print('Marker "## PYTHON END ##" not found. Please restore it.')
        continue

    phone_number, delivery_address, payment_method, order_items = None, None, None, None
    # NOTE(security): exec() runs the draft's header as Python code in this
    # module's namespace. That is only acceptable because the draft is edited
    # by the local user; never feed untrusted files to this tool.
    # It is expected to set phone_number, delivery_address, payment_method
    # and order_items.
    try:
        exec(dialog_input[:python_end_idx])
    except Exception as error:
        # The header is free-form user-edited code, so any error is possible;
        # report it and let the user fix the draft instead of crashing.
        print(f"Could not execute the Python part of the draft: {error!r}")
        continue

    # Reject missing or empty values (None, "" and also an empty item list).
    if any(not val for val in (phone_number, delivery_address, payment_method, order_items)):
        print("Some values are empty. Please fill them.")
        continue

    # Collect the messages; they must alternate piz, bot, piz, bot, ...
    dialog = []
    order_ok = True
    now_pizzeria = True
    for line in dialog_input[python_end_idx:].splitlines():
        if line.startswith("piz ="):
            is_pizzeria_line = True
        elif line.startswith("bot ="):
            is_pizzeria_line = False
        else:
            # Not a dialog line (blank line, marker, free text): ignore it.
            continue

        if is_pizzeria_line != now_pizzeria:
            print("Wrong order of pizzeria (piz) and customer (bot) messages.")
            order_ok = False
            break

        # Drop the 5-character "piz ="/"bot =" prefix and surrounding spaces.
        dialog.append(line[5:].strip())
        now_pizzeria = not now_pizzeria

    if not order_ok:
        continue

    # An empty dialog would crash later when the final message is tagged.
    if not dialog:
        print("Dialog is empty. Please add piz/bot messages.")
        continue

    # Even length means the dialog starts with the pizzeria and ends with the customer.
    if len(dialog) % 2 != 0:
        print("Dialog must start on pizzeria side and end on customer side.")
        continue

    break
|
|
|
|
|
|
|
|
|
|
# Tag the final (customer) message so the end of the call is marked.
dialog[-1] += " CALLEND"

# Preview the conversation; messages alternate, starting with the pizzeria.
print("=============================================")
for index, line in enumerate(dialog):
    print("🍕 -" if index % 2 == 0 else "🤖 -", line)
print("=============================================")

user_input = input("Everything seems okay. Press enter to save it as a new JSON file. Type anything to abort and exit.")
if user_input != "":
    print("Aborting and exiting.")
    # `exit()` is a helper injected by the `site` module for interactive use
    # and is not guaranteed to exist (e.g. under `python -S`); raising
    # SystemExit ends the script the same way without that dependency.
    raise SystemExit
|
|
|
|
|
|
|
|
|
|
# Load the system-instructions template and fill in the order details.
with open(root + "/templates/system_instructions.txt", "r", encoding="utf-8") as template_file:
    system_instructions_template = template_file.read()

# One order item per line, with no trailing newline.
order_items_str = "\n".join(f"{item}" for item in order_items)

# Placeholders are substituted in this exact order.
substitutions = {
    "{{phone_number}}": phone_number,
    "{{delivery_location}}": delivery_address,
    "{{payment_method}}": payment_method,
    "{{order_items}}": order_items_str,
}
for placeholder, value in substitutions.items():
    system_instructions_template = system_instructions_template.replace(placeholder, value)
|
|
|
|
|
|
# Assemble the chat fine-tuning sample: the system message first, then the
# dialog turns. The pizzeria speaks first and is stored under the "user"
# role; the customer's replies are stored under the "assistant" role.
roles = ("user", "assistant")
training_dict = {
    "messages": [
        {"role": "system", "content": system_instructions_template},
        *(
            {"role": roles[turn % 2], "content": text}
            for turn, text in enumerate(dialog)
        ),
    ]
}
|
|
|
|
|
|
## find all json files consisting of number (0.json, 1.json .... 123.json)
|
|
#files = [f for f in os.listdir(root + "/output_dialogs") if f.endswith(".json") and f[:-5].isdigit()]
|
|
#
|
|
## get name for next file (number higher than the highest number in the folder)
|
|
#if len(files) == 0:
|
|
# next_file = "0.json"
|
|
#else:
|
|
# next_file = str(max([int(f[:-5]) for f in files]) + 1) + ".json"
|
|
|
|
|
|
# Name the output file after the current local time (YYYY-MM-DD_HH-MM-SS.json).
next_file = f"{datetime.now():%Y-%m-%d_%H-%M-%S}.json"

# Write the sample to output_dialogs/<next_file> as indented, non-ASCII-escaped JSON.
output_path = f"{root}/output_dialogs/{next_file}"
with open(output_path, "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(training_dict, indent=4, ensure_ascii=False))

# The draft has been turned into a sample, so remove temp/dialog_input.py;
# a new draft is generated on the next run when the file is missing.
os.remove(f"{root}/temp/dialog_input.py")

print(f"Saved as {next_file}")