diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4a42295
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+finetune_dialogs_tool/temp/
+finetune_dialogs_tool/output_dialogs/
+finetune_dialogs_tool/__pycache__/
\ No newline at end of file
diff --git a/finetune_dialogs_tool/main.py b/finetune_dialogs_tool/main.py
index e83ccdc..24bb1a6 100644
--- a/finetune_dialogs_tool/main.py
+++ b/finetune_dialogs_tool/main.py
@@ -1,11 +1,171 @@
-import gradio as gr
+"""Interactive helper for hand-writing pizzeria/customer fine-tuning dialogs.
+
+It fills templates/dialog_input.txt with a random phone number, payment method
+and order, opens the result in an editor, validates what the user typed and
+prints the finished dialog.
+
+NOTE: this is a quick temporary tool, so it is kept as a simple flat script.
+"""
+
 import os
+import shutil
+import subprocess
+
+import numpy as np
+
+from order_items import possible_order_items
 
 root= os.path.dirname(os.path.abspath(__file__))
 
+PYTHON_END_MARKER = "## PYTHON END ##"
+REQUIRED_FIELDS = ("phone_number", "delivery_address", "payment_method", "order_items")
+
+
+def mkdir_if_not_exists_rel(path):
+    """Create `path` (relative to this script's directory) if it is missing."""
+    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
+    os.makedirs(os.path.join(root, path), exist_ok=True)
+
+
+def random_order_lines(names, min_count, max_count, prefix=""):
+    """Return template lines for a random selection of distinct items.
+
+    Picks between min_count and max_count (both inclusive) different entries
+    of `names`, each with a random quantity from 1 to 3, and formats them as
+    quoted list elements for the template, e.g. `    "2x Pizza Salami"`.
+    """
+    # randint's upper bound is exclusive, hence the "+ 1"
+    count = min(np.random.randint(min_count, max_count + 1), len(names))
+    picked = np.random.permutation(len(names))[:count]
+    return [
+        f'    "{np.random.randint(1, 4)}x {prefix}{names[idx]}"'
+        for idx in picked
+    ]
+
+
+def parse_dialog(text):
+    """Extract the alternating pizzeria/customer messages from the edited text.
+
+    Returns `(messages, problem)`: the message texts in order, and either None
+    or a description of the first problem found.
+    """
+    messages = []
+    expected = "piz"
+    for line in text.splitlines():
+        line = line.strip()
+        if not line.startswith(("piz =", "bot =")):
+            continue  # blank lines, comments and any other text are ignored
+        if not line.startswith(expected):
+            return messages, "Wrong order of pizzeria (piz) and customer (bot) messages."
+        message = line[5:].strip()
+        if not message:
+            return messages, "Some messages are empty. Please fill them."
+        messages.append(message)
+        expected = "bot" if expected == "piz" else "piz"
+
+    # a complete dialog is non-empty and ends with the customer (even length)
+    if not messages or len(messages) % 2 != 0:
+        return messages, "Dialog must start on pizzeria side and end on customer side."
+    return messages, None
+
+
 # create output dialogs folder if it does not exist
-if not os.path.exists(root + "/output_dialogs"):
-    os.mkdir(root + "/output_dialogs")
+mkdir_if_not_exists_rel("output_dialogs")
+mkdir_if_not_exists_rel("temp")
+
+dialog_input_path = os.path.join(root, "temp", "dialog_input.py")
+
+# read templates/dialog_input.txt
+with open(os.path.join(root, "templates", "dialog_input.txt"), "r", encoding="utf-8") as f:
+    dialog_input_template = f.read()
+
+# random phone number string. for example "123 456 789"
+phone_number = " ".join("".join(map(str, pack)) for pack in np.random.randint(0, 10, 9).reshape(-1, 3))
+
+payment_method = np.random.choice([
+    "cash",
+    "card",
+    "Cash",
+    "Card",
+    "gotówka",
+    "karta",
+    "Gotówka",
+    "Karta",
+    "gotówką",
+    "kartą",
+    "Gotówką",
+    "Kartą",
+])
+
+# 1-2 pizzas, 0-2 drinks and 0-2 sauces, never the same item twice
+order_items = ",\n".join(
+    random_order_lines(possible_order_items["pizza"], 1, 2, prefix="Pizza ")
+    + random_order_lines(possible_order_items["drink"], 0, 2)
+    + random_order_lines(possible_order_items["sauce"], 0, 2)
+)
+
+dialog_input_template = dialog_input_template.replace("{{phone_number}}", phone_number)
+dialog_input_template = dialog_input_template.replace("{{payment_method}}", payment_method)
+dialog_input_template = dialog_input_template.replace("{{order_items}}", order_items)
+
+# save to temp/dialog_input.py
+with open(dialog_input_path, "w", encoding="utf-8") as f:
+    f.write(dialog_input_template)
+
+# shutil.which also resolves code.cmd on Windows and copes with an unset PATH
+vscode_path = shutil.which("code")
+
+while True:
+    # open the file in an editor and wait for the user to close it
+    # (VS Code in a new window if installed, otherwise notepad).
+    # Argument lists keep paths that contain spaces intact.
+    if vscode_path:
+        subprocess.run([vscode_path, dialog_input_path, "--new-window", "--wait"])
+    else:
+        subprocess.run(["notepad", dialog_input_path])
+
+    with open(dialog_input_path, "r", encoding="utf-8") as f:
+        dialog_input = f.read()
+
+    python_end_idx = dialog_input.find(PYTHON_END_MARKER)
+    if python_end_idx == -1:
+        print(f'The "{PYTHON_END_MARKER}" line is missing. Please restore it.')
+        continue
+
+    # Execute the part before the marker as python code; it defines the
+    # REQUIRED_FIELDS variables. NOTE: exec is tolerable only because the file
+    # is edited locally by the person running this tool - never feed it
+    # untrusted input.
+    values = {}
+    try:
+        exec(dialog_input[:python_end_idx], values)
+    except Exception as error:
+        print(f"Could not read the values above the marker: {error}")
+        continue
+
+    # check if any variable is empty (or was deleted)
+    if not all(values.get(name) for name in REQUIRED_FIELDS):
+        print("Some values are empty. Please fill them.")
+        continue
+
+    # check the conversation
+    dialog, problem = parse_dialog(dialog_input[python_end_idx:])
+    if problem:
+        print(problem)
+        continue
+
+    break
+
+# keep the edited values available at module level, as exec used to do
+phone_number, delivery_address, payment_method, order_items = (
+    values[name] for name in REQUIRED_FIELDS
+)
+
+dialog[-1] += " CALLEND"
+
+for idx, line in enumerate(dialog):
+    print("piz -" if idx % 2 == 0 else "bot -", line)
+
 
 ## find all json files consisting of number (0.json, 1.json .... 123.json)
 #files = [f for f in os.listdir("output_dialogs") if f.endswith(".json") and f[:-5].isdigit()]
diff --git a/finetune_dialogs_tool/order_items.py b/finetune_dialogs_tool/order_items.py
new file mode 100644
index 0000000..10630f0
--- /dev/null
+++ b/finetune_dialogs_tool/order_items.py
@@ -0,0 +1,64 @@
+
+# Our project is NOT associated with any of the brands listed below. They are just for experimental gpt fine-tuning purposes.
+# Below brands are registered trademarks of their respective owners.
+
+possible_order_items = {
+    "pizza": [
+        "Margheritta",
+        "Capricciosa",
+        "Salami",
+        "Hawaiian",
+        "Vegetariana",
+        "Pepperoni",
+        "Prosciutto",
+        "Pesto",
+        "Neapolitana",
+        "Quattro Formaggi",
+
+        "Margaritta",
+        "Capriciosa",
+        "Hawajska",
+        "Wegetarianska",
+        "Peperoni",
+        "Prościutto",
+        "Cztery sery",
+    ],
+
+    "drink": [
+        "Coca Cola",
+        "Cola",
+        "Coca-Cola",
+        "Kola",
+        "Sprite",
+        "Sprajt",
+        "Fanta",
+        "Pepsi",
+        "Mirinda",
+        "7up",
+        "Seven up",
+        "Woda",
+        "Woda gazowana",
+        "Woda niegazowana",
+        "Sok pomarańczowy",
+        "Sok jabłkowy",
+        "Lemoniada",
+        "Mountain Dew",
+        "Red Bull",
+        "Monster",
+    ],
+
+    "sauce": [
+        "Sos Czosnkowy",
+        "Sos Pomidorowy",
+        "Ketchup",
+        "Keczup",
+        "Sos Pikantny",
+        "Sos Ostry",
+        "Sos Słodko-kwaśny",
+        "Sos Łagodny",
+        "Sos Śmietanowy",
+        "Sos Musztardowy",
+        "Sos Majonezowy",
+        "Sos Barbecue"
+    ]
+}
\ No newline at end of file
diff --git a/finetune_dialogs_tool/templates/dialog_input.txt b/finetune_dialogs_tool/templates/dialog_input.txt
new file mode 100644
index 0000000..368655f
--- /dev/null
+++ b/finetune_dialogs_tool/templates/dialog_input.txt
@@ -0,0 +1,23 @@
+
+# Phone number is already randomized. No need to change it.
+phone_number = "{{phone_number}}"
+
+# Make up some address. Can be real or fake. Put it between double quotes.
+delivery_address = ""
+
+# Payment method is already randomized. No need to change it.
+payment_method = "{{payment_method}}"
+
+# Order items are already randomized. No need to change it.
+order_items = [
+{{order_items}}
+]
+
+## PYTHON END ##
+
+# Write the conversation below.
+# The conversation has to start with "piz" (short for pizzeria) and has to end with "bot".
+# Do not use double quotes (") in the conversation. Just type the text next to the = sign.
+
+piz =
+bot =
\ No newline at end of file
diff --git a/finetune_dialogs_tool/templates/system_instructions.txt b/finetune_dialogs_tool/templates/system_instructions.txt
new file mode 100644
index 0000000..65301c8
--- /dev/null
+++ b/finetune_dialogs_tool/templates/system_instructions.txt
@@ -0,0 +1,15 @@
+You are a bot that will act as a guy that orders a pizza you will be connected on the phone with a pizza place. you will be asked questions about example: Where should be the pizza delivered etc. and you will respond with the data in the data section also you HAVE to respond in a full sentence because it will be transformed into audio using a tts software so you cant use a list just make a sentence like : i would like a margharitta and a cocacola please. basically just write it like you would say it dont put numbers but put words that are numbers dont add shortcuts add the full word
+
+DATA:
+phone: {{phone_number}}
+delivery location: {{delivery_location}}
+paymentMethod: {{payment_method}}
+{{order_items}}
+
+REMEMBER DONT USE NUMBERS USE WORDS for example dont say 1x say one time also REMEMBER to use replacement words to a word so it is appropriate to the whole sentence example:
+WRONG:
+Chciałbym zamówić jedną Margherittę, dwie Colę, pięć Fant i jedną Sprite.
+RIGHT:
+Chciałbym zamówić jedną Margarittę, dwie Kole, pięć Fant i jednego Sprajta.
+
+If the call ends, say at the end of your final response "CALLEND"
\ No newline at end of file