From d67b2cf01ca017a5f3241528b878c6187436f7f9 Mon Sep 17 00:00:00 2001 From: Looki2000 Date: Mon, 13 Nov 2023 22:33:13 +0100 Subject: [PATCH] finetool dialogs tool finaly working! --- finetune_dialogs_tool/main.py | 174 ++++++++++++------ .../templates/system_instructions.txt | 5 +- 2 files changed, 118 insertions(+), 61 deletions(-) diff --git a/finetune_dialogs_tool/main.py b/finetune_dialogs_tool/main.py index 24bb1a6..cddfbba 100644 --- a/finetune_dialogs_tool/main.py +++ b/finetune_dialogs_tool/main.py @@ -1,11 +1,14 @@ # I know this code sucks and there are many repeating parts but it's just a simple temporary tool and I didn't bother to make it better +# This is not how something serious should be done import os import numpy as np from order_items import possible_order_items +import json root= os.path.dirname(os.path.abspath(__file__)) +vscode_available = any(os.access(os.path.join(path, "code"), os.X_OK) for path in os.environ["PATH"].split(os.pathsep)) def mkdir_if_not_exists_rel(path): if not os.path.exists(root + "/" + path): @@ -19,76 +22,76 @@ mkdir_if_not_exists_rel("output_dialogs") mkdir_if_not_exists_rel("temp") -# read templates/dialog_input.txt -with open(root + "/templates/dialog_input.txt", "r", encoding="utf-8") as f: - dialog_input_template = f.read() +# if temp/dialog_input.py does not exist, do the processing instead of just opening it +if not os.path.exists(root + "/temp/dialog_input.py"): + # read templates/dialog_input.txt + with open(root + "/templates/dialog_input.txt", "r", encoding="utf-8") as f: + dialog_input_template = f.read() -# random phone number string. for example "123 456 789" -phone_number = " ".join("".join(map(str, pack)) for pack in np.random.randint(0, 10, 9).reshape(-1, 3)) + # random phone number string. 
for example "123 456 789" + phone_number = " ".join("".join(map(str, pack)) for pack in np.random.randint(0, 10, 9).reshape(-1, 3)) -payment_method = np.random.choice([ - "cash", - "card", - "Cash", - "Card", - "gotówka", - "karta", - "Gotówka", - "Karta", - "gotówką", - "kartą", - "Gotówką", - "Kartą", -]) + payment_method = np.random.choice([ + "cash", + "card", + "Cash", + "Card", + "gotówka", + "karta", + "Gotówka", + "Karta", + "gotówką", + "kartą", + "Gotówką", + "Kartą", + ]) -order_items = "" + order_items = "" -# randint high is exclusive so range 1 to 3 is actually 1 to 2 !!! + # randint high is exclusive so range 1 to 3 is actually 1 to 2 !!! -# pizza -pizza = possible_order_items["pizza"].copy() -for _ in range(np.random.randint(1, 3)): - item_idx = np.random.randint(0, len(pizza)) + # pizza + pizza = possible_order_items["pizza"].copy() + for _ in range(np.random.randint(1, 3)): + item_idx = np.random.randint(0, len(pizza)) - order_items += f" \"{np.random.randint(1, 4)}x Pizza {pizza[item_idx]}\",\n" + order_items += f" \"{np.random.randint(1, 4)}x Pizza {pizza[item_idx]}\",\n" - del pizza[item_idx] + del pizza[item_idx] -# drink -drink = possible_order_items["drink"].copy() -for _ in range(np.random.randint(0, 3)): - item_idx = np.random.randint(0, len(drink)) + # drink + drink = possible_order_items["drink"].copy() + for _ in range(np.random.randint(0, 3)): + item_idx = np.random.randint(0, len(drink)) - order_items += f" \"{np.random.randint(1, 4)}x {drink[item_idx]}\",\n" + order_items += f" \"{np.random.randint(1, 4)}x {drink[item_idx]}\",\n" - del drink[item_idx] + del drink[item_idx] -# sauce -sauce = possible_order_items["sauce"].copy() -for _ in range(np.random.randint(0, 3)): - item_idx = np.random.randint(0, len(sauce)) + # sauce + sauce = possible_order_items["sauce"].copy() + for _ in range(np.random.randint(0, 3)): + item_idx = np.random.randint(0, len(sauce)) - order_items += f" \"{np.random.randint(1, 4)}x {sauce[item_idx]}\",\n" + 
order_items += f" \"{np.random.randint(1, 4)}x {sauce[item_idx]}\",\n" - del sauce[item_idx] + del sauce[item_idx] -order_items = order_items[:-2] + order_items = order_items[:-2] -dialog_input_template = dialog_input_template.replace("{{phone_number}}", phone_number) -dialog_input_template = dialog_input_template.replace("{{payment_method}}", payment_method) -dialog_input_template = dialog_input_template.replace("{{order_items}}", order_items) + dialog_input_template = dialog_input_template.replace("{{phone_number}}", phone_number) + dialog_input_template = dialog_input_template.replace("{{payment_method}}", payment_method) + dialog_input_template = dialog_input_template.replace("{{order_items}}", order_items) -# save to temp/dialog_input.py -with open(root + "/temp/dialog_input.py", "w", encoding="utf-8") as f: - f.write(dialog_input_template) - -vscode_available = any(os.access(os.path.join(path, "code"), os.X_OK) for path in os.environ["PATH"].split(os.pathsep)) + # save to temp/dialog_input.py + with open(root + "/temp/dialog_input.py", "w", encoding="utf-8") as f: + f.write(dialog_input_template) while True: # open it in notepad and wait for user to close it @@ -111,14 +114,14 @@ while True: # execute part from the beginning to "## PYTHON END ##" as a python code here python_end_idx = dialog_input.find("## PYTHON END ##") - phone_number, payment_method, order_items = "", "", "" + phone_number, delivery_address, payment_method, order_items = None, None, None, None exec(dialog_input[:python_end_idx]) # it will set variables: phone_number, payment_method and order_items # check if theres any empty variables not_ok = False for val in [phone_number, delivery_address, payment_method, order_items]: - if val == "": + if val == "" or val is None: print("Some values are empty. 
Please fill them.") not_ok = True break @@ -167,21 +170,74 @@ while True: dialog[-1] += " CALLEND" now_pizzeria = True +print("=============================================") for line in dialog: - print("piz -" if now_pizzeria else "bot -", line) + print("🍕 -" if now_pizzeria else "🤖 -", line) + now_pizzeria = not now_pizzeria +print("=============================================") + + + +input("Everything seems okay. Press enter to save it as a new JSON file.") + + + + +# read templates/system_instructions.txt +with open(root + "/templates/system_instructions.txt", "r", encoding="utf-8") as f: + system_instructions_template = f.read() + +system_instructions_template = system_instructions_template.replace("{{phone_number}}", phone_number) +system_instructions_template = system_instructions_template.replace("{{delivery_location}}", delivery_address) +system_instructions_template = system_instructions_template.replace("{{payment_method}}", payment_method) + +order_items_str = "" +for line in order_items: + order_items_str += f"{line}\n" +order_items_str = order_items_str[:-1] + +system_instructions_template = system_instructions_template.replace("{{order_items}}", order_items_str) + + +# use all of these to create chatgpt finetuning JSON file + +training_dict = { + "messages": [] +} + +# add system instructions +training_dict["messages"].append({ + "role": "system", + "content": system_instructions_template +}) + +# add dialog +now_pizzeria = True +for line in dialog: + training_dict["messages"].append({ + "role": "user" if now_pizzeria else "assistant", + "content": line + }) now_pizzeria = not now_pizzeria +# find all json files consisting of number (0.json, 1.json .... 
123.json) +files = [f for f in os.listdir(root + "/output_dialogs") if f.endswith(".json") and f[:-5].isdigit()] + +# get name for next file (number higher than the highest number in the folder) +if len(files) == 0: + next_file = "0.json" +else: + next_file = str(max([int(f[:-5]) for f in files]) + 1) + ".json" +# save to output_dialogs/next_file +with open(root + "/output_dialogs/" + next_file, "w", encoding="utf-8") as f: + json.dump(training_dict, f, indent=4, ensure_ascii=False) +# remove temp/dialog_input.py +os.remove(root + "/temp/dialog_input.py") -## find all json files consisting of number (0.json, 1.json .... 123.json) -#files = [f for f in os.listdir("output_dialogs") if f.endswith(".json") and f[:-5].isdigit()] -# -## get name for next file (number higher than the highest number in the folder) -#if len(files) == 0: -# next_file = "0.json" -#else: -# next_file = str(max([int(f[:-5]) for f in files]) + 1) + ".json" \ No newline at end of file + +print(f"Saved as {next_file}") \ No newline at end of file diff --git a/finetune_dialogs_tool/templates/system_instructions.txt b/finetune_dialogs_tool/templates/system_instructions.txt index 65301c8..c97a228 100644 --- a/finetune_dialogs_tool/templates/system_instructions.txt +++ b/finetune_dialogs_tool/templates/system_instructions.txt @@ -1,12 +1,13 @@ -You are a bot that will act as a guy that orders a pizza you will be connected on the phone with a pizza place. you will be asked questions about example: Where should be the pizza delivered etc. and you will respond with the data in the data section also you HAVE to respond in a full sentence because it will be transformed into audio using a tts software so you cant use a list just make a sentence like : i would like a margharitta and a cocacola please. 
basically just write it like you would say it dont put numbers but put words that are numbers dont add shortcuts add the full word +You are a bot that acts as a customer ordering a pizza. You will be connected by phone to a pizza place. You will be asked questions, for example where the pizza should be delivered, and you will respond with the data from the DATA section. You HAVE to respond in full sentences, because your reply will be converted to audio by TTS software, so you can't use a list; just make a sentence like: I would like a Margherita and a Coca-Cola, please. Basically, write it the way you would say it: don't write numbers as digits, write them as words, and don't use abbreviations, write the full word. DATA: phone: {{phone_number}} delivery location: {{delivery_location}} paymentMethod: {{payment_method}} +order: {{order_items}} -REMEMBER DONT USE NUMBERS USE WORDS for example dont say 1x say one time also REMEMBER to use replacement words to a word so it is appropriate to the whole sentence example: +REMEMBER: DON'T USE NUMBERS, USE WORDS. For example, don't say 1x, say one time. Also REMEMBER to change the form of a word so that it fits the whole sentence. Example: WRONG: Chciałbym zamówić jedną Margherittę, dwie Colę, pięć Fant i jedną Sprite. RIGHT: