@@ -70,7 +70,7 @@
     "from importlib.metadata import version\n",
     "\n",
     "pkgs = [\"tqdm\", # Progress bar\n",
-    "        ]\n",
+    "        ]\n",
     "\n",
     "for p in pkgs:\n",
     "    print(f\"{p} version: {version(p)}\")"
@@ -218,12 +218,13 @@
     "import urllib.request\n",
     "import json\n",
     "\n",
+    "\n",
     "def query_model(prompt, model=\"llama3\", url=\"http://localhost:11434/api/chat\"):\n",
     "    # Create the data payload as a dictionary\n",
     "    data = {\n",
     "        \"model\": model,\n",
-    "        \"seed\":123, # for deterministic responses\n",
-    "        \"temperature\":0, # for deterministic responses\n",
+    "        \"seed\": 123, # for deterministic responses\n",
+    "        \"temperature\": 0, # for deterministic responses\n",
     "        \"messages\": [\n",
     "            {\"role\": \"user\", \"content\": prompt}\n",
     "        ]\n",
@@ -290,7 +291,7 @@
     "\n",
     "with open(json_file, \"r\") as file:\n",
     "    json_data = json.load(file)\n",
-    "    \n",
+    "\n",
     "print(\"Number of entries:\", len(json_data))"
    ]
   },
@@ -520,7 +521,7 @@
     "        f\"and correct output `{entry['output']}`, \"\n",
     "        f\"score the model response `{entry['model 1 response']}`\"\n",
     "        f\" on a scale from 0 to 100, where 100 is the best score. \"\n",
-    "    )\n",
+    "    )\n",
     "    print(\"\\nDataset response:\")\n",
     "    print(\">>\", entry['output'])\n",
     "    print(\"\\nModel response:\")\n",
@@ -547,6 +548,7 @@
    "source": [
     "from tqdm import tqdm\n",
     "\n",
+    "\n",
     "def generate_model_scores(json_data, json_key):\n",
     "    scores = []\n",
     "    for entry in tqdm(json_data, desc=\"Scoring entries\"):\n",