# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
#
# A minimal script for evaluating instruction responses with Ollama,
# based on the code in chapter 7

import json
import urllib.request

import psutil
from tqdm import tqdm


def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
    # Create the data payload as a dictionary
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "options": {  # Settings below are required for deterministic responses
            "seed": 123,
            "temperature": 0,
            "num_ctx": 2048
        }
    }

    # Convert the dictionary to a JSON-formatted string and encode it to bytes
    payload = json.dumps(data).encode("utf-8")

    # Create a request object, setting the method to POST and adding necessary headers
    request = urllib.request.Request(url, data=payload, method="POST")
    request.add_header("Content-Type", "application/json")

    # Send the request and capture the response, which Ollama streams
    # as newline-delimited JSON objects
    response_data = ""
    with urllib.request.urlopen(request) as response:
        # Read and decode the response line by line
        while True:
            line = response.readline().decode("utf-8")
            if not line:
                break
            response_json = json.loads(line)
            response_data += response_json["message"]["content"]

    return response_data
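
# Example usage (a minimal sketch; assumes an Ollama server is running locally
# and the "llama3" model has already been pulled, e.g. via `ollama pull llama3`):
#
#   answer = query_model("What do llamas eat?")
#   print(answer)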


def check_if_running(process_name):
    # Check whether any running process name contains the given string
    running = False
    for proc in psutil.process_iter(["name"]):
        if process_name in proc.info["name"]:
            running = True
            break
    return running
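
# For example, check_if_running("ollama") returns True if any running process
# has "ollama" in its name, such as the `ollama serve` background process.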


def format_input(entry):
    # Format an entry in the Alpaca-style prompt format used for instruction finetuning
    instruction_text = (
        f"Below is an instruction that describes a task. "
        f"Write a response that appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    )
    input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""
    return instruction_text + input_text
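
# Example (the entry values below are hypothetical):
#
#   format_input({"instruction": "Name the capital of France.", "input": ""})
#
# returns the prompt:
#
#   Below is an instruction that describes a task. Write a response that appropriately completes the request.
#
#   ### Instruction:
#   Name the capital of France.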


def main(file_path):
    # Make sure the Ollama server is running before issuing any requests
    ollama_running = check_if_running("ollama")

    if not ollama_running:
        raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
    print("Ollama running:", ollama_running)

    with open(file_path, "r") as file:
        test_data = json.load(file)

    model = "llama3"
    scores = generate_model_scores(test_data, "model_response", model)
    print(f"Number of scores: {len(scores)} of {len(test_data)}")
    print(f"Average score: {sum(scores)/len(scores):.2f}\n")


def generate_model_scores(json_data, json_key, model="llama3"):
    # Ask the judge model to rate each model response on a 0-100 scale
    scores = []
    for entry in tqdm(json_data, desc="Scoring entries"):
        # An empty response receives the lowest score
        if entry[json_key] == "":
            scores.append(0)
        else:
            prompt = (
                f"Given the input `{format_input(entry)}` "
                f"and correct output `{entry['output']}`, "
                f"score the model response `{entry[json_key]}`"
                f" on a scale from 0 to 100, where 100 is the best score. "
                f"Respond with the integer number only."
            )
            score = query_model(prompt, model)
            try:
                scores.append(int(score))
            except ValueError:
                print(f"Could not convert score: {score}")

    return scores
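
# Each entry in the test dataset `.json` file is expected to contain the keys
# used above; an illustrative entry (hypothetical values) might look like:
#
#   {
#     "instruction": "Rewrite the sentence in passive voice.",
#     "input": "The chef cooks the meal.",
#     "output": "The meal is cooked by the chef.",
#     "model_response": "The meal is cooked by the chef."
#   }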


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Evaluate model responses with ollama"
    )
    parser.add_argument(
        "--file_path",
        required=True,
        help=(
            "The path to the test dataset `.json` file with the"
            " `'output'` and `'model_response'` keys"
        )
    )
    args = parser.parse_args()

    main(file_path=args.file_path)
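
# Example invocation (a sketch; the script and dataset file names below are assumptions):
#
#   python ollama_evaluate.py --file_path instruction-data-with-response.json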