app_orig.py

# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

import tiktoken
import torch
import chainlit

# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from llms_from_scratch.ch04 import GPTModel
from llms_from_scratch.ch05 import (
    download_and_load_gpt2,
    generate,
    load_weights_into_gpt,
    text_to_token_ids,
    token_ids_to_text,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def get_model_and_tokenizer():
    """
    Code to load a GPT-2 model with pretrained weights from OpenAI.
    The code is similar to chapter 5.
    The model is downloaded automatically if it doesn't yet exist in the current folder.
    """
    CHOOSE_MODEL = "gpt2-small (124M)"  # Optionally replace with another model from the model_configs dict below

    BASE_CONFIG = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "drop_rate": 0.0,        # Dropout rate
        "qkv_bias": True         # Query-key-value bias
    }

    model_configs = {
        "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
        "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
        "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
        "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
    }

    # Extract the size string, e.g., "124M" from "gpt2-small (124M)"
    model_size = CHOOSE_MODEL.split(" ")[-1].lstrip("(").rstrip(")")
    BASE_CONFIG.update(model_configs[CHOOSE_MODEL])

    settings, params = download_and_load_gpt2(model_size=model_size, models_dir="gpt2")

    gpt = GPTModel(BASE_CONFIG)
    load_weights_into_gpt(gpt, params)
    gpt.to(device)
    gpt.eval()  # Inference mode: disables dropout

    tokenizer = tiktoken.get_encoding("gpt2")

    return tokenizer, gpt, BASE_CONFIG
# Obtain the necessary tokenizer and model files for the chainlit function below
# (this runs once when the app starts, not per message)
tokenizer, model, model_config = get_model_and_tokenizer()
@chainlit.on_message
async def main(message: chainlit.Message):
    """
    The main Chainlit function.
    """
    token_ids = generate(  # function uses `with torch.no_grad()` internally already
        model=model,
        idx=text_to_token_ids(message.content, tokenizer).to(device),  # The user text is provided via `message.content`
        max_new_tokens=50,
        context_size=model_config["context_length"],
        top_k=1,           # top_k=1 with temperature=0.0 amounts to greedy (deterministic) decoding
        temperature=0.0
    )

    text = token_ids_to_text(token_ids, tokenizer)

    await chainlit.Message(
        content=f"{text}",  # This returns the model response to the interface
    ).send()
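
# Usage sketch, assuming Chainlit's standard CLI and that this file keeps the
# name app_orig.py; start the app from the folder containing this script with:
#
#     pip install chainlit
#     chainlit run app_orig.py
#
# Chainlit then serves the chat UI locally (by default at http://localhost:8000),
# and each message submitted there is routed to the `main()` handler above.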