test_ch04.py

# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from llms_from_scratch.ch04 import GPTModel, generate_text_simple

import torch
import tiktoken


def test_GPTModel():
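    """Smoke test for GPTModel + generate_text_simple, pinned by a fixed seed.

    With torch.manual_seed(123), the randomly initialized 124M-parameter
    configuration must reproduce the exact token IDs asserted at the end.
    """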
    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.1,        # Dropout rate
        "qkv_bias": False        # Query-Key-Value bias
    }

    torch.manual_seed(123)
    model = GPTModel(GPT_CONFIG_124M)
    model.eval()  # disable dropout

    start_context = "Hello, I am"

    tokenizer = tiktoken.get_encoding("gpt2")
    encoded = tokenizer.encode(start_context)
    encoded_tensor = torch.tensor(encoded).unsqueeze(0)
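    # unsqueeze(0) adds a batch dimension: encoded_tensor has shape
    # (1, num_prompt_tokens)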

    print(f"\n{50*'='}\n{22*' '}IN\n{50*'='}")
    print("\nInput text:", start_context)
    print("Encoded input text:", encoded)
    print("encoded_tensor.shape:", encoded_tensor.shape)

    out = generate_text_simple(
        model=model,
        idx=encoded_tensor,
        max_new_tokens=10,
        context_size=GPT_CONFIG_124M["context_length"]
    )
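    # out has shape (1, 4 + 10): the 4 prompt tokens plus max_new_tokens
    # IDs appended one at a time by generate_text_simple (greedy argmax
    # decoding, as implemented in chapter 4)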
    expect = torch.tensor([
        [15496, 11, 314, 716, 27018, 24086, 47843, 30961, 42348, 7267,
         49706, 43231, 47062, 34657]
    ])
    assert torch.equal(expect, out)
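    # Note: the model is untrained, so decoding the output (e.g. via
    # tokenizer.decode(out.squeeze(0).tolist())) yields gibberish; the
    # assert above pins the generated IDs as a deterministic regression check.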