# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
# File for internal use (unit tests)
  6. from gpt import main
  7. expected = """
  8. ==================================================
  9. IN
  10. ==================================================
  11. Input text: Hello, I am
  12. Encoded input text: [15496, 11, 314, 716]
  13. encoded_tensor.shape: torch.Size([1, 4])
  14. ==================================================
  15. OUT
  16. ==================================================
  17. Output: tensor([[15496, 11, 314, 716, 27018, 24086, 47843, 30961, 42348, 7267,
  18. 49706, 43231, 47062, 34657]])
  19. Output length: 14
  20. Output text: Hello, I am Featureiman Byeswickattribute argue logger Normandy Compton analogous
  21. """
  22. def test_main(capsys):
  23. main()
  24. captured = capsys.readouterr()
  25. # Normalize line endings and strip trailing whitespace from each line
  26. normalized_expected = '\n'.join(line.rstrip() for line in expected.splitlines())
  27. normalized_output = '\n'.join(line.rstrip() for line in captured.out.splitlines())
  28. # Compare normalized strings
  29. assert normalized_output == normalized_expected