1 年間前 · 08040f024c
--- a/appendix-A/01_main-chapter-code/code-part1.ipynb
+++ b/appendix-A/01_main-chapter-code/code-part1.ipynb
@@ -46,7 +46,7 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "2.2.1\n"
			
 
				+      "2.4.0\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -658,13 +658,13 @@
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				       "Parameter containing:\n",
			
 
				-      "tensor([[ 0.0956,  0.1280, -0.0696,  ...,  0.0961,  0.0631,  0.1349],\n",
			
 
				-      "        [ 0.0983,  0.0580, -0.0574,  ...,  0.0981,  0.0370,  0.0516],\n",
			
 
				-      "        [-0.0429, -0.1411, -0.1399,  ...,  0.0767,  0.0019,  0.1400],\n",
			
 
				+      "tensor([[ 0.1182,  0.0606, -0.1292,  ..., -0.1126,  0.0735, -0.0597],\n",
			
 
				+      "        [-0.0249,  0.0154, -0.0476,  ..., -0.1001, -0.1288,  0.1295],\n",
			
 
				+      "        [ 0.0641,  0.0018, -0.0367,  ..., -0.0990, -0.0424, -0.0043],\n",
			
 
				       "        ...,\n",
			
 
				-      "        [-0.0777, -0.0726,  0.1273,  ..., -0.0613,  0.0491, -0.1381],\n",
			
 
				-      "        [-0.0830, -0.0969, -0.0473,  ...,  0.0762,  0.1318, -0.1174],\n",
			
 
				-      "        [ 0.0468, -0.0213,  0.0387,  ...,  0.0639,  0.0927, -0.0668]],\n",
			
 
				+      "        [ 0.0618,  0.0867,  0.1361,  ..., -0.0254,  0.0399,  0.1006],\n",
			
 
				+      "        [ 0.0842, -0.0512, -0.0960,  ..., -0.1091,  0.1242, -0.0428],\n",
			
 
				+      "        [ 0.0518, -0.1390, -0.0923,  ..., -0.0954, -0.0668, -0.0037]],\n",
			
 
				       "       requires_grad=True)\n"
			
 
				      ]
			
 
				     }
			
@@ -1264,7 +1264,7 @@
 
				    ],
			
 
				    "source": [
			
 
				     "model = NeuralNetwork(2, 2) # needs to match the original model exactly\n",
			
 
				-    "model.load_state_dict(torch.load(\"model.pth\"))"
			
 
				+    "model.load_state_dict(torch.load(\"model.pth\", weights_only=True))"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -1340,7 +1340,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.10.11"
			
 
				+   "version": "3.11.4"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,
			
--- a/appendix-A/01_main-chapter-code/code-part2.ipynb
+++ b/appendix-A/01_main-chapter-code/code-part2.ipynb
@@ -2,7 +2,9 @@
 
				  "cells": [
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "metadata": {},
			
 
				+   "metadata": {
			
 
				+    "id": "AAAnDw04iAm4"
			
 
				+   },
			
 
				    "source": [
			
 
				     "<table style=\"width:100%\">\n",
			
 
				     "<tr>\n",
			
@@ -54,14 +56,14 @@
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "RM7kGhwMF_nO",
			
 
				-    "outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
			
 
				+    "outputId": "b1872617-aacd-46fa-e5f3-f130fd81b246"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "2.0.1+cu118\n"
			
 
				+      "2.4.0+cu121\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -79,7 +81,7 @@
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "OXLCKXhiUkZt",
			
 
				-    "outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
			
 
				+    "outputId": "e9ca3c58-d92c-4c8b-a9c9-cd7fcc1fedb4"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
@@ -102,18 +104,15 @@
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "MTTlfh53Va-T",
			
 
				-    "outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
			
 
				+    "outputId": "bae76cb5-d1d3-441f-a7c5-93a161e2e86a"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
 
				-     "data": {
			
 
				-      "text/plain": [
			
 
				-       "tensor([5., 7., 9.])"
			
 
				-      ]
			
 
				-     },
			
 
				-     "execution_count": 3,
			
 
				-     "metadata": {},
			
 
				-     "output_type": "execute_result"
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "tensor([5., 7., 9.])\n"
			
 
				+     ]
			
 
				     }
			
 
				    ],
			
 
				    "source": [
			
@@ -125,13 +124,13 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				+   "execution_count": 4,
			
 
				    "metadata": {
			
 
				     "colab": {
			
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "Z4LwTNw7Vmmb",
			
 
				-    "outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
			
 
				+    "outputId": "9ad97923-bc8e-4c49-88bf-48dc1de56804"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
@@ -151,24 +150,24 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 7,
			
 
				+   "execution_count": 5,
			
 
				    "metadata": {
			
 
				     "colab": {
			
 
				      "base_uri": "https://localhost:8080/",
			
 
				-     "height": 184
			
 
				+     "height": 158
			
 
				     },
			
 
				     "id": "tKT6URN1Vuft",
			
 
				-    "outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
			
 
				+    "outputId": "8396eb18-47c8-47a1-c1b6-8bcb9480fb52"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
 
				      "ename": "RuntimeError",
			
 
				-     "evalue": "ignored",
			
 
				+     "evalue": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
			
 
				      "output_type": "error",
			
 
				      "traceback": [
			
 
				       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
			
 
				       "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
			
 
				-      "\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
			
 
				+      "\u001b[0;32m/tmp/ipykernel_2321/2079609735.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
			
 
				       "\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
			
 
				      ]
			
 
				     }
			
@@ -189,7 +188,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 8,
			
 
				+   "execution_count": 6,
			
 
				    "metadata": {
			
 
				     "id": "GyY59cjieitv"
			
 
				    },
			
@@ -215,7 +214,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 9,
			
 
				+   "execution_count": 7,
			
 
				    "metadata": {
			
 
				     "id": "v41gKqEJempa"
			
 
				    },
			
@@ -243,7 +242,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 23,
			
 
				+   "execution_count": 8,
			
 
				    "metadata": {
			
 
				     "id": "UPGVRuylep8Y"
			
 
				    },
			
@@ -271,7 +270,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 24,
			
 
				+   "execution_count": 9,
			
 
				    "metadata": {
			
 
				     "id": "drhg6IXofAXh"
			
 
				    },
			
@@ -302,13 +301,13 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 25,
			
 
				+   "execution_count": 10,
			
 
				    "metadata": {
			
 
				     "colab": {
			
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "7jaS5sqPWCY0",
			
 
				-    "outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
			
 
				+    "outputId": "8a5cd93d-671c-4abf-d5cd-97845f300ffd"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
@@ -362,7 +361,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 26,
			
 
				+   "execution_count": 11,
			
 
				    "metadata": {
			
 
				     "id": "4qrlmnPPe7FO"
			
 
				    },
			
@@ -391,13 +390,13 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 27,
			
 
				+   "execution_count": 12,
			
 
				    "metadata": {
			
 
				     "colab": {
			
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "1_-BfkfEf4HX",
			
 
				-    "outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
			
 
				+    "outputId": "9453154f-0a5b-4a44-a3c9-f010e08d5a2c"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
@@ -406,7 +405,7 @@
 
				        "1.0"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 27,
			
 
				+     "execution_count": 12,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -417,13 +416,13 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 21,
			
 
				+   "execution_count": 13,
			
 
				    "metadata": {
			
 
				     "colab": {
			
 
				      "base_uri": "https://localhost:8080/"
			
 
				     },
			
 
				     "id": "iYtXKBGEgKss",
			
 
				-    "outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
			
 
				+    "outputId": "d6cc870a-34de-490e-e5d3-23e6956744bd"
			
 
				    },
			
 
				    "outputs": [
			
 
				     {
			
@@ -432,7 +431,7 @@
 
				        "1.0"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 21,
			
 
				+     "execution_count": 13,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -443,21 +442,27 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "metadata": {},
			
 
				+   "metadata": {
			
 
				+    "id": "nc2LGFVbiAnB"
			
 
				+   },
			
 
				    "source": [
			
 
				     "### A.9.3 Training with multiple GPUs"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "metadata": {},
			
 
				+   "metadata": {
			
 
				+    "id": "cOUza9iQiAnC"
			
 
				+   },
			
 
				    "source": [
			
 
				     "See [DDP-script.py](DDP-script.py)"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "metadata": {},
			
 
				+   "metadata": {
			
 
				+    "id": "YOYk5Fh7iAnC"
			
 
				+   },
			
 
				    "source": [
			
 
				     "<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/12.webp\" width=\"600px\">\n",
			
 
				     "<img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/appendix-a_compressed/13.webp\" width=\"600px\">"
			
@@ -485,7 +490,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.11.4"
			
 
				+   "version": "3.10.14"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,
			
--- a/ch02/01_main-chapter-code/ch02.ipynb
+++ b/ch02/01_main-chapter-code/ch02.ipynb
@@ -46,7 +46,7 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "torch version: 2.3.1\n",
			
 
				+      "torch version: 2.4.0\n",
			
 
				       "tiktoken version: 0.7.0\n"
			
 
				      ]
			
 
				     }
			
@@ -1244,7 +1244,7 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "PyTorch version: 2.3.1\n"
			
 
				+      "PyTorch version: 2.4.0\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
--- a/ch02/01_main-chapter-code/dataloader.ipynb
+++ b/ch02/01_main-chapter-code/dataloader.ipynb
@@ -38,9 +38,39 @@
 
				     "This notebook contains the main takeaway, the data loading pipeline without the intermediate steps."
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "2b4e8f2d-cb81-41a3-8780-a70b382e18ae",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "Packages that are being used in this notebook:"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": 1,
			
 
				+   "id": "c7ed6fbe-45ac-40ce-8ea5-4edb212565e1",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "torch version: 2.4.0\n",
			
 
				+      "tiktoken version: 0.7.0\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from importlib.metadata import version\n",
			
 
				+    "\n",
			
 
				+    "print(\"torch version:\", version(\"torch\"))\n",
			
 
				+    "print(\"tiktoken version:\", version(\"tiktoken\"))"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 2,
			
 
				    "id": "0ed4b7db-3b47-4fd3-a4a6-5f4ed5dd166e",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -107,7 +137,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 2,
			
 
				+   "execution_count": 3,
			
 
				    "id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -125,7 +155,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				+   "execution_count": 4,
			
 
				    "id": "d3664332-e6bb-447e-8b96-203aafde8b24",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
--- a/ch02/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch02/01_main-chapter-code/exercise-solutions.ipynb
@@ -28,6 +28,36 @@
 
				     "# Chapter 2 Exercise solutions"
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "id": "2ed9978c-6d8e-401b-9731-bec3802cbb96",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "Packages that are being used in this notebook:"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "id": "78b55ed6-3312-4e30-89b8-51dc8a4a908f",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "torch version: 2.4.0\n",
			
 
				+      "tiktoken version: 0.7.0\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from importlib.metadata import version\n",
			
 
				+    "\n",
			
 
				+    "print(\"torch version:\", version(\"torch\"))\n",
			
 
				+    "print(\"tiktoken version:\", version(\"tiktoken\"))"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				    "id": "6f678e62-7bcb-4405-86ae-dce94f494303",
			
@@ -38,7 +68,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 1,
			
 
				+   "execution_count": 2,
			
 
				    "id": "7614337f-f639-42c9-a99b-d33f74fa8a03",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -50,7 +80,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 2,
			
 
				+   "execution_count": 3,
			
 
				    "id": "4f235d87-be85-4ddf-95a6-af59fca13d82",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -69,7 +99,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				+   "execution_count": 4,
			
 
				    "id": "45e4e8f0-3272-48bb-96f6-cced5584ceea",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -93,7 +123,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 4,
			
 
				+   "execution_count": 5,
			
 
				    "id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -103,7 +133,7 @@
 
				        "[33901]"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 4,
			
 
				+     "execution_count": 5,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -114,7 +144,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				+   "execution_count": 6,
			
 
				    "id": "d3664332-e6bb-447e-8b96-203aafde8b24",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -124,7 +154,7 @@
 
				        "[86]"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 5,
			
 
				+     "execution_count": 6,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -135,7 +165,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 6,
			
 
				+   "execution_count": 7,
			
 
				    "id": "2773c09d-c136-4372-a2be-04b58d292842",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -145,7 +175,7 @@
 
				        "[343]"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 6,
			
 
				+     "execution_count": 7,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -156,7 +186,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 7,
			
 
				+   "execution_count": 8,
			
 
				    "id": "8a6abd32-1e0a-4038-9dd2-673f47bcdeb5",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -166,7 +196,7 @@
 
				        "[86]"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 7,
			
 
				+     "execution_count": 8,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -177,7 +207,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 8,
			
 
				+   "execution_count": 9,
			
 
				    "id": "26ae940a-9841-4e27-a1df-b83fc8a488b3",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -187,7 +217,7 @@
 
				        "[220]"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 8,
			
 
				+     "execution_count": 9,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -198,7 +228,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 9,
			
 
				+   "execution_count": 10,
			
 
				    "id": "a606c39a-6747-4cd8-bb38-e3183f80908d",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -208,7 +238,7 @@
 
				        "[959]"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 9,
			
 
				+     "execution_count": 10,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -219,7 +249,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 10,
			
 
				+   "execution_count": 11,
			
 
				    "id": "47c7268d-8fdc-4957-bc68-5be6113f45a7",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -229,7 +259,7 @@
 
				        "'Akwirw ier'"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 10,
			
 
				+     "execution_count": 11,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -248,7 +278,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 11,
			
 
				+   "execution_count": 12,
			
 
				    "id": "4d50af16-937b-49e0-8ffd-42d30cbb41c9",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -310,7 +340,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 12,
			
 
				+   "execution_count": 13,
			
 
				    "id": "0128eefa-d7c8-4f76-9851-566dfa7c3745",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -323,7 +353,7 @@
 
				        "        [ 402,  271]])"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 12,
			
 
				+     "execution_count": 13,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -340,7 +370,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 13,
			
 
				+   "execution_count": 14,
			
 
				    "id": "ff5c1e90-c6de-4a87-adf6-7e19f603291c",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -353,7 +383,7 @@
 
				        "        [  402,   271, 10899,  2138,   257,  7026, 15632,   438]])"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 13,
			
 
				+     "execution_count": 14,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -385,7 +415,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.10.6"
			
 
				+   "version": "3.11.4"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,
			
--- a/ch03/01_main-chapter-code/ch03.ipynb
+++ b/ch03/01_main-chapter-code/ch03.ipynb
@@ -46,7 +46,7 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "torch version: 2.2.2\n"
			
 
				+      "torch version: 2.4.0\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
--- a/ch03/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch03/01_main-chapter-code/exercise-solutions.ipynb
@@ -28,6 +28,27 @@
 
				     "# Chapter 3 Exercise solutions"
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "id": "513b627b-c197-44bd-99a2-756391c8a1cd",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "torch version: 2.4.0\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from importlib.metadata import version\n",
			
 
				+    "\n",
			
 
				+    "import torch\n",
			
 
				+    "print(\"torch version:\", version(\"torch\"))"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				    "id": "33dfa199-9aee-41d4-a64b-7e3811b9a616",
			
@@ -38,7 +59,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				+   "execution_count": 2,
			
 
				    "id": "5fee2cf5-61c3-4167-81b5-44ea155bbaf2",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -59,7 +80,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 58,
			
 
				+   "execution_count": 3,
			
 
				    "id": "62ea289c-41cd-4416-89dd-dde6383a6f70",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -92,7 +113,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 59,
			
 
				+   "execution_count": 4,
			
 
				    "id": "7b035143-f4e8-45fb-b398-dec1bd5153d4",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -123,7 +144,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 60,
			
 
				+   "execution_count": 5,
			
 
				    "id": "7591d79c-c30e-406d-adfd-20c12eb448f6",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -135,7 +156,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 61,
			
 
				+   "execution_count": 6,
			
 
				    "id": "ddd0f54f-6bce-46cc-a428-17c2a56557d0",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -150,7 +171,7 @@
 
				        "        [-0.5299, -0.1081]], grad_fn=<MmBackward0>)"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 61,
			
 
				+     "execution_count": 6,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -161,7 +182,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 62,
			
 
				+   "execution_count": 7,
			
 
				    "id": "340908f8-1144-4ddd-a9e1-a1c5c3d592f5",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -176,7 +197,7 @@
 
				        "        [-0.5299, -0.1081]], grad_fn=<MmBackward0>)"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 62,
			
 
				+     "execution_count": 7,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -320,7 +341,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.10.6"
			
 
				+   "version": "3.11.4"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,
			
--- a/ch03/01_main-chapter-code/multihead-attention.ipynb
+++ b/ch03/01_main-chapter-code/multihead-attention.ipynb
@@ -364,14 +364,6 @@
 
				     "\n",
			
 
				     "print(\"context_vecs.shape:\", context_vecs.shape)"
			
 
				    ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "id": "f1d965a5-9b98-4554-8646-7ecd497874cb",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": []
			
 
				   }
			
 
				  ],
			
 
				  "metadata": {
			
--- a/ch04/01_main-chapter-code/ch04.ipynb
+++ b/ch04/01_main-chapter-code/ch04.ipynb
--- a/ch04/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch04/01_main-chapter-code/exercise-solutions.ipynb
@@ -28,6 +28,27 @@
 
				     "# Chapter 4 Exercise solutions"
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "id": "5b2fac7a-fdcd-437c-b1c4-0b35a31cd489",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "torch version: 2.4.0\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from importlib.metadata import version\n",
			
 
				+    "\n",
			
 
				+    "import torch\n",
			
 
				+    "print(\"torch version:\", version(\"torch\"))"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				    "id": "5fea8be3-30a1-4623-a6d7-b095c6c1092e",
			
@@ -38,7 +59,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 1,
			
 
				+   "execution_count": 2,
			
 
				    "id": "2751b0e5-ffd3-4be2-8db3-e20dd4d61d69",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -60,7 +81,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 2,
			
 
				+   "execution_count": 3,
			
 
				    "id": "1bcaffd1-0cf6-4f8f-bd53-ab88a37f443e",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -79,7 +100,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				+   "execution_count": 4,
			
 
				    "id": "c1dd06c1-ab6c-4df7-ba73-f9cd54b31138",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -141,7 +162,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 4,
			
 
				+   "execution_count": 5,
			
 
				    "id": "90185dea-81ca-4cdc-aef7-4aaf95cba946",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -205,7 +226,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				+   "execution_count": 6,
			
 
				    "id": "2587e011-78a4-479c-a8fd-961cc40a5fd4",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -262,7 +283,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 6,
			
 
				+   "execution_count": 7,
			
 
				    "id": "5fee2cf5-61c3-4167-81b5-44ea155bbaf2",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -282,7 +303,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 7,
			
 
				+   "execution_count": 8,
			
 
				    "id": "5aa1b0c1-d78a-48fc-ad08-4802458b43f7",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -351,7 +372,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 8,
			
 
				+   "execution_count": 9,
			
 
				    "id": "1d013d32-c275-4f42-be21-9010f1537227",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
--- a/ch05/01_main-chapter-code/ch05.ipynb
+++ b/ch05/01_main-chapter-code/ch05.ipynb
@@ -41,10 +41,10 @@
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				       "matplotlib version: 3.9.0\n",
			
 
				-      "numpy version: 1.25.2\n",
			
 
				-      "tiktoken version: 0.5.1\n",
			
 
				-      "torch version: 2.2.2\n",
			
 
				-      "tensorflow version: 2.15.0\n"
			
 
				+      "numpy version: 1.26.4\n",
			
 
				+      "tiktoken version: 0.7.0\n",
			
 
				+      "torch version: 2.4.0\n",
			
 
				+      "tensorflow version: 2.16.1\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -400,7 +400,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 8,
			
 
				+   "execution_count": 7,
			
 
				    "id": "c990ead6-53cd-49a7-a6d1-14d8c1518249",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -445,7 +445,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 9,
			
 
				+   "execution_count": 8,
			
 
				    "id": "54aef09c-d6e3-4238-8653-b3a1b0a1077a",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -485,7 +485,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 10,
			
 
				+   "execution_count": 9,
			
 
				    "id": "31402a67-a16e-4aeb-977e-70abb9c9949b",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -519,7 +519,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 11,
			
 
				+   "execution_count": 10,
			
 
				    "id": "9b003797-161b-4d98-81dc-e68320e09fec",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -563,7 +563,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 12,
			
 
				+   "execution_count": 11,
			
 
				    "id": "176ddf35-1c5f-4d7c-bf17-70f3e7069bd4",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -606,7 +606,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 13,
			
 
				+   "execution_count": 12,
			
 
				    "id": "695d6f64-5084-4c23-aea4-105c9e38cfe4",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -643,7 +643,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 14,
			
 
				+   "execution_count": 13,
			
 
				    "id": "0e17e027-ab9f-4fb5-ac9b-a009b831c122",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -681,7 +681,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 15,
			
 
				+   "execution_count": 14,
			
 
				    "id": "62d0816e-b29a-4c8f-a9a5-a167562de978",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -715,7 +715,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 16,
			
 
				+   "execution_count": 15,
			
 
				    "id": "168952a1-b964-4aa7-8e49-966fa26add54",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -779,7 +779,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 17,
			
 
				+   "execution_count": 16,
			
 
				    "id": "654fde37-b2a9-4a20-a8d3-0206c056e2ff",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -810,7 +810,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 18,
			
 
				+   "execution_count": 17,
			
 
				    "id": "6kgJbe4ehI4q",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -836,7 +836,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 19,
			
 
				+   "execution_count": 18,
			
 
				    "id": "j2XPde_ThM_e",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -862,7 +862,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 20,
			
 
				+   "execution_count": 19,
			
 
				    "id": "6b46a952-d50a-4837-af09-4095698f7fd1",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -918,7 +918,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 21,
			
 
				+   "execution_count": 20,
			
 
				    "id": "0959c855-f860-4358-8b98-bc654f047578",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -957,7 +957,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 22,
			
 
				+   "execution_count": 21,
			
 
				    "id": "f37b3eb0-854e-4895-9898-fa7d1e67566e",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -994,7 +994,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 23,
			
 
				+   "execution_count": 22,
			
 
				    "id": "ca0116d0-d229-472c-9fbf-ebc229331c3e",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1038,7 +1038,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 24,
			
 
				+   "execution_count": 23,
			
 
				    "id": "eb860488-5453-41d7-9870-23b723f742a0",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -1083,7 +1083,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 25,
			
 
				+   "execution_count": 24,
			
 
				    "id": "7b9de31e-4096-47b3-976d-b6d2fdce04bc",
			
 
				    "metadata": {
			
 
				     "id": "7b9de31e-4096-47b3-976d-b6d2fdce04bc"
			
@@ -1127,7 +1127,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 26,
			
 
				+   "execution_count": 25,
			
 
				    "id": "56f5b0c9-1065-4d67-98b9-010e42fc1e2a",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1135,7 +1135,7 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "Training loss: 10.98758347829183\n",
			
 
				+      "Training loss: 10.987583584255642\n",
			
 
				       "Validation loss: 10.98110580444336\n"
			
 
				      ]
			
 
				     }
			
@@ -1186,7 +1186,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 27,
			
 
				+   "execution_count": 26,
			
 
				    "id": "Mtp4gY0ZO-qq",
			
 
				    "metadata": {
			
 
				     "id": "Mtp4gY0ZO-qq"
			
@@ -1262,7 +1262,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 28,
			
 
				+   "execution_count": 27,
			
 
				    "id": "3422000b-7aa2-485b-92df-99372cd22311",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -1323,7 +1323,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 29,
			
 
				+   "execution_count": 28,
			
 
				    "id": "0WSRu2i0iHJE",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -1434,7 +1434,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 30,
			
 
				+   "execution_count": 29,
			
 
				    "id": "2734cee0-f6f9-42d5-b71c-fa7e0ef28b6d",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1501,7 +1501,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 31,
			
 
				+   "execution_count": 30,
			
 
				    "id": "01a5ce39-3dc8-4c35-96bc-6410a1e42412",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1543,7 +1543,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 32,
			
 
				+   "execution_count": 31,
			
 
				    "id": "6400572f-b3c8-49e2-95bc-433e55c5b3a1",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1563,7 +1563,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 33,
			
 
				+   "execution_count": 32,
			
 
				    "id": "b23b863e-252a-403c-b5b1-62bc0a42319f",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1615,7 +1615,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 34,
			
 
				+   "execution_count": 33,
			
 
				    "id": "0759e4c8-5362-467c-bec6-b0a19d1ba43d",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -1633,7 +1633,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 35,
			
 
				+   "execution_count": 34,
			
 
				    "id": "2e66e613-4aca-4296-a984-ddd0d80c6578",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1677,7 +1677,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 36,
			
 
				+   "execution_count": 35,
			
 
				    "id": "e4600713-c51e-4f53-bf58-040a6eb362b8",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1710,7 +1710,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 37,
			
 
				+   "execution_count": 36,
			
 
				    "id": "9dfb48f0-bc3f-46a5-9844-33b6c9b0f4df",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1779,7 +1779,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 38,
			
 
				+   "execution_count": 37,
			
 
				    "id": "2a7f908a-e9ec-446a-b407-fb6dbf05c806",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1802,7 +1802,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 39,
			
 
				+   "execution_count": 38,
			
 
				    "id": "753865ed-79c5-48b1-b9f2-ccb132ff1d2f",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1826,7 +1826,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 40,
			
 
				+   "execution_count": 39,
			
 
				    "id": "4844f000-c329-4e7e-aa89-16a2c4ebee43",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1862,7 +1862,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 41,
			
 
				+   "execution_count": 40,
			
 
				    "id": "8e318891-bcc0-4d71-b147-33ce55febfa3",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -1908,7 +1908,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 42,
			
 
				+   "execution_count": 41,
			
 
				    "id": "aa2a0d7d-0457-42d1-ab9d-bd67683e7ed8",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -1964,7 +1964,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 43,
			
 
				+   "execution_count": 42,
			
 
				    "id": "3d67d869-ac04-4382-bcfb-c96d1ca80d47",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -1982,14 +1982,14 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 44,
			
 
				+   "execution_count": 43,
			
 
				    "id": "9d57d914-60a3-47f1-b499-5352f4c457cb",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "model = GPTModel(GPT_CONFIG_124M)\n",
			
 
				     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
			
 
				-    "model.load_state_dict(torch.load(\"model.pth\", map_location=device))\n",
			
 
				+    "model.load_state_dict(torch.load(\"model.pth\", map_location=device, weights_only=True))\n",
			
 
				     "model.eval();"
			
 
				    ]
			
 
				   },
			
@@ -2004,7 +2004,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 45,
			
 
				+   "execution_count": 44,
			
 
				    "id": "bbd175bb-edf4-450e-a6de-d3e8913c6532",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -2019,12 +2019,12 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 46,
			
 
				+   "execution_count": 45,
			
 
				    "id": "8a0c7295-c822-43bf-9286-c45abc542868",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
 
				-    "checkpoint = torch.load(\"model_and_optimizer.pth\")\n",
			
 
				+    "checkpoint = torch.load(\"model_and_optimizer.pth\", weights_only=True)\n",
			
 
				     "\n",
			
 
				     "model = GPTModel(GPT_CONFIG_124M)\n",
			
 
				     "model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
			
@@ -2072,7 +2072,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 47,
			
 
				+   "execution_count": 46,
			
 
				    "id": "fb9fdf02-972a-444e-bf65-8ffcaaf30ce8",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -2082,7 +2082,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 48,
			
 
				+   "execution_count": 47,
			
 
				    "id": "a0747edc-559c-44ef-a93f-079d60227e3f",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -2090,8 +2090,8 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "TensorFlow version: 2.15.0\n",
			
 
				-      "tqdm version: 4.66.2\n"
			
 
				+      "TensorFlow version: 2.16.1\n",
			
 
				+      "tqdm version: 4.66.4\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -2102,7 +2102,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 49,
			
 
				+   "execution_count": 48,
			
 
				    "id": "c5bc89eb-4d39-4287-9b0c-e459ebe7f5ed",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -2121,21 +2121,21 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 50,
			
 
				+   "execution_count": 49,
			
 
				    "id": "76271dd7-108d-4f5b-9c01-6ae0aac4b395",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
 
				-     "name": "stderr",
			
 
				+     "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "checkpoint: 100%|███████████████████████████| 77.0/77.0 [00:00<00:00, 58.8kiB/s]\n",
			
 
				-      "encoder.json: 100%|███████████████████████| 1.04M/1.04M [00:00<00:00, 2.70MiB/s]\n",
			
 
				-      "hparams.json: 100%|█████████████████████████| 90.0/90.0 [00:00<00:00, 27.8kiB/s]\n",
			
 
				-      "model.ckpt.data-00000-of-00001: 100%|███████| 498M/498M [00:30<00:00, 16.1MiB/s]\n",
			
 
				-      "model.ckpt.index: 100%|███████████████████| 5.21k/5.21k [00:00<00:00, 1.18MiB/s]\n",
			
 
				-      "model.ckpt.meta: 100%|██████████████████████| 471k/471k [00:00<00:00, 2.22MiB/s]\n",
			
 
				-      "vocab.bpe: 100%|████████████████████████████| 456k/456k [00:00<00:00, 2.04MiB/s]\n"
			
 
				+      "File already exists and is up-to-date: gpt2/124M/checkpoint\n",
			
 
				+      "File already exists and is up-to-date: gpt2/124M/encoder.json\n",
			
 
				+      "File already exists and is up-to-date: gpt2/124M/hparams.json\n",
			
 
				+      "File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001\n",
			
 
				+      "File already exists and is up-to-date: gpt2/124M/model.ckpt.index\n",
			
 
				+      "File already exists and is up-to-date: gpt2/124M/model.ckpt.meta\n",
			
 
				+      "File already exists and is up-to-date: gpt2/124M/vocab.bpe\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -2145,7 +2145,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 51,
			
 
				+   "execution_count": 50,
			
 
				    "id": "b1a31951-d971-4a6e-9c43-11ee1168ec6a",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -2163,7 +2163,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 52,
			
 
				+   "execution_count": 51,
			
 
				    "id": "857c8331-130e-46ba-921d-fa35d7a73cfe",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -2181,7 +2181,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 53,
			
 
				+   "execution_count": 52,
			
 
				    "id": "c48dac94-8562-4a66-84ef-46c613cdc4cd",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -2241,7 +2241,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 54,
			
 
				+   "execution_count": 53,
			
 
				    "id": "9fef90dd-0654-4667-844f-08e28339ef7d",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -2274,7 +2274,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 55,
			
 
				+   "execution_count": 54,
			
 
				    "id": "f9a92229-c002-49a6-8cfb-248297ad8296",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -2287,7 +2287,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 56,
			
 
				+   "execution_count": 55,
			
 
				    "id": "f22d5d95-ca5a-425c-a9ec-fc432a12d4e9",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -2369,7 +2369,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 57,
			
 
				+   "execution_count": 56,
			
 
				    "id": "1f690253-f845-4347-b7b6-43fabbd2affa",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
--- a/ch05/01_main-chapter-code/exercise-solutions.ipynb
+++ b/ch05/01_main-chapter-code/exercise-solutions.ipynb
@@ -28,6 +28,35 @@
 
				     "# Chapter 5 Exercise solutions"
			
 
				    ]
			
 
				   },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "id": "37aa4692-2357-4d88-b072-6d2d988d7f4f",
			
 
				+   "metadata": {},
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "numpy version: 1.26.4\n",
			
 
				+      "tiktoken version: 0.7.0\n",
			
 
				+      "torch version: 2.4.0\n",
			
 
				+      "tensorflow version: 2.16.1\n"
			
 
				+     ]
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "from importlib.metadata import version\n",
			
 
				+    "\n",
			
 
				+    "pkgs = [\"numpy\", \n",
			
 
				+    "        \"tiktoken\", \n",
			
 
				+    "        \"torch\",\n",
			
 
				+    "        \"tensorflow\" # For OpenAI's pretrained weights\n",
			
 
				+    "       ]\n",
			
 
				+    "for p in pkgs:\n",
			
 
				+    "    print(f\"{p} version: {version(p)}\")"
			
 
				+   ]
			
 
				+  },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				    "id": "5fea8be3-30a1-4623-a6d7-b095c6c1092e",
			
@@ -58,7 +87,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 1,
			
 
				+   "execution_count": 2,
			
 
				    "id": "42dda298-3014-4c36-8d63-97c210bcf4e8",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -109,7 +138,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 2,
			
 
				+   "execution_count": 3,
			
 
				    "id": "b5605236-e300-4844-aea7-509d868efbdd",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -172,7 +201,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				+   "execution_count": 4,
			
 
				    "id": "1d4163c0-22ad-4f5b-8e20-b7420e9dbfc6",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -182,7 +211,7 @@
 
				        "tensor(0.0430)"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 3,
			
 
				+     "execution_count": 4,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -250,7 +279,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 4,
			
 
				+   "execution_count": 5,
			
 
				    "id": "a61a4034-797a-4635-bf42-ddfff1b07125",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -275,13 +304,13 @@
 
				     "\n",
			
 
				     "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
			
 
				     "model = GPTModel(GPT_CONFIG_124M)\n",
			
 
				-    "model.load_state_dict(torch.load(\"model.pth\"))\n",
			
 
				+    "model.load_state_dict(torch.load(\"model.pth\", weights_only=True))\n",
			
 
				     "model.eval();"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				+   "execution_count": 6,
			
 
				    "id": "ee95a272-b852-43b4-9827-ea7e1dbd5724",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -292,7 +321,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 6,
			
 
				+   "execution_count": 7,
			
 
				    "id": "4ab43658-3240-484a-9072-a40a0ed85be6",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -322,7 +351,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 7,
			
 
				+   "execution_count": 8,
			
 
				    "id": "ebb22d06-393a-42d3-ab64-66646d33b39b",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -352,7 +381,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 8,
			
 
				+   "execution_count": 9,
			
 
				    "id": "75469f24-47cc-458d-a200-fe64c648131d",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -400,7 +429,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 9,
			
 
				+   "execution_count": 10,
			
 
				    "id": "94eae6ba-d9fd-417a-8e31-fc39e9299870",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -424,7 +453,7 @@
 
				     "\n",
			
 
				     "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
			
 
				     "\n",
			
 
				-    "checkpoint = torch.load(\"model_and_optimizer.pth\")\n",
			
 
				+    "checkpoint = torch.load(\"model_and_optimizer.pth\", weights_only=True)\n",
			
 
				     "model = GPTModel(GPT_CONFIG_124M)\n",
			
 
				     "model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
			
 
				     "model.to(device)\n",
			
@@ -444,7 +473,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 10,
			
 
				+   "execution_count": 11,
			
 
				    "id": "b5a78470-0652-4abd-875a-664e23c07c36",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -507,7 +536,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 11,
			
 
				+   "execution_count": 12,
			
 
				    "id": "ab4693dc-1359-47a7-8110-1e90f514a49e",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -576,7 +605,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 12,
			
 
				+   "execution_count": 13,
			
 
				    "id": "68d162d6-bbb9-4d6d-82ee-1c410694f872",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -604,7 +633,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 13,
			
 
				+   "execution_count": 14,
			
 
				    "id": "d8373461-7dad-47da-a489-3e23f0799b23",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -630,7 +659,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 14,
			
 
				+   "execution_count": 15,
			
 
				    "id": "cdd44873-d6c2-4471-a20f-f639b09fdcd3",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -655,7 +684,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 15,
			
 
				+   "execution_count": 16,
			
 
				    "id": "c7d562e4-33f6-4611-9b75-6ad1cb441d3b",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -670,7 +699,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 16,
			
 
				+   "execution_count": 17,
			
 
				    "id": "46eda9ea-ccb0-46ee-931b-3c07502b2544",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -725,7 +754,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 17,
			
 
				+   "execution_count": 18,
			
 
				    "id": "4e3574a2-687d-47a2-a2f6-457fe9d595f1",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -733,8 +762,8 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "Training loss: 3.7547483444213867\n",
			
 
				-      "Validation loss: 3.5596189498901367\n"
			
 
				+      "Training loss: 3.7547486888037787\n",
			
 
				+      "Validation loss: 3.5596182346343994\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -759,23 +788,29 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 18,
			
 
				+   "execution_count": 19,
			
 
				    "id": "1a79a4b6-fe8f-40c2-a018-e731dcf391b3",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				+    {
			
 
				+     "name": "stderr",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "checkpoint: 100%|███████████████████████████| 77.0/77.0 [00:00<00:00, 43.5kiB/s]\n",
			
 
				+      "encoder.json: 100%|███████████████████████| 1.04M/1.04M [00:00<00:00, 2.75MiB/s]\n",
			
 
				+      "hparams.json: 100%|█████████████████████████| 91.0/91.0 [00:00<00:00, 60.2kiB/s]\n",
			
 
				+      "model.ckpt.data-00000-of-00001: 100%|█████| 6.23G/6.23G [06:02<00:00, 17.2MiB/s]\n",
			
 
				+      "model.ckpt.index: 100%|████████████████████| 20.7k/20.7k [00:00<00:00, 171kiB/s]\n",
			
 
				+      "model.ckpt.meta: 100%|████████████████████| 1.84M/1.84M [00:00<00:00, 4.27MiB/s]\n",
			
 
				+      "vocab.bpe: 100%|████████████████████████████| 456k/456k [00:00<00:00, 1.73MiB/s]\n"
			
 
				+     ]
			
 
				+    },
			
 
				     {
			
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/checkpoint\n",
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/encoder.json\n",
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/hparams.json\n",
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/model.ckpt.data-00000-of-00001\n",
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/model.ckpt.index\n",
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/model.ckpt.meta\n",
			
 
				-      "File already exists and is up-to-date: gpt2/1558M/vocab.bpe\n",
			
 
				-      "Training loss: 3.3046313656700983\n",
			
 
				-      "Validation loss: 3.1195149421691895\n"
			
 
				+      "Training loss: 3.3046312861972384\n",
			
 
				+      "Validation loss: 3.1195147037506104\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -832,7 +867,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 19,
			
 
				+   "execution_count": 20,
			
 
				    "id": "31e0972b-e85e-4904-a0f5-24c3eacd5fa2",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -858,7 +893,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 20,
			
 
				+   "execution_count": 21,
			
 
				    "id": "b641ee88-f9d4-43ec-a787-e34199eed356",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -902,7 +937,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 21,
			
 
				+   "execution_count": 22,
			
 
				    "id": "c98f56f4-98fc-43b4-9ee5-726e9d17c73f",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
@@ -912,7 +947,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 22,
			
 
				+   "execution_count": 23,
			
 
				    "id": "b1f7853c-6e81-4f1f-a1d0-61e2c7d33a20",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
@@ -957,7 +992,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.10.11"
			
 
				+   "version": "3.11.4"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,
			
--- a/ch05/01_main-chapter-code/gpt_train.py
+++ b/ch05/01_main-chapter-code/gpt_train.py
@@ -239,4 +239,4 @@ if __name__ == "__main__":
 
				     # Save and load model
			
 
				     torch.save(model.state_dict(), "model.pth")
			
 
				     model = GPTModel(GPT_CONFIG_124M)
			
 
				-    model.load_state_dict(torch.load("model.pth"))
			
 
				+    model.load_state_dict(torch.load("model.pth", weights_only=True))
			
--- a/ch06/01_main-chapter-code/ch06.ipynb
+++ b/ch06/01_main-chapter-code/ch06.ipynb
--- a/ch06/01_main-chapter-code/load-finetuned-model.ipynb
+++ b/ch06/01_main-chapter-code/load-finetuned-model.ipynb
@@ -46,8 +46,8 @@
 
				      "name": "stdout",
			
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				-      "tiktoken version: 0.6.0\n",
			
 
				-      "torch version: 2.2.2\n"
			
 
				+      "tiktoken version: 0.7.0\n",
			
 
				+      "torch version: 2.4.0\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -127,7 +127,7 @@
 
				     "\n",
			
 
				     "# Then load pretrained weights\n",
			
 
				     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
			
 
				-    "model.load_state_dict(torch.load(\"review_classifier.pth\", map_location=device))\n",
			
 
				+    "model.load_state_dict(torch.load(\"review_classifier.pth\", map_location=device, weights_only=True))\n",
			
 
				     "model.eval();"
			
 
				    ]
			
 
				   },
			
@@ -241,7 +241,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.12.2"
			
 
				+   "version": "3.11.4"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,
			
--- a/ch07/01_main-chapter-code/ch07.ipynb
+++ b/ch07/01_main-chapter-code/ch07.ipynb
--- a/ch07/01_main-chapter-code/load-finetuned-model.ipynb
+++ b/ch07/01_main-chapter-code/load-finetuned-model.ipynb
@@ -47,7 +47,7 @@
 
				      "output_type": "stream",
			
 
				      "text": [
			
 
				       "tiktoken version: 0.7.0\n",
			
 
				-      "torch version: 2.3.1\n"
			
 
				+      "torch version: 2.4.0\n"
			
 
				      ]
			
 
				     }
			
 
				    ],
			
@@ -120,7 +120,11 @@
 
				    "source": [
			
 
				     "import torch\n",
			
 
				     "\n",
			
 
				-    "model.load_state_dict(torch.load(\"gpt2-medium355M-sft.pth\", map_location=torch.device(\"cpu\")))\n",
			
 
				+    "model.load_state_dict(torch.load(\n",
			
 
				+    "    \"gpt2-medium355M-sft.pth\",\n",
			
 
				+    "    map_location=torch.device(\"cpu\"),\n",
			
 
				+    "    weights_only=True\n",
			
 
				+    "))\n",
			
 
				     "model.eval();"
			
 
				    ]
			
 
				   },
			
@@ -207,7 +211,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.12.2"
			
 
				+   "version": "3.11.4"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,