1 жил өмнө · 01cb137bfd
--- a/appendix-D/01_main-chapter-code/appendix-D.ipynb
+++ b/appendix-D/01_main-chapter-code/appendix-D.ipynb
@@ -81,6 +81,20 @@
 
															     "\n",
														
 
															     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
														
 
															     "\n",
														
 
															+    "# Note:\n",
														
 
															+    "# Uncommenting the following lines will allow the code to run on Apple Silicon chips, if applicable,\n",
														
 
															+    "# which is approximately 2x faster than on an Apple CPU (as measured on an M3 MacBook Air).\n",
														
 
															+    "# However, the resulting loss values may be slightly different.\n",
														
 
															+    "\n",
														
 
															+    "#if torch.cuda.is_available():\n",
														
 
															+    "#    device = torch.device(\"cuda\")\n",
														
 
															+    "#elif torch.backends.mps.is_available():\n",
														
 
															+    "#    device = torch.device(\"mps\")\n",
														
 
															+    "#else:\n",
														
 
															+    "#    device = torch.device(\"cpu\")\n",
														
 
															+    "#\n",
														
 
															+    "#print(f\"Using {device} device.\")\n",
														
 
															+    "\n",
														
 
															     "torch.manual_seed(123)\n",
														
 
															     "model = GPTModel(GPT_CONFIG_124M)\n",
														
 
															     "model.eval();  # Disable dropout during inference"
														
@@ -660,6 +674,11 @@
 
															    "source": [
														
 
															     "import tiktoken\n",
														
 
															     "\n",
														
 
															+    "# Note:\n",
														
 
															+    "# Uncomment the following code to calculate the execution time\n",
														
 
															+    "# import time\n",
														
 
															+    "# start_time = time.time()\n",
														
 
															+    "\n",
														
 
															     "torch.manual_seed(123)\n",
														
 
															     "model = GPTModel(GPT_CONFIG_124M)\n",
														
 
															     "model.to(device)\n",
														
@@ -674,7 +693,13 @@
 
															     "    eval_freq=5, eval_iter=1, start_context=\"Every effort moves you\",\n",
														
 
															     "    tokenizer=tokenizer, warmup_steps=warmup_steps, \n",
														
 
															     "    initial_lr=1e-5, min_lr=1e-5\n",
														
 
															-    ")"
														
 
															+    ")\n",
														
 
															+    "\n",
														
 
															+    "# Note:\n",
														
 
															+    "# Uncomment the following code to show the execution time\n",
														
 
															+    "# end_time = time.time()\n",
														
 
															+    "# execution_time_minutes = (end_time - start_time) / 60\n",
														
 
															+    "# print(f\"Training completed in {execution_time_minutes:.2f} minutes.\")"
														
 
															    ]
														
 
															   },
														
 
															   {
														
--- a/appendix-E/01_main-chapter-code/appendix-E.ipynb
+++ b/appendix-E/01_main-chapter-code/appendix-E.ipynb
@@ -511,6 +511,21 @@
 
															    "outputs": [],
														
 
															    "source": [
														
 
															     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
														
 
															+    "\n",
														
 
															+    "# Note:\n",
														
 
															+    "# Uncommenting the following lines will allow the code to run on Apple Silicon chips, if applicable,\n",
														
 
															+    "# which is approximately 1.2x faster than on an Apple CPU (as measured on an M3 MacBook Air).\n",
														
 
															+    "# However, the resulting loss values may be slightly different.\n",
														
 
															+    "\n",
														
 
															+    "#if torch.cuda.is_available():\n",
														
 
															+    "#    device = torch.device(\"cuda\")\n",
														
 
															+    "#elif torch.backends.mps.is_available():\n",
														
 
															+    "#    device = torch.device(\"mps\")\n",
														
 
															+    "#else:\n",
														
 
															+    "#    device = torch.device(\"cpu\")\n",
														
 
															+    "#\n",
														
 
															+    "#print(f\"Using {device} device.\")\n",
														
 
															+    "\n",
														
 
															     "model.to(device);  # no assignment model = model.to(device) necessary for nn.Module classes"
														
 
															    ]
														
 
															   },
														
--- a/ch05/01_main-chapter-code/ch05.ipynb
+++ b/ch05/01_main-chapter-code/ch05.ipynb
@@ -1154,6 +1154,8 @@
 
															     "#    device = torch.device(\"mps\")\n",
														
 
															     "#else:\n",
														
 
															     "#    device = torch.device(\"cpu\")\n",
														
 
															+    "#\n",
														
 
															+    "#print(f\"Using {device} device.\")\n",
														
 
															     "\n",
														
 
															     "\n",
														
 
															     "model.to(device) # no assignment model = model.to(device) necessary for nn.Module classes\n",