|
@@ -81,6 +81,20 @@
|
|
|
"\n",
|
|
"\n",
|
|
|
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
|
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
|
|
"\n",
|
|
"\n",
|
|
|
|
|
+ "# Note:\n",
|
|
|
|
|
+ "# Uncommenting the following lines will allow the code to run on the Apple Silicon GPU (via PyTorch's MPS backend), if available,\n",
|
|
|
|
|
+ "# which is approximately 2x faster than on an Apple CPU (as measured on an M3 MacBook Air).\n",
|
|
|
|
|
+ "# However, the resulting loss values may be slightly different.\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "#if torch.cuda.is_available():\n",
|
|
|
|
|
+ "# device = torch.device(\"cuda\")\n",
|
|
|
|
|
+ "#elif torch.backends.mps.is_available():\n",
|
|
|
|
|
+ "# device = torch.device(\"mps\")\n",
|
|
|
|
|
+ "#else:\n",
|
|
|
|
|
+ "# device = torch.device(\"cpu\")\n",
|
|
|
|
|
+ "#\n",
|
|
|
|
|
+ "#print(f\"Using {device} device.\")\n",
|
|
|
|
|
+ "\n",
|
|
|
"torch.manual_seed(123)\n",
|
|
"torch.manual_seed(123)\n",
|
|
|
"model = GPTModel(GPT_CONFIG_124M)\n",
|
|
"model = GPTModel(GPT_CONFIG_124M)\n",
|
|
|
"model.eval(); # Disable dropout during inference"
|
|
"model.eval(); # Disable dropout during inference"
|
|
@@ -660,6 +674,11 @@
|
|
|
"source": [
|
|
"source": [
|
|
|
"import tiktoken\n",
|
|
"import tiktoken\n",
|
|
|
"\n",
|
|
"\n",
|
|
|
|
|
+ "# Note:\n",
|
|
|
|
|
+ "# Uncomment the following code to calculate the execution time\n",
|
|
|
|
|
+ "# import time\n",
|
|
|
|
|
+ "# start_time = time.time()\n",
|
|
|
|
|
+ "\n",
|
|
|
"torch.manual_seed(123)\n",
|
|
"torch.manual_seed(123)\n",
|
|
|
"model = GPTModel(GPT_CONFIG_124M)\n",
|
|
"model = GPTModel(GPT_CONFIG_124M)\n",
|
|
|
"model.to(device)\n",
|
|
"model.to(device)\n",
|
|
@@ -674,7 +693,13 @@
|
|
|
" eval_freq=5, eval_iter=1, start_context=\"Every effort moves you\",\n",
|
|
" eval_freq=5, eval_iter=1, start_context=\"Every effort moves you\",\n",
|
|
|
" tokenizer=tokenizer, warmup_steps=warmup_steps, \n",
|
|
" tokenizer=tokenizer, warmup_steps=warmup_steps, \n",
|
|
|
" initial_lr=1e-5, min_lr=1e-5\n",
|
|
" initial_lr=1e-5, min_lr=1e-5\n",
|
|
|
- ")"
|
|
|
|
|
|
|
+ ")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Note:\n",
|
|
|
|
|
+ "# Uncomment the following code to show the execution time\n",
|
|
|
|
|
+ "# end_time = time.time()\n",
|
|
|
|
|
+ "# execution_time_minutes = (end_time - start_time) / 60\n",
|
|
|
|
|
+ "# print(f\"Training completed in {execution_time_minutes:.2f} minutes.\")"
|
|
|
]
|
|
]
|
|
|
},
|
|
},
|
|
|
{
|
|
{
|