1 year ago · d38083c401
--- a/.gitignore
+++ b/.gitignore
@@ -35,12 +35,15 @@ ch05/01_main-chapter-code/model.pth
 
				 ch05/01_main-chapter-code/model_and_optimizer.pth
			
 
				 ch05/03_bonus_pretraining_on_gutenberg/model_checkpoints
			
 
				 ch05/06_user_interface/gpt2
			
 
				+ch05/07_gpt_to_llama/.cache
			
 
				 ch05/07_gpt_to_llama/Llama-2-7b
			
 
				 ch05/07_gpt_to_llama/Llama-2-7b-chat
			
 
				-ch05/07_gpt_to_llama/.cache
			
 
				-ch05/07_gpt_to_llama/llama3-files
			
 
				-ch05/07_gpt_to_llama/llama31-files
			
 
				-ch05/07_gpt_to_llama/llama32-files
			
 
				+ch05/07_gpt_to_llama/Llama-3-8B
			
 
				+ch05/07_gpt_to_llama/Llama-3-8B-Instruct
			
 
				+ch05/07_gpt_to_llama/Llama-3.1-8B
			
 
				+ch05/07_gpt_to_llama/Llama-3.1-8B-Instruct
			
 
				+ch05/07_gpt_to_llama/Llama-3.2-1B
			
 
				+ch05/07_gpt_to_llama/Llama-3.2-1B-Instruct
			
 
				 
			
 
				 ch06/01_main-chapter-code/gpt2
			
 
				 ch06/02_bonus_additional-experiments/gpt2
			
--- a/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb
+++ b/ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb
@@ -1189,7 +1189,7 @@
 
				     "tokenizer_file = hf_hub_download(\n",
			
 
				     "    repo_id=\"meta-llama/Llama-2-7b\",\n",
			
 
				     "    filename=\"tokenizer.model\",\n",
			
 
				-    "    local_dir=\"Llama-2-7B\"\n",
			
 
				+    "    local_dir=\"Llama-2-7b\"\n",
			
 
				     ")"
			
 
				    ]
			
 
				   },
			
--- a/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
+++ b/ch05/07_gpt_to_llama/converting-llama2-to-llama3.ipynb
@@ -1252,7 +1252,7 @@
 
				     "tokenizer_file_path = hf_hub_download(\n",
			
 
				     "    repo_id=\"meta-llama/Meta-Llama-3-8B\",\n",
			
 
				     "    filename=\"original/tokenizer.model\",\n",
			
 
				-    "    local_dir=\"llama3-files\"\n",
			
 
				+    "    local_dir=\"Llama-3-8B\"\n",
			
 
				     ")"
			
 
				    ]
			
 
				   },
			
@@ -1458,7 +1458,7 @@
 
				     "    weights_file = hf_hub_download(\n",
			
 
				     "        repo_id=\"meta-llama/Meta-Llama-3-8B\",\n",
			
 
				     "        filename=f\"model-0000{i}-of-00004.safetensors\",\n",
			
 
				-    "        local_dir=\"llama3-files\"\n",
			
 
				+    "        local_dir=\"Llama-3-8B\"\n",
			
 
				     "    )\n",
			
 
				     "    current_weights = load_file(weights_file)\n",
			
 
				     "    combined_weights.update(current_weights)"
			
@@ -1677,7 +1677,7 @@
 
				     "id": "akyo7WNyF_YL"
			
 
				    },
			
 
				    "source": [
			
 
				-    "- Above, we used the pretrained base model; if you want to use a model capable of following instructions, use the `\"meta-llama/Llama-3-8b-Instruct\"` model instead, as shown below"
			
 
				+    "- Above, we used the pretrained base model; if you want to use a model capable of following instructions, use the `\"meta-llama/Meta-Llama-3-8B-Instruct\"` model instead, as shown below"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -1824,7 +1824,7 @@
 
				     "    weights_file = hf_hub_download(\n",
			
 
				     "        repo_id=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n",
			
 
				     "        filename=f\"model-0000{i}-of-00004.safetensors\",\n",
			
 
				-    "        local_dir=\"llama3-files\"\n",
			
 
				+    "        local_dir=\"Llama-3-8B-Instruct\"\n",
			
 
				     "    )\n",
			
 
				     "    current_weights = load_file(weights_file)\n",
			
 
				     "    combined_weights.update(current_weights)\n",
			
@@ -2157,7 +2157,7 @@
 
				     "tokenizer_file_path = hf_hub_download(\n",
			
 
				     "    repo_id=\"meta-llama/Llama-3.1-8B\",\n",
			
 
				     "    filename=\"original/tokenizer.model\",\n",
			
 
				-    "    local_dir=\"llama31-files\"\n",
			
 
				+    "    local_dir=\"Llama-3.1-8B\"\n",
			
 
				     ")\n",
			
 
				     "\n",
			
 
				     "tokenizer = Tokenizer(tokenizer_file_path)"
			
@@ -2313,7 +2313,7 @@
 
				     "    weights_file = hf_hub_download(\n",
			
 
				     "        repo_id=\"meta-llama/Llama-3.1-8B\",\n",
			
 
				     "        filename=f\"model-0000{i}-of-00004.safetensors\",\n",
			
 
				-    "        local_dir=\"llama31-files\"\n",
			
 
				+    "        local_dir=\"Llama-3.1-8B\"\n",
			
 
				     "    )\n",
			
 
				     "    current_weights = load_file(weights_file)\n",
			
 
				     "    combined_weights.update(current_weights)\n",
			
@@ -2512,7 +2512,7 @@
 
				     "tokenizer_file_path = hf_hub_download(\n",
			
 
				     "    repo_id=\"meta-llama/Llama-3.2-1B\",\n",
			
 
				     "    filename=\"original/tokenizer.model\",\n",
			
 
				-    "    local_dir=\"llama32-files\"\n",
			
 
				+    "    local_dir=\"Llama-3.2-1B\"\n",
			
 
				     ")\n",
			
 
				     "\n",
			
 
				     "tokenizer = Tokenizer(tokenizer_file_path)"
			
@@ -2589,7 +2589,7 @@
 
				     "weights_file = hf_hub_download(\n",
			
 
				     "    repo_id=\"meta-llama/Llama-3.2-1B\",\n",
			
 
				     "    filename=f\"model.safetensors\",\n",
			
 
				-    "    local_dir=\"llama32-files\"\n",
			
 
				+    "    local_dir=\"Llama-3.2-1B\"\n",
			
 
				     ")\n",
			
 
				     "current_weights = load_file(weights_file)\n",
			
 
				     "\n",
			
@@ -2687,7 +2687,7 @@
 
				    "provenance": []
			
 
				   },
			
 
				   "kernelspec": {
			
 
				-   "display_name": "Python 3 (ipykernel)",
			
 
				+   "display_name": "pt",
			
 
				    "language": "python",
			
 
				    "name": "python3"
			
 
				   },
			
@@ -2701,7 +2701,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.10.6"
			
 
				+   "version": "3.11.9"
			
 
				   },
			
 
				   "widgets": {
			
 
				    "application/vnd.jupyter.widget-state+json": {
			
--- a/ch05/07_gpt_to_llama/standalone-llama32.ipynb
+++ b/ch05/07_gpt_to_llama/standalone-llama32.ipynb
@@ -733,7 +733,7 @@
 
				     "tokenizer_file_path = hf_hub_download(\n",
			
 
				     "    repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
			
 
				     "    filename=\"original/tokenizer.model\",\n",
			
 
				-    "    local_dir=\"llama32-files\"\n",
			
 
				+    "    local_dir=\"Llama-3.2-1B-Instruct\"\n",
			
 
				     ")"
			
 
				    ]
			
 
				   },
			
@@ -860,7 +860,7 @@
 
				     "    weights_file = hf_hub_download(\n",
			
 
				     "        repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
			
 
				     "        filename=f\"model.safetensors\",\n",
			
 
				-    "        local_dir=\"llama32-files\"\n",
			
 
				+    "        local_dir=\"Llama-3.2-1B-Instruct\"\n",
			
 
				     "    )\n",
			
 
				     "    combined_weights = load_file(weights_file)\n",
			
 
				     "\n",
			
@@ -871,7 +871,7 @@
 
				     "        weights_file = hf_hub_download(\n",
			
 
				     "            repo_id=f\"meta-llama/Llama-3.2-{LLAMA_SIZE_STR}-Instruct\",\n",
			
 
				     "            filename=f\"model-0000{i}-of-00002.safetensors\",\n",
			
 
				-    "            local_dir=\"llama32-files\"\n",
			
 
				+    "            local_dir=\"Llama-3.2-1B-Instruct\"\n",
			
 
				     "        )\n",
			
 
				     "        current_weights = load_file(weights_file)\n",
			
 
				     "        combined_weights.update(current_weights)\n",
			
@@ -1047,7 +1047,7 @@
 
				  ],
			
 
				  "metadata": {
			
 
				   "kernelspec": {
			
 
				-   "display_name": "Python 3 (ipykernel)",
			
 
				+   "display_name": "pt",
			
 
				    "language": "python",
			
 
				    "name": "python3"
			
 
				   },
			
@@ -1061,7 +1061,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.10.6"
			
 
				+   "version": "3.11.9"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,