소스 검색

Cos-sin fix in Llama 2 bonus notebook (#381)

Sebastian Raschka 1 년 전
부모
커밋
a5405c255d
1개의 변경된 파일, 1142개의 추가 및 22개의 삭제
  1. 1142 22
      ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb

+ 1142 - 22
ch05/07_gpt_to_llama/converting-gpt-to-llama2.ipynb

@@ -76,7 +76,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "34a9a440-84c2-42cc-808b-38677cb6af8a",
-    "outputId": "7ce8fe41-1c24-4f0b-a8d9-352b4af1b46b"
+    "outputId": "8118963b-3c72-43af-874b-439ffebdc94c"
    },
    "outputs": [
     {
@@ -578,8 +578,8 @@
     "        values = values.transpose(1, 2)\n",
     "\n",
     "        ################################### NEW ###################################\n",
-    "        keys = compute_rope(keys, self.sin, self.cos)\n",
-    "        queries = compute_rope(queries, self.sin, self.cos)\n",
+    "        keys = compute_rope(keys, self.cos, self.sin)\n",
+    "        queries = compute_rope(queries, self.cos, self.sin)\n",
     "        ###########################################################################\n",
     "\n",
     "        # Compute scaled dot-product attention (aka self-attention) with a causal mask\n",
@@ -916,7 +916,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "6079f747-8f20-4c6b-8d38-7156f1101729",
-    "outputId": "1ca50091-a20c-4a44-b806-9985a5e64135"
+    "outputId": "0a0eb34b-1a21-4c11-804f-b40007bda5a3"
    },
    "outputs": [
     {
@@ -952,7 +952,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "0df1c79e-27a7-4b0f-ba4e-167fe107125a",
-    "outputId": "b157b5ac-d37c-4b71-f609-45a91f7ed93a"
+    "outputId": "11ced939-556d-4511-d5c0-10a94ed3df32"
    },
    "outputs": [
     {
@@ -1085,7 +1085,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "3357a230-b678-4691-a238-257ee4e80185",
-    "outputId": "7d4adc4b-53cf-4099-a45f-2fb4fd25edc4"
+    "outputId": "768ed6af-ce14-40bc-ca18-117b4b448269"
    },
    "outputs": [
     {
@@ -1126,10 +1126,24 @@
    "id": "69714ea8-b9b8-4687-8392-f3abb8f93a32",
    "metadata": {
     "colab": {
-     "base_uri": "https://localhost:8080/"
+     "base_uri": "https://localhost:8080/",
+     "height": 153,
+     "referenced_widgets": [
+      "e6c75a6aa7b942fe84160e286e3acb3d",
+      "08f0bf9459bd425498a5cb236f9d4a72",
+      "10251d6f724e43788c41d4b7879cbfd3",
+      "53a973c0853b44418698136bd04df039",
+      "bdb071e7145a4007ae01599333e72612",
+      "6b1821a7f4574e3aba09c1e410cc81e4",
+      "8c2873eaec3445888ad3d54ad7387950",
+      "0c8f7044966e4207b12352503c67dcbb",
+      "8b5951213c9e4798a258146d61d02d11",
+      "2c05df3f91e64df7b33905b1065a76f7",
+      "742ae5487f2648fcae7ca8e22c7f8db9"
+     ]
     },
     "id": "69714ea8-b9b8-4687-8392-f3abb8f93a32",
-    "outputId": "aa18fccc-6533-4446-f57b-546068ad518c"
+    "outputId": "c230fec9-5c71-4a41-90ab-8a34d114ea01"
    },
    "outputs": [
     {
@@ -1143,6 +1157,20 @@
       "Please note that authentication is recommended but still optional to access public models or datasets.\n",
       "  warnings.warn(\n"
      ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e6c75a6aa7b942fe84160e286e3acb3d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -1151,7 +1179,8 @@
     "tokenizer_file = hf_hub_download(\n",
     "    repo_id=\"meta-llama/Llama-2-7b\",\n",
     "    filename=\"tokenizer.model\",\n",
-    "    local_dir=\"Llama-2-7B\")"
+    "    local_dir=\"Llama-2-7B\"\n",
+    ")"
    ]
   },
   {
@@ -1211,7 +1240,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "e0a2b5cd-6cba-4d72-b8ff-04d8315d483e",
-    "outputId": "cbc53f67-a77a-40c9-ed2d-c6f8be066cfb"
+    "outputId": "acd5065d-8900-4ba8-ef85-968365f3a0cb"
    },
    "outputs": [
     {
@@ -1219,7 +1248,7 @@
      "output_type": "stream",
      "text": [
       "Output text:\n",
-      " Every effort movesαfdmsdn coatELDâte eer tagsיśćinu Lundmysq eer napinu LundANCEHEAD ner}}}رible one}}}رible one puts Dan\n"
+      " Every effort movesαllRadius deletingpretcc否']; future eer napulate lackус während inter DES издаSchéon로жа Bass differencespadxsnu ;; ctx始\n"
      ]
     }
    ],
@@ -1278,9 +1307,42 @@
    "execution_count": 26,
    "id": "5fa9c06c-7a53-4b4d-9ce4-acc027322ee4",
    "metadata": {
-    "id": "5fa9c06c-7a53-4b4d-9ce4-acc027322ee4"
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 49,
+     "referenced_widgets": [
+      "66e777955e8748df878f118f07f38dab",
+      "da89ae3ea4d2474e98f64ada608f3cea",
+      "93e6da39c25f4edfaa72056c89df1f7f",
+      "b628603e4cb0405398c916587ee96756",
+      "93bedcb9245e44a0a1eb7e4155070f66",
+      "0723f467d37b4904819a8bb33ebda10f",
+      "e54928776bc649339002adced63738b0",
+      "d8e0f42068af4cb094e2f115f76e06e0",
+      "0a939565b6e94f08bee0a66e0f9827d4",
+      "a5fedbb7ec2e43d99711bb4cd84b9486",
+      "0c186f6539714d8eab023969ce47c500"
+     ]
+    },
+    "id": "5fa9c06c-7a53-4b4d-9ce4-acc027322ee4",
+    "outputId": "0d8942cc-e5e2-4e77-ec41-1ac7bec7d94f"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "66e777955e8748df878f118f07f38dab",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "consolidated.00.pth:   0%|          | 0.00/13.5G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "weights_file = hf_hub_download(\n",
     "   repo_id=\"meta-llama/Llama-2-7b\",\n",
@@ -1320,7 +1382,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "ee26bd0b-fea9-4924-97f7-409c14f28e49",
-    "outputId": "351029ce-b4c0-4d39-8e0e-7e7f44d25647"
+    "outputId": "fa83d38a-bb41-4cb2-d3c7-c573bfe1f8a4"
    },
    "outputs": [
     {
@@ -1455,7 +1517,7 @@
      "base_uri": "https://localhost:8080/"
     },
     "id": "240987e8-a023-462e-9376-9edfb27559ec",
-    "outputId": "3fa7a77a-6203-4d8a-bdaa-afce1f504adf"
+    "outputId": "044f24b3-4018-4860-834d-6c2731b9e47c"
    },
    "outputs": [
     {
@@ -1463,7 +1525,7 @@
      "output_type": "stream",
      "text": [
       "Output text:\n",
-      " Every effort has been made to ensure that the information contained in this website is correct and up to date and accurate at the time of publication\n"
+      " Every effort has been made to ensure that the information contained in this website is accurate and up to date and correct at the time of publication\n"
      ]
     }
    ],
@@ -1494,23 +1556,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 34,
    "id": "nbvAV7vaz6yc",
    "metadata": {
     "colab": {
-     "base_uri": "https://localhost:8080/"
+     "base_uri": "https://localhost:8080/",
+     "height": 101,
+     "referenced_widgets": [
+      "3b2448a60f5f4ba5b2c686037c8ecd78",
+      "60c5932944f24f5fad1d8da89c8e5ae9",
+      "aa31aed1b8854a4281fd7e81c60e1205",
+      "d4acf06c2414412f8f2fb4f48981c954",
+      "693d69251d3d48219c084af17b54b851",
+      "ff36d28c55dd4db3a0f76a87640fdfe2",
+      "71c49ef820494d5f8908a3daf39f0755",
+      "525dc406534f4369b11208816f8fd0d7",
+      "865f39213a7341b68f2fe73caaf801b1",
+      "eaf4c0231b6d4993b2f8e9e63d8b6921",
+      "a11edf3b018e42c88a63a515cf7fe478"
+     ]
     },
     "id": "nbvAV7vaz6yc",
-    "outputId": "bd4cae4d-5d5f-4f64-ea37-b979ef2c86bb"
+    "outputId": "724f5508-d976-4e31-b3d7-95fa65b2c1e8"
    },
    "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3b2448a60f5f4ba5b2c686037c8ecd78",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "consolidated.00.pth:   0%|          | 0.00/13.5G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Output text:\n",
       " What do llamas eat?\n",
-      "Llamas are herbivores, which means they eat grass, leaves, grasses, and they eat grass\n"
+      "Llamas and alpacas are herbivores, which means they eat grasses, leaves, grass\n"
      ]
     }
    ],
@@ -1520,7 +1610,7 @@
     "weights_file = hf_hub_download(\n",
     "   repo_id=\"meta-llama/Llama-2-7b-chat\",\n",
     "   filename=\"consolidated.00.pth\",\n",
-    "   lcoal_dir=\"Llama-2-7b-chat\n",
+    "   local_dir=\"Llama-2-7b-chat\"\n",
     ")\n",
     "\n",
     "model = Llama2Model(LLAMA2_CONFIG_7B)\n",
@@ -1563,7 +1653,1037 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.6"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "0723f467d37b4904819a8bb33ebda10f": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "08f0bf9459bd425498a5cb236f9d4a72": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_6b1821a7f4574e3aba09c1e410cc81e4",
+      "placeholder": "​",
+      "style": "IPY_MODEL_8c2873eaec3445888ad3d54ad7387950",
+      "value": "tokenizer.model: 100%"
+     }
+    },
+    "0a939565b6e94f08bee0a66e0f9827d4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "0c186f6539714d8eab023969ce47c500": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0c8f7044966e4207b12352503c67dcbb": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "10251d6f724e43788c41d4b7879cbfd3": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0c8f7044966e4207b12352503c67dcbb",
+      "max": 499723,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_8b5951213c9e4798a258146d61d02d11",
+      "value": 499723
+     }
+    },
+    "2c05df3f91e64df7b33905b1065a76f7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3b2448a60f5f4ba5b2c686037c8ecd78": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_60c5932944f24f5fad1d8da89c8e5ae9",
+       "IPY_MODEL_aa31aed1b8854a4281fd7e81c60e1205",
+       "IPY_MODEL_d4acf06c2414412f8f2fb4f48981c954"
+      ],
+      "layout": "IPY_MODEL_693d69251d3d48219c084af17b54b851"
+     }
+    },
+    "525dc406534f4369b11208816f8fd0d7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "53a973c0853b44418698136bd04df039": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_2c05df3f91e64df7b33905b1065a76f7",
+      "placeholder": "​",
+      "style": "IPY_MODEL_742ae5487f2648fcae7ca8e22c7f8db9",
+      "value": " 500k/500k [00:00&lt;00:00, 3.39MB/s]"
+     }
+    },
+    "60c5932944f24f5fad1d8da89c8e5ae9": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_ff36d28c55dd4db3a0f76a87640fdfe2",
+      "placeholder": "​",
+      "style": "IPY_MODEL_71c49ef820494d5f8908a3daf39f0755",
+      "value": "consolidated.00.pth: 100%"
+     }
+    },
+    "66e777955e8748df878f118f07f38dab": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_da89ae3ea4d2474e98f64ada608f3cea",
+       "IPY_MODEL_93e6da39c25f4edfaa72056c89df1f7f",
+       "IPY_MODEL_b628603e4cb0405398c916587ee96756"
+      ],
+      "layout": "IPY_MODEL_93bedcb9245e44a0a1eb7e4155070f66"
+     }
+    },
+    "693d69251d3d48219c084af17b54b851": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "6b1821a7f4574e3aba09c1e410cc81e4": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "71c49ef820494d5f8908a3daf39f0755": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "742ae5487f2648fcae7ca8e22c7f8db9": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "865f39213a7341b68f2fe73caaf801b1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "8b5951213c9e4798a258146d61d02d11": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "8c2873eaec3445888ad3d54ad7387950": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "93bedcb9245e44a0a1eb7e4155070f66": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "93e6da39c25f4edfaa72056c89df1f7f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d8e0f42068af4cb094e2f115f76e06e0",
+      "max": 13476925163,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_0a939565b6e94f08bee0a66e0f9827d4",
+      "value": 13476925163
+     }
+    },
+    "a11edf3b018e42c88a63a515cf7fe478": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "a5fedbb7ec2e43d99711bb4cd84b9486": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "aa31aed1b8854a4281fd7e81c60e1205": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_525dc406534f4369b11208816f8fd0d7",
+      "max": 13476925163,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_865f39213a7341b68f2fe73caaf801b1",
+      "value": 13476925163
+     }
+    },
+    "b628603e4cb0405398c916587ee96756": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_a5fedbb7ec2e43d99711bb4cd84b9486",
+      "placeholder": "​",
+      "style": "IPY_MODEL_0c186f6539714d8eab023969ce47c500",
+      "value": " 13.5G/13.5G [01:40&lt;00:00, 111MB/s]"
+     }
+    },
+    "bdb071e7145a4007ae01599333e72612": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d4acf06c2414412f8f2fb4f48981c954": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_eaf4c0231b6d4993b2f8e9e63d8b6921",
+      "placeholder": "​",
+      "style": "IPY_MODEL_a11edf3b018e42c88a63a515cf7fe478",
+      "value": " 13.5G/13.5G [02:52&lt;00:00, 81.1MB/s]"
+     }
+    },
+    "d8e0f42068af4cb094e2f115f76e06e0": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "da89ae3ea4d2474e98f64ada608f3cea": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0723f467d37b4904819a8bb33ebda10f",
+      "placeholder": "​",
+      "style": "IPY_MODEL_e54928776bc649339002adced63738b0",
+      "value": "consolidated.00.pth: 100%"
+     }
+    },
+    "e54928776bc649339002adced63738b0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "e6c75a6aa7b942fe84160e286e3acb3d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_08f0bf9459bd425498a5cb236f9d4a72",
+       "IPY_MODEL_10251d6f724e43788c41d4b7879cbfd3",
+       "IPY_MODEL_53a973c0853b44418698136bd04df039"
+      ],
+      "layout": "IPY_MODEL_bdb071e7145a4007ae01599333e72612"
+     }
+    },
+    "eaf4c0231b6d4993b2f8e9e63d8b6921": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "ff36d28c55dd4db3a0f76a87640fdfe2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    }
+   }
   }
  },
  "nbformat": 4,