|
|
@@ -41,10 +41,10 @@
|
|
|
"output_type": "stream",
|
|
|
"text": [
|
|
|
"matplotlib version: 3.9.0\n",
|
|
|
- "numpy version: 1.25.2\n",
|
|
|
- "tiktoken version: 0.5.1\n",
|
|
|
- "torch version: 2.2.2\n",
|
|
|
- "tensorflow version: 2.15.0\n"
|
|
|
+ "numpy version: 1.26.4\n",
|
|
|
+ "tiktoken version: 0.7.0\n",
|
|
|
+ "torch version: 2.4.0\n",
|
|
|
+ "tensorflow version: 2.16.1\n"
|
|
|
]
|
|
|
}
|
|
|
],
|
|
|
@@ -400,7 +400,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 8,
|
|
|
+ "execution_count": 7,
|
|
|
"id": "c990ead6-53cd-49a7-a6d1-14d8c1518249",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -445,7 +445,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 9,
|
|
|
+ "execution_count": 8,
|
|
|
"id": "54aef09c-d6e3-4238-8653-b3a1b0a1077a",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -485,7 +485,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 10,
|
|
|
+ "execution_count": 9,
|
|
|
"id": "31402a67-a16e-4aeb-977e-70abb9c9949b",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -519,7 +519,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 11,
|
|
|
+ "execution_count": 10,
|
|
|
"id": "9b003797-161b-4d98-81dc-e68320e09fec",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -563,7 +563,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 12,
|
|
|
+ "execution_count": 11,
|
|
|
"id": "176ddf35-1c5f-4d7c-bf17-70f3e7069bd4",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -606,7 +606,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 13,
|
|
|
+ "execution_count": 12,
|
|
|
"id": "695d6f64-5084-4c23-aea4-105c9e38cfe4",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -643,7 +643,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 14,
|
|
|
+ "execution_count": 13,
|
|
|
"id": "0e17e027-ab9f-4fb5-ac9b-a009b831c122",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -681,7 +681,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 15,
|
|
|
+ "execution_count": 14,
|
|
|
"id": "62d0816e-b29a-4c8f-a9a5-a167562de978",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -715,7 +715,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 16,
|
|
|
+ "execution_count": 15,
|
|
|
"id": "168952a1-b964-4aa7-8e49-966fa26add54",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -779,7 +779,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 17,
|
|
|
+ "execution_count": 16,
|
|
|
"id": "654fde37-b2a9-4a20-a8d3-0206c056e2ff",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -810,7 +810,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 18,
|
|
|
+ "execution_count": 17,
|
|
|
"id": "6kgJbe4ehI4q",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -836,7 +836,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 19,
|
|
|
+ "execution_count": 18,
|
|
|
"id": "j2XPde_ThM_e",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -862,7 +862,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 20,
|
|
|
+ "execution_count": 19,
|
|
|
"id": "6b46a952-d50a-4837-af09-4095698f7fd1",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -918,7 +918,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 21,
|
|
|
+ "execution_count": 20,
|
|
|
"id": "0959c855-f860-4358-8b98-bc654f047578",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -957,7 +957,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 22,
|
|
|
+ "execution_count": 21,
|
|
|
"id": "f37b3eb0-854e-4895-9898-fa7d1e67566e",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -994,7 +994,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 23,
|
|
|
+ "execution_count": 22,
|
|
|
"id": "ca0116d0-d229-472c-9fbf-ebc229331c3e",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1038,7 +1038,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 24,
|
|
|
+ "execution_count": 23,
|
|
|
"id": "eb860488-5453-41d7-9870-23b723f742a0",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -1083,7 +1083,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 25,
|
|
|
+ "execution_count": 24,
|
|
|
"id": "7b9de31e-4096-47b3-976d-b6d2fdce04bc",
|
|
|
"metadata": {
|
|
|
"id": "7b9de31e-4096-47b3-976d-b6d2fdce04bc"
|
|
|
@@ -1127,7 +1127,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 26,
|
|
|
+ "execution_count": 25,
|
|
|
"id": "56f5b0c9-1065-4d67-98b9-010e42fc1e2a",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1135,7 +1135,7 @@
|
|
|
"name": "stdout",
|
|
|
"output_type": "stream",
|
|
|
"text": [
|
|
|
- "Training loss: 10.98758347829183\n",
|
|
|
+ "Training loss: 10.987583584255642\n",
|
|
|
"Validation loss: 10.98110580444336\n"
|
|
|
]
|
|
|
}
|
|
|
@@ -1186,7 +1186,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 27,
|
|
|
+ "execution_count": 26,
|
|
|
"id": "Mtp4gY0ZO-qq",
|
|
|
"metadata": {
|
|
|
"id": "Mtp4gY0ZO-qq"
|
|
|
@@ -1262,7 +1262,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 28,
|
|
|
+ "execution_count": 27,
|
|
|
"id": "3422000b-7aa2-485b-92df-99372cd22311",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -1323,7 +1323,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 29,
|
|
|
+ "execution_count": 28,
|
|
|
"id": "0WSRu2i0iHJE",
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
@@ -1434,7 +1434,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 30,
|
|
|
+ "execution_count": 29,
|
|
|
"id": "2734cee0-f6f9-42d5-b71c-fa7e0ef28b6d",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1501,7 +1501,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 31,
|
|
|
+ "execution_count": 30,
|
|
|
"id": "01a5ce39-3dc8-4c35-96bc-6410a1e42412",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1543,7 +1543,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 32,
|
|
|
+ "execution_count": 31,
|
|
|
"id": "6400572f-b3c8-49e2-95bc-433e55c5b3a1",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1563,7 +1563,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 33,
|
|
|
+ "execution_count": 32,
|
|
|
"id": "b23b863e-252a-403c-b5b1-62bc0a42319f",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1615,7 +1615,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 34,
|
|
|
+ "execution_count": 33,
|
|
|
"id": "0759e4c8-5362-467c-bec6-b0a19d1ba43d",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -1633,7 +1633,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 35,
|
|
|
+ "execution_count": 34,
|
|
|
"id": "2e66e613-4aca-4296-a984-ddd0d80c6578",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1677,7 +1677,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 36,
|
|
|
+ "execution_count": 35,
|
|
|
"id": "e4600713-c51e-4f53-bf58-040a6eb362b8",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1710,7 +1710,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 37,
|
|
|
+ "execution_count": 36,
|
|
|
"id": "9dfb48f0-bc3f-46a5-9844-33b6c9b0f4df",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1779,7 +1779,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 38,
|
|
|
+ "execution_count": 37,
|
|
|
"id": "2a7f908a-e9ec-446a-b407-fb6dbf05c806",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1802,7 +1802,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 39,
|
|
|
+ "execution_count": 38,
|
|
|
"id": "753865ed-79c5-48b1-b9f2-ccb132ff1d2f",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1826,7 +1826,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 40,
|
|
|
+ "execution_count": 39,
|
|
|
"id": "4844f000-c329-4e7e-aa89-16a2c4ebee43",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1862,7 +1862,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 41,
|
|
|
+ "execution_count": 40,
|
|
|
"id": "8e318891-bcc0-4d71-b147-33ce55febfa3",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -1908,7 +1908,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 42,
|
|
|
+ "execution_count": 41,
|
|
|
"id": "aa2a0d7d-0457-42d1-ab9d-bd67683e7ed8",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -1964,7 +1964,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 43,
|
|
|
+ "execution_count": 42,
|
|
|
"id": "3d67d869-ac04-4382-bcfb-c96d1ca80d47",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -1982,14 +1982,14 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 44,
|
|
|
+ "execution_count": 43,
|
|
|
"id": "9d57d914-60a3-47f1-b499-5352f4c457cb",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"model = GPTModel(GPT_CONFIG_124M)\n",
|
|
|
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
|
|
- "model.load_state_dict(torch.load(\"model.pth\", map_location=device))\n",
|
|
|
+ "model.load_state_dict(torch.load(\"model.pth\", map_location=device, weights_only=True))\n",
|
|
|
"model.eval();"
|
|
|
]
|
|
|
},
|
|
|
@@ -2004,7 +2004,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 45,
|
|
|
+ "execution_count": 44,
|
|
|
"id": "bbd175bb-edf4-450e-a6de-d3e8913c6532",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -2019,12 +2019,12 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 46,
|
|
|
+ "execution_count": 45,
|
|
|
"id": "8a0c7295-c822-43bf-9286-c45abc542868",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "checkpoint = torch.load(\"model_and_optimizer.pth\")\n",
|
|
|
+ "checkpoint = torch.load(\"model_and_optimizer.pth\", weights_only=True)\n",
|
|
|
"\n",
|
|
|
"model = GPTModel(GPT_CONFIG_124M)\n",
|
|
|
"model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
|
|
|
@@ -2072,7 +2072,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 47,
|
|
|
+ "execution_count": 46,
|
|
|
"id": "fb9fdf02-972a-444e-bf65-8ffcaaf30ce8",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -2082,7 +2082,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 48,
|
|
|
+ "execution_count": 47,
|
|
|
"id": "a0747edc-559c-44ef-a93f-079d60227e3f",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -2090,8 +2090,8 @@
|
|
|
"name": "stdout",
|
|
|
"output_type": "stream",
|
|
|
"text": [
|
|
|
- "TensorFlow version: 2.15.0\n",
|
|
|
- "tqdm version: 4.66.2\n"
|
|
|
+ "TensorFlow version: 2.16.1\n",
|
|
|
+ "tqdm version: 4.66.4\n"
|
|
|
]
|
|
|
}
|
|
|
],
|
|
|
@@ -2102,7 +2102,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 49,
|
|
|
+ "execution_count": 48,
|
|
|
"id": "c5bc89eb-4d39-4287-9b0c-e459ebe7f5ed",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -2121,21 +2121,21 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 50,
|
|
|
+ "execution_count": 49,
|
|
|
"id": "76271dd7-108d-4f5b-9c01-6ae0aac4b395",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
{
|
|
|
- "name": "stderr",
|
|
|
+ "name": "stdout",
|
|
|
"output_type": "stream",
|
|
|
"text": [
|
|
|
- "checkpoint: 100%|███████████████████████████| 77.0/77.0 [00:00<00:00, 58.8kiB/s]\n",
|
|
|
- "encoder.json: 100%|███████████████████████| 1.04M/1.04M [00:00<00:00, 2.70MiB/s]\n",
|
|
|
- "hparams.json: 100%|█████████████████████████| 90.0/90.0 [00:00<00:00, 27.8kiB/s]\n",
|
|
|
- "model.ckpt.data-00000-of-00001: 100%|███████| 498M/498M [00:30<00:00, 16.1MiB/s]\n",
|
|
|
- "model.ckpt.index: 100%|███████████████████| 5.21k/5.21k [00:00<00:00, 1.18MiB/s]\n",
|
|
|
- "model.ckpt.meta: 100%|██████████████████████| 471k/471k [00:00<00:00, 2.22MiB/s]\n",
|
|
|
- "vocab.bpe: 100%|████████████████████████████| 456k/456k [00:00<00:00, 2.04MiB/s]\n"
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/checkpoint\n",
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/encoder.json\n",
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/hparams.json\n",
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001\n",
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/model.ckpt.index\n",
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/model.ckpt.meta\n",
|
|
|
+ "File already exists and is up-to-date: gpt2/124M/vocab.bpe\n"
|
|
|
]
|
|
|
}
|
|
|
],
|
|
|
@@ -2145,7 +2145,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 51,
|
|
|
+ "execution_count": 50,
|
|
|
"id": "b1a31951-d971-4a6e-9c43-11ee1168ec6a",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -2163,7 +2163,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 52,
|
|
|
+ "execution_count": 51,
|
|
|
"id": "857c8331-130e-46ba-921d-fa35d7a73cfe",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -2181,7 +2181,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 53,
|
|
|
+ "execution_count": 52,
|
|
|
"id": "c48dac94-8562-4a66-84ef-46c613cdc4cd",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -2241,7 +2241,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 54,
|
|
|
+ "execution_count": 53,
|
|
|
"id": "9fef90dd-0654-4667-844f-08e28339ef7d",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -2274,7 +2274,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 55,
|
|
|
+ "execution_count": 54,
|
|
|
"id": "f9a92229-c002-49a6-8cfb-248297ad8296",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -2287,7 +2287,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 56,
|
|
|
+ "execution_count": 55,
|
|
|
"id": "f22d5d95-ca5a-425c-a9ec-fc432a12d4e9",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
@@ -2369,7 +2369,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 57,
|
|
|
+ "execution_count": 56,
|
|
|
"id": "1f690253-f845-4347-b7b6-43fabbd2affa",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|