fixed num_workers (#229)

* fixed num_workers

* ch06 & ch07: added num_workers to create_dataloader_v1
Daniel Kleine, 1 year ago
parent commit bbb2a0c3d5
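
The change is the same in every affected file: `create_dataloader_v1` either hardcoded `num_workers=0` in the `DataLoader` call or omitted the argument entirely, so any value a caller passed was silently ignored. The fix adds `num_workers=0` to the function signature where it was missing and forwards the value to the `DataLoader`. A consolidated sketch of the corrected function, assembled from the hunks below rather than copied verbatim from any single file (`GPTDatasetV1` is the dataset class defined alongside it in the repository):

import tiktoken
from torch.utils.data import DataLoader

def create_dataloader_v1(txt, batch_size=4, max_length=256,
                         stride=128, shuffle=True, drop_last=True, num_workers=0):
    # Initialize the tokenizer
    tokenizer = tiktoken.get_encoding("gpt2")

    # Create dataset (GPTDatasetV1 is defined in the same module in the repo)
    dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)

    # Create dataloader; num_workers is now forwarded instead of pinned to 0
    dataloader = DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle,
        drop_last=drop_last, num_workers=num_workers)

    return dataloader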

+ 1 - 1
appendix-D/01_main-chapter-code/previous_chapters.py

@@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 1 - 1
ch02/01_main-chapter-code/ch02.ipynb

@@ -1346,7 +1346,7 @@
     "        batch_size=batch_size,\n",
     "        shuffle=shuffle,\n",
     "        drop_last=drop_last,\n",
-    "        num_workers=0\n",
+    "        num_workers=num_workers\n",
     "    )\n",
     "\n",
     "    return dataloader"

+ 1 - 1
ch02/01_main-chapter-code/dataloader.ipynb

@@ -82,7 +82,7 @@
     "\n",
     "    # Create dataloader\n",
     "    dataloader = DataLoader(\n",
-    "        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)\n",
+    "        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)\n",
     "\n",
     "    return dataloader\n",
     "\n",

+ 1 - 1
ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

@@ -128,7 +128,7 @@
     "        batch_size=batch_size,\n",
     "        shuffle=shuffle,\n",
     "        drop_last=drop_last,\n",
-    "        num_workers=0\n",
+    "        num_workers=num_workers\n",
     "    )\n",
     "\n",
     "    return dataloader"

+ 2 - 2
ch04/01_main-chapter-code/gpt.py

@@ -13,7 +13,7 @@ from torch.utils.data import Dataset, DataLoader
 
 
 class GPTDatasetV1(Dataset):
-    def __init__(self, txt, tokenizer, max_length, stride, num_workers=0):
+    def __init__(self, txt, tokenizer, max_length, stride):
         self.input_ids = []
         self.target_ids = []
 
@@ -44,7 +44,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
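
Besides forwarding `num_workers` in the `DataLoader` call, this hunk drops the parameter from `GPTDatasetV1.__init__`, where it had been accepted but never used. Worker processes are a `DataLoader` concern rather than a `Dataset` one, so the argument belongs on `create_dataloader_v1` and goes straight to the `DataLoader` constructor.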
 

+ 1 - 1
ch04/01_main-chapter-code/previous_chapters.py

@@ -41,7 +41,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 1 - 1
ch04/02_performance-analysis/previous_chapters.py

@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 1 - 1
ch05/01_main-chapter-code/previous_chapters.py

@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 1 - 1
ch05/02_alternative_weight_loading/previous_chapters.py

@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 1 - 1
ch05/03_bonus_pretraining_on_gutenberg/previous_chapters.py

@@ -44,7 +44,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
     tokenizer = tiktoken.get_encoding("gpt2")
     dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 1 - 1
ch05/05_bonus_hparam_tuning/previous_chapters.py

@@ -49,7 +49,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=0)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 2 - 2
ch06/01_main-chapter-code/previous_chapters.py

@@ -41,7 +41,7 @@ class GPTDatasetV1(Dataset):
 
 
 def create_dataloader_v1(txt, batch_size=4, max_length=256,
-                         stride=128, shuffle=True, drop_last=True):
+                         stride=128, shuffle=True, drop_last=True, num_workers=0):
     # Initialize the tokenizer
     tokenizer = tiktoken.get_encoding("gpt2")
 
@@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 2 - 2
ch06/02_bonus_additional-experiments/previous_chapters.py

@@ -41,7 +41,7 @@ class GPTDatasetV1(Dataset):
 
 
 def create_dataloader_v1(txt, batch_size=4, max_length=256,
-                         stride=128, shuffle=True, drop_last=True):
+                         stride=128, shuffle=True, drop_last=True, num_workers=0):
     # Initialize the tokenizer
     tokenizer = tiktoken.get_encoding("gpt2")
 
@@ -50,7 +50,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 2 - 2
ch06/03_bonus_imdb-classification/previous_chapters.py

@@ -42,7 +42,7 @@ class GPTDatasetV1(Dataset):
 
 
 def create_dataloader_v1(txt, batch_size=4, max_length=256,
-                         stride=128, shuffle=True, drop_last=True):
+                         stride=128, shuffle=True, drop_last=True, num_workers=0):
     # Initialize the tokenizer
     tokenizer = tiktoken.get_encoding("gpt2")
 
@@ -51,7 +51,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
 

+ 2 - 2
ch07/01_main-chapter-code/previous_chapters.py

@@ -45,7 +45,7 @@ class GPTDatasetV1(Dataset):
 
 
 def create_dataloader_v1(txt, batch_size=4, max_length=256,
-                         stride=128, shuffle=True, drop_last=True):
+                         stride=128, shuffle=True, drop_last=True, num_workers=0):
     # Initialize the tokenizer
     tokenizer = tiktoken.get_encoding("gpt2")
 
@@ -54,7 +54,7 @@ def create_dataloader_v1(txt, batch_size=4, max_length=256,
 
     # Create dataloader
     dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
 
     return dataloader
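
With the argument honored everywhere, callers can opt into multi-process batch loading. A hypothetical usage sketch (the filename and parameter values are illustrative, not taken from the commit):

# Hypothetical usage; any raw training text works here.
with open("the-verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

# num_workers > 0 starts that many background worker processes for batching.
train_loader = create_dataloader_v1(
    raw_text, batch_size=8, max_length=256, stride=128, num_workers=2)

One caveat: with `num_workers > 0`, PyTorch spawns worker subprocesses, so on Windows and macOS iteration over the loader should happen under an `if __name__ == "__main__":` guard. `num_workers=0` remains the safe default, which is why the signature keeps it.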