浏览代码

Fix timeout issue related to spam data backup url (#544)

* Add backup url for Spam Dataset

* import urllib

* fix url

* fix timeout issue
Sebastian Raschka 9 月之前
父节点
当前提交
d1e99f6092

+ 2 - 3
appendix-E/01_main-chapter-code/appendix-E.ipynb

@@ -207,9 +207,8 @@
     "\n",
     "try:\n",
     "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
-    "except urllib.error.HTTPError:\n",
-    "    print(\"UCI Machine Learning Repository (https://archive.ics.uci.edu)\"\n",
-    "          \" temporary unavailable. Using backup URL.\")\n",
+    "except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
     "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
     "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
     "\n",

+ 2 - 3
ch06/01_main-chapter-code/ch06.ipynb

@@ -217,9 +217,8 @@
     "\n",
     "try:\n",
     "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path)\n",
-    "except urllib.error.HTTPError:\n",
-    "    print(\"UCI Machine Learning Repository (https://archive.ics.uci.edu)\"\n",
-    "          \" temporary unavailable. Using backup URL.\")\n",
+    "except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:\n",
+    "    print(f\"Primary URL failed: {e}. Trying backup URL...\")\n",
     "    url = \"https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip\"\n",
     "    download_and_unzip_spam_data(url, zip_path, extracted_path, data_file_path) "
    ]

+ 2 - 1
ch06/01_main-chapter-code/gpt_class_finetune.py

@@ -280,7 +280,8 @@ if __name__ == "__main__":
         download_and_unzip_spam_data(
             url, zip_path, extracted_path, data_file_path, test_mode=args.test_mode
         )
-    except urllib.error.HTTPError:
+    except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+        print(f"Primary URL failed: {e}. Trying backup URL...")
         backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
         download_and_unzip_spam_data(
             backup_url, zip_path, extracted_path, data_file_path, test_mode=args.test_mode

+ 2 - 1
ch06/02_bonus_additional-experiments/additional_experiments.py

@@ -605,7 +605,8 @@ if __name__ == "__main__":
     if not all_exist:
         try:
             download_and_unzip(url, zip_path, extract_to, new_file_path)
-        except urllib.error.HTTPError:
+        except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+            print(f"Primary URL failed: {e}. Trying backup URL...")
             backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
             download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
         create_dataset_csvs(new_file_path)

+ 2 - 1
ch06/03_bonus_imdb-classification/train_bert_hf_spam.py

@@ -412,7 +412,8 @@ if __name__ == "__main__":
     if not all_exist:
         try:
             download_and_unzip(url, zip_path, extract_to, new_file_path)
-        except urllib.error.HTTPError:
+        except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
+            print(f"Primary URL failed: {e}. Trying backup URL...")
             backup_url = "https://f001.backblazeb2.com/file/LLMs-from-scratch/sms%2Bspam%2Bcollection.zip"
             download_and_unzip(backup_url, zip_path, extract_to, new_file_path)
         create_dataset_csvs(new_file_path)