Browse Source

first sync

rasbt 2 years ago
commit
d66b23588d

+ 162 - 0
.gitignore

@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.key
+solution/
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

+ 201 - 0
LICENSE.txt

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2023 Sebastian Raschka
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 2 - 0
README.md

@@ -0,0 +1,2 @@
+Details will follow ...
+

+ 2 - 0
ch01/README.md

@@ -0,0 +1,2 @@
+Details will follow ...
+

+ 78 - 0
ch02/01_optional-python-setup-preferences/README.md

@@ -0,0 +1,78 @@
+# Python Setup Tips
+
+
+
+There are several different ways you can install Python and set up your computing environment. Here, I am illustrating my personal preference. 
+
+(I am using computers running macOS, but this workflow is similar for Linux machines and may work for other operating systems as well.)
+
+
+
+## 1. Download and install Miniforge
+
+Download miniforge from the GitHub repository [here](https://github.com/conda-forge/miniforge).
+
+<img src="figures/download.png" alt="download" style="zoom:33%;" />
+
+Depending on your operating system, this should download either an `.sh` (macOS, Linux) or `.exe` file (Windows). 
+
+For the `.sh` file, open your command line terminal and execute the following command
+
+```bash
+sh ~/Desktop/Miniforge3-MacOSX-arm64.sh
+```
+
+where `Desktop/` is the folder where the Miniforge installer was downloaded to. On your computer, you may have to replace it with `Downloads/`.
+
+<img src="figures/miniforge-install.png" alt="miniforge-install" style="zoom:33%;" />
+
+Next, step through the download instructions, confirming with "Enter".
+
+## 2. Create a new virtual environment
+
+After the installation was successfully completed, I recommend creating a new virtual environment called `LLMs`, which you can do by executing
+
+```bash
+conda create -n LLMs python=3.10
+```
+
+<img src="figures/new-env.png" alt="new-env" style="zoom:33%;" />
+
+Next, activate your new virtual environment (you have to do it every time you open a new terminal window or tab):
+
+```bash
+conda activate LLMs
+```
+
+<img src="figures/activate-env.png" alt="activate-env" style="zoom:33%;" />
+
+## Optional: styling your terminal
+
+If you want to style your terminal similar to mine so that you can see which virtual environment is active,  check out the [Oh My Zsh](https://github.com/ohmyzsh/ohmyzsh) project.
+
+
+
+## 3. Install new Python libraries
+
+
+
+To install new Python libraries, you can now use the `conda` package installer. For example, you can install [JupyterLab](https://jupyter.org/install) and [watermark](https://github.com/rasbt/watermark) as follows:
+
+```bash
+conda install jupyterlab watermark
+```
+
+<img src="figures/conda-install.png" alt="conda-install" style="zoom:33%;" />
+
+
+
+You can also still use `pip` to install libraries. By default, `pip` should be linked to your new `LLMs` conda environment:
+
+<img src="figures/check-pip.png" alt="check-pip" style="zoom:33%;" />
+
+---
+
+
+
+
+Any questions? Please feel free to reach out in the [Discussion Forum](https://github.com/rasbt/LLMs-from-scratch/discussions).

BIN
ch02/01_optional-python-setup-preferences/figures/activate-env.png


BIN
ch02/01_optional-python-setup-preferences/figures/check-pip.png


BIN
ch02/01_optional-python-setup-preferences/figures/conda-install.png


BIN
ch02/01_optional-python-setup-preferences/figures/download.png


BIN
ch02/01_optional-python-setup-preferences/figures/miniforge-install.png


BIN
ch02/01_optional-python-setup-preferences/figures/new-env.png


+ 46 - 0
ch02/02_installing-python-libraries/README.md

@@ -0,0 +1,46 @@
+# Libraries Used In This Workshop
+
+We will be using the following libraries in this workshop, and I highly recommend installing them before attending the event:
+
+- numpy >= 1.24.3 (The fundamental package for scientific computing with Python)
+- scipy >= 1.10.1 (Additional functions for NumPy)
+- pandas >= 2.0.2 (A data frame library)
+- matplotlib >= 3.7.1 (A plotting library)
+- jupyterlab >= 4.0 (An application for running Jupyter notebooks)
+- ipywidgets >= 8.0.6 (Fixes progress bar issues in Jupyter Lab)
+- scikit-learn >= 1.2.2 (A general machine learning library)
+- watermark >= 2.4.2 (An IPython/Jupyter extension for printing package information)
+- torch >= 2.0.1 (The PyTorch deep learning library)
+- torchvision >= 0.15.2 (PyTorch utilities for computer vision)
+- torchmetrics >= 0.11.4 (Metrics for PyTorch)
+- transformers >= 4.30.2 (Language transformers and LLMs for PyTorch)
+- lightning >= 2.0.3 (A library for advanced PyTorch features: multi-GPU, mixed-precision etc.)
+
+To install these requirements most conveniently, you can use the `requirements.txt` file:
+
+```
+pip install -r requirements.txt
+```
+
+![install-requirements](figures/install-requirements.png)
+
+Then, after completing the installation, please check if all the packages are installed and are up to date using
+
+```
+python python_environment_check.py
+```
+
+![check_1](figures/check_1.png)
+
+It's also recommended to check the versions in JupyterLab by running the `jupyter_environment_check.ipynb` in this directory. Ideally, it should look like as follows:
+
+![check_1](figures/check_2.png)
+
+If you see the following issues, it's likely that your JupyterLab instance is connected to the wrong conda environment:
+
+![jupyter-issues](figures/jupyter-issues.png)
+
+
+In this case, you may want to use `watermark` to check if you opened the JupyterLab instance in the right conda environment using the `--conda` flag:
+
+![watermark](figures/watermark.png)

BIN
ch02/02_installing-python-libraries/figures/check_1.png


BIN
ch02/02_installing-python-libraries/figures/check_2.png


BIN
ch02/02_installing-python-libraries/figures/install-requirements.png


BIN
ch02/02_installing-python-libraries/figures/jupyter-issues.png


BIN
ch02/02_installing-python-libraries/figures/watermark.png


+ 123 - 0
ch02/02_installing-python-libraries/jupyter_environment_check.ipynb

@@ -0,0 +1,123 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "18d54544-92d0-412c-8e28-f9083b2bab6f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[OK] Your Python version is 3.10.12\n"
+     ]
+    }
+   ],
+   "source": [
+    "from python_environment_check import check_packages, get_requirements_dict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "60e03297-4337-4181-b8eb-f483f406954a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = get_requirements_dict()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d982ddf9-c167-4ed2-9fce-e271f2b1e1de",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[OK] numpy 1.25.1\n",
+      "[OK] scipy 1.11.1\n",
+      "[OK] pandas 2.0.3\n",
+      "[OK] matplotlib 3.7.2\n",
+      "[OK] jupyterlab 4.0.3\n",
+      "[OK] ipywidgets 8.0.7\n",
+      "[OK] watermark 2.4.3\n",
+      "[OK] torch 2.0.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "check_packages(d)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e0bdd547-333c-42a9-92f3-4e552f206cf3",
+   "metadata": {},
+   "source": [
+    "Same checks as above but using watermark:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "9d696044-9272-4b96-8305-34602807bb94",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext watermark"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ce321731-a15a-4579-b33b-035730371eb3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "numpy     : 1.25.1\n",
+      "scipy     : 1.11.1\n",
+      "pandas    : 2.0.3\n",
+      "matplotlib: 3.7.2\n",
+      "sklearn   : 1.3.0\n",
+      "watermark : 2.4.3\n",
+      "torch     : 2.0.1\n",
+      "\n",
+      "conda environment: LLMs\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%watermark --conda -p numpy,scipy,pandas,matplotlib,sklearn,watermark,torch"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

+ 62 - 0
ch02/02_installing-python-libraries/python_environment_check.py

@@ -0,0 +1,62 @@
+# Sebastian Raschka, 2023
+
+from os.path import dirname, join, realpath
+from packaging.version import parse as version_parse
+import platform
+import sys
+
# Warn early if the interpreter is older than the recommended minimum.
if version_parse(platform.python_version()) >= version_parse('3.9'):
    print('[OK] Your Python version is %s' % (platform.python_version()))
else:
    print('[FAIL] We recommend Python 3.9 or newer but'
          ' found version %s' % (sys.version))
+
+
def get_packages(pkgs):
    """Import each package in *pkgs* and collect its version.

    For every name, the attributes ``__version__``, ``version`` and
    ``version_info`` are probed in that order; ``'0.0'`` is recorded when
    none exist, and ``'N/A'`` (plus a printed [FAIL] line) when the import
    itself fails.

    Returns:
        A list of version values, one entry per requested package.
    """
    # attribute names probed on each module, in order of preference
    version_attrs = ('__version__', 'version', 'version_info')

    found = []
    for name in pkgs:
        try:
            module = __import__(name)
        except ImportError:
            print(f'[FAIL]: {name} is not installed and/or cannot be imported.')
            found.append('N/A')
            continue
        for attr in version_attrs:
            if hasattr(module, attr):
                found.append(getattr(module, attr))
                break
        else:
            # module exposes no recognizable version attribute
            found.append('0.0')
    return found
+
+
def get_requirements_dict():
    """Parse the requirements.txt next to this script.

    Returns:
        dict mapping package name -> minimum version string.

    Fixes over the naive ``line.split(" ")`` approach: the trailing
    newline is no longer kept in the version value, blank lines and
    ``#`` comments are skipped, and both ``pkg >= 1.2`` and ``pkg>=1.2``
    spellings are accepted.
    """
    project_root = dirname(realpath(__file__))
    requirements_file = join(project_root, "requirements.txt")
    d = {}
    with open(requirements_file) as f:
        for line in f:
            line = line.strip()
            # skip blank lines and comment lines
            if not line or line.startswith('#'):
                continue
            if '>=' in line:
                # handles both "pkg >= 1.2" and "pkg>=1.2"
                name, _, version = line.partition('>=')
            else:
                # fall back to the original whitespace-based format
                parts = line.split()
                name, version = parts[0], parts[-1]
            d[name.strip()] = version.strip()
    return d
+
+
def check_packages(d):
    """Print an [OK]/[FAIL] report comparing installed versions to *d*.

    Arguments:
        d: dict mapping package name -> suggested minimum version string.

    Packages that failed to import ('N/A') are skipped here because
    get_packages already printed a [FAIL] line for them.
    """
    installed = get_packages(d.keys())

    for (name, wanted), have in zip(d.items(), installed):
        if have == 'N/A':
            continue
        have, wanted = version_parse(have), version_parse(wanted)
        if have >= wanted:
            print(f'[OK] {name} {have}')
        else:
            print(f'[FAIL] {name} {have}, please upgrade to >= {wanted}')
+
+
if __name__ == '__main__':
    # Run the full environment check when executed as a script.
    check_packages(get_requirements_dict())

+ 8 - 0
ch02/02_installing-python-libraries/requirements.txt

@@ -0,0 +1,8 @@
+numpy >= 1.24.3
+scipy >= 1.10.1
+pandas >= 2.0.2
+matplotlib >= 3.7.1
+jupyterlab >= 4.0
+ipywidgets >= 8.0.6
+watermark >= 2.4.2
+torch >= 2.0.1

+ 176 - 0
ch02/03_main-chapter-code/ch02-DDP-script.py

@@ -0,0 +1,176 @@
+import torch
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+
+# NEW imports:
+import os
+import torch.multiprocessing as mp
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group, destroy_process_group
+
+
+# NEW: function to initialize a distributed process group (1 process / GPU)
+# this allows communication among processes
def ddp_setup(rank, world_size):
    """
    Initialize a distributed process group (one process per GPU) so the
    workers can communicate.

    Arguments:
        rank: a unique process ID
        world_size: total number of processes in the group
    """
    # Every worker rendezvous with the rank:0 process; we assume all GPUs
    # live on this one machine, so localhost plus any free port suffices.
    os.environ.update(MASTER_ADDR="localhost", MASTER_PORT="12345")

    # nccl = NVIDIA Collective Communication Library.
    # Windows users may have to use the "gloo" backend instead of "nccl".
    init_process_group(backend="nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)
+
+
class ToyDataset(Dataset):
    """Minimal map-style dataset wrapping pre-built feature/label tensors."""

    def __init__(self, X, y):
        self.features = X
        self.labels = y

    def __getitem__(self, index):
        # return one (features, label) pair
        return self.features[index], self.labels[index]

    def __len__(self):
        # number of samples == number of labels
        return self.labels.shape[0]
+
+
class NeuralNetwork(torch.nn.Module):
    """Small MLP classifier: two ReLU hidden layers (30 and 20 units)."""

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        hidden_1, hidden_2 = 30, 20
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(num_inputs, hidden_1),   # 1st hidden layer
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_1, hidden_2),     # 2nd hidden layer
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_2, num_outputs),  # output layer
        )

    def forward(self, x):
        # returns raw, unnormalized class scores (logits)
        return self.layers(x)
+
+
def prepare_dataset():
    """Build the toy train/test DataLoaders for the DDP demo.

    Returns:
        (train_loader, test_loader); the train loader uses a
        DistributedSampler so each GPU sees a disjoint chunk of the data.
    """
    features_train = torch.tensor([
        [-1.2, 3.1],
        [-0.9, 2.9],
        [-0.5, 2.6],
        [2.3, -1.1],
        [2.7, -1.5]
    ])
    targets_train = torch.tensor([0, 0, 0, 1, 1])

    features_test = torch.tensor([
        [-0.8, 2.8],
        [2.6, -1.6],
    ])
    targets_test = torch.tensor([0, 1])

    train_ds = ToyDataset(features_train, targets_train)
    test_ds = ToyDataset(features_test, targets_test)

    # shuffle must stay False: the DistributedSampler below already
    # shuffles and partitions samples across GPUs without overlap
    train_loader = DataLoader(
        dataset=train_ds,
        batch_size=2,
        shuffle=False,
        pin_memory=True,
        drop_last=True,
        sampler=DistributedSampler(train_ds),
    )
    test_loader = DataLoader(dataset=test_ds, batch_size=2, shuffle=False)
    return train_loader, test_loader
+
+
+# NEW: wrapper
def main(rank, world_size, num_epochs):
    """Per-process training entry point (one process per GPU).

    Arguments:
        rank: unique process/GPU index (passed automatically by mp.spawn)
        world_size: total number of processes in the group
        num_epochs: number of passes over the training data
    """
    ddp_setup(rank, world_size)  # NEW: initialize process groups

    train_loader, test_loader = prepare_dataset()
    model = NeuralNetwork(num_inputs=2, num_outputs=2)
    model.to(rank)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

    model = DDP(model, device_ids=[rank])  # NEW: wrap model with DDP
    # the core model is now accessible as model.module

    for epoch in range(num_epochs):

        model.train()
        # FIX: iterate the loader directly. The previous
        # `for features, labels in enumerate(train_loader)` unpacked the
        # (index, batch) pairs yielded by enumerate, so `features` was the
        # batch index and `labels` the whole (features, labels) tuple,
        # which crashes at `.to(rank)` below.
        for features, labels in train_loader:

            features, labels = features.to(rank), labels.to(rank)  # NEW: use rank
            logits = model(features)
            loss = F.cross_entropy(logits, labels)  # Loss function

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            ### LOGGING
            print(f"[GPU{rank}] Epoch: {epoch+1:03d}/{num_epochs:03d}"
                  f" | Batchsize {labels.shape[0]:03d}"
                  f" | Train/Val Loss: {loss:.2f}")

    # evaluation happens once, after all epochs
    model.eval()
    train_acc = compute_accuracy(model, train_loader, device=rank)
    print(f"[GPU{rank}] Training accuracy", train_acc)
    test_acc = compute_accuracy(model, test_loader, device=rank)
    print(f"[GPU{rank}] Test accuracy", test_acc)

    destroy_process_group()  # NEW: cleanly exit distributed mode
+
+
def compute_accuracy(model, dataloader, device):
    """Return the fraction of correctly classified samples in *dataloader*.

    Arguments:
        model: a module mapping features -> class logits
        dataloader: iterable of (features, labels) batches
        device: device (or rank) the batches should be moved to
    """
    model = model.eval()
    num_correct = 0.0
    num_seen = 0

    for features, labels in dataloader:
        features, labels = features.to(device), labels.to(device)

        # no gradients needed for evaluation
        with torch.no_grad():
            logits = model(features)
        hits = torch.argmax(logits, dim=1) == labels
        num_correct += torch.sum(hits)
        num_seen += len(hits)
    return (num_correct / num_seen).item()
+
+
+if __name__ == "__main__":
+    print("PyTorch version:", torch.__version__)
+    print("CUDA available:", torch.cuda.is_available())
+    print("Number of GPUs available:", torch.cuda.device_count())
+
+    torch.manual_seed(123)
+
+    # NEW: spawn new processes
+    # note that spawn will automatically pass the rank
+    num_epochs = 3
+    world_size = torch.cuda.device_count()
+    mp.spawn(main, args=(world_size, num_epochs), nprocs=world_size)
+    # nprocs=world_size spawns one process per GPU
+

+ 1243 - 0
ch02/03_main-chapter-code/ch02-code-part1.ipynb

@@ -0,0 +1,1243 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ca7fc8a0-280c-4979-b0c7-fc3a99b3b785",
+   "metadata": {},
+   "source": [
+    "# Chapter 2: Introduction to PyTorch (Part 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f5bf13d2-8fc2-483e-88cc-6b4310221e68",
+   "metadata": {},
+   "source": [
+    "## 2.1 What is PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "96ee5660-5327-48e2-9104-a882b3b2afa4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "print(torch.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f73ad4e4-7ec6-4467-a9e9-0cdf6d195264",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "False\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(torch.cuda.is_available())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b",
+   "metadata": {},
+   "source": [
+    "## 2.2 Understanding tensors"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26d7f785-e048-42bc-9182-a556af6bb7f4",
+   "metadata": {},
+   "source": [
+    "### 2.2.1 Scalars, vectors, matrices, and tensors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a3a464d6-cec8-4363-87bd-ea4f900baced",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import numpy as np\n",
+    "\n",
+    "# create a 0D tensor (scalar) from a Python integer\n",
+    "tensor0d = torch.tensor(1)\n",
+    "\n",
+    "# create a 1D tensor (vector) from a Python list\n",
+    "tensor1d = torch.tensor([1, 2, 3])\n",
+    "\n",
+    "# create a 2D tensor from a nested Python list\n",
+    "tensor2d = torch.tensor([[1, 2], [3, 4]])\n",
+    "\n",
+    "# create a 3D tensor from a nested Python list\n",
+    "tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
+    "\n",
+    "# create a 3D tensor from NumPy array\n",
+    "ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
+    "tensor3d_2 = torch.tensor(ary3d)  # Copies NumPy array\n",
+    "tensor3d_3 = torch.from_numpy(ary3d)  # Shares memory with NumPy array"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "dbe14c47-499a-4d48-b354-a0e6fd957872",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[1, 2],\n",
+      "         [3, 4]],\n",
+      "\n",
+      "        [[5, 6],\n",
+      "         [7, 8]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "ary3d[0, 0, 0] = 999\n",
+    "print(tensor3d_2) # remains unchanged"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e3e4c23a-cdba-46f5-a2dc-5fb32bf9117b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[999,   2],\n",
+      "         [  3,   4]],\n",
+      "\n",
+      "        [[  5,   6],\n",
+      "         [  7,   8]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tensor3d_3) # changes because of memory sharing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "63dec48d-2b60-41a2-ac06-fef7e718605a",
+   "metadata": {},
+   "source": [
+    "### 2.2.2 Tensor data types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "3f48c014-e1a2-4a53-b5c5-125812d4034c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "tensor1d = torch.tensor([1, 2, 3])\n",
+    "print(tensor1d.dtype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5429a086-9de2-4ac7-9f14-d087a7507394",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.float32\n"
+     ]
+    }
+   ],
+   "source": [
+    "floatvec = torch.tensor([1.0, 2.0, 3.0])\n",
+    "print(floatvec.dtype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a9a438d1-49bb-481c-8442-7cc2bb3dd4af",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.float32\n"
+     ]
+    }
+   ],
+   "source": [
+    "floatvec = tensor1d.to(torch.float32)\n",
+    "print(floatvec.dtype)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2020deb5-aa02-4524-b311-c010f4ad27ff",
+   "metadata": {},
+   "source": [
+    "### 2.2.3 Common PyTorch tensor operations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c02095f2-8a48-4953-b3c9-5313d4362ce7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 2, 3],\n",
+       "        [4, 5, 6]])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d = torch.tensor([[1, 2, 3], [4, 5, 6]])\n",
+    "tensor2d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "f33e1d45-5b2c-4afe-b4b2-66ac4099fd1a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 3])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "f3a4129d-f870-4e03-9c32-cd8521cb83fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 2],\n",
+       "        [3, 4],\n",
+       "        [5, 6]])"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.reshape(3, 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "589ac0a7-adc7-41f3-b721-155f580e9369",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 2],\n",
+       "        [3, 4],\n",
+       "        [5, 6]])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.view(3, 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "344e307f-ba5d-4f9a-a791-2c75a3d1417e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 4],\n",
+       "        [2, 5],\n",
+       "        [3, 6]])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "19a75030-6a41-4ca8-9aae-c507ae79225c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[14, 32],\n",
+       "        [32, 77]])"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.matmul(tensor2d.T)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e7c950bc-d640-4203-b210-3ac8932fe4d4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[14, 32],\n",
+       "        [32, 77]])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d @ tensor2d.T"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c15bdeb-78e2-4870-8a4f-a9f591666f38",
+   "metadata": {},
+   "source": [
+    "## 2.3 Seeing models as computation graphs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "22af61e9-0443-4705-94d7-24c21add09c7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(0.0852)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "\n",
+    "y = torch.tensor([1.0])  # true label\n",
+    "x1 = torch.tensor([1.1]) # input feature\n",
+    "w1 = torch.tensor([2.2]) # weight parameter\n",
+    "b = torch.tensor([0.0])  # bias unit\n",
+    "\n",
+    "z = x1 * w1 + b          # net input\n",
+    "a = torch.sigmoid(z)     # activation & output\n",
+    "\n",
+    "loss = F.binary_cross_entropy(a, y)\n",
+    "print(loss)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f9424f26-2bac-47e7-b834-92ece802247c",
+   "metadata": {},
+   "source": [
+    "## 2.4 Automatic differentiation made easy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "ebf5cef7-48d6-4d2a-8ab0-0fb10bdd7d1a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(tensor([-0.0898]),)\n",
+      "(tensor([-0.0817]),)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "from torch.autograd import grad\n",
+    "\n",
+    "y = torch.tensor([1.0])\n",
+    "x1 = torch.tensor([1.1])\n",
+    "w1 = torch.tensor([2.2], requires_grad=True)\n",
+    "b = torch.tensor([0.0], requires_grad=True)\n",
+    "\n",
+    "z = x1 * w1 + b \n",
+    "a = torch.sigmoid(z)\n",
+    "\n",
+    "loss = F.binary_cross_entropy(a, y)\n",
+    "\n",
+    "grad_L_w1 = grad(loss, w1, retain_graph=True)\n",
+    "grad_L_b = grad(loss, b, retain_graph=True)\n",
+    "\n",
+    "print(grad_L_w1)\n",
+    "print(grad_L_b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "93c5875d-f6b2-492c-b5ef-7e132f93a4e0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([-0.0898])\n",
+      "tensor([-0.0817])\n"
+     ]
+    }
+   ],
+   "source": [
+    "loss.backward()\n",
+    "\n",
+    "print(w1.grad)\n",
+    "print(b.grad)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f53bdd7d-44e6-40ab-8a5a-4eef74ef35dc",
+   "metadata": {},
+   "source": [
+    "## 2.5 Implementing multilayer neural networks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "84b749e1-7768-4cfe-94d6-a08c7feff4a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class NeuralNetwork(torch.nn.Module):\n",
+    "    def __init__(self, num_inputs, num_outputs):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.layers = torch.nn.Sequential(\n",
+    "                \n",
+    "            # 1st hidden layer\n",
+    "            torch.nn.Linear(num_inputs, 30),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 2nd hidden layer\n",
+    "            torch.nn.Linear(30, 20),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # output layer\n",
+    "            torch.nn.Linear(20, num_outputs),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        logits = self.layers(x)\n",
+    "        return logits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "c5b59e2e-1930-456d-93b9-f69263e3adbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = NeuralNetwork(50, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "39d02a21-33e7-4879-8fd2-d6309faf2f8d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NeuralNetwork(\n",
+      "  (layers): Sequential(\n",
+      "    (0): Linear(in_features=50, out_features=30, bias=True)\n",
+      "    (1): ReLU()\n",
+      "    (2): Linear(in_features=30, out_features=20, bias=True)\n",
+      "    (3): ReLU()\n",
+      "    (4): Linear(in_features=20, out_features=3, bias=True)\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "94535738-de02-4c2a-9b44-1cd186fa990a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total number of trainable model parameters: 2213\n"
+     ]
+    }
+   ],
+   "source": [
+    "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+    "print(\"Total number of trainable model parameters:\", num_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "2c394106-ad71-4ccb-a3c9-9b60af3fa748",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Parameter containing:\n",
+      "tensor([[-0.0064,  0.0004, -0.0903,  ..., -0.1316,  0.0910,  0.0363],\n",
+      "        [ 0.1354,  0.1124, -0.0476,  ...,  0.0578,  0.1014,  0.0008],\n",
+      "        [ 0.0975, -0.0478,  0.0298,  ...,  0.0416,  0.0849,  0.1314],\n",
+      "        ...,\n",
+      "        [ 0.0118,  0.0240,  0.0420,  ..., -0.1305, -0.0517, -0.0826],\n",
+      "        [-0.0323,  0.1073,  0.0215,  ..., -0.1264, -0.1100,  0.1232],\n",
+      "        [ 0.0861,  0.0403, -0.0545,  ...,  0.1352,  0.0817, -0.0938]],\n",
+      "       requires_grad=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(model.layers[0].weight)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "b201882b-9285-4db9-bb63-43afe6a2ff9e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Parameter containing:\n",
+      "tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],\n",
+      "        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],\n",
+      "        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],\n",
+      "        ...,\n",
+      "        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],\n",
+      "        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],\n",
+      "        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],\n",
+      "       requires_grad=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "torch.manual_seed(123)\n",
+    "\n",
+    "model = NeuralNetwork(50, 3)\n",
+    "print(model.layers[0].weight)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "1da9a35e-44f3-460c-90fe-304519736fd6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([30, 50])\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(model.layers[0].weight.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "57eadbae-90fe-43a3-a33f-c23a095ba42a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "torch.manual_seed(123)\n",
+    "\n",
+    "X = torch.rand((1, 50))\n",
+    "out = model(X)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "48d720cb-ef73-4b7b-92e0-8198a072defd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[-0.1262,  0.1080, -0.1792]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "with torch.no_grad():\n",
+    "    out = model(X)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "10df3640-83c3-4061-a74d-08f07a5cc6ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[0.3113, 0.3934, 0.2952]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "with torch.no_grad():\n",
+    "    out = F.softmax(model(X), dim=1)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19858180-0f26-43a8-b2c3-7ed40abf9f85",
+   "metadata": {},
+   "source": [
+    "## 2.6 Setting up efficient data loaders"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "b9dc2745-8be8-4344-80ef-325f02cda7b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train = torch.tensor([\n",
+    "    [-1.2, 3.1],\n",
+    "    [-0.9, 2.9],\n",
+    "    [-0.5, 2.6],\n",
+    "    [2.3, -1.1],\n",
+    "    [2.7, -1.5]\n",
+    "])\n",
+    "\n",
+    "y_train = torch.tensor([0, 0, 0, 1, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "88283948-5fca-461a-98a1-788b6be191d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test = torch.tensor([\n",
+    "    [-0.8, 2.8],\n",
+    "    [2.6, -1.6],\n",
+    "])\n",
+    "\n",
+    "y_test = torch.tensor([0, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "edf323e2-1789-41a0-8e44-f3cab16e5f5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import Dataset\n",
+    "\n",
+    "\n",
+    "class ToyDataset(Dataset):\n",
+    "    def __init__(self, X, y):\n",
+    "        self.features = X\n",
+    "        self.labels = y\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        one_x = self.features[index]\n",
+    "        one_y = self.labels[index]        \n",
+    "        return one_x, one_y\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.labels.shape[0]\n",
+    "\n",
+    "train_ds = ToyDataset(X_train, y_train)\n",
+    "test_ds = ToyDataset(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "b7014705-1fdc-4f72-b892-d8db8bebc331",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "5"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(train_ds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "3ec6627a-4c3f-481a-b794-d2131be95eaf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "train_loader = DataLoader(\n",
+    "    dataset=train_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=True,\n",
+    "    num_workers=0\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "8c9446de-5e4b-44fa-bf9a-a63e2661027e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_ds = ToyDataset(X_test, y_test)\n",
+    "\n",
+    "test_loader = DataLoader(\n",
+    "    dataset=test_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=False,\n",
+    "    num_workers=0\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "99d4404c-9884-419f-979c-f659742d86ef",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Batch 1: tensor([[ 2.3000, -1.1000],\n",
+      "        [-0.9000,  2.9000]]) tensor([1, 0])\n",
+      "Batch 2: tensor([[-1.2000,  3.1000],\n",
+      "        [-0.5000,  2.6000]]) tensor([0, 0])\n",
+      "Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "for idx, (x, y) in enumerate(train_loader):\n",
+    "    print(f\"Batch {idx+1}:\", x, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "9d003f7e-7a80-40bf-a7fb-7a0d7dbba9db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_loader = DataLoader(\n",
+    "    dataset=train_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=True,\n",
+    "    num_workers=0,\n",
+    "    drop_last=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4db4d7f4-82da-44a4-b94e-ee04665d9c3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for idx, (x, y) in enumerate(train_loader):\n",
+    "    print(f\"Batch {idx+1}:\", x, y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e",
+   "metadata": {},
+   "source": [
+    "## 2.7 A typical training loop"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "93f1791a-d887-4fc5-a307-5e5bde9e06f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
+      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
+      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
+      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
+      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
+      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
+    "\n",
+    "num_epochs = 3\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "    \n",
+    "    model.train()\n",
+    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
+    "\n",
+    "        logits = model(features)\n",
+    "        \n",
+    "        loss = F.cross_entropy(logits, labels) # Loss function\n",
+    "        \n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "    \n",
+    "        ### LOGGING\n",
+    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
+    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
+    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
+    "\n",
+    "    model.eval()\n",
+    "    # Optional model evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "00dcf57f-6a7e-4af7-aa5a-df2cb0866fa5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[ 2.8569, -4.1618],\n",
+      "        [ 2.5382, -3.7548],\n",
+      "        [ 2.0944, -3.1820],\n",
+      "        [-1.4814,  1.4816],\n",
+      "        [-1.7176,  1.7342]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.eval()\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    outputs = model(X_train)\n",
+    "\n",
+    "print(outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "19be7390-18b8-43f9-9841-d7fb1919f6fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[    0.9991,     0.0009],\n",
+      "        [    0.9982,     0.0018],\n",
+      "        [    0.9949,     0.0051],\n",
+      "        [    0.0491,     0.9509],\n",
+      "        [    0.0307,     0.9693]])\n",
+      "tensor([0, 0, 0, 1, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "torch.set_printoptions(sci_mode=False)\n",
+    "probas = F.softmax(outputs, dim=1)\n",
+    "print(probas)\n",
+    "\n",
+    "predictions = torch.argmax(outputs, dim=1)\n",
+    "print(predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "07e7e530-f8d3-429c-9f5e-cf8078078c0e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([0, 0, 0, 1, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "predictions = torch.argmax(outputs, dim=1)\n",
+    "print(predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "5f756f0d-63c8-41b5-a5d8-01baa847e026",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([True, True, True, True, True])"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions == y_train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "da274bb0-f11c-4c81-a880-7a031fbf2943",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(5)"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.sum(predictions == y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "16d62314-8dee-45b0-8f55-9e5aae2b24f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_accuracy(model, dataloader):\n",
+    "\n",
+    "    model = model.eval()\n",
+    "    correct = 0.0\n",
+    "    total_examples = 0\n",
+    "    \n",
+    "    for idx, (features, labels) in enumerate(dataloader):\n",
+    "        \n",
+    "        with torch.no_grad():\n",
+    "            logits = model(features)\n",
+    "        \n",
+    "        predictions = torch.argmax(logits, dim=1)\n",
+    "        compare = labels == predictions\n",
+    "        correct += torch.sum(compare)\n",
+    "        total_examples += len(compare)\n",
+    "\n",
+    "    return (correct / total_examples).item()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "4f6c9c17-2a5f-46c0-804b-873f169b729a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, train_loader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "311ed864-e21e-4aac-97c7-c6086caef27a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, test_loader)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4d5cd469-3a45-4394-944b-3ce543f41dac",
+   "metadata": {},
+   "source": [
+    "## 2.8 Saving and loading models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "b013127d-a2c3-4b04-9fb3-a6a7c88d83c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "torch.save(model.state_dict(), \"model.pth\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "b2b428c2-3a44-4d91-97c4-8298cf2b51eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = NeuralNetwork(2, 2) # needs to match the original model exactly\n",
+    "model.load_state_dict(torch.load(\"model.pth\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f891c013-43da-4a05-973d-997be313d2d8",
+   "metadata": {},
+   "source": [
+    "## 2.9 Optimizing training performance with GPUs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e68ae888-cabf-49c9-bad6-ecdce774db57",
+   "metadata": {},
+   "source": [
+    "### 2.9.1 PyTorch computations on GPU devices"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "141c845f-efe3-4614-b376-b8b7a9a2c887",
+   "metadata": {},
+   "source": [
+    "See [ch02-code-part2.ipynb](ch02-code-part2.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99811829-b817-42ea-b03e-d35374debcc0",
+   "metadata": {},
+   "source": [
+    "### 2.9.2 Single-GPU training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0b21456c-4af7-440f-9e78-37770277b5bc",
+   "metadata": {},
+   "source": [
+    "See [ch02-code-part2.ipynb](ch02-code-part2.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db6eb2d1-a341-4489-b04b-635c26945333",
+   "metadata": {},
+   "source": [
+    "### 2.9.3 Training with multiple GPUs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9d049a81-5fb0-49b5-9d6a-17a9976d8520",
+   "metadata": {},
+   "source": [
+    "See [ch02-code-part3.py](ch02-code-part3.py)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

+ 452 - 0
ch02/03_main-chapter-code/ch02-code-part2.ipynb

@@ -0,0 +1,452 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "O9i6kzBsZVaZ"
+   },
+   "source": [
+    "# Chapter 2: Introduction to PyTorch (Part 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ppbG5d-NZezH"
+   },
+   "source": [
+    "## 2.9 Optimizing training performance with GPUs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "6jH0J_DPZhbn"
+   },
+   "source": [
+    "### 2.9.1 PyTorch computations on GPU devices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "RM7kGhwMF_nO",
+    "outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.1+cu118\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "print(torch.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "OXLCKXhiUkZt",
+    "outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(torch.cuda.is_available())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "MTTlfh53Va-T",
+    "outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([5., 7., 9.])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor_1 = torch.tensor([1., 2., 3.])\n",
+    "tensor_2 = torch.tensor([4., 5., 6.])\n",
+    "\n",
+    "print(tensor_1 + tensor_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "Z4LwTNw7Vmmb",
+    "outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([5., 7., 9.], device='cuda:0')\n"
+     ]
+    }
+   ],
+   "source": [
+    "tensor_1 = tensor_1.to(\"cuda\")\n",
+    "tensor_2 = tensor_2.to(\"cuda\")\n",
+    "\n",
+    "print(tensor_1 + tensor_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 184
+    },
+    "id": "tKT6URN1Vuft",
+    "outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
+   },
+   "outputs": [
+    {
+     "ename": "RuntimeError",
+     "evalue": "ignored",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
+     ]
+    }
+   ],
+   "source": [
+    "tensor_1 = tensor_1.to(\"cpu\")\n",
+    "print(tensor_1 + tensor_2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "c8j1cWDcWAMf"
+   },
+   "source": [
+    "### 2.9.2 Single-GPU training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "id": "GyY59cjieitv"
+   },
+   "outputs": [],
+   "source": [
+    "X_train = torch.tensor([\n",
+    "    [-1.2, 3.1],\n",
+    "    [-0.9, 2.9],\n",
+    "    [-0.5, 2.6],\n",
+    "    [2.3, -1.1],\n",
+    "    [2.7, -1.5]\n",
+    "])\n",
+    "\n",
+    "y_train = torch.tensor([0, 0, 0, 1, 1])\n",
+    "\n",
+    "X_test = torch.tensor([\n",
+    "    [-0.8, 2.8],\n",
+    "    [2.6, -1.6],\n",
+    "])\n",
+    "\n",
+    "y_test = torch.tensor([0, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "id": "v41gKqEJempa"
+   },
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import Dataset\n",
+    "\n",
+    "\n",
+    "class ToyDataset(Dataset):\n",
+    "    def __init__(self, X, y):\n",
+    "        self.features = X\n",
+    "        self.labels = y\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        one_x = self.features[index]\n",
+    "        one_y = self.labels[index]\n",
+    "        return one_x, one_y\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.labels.shape[0]\n",
+    "\n",
+    "train_ds = ToyDataset(X_train, y_train)\n",
+    "test_ds = ToyDataset(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "id": "UPGVRuylep8Y"
+   },
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "train_loader = DataLoader(\n",
+    "    dataset=train_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=True,\n",
+    "    num_workers=1,\n",
+    "    drop_last=True\n",
+    ")\n",
+    "\n",
+    "test_loader = DataLoader(\n",
+    "    dataset=test_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=False,\n",
+    "    num_workers=1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "id": "drhg6IXofAXh"
+   },
+   "outputs": [],
+   "source": [
+    "class NeuralNetwork(torch.nn.Module):\n",
+    "    def __init__(self, num_inputs, num_outputs):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.layers = torch.nn.Sequential(\n",
+    "\n",
+    "            # 1st hidden layer\n",
+    "            torch.nn.Linear(num_inputs, 30),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 2nd hidden layer\n",
+    "            torch.nn.Linear(30, 20),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # output layer\n",
+    "            torch.nn.Linear(20, num_outputs),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        logits = self.layers(x)\n",
+    "        return logits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "7jaS5sqPWCY0",
+    "outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
+      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
+      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
+      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
+      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
+      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
+    "\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # NEW\n",
+    "model = model.to(device) # NEW\n",
+    "\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
+    "\n",
+    "num_epochs = 3\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "\n",
+    "    model.train()\n",
+    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
+    "\n",
+    "        features, labels = features.to(device), labels.to(device) # NEW\n",
+    "        logits = model(features)\n",
+    "        loss = F.cross_entropy(logits, labels) # Loss function\n",
+    "\n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "        ### LOGGING\n",
+    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
+    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
+    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
+    "\n",
+    "    model.eval()\n",
+    "    # Optional model evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "id": "4qrlmnPPe7FO"
+   },
+   "outputs": [],
+   "source": [
+    "def compute_accuracy(model, dataloader, device):\n",
+    "\n",
+    "    model = model.eval()\n",
+    "    correct = 0.0\n",
+    "    total_examples = 0\n",
+    "\n",
+    "    for idx, (features, labels) in enumerate(dataloader):\n",
+    "\n",
+    "        features, labels = features.to(device), labels.to(device) # New\n",
+    "\n",
+    "        with torch.no_grad():\n",
+    "            logits = model(features)\n",
+    "\n",
+    "        predictions = torch.argmax(logits, dim=1)\n",
+    "        compare = labels == predictions\n",
+    "        correct += torch.sum(compare)\n",
+    "        total_examples += len(compare)\n",
+    "\n",
+    "    return (correct / total_examples).item()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "1_-BfkfEf4HX",
+    "outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, train_loader, device=device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "iYtXKBGEgKss",
+    "outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, test_loader, device=device)"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}