7 месяцев назад · adaf4faaae
--- a/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
+++ b/ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb
@@ -2140,7 +2140,7 @@
 
				    },
			
 
				    "source": [
			
 
				     "- In other words, `torch.gather` is a selection function\n",
			
 
				-    "- When we computed the loss earlier, we used it to retrieve the log probabilities corresponding to the correct token in the 50,256-token vocabulary\n",
			
 
				+    "- When we computed the loss earlier, we used it to retrieve the log probabilities corresponding to the correct token in the 50,257-token vocabulary\n",
			
 
				     "- The \"correct\" tokens are the tokens given in the response entry"
			
 
				    ]
			
 
				   },
			
@@ -3112,7 +3112,7 @@
 
				    "provenance": []
			
 
				   },
			
 
				   "kernelspec": {
			
 
				-   "display_name": ".venv",
			
 
				+   "display_name": "Python 3 (ipykernel)",
			
 
				    "language": "python",
			
 
				    "name": "python3"
			
 
				   },
			
@@ -3126,7 +3126,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.12.6"
			
 
				+   "version": "3.10.16"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,