|
|
@@ -2140,7 +2140,7 @@
|
|
|
},
|
|
|
"source": [
|
|
|
"- In other words, `torch.gather` is a selection function\n",
|
|
|
- "- When we computed the loss earlier, we used it to retrieve the log probabilities corresponding to the correct token in the 50,256-token vocabulary\n",
|
|
|
+ "- When we computed the loss earlier, we used it to retrieve the log probabilities corresponding to the correct token in the 50,257-token vocabulary\n",
|
|
|
"- The \"correct\" tokens are the tokens given in the response entry"
|
|
|
]
|
|
|
},
|
|
|
@@ -3112,7 +3112,7 @@
|
|
|
"provenance": []
|
|
|
},
|
|
|
"kernelspec": {
|
|
|
- "display_name": ".venv",
|
|
|
+ "display_name": "Python 3 (ipykernel)",
|
|
|
"language": "python",
|
|
|
"name": "python3"
|
|
|
},
|
|
|
@@ -3126,7 +3126,7 @@
|
|
|
"name": "python",
|
|
|
"nbconvert_exporter": "python",
|
|
|
"pygments_lexer": "ipython3",
|
|
|
- "version": "3.12.6"
|
|
|
+ "version": "3.10.16"
|
|
|
}
|
|
|
},
|
|
|
"nbformat": 4,
|