3 місяців тому · 145322ded8
--- a/ch05/11_qwen3/README.md
+++ b/ch05/11_qwen3/README.md
@@ -255,7 +255,7 @@ The following table shows a performance comparison on an A100 for consequent `ge
 
				 | Qwen3Model compiled | 107        | 1.99 GB |
			
 
				 
			
 
				 &nbsp;
			
 
				-#### Pro tip 2: speed up inference with compilation
			
 
				+#### Pro tip 2: speed up inference with KV cache
			
 
				 
			
 
				 You can significantly boost inference performance when running the model on a CPU by using the KV-cache version of `Qwen3Model` as a drop-in replacement. (See my [Understanding and Coding the KV Cache in LLMs from Scratch](https://magazine.sebastianraschka.com/p/coding-the-kv-cache-in-llms) article to learn more about KV caches.)
			
 
				 
			
--- a/ch05/11_qwen3/standalone-qwen3.ipynb
+++ b/ch05/11_qwen3/standalone-qwen3.ipynb
@@ -822,7 +822,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 37,
			
 
				+   "execution_count": null,
			
 
				    "id": "699cb1b8-a67d-49fb-80a6-0dad9d81f392",
			
 
				    "metadata": {
			
 
				     "colab": {
			
@@ -936,7 +936,8 @@
 
				     "        weights_dict.update(shard)\n",
			
 
				     "\n",
			
 
				     "load_weights_into_qwen(model, QWEN3_CONFIG, weights_dict)\n",
			
 
				-    "model.to(device);"
			
 
				+    "model.to(device)\n",
			
 
				+    "del weights_dict"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -1187,7 +1188,7 @@
 
				    "provenance": []
			
 
				   },
			
 
				   "kernelspec": {
			
 
				-   "display_name": "Python 3 (ipykernel)",
			
 
				+   "display_name": ".venv",
			
 
				    "language": "python",
			
 
				    "name": "python3"
			
 
				   },
			
@@ -1201,7 +1202,7 @@
 
				    "name": "python",
			
 
				    "nbconvert_exporter": "python",
			
 
				    "pygments_lexer": "ipython3",
			
 
				-   "version": "3.11.9"
			
 
				+   "version": "3.12.6"
			
 
				   }
			
 
				  },
			
 
				  "nbformat": 4,