@@ -134,7 +134,9 @@
" \n",
" # Use a placeholder for LayerNorm\n",
" self.final_norm = DummyLayerNorm(cfg[\"emb_dim\"])\n",
- " self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
+ " self.out_head = nn.Linear(\n",
+ " cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
+ " )\n",
"\n",
" def forward(self, in_idx):\n",
" batch_size, seq_len = in_idx.shape\n",
@@ -208,7 +210,7 @@
"batch.append(torch.tensor(tokenizer.encode(txt1)))\n",
"batch.append(torch.tensor(tokenizer.encode(txt2)))\n",
"batch = torch.stack(batch, dim=0)\n",
- "batch"
+ "print(batch)"
]
},
{
@@ -772,7 +774,7 @@
"torch.manual_seed(123)\n",
"ex_short = ExampleWithShortcut()\n",
"inputs = torch.tensor([[-1., 1., 2.]])\n",
- "ex_short(inputs)"
+ "print(ex_short(inputs))"
]
},
{
@@ -947,7 +949,9 @@
" \n",
" # Use a placeholder for LayerNorm\n",
" self.final_norm = LayerNorm(cfg[\"emb_dim\"])\n",
- " self.out_head = nn.Linear(cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False)\n",
+ " self.out_head = nn.Linear(\n",
+ " cfg[\"emb_dim\"], cfg[\"vocab_size\"], bias=False\n",
+ " )\n",
"\n",
" def forward(self, in_idx):\n",
" batch_size, seq_len = in_idx.shape\n",