@@ -48,7 +48,7 @@
" self.target_ids = []\n",
"\n",
" # Tokenize the entire text\n",
- " token_ids = tokenizer.encode(txt, allowed_special={'<|endoftext|>'})\n",
+ " token_ids = self.tokenizer.encode(txt, allowed_special={'<|endoftext|>'})\n",
"\n",
" # Use a sliding window to chunk the book into overlapping sequences of max_length\n",
" for i in range(0, len(token_ids) - max_length, stride):\n",
@@ -150,7 +150,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.10.10"
}
},
"nbformat": 4,