|
|
@@ -31,6 +31,49 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"execution_count": 2,
|
|
|
+ "id": "4f235d87-be85-4ddf-95a6-af59fca13d82",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "[33901, 86, 343, 86, 220, 959]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "integers = tokenizer.encode(\"Akwirw ier\")\n",
|
|
|
+ "print(integers)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "id": "45e4e8f0-3272-48bb-96f6-cced5584ceea",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "33901 -> Ak\n",
|
|
|
+ "86 -> w\n",
|
|
|
+ "343 -> ir\n",
|
|
|
+ "86 -> w\n",
|
|
|
+ "220 -> \n",
|
|
|
+ "959 -> ier\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "for i in integers:\n",
|
|
|
+ " print(f\"{i} -> {tokenizer.decode([i])}\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
"id": "664397bc-6daa-4b88-90aa-e8fc1fbd5846",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -40,7 +83,7 @@
|
|
|
"[33901]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 2,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -51,7 +94,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": 5,
|
|
|
"id": "d3664332-e6bb-447e-8b96-203aafde8b24",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -61,7 +104,7 @@
|
|
|
"[86]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": 5,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -72,7 +115,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 4,
|
|
|
+ "execution_count": 6,
|
|
|
"id": "2773c09d-c136-4372-a2be-04b58d292842",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -82,7 +125,7 @@
|
|
|
"[343]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 4,
|
|
|
+ "execution_count": 6,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -93,7 +136,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 5,
|
|
|
+ "execution_count": 7,
|
|
|
"id": "8a6abd32-1e0a-4038-9dd2-673f47bcdeb5",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -103,7 +146,7 @@
|
|
|
"[86]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 5,
|
|
|
+ "execution_count": 7,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -114,7 +157,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 6,
|
|
|
+ "execution_count": 8,
|
|
|
"id": "26ae940a-9841-4e27-a1df-b83fc8a488b3",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -124,7 +167,7 @@
|
|
|
"[220]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 6,
|
|
|
+ "execution_count": 8,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -135,7 +178,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 7,
|
|
|
+ "execution_count": 9,
|
|
|
"id": "a606c39a-6747-4cd8-bb38-e3183f80908d",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -145,7 +188,7 @@
|
|
|
"[959]"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 7,
|
|
|
+ "execution_count": 9,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -156,7 +199,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 8,
|
|
|
+ "execution_count": 10,
|
|
|
"id": "47c7268d-8fdc-4957-bc68-5be6113f45a7",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
|
@@ -166,7 +209,7 @@
|
|
|
"'Akwirw ier'"
|
|
|
]
|
|
|
},
|
|
|
- "execution_count": 8,
|
|
|
+ "execution_count": 10,
|
|
|
"metadata": {},
|
|
|
"output_type": "execute_result"
|
|
|
}
|
|
|
@@ -323,7 +366,7 @@
|
|
|
"name": "python",
|
|
|
"nbconvert_exporter": "python",
|
|
|
"pygments_lexer": "ipython3",
|
|
|
- "version": "3.10.12"
|
|
|
+ "version": "3.11.4"
|
|
|
}
|
|
|
},
|
|
|
"nbformat": 4,
|