|
@@ -1,117 +0,0 @@
|
|
|
-{
|
|
|
|
|
- "cells": [
|
|
|
|
|
- {
|
|
|
|
|
- "cell_type": "code",
|
|
|
|
|
- "execution_count": 4,
|
|
|
|
|
- "id": "98efe79e-daa3-40d0-ab4d-f667d4d6ba9d",
|
|
|
|
|
- "metadata": {},
|
|
|
|
|
- "outputs": [
|
|
|
|
|
- {
|
|
|
|
|
- "name": "stderr",
|
|
|
|
|
- "output_type": "stream",
|
|
|
|
|
- "text": [
|
|
|
|
|
- "/Users/Author/miniforge3/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
|
|
|
|
- " from .autonotebook import tqdm as notebook_tqdm\n",
|
|
|
|
|
- "Downloading (…)olve/main/vocab.json: 100%|█| 1.04M/1.04M [00:00<00:00, 1.66MB/s]\n",
|
|
|
|
|
- "Downloading (…)olve/main/merges.txt: 100%|███| 456k/456k [00:00<00:00, 2.44MB/s]\n",
|
|
|
|
|
- "Downloading (…)/main/tokenizer.json: 100%|█| 1.36M/1.36M [00:00<00:00, 1.97MB/s]\n",
|
|
|
|
|
- "Downloading (…)lve/main/config.json: 100%|██████| 718/718 [00:00<00:00, 621kB/s]\n"
|
|
|
|
|
- ]
|
|
|
|
|
- },
|
|
|
|
|
- {
|
|
|
|
|
- "name": "stdout",
|
|
|
|
|
- "output_type": "stream",
|
|
|
|
|
- "text": [
|
|
|
|
|
- "Decoded Inputs:\n",
|
|
|
|
|
- "I HAD always\n",
|
|
|
|
|
- " Jack Gisburn\n",
|
|
|
|
|
- " a cheap genius--\n",
|
|
|
|
|
- " a good fellow enough\n",
|
|
|
|
|
- "so it was no\n",
|
|
|
|
|
- " surprise to me to\n",
|
|
|
|
|
- " that, in the\n",
|
|
|
|
|
- " of his glory,\n",
|
|
|
|
|
- "\n",
|
|
|
|
|
- "Decoded Targets:\n",
|
|
|
|
|
- " HAD always thought\n",
|
|
|
|
|
- " Gisburn rather\n",
|
|
|
|
|
- " cheap genius--though\n",
|
|
|
|
|
- " good fellow enough--\n",
|
|
|
|
|
- " it was no great\n",
|
|
|
|
|
- " to me to hear\n",
|
|
|
|
|
- ", in the height\n",
|
|
|
|
|
- " his glory, he\n"
|
|
|
|
|
- ]
|
|
|
|
|
- }
|
|
|
|
|
- ],
|
|
|
|
|
- "source": [
|
|
|
|
|
- "import torch\n",
- "from transformers import GPT2Tokenizer\n",
- "\n",
- "# GPT-2 tokenizer, used below only to turn token IDs back into text.\n",
- "tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')\n",
- "\n",
- "# Eight rows of four token IDs each.\n",
- "inputs = torch.tensor([\n",
- "    [40, 367, 2885, 1464],\n",
- "    [3619, 402, 271, 10899],\n",
- "    [257, 7026, 15632, 438],\n",
- "    [257, 922, 5891, 1576],\n",
- "    [568, 340, 373, 645],\n",
- "    [5975, 284, 502, 284],\n",
- "    [326, 11, 287, 262],\n",
- "    [286, 465, 13476, 11]\n",
- "])\n",
- "\n",
- "# Row k repeats the last three IDs of inputs[k] and appends one new ID.\n",
- "targets = torch.tensor([\n",
- "    [367, 2885, 1464, 1807],\n",
- "    [402, 271, 10899, 2138],\n",
- "    [7026, 15632, 438, 2016],\n",
- "    [922, 5891, 1576, 438],\n",
- "    [340, 373, 645, 1049],\n",
- "    [284, 502, 284, 3285],\n",
- "    [11, 287, 262, 6001],\n",
- "    [465, 13476, 11, 339]\n",
- "])\n",
- "\n",
- "# Decode row by row so each printed line corresponds to one tensor row.\n",
- "decoded_inputs = list(map(tokenizer.decode, inputs))\n",
- "decoded_targets = list(map(tokenizer.decode, targets))\n",
- "\n",
- "print(\"Decoded Inputs:\")\n",
- "print(*decoded_inputs, sep=\"\\n\")\n",
- "\n",
- "print(\"\\nDecoded Targets:\")\n",
- "print(*decoded_targets, sep=\"\\n\")"
|
|
|
|
|
- ]
|
|
|
|
|
- },
|
|
|
|
|
- {
|
|
|
|
|
- "cell_type": "code",
|
|
|
|
|
- "execution_count": null,
|
|
|
|
|
- "id": "defc6b2f-9ac2-49e0-a4e1-03247cacffce",
|
|
|
|
|
- "metadata": {},
|
|
|
|
|
- "outputs": [],
|
|
|
|
|
- "source": []
|
|
|
|
|
- }
|
|
|
|
|
- ],
|
|
|
|
|
- "metadata": {
|
|
|
|
|
- "kernelspec": {
|
|
|
|
|
- "display_name": "Python 3 (ipykernel)",
|
|
|
|
|
- "language": "python",
|
|
|
|
|
- "name": "python3"
|
|
|
|
|
- },
|
|
|
|
|
- "language_info": {
|
|
|
|
|
- "codemirror_mode": {
|
|
|
|
|
- "name": "ipython",
|
|
|
|
|
- "version": 3
|
|
|
|
|
- },
|
|
|
|
|
- "file_extension": ".py",
|
|
|
|
|
- "mimetype": "text/x-python",
|
|
|
|
|
- "name": "python",
|
|
|
|
|
- "nbconvert_exporter": "python",
|
|
|
|
|
- "pygments_lexer": "ipython3",
|
|
|
|
|
- "version": "3.10.12"
|
|
|
|
|
- }
|
|
|
|
|
- },
|
|
|
|
|
- "nbformat": 4,
|
|
|
|
|
- "nbformat_minor": 5
|
|
|
|
|
-}
|
|
|