PyThaiNLP · bact · Oct 1, 2019 · Sep 12, 2019
diff --git a/word-tokenization/data-preparation-and-post-processing.ipynb b/word-tokenization/data-preparation-and-post-processing.ipynb
@@ -2,8 +2,13 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-09-12T08:19:35.741983Z",
+     "start_time": "2019-09-12T08:19:35.323151Z"
+    }
+   },
    "outputs": [],
    "source": [
     "import pandas as pd\n",
@@ -19,13 +24,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-09-12T08:19:43.555275Z",
+     "start_time": "2019-09-12T08:19:43.440900Z"
+    }
+   },
    "outputs": [],
    "source": [
     "data = []\n",
-    "with open (\"./kaggle-competition/train.txt\", \"r\") as ftxt, \\\n",
-    "    open (\"./kaggle-competition/train_label.txt\", \"r\") as flabel :\n",
+    "with open (\"../kaggle-competition/train.txt\", \"r\") as ftxt, \\\n",
+    "    open (\"../kaggle-competition/train_label.txt\", \"r\") as flabel :\n",
     "    for t, l in zip(ftxt, flabel):\n",
     "        t = t.strip()\n",
     "        l = l.strip()\n",
@@ -39,8 +49,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-09-12T08:19:49.840676Z",
+     "start_time": "2019-09-12T08:19:49.826456Z"
+    }
+   },
    "outputs": [],
    "source": [
     "def sampling_class(df, cls_name, n=20, seed=71):\n",
@@ -56,7 +71,12 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-09-12T08:19:51.210467Z",
+     "start_time": "2019-09-12T08:19:51.158301Z"
+    }
+   },
    "outputs": [],
    "source": [
     "seeds = dict(zip(['neg', 'neu', 'pos', 'q'], range(4)))\n",
@@ -91,7 +111,12 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-09-12T08:19:52.294514Z",
+     "start_time": "2019-09-12T08:19:52.270380Z"
+    }
+   },
    "outputs": [
     {
      "data": {
@@ -108,26 +133,6 @@
     "set(df_train.label.values)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([0, 4, 3, 2, 1])"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "b"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -220,20 +225,13 @@
     "            ft.write(\"%s\\n\" % l.replace(\"|\", \"\"))\n",
     "            fl.write(\"%s\\n\" % l)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "venv-python3-global",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "venv-python3-global"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -248,10 +246,13 @@
    "version": "3.7.3"
   },
   "toc": {
+   "base_numbering": 1,
    "nav_menu": {},
    "number_sections": true,
    "sideBar": true,
    "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
    "toc_cell": false,
    "toc_position": {},
    "toc_section_display": "block",