diff --git a/.travis.yml b/.travis.yml index 8f4edb93f..f04002977 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,10 +4,16 @@ language: python python: - "3.6" + +# workaround to make boto work on travis +# from https://github.com/travis-ci/travis-ci/issues/7940 +before_install: + - sudo rm -f /etc/boto.cfg + # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors install: - pip install -r requirements.txt - - pip install .[icu,ner,pos,tokenize,transliterate] + - pip install .[icu,ipa,ner,thai2vec] - pip install coveralls os: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5ba12656d..dd52500c3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,8 +23,8 @@ We use the famous [gitflow](http://nvie.com/posts/a-successful-git-branching-mod - Write tests for your new features (please see "Tests" topic below); - Always remember that [commented code is dead code](http://www.codinghorror.com/blog/2008/07/coding-without-comments.html); -- Name identifiers (variables, classes, functions, module names) with readable - names (`x` is always wrong); +- Name identifiers (variables, classes, functions, module names) with meaningful + and pronounceable names (`x` is always wrong); - When manipulating strings, use [Python's new-style formatting](http://docs.python.org/library/string.html#format-string-syntax) (`'{} = {}'.format(a, b)` instead of `'%s = %s' % (a, b)`); @@ -55,7 +55,7 @@ Happy hacking! (; ## newmm (onecut), mm, TCC, and Thai Soundex Code - Korakot Chaovavanich -## Thai2Vec & ulmfit +## Thai2Vec & ULMFiT - Charin Polpanumas ## Docs diff --git a/README-pypi.md b/README-pypi.md index 70a8a53c2..8141c642e 100644 --- a/README-pypi.md +++ b/README-pypi.md @@ -10,20 +10,14 @@ PyThaiNLP is a Python library for natural language processing (NLP) of Thai language. -PyThaiNLP features include Thai word and subword segmentations, soundex, romanization, part-of-speech taggers, and spelling corrections. - -## What's new in version 1.7 ? - -- Deprecate Python 2 support. (Python 2 compatibility code will be completely dropped in PyThaiNLP 1.8) -- Refactor pythainlp.tokenize.pyicu for readability -- Add Thai NER model to pythainlp.ner -- thai2vec v0.2 - larger vocab, benchmarking results on Wongnai dataset -- Sentiment classifier based on ULMFit and various product review datasets -- Add ULMFit utility to PyThaiNLP -- Add Thai romanization model ThaiTransliterator -- Retrain POS-tagging model -- Improved word_tokenize (newmm, mm) and dict_word_tokenize -- Documentation added +PyThaiNLP includes Thai word tokenizers, transliterators, soundex converters, part-of-speech taggers, and spell checkers. + +## What's new in version 1.8? + +- New NorvigSpellChecker spell checker class, which can be initialized with a custom dictionary. +- Drop Python 2 support. Remove all Python 2 compatibility code. +- Remove old, obsolete, deprecated, and experimental code. +- See [PyThaiNLP 1.8 change log](https://github.com/PyThaiNLP/pythainlp/issues/118) for details. ## Install diff --git a/README.md b/README.md index ef71bf205..c3399a200 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ Thai Natural Language Processing in Python. PyThaiNLP is a Python package for text processing and linguistic analysis, similar to `nltk` but with focus on Thai language. -PyThaiNLP supports Python 3.4+. -Since version 1.7, PyThaiNLP deprecates its support for Python 2. The future PyThaiNLP 1.8 will completely drop all supports for Python 2. -Python 2 users can still use PyThaiNLP 1.6.
+PyThaiNLP 1.8 supports Python 3.6+. Some functions may work with older versions of Python 3, but they are not well tested and are not supported. See [PyThaiNLP 1.8 change log](https://github.com/PyThaiNLP/pythainlp/issues/118). + +Python 2 users can use PyThaiNLP 1.6, our latest release that was tested with Python 2.7. **This is a document for development branch (post 1.7.x). Things will break. For a document for stable branch, see [master](https://github.com/PyThaiNLP/pythainlp/tree/master).** @@ -34,21 +34,40 @@ Python 2 users can still use PyThaiNLP 1.6. ## Installation -**Using pip** +PyThaiNLP uses PyPI as its main distribution channel; see https://pypi.org/project/pythainlp/ + +### Stable release -Stable release +Standard installation: ```sh $ pip install pythainlp ``` -Development release +For some advanced functionalities, such as word vectors, extra packages may be needed. Install them with these options during pip install: ```sh -$ pip install https://github.com/PyThaiNLP/pythainlp/archive/dev.zip +$ pip install pythainlp[extra1,extra2,...] ``` -Note: PyTorch is required for ulmfit sentiment analyser. ```pip install torch``` is needed for the feature. gensim and keras packages may also needed for other modules that rely on these machine learning libraries. +where ```extras``` can be: + - ```artagger``` (to support the artagger part-of-speech tagger) + - ```deepcut``` (to support the deepcut machine-learnt tokenizer) + - ```icu``` (for ICU support in transliteration and tokenization) + - ```ipa``` (for International Phonetic Alphabet support in transliteration) + - ```ml``` (to support ULMFiT models, such as the sentiment analyser) + - ```ner``` (for the named-entity recognizer) + - ```thai2rom``` (for machine-learnt romanization) + - ```thai2vec``` (for Thai word vectors) + - ```full``` (install everything) + +See ```extras``` and ```extras_require``` in [```setup.py```](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py) for details.
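A quick way to check an installation is to exercise a few core functions. The sketch below is illustrative only: the inputs and expected outputs are taken from this changeset's own test suite (`tests/__init__.py`), and the last call assumes the ```thai2vec``` extra is installed and its model data has been downloaded.

```python
# -*- coding: utf-8 -*-
# Minimal smoke test; expected values follow tests/__init__.py in this changeset.
from pythainlp.tag import pos_tag
from pythainlp.tokenize import word_tokenize
from pythainlp.transliterate import romanize

# Word segmentation with the default "newmm" engine
print(word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"))
# ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คนไทย']

# Romanization with the default "royin" engine
print(romanize("แมว"))  # 'maeo'

# Part-of-speech tagging with the unigram tagger
print(pos_tag(word_tokenize("คุณกำลังประชุม"), engine="unigram"))
# [('คุณ', 'PPRS'), ('กำลัง', 'XVBM'), ('ประชุม', 'VACT')]

# Word vectors need the "thai2vec" extra (gensim, numpy);
# the model data is fetched on first use.
from pythainlp.word_vector import thai2vec
print(thai2vec.doesnt_match(["ญี่ปุ่น", "พม่า", "ไอติม"]))  # 'ไอติม'
```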
+ +Development release: + +```sh +$ pip install https://github.com/PyThaiNLP/pythainlp/archive/dev.zip +``` ## Documentation diff --git a/appveyor.yml b/appveyor.yml index 00b4e1ae2..808598eae 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -32,7 +32,7 @@ install: # - "set ICU_VERSION=62" - "%PYTHON%/python.exe -m pip install --upgrade pip" - "%PYTHON%/python.exe -m pip install %PYICU_WHEEL%" - - "%PYTHON%/python.exe -m pip install -e .[icu,ner,pos,tokenize,transliterate]" + - "%PYTHON%/python.exe -m pip install -e .[icu,ipa,ner,thai2vec]" test_script: - "%PYTHON%/python.exe -m pip --version" diff --git a/pythainlp/number/wordtonum.py b/pythainlp/number/wordtonum.py index 7184cf61a..871d4c784 100644 --- a/pythainlp/number/wordtonum.py +++ b/pythainlp/number/wordtonum.py @@ -40,11 +40,11 @@ def _thaiword_to_num(tokens): - len_tokens = len(tokens) - - if len_tokens == 0: + if not tokens: return None + len_tokens = len(tokens) + if len_tokens == 1: return _THAI_INT_MAP[tokens[0]] diff --git a/pythainlp/sentiment/ulmfit_sent.py b/pythainlp/sentiment/ulmfit_sent.py index 19ca3368f..19532f453 100644 --- a/pythainlp/sentiment/ulmfit_sent.py +++ b/pythainlp/sentiment/ulmfit_sent.py @@ -15,6 +15,8 @@ # from fastai.text import multiBatchRNN +__all__ = ["about", "get_sentiment"] + MODEL_NAME = "sent_model" ITOS_NAME = "itos_sent" @@ -29,24 +31,26 @@ def get_path(fname): # load model -model = torch.load(get_path(MODEL_NAME)) -model.eval() +MODEL = torch.load(get_path(MODEL_NAME)) +MODEL.eval() # load itos and stoi itos = pickle.load(open(get_path(ITOS_NAME), "rb")) stoi = defaultdict(lambda: 0, {v: k for k, v in enumerate(itos)}) + # get sentiment; 1 for positive and 0 for negative # or score if specified return_score=True -softmax = lambda x: np.exp(x) / np.sum(np.exp(x)) +def softmax(x): + return np.exp(x) / np.sum(np.exp(x)) def get_sentiment(text, return_score=False): words = word_tokenize(text) tensor = LongTensor([stoi[word] for word in words]).view(-1, 1).cpu() tensor = Variable(tensor, volatile=False) - model.reset() - pred, *_ = model(tensor) + MODEL.reset() + pred, *_ = MODEL(tensor) result = pred.data.cpu().numpy().reshape(-1) if return_score: diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py index d60ee950f..7b694375a 100644 --- a/pythainlp/tag/__init__.py +++ b/pythainlp/tag/__init__.py @@ -20,21 +20,30 @@ def pos_tag(words, engine="unigram", corpus="orchid"): * pud - Parallel Universal Dependencies (PUD) treebanks :return: returns a list of labels regarding which part of speech it is """ + if not words: + return [] + if engine == "perceptron": - from .perceptron import tag as _tag + from .perceptron import tag as tag_ elif engine == "artagger": - def _tag(text, corpus=None): + def tag_(words, corpus=None): + if not words: + return [] + from artagger import Tagger - words = Tagger().tag(" ".join(text)) + words_ = Tagger().tag(" ".join(words)) - return [(word.word, word.tag) for word in words] + return [(word.word, word.tag) for word in words_] else: # default, use "unigram" ("old") engine - from .unigram import tag as _tag + from .unigram import tag as tag_ - return _tag(words, corpus=corpus) + return tag_(words, corpus=corpus) def pos_tag_sents(sentences, engine="unigram", corpus="orchid"): + if not sentences: + return [] + return [pos_tag(sent, engine=engine, corpus=corpus) for sent in sentences] diff --git a/pythainlp/tag/perceptron.py b/pythainlp/tag/perceptron.py index 8d4fe1280..e5dc9e424 100644 --- a/pythainlp/tag/perceptron.py +++ b/pythainlp/tag/perceptron.py @@ 
-7,28 +7,33 @@ import dill from pythainlp.corpus import CORPUS_PATH +_ORCHID_DATA_FILENAME = "orchid_pt_tagger.dill" +_PUD_DATA_FILENAME = "ud_thai_pud_pt_tagger.dill" -def orchid_data(): - data_filename = os.path.join(CORPUS_PATH, "orchid_pt_tagger.dill") + +def _load_tagger(filename): + data_filename = os.path.join(CORPUS_PATH, filename) with open(data_filename, "rb") as fh: model = dill.load(fh) return model -def pud_data(): - data_filename = os.path.join(CORPUS_PATH, "ud_thai_pud_pt_tagger.dill") - with open(data_filename, "rb") as fh: - model = dill.load(fh) - return model +_ORCHID_TAGGER = _load_tagger(_ORCHID_DATA_FILENAME) +_PUD_TAGGER = _load_tagger(_PUD_DATA_FILENAME) -def tag(text, corpus="pud"): +def tag(words, corpus="pud"): """ รับค่าเป็น ''list'' คืนค่าเป็น ''list'' เช่น [('คำ', 'ชนิดคำ'), ('คำ', 'ชนิดคำ'), ...] """ + if not words: + return [] + + words = [word.strip() for word in words if word.strip()] + if corpus == "orchid": - tagger = orchid_data() + tagger = _ORCHID_TAGGER else: # default, use "pud" as a corpus - tagger = pud_data() + tagger = _PUD_TAGGER - return tagger.tag(text) + return tagger.tag(words) diff --git a/pythainlp/tag/unigram.py b/pythainlp/tag/unigram.py index 21324bf64..e90c992f0 100644 --- a/pythainlp/tag/unigram.py +++ b/pythainlp/tag/unigram.py @@ -15,26 +15,29 @@ _THAI_POS_PUD_PATH = os.path.join(CORPUS_PATH, _THAI_POS_PUD_FILENAME) -def orchid_data(): +def _orchid_tagger(): with open(_THAI_POS_ORCHID_PATH, encoding="utf-8-sig") as f: model = json.load(f) return model -def pud_data(): +def _pud_tagger(): with open(_THAI_POS_PUD_PATH, "rb") as handle: model = dill.load(handle) return model -def tag(text, corpus): +def tag(words, corpus): """ รับค่าเป็น ''list'' คืนค่าเป็น ''list'' เช่น [('คำ', 'ชนิดคำ'), ('คำ', 'ชนิดคำ'), ...] 
""" + if not words: + return [] + if corpus == "orchid": - tagger = nltk.tag.UnigramTagger(model=orchid_data()) - return tagger.tag(text) + tagger = nltk.tag.UnigramTagger(model=_orchid_tagger()) + return tagger.tag(words) # default, use "pud" as a corpus - tagger = pud_data() - return tagger.tag(text) + tagger = _pud_tagger() + return tagger.tag(words) diff --git a/pythainlp/tokenize/__init__.py b/pythainlp/tokenize/__init__.py index e81c3214d..3c97535c0 100644 --- a/pythainlp/tokenize/__init__.py +++ b/pythainlp/tokenize/__init__.py @@ -34,15 +34,18 @@ def word_tokenize(text, engine="newmm", whitespaces=True): >>> word_tokenize(text, engine="icu") ['โอ', 'เค', 'บ่', 'พวก', 'เรา', 'รัก', 'ภาษา', 'บ้าน', 'เกิด'] """ + if not text: + return [] + if engine == "newmm" or engine == "onecut": - from .newmm import mmcut as segment + from .newmm import segment elif engine == "longest" or engine == "longest-matching": from .longest import segment elif engine == "ulmfit": - from .newmm import mmcut + from .newmm import segment as segment_ def segment(text): - return mmcut(text, trie=FROZEN_DICT_TRIE) + return segment_(text, trie=FROZEN_DICT_TRIE) elif engine == "icu": from .pyicu import segment @@ -51,7 +54,7 @@ def segment(text): elif engine == "mm" or engine == "multi_cut": from .multi_cut import segment else: # default, use "newmm" engine - from .newmm import mmcut as segment + from .newmm import segment if not whitespaces: return [token.strip(" ") for token in segment(text) if token.strip(" ")] @@ -73,14 +76,18 @@ def dict_word_tokenize(text, custom_dict, engine="newmm"): >>> dict_word_tokenize("แมวดีดีแมว", trie) ['แมว', 'ดี', 'ดี', 'แมว'] """ + + if not text: + return [] + if engine == "newmm" or engine == "onecut": - from .newmm import mmcut as segment + from .newmm import segment elif engine == "longest" or engine == "longest-matching": from .longest import segment elif engine == "mm" or engine == "multi_cut": from .multi_cut import segment else: # default, use "newmm" engine - from .newmm import mmcut as segment + from .newmm import segment return segment(text, custom_dict) @@ -94,12 +101,16 @@ def sent_tokenize(text, engine="whitespace+newline"): :return: a list of text, split by whitespace or new line. """ + + if not text: + return [] + sentences = [] if engine == "whitespace": sentences = nltk.tokenize.WhitespaceTokenizer().tokenize(text) else: # default, use whitespace + newline - sentences = re.sub(r"\n+|\s+", "|", text).split("|") + sentences = re.sub(r"\n+|\s+", "|", text.strip()).split("|") return sentences @@ -110,6 +121,9 @@ def subword_tokenize(text, engine="tcc"): :param str engine: choosing 'tcc' uses the Thai Character Cluster rule to segment words into the smallest unique units. :return: a list of tokenized strings. 
""" + if not text: + return "" + from .tcc import tcc return tcc(text) @@ -121,6 +135,10 @@ def syllable_tokenize(text): :return: returns list of strings of syllables """ + + if not text: + return [] + tokens = [] if text: words = word_tokenize(text) @@ -171,6 +189,6 @@ def __init__(self, custom_dict=None): self.__trie_dict = Trie(thai_words()) def word_tokenize(self, text, engine="newmm"): - from .newmm import mmcut as segment + from .newmm import segment return segment(text, self.__trie_dict) diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py index 395e76583..510a1b848 100644 --- a/pythainlp/tokenize/deepcut.py +++ b/pythainlp/tokenize/deepcut.py @@ -7,4 +7,7 @@ def segment(text): + if not text: + return [] + return deepcut.tokenize(text) diff --git a/pythainlp/tokenize/etcc.py b/pythainlp/tokenize/etcc.py index a90e0b835..5e73b4586 100644 --- a/pythainlp/tokenize/etcc.py +++ b/pythainlp/tokenize/etcc.py @@ -27,6 +27,10 @@ def etcc(text): รับ str ส่งออก str """ + + if not text: + return "" + if re.search(r"[เแ]" + _C + r"[" + "".join(_UV) + r"]" + r"\w", text): search = re.findall(r"[เแ]" + _C + r"[" + "".join(_UV) + r"]" + r"\w", text) for i in search: diff --git a/pythainlp/tokenize/longest.py b/pythainlp/tokenize/longest.py index 483685da2..33ff1fa0a 100644 --- a/pythainlp/tokenize/longest.py +++ b/pythainlp/tokenize/longest.py @@ -35,7 +35,7 @@ _UNKNOWN = False -class Tokenizer(object): +class LongestMatchTokenizer(object): def __init__(self, trie): self.__trie = trie @@ -95,6 +95,9 @@ def __longest_matching(self, text, begin_pos): return "" def __segment_text(self, text): + if not text: + return [] + begin_pos = 0 len_text = len(text) tokens = [] @@ -137,4 +140,5 @@ def segment(text, trie=None): """ตัดคำภาษาไทยด้วยวิธี longest matching""" if not trie: trie = DEFAULT_DICT_TRIE - return Tokenizer(trie).tokenize(text) + + return LongestMatchTokenizer(trie).tokenize(text) diff --git a/pythainlp/tokenize/multi_cut.py b/pythainlp/tokenize/multi_cut.py index 80f621c27..d161bdf4e 100644 --- a/pythainlp/tokenize/multi_cut.py +++ b/pythainlp/tokenize/multi_cut.py @@ -40,7 +40,7 @@ def __init__(self, value, multi=None, in_dict=True): _PAT_ENG = re.compile(_RE_ENG) -def multicut(text, trie=None): +def _multicut(text, trie=None): """ ส่งคืน LatticeString คืนมาเป็นก้อนๆ """ @@ -95,18 +95,18 @@ def serialize(p, p2): # helper function def mmcut(text): res = [] - for w in multicut(text): + for w in _multicut(text): mm = min(w.multi, key=lambda x: x.count("/")) res.extend(mm.split("/")) return res -def combine(ww): +def _combine(ww): if ww == []: yield "" else: w = ww[0] - for tail in combine(ww[1:]): + for tail in _combine(ww[1:]): if w.unique: yield w + "|" + tail else: @@ -118,13 +118,18 @@ def segment(text, trie=None): """ ใช้ในการหา list ที่สามารถตัดคำได้ทั้งหมด """ - ww = list(multicut(text, trie=trie)) - return ww + if not text: + return [] + + return list(_multicut(text, trie=trie)) def find_all_segment(text, trie=None): """ ใช้ในการหา list ที่สามารถตัดคำได้ทั้งหมด """ - ww = list(multicut(text, trie=trie)) - return list(combine(ww)) + if not text: + return [] + + ww = list(_multicut(text, trie=trie)) + return list(_combine(ww)) diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py index 08fda8628..17815fd9f 100644 --- a/pythainlp/tokenize/newmm.py +++ b/pythainlp/tokenize/newmm.py @@ -90,7 +90,11 @@ def onecut(text, trie): # ช่วยให้ไม่ต้องพิมพ์ยาวๆ -def mmcut(text, trie=None): +def segment(text, trie=None): + if not text: + return [] + if not trie: 
trie = DEFAULT_DICT_TRIE + return list(onecut(text, trie)) diff --git a/pythainlp/tokenize/pyicu.py b/pythainlp/tokenize/pyicu.py index aefcc9311..23b7b38e4 100644 --- a/pythainlp/tokenize/pyicu.py +++ b/pythainlp/tokenize/pyicu.py @@ -17,5 +17,8 @@ def _gen_words(text): def segment(text): + if not text: + return [] + text = re.sub("([^\u0E00-\u0E7F\n ]+)", " \\1 ", text) return list(_gen_words(text)) diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py index bfb5920e9..b50bdb24a 100644 --- a/pythainlp/tokenize/tcc.py +++ b/pythainlp/tokenize/tcc.py @@ -48,6 +48,9 @@ def tcc_gen(w): + if not w: + return '' + p = 0 while p < len(w): m = PAT_TCC.match(w[p:]) @@ -60,13 +63,20 @@ def tcc_gen(w): def tcc_pos(text): + if not text: + return set() + p_set = set() p = 0 for w in tcc_gen(text): p += len(w) p_set.add(p) + return p_set -def tcc(w, sep="/"): - return sep.join(tcc_gen(w)) \ No newline at end of file +def tcc(text, sep="/"): + if not text: + return "" + + return sep.join(tcc_gen(text)) diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py index 48bd5cfd2..7ede03197 100644 --- a/pythainlp/transliterate/__init__.py +++ b/pythainlp/transliterate/__init__.py @@ -10,11 +10,17 @@ def romanize(text, engine="royin"): :param str engine: 'royin' (default) or 'thai2rom'. 'royin' uses Thai Royal Institute standard. 'thai2rom' is deep learning Thai romanization (require keras). :return: English (more or less) text that spells out how the Thai text should read. """ + + if not text: + return "" + if engine == "thai2rom": from .thai2rom import romanize + return romanize(text) else: # use default engine "royin" from .royin import romanize + words = word_tokenize(text) romanized_words = [romanize(word) for word in words] return "".join(romanized_words) @@ -26,6 +32,10 @@ def transliterate(text, engine="ipa"): :param str engine: 'ipa' (default) or 'pyicu'. :return: A string of Internaitonal Phonetic Alphabets indicating how the text should read. 
""" + + if not text: + return "" + if engine == "pyicu": from .pyicu import transliterate else: diff --git a/pythainlp/transliterate/royin.py b/pythainlp/transliterate/royin.py index 69a3671d9..e868f10d0 100644 --- a/pythainlp/transliterate/royin.py +++ b/pythainlp/transliterate/royin.py @@ -145,8 +145,9 @@ def _replace_consonants(word, res): lenword = len(res) while i < lenword: if i == 0 and res[0] == "ห": - word = word.replace(res[0], _CONSONANTS[res[0]][0]) - i += 1 + word = word.replace(res[0], "") + del res[0] + lenword -= 1 elif i == 0 and res[0] != "ห": word = word.replace(res[0], _CONSONANTS[res[0]][0]) i += 1 @@ -168,6 +169,9 @@ def _replace_consonants(word, res): def romanize(word): + if not word: + return "" + word2 = _replace_vowels(_normalize(word)) res = re.findall(_RE_CONSONANT, word2) # 2-character word, all consonants diff --git a/pythainlp/word_vector/thai2vec.py b/pythainlp/word_vector/thai2vec.py index e2b4b1329..0f371e31e 100644 --- a/pythainlp/word_vector/thai2vec.py +++ b/pythainlp/word_vector/thai2vec.py @@ -10,7 +10,7 @@ from pythainlp.tokenize import word_tokenize -def download(): +def _download(): path = get_file("thai2vec02") if not path: download_data("thai2vec02") @@ -20,8 +20,13 @@ def download(): def get_model(): """ - :return: Downloads the `gensim` model.""" - return KeyedVectors.load_word2vec_format(download(), binary=False) + Download model + :return: `gensim` model + """ + return KeyedVectors.load_word2vec_format(_download(), binary=False) + + +_MODEL = get_model() def most_similar_cosmul(positive, negative): @@ -29,28 +34,30 @@ def most_similar_cosmul(positive, negative): การใช้งาน input list """ - return get_model().most_similar_cosmul(positive=positive, negative=negative) + return _MODEL.most_similar_cosmul(positive=positive, negative=negative) def doesnt_match(listdata): - return get_model().doesnt_match(listdata) + return _MODEL.doesnt_match(listdata) def similarity(word1, word2): """ + Get cosine similarity between two words. + If a word is not in the vocabulary, KeyError will be raised. 
:param str word1: first word :param str word2: second word :return: the cosine similarity between the two word vectors """ - return get_model().similarity(word1, word2) + return _MODEL.similarity(word1, word2) def sentence_vectorizer(text, dim=300, use_mean=False): words = word_tokenize(text) vec = np.zeros((1, dim)) for word in words: - if word in get_model().wv.index2word: - vec += get_model().wv.word_vec(word) + if word in _MODEL.wv.index2word: + vec += _MODEL.wv.word_vec(word) else: pass if use_mean: diff --git a/setup.py b/setup.py index 3fa7c5c18..583a5d98a 100644 --- a/setup.py +++ b/setup.py @@ -9,21 +9,25 @@ requirements = f.read().splitlines() extras = { + "artagger": ["artagger"], + "deepcut": ["deepcut", "keras", "tensorflow"], "icu": ["pyicu"], + "ipa": ["epitran"], "ml": ["fastai==0.7.0", "keras", "numpy", "torch"], "ner": ["sklearn_crfsuite"], - "pos": ["artagger"], - "tokenize": ["deepcut", "pyicu"], - "transliterate": ["epitran", "pyicu"], + "thai2rom": ["keras", "numpy"], + "thai2vec": ["gensim", "numpy"], "full": [ "artagger", "deepcut", "epitran", "fastai==0.7.0", + "gensim", "keras", "numpy", "pyicu", "sklearn_crfsuite", + "tensorflow", "torch", ], } diff --git a/tests/__init__.py b/tests/__init__.py index ec4a492d6..12fc36236 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import unittest from collections import Counter +from nltk.corpus import wordnet as wn from pythainlp.collation import collate from pythainlp.corpus import ( @@ -34,11 +35,26 @@ from pythainlp.sentiment import sentiment from pythainlp.soundex import lk82, metasound, soundex, udom83 from pythainlp.spell import correct, spell +from pythainlp.spell.pn import NorvigSpellChecker, dictionary, known, prob from pythainlp.summarize import summarize -from pythainlp.tag import pos_tag, pos_tag_sents -from pythainlp.tokenize import etcc, syllable_tokenize, tcc, word_tokenize +from pythainlp.tag import perceptron, pos_tag, pos_tag_sents, unigram +from pythainlp.tokenize import ( + FROZEN_DICT_TRIE, + dict_word_tokenize, + etcc, + longest, + multi_cut, + newmm, + sent_tokenize, + subword_tokenize, + syllable_tokenize, + tcc, + word_tokenize, +) +from pythainlp.tokenize import pyicu as tokenize_pyicu from pythainlp.transliterate import romanize, transliterate from pythainlp.transliterate.ipa import trans_list, xsampa_list +from pythainlp.transliterate.royin import romanize as romanize_royin from pythainlp.util import ( deletetone, eng_to_thai, @@ -48,6 +64,7 @@ normalize, thai_to_eng, ) +from pythainlp.word_vector import thai2vec class TestUM(unittest.TestCase): @@ -86,10 +103,31 @@ def test_ttc(self): self.assertIsNotNone(ttc.word_freqs()) def test_wordnet(self): + self.assertIsNotNone(wordnet.langs()) + self.assertEqual( wordnet.synset("spy.n.01").lemma_names("tha"), ["สปาย", "สายลับ"] ) - self.assertIsNotNone(wordnet.langs()) + self.assertIsNotNone(wordnet.synsets("นก")) + self.assertIsNotNone(wordnet.all_synsets(pos=wn.ADJ)) + + self.assertIsNotNone(wordnet.lemmas("นก")) + self.assertIsNotNone(wordnet.all_lemma_names(pos=wn.ADV)) + self.assertIsNotNone(wordnet.lemma("cat.n.01.cat")) + + self.assertEqual(wordnet.morphy("dogs"), "dog") + + bird = wordnet.synset("bird.n.01") + mouse = wordnet.synset("mouse.n.01") + self.assertEqual( + wordnet.path_similarity(bird, mouse), bird.path_similarity(mouse) + ) + self.assertEqual( + wordnet.wup_similarity(bird, mouse), bird.wup_similarity(mouse) + ) + + cat_key = wordnet.synsets("แมว")[0].lemmas()[0].key() + 
self.assertIsNotNone(wordnet.lemma_from_key(cat_key)) # ### pythainlp.date @@ -170,6 +208,7 @@ def test_number(self): ) self.assertEqual(thaiword_to_num("ยี่สิบ"), 20) self.assertEqual(thaiword_to_num("ศูนย์"), 0) + self.assertEqual(thaiword_to_num("ศูนย์อะไรนะ"), 0) self.assertEqual(thaiword_to_num(""), None) self.assertEqual(thaiword_to_num(None), None) @@ -234,13 +273,22 @@ def test_soundex(self): # ### pythainlp.spell def test_spell(self): - self.assertIsNotNone(spell("เน้ร")) - self.assertEqual(spell(""), "") self.assertEqual(spell(None), "") + self.assertEqual(spell(""), "") + self.assertIsNotNone(spell("เน้ร")) + self.assertIsNotNone(spell("เกสมร์")) - self.assertIsNotNone(correct("ทดสอง")) - self.assertEqual(correct(""), "") self.assertEqual(correct(None), "") + self.assertEqual(correct(""), "") + self.assertIsNotNone(correct("ทดสอง")) + + self.assertIsNotNone(dictionary()) + self.assertGreaterEqual(prob("มี"), 0) + self.assertIsNotNone(known(["เกิด", "abc", ""])) + + checker = NorvigSpellChecker(dict_filter="") + self.assertIsNotNone(checker.dictionary()) + self.assertGreaterEqual(checker.prob("มี"), 0) # ### pythainlp.summarize @@ -262,8 +310,19 @@ def test_summarize(self): def test_pos_tag(self): tokens = ["ผม", "รัก", "คุณ"] + + self.assertEqual(pos_tag(None), []) + self.assertEqual(pos_tag([]), []) + self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="orchid")) self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="pud")) + self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="pud")) + + self.assertEqual(unigram.tag(None, corpus="pud"), []) + self.assertEqual(unigram.tag([], corpus="pud"), []) + self.assertEqual(unigram.tag(None, corpus="orchid"), []) + self.assertEqual(unigram.tag([], corpus="orchid"), []) + self.assertEqual( pos_tag(word_tokenize("คุณกำลังประชุม"), engine="unigram"), [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")], @@ -271,10 +330,16 @@ def test_pos_tag(self): self.assertIsNotNone(pos_tag(tokens, engine="perceptron", corpus="orchid")) self.assertIsNotNone(pos_tag(tokens, engine="perceptron", corpus="pud")) + self.assertEqual(perceptron.tag(None, corpus="pud"), []) + self.assertEqual(perceptron.tag([], corpus="pud"), []) + self.assertEqual(perceptron.tag(None, corpus="orchid"), []) + self.assertEqual(perceptron.tag([], corpus="orchid"), []) - # self.assertIsNotNone(pos_tag(tokens, engine="arttagger", corpus="orchid")) - # self.assertIsNotNone(pos_tag(tokens, engine="arttagger", corpus="pud")) + # self.assertIsNotNone(pos_tag(tokens, engine="artagger", corpus="orchid")) + # self.assertIsNotNone(pos_tag(tokens, engine="artagger", corpus="pud")) + self.assertEqual(pos_tag_sents(None), []) + self.assertEqual(pos_tag_sents([]), []) self.assertEqual( pos_tag_sents([["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]), [ @@ -285,30 +350,88 @@ def test_pos_tag(self): # ### pythainlp.tokenize - def test_syllable_tokenize(self): - self.assertEqual( - syllable_tokenize("สวัสดีชาวโลก"), ["สวัส", "ดี", "ชาว", "โลก"] + def test_dict_word_tokenize(self): + self.assertEqual(dict_word_tokenize("", custom_dict=FROZEN_DICT_TRIE), []) + self.assertIsNotNone( + dict_word_tokenize("รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE) + ) + self.assertIsNotNone( + dict_word_tokenize( + "รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE, engine="newmm" + ) + ) + self.assertIsNotNone( + dict_word_tokenize( + "รถไฟฟ้ากรุงเทพBTSหูว์ค์", + custom_dict=FROZEN_DICT_TRIE, + engine="longest", + ) + ) + self.assertIsNotNone( + dict_word_tokenize( + 
"รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE, engine="mm" + ) + ) + self.assertIsNotNone( + dict_word_tokenize( + "รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE, engine="XX" + ) + ) + + def test_etcc(self): + self.assertEqual(etcc.etcc(""), "") + self.assertEqual(etcc.etcc("คืนความสุข"), "/คืน/ความสุข") + self.assertIsNotNone( + etcc.etcc( + "หมูแมวเหล่านี้ด้วยเหตุผลเชื่อมโยงทางกรรมพันธุ์" + + "สัตว์มีแขนขาหน้าหัวเราะเพราะแข็งขืน" + ) ) def test_word_tokenize(self): + self.assertEqual(word_tokenize(""), []) self.assertEqual( word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"), ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"], ) + self.assertIsNotNone(word_tokenize("ทดสอบ", engine="ulmfit")) + self.assertIsNotNone(word_tokenize("ทดสอบ", engine="XX")) def test_word_tokenize_icu(self): + self.assertEqual(tokenize_pyicu.segment(None), []) + self.assertEqual(tokenize_pyicu.segment(""), []) self.assertEqual( word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="icu"), ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"], ) + # def test_word_tokenize_deepcut(self): + # self.assertEqual(deepcut.segment(None), []) + # self.assertEqual(deepcut.segment(""), []) + # self.assertIsNotNone(word_tokenize("ลึกลงไปลลลล", engine="deepcut")) + + def test_word_tokenize_longest_matching(self): + self.assertEqual(longest.segment(None), []) + self.assertEqual(longest.segment(""), []) + self.assertEqual( + word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="longest"), + ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"], + ) + def test_word_tokenize_mm(self): + self.assertEqual(multi_cut.segment(None), []) + self.assertEqual(multi_cut.segment(""), []) + self.assertEqual(word_tokenize("", engine="mm"), []) self.assertEqual( word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="mm"), ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"], ) + self.assertIsNotNone(multi_cut.find_all_segment("รถไฟฟ้ากรุงเทพมหานครBTS")) + def test_word_tokenize_newmm(self): + self.assertEqual(newmm.segment(None), []) + self.assertEqual(newmm.segment(""), []) self.assertEqual( word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="newmm"), ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"], @@ -326,31 +449,64 @@ def test_word_tokenize_newmm(self): ["จุ๋ม", "ง่วง"], ) - def test_word_tokenize_longest_matching(self): + def test_sent_tokenize(self): + self.assertEqual(sent_tokenize(None), []) + self.assertEqual(sent_tokenize(""), []) self.assertEqual( - word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="longest"), - ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"], + sent_tokenize("รักน้ำ รักปลา ", engine="whitespace"), ["รักน้ำ", "รักปลา"] + ) + self.assertEqual(sent_tokenize("รักน้ำ รักปลา "), ["รักน้ำ", "รักปลา"]) + + def test_subword_tokenize(self): + self.assertEqual(subword_tokenize(None), "") + self.assertEqual(subword_tokenize(""), "") + self.assertIsNotNone(subword_tokenize("สวัสดีดาวอังคาร")) + + def test_syllable_tokenize(self): + self.assertEqual(syllable_tokenize(None), []) + self.assertEqual(syllable_tokenize(""), []) + self.assertEqual( + syllable_tokenize("สวัสดีชาวโลก"), ["สวัส", "ดี", "ชาว", "โลก"] ) def test_tcc(self): + self.assertEqual(tcc.tcc(None), "") + self.assertEqual(tcc.tcc(""), "") self.assertEqual(tcc.tcc("ประเทศไทย"), "ป/ระ/เท/ศ/ไท/ย") - def test_etcc(self): - self.assertEqual(etcc.etcc("คืนความสุข"), "/คืน/ความสุข") + self.assertEqual(list(tcc.tcc_gen("")), []) + self.assertEqual(tcc.tcc_pos(""), set()) # ### 
pythainlp.transliterate def test_romanize(self): + self.assertEqual(romanize(None), "") + self.assertEqual(romanize(""), "") self.assertEqual(romanize("แมว"), "maeo") - self.assertIsNotNone(romanize("กก", engine="royin")) + + self.assertEqual(romanize_royin(None), "") + self.assertEqual(romanize_royin(""), "") + self.assertEqual(romanize_royin("หาย"), "hai") + self.assertEqual(romanize_royin("หยาก"), "yak") + self.assertEqual(romanize("แมว", engine="royin"), "maeo") self.assertEqual(romanize("เดือน", engine="royin"), "duean") self.assertEqual(romanize("ดู", engine="royin"), "du") self.assertEqual(romanize("ดำ", engine="royin"), "dam") self.assertEqual(romanize("บัว", engine="royin"), "bua") + self.assertEqual(romanize("กร", engine="royin"), "kon") + self.assertEqual(romanize("กรร", engine="royin"), "kan") + self.assertEqual(romanize("กรรม", engine="royin"), "kam") + self.assertIsNotNone(romanize("กก", engine="royin")) + self.assertIsNotNone(romanize("ฝ้าย", engine="royin")) + self.assertIsNotNone(romanize("ทีปกร", engine="royin")) + self.assertIsNotNone(romanize("กรม", engine="royin")) + self.assertIsNotNone(romanize("ธรรพ์", engine="royin")) + self.assertIsNotNone(romanize("กฏa์", engine="royin")) # self.assertIsNotNone(romanize("บัว", engine="thai2rom")) def test_transliterate(self): + self.assertEqual(transliterate(""), "") self.assertEqual(transliterate("แมว", "pyicu"), "mæw") self.assertEqual(transliterate("คน", engine="ipa"), "kʰon") self.assertIsNotNone(trans_list("คน")) @@ -384,6 +540,23 @@ def test_keyboard(self): self.assertEqual(eng_to_thai("l;ylfu8iy["), "สวัสดีครับ") self.assertEqual(thai_to_eng("สวัสดีครับ"), "l;ylfu8iy[") + # ### pythainlp.word_vector + + def test_thai2vec(self): + self.assertGreaterEqual(thai2vec.similarity("แบคทีเรีย", "คน"), 0) + self.assertIsNotNone(thai2vec.sentence_vectorizer("")) + self.assertIsNotNone(thai2vec.sentence_vectorizer("เสรีภาพในการชุมนุม")) + self.assertIsNotNone( + thai2vec.sentence_vectorizer("เสรีภาพในการสมาคม", use_mean=True) + ) + self.assertIsNotNone(thai2vec.sentence_vectorizer("I คิด therefore I am ผ็ฎ์")) + self.assertEqual( + thai2vec.most_similar_cosmul(["ราชา", "ผู้ชาย"], ["ผู้หญิง"])[0][0], + "ราชินี", + ) + self.assertEqual(thai2vec.doesnt_match(["ญี่ปุ่น", "พม่า", "ไอติม"]), "ไอติม") + self.assertIsNotNone(thai2vec.about()) + if __name__ == "__main__": unittest.main()