diff --git a/pythainlp/number/thainum.py b/pythainlp/number/thainum.py index 4f274b6a2..91776e520 100644 --- a/pythainlp/number/thainum.py +++ b/pythainlp/number/thainum.py @@ -5,12 +5,11 @@ Adapted from http://justmindthought.blogspot.com/2012/12/code-php.html """ -import ast import math __all__ = ["bahttext", "num_to_thaiword"] -p = [ +_p = [ ["ภาษาไทย", "ตัวเลข", "เลขไทย"], ["หนึ่ง", "1", "๑"], ["สอง", "2", "๒"], @@ -22,18 +21,15 @@ ["แปด", "8", "๘"], ["เก้า", "9", "๙"], ] -thaitonum = dict((x[2], x[1]) for x in p[1:]) -p1 = dict((x[0], x[1]) for x in p[1:]) -d1 = 0 -# เลขไทยสู่เลข +# เลขไทยสู่เลขอารบิก def thai_num_to_num(text): """ :param str text: Thai number characters such as '๑', '๒', '๓' :return: universal numbers such as '1', '2', '3' """ - thaitonum = dict((x[2], x[1]) for x in p[1:]) + thaitonum = dict((x[2], x[1]) for x in _p[1:]) return thaitonum[text] @@ -42,7 +38,7 @@ def thai_num_to_text(text): :param str text: Thai number characters such as '๑', '๒', '๓' :return: Thai numbers, spelled out in Thai """ - thaitonum = dict((x[2], x[0]) for x in p[1:]) + thaitonum = dict((x[2], x[0]) for x in _p[1:]) return thaitonum[text] @@ -51,7 +47,7 @@ def num_to_thai_num(text): :param text: universal numbers such as '1', '2', '3' :return: Thai number characters such as '๑', '๒', '๓' """ - thaitonum = dict((x[1], x[2]) for x in p[1:]) + thaitonum = dict((x[1], x[2]) for x in _p[1:]) return thaitonum[text] @@ -60,7 +56,7 @@ def num_to_text(text): :param text: universal numbers such as '1', '2', '3' :return: Thai numbers, spelled out in Thai """ - thaitonum = dict((x[1], x[0]) for x in p[1:]) + thaitonum = dict((x[1], x[0]) for x in _p[1:]) return thaitonum[text] @@ -69,7 +65,7 @@ def text_to_num(text): :param text: Thai numbers, spelled out in Thai :return: universal numbers such as '1', '2', '3' """ - thaitonum = dict((x[0], x[1]) for x in p[1:]) + thaitonum = dict((x[0], x[1]) for x in _p[1:]) return thaitonum[text] @@ -78,47 +74,34 @@ def text_to_thai_num(text): :param text: Thai numbers, spelled out in Thai :return: Thai numbers such as '๑', '๒', '๓' """ - thaitonum = dict((x[0], x[2]) for x in p[1:]) + thaitonum = dict((x[0], x[2]) for x in _p[1:]) return thaitonum[text] -def number_format(num, places=0): - return "{:20,.2f}".format(num) - - -def bahttext(amount_number): +def bahttext(number): """ Converts a number to Thai text and adds a suffix of "Baht" currency. + Precision will be fixed at two decimal places (0.00) to fits "Satang" unit. Similar to BAHTTEXT function in Excel """ ret = "" - if amount_number is None: + if number is None: pass - elif amount_number == 0: + elif number == 0: ret = "ศูนย์บาทถ้วน" else: - amount_number = number_format(amount_number, 2).replace(" ", "") - pt = amount_number.find(".") - number, fraction = "", "" - amount_number1 = amount_number.split(".") - - if not pt: - number = amount_number - else: - amount_number = amount_number.split(".") - number = amount_number[0] - fraction = int(amount_number1[1]) - - number = ast.literal_eval(number.replace(",", "")) + num_int, num_dec = "{:.2f}".format(number).split(".") + num_int = int(num_int) + num_dec = int(num_dec) - baht = num_to_thaiword(number) - if baht != "": + baht = num_to_thaiword(num_int) + if baht: ret = "".join([ret, baht, "บาท"]) - satang = num_to_thaiword(fraction) - if satang != "" and satang != "ศูนย์": + satang = num_to_thaiword(num_dec) + if satang and satang != "ศูนย์": ret = "".join([ret, satang, "สตางค์"]) else: ret = "".join([ret, "ถ้วน"]) @@ -139,7 +122,18 @@ def num_to_thaiword(number): ret = "ศูนย์" else: _POS_CALL = ["แสน", "หมื่น", "พัน", "ร้อย", "สิบ", ""] - _NUM_CALL = ["", "หนึ่ง", "สอง", "สาม", "สี่", "ห้า", "หก", "เจ็ด", "แปด", "เก้า"] + _NUM_CALL = [ + "", + "หนึ่ง", + "สอง", + "สาม", + "สี่", + "ห้า", + "หก", + "เจ็ด", + "แปด", + "เก้า", + ] if number > 1000000: ret += num_to_thaiword(int(number / 1000000)) + "ล้าน" diff --git a/pythainlp/rank/__init__.py b/pythainlp/rank/__init__.py index b10fa238a..dbf5781b7 100644 --- a/pythainlp/rank/__init__.py +++ b/pythainlp/rank/__init__.py @@ -5,6 +5,7 @@ _STOPWORDS = thai_stopwords() + # เรียงจำนวนคำของประโยค def rank(data, stopword=False): """ @@ -20,5 +21,4 @@ def rank(data, stopword=False): if __name__ == "__main__": - text = ["แมว", "ชอบ", "ปลา", "แมว", "ชอบ", "นอน", "คน", "เป็น", "ทาส", "แมว"] - print(rank(text)) + print(rank(["แมว", "ชอบ", "ปลา", "แมว", "ชอบ", "นอน", "คน", "เป็น", "ทาส", "แมว"])) diff --git a/pythainlp/romanization/royin.py b/pythainlp/romanization/royin.py index 6f815956c..87776b8e2 100644 --- a/pythainlp/romanization/royin.py +++ b/pythainlp/romanization/royin.py @@ -180,41 +180,4 @@ def romanize(word): if __name__ == "__main__": - print(romanize("แมว") == "maeo") - print(romanize("น้าว") == "nao") - print(romanize("รวม") == "ruam") - print(romanize("ไทย") == "thai") - print(romanize("ผัวะ") == "phua") - print(romanize("ใย") == "yai") - print(romanize("ไล่") == "lai") - print(romanize("เมา") == "mao") - print(romanize("ต้น") == "ton") - print(romanize("ตาล") == "tan") - print(romanize("แสง") == "saeng") - print(romanize("เลียน") == "lian") - print(romanize("เลือก") == "lueak") - print(romanize("เธอ") == "thoe") - print(romanize("หรู") == "ru") - print(romanize("ลอม") == "lom") - print(romanize("และ") == "lae") - print(romanize("เลาะ") == "lo") - print(romanize("ลอม") == "lom") - print(romanize("เล็ง") == "leng") - print(romanize("นึก") == "nuek") - print(romanize("มัว") == "mua") - print(romanize("มีด") == "mit") - print(romanize("โค") == "kho") - print(romanize("ขอ") == "kho") - print(romanize("วรร") == "wan") - print(romanize("สรรพ") == "sap") - print(romanize("วัน") + romanize("นะ") + romanize("พง")) - print(romanize("นัด") + romanize("ชะ") + romanize("โนน")) - print(romanize("สรรพ")) - print(romanize("สรร") + romanize("หา")) - print(romanize("สรร") + romanize("หา")) - print(romanize("แมว")) print(romanize("กร") == romanize("กอน")) - print(romanize("คฤ") + romanize("หาสน์")) - print(romanize("กฤ") + romanize("ศะ") + romanize("ฎา")) - print(romanize("ฤกษ์")) - print(romanize("ฤ") + romanize("ดู") + romanize("กาล")) diff --git a/pythainlp/sentiment/__init__.py b/pythainlp/sentiment/__init__.py index dee3e86bf..c2b018382 100644 --- a/pythainlp/sentiment/__init__.py +++ b/pythainlp/sentiment/__init__.py @@ -39,14 +39,10 @@ def sentiment(text, engine="old"): return "pos" if tag else "neg" else: # default, use "old" vocabulary-based engine - with open( - os.path.join(_SENTIMENT_PATH, "vocabulary.data"), "rb" - ) as in_strm: + with open(os.path.join(_SENTIMENT_PATH, "vocabulary.data"), "rb") as in_strm: vocabulary = dill.load(in_strm) - with open( - os.path.join(_SENTIMENT_PATH, "sentiment.data"), "rb" - ) as in_strm: + with open(os.path.join(_SENTIMENT_PATH, "sentiment.data"), "rb") as in_strm: classifier = dill.load(in_strm) text = set(word_tokenize(text)) - _STOPWORDS diff --git a/pythainlp/tokenize/pyicu.py b/pythainlp/tokenize/pyicu.py index 45b5adb05..4a02b9f4e 100644 --- a/pythainlp/tokenize/pyicu.py +++ b/pythainlp/tokenize/pyicu.py @@ -31,7 +31,4 @@ def segment(text): if __name__ == "__main__": - print(segment("ทดสอบระบบตัดคำด้วยไอซียู")) - print(segment("ผมชอบพูดไทยคำEnglishคำ")) - print(segment("ไทยEnglish540บาท")) - print(segment("ประหยัด ไฟเบอห้า")) + print(segment("พูดไทย2คำEnglishคำ")) diff --git a/pythainlp/util/keyboard.py b/pythainlp/util/keyboard.py index 9cba30da7..7cdbe2aa0 100644 --- a/pythainlp/util/keyboard.py +++ b/pythainlp/util/keyboard.py @@ -124,10 +124,5 @@ def thai_to_eng(text): if __name__ == "__main__": - a = "l;ylfu8iy[" - a = eng_to_thai(a) - a = eng_to_thai(a) - b = "นามรสนอำันี" - b = thai_to_eng(b) - print(a) - print(b) + print(eng_to_thai("l;ylfu8iy[")) + print(thai_to_eng("นามรสนอำันี"))