From 71aa5ebd7bbcb43b56169e7df0b1a4af74dac65b Mon Sep 17 00:00:00 2001
From: MpolaarbearM <notari5555@gmail.com>
Date: Sat, 4 Nov 2023 00:15:12 +0100
Subject: [PATCH 1/4] Add pos_tag_transformers for POS tagging with transformers

---
 ptagger_temp4XcDf.json    |  1 +
 pythainlp/tag/__init__.py |  3 ++-
 pythainlp/tag/pos_tag.py  | 50 +++++++++++++++++++++++++++++++++++++++
 tests/test_tag.py         | 13 +++++++++-
 4 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 ptagger_temp4XcDf.json

diff --git a/ptagger_temp4XcDf.json b/ptagger_temp4XcDf.json
new file mode 100644
index 000000000..526ea1935
--- /dev/null
+++ b/ptagger_temp4XcDf.json
@@ -0,0 +1 @@
+{"weights": {"bias": {"N": 0.934, "V": -0.934}, "i suffix คน": {"N": 0.091, "V": -0.091}, "i pref1 ค": {"N": 0.091, "V": -0.091}, "i-1 tag -START-": {"N": 0.997, "V": -0.997}, "i-2 tag -START2-": {"N": 0.997, "V": -0.997}, "i tag+i-2 tag -START- -START2-": {"N": 0.997, "V": -0.997}, "i word คน": {"N": 0.091, "V": -0.091}, "i-1 tag+i word -START- คน": {"N": 0.997, "V": -0.997}, "i-1 word -START2-": {"N": 0.997, "V": -0.997}, "i-1 suffix T2-": {"N": 0.997, "V": -0.997}, "i-2 word -START-": {"N": 0.997, "V": -0.997}, "i+1 word เดิน": {"N": 0.997, "V": -0.997}, "i+1 suffix ดิน": {"N": 0.997, "V": -0.997}, "i+2 word -END-": {"N": 0.091, "V": -0.091}, "i suffix ดิน": {"V": 0.994, "N": -0.994}, "i pref1 เ": {"V": 0.994, "N": -0.994}, "i-1 tag V": {"V": 0.066, "N": -0.066}, "i-2 tag -START-": {"V": 1.894, "N": -1.894}, "i tag+i-2 tag V -START-": {"V": 0.994, "N": -0.994}, "i word เดิน": {"V": 0.994, "N": -0.994}, "i-1 tag+i word V เดิน": {"V": 0.994, "N": -0.994}, "i-1 word คน": {"V": 0.997, "N": -0.997}, "i-1 suffix คน": {"V": 0.997, "N": -0.997}, "i-2 word -START2-": {"V": 1.894, "N": -1.894}, "i+1 word -END-": {"V": -0.843, "N": 0.843}, "i+1 suffix ND-": {"V": -0.843, "N": 0.843}, "i+2 word -END2-": {"V": -0.843, "N": 0.843}, "i suffix นก": {"N": 0.929, "V": -0.929}, "i pref1 น": {"N": 0.929, "V": -0.929}, "i-2 tag N": {"N": 1.831, "V": -1.831}, "i tag+i-2 tag V N": {"N": 0.929, "V": -0.929}, "i word นก": {"N": 0.929, "V": -0.929}, "i-1 tag+i word V นก": {"N": 0.929, "V": -0.929}, "i-1 word กิน": {"N": 0.929, "V": -0.929}, "i-1 suffix กิน": {"N": 0.929, "V": -0.929}, "i-2 word คน": {"N": 1.831, "V": -1.831}, "i-1 tag N": {"V": -0.003, "N": 0.003}, "i tag+i-2 tag N -START-": {"V": 0.9, "N": -0.9}, "i-1 tag+i word N คน": {"V": 0.906, "N": -0.906}, "i+1 word กาแฟ": {"V": 0.906, "N": -0.906}, "i+1 suffix าแฟ": {"V": 0.906, "N": -0.906}, "i suffix าแฟ": {"N": 0.903, "V": -0.903}, "i pref1 ก": {"N": 0.903, "V": -0.903}, "i tag+i-2 tag N N": {"N": 0.903, "V": -0.903}, "i word 
กาแฟ": {"N": 0.903, "V": -0.903}, "i-1 tag+i word N กาแฟ": {"N": 0.903, "V": -0.903}, "i suffix 021": {"N": 0.8, "V": -0.8}, "i pref1 2": {"N": 0.8, "V": -0.8}, "i word !YEAR": {"N": 0.8, "V": -0.8}, "i-1 tag+i word N !YEAR": {"N": 0.8, "V": -0.8}, "i-1 word ปี": {"N": 0.8, "V": -0.8}, "i-1 suffix ปี": {"N": 0.8, "V": -0.8}, "i suffix บิน": {"V": 0.794, "N": -0.794}, "i pref1 บ": {"V": 0.794, "N": -0.794}, "i word บิน": {"V": 0.794, "N": -0.794}, "i-1 tag+i word N บิน": {"V": 0.794, "N": -0.794}, "i-1 word นก": {"V": 0.794, "N": -0.794}, "i-1 suffix นก": {"V": 0.794, "N": -0.794}}, "tagdict": {"กิน": "V"}, "classes": ["N", "V"]}
\ No newline at end of file
diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py
index 93d57e3c0..3325cbcb0 100644
--- a/pythainlp/tag/__init__.py
+++ b/pythainlp/tag/__init__.py
@@ -27,10 +27,11 @@
     "chunk_parse",
     "NER",
     "NNER",
+    "pos_tag_transformers" # added by moss
 ]
 
 from pythainlp.tag.locations import tag_provinces
-from pythainlp.tag.pos_tag import pos_tag, pos_tag_sents
+from pythainlp.tag.pos_tag import pos_tag, pos_tag_sents,pos_tag_transformers
 from pythainlp.tag._tag_perceptron import PerceptronTagger
 from pythainlp.tag.chunk import chunk_parse
 from pythainlp.tag.named_entity import NER, NNER
diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py
index c2e9bbf7f..c095b0217 100644
--- a/pythainlp/tag/pos_tag.py
+++ b/pythainlp/tag/pos_tag.py
@@ -15,6 +15,7 @@
 from typing import List, Tuple
 
 
+
 def pos_tag(
     words: List[str], engine: str = "perceptron", corpus: str = "orchid"
 ) -> List[Tuple[str, str]]:
@@ -176,3 +177,52 @@ def pos_tag_sents(
         return []
 
     return [pos_tag(sent, engine=engine, corpus=corpus) for sent in sentences]
+
+
+def pos_tag_transformers(
+    words: str, engine: str = "bert-base-th-cased-blackboard"
+):
+    """
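+    Tag text with a pretrained transformers token-classification model.
+    Returns [] for empty ``words``. Supported ``engine`` values:
+    "wangchanberta-ud-thai-pud-upos", "mdeberta-v3-ud-thai-pud-upos",
+    "bert-base-th-cased-blackboard" (default); others raise ValueError.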
+    """
+
+    try:
+        from transformers import AutoModelForTokenClassification, AutoTokenizer, TokenClassificationPipeline
+    except ImportError:
+        raise ImportError("Not found transformers! Please install transformers by pip install transformers")
+    
+    if not words:
+        return []
+    
+    if engine == "wangchanberta-ud-thai-pud-upos" :
+        model = AutoModelForTokenClassification.from_pretrained("Pavarissy/wangchanberta-ud-thai-pud-upos")
+        tokenizer = AutoTokenizer.from_pretrained("Pavarissy/wangchanberta-ud-thai-pud-upos")
+    elif engine == "mdeberta-v3-ud-thai-pud-upos":
+        model = AutoModelForTokenClassification.from_pretrained("Pavarissy/mdeberta-v3-ud-thai-pud-upos")
+        tokenizer = AutoTokenizer.from_pretrained("Pavarissy/mdeberta-v3-ud-thai-pud-upos")
+    elif engine == "bert-base-th-cased-blackboard":
+        model = AutoModelForTokenClassification.from_pretrained("lunarlist/pos_thai")
+        tokenizer = AutoTokenizer.from_pretrained("lunarlist/pos_thai")
+    else:
+        raise ValueError(
+            "pos_tag_transformers not support {0} engine.".format(
+                engine
+            )
+        )
+    
+    pipeline = TokenClassificationPipeline(model=model, tokenizer=tokenizer, grouped_entities=True)
+
+    outputs = pipeline(words)
+    return outputs
+
+
+
+
+
+        
+
+
+
diff --git a/tests/test_tag.py b/tests/test_tag.py
index eae51bf30..f5bb8747e 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -9,10 +9,12 @@
     perceptron,
     pos_tag,
     pos_tag_sents,
+    pos_tag_transformers,
     unigram,
-    tltk,
+    #tltk,
     NER,
     NNER,
+
 )
 from pythainlp.tag.locations import tag_provinces
 from pythainlp.tag.thainer import ThaiNameTagger
@@ -362,3 +364,12 @@ def test_NER_class(self):
     def test_NNER_class(self):
         nner = NNER()
         self.assertIsNotNone(nner.tag("แมวทำอะไรตอนห้าโมงเช้า"))
+
+    def test_pos_tag_transformers(self):
+        self.assertIsNotNone(pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "bert-base-th-cased-blackboard"))
+        self.assertIsNotNone(pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "mdeberta-v3-ud-thai-pud-upos"))
+        self.assertIsNotNone(pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "wangchanberta-ud-thai-pud-upos"))
+
+        with self.assertRaises(ValueError):
+            pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "non-existing-engine")
+

From 174dfb286bd6f27e3b5bec8a74f0dcb69de21095 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
Date: Sun, 5 Nov 2023 02:40:08 +0700
Subject: [PATCH 2/4] Re-enable tltk import in tag tests

---
 tests/test_tag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tag.py b/tests/test_tag.py
index f5bb8747e..16e398d02 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -11,7 +11,7 @@
     pos_tag_sents,
     pos_tag_transformers,
     unigram,
-    #tltk,
+    tltk,
     NER,
     NNER,
 

From 04bf517b94c72f682d6c96c8d6dcf7cb488387bd Mon Sep 17 00:00:00 2001
From: MpolaarbearM <notari5555@gmail.com>
Date: Sat, 4 Nov 2023 23:43:53 +0100
Subject: [PATCH 3/4] Fix PEP 8 issues and remove stray ptagger_temp4XcDf.json

---
 ptagger_temp4XcDf.json    |  1 -
 pythainlp/tag/__init__.py |  4 ++--
 pythainlp/tag/pos_tag.py  | 31 +++++++++++++------------------
 tests/test_tag.py         | 13 +++++++------
 4 files changed, 22 insertions(+), 27 deletions(-)
 delete mode 100644 ptagger_temp4XcDf.json

diff --git a/ptagger_temp4XcDf.json b/ptagger_temp4XcDf.json
deleted file mode 100644
index 526ea1935..000000000
--- a/ptagger_temp4XcDf.json
+++ /dev/null
@@ -1 +0,0 @@
-{"weights": {"bias": {"N": 0.934, "V": -0.934}, "i suffix คน": {"N": 0.091, "V": -0.091}, "i pref1 ค": {"N": 0.091, "V": -0.091}, "i-1 tag -START-": {"N": 0.997, "V": -0.997}, "i-2 tag -START2-": {"N": 0.997, "V": -0.997}, "i tag+i-2 tag -START- -START2-": {"N": 0.997, "V": -0.997}, "i word คน": {"N": 0.091, "V": -0.091}, "i-1 tag+i word -START- คน": {"N": 0.997, "V": -0.997}, "i-1 word -START2-": {"N": 0.997, "V": -0.997}, "i-1 suffix T2-": {"N": 0.997, "V": -0.997}, "i-2 word -START-": {"N": 0.997, "V": -0.997}, "i+1 word เดิน": {"N": 0.997, "V": -0.997}, "i+1 suffix ดิน": {"N": 0.997, "V": -0.997}, "i+2 word -END-": {"N": 0.091, "V": -0.091}, "i suffix ดิน": {"V": 0.994, "N": -0.994}, "i pref1 เ": {"V": 0.994, "N": -0.994}, "i-1 tag V": {"V": 0.066, "N": -0.066}, "i-2 tag -START-": {"V": 1.894, "N": -1.894}, "i tag+i-2 tag V -START-": {"V": 0.994, "N": -0.994}, "i word เดิน": {"V": 0.994, "N": -0.994}, "i-1 tag+i word V เดิน": {"V": 0.994, "N": -0.994}, "i-1 word คน": {"V": 0.997, "N": -0.997}, "i-1 suffix คน": {"V": 0.997, "N": -0.997}, "i-2 word -START2-": {"V": 1.894, "N": -1.894}, "i+1 word -END-": {"V": -0.843, "N": 0.843}, "i+1 suffix ND-": {"V": -0.843, "N": 0.843}, "i+2 word -END2-": {"V": -0.843, "N": 0.843}, "i suffix นก": {"N": 0.929, "V": -0.929}, "i pref1 น": {"N": 0.929, "V": -0.929}, "i-2 tag N": {"N": 1.831, "V": -1.831}, "i tag+i-2 tag V N": {"N": 0.929, "V": -0.929}, "i word นก": {"N": 0.929, "V": -0.929}, "i-1 tag+i word V นก": {"N": 0.929, "V": -0.929}, "i-1 word กิน": {"N": 0.929, "V": -0.929}, "i-1 suffix กิน": {"N": 0.929, "V": -0.929}, "i-2 word คน": {"N": 1.831, "V": -1.831}, "i-1 tag N": {"V": -0.003, "N": 0.003}, "i tag+i-2 tag N -START-": {"V": 0.9, "N": -0.9}, "i-1 tag+i word N คน": {"V": 0.906, "N": -0.906}, "i+1 word กาแฟ": {"V": 0.906, "N": -0.906}, "i+1 suffix าแฟ": {"V": 0.906, "N": -0.906}, "i suffix าแฟ": {"N": 0.903, "V": -0.903}, "i pref1 ก": {"N": 0.903, "V": -0.903}, "i tag+i-2 tag N N": {"N": 0.903, "V": -0.903}, "i word 
กาแฟ": {"N": 0.903, "V": -0.903}, "i-1 tag+i word N กาแฟ": {"N": 0.903, "V": -0.903}, "i suffix 021": {"N": 0.8, "V": -0.8}, "i pref1 2": {"N": 0.8, "V": -0.8}, "i word !YEAR": {"N": 0.8, "V": -0.8}, "i-1 tag+i word N !YEAR": {"N": 0.8, "V": -0.8}, "i-1 word ปี": {"N": 0.8, "V": -0.8}, "i-1 suffix ปี": {"N": 0.8, "V": -0.8}, "i suffix บิน": {"V": 0.794, "N": -0.794}, "i pref1 บ": {"V": 0.794, "N": -0.794}, "i word บิน": {"V": 0.794, "N": -0.794}, "i-1 tag+i word N บิน": {"V": 0.794, "N": -0.794}, "i-1 word นก": {"V": 0.794, "N": -0.794}, "i-1 suffix นก": {"V": 0.794, "N": -0.794}}, "tagdict": {"กิน": "V"}, "classes": ["N", "V"]}
\ No newline at end of file
diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py
index 3325cbcb0..790810a62 100644
--- a/pythainlp/tag/__init__.py
+++ b/pythainlp/tag/__init__.py
@@ -27,11 +27,11 @@
     "chunk_parse",
     "NER",
     "NNER",
-    "pos_tag_transformers" # added by moss
+    "pos_tag_transformers"
 ]
 
 from pythainlp.tag.locations import tag_provinces
-from pythainlp.tag.pos_tag import pos_tag, pos_tag_sents,pos_tag_transformers
+from pythainlp.tag.pos_tag import pos_tag, pos_tag_sents, pos_tag_transformers
 from pythainlp.tag._tag_perceptron import PerceptronTagger
 from pythainlp.tag.chunk import chunk_parse
 from pythainlp.tag.named_entity import NER, NNER
diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py
index c095b0217..abdfe5fc2 100644
--- a/pythainlp/tag/pos_tag.py
+++ b/pythainlp/tag/pos_tag.py
@@ -190,18 +190,22 @@ def pos_tag_transformers(
     """
 
     try:
-        from transformers import AutoModelForTokenClassification, AutoTokenizer, TokenClassificationPipeline
+        from transformers import AutoModelForTokenClassification, \
+            AutoTokenizer, TokenClassificationPipeline
     except ImportError:
-        raise ImportError("Not found transformers! Please install transformers by pip install transformers")
-    
+        raise ImportError(
+            "Not found transformers! Please install transformers by pip install transformers")
+
     if not words:
         return []
-    
-    if engine == "wangchanberta-ud-thai-pud-upos" :
-        model = AutoModelForTokenClassification.from_pretrained("Pavarissy/wangchanberta-ud-thai-pud-upos")
+
+    if engine == "wangchanberta-ud-thai-pud-upos":
+        model = AutoModelForTokenClassification.from_pretrained(
+            "Pavarissy/wangchanberta-ud-thai-pud-upos")
         tokenizer = AutoTokenizer.from_pretrained("Pavarissy/wangchanberta-ud-thai-pud-upos")
     elif engine == "mdeberta-v3-ud-thai-pud-upos":
-        model = AutoModelForTokenClassification.from_pretrained("Pavarissy/mdeberta-v3-ud-thai-pud-upos")
+        model = AutoModelForTokenClassification.from_pretrained(
+            "Pavarissy/mdeberta-v3-ud-thai-pud-upos")
         tokenizer = AutoTokenizer.from_pretrained("Pavarissy/mdeberta-v3-ud-thai-pud-upos")
     elif engine == "bert-base-th-cased-blackboard":
         model = AutoModelForTokenClassification.from_pretrained("lunarlist/pos_thai")
@@ -212,17 +216,8 @@ def pos_tag_transformers(
                 engine
             )
         )
-    
+
     pipeline = TokenClassificationPipeline(model=model, tokenizer=tokenizer, grouped_entities=True)
 
     outputs = pipeline(words)
-    return outputs
-
-
-
-
-
-        
-
-
-
+    return outputs
\ No newline at end of file
diff --git a/tests/test_tag.py b/tests/test_tag.py
index f5bb8747e..c0b8082e2 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -366,10 +366,11 @@ def test_NNER_class(self):
         self.assertIsNotNone(nner.tag("แมวทำอะไรตอนห้าโมงเช้า"))
 
     def test_pos_tag_transformers(self):
-        self.assertIsNotNone(pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "bert-base-th-cased-blackboard"))
-        self.assertIsNotNone(pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "mdeberta-v3-ud-thai-pud-upos"))
-        self.assertIsNotNone(pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "wangchanberta-ud-thai-pud-upos"))
-
+        self.assertIsNotNone(pos_tag_transformers(
+            words="แมวทำอะไรตอนห้าโมงเช้า", engine="bert-base-th-cased-blackboard"))
+        self.assertIsNotNone(pos_tag_transformers(
+            words="แมวทำอะไรตอนห้าโมงเช้า", engine="mdeberta-v3-ud-thai-pud-upos"))
+        self.assertIsNotNone(pos_tag_transformers(
+            words="แมวทำอะไรตอนห้าโมงเช้า", engine="wangchanberta-ud-thai-pud-upos"))
         with self.assertRaises(ValueError):
-            pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "non-existing-engine")
-
+            pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "non-existing-engine")
\ No newline at end of file

From 387d1373a550e1bf9fecda37cbedf6515838962c Mon Sep 17 00:00:00 2001
From: MpolaarbearM <notari5555@gmail.com>
Date: Sat, 4 Nov 2023 23:48:11 +0100
Subject: [PATCH 4/4] Fix PEP 8 keyword-argument spacing in test_tag.py

---
 tests/test_tag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tag.py b/tests/test_tag.py
index c0b8082e2..b27fc77bb 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -373,4 +373,4 @@ def test_pos_tag_transformers(self):
         self.assertIsNotNone(pos_tag_transformers(
             words="แมวทำอะไรตอนห้าโมงเช้า", engine="wangchanberta-ud-thai-pud-upos"))
         with self.assertRaises(ValueError):
-            pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine = "non-existing-engine")
\ No newline at end of file
+            pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine="non-existing-engine")
\ No newline at end of file