From 6e38d7ddfd86286da74e7ceb1f929a79ac8803f3 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Sat, 11 Jan 2014 14:47:17 +1100 Subject: [PATCH 1/9] CHANGE: Now using ddt to improve test maintainability. Moved test cases to tab-separated inputexpected_output CSV files. --- setup.py | 25 ++-- tests/test.py | 277 ++++++++---------------------------------- tests/to_hepburn.csv | 10 ++ tests/to_hiragana.csv | 16 +++ tests/to_kana.csv | 26 ++++ tests/to_katakana.csv | 26 ++++ tests/to_kunrei.csv | 10 ++ tests/to_roma.csv | 6 + 8 files changed, 162 insertions(+), 234 deletions(-) create mode 100644 tests/to_hepburn.csv create mode 100644 tests/to_hiragana.csv create mode 100644 tests/to_kana.csv create mode 100644 tests/to_katakana.csv create mode 100644 tests/to_kunrei.csv create mode 100644 tests/to_roma.csv diff --git a/setup.py b/setup.py index 75afc02..1533d06 100755 --- a/setup.py +++ b/setup.py @@ -1,19 +1,23 @@ #!/usr/bin/env python +import os, json, imp +from setuptools import setup, find_packages + PROJ_NAME = 'romkan' PACKAGE_NAME = 'romkan' - PROJ_METADATA = '%s.json' % PROJ_NAME -import os, json, imp +HERE = os.path.abspath(os.path.dirname(__file__)) +README = open(os.path.join(HERE, 'README.rst')).read() +CHANGELOG = open(os.path.join(HERE, 'CHANGELOG.rst')).read() +VERSION = imp.load_source('version', os.path.join(HERE, 'src/%s/version.py' % PACKAGE_NAME)).__version__ +SRC = os.path.join(HERE, 'src') + + +proj_info = json.loads(open(os.path.join(HERE, PROJ_METADATA)).read()) +test_requirements = ['ddt>=0.5.0', 'unicodecsv>=0.9.4'] -here = os.path.abspath(os.path.dirname(__file__)) -proj_info = json.loads(open(os.path.join(here, PROJ_METADATA)).read()) -README = open(os.path.join(here, 'README.rst')).read() -CHANGELOG = open(os.path.join(here, 'CHANGELOG.rst')).read() -VERSION = imp.load_source('version', os.path.join(here, 'src/%s/version.py' % PACKAGE_NAME)).__version__ -from setuptools import setup, find_packages setup( name = proj_info['name'], 
version = VERSION, @@ -28,10 +32,11 @@ long_description = README + '\n\n' + CHANGELOG, - packages = find_packages('src'), - package_dir = {'' : 'src'}, + packages = find_packages(SRC), + package_dir = {'' : SRC}, test_suite = 'tests', + tests_require = test_requirements, platforms = 'any', zip_safe = False, diff --git a/tests/test.py b/tests/test.py index 1e7a11b..4f499de 100755 --- a/tests/test.py +++ b/tests/test.py @@ -1,234 +1,63 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import unicode_literals +import os import unittest +from ddt import ddt, data +from unicodecsv import UnicodeReader +from romkan import to_roma, to_kunrei, to_hepburn, to_kana, to_hiragana, to_katakana +from romkan import is_vowel, is_consonant, expand_consonant -import sys -import os -# make sure to import romkan from ../romkan directory relative to this -# file -sys.path.insert(0, - (os.path.dirname( - os.path.dirname( - os.path.abspath(__file__))))) -from romkan import * +def open_csv(path): + fh = open(path, 'r') + ucsv = UnicodeReader(fh, delimiter=str(' ')) + return ucsv -class RomkanTestCase(unittest.TestCase): - - def test_to_katakana(self): - self.assertEqual(to_katakana("kanji"), - "カンジ") - self.assertEqual(to_katakana("kanzi"), - "カンジ") - self.assertEqual(to_katakana("kannji"), - "カンジ") - self.assertEqual(to_katakana("chie"), - "チエ") - self.assertEqual(to_katakana("tie"), - "チエ") - self.assertEqual(to_katakana("kyouju"), - "キョウジュ") - self.assertEqual(to_katakana("syuukyou"), - "シュウキョウ") - self.assertEqual(to_katakana("shuukyou"), - "シュウキョウ") - self.assertEqual(to_katakana("saichuu"), - "サイチュウ") - self.assertEqual(to_katakana("saityuu"), - "サイチュウ") - self.assertEqual(to_katakana("cheri-"), - "チェリー") - self.assertEqual(to_katakana("tyeri-"), - "チェリー") - self.assertEqual(to_katakana("shinrai"), - "シンライ") - self.assertEqual(to_katakana("sinrai"), - "シンライ") - self.assertEqual(to_katakana("hannnou"), - "ハンノウ") - self.assertEqual(to_katakana("han'nou"), - "ハンノウ") - 
- self.assertEqual(to_katakana("wo"), - "ヲ") - self.assertEqual(to_katakana("we"), - "ウェ") - self.assertEqual(to_katakana("du"), - "ヅ") - self.assertEqual(to_katakana("she"), - "シェ") - self.assertEqual(to_katakana("di"), - "ヂ") - self.assertEqual(to_katakana("fu"), - "フ") - self.assertEqual(to_katakana("ti"), - "チ") - self.assertEqual(to_katakana("wi"), - "ウィ") - - self.assertEqual(to_katakana("je"), - "ジェ") - self.assertEqual(to_katakana("e-jento"), - "エージェント") - - def test_to_hiragana(self): - self.assertEqual(to_hiragana("kanji"), - "かんじ") - self.assertEqual(to_hiragana("kanzi"), - "かんじ") - self.assertEqual(to_hiragana("kannji"), - "かんじ") - self.assertEqual(to_hiragana("chie"), - "ちえ") - self.assertEqual(to_hiragana("tie"), - "ちえ") - self.assertEqual(to_hiragana("kyouju"), - "きょうじゅ") - self.assertEqual(to_hiragana("syuukyou"), - "しゅうきょう") - self.assertEqual(to_hiragana("shuukyou"), - "しゅうきょう") - self.assertEqual(to_hiragana("saichuu"), - "さいちゅう") - self.assertEqual(to_hiragana("saityuu"), - "さいちゅう") - self.assertEqual(to_hiragana("cheri-"), - "ちぇりー") - self.assertEqual(to_hiragana("tyeri-"), - "ちぇりー") - self.assertEqual(to_hiragana("shinrai"), - "しんらい") - self.assertEqual(to_hiragana("sinrai"), - "しんらい") - self.assertEqual(to_hiragana("hannnou"), - "はんのう") - self.assertEqual(to_hiragana("han'nou"), - "はんのう") - - def test_to_kana(self): - self.assertEqual(to_kana("kanji"), - "カンジ") - self.assertEqual(to_kana("kanzi"), - "カンジ") - self.assertEqual(to_kana("kannji"), - "カンジ") - self.assertEqual(to_kana("chie"), - "チエ") - self.assertEqual(to_kana("tie"), - "チエ") - self.assertEqual(to_kana("kyouju"), - "キョウジュ") - self.assertEqual(to_kana("syuukyou"), - "シュウキョウ") - self.assertEqual(to_kana("shuukyou"), - "シュウキョウ") - self.assertEqual(to_kana("saichuu"), - "サイチュウ") - self.assertEqual(to_kana("saityuu"), - "サイチュウ") - self.assertEqual(to_kana("cheri-"), - "チェリー") - self.assertEqual(to_kana("tyeri-"), - "チェリー") - self.assertEqual(to_kana("shinrai"), - "シンライ") - 
self.assertEqual(to_kana("sinrai"), - "シンライ") - self.assertEqual(to_kana("hannnou"), - "ハンノウ") - self.assertEqual(to_kana("han'nou"), - "ハンノウ") - - self.assertEqual(to_kana("wo"), - "ヲ") - self.assertEqual(to_kana("we"), - "ウェ") - self.assertEqual(to_kana("du"), - "ヅ") - self.assertEqual(to_kana("she"), - "シェ") - self.assertEqual(to_kana("di"), - "ヂ") - self.assertEqual(to_kana("fu"), - "フ") - self.assertEqual(to_kana("ti"), - "チ") - self.assertEqual(to_kana("wi"), - "ウィ") - - self.assertEqual(to_kana("je"), - "ジェ") - self.assertEqual(to_kana("e-jento"), - "エージェント") - - def test_to_hepburn(self): - self.assertEqual(to_hepburn("kannzi"), - "kanji") - self.assertEqual(to_hepburn("tie"), - "chie") - - self.assertEqual(to_hepburn("KANNZI"), - "kanji") - self.assertEqual(to_hepburn("TIE"), - "chie") - - self.assertEqual(to_hepburn("カンジ"), - "kanji") - self.assertEqual(to_hepburn("チエ"), - "chie") - - self.assertEqual(to_hepburn("かんじ"), - "kanji") - self.assertEqual(to_hepburn("ちえ"), - "chie") - self.assertEqual(to_hepburn("しゃしん"), - "shashin") - self.assertEqual(to_hepburn("しゅっしょう"), - "shusshou") - - def test_to_kunrei(self): - self.assertEqual(to_kunrei("kanji"), - "kanzi") - self.assertEqual(to_kunrei("chie"), - "tie") - - self.assertEqual(to_kunrei("KANJI"), - "kanzi") - self.assertEqual(to_kunrei("CHIE"), - "tie") - - self.assertEqual(to_kunrei("カンジ"), - "kanzi") - self.assertEqual(to_kunrei("チエ"), - "tie") - - self.assertEqual(to_kunrei("かんじ"), - "kanzi") - self.assertEqual(to_kunrei("ちえ"), - "tie") - - self.assertEqual(to_kunrei("しゃしん"), - "syasin") - self.assertEqual(to_kunrei("しゅっしょう"), - "syussyou") - - def test_to_roma(self): - self.assertEqual(to_roma("カンジ"), - "kanji") - self.assertEqual(to_roma("チャウ"), - "chau") - self.assertEqual(to_roma("ハンノウ"), - "han'nou") - - self.assertEqual(to_roma("かんじ"), - "kanji") - self.assertEqual(to_roma("ちゃう"), - "chau") - self.assertEqual(to_roma("はんのう"), - "han'nou") +HERE = os.path.abspath(os.path.dirname(__file__)) 
+TO_ROMA = os.path.join(HERE, 'to_roma.csv') +TO_KUNREI = os.path.join(HERE, 'to_kunrei.csv') +TO_HEPBURN = os.path.join(HERE, 'to_hepburn.csv') +TO_KANA = os.path.join(HERE, 'to_kana.csv') +TO_HIRAGANA = os.path.join(HERE, 'to_hiragana.csv') +TO_KATAKANA = os.path.join(HERE, 'to_katakana.csv') + + +@ddt +class ConversionTestCase(unittest.TestCase): + + @data(*open_csv(TO_ROMA)) + def test_to_roma(self, row): + in_text, expected = row + self.assertEquals(to_roma(in_text), expected) + + @data(*open_csv(TO_KUNREI)) + def test_to_kunrei(self, row): + in_text, expected = row + self.assertEquals(to_kunrei(in_text), expected) + + @data(*open_csv(TO_HEPBURN)) + def test_to_hepburn(self, row): + in_text, expected = row + self.assertEquals(to_hepburn(in_text), expected) + + @data(*open_csv(TO_KANA)) + def test_to_kana(self, row): + in_text, expected = row + self.assertEquals(to_kana(in_text), expected) + + @data(*open_csv(TO_HIRAGANA)) + def test_to_hiragana(self, row): + in_text, expected = row + self.assertEquals(to_hiragana(in_text), expected) + + @data(*open_csv(TO_KATAKANA)) + def test_to_katakana(self, row): + in_text, expected = row + self.assertEquals(to_katakana(in_text), expected) + + +class RomkanTestCase(unittest.TestCase): def test_is_consonant(self): assert not is_consonant("a") diff --git a/tests/to_hepburn.csv b/tests/to_hepburn.csv new file mode 100644 index 0000000..58a1477 --- /dev/null +++ b/tests/to_hepburn.csv @@ -0,0 +1,10 @@ +kannzi kanji +tie chie +KANNZI kanji +TIE chie +カンジ kanji +チエ chie +かんじ kanji +ちえ chie +しゃしん shashin +しゅっしょう shusshou \ No newline at end of file diff --git a/tests/to_hiragana.csv b/tests/to_hiragana.csv new file mode 100644 index 0000000..d1d9af1 --- /dev/null +++ b/tests/to_hiragana.csv @@ -0,0 +1,16 @@ +kanji かんじ +kanzi かんじ +kannji かんじ +chie ちえ +tie ちえ +kyouju きょうじゅ +syuukyou しゅうきょう +shuukyou しゅうきょう +saichuu さいちゅう +saityuu さいちゅう +cheri- ちぇりー +tyeri- ちぇりー +shinrai しんらい +sinrai しんらい +hannnou はんのう +han'nou はんのう \ No newline at 
end of file diff --git a/tests/to_kana.csv b/tests/to_kana.csv new file mode 100644 index 0000000..2c77c3a --- /dev/null +++ b/tests/to_kana.csv @@ -0,0 +1,26 @@ +kanji カンジ +kanzi カンジ +kannji カンジ +chie チエ +tie チエ +kyouju キョウジュ +syuukyou シュウキョウ +shuukyou シュウキョウ +saichuu サイチュウ +saityuu サイチュウ +cheri- チェリー +tyeri- チェリー +shinrai シンライ +sinrai シンライ +hannnou ハンノウ +han'nou ハンノウ +wo ヲ +we ウェ +du ヅ +she シェ +di ヂ +fu フ +ti チ +wi ウィ +je ジェ +e-jento エージェント \ No newline at end of file diff --git a/tests/to_katakana.csv b/tests/to_katakana.csv new file mode 100644 index 0000000..2c77c3a --- /dev/null +++ b/tests/to_katakana.csv @@ -0,0 +1,26 @@ +kanji カンジ +kanzi カンジ +kannji カンジ +chie チエ +tie チエ +kyouju キョウジュ +syuukyou シュウキョウ +shuukyou シュウキョウ +saichuu サイチュウ +saityuu サイチュウ +cheri- チェリー +tyeri- チェリー +shinrai シンライ +sinrai シンライ +hannnou ハンノウ +han'nou ハンノウ +wo ヲ +we ウェ +du ヅ +she シェ +di ヂ +fu フ +ti チ +wi ウィ +je ジェ +e-jento エージェント \ No newline at end of file diff --git a/tests/to_kunrei.csv b/tests/to_kunrei.csv new file mode 100644 index 0000000..a3650ab --- /dev/null +++ b/tests/to_kunrei.csv @@ -0,0 +1,10 @@ +kanji kanzi +chie tie +KANJI kanzi +CHIE tie +カンジ kanzi +チエ tie +かんじ kanzi +ちえ tie +しゃしん syasin +しゅっしょう syussyou \ No newline at end of file diff --git a/tests/to_roma.csv b/tests/to_roma.csv new file mode 100644 index 0000000..9b095b4 --- /dev/null +++ b/tests/to_roma.csv @@ -0,0 +1,6 @@ +カンジ kanji +チャウ chau +ハンノウ han'nou +かんじ kanji +ちゃう chau +はんのう han'nou \ No newline at end of file From 6ee82d984293916db8cae4d5a966049ddcfe84b2 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Sat, 11 Jan 2014 22:51:41 +1100 Subject: [PATCH 2/9] ADD: Romanization test cases. Some pass, some fail. 
--- tests/to_roma.csv | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/to_roma.csv b/tests/to_roma.csv index 9b095b4..7eb9be5 100644 --- a/tests/to_roma.csv +++ b/tests/to_roma.csv @@ -3,4 +3,30 @@ ハンノウ han'nou かんじ kanji ちゃう chau -はんのう han'nou \ No newline at end of file +はんのう han'nou +こんやく kon'yaku +こんにゃく konnyaku +れんあい ren'ai +そうじ souji +じょうき jouki +ボディー bodi- +つづく tsuzuku +けっこう kekkou +さいあく saiaku +まんいち man'ichi +まいにち mainichi +ャ ~ya +ちぢむ chijimu +しんばし shimbashi +しんぱい shimpai +じゃあく jaaku +おばあさん obāsan +おにいさん oniisan +おねえさん onēsan +ちゅうい chūi +みずうみ mizuumi +こおどり koodori +こおどりkōri +おおさか ōsaka +とうきょう tōkyō +がっこう gakkō \ No newline at end of file From d558aa7c936ecf70b58b36b7fe6e9de711643036 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Mon, 13 Jan 2014 19:37:43 +1100 Subject: [PATCH 3/9] ADD: Compiled a CSV table with lots of test cases for the various romanization schemes (they need to be discussed) --- tests/tests.csv | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 tests/tests.csv diff --git a/tests/tests.csv b/tests/tests.csv new file mode 100644 index 0000000..cfc2804 --- /dev/null +++ b/tests/tests.csv @@ -0,0 +1,99 @@ +word hiragana katakana hepburn traditional hepburn revised nihon-shiki kunrei-shiki wapuro source Comments +市 し shi shi si si si/shi http://en.wikipedia.org/wiki/Hepburn_romanization +漢字 かんじ カンジ kanji kanji kanzi kanzi kannzi/kannji/kan'zi/kan'ji +知恵 ちえ チエ chie chie tie tie chie/tie +写真 しゃしん シャシン shashin shashin syasin syasin shashin/syasin/shasin/syashin +教授 きょうじゅ キョウジュ kyōju kyōju kyouju kyouju kyouju +出生 しゅっしょう シュッショウ shusshō shusshō syussyou syussyou syussyou/shusshou/syussyou/shussyou +宗教 しゅうきょう シュウキョウ shūkyō shūkyō syuukyou syuukyou shuukyou/syuukyou +最中 さいちゅう サイチュウ saichū saichū saityuu saityuu saichuu/saityuu +チェリー ちぇりー チェリー cheri- cheri- tyeri- tyeri- cheri-/tyeri- +信頼 しんらい シンライ shinrai shinrai sinrai sinrai 
shinrai/sinrai +ヲ を ヲ wo +ウェ うぇ ウェ we +ヅ づ ヅ du +シェ しぇ シェ she +ヂ ぢ ヂ di +フ ふ フ fu +チ ち チ ti +ウィ うぃ ウィ wi +ジェ じぇ ジェ je +エージェント えーじぇんと エージェント e-jento + ちゃう チャウ chau chau tyau chau/tyau +反応 はんのう ハンノウ hannō hannō hannou hannou/han'nou +婚約 こんやく コンヤク kon-yaku kon'yaku kon'yaku kon'yaku +蒟蒻 こんにゃく コンニャク konnyaku konnyaku konnyaku konnyaku +恋愛 れんあい レンアイ ren-ai ren'ai ren'ai ren'ai/rennai +こんな こんな コンナ konna konna konna kon'na/konnna +掃除 そうじ ソウジ sōji sōji souzi souji/souzi +蒸気 じょうき ジョウキ jōki jōki zouki jouki/zouki +ボディー ぼでぃー ボディー bodi- bodi- bodeli- +結構 けっこう ケッコウ kekkō kekkō kekkou kekkou kekkou +最悪 さいあく サイアク saiaku saiaku saiaku saiaku +万一 まんいち マンイチ man-ichi man'ichi man'iti mannichi/man'ichi/manniti/man'iti http://cldr.unicode.org/index/cldr-spec/transliteration-guidelines +まにち まにち マニチ manichi manichi maniti maniti manichi/maniti http://cldr.unicode.org/index/cldr-spec/transliteration-guidelines +毎日 まいにち マイニチ mainichi mainichi mainiti mainichi/mainiti + ャ ャ ~ya ~ya lya +新橋 しんばし シンバシ shimbashi shinbashi sinbasi shinbashi/sinbasi/shinbasi/sinbashi +心配 しんぱい シンパイ shimpai shinpai sinpai sinpai/shinpai +邪悪 じゃあく ジャアク jaaku jaaku zaaku jaaku/zaaku http://en.wikipedia.org/wiki/Hepburn_romanization +お婆さん おばあさん オバアサン obaasan obāsan obaasan obaasan/obaasann/obaasan' http://en.wikipedia.org/wiki/Hepburn_romanization +お兄さん おにいさん オニイサン oniisan oniisan oniisan oniisan/oniisann/oniisan' http://en.wikipedia.org/wiki/Hepburn_romanization +お爺さん おじいさん オジイサン ojiisan ojiisan ojiisan ojiisan/ojiisann/ojiisan' http://en.wikipedia.org/wiki/Hepburn_romanization +美味しい おいしい オイシイ oishii oishii oisii oishii/oisii http://en.wikipedia.org/wiki/Hepburn_romanization +新潟 にいがた ニイガタ niigata niigata niigata niigata http://en.wikipedia.org/wiki/Hepburn_romanization +濡れ縁 ぬれえん ヌレエン nureen nureen nureen nureen http://en.wikipedia.org/wiki/Hepburn_romanization +お姉さん おねえさん オネエサン oneesan onēsan oneesan oneesan/oneesann/oneesan' http://en.wikipedia.org/wiki/Hepburn_romanization +遠回り とおまわり トオマワリ tōmawari tōmawari 
toomawari toomawari http://en.wikipedia.org/wiki/Hepburn_romanization +灰色 はいいろ ハイイロ haiiro haiiro haiiro haiiro http://en.wikipedia.org/wiki/Hepburn_romanization +食う くう クウ kuu kuu kuu kuu http://en.wikipedia.org/wiki/Hepburn_romanization +縫う ぬう ヌウ nuu nuu nuu nuu http://en.wikipedia.org/wiki/Hepburn_romanization +数学 すうがく スウガク sūgaku sūgaku suugaku suugaku http://en.wikipedia.org/wiki/Hepburn_romanization +注意 ちゅうい チュウイ chūi chūi chuui chuui/tyuui http://en.wikipedia.org/wiki/Hepburn_romanization +ぐうたら ぐうたら グウタラ gūtara gūtara guutara guutara http://en.wikipedia.org/wiki/Hepburn_romanization +湖 みずうみ ミズウミ mizuumi mizuumi mizuumi mizuumi http://en.wikipedia.org/wiki/Hepburn_romanization +小躍り こおどり コオドリ koodori koodori koodori koodori http://en.wikipedia.org/wiki/Hepburn_romanization +氷 こおり コオリ kōri kōri koori koori http://en.wikipedia.org/wiki/Hepburn_romanization +大阪 おおさか ōsaka ōsaka oosaka oosaka http://en.wikipedia.org/wiki/Hepburn_romanization +追う おう オウ ou ou ou ou http://en.wikipedia.org/wiki/Hepburn_romanization +迷う まよう マヨウ mayou mayou mayou mayou http://en.wikipedia.org/wiki/Hepburn_romanization +子馬 こうま コウマ kouma kouma kouma kouma http://en.wikipedia.org/wiki/Hepburn_romanization +とうきょう とうきょう トウキョウ tōkyō tōkyō toukyou toukyou http://en.wikipedia.org/wiki/Hepburn_romanization +勉強 べんきょう ベンキョウ benkyō benkyō benkyou benkyou http://en.wikipedia.org/wiki/Hepburn_romanization +学校 がっこう ガッコウ gakkō gakkō gakkou gakkou http://en.wikipedia.org/wiki/Hepburn_romanization +電報 でんぽう デンポウ dempō denpō denpou denpou http://en.wikipedia.org/wiki/Hepburn_romanization +金曜日 きんようび キンヨウビ kinyōbi kin'yōbi kin'youbi kin'youbi/kinnyoubi http://en.wikipedia.org/wiki/Hepburn_romanization +学生 がくせい ガクセイ gakusei gakusei gakusei gakusei http://en.wikipedia.org/wiki/Hepburn_romanization +経験 けいけん ケイケン keiken keiken keiken keiken http://en.wikipedia.org/wiki/Hepburn_romanization +制服 せいふく セイフク seifuku seifuku seifuku seifuku/seihuku http://en.wikipedia.org/wiki/Hepburn_romanization +姪 めい メイ mei mei mei 
mei http://en.wikipedia.org/wiki/Hepburn_romanization +招いて まねいて マネイテ maneite maneite maneite maneite http://en.wikipedia.org/wiki/Hepburn_romanization +軽い かるい カルイ karui karui karui karui http://en.wikipedia.org/wiki/Hepburn_romanization +鴬 うぐいす ウグイス uguisu uguisu uguisu uguisu http://en.wikipedia.org/wiki/Hepburn_romanization +甥 おい オイ oi oi oi oi http://en.wikipedia.org/wiki/Hepburn_romanization +セーラー せーらー セーラー sērā sērā se-ra- http://en.wikipedia.org/wiki/Hepburn_romanization +パーティー ぱーてぃー パーティー pātī pātī pa-teli- http://en.wikipedia.org/wiki/Hepburn_romanization +レーナ れーな レーナ rēna rēna re-na http://en.wikipedia.org/wiki/Hepburn_romanization proper name (Lena) +ヒーター ひーたー ヒーター hītā hītā hi-ta- http://en.wikipedia.org/wiki/Hepburn_romanization +タクシー たくしー タクシー takushī takushī takushi-/takusi- http://en.wikipedia.org/wiki/Hepburn_romanization +スーパーマン すーぱーまん スーパーマン sūpāman sūpāman su-pa-man/su-pa-man'/su-pa-mann http://en.wikipedia.org/wiki/Hepburn_romanization +ローマ字 ローマじ rōmaji ro-maji/ro-mazi http://en.wikipedia.org/wiki/Romanization_of_Japanese +富士山 ふじさん fujisan fujisan huzisan fujisan/huzisan/fuzisan/hujisan/fujisan'/huzisan'/fuzisan'/hujisan'/fujisann/huzisann/fuzisann/hujisann http://en.wikipedia.org/wiki/Romanization_of_Japanese +お茶 おちゃ ocha ocha otya otya ocha/otya http://en.wikipedia.org/wiki/Romanization_of_Japanese +知事 ちじ chiji chiji tizi tizi chiji/tizi/tiji/chizi http://en.wikipedia.org/wiki/Romanization_of_Japanese +縮む ちぢむ チヂム chijimu chijimu tidimu tizimu chidimu/tidimu/chidzimu/tidzimu http://en.wikipedia.org/wiki/Romanization_of_Japanese +続く つづく ツヅク tsuzuku tsuzuku tuduku tuzuku tsuduku/tuduku/tsudzuku/tudzuku http://en.wikipedia.org/wiki/Romanization_of_Japanese +仮名遣い かなづかい カナヅカイ kanazukai kanazukai kanadukai kanazukai kanadukai http://en.wikipedia.org/wiki/Kunrei-shiki +案内 あんない アンナイ annai annai annai an'nai/annnai http://en.wikipedia.org/wiki/Hepburn_romanization +群馬 ぐんま グンマ gumma gunma gunma gunma/gun'ma/gunnma 
http://en.wikipedia.org/wiki/Hepburn_romanization Proper name +簡易 かんい カンイ kan-i kan'i kan'i kan'i/kanni http://en.wikipedia.org/wiki/Hepburn_romanization +信用 しんよう シンヨウ shin-yō shin'yō sin'you shin'you/sin'you http://en.wikipedia.org/wiki/Hepburn_romanization +結果 けっか ケッカ kekka kekka kekka kekka http://en.wikipedia.org/wiki/Hepburn_romanization +さっさと さっさと サッサト sassato sassato sassato sassato http://en.wikipedia.org/wiki/Hepburn_romanization +ずっと ずっと ズット zutto zutto zutto zutto http://en.wikipedia.org/wiki/Hepburn_romanization +切符 きっぷ キップ kippu kippu kippu kippu http://en.wikipedia.org/wiki/Hepburn_romanization +雑誌 ざっし ザッシ zasshi zasshi zassi zasshi/zassi http://en.wikipedia.org/wiki/Hepburn_romanization +一緒 いっしょ イッショ issho issho issyo issho/issyo http://en.wikipedia.org/wiki/Hepburn_romanization +こっち こっち コッチ kotchi kotchi kotti kocchi/kotti http://en.wikipedia.org/wiki/Hepburn_romanization +抹茶 まっちゃ マッチャ matcha matcha mattya mattya/maccha http://en.wikipedia.org/wiki/Hepburn_romanization +三つ みっつ ミッツ mittsu mittsu mittu mittsu/mittu http://en.wikipedia.org/wiki/Hepburn_romanization From d3582f253e64d01d2bf565354d9273034041a387 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Mon, 13 Jan 2014 19:45:34 +1100 Subject: [PATCH 4/9] CHANGE: Bump the number of test cases to 697. 
302 fail - there's work to do --- tests/test.py | 62 +++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/test.py b/tests/test.py index 4f499de..27fdf35 100755 --- a/tests/test.py +++ b/tests/test.py @@ -3,58 +3,58 @@ import os import unittest from ddt import ddt, data -from unicodecsv import UnicodeReader -from romkan import to_roma, to_kunrei, to_hepburn, to_kana, to_hiragana, to_katakana +from unicodecsv import DictReader +from romkan import to_kunrei, to_hepburn, to_hiragana, to_katakana from romkan import is_vowel, is_consonant, expand_consonant -def open_csv(path): - fh = open(path, 'r') - ucsv = UnicodeReader(fh, delimiter=str(' ')) - return ucsv +SEP = '/' # Separates variants in the same CSV field +HERE = os.path.abspath(os.path.dirname(__file__)) +TESTS_CSV = os.path.join(HERE, 'tests.csv') -HERE = os.path.abspath(os.path.dirname(__file__)) -TO_ROMA = os.path.join(HERE, 'to_roma.csv') -TO_KUNREI = os.path.join(HERE, 'to_kunrei.csv') -TO_HEPBURN = os.path.join(HERE, 'to_hepburn.csv') -TO_KANA = os.path.join(HERE, 'to_kana.csv') -TO_HIRAGANA = os.path.join(HERE, 'to_hiragana.csv') -TO_KATAKANA = os.path.join(HERE, 'to_katakana.csv') +def get_cases_from_csv(input_field, output_field): + fh = open(TESTS_CSV, 'r') + ucsv = DictReader(fh, delimiter=str('\t')) + for row in ucsv: + inputs, outputs = row[input_field], row[output_field] + for i in inputs.split(SEP): + if outputs != '': + yield i, outputs.split(SEP) @ddt class ConversionTestCase(unittest.TestCase): - @data(*open_csv(TO_ROMA)) - def test_to_roma(self, row): + @data(*list(get_cases_from_csv('hiragana', 'hepburn revised'))) + def test_hiragana_to_hepburn(self, row): in_text, expected = row - self.assertEquals(to_roma(in_text), expected) + self.assertIn(to_hepburn(in_text), expected) - @data(*open_csv(TO_KUNREI)) - def test_to_kunrei(self, row): + @data(*list(get_cases_from_csv('hiragana', 'kunrei-shiki'))) + def 
test_hiragana_to_kunrei(self, row): in_text, expected = row - self.assertEquals(to_kunrei(in_text), expected) + self.assertIn(to_kunrei(in_text), expected) - @data(*open_csv(TO_HEPBURN)) - def test_to_hepburn(self, row): + @data(*list(get_cases_from_csv('hiragana', 'katakana'))) + def test_hiragana_to_katakana(self, row): in_text, expected = row - self.assertEquals(to_hepburn(in_text), expected) + self.assertIn(to_katakana(in_text), expected) - @data(*open_csv(TO_KANA)) - def test_to_kana(self, row): + @data(*list(get_cases_from_csv('katakana', 'hiragana'))) + def test_katakana_to_hiragana(self, row): in_text, expected = row - self.assertEquals(to_kana(in_text), expected) + self.assertIn(to_katakana(in_text), expected) - @data(*open_csv(TO_HIRAGANA)) - def test_to_hiragana(self, row): + @data(*list(get_cases_from_csv('wapuro', 'hiragana'))) + def test_wapuro_to_hiragana(self, row): in_text, expected = row - self.assertEquals(to_hiragana(in_text), expected) + self.assertIn(to_hiragana(in_text), expected) - @data(*open_csv(TO_KATAKANA)) - def test_to_katakana(self, row): + @data(*list(get_cases_from_csv('wapuro', 'katakana'))) + def test_wapuro_to_katakana(self, row): in_text, expected = row - self.assertEquals(to_katakana(in_text), expected) + self.assertIn(to_katakana(in_text), expected) class RomkanTestCase(unittest.TestCase): From dbe56c77db4d061894bb69f8a8fe36f5cf6b3861 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Mon, 13 Jan 2014 19:47:43 +1100 Subject: [PATCH 5/9] Delete unused CSV files --- tests/to_hepburn.csv | 10 ---------- tests/to_hiragana.csv | 16 ---------------- tests/to_kana.csv | 26 -------------------------- tests/to_katakana.csv | 26 -------------------------- tests/to_kunrei.csv | 10 ---------- tests/to_roma.csv | 32 -------------------------------- 6 files changed, 120 deletions(-) delete mode 100644 tests/to_hepburn.csv delete mode 100644 tests/to_hiragana.csv delete mode 100644 tests/to_kana.csv delete mode 100644 
tests/to_katakana.csv delete mode 100644 tests/to_kunrei.csv delete mode 100644 tests/to_roma.csv diff --git a/tests/to_hepburn.csv b/tests/to_hepburn.csv deleted file mode 100644 index 58a1477..0000000 --- a/tests/to_hepburn.csv +++ /dev/null @@ -1,10 +0,0 @@ -kannzi kanji -tie chie -KANNZI kanji -TIE chie -カンジ kanji -チエ chie -かんじ kanji -ちえ chie -しゃしん shashin -しゅっしょう shusshou \ No newline at end of file diff --git a/tests/to_hiragana.csv b/tests/to_hiragana.csv deleted file mode 100644 index d1d9af1..0000000 --- a/tests/to_hiragana.csv +++ /dev/null @@ -1,16 +0,0 @@ -kanji かんじ -kanzi かんじ -kannji かんじ -chie ちえ -tie ちえ -kyouju きょうじゅ -syuukyou しゅうきょう -shuukyou しゅうきょう -saichuu さいちゅう -saityuu さいちゅう -cheri- ちぇりー -tyeri- ちぇりー -shinrai しんらい -sinrai しんらい -hannnou はんのう -han'nou はんのう \ No newline at end of file diff --git a/tests/to_kana.csv b/tests/to_kana.csv deleted file mode 100644 index 2c77c3a..0000000 --- a/tests/to_kana.csv +++ /dev/null @@ -1,26 +0,0 @@ -kanji カンジ -kanzi カンジ -kannji カンジ -chie チエ -tie チエ -kyouju キョウジュ -syuukyou シュウキョウ -shuukyou シュウキョウ -saichuu サイチュウ -saityuu サイチュウ -cheri- チェリー -tyeri- チェリー -shinrai シンライ -sinrai シンライ -hannnou ハンノウ -han'nou ハンノウ -wo ヲ -we ウェ -du ヅ -she シェ -di ヂ -fu フ -ti チ -wi ウィ -je ジェ -e-jento エージェント \ No newline at end of file diff --git a/tests/to_katakana.csv b/tests/to_katakana.csv deleted file mode 100644 index 2c77c3a..0000000 --- a/tests/to_katakana.csv +++ /dev/null @@ -1,26 +0,0 @@ -kanji カンジ -kanzi カンジ -kannji カンジ -chie チエ -tie チエ -kyouju キョウジュ -syuukyou シュウキョウ -shuukyou シュウキョウ -saichuu サイチュウ -saityuu サイチュウ -cheri- チェリー -tyeri- チェリー -shinrai シンライ -sinrai シンライ -hannnou ハンノウ -han'nou ハンノウ -wo ヲ -we ウェ -du ヅ -she シェ -di ヂ -fu フ -ti チ -wi ウィ -je ジェ -e-jento エージェント \ No newline at end of file diff --git a/tests/to_kunrei.csv b/tests/to_kunrei.csv deleted file mode 100644 index a3650ab..0000000 --- a/tests/to_kunrei.csv +++ /dev/null @@ -1,10 +0,0 @@ -kanji kanzi -chie tie -KANJI kanzi -CHIE tie -カンジ kanzi -チエ tie -かんじ kanzi -ちえ 
tie -しゃしん syasin -しゅっしょう syussyou \ No newline at end of file diff --git a/tests/to_roma.csv b/tests/to_roma.csv deleted file mode 100644 index 7eb9be5..0000000 --- a/tests/to_roma.csv +++ /dev/null @@ -1,32 +0,0 @@ -カンジ kanji -チャウ chau -ハンノウ han'nou -かんじ kanji -ちゃう chau -はんのう han'nou -こんやく kon'yaku -こんにゃく konnyaku -れんあい ren'ai -そうじ souji -じょうき jouki -ボディー bodi- -つづく tsuzuku -けっこう kekkou -さいあく saiaku -まんいち man'ichi -まいにち mainichi -ャ ~ya -ちぢむ chijimu -しんばし shimbashi -しんぱい shimpai -じゃあく jaaku -おばあさん obāsan -おにいさん oniisan -おねえさん onēsan -ちゅうい chūi -みずうみ mizuumi -こおどり koodori -こおどりkōri -おおさか ōsaka -とうきょう tōkyō -がっこう gakkō \ No newline at end of file From 0a271cd96e9e71a58f2f566881478a32fcb5e527 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Mon, 13 Jan 2014 20:37:28 +1100 Subject: [PATCH 6/9] FIX: small hiragana 'ya' --- tests/tests.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests.csv b/tests/tests.csv index cfc2804..379b55c 100644 --- a/tests/tests.csv +++ b/tests/tests.csv @@ -33,7 +33,7 @@ word hiragana katakana hepburn traditional hepburn revised nihon-shiki kunrei-sh 万一 まんいち マンイチ man-ichi man'ichi man'iti mannichi/man'ichi/manniti/man'iti http://cldr.unicode.org/index/cldr-spec/transliteration-guidelines まにち まにち マニチ manichi manichi maniti maniti manichi/maniti http://cldr.unicode.org/index/cldr-spec/transliteration-guidelines 毎日 まいにち マイニチ mainichi mainichi mainiti mainichi/mainiti - ャ ャ ~ya ~ya lya + ゃ ャ ~ya ~ya lya 新橋 しんばし シンバシ shimbashi shinbashi sinbasi shinbashi/sinbasi/shinbasi/sinbashi 心配 しんぱい シンパイ shimpai shinpai sinpai sinpai/shinpai 邪悪 じゃあく ジャアク jaaku jaaku zaaku jaaku/zaaku http://en.wikipedia.org/wiki/Hepburn_romanization From 3e5935b531d537fc051222db16344465274ee5ab Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Mon, 13 Jan 2014 20:41:05 +1100 Subject: [PATCH 7/9] =?UTF-8?q?ADD:=20Reversibility=20test=20cases.=20All?= =?UTF-8?q?=20pass=20except=20for=20'=E3=81=A2'/'=E3=81=A7=E3=81=83'=20->?= 
=?UTF-8?q?=20'dyi'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test.py b/tests/test.py index 27fdf35..0454ce8 100755 --- a/tests/test.py +++ b/tests/test.py @@ -56,6 +56,11 @@ def test_wapuro_to_katakana(self, row): in_text, expected = row self.assertIn(to_katakana(in_text), expected) + @data(*list(get_cases_from_csv('hiragana', 'hiragana'))) + def test_reversibility(self, row): + in_text, _ = row + self.assertEquals(to_hiragana(to_kunrei(in_text)), in_text) + class RomkanTestCase(unittest.TestCase): From b564f615f1252f036ab49ccfb3443f40c8613691 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Mon, 13 Jan 2014 20:41:05 +1100 Subject: [PATCH 8/9] =?UTF-8?q?ADD:=20Reversibility=20test=20cases.=20All?= =?UTF-8?q?=20pass=20except=20for=20'=E3=81=A2'/'=E3=81=A7=E3=81=83'=20->?= =?UTF-8?q?=20'dyi'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test.py b/tests/test.py index 27fdf35..5b08bf8 100755 --- a/tests/test.py +++ b/tests/test.py @@ -1,5 +1,4 @@ #!/usr/bin/env python - import os import unittest from ddt import ddt, data @@ -56,6 +55,11 @@ def test_wapuro_to_katakana(self, row): in_text, expected = row self.assertIn(to_katakana(in_text), expected) + @data(*list(get_cases_from_csv('hiragana', 'hiragana'))) + def test_reversibility(self, row): + in_text, _ = row + self.assertEquals(to_hiragana(to_kunrei(in_text)), in_text) + class RomkanTestCase(unittest.TestCase): @@ -86,5 +90,6 @@ def test_expand_consonant(self): self.assertEqual(sorted(expand_consonant("ch")), ["cha", "che", "chi", "cho", "chu"]) + if __name__ == '__main__': unittest.main() From 2c4f99894674196777a9b1c9643308119213cd58 Mon Sep 17 00:00:00 2001 From: Baptiste Lagarde Date: Fri, 24 Jan 2014 08:46:10 +1100 Subject: [PATCH 9/9] FIX: 
Explicit is better than implicit. This should fix Python3 test runs --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1533d06..a943e90 100755 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ proj_info = json.loads(open(os.path.join(HERE, PROJ_METADATA)).read()) -test_requirements = ['ddt>=0.5.0', 'unicodecsv>=0.9.4'] +test_requirements = ['ddt>=0.5.0', 'unicodecsv>=0.10.1'] setup( @@ -37,6 +37,7 @@ test_suite = 'tests', tests_require = test_requirements, + dependency_links = ['https://github.com/jdunck/python-unicodecsv/zipball/master#egg=unicodecsv-0.10.1'], platforms = 'any', zip_safe = False,