Skip to content
Merged

Test #262

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/push-app-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ jobs:
uses: actions/checkout@v4
with:
submodules: recursive
fetch-tags: true

- name: update version info
run: |
git fetch --tags
git describe --tags --always >| VERSION
git rev-parse HEAD >> VERSION

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/push-test-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ jobs:
uses: actions/checkout@v4
with:
submodules: recursive
fetch-tags: true

- name: update version info
run: |
git fetch --tags
git describe --tags --always >| VERSION
git rev-parse HEAD >> VERSION

Expand Down
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[submodule "ext/arabic_rom"]
path = ext/arabic_rom
url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git
branch = main
2 changes: 1 addition & 1 deletion ext/arabic_rom
12 changes: 9 additions & 3 deletions scriptshifter/hooks/korean/romanizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from csv import reader
from os import path
from unicodedata import normalize

from scriptshifter.exceptions import BREAK
from scriptshifter.hooks.korean import KCONF
Expand Down Expand Up @@ -92,6 +93,8 @@ def s2r_names_post_config(ctx):
def _romanize_nonames(src, options):
""" Main Romanization function for non-name strings. """

# Normalize to precomposed characters.
src = normalize("NFC", src)
# FKR038: Convert Chinese characters to Hangul
if options.get("hancha", True):
kor = _hancha2hangul(_marc8_hancha(src))
Expand Down Expand Up @@ -142,6 +145,8 @@ def _romanize_names(src, options):
"""
rom_ls = []
warnings = []
# Normalize to precomposed characters.
src = normalize("NFC", src)

if "," in src and "·" in src:
warnings.append(
Expand Down Expand Up @@ -386,9 +391,10 @@ def _romanize_oclc_auto(kor):

# FKR068: Exceptions, Exceptions to initial sound law, Proper names
def _kor_rom(kor):
# Only convert string if it contains CJK (i.e. do not change if already romanized)
# \u3000 is the ideographic space, the lowest codepoint in the Unicode CJK range
if max(kor) < '\u3000':
# Only convert string if it contains CJK (i.e. do not change if already
# romanized). \u3000 is the ideographic space, the lowest codepoint in the
# Unicode CJK range.
if max(kor) < '\u3000':
return kor

kor = re.sub(r"\s{2,}", " ", kor.strip())
Expand Down
42 changes: 4 additions & 38 deletions scriptshifter/tables/data/chinese.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# Chinese numerals map.
#
# All other Chinese mappings are kept in _chinese_base.yml. This mapping only
# adds an overlay for parsing numerals and Scriptshifter-specific features.

---
general:
name: Chinese
description: >
Chinese transliteration table that does not convert Chinese numerals to
Indo-Arabic numerals.
parents:
- _chinese_base
case_sensitive: false
Expand All @@ -29,39 +27,7 @@ script_to_roman:

hooks:
pre_assembly:
-
- chinese.parse_numerals_pre_assembly
-
- chinese.person_name_pre_assembly

map:
"〇": "ling#0 "
"零": "ling#0 "
"一": "yi#1 "
"二": "er#2 "
"兩": "liang#2 "
"两": "liang#2 "
"三": "san#3 "
"四": "si#4 "
"五": "wu#5 "
"六": "liu#6 "
"七": "qi#7 "
"八": "ba#8 "
"九": "jiu#9 "
"十": "shi#10 "
"廾": "gong#20 "
"廿": "nian#20 "
"卅": "sa#30 "
"卌": "xi#40 "
"百": "bai#100 "
"千": "qian#1000 "
"万": "wan#10000 "
"萬": "wan#10000 "
"亿": "yi#100000000 "
"億": "yi#100000000 "
"及": "ji# "
"至": "zhi# "
"年": "nian# "
"月": "yue# "
"日": "ri# "
"第": "di# "
map: {}
70 changes: 70 additions & 0 deletions scriptshifter/tables/data/chinese_numerals.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Chinese numerals map.
#
# All other Chinese mappings are kept in _chinese_base.yml. This mapping only
# adds an overlay for parsing numerals and Scriptshifter-specific features.

---
general:
name: Chinese (numerals transliteration)
description: >
Chinese transliteration table that includes romanization of Chinese
numerals.
parents:
- _chinese_base
case_sensitive: false

options:
- id: marc_field
label: MARC field
description: >
Romanize according to a specific MARC field format. If indicating a
subfield, append it to the numeric field value, e.g. '245n'.
Leave blank if not applicable.
type: string
default:

script_to_roman:
directives:
# Capitalize the first letter of the string only.
# TODO: Implement a list of all punctuation marks after which the
# following letter should be capitalized.
capitalize: true

hooks:
pre_assembly:
-
- chinese.parse_numerals_pre_assembly
-
- chinese.person_name_pre_assembly

map:
"〇": "ling#0 "
"零": "ling#0 "
"一": "yi#1 "
"二": "er#2 "
"兩": "liang#2 "
"两": "liang#2 "
"三": "san#3 "
"四": "si#4 "
"五": "wu#5 "
"六": "liu#6 "
"七": "qi#7 "
"八": "ba#8 "
"九": "jiu#9 "
"十": "shi#10 "
"廾": "gong#20 "
"廿": "nian#20 "
"卅": "sa#30 "
"卌": "xi#40 "
"百": "bai#100 "
"千": "qian#1000 "
"万": "wan#10000 "
"萬": "wan#10000 "
"亿": "yi#100000000 "
"億": "yi#100000000 "
"及": "ji# "
"至": "zhi# "
"年": "nian# "
"月": "yue# "
"日": "ri# "
"第": "di# "
Loading