Skip to content

Commit 5a21c05

Browse files
committed
Merge branch 'word_boundaries' into test
2 parents 290a3be + eed9a59 commit 5a21c05

File tree

10 files changed

+249
-98
lines changed

10 files changed

+249
-98
lines changed

scriptshifter/tables/__init__.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -84,12 +84,12 @@ def __init__(self, content):
8484
# Standalone has precedence, then initial, then final, then medial.
8585
# This is somewhat arbitrary and may change if special cases arise.
8686
# WB markers are moved to flags to allow default comparison.
87-
if self.content.endswith(TOKEN_WB_MARKER):
88-
self.flags |= BOW
89-
self.content = self.content.rstrip(TOKEN_WB_MARKER)
9087
if self.content.startswith(TOKEN_WB_MARKER):
91-
self.flags |= EOW
88+
self.flags |= BOW
9289
self.content = self.content.lstrip(TOKEN_WB_MARKER)
90+
if self.content.endswith(TOKEN_WB_MARKER):
91+
self.flags |= EOW
92+
self.content = self.content.rstrip(TOKEN_WB_MARKER)
9393

9494
def __lt__(self, other):
9595
"""
@@ -115,9 +115,9 @@ def __lt__(self, other):
115115
if (
116116
(self.flags > 0 or other.flags > 0)
117117
and self.content == other.content):
118-
logger.debug(f"{self.content} flags: {self.flags}")
119-
logger.debug(f"{other.content} flags: {other.flags}")
120-
logger.debug("Performing flags comparison.")
118+
# logger.debug(f"{self.content} flags: {self.flags}")
119+
# logger.debug(f"{other.content} flags: {other.flags}")
120+
# logger.debug("Performing flags comparison.")
121121

122122
return self.flags > other.flags
123123

@@ -202,6 +202,8 @@ def populate_table(conn, tname, tdata):
202202
203203
@param tdata(dict): Table data.
204204
"""
205+
logger.info(f"Populating table: {tname}")
206+
205207
res = conn.execute(
206208
"""INSERT INTO tbl_language (
207209
name, label, marc_code, description

scriptshifter/tables/data/_ignore_base.yml

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
1+
---
12
general:
23
name: Common ignore list.
34

45
roman_to_script:
56
ignore:
6-
- " "
77
- "at head of title"
88
- "colophon"
99
- "date of publication not identified"
@@ -38,8 +38,6 @@ roman_to_script:
3838
- "\\b[\u2021$][0-9a-z]\\b"
3939

4040
script_to_roman:
41-
ignore:
42-
- " "
4341
ignore_ptn:
4442
# MARC sub-field markers.
4543
- "\\b[\u2021$][0-9a-z]\\b"

scriptshifter/tables/data/greek_classical.yml

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -404,7 +404,8 @@ script_to_roman:
404404
"\u0399": "I"
405405
"\u039A": "K"
406406
"\u039B": "L"
407-
"\u039C\u03C0%": "B"
407+
"%\u039C\u03A0": "B"
408+
"%\u039C\u03C0": "B"
408409
"\u039C": "M"
409410
"\u039D\u03C4%": "\u1E0E"
410411
"\u039D": "N"
@@ -461,10 +462,11 @@ script_to_roman:
461462
"\u03B2": "b"
462463
"\u03B3\u03B3": "ng"
463464
"\u03B3\u03BA": "nk"
464-
"\u0393\u03BA%": "Gk"
465-
"\u03B3\u03BA%": "gk"
466465
"%\u0393\u03BA": "Gk"
466+
"%\u0393\u039A": "GK"
467+
"\u0393\u039A%": "GK"
467468
"%\u03B3\u03BA": "gk"
469+
"\u03B3\u03BA%": "gk"
468470
"\u03B3\u03BE": "nx"
469471
"\u03B3\u03C7": "nch"
470472
"\u03B3": "g"
@@ -494,9 +496,9 @@ script_to_roman:
494496
"\u03B9": "i"
495497
"\u03BA": "k"
496498
"\u03BB": "l"
497-
"\u03BC\u03C0%": "b"
499+
"%\u03BC\u03C0": "b"
498500
"\u03BC": "m"
499-
"\u03BD\u03C4%": "\u1E0F"
501+
"%\u03BD\u03C4": "\u1E0F"
500502
"\u03BD": "n"
501503
"\u03BE": "x"
502504
"\u1F41": "ho"
@@ -611,6 +613,8 @@ roman_to_script:
611613
"Au": "\u0391\u03C5"
612614
"au": "\u03B1\u03C5"
613615
"a\u0301": "\u03AC"
616+
"%B": "\u039C\u03C0"
617+
"%b": "\u03BC\u03C0"
614618
"B": "\u0392"
615619
"b": "\u03B2"
616620
"b\u0333": "\u03D0"
@@ -699,7 +703,7 @@ roman_to_script:
699703
"m": "\u03BC"
700704
"nch": "\u03B3\u03C7"
701705
"ng": "\u03B3\u03B3"
702-
"%nk%": "\u03B3\u03BA"
706+
"nk": "\u03B3\u03BA"
703707
"nx": "\u03B3\u03BE"
704708
"No\u0332": "\u2116"
705709
"N": "\u039D"
@@ -749,7 +753,7 @@ roman_to_script:
749753
# "S": "\u03F9" # FIXME ambiguous.
750754
"S": "\u03A3"
751755
# "s": "\u03F2" # FIXME ambiguous.
752-
"%s": "\u03C2"
756+
"s%": "\u03C2"
753757
"s": "\u03C3"
754758
"T\u0333H\u0333": "\u03F4"
755759
"t\u0333h\u0333": "\u03D1"

0 commit comments

Comments
 (0)