Skip to content

Commit a160e3e

Browse files
committed
Update Lepcha and Church Slavonic; fix one more ignore pattern.
1 parent 636650e commit a160e3e

3 files changed

Lines changed: 4858 additions & 301 deletions

File tree

scriptshifter/tables/data/_ignore_base.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ roman_to_script:
3131
# numerals) ranges to avoid this ambiguity.
3232
- "I{2,3}\\b"
3333
- "I(V|X)\\b"
34-
- "LI{,3}\\b"
34+
- "LI{1,3}\\b"
3535
- "LI?(V|X)\\b"
3636
- "L(V|X{1,3})I{,3}\\b"
3737
- "LX{1,3}I?V\\b"

scriptshifter/tables/data/church_slavonic.yml

Lines changed: 63 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -2,211 +2,95 @@
22
general:
33
name: Church Slavonic
44
description: Church Slavonic in Cyrillic script.
5-
version: 1.0.0
6-
date: 2025-11-30
5+
version: 1.0.1
6+
date: 2026-01-11
77
parents:
88
- cyrillic_generic
99

1010
roman_to_script:
1111
map:
1212

13-
"V\u0307": "\u0474"
14-
"v\u0307": "\u0475"
15-
16-
"G\u0301": "\u0494"
17-
"g\u0301": "\u0495"
18-
19-
# CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
20-
"I\uFE20E\uFE21\u0304": "\u0464"
21-
"I\uFE20E\u0304\uFE21": "\u0464"
22-
"I\u0361E\u0304": "\u0464"
23-
"I\uFE20e\uFE21\u0304": "\u0464"
24-
"I\uFE20e\u0304\uFE21": "\u0464"
25-
"I\u0361e\u0304": "\u0464"
26-
"I\uFE20E\uFE21\u0328": "\u0468"
27-
"I\uFE20E\u0328\uFE21": "\u0468"
28-
"I\u0361E\u0328": "\u0468"
29-
"I\uFE20e\uFE21\u0328": "\u0468"
30-
"I\uFE20e\u0328\uFE21": "\u0468"
31-
"I\u0361e\u0328": "\u0468"
32-
"i\uFE20e\uFE21\u0304": "\u0465"
33-
"i\uFE20e\u0304\uFE21": "\u0465"
34-
"i\u0361e\u0304": "\u0465"
35-
"i\uFE20E\uFE21\u0304": "\u0465"
36-
"i\uFE20E\u0304\uFE21": "\u0465"
37-
"i\u0361E\u0304": "\u0465"
38-
"i\uFE20e\uFE21\u0328": "\u0469"
39-
"i\uFE20e\u0328\uFE21": "\u0469"
40-
"i\u0361e\u0328": "\u0469"
41-
"i\uFE20E\uFE21\u0328": "\u0469"
42-
"i\uFE20E\u0328\uFE21": "\u0469"
43-
"i\u0361E\u0328": "\u0469"
44-
"I\uFE20E\uFE21": "\u0462"
45-
"I\u0361E": "\u0462"
46-
"I\uFE20e\uFE21": "\u0462"
47-
"I\u0361e": "\u0462"
48-
"i\uFE20e\uFE21": "\u0463"
49-
"i\u0361e": "\u0463"
50-
"i\uFE20E\uFE21": "\u0463"
51-
"i\u0361E": "\u0463"
52-
53-
# CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
13+
"E\u0304": "\u0415"
14+
"e\u0304": "\u0435"
5415
"E\u030C": "\u0462"
55-
"E\u0304": "\u0404"
56-
"E\u0307": "\u042D"
57-
"E\u0308": "\u0401"
58-
"E\u0328": "\u0466"
5916
"e\u030C": "\u0463"
60-
"e\u0304": "\u0454"
61-
"e\u0307": "\u044D"
62-
"e\u0308": "\u0451"
63-
"e\u0328": "\u0467"
64-
65-
# CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
66-
"I\uFE20O\uFE21\u0328": "\u046C"
67-
"I\uFE20O\u0328\uFE21": "\u046C"
68-
"I\u0361O\u0328": "\u046C"
69-
"I\uFE20o\uFE21\u0328": "\u046C"
70-
"I\uFE20o\u0328\uFE21": "\u046C"
71-
"I\u0361o\u0328": "\u046C"
72-
"i\uFE20o\uFE21\u0328": "\u046D"
73-
"i\uFE20o\u0328\uFE21": "\u046D"
74-
"i\u0361o\u0328": "\u046D"
75-
"i\uFE20O\uFE21\u0328": "\u046D"
76-
"i\uFE20O\u0328\uFE21": "\u046D"
77-
"i\u0361O\u0328": "\u046D"
78-
79-
# CONVERSION OF "I/i" WITH MACRON (0304) AND BREVE (0306)
80-
"I\u0304": "\u0406"
81-
"i\u0304": "\u0456"
82-
83-
# CONVERSION OF REMAINING LONE "I/i"
84-
"I": "\u0418"
85-
"i": "\u0438"
86-
87-
"kH": "\u0445"
88-
89-
"K\uFE20S\uFE21": "\u046E"
90-
"K\uFE20s\uFE21": "\u046E"
91-
"k\uFE20s\uFE21": "\u046F"
92-
"k\uFE20S\uFE21": "\u046F"
93-
94-
# CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
17+
"E": "\u0404"
18+
"e": "\u0454"
19+
"F\u0307": "\u0472"
20+
"f\u0307": "\u0473"
21+
"G\u0301": "\u040B"
22+
"g\u0301": "\u045B"
23+
"I\uFE20A\uFE21": "\uA656"
24+
"I\uFE20a\uFE21": "\uA656"
25+
"I\u0361A": "\uA656"
26+
"I\u0361a": "\uA656"
27+
"i\uFE20a\uFE21": "\uA657"
28+
"i\u0361a": "\uA657"
29+
"I\uFE20E\uFE21": "\u0464"
30+
"I\uFE20e\uFE21": "\u0464"
31+
"I\u0361E": "\u0464"
32+
"i\uFE20e\uFE21": "\u0465"
33+
"i\u0361e": "\u0465"
9534
"O\u0304\uFE20T\uFE21": "\u047E"
96-
"O\u0304\uFE20t\uFE21": "\u047E"
9735
"O\uFE20\u0304T\uFE21": "\u047E"
36+
"O\u0304\uFE20t\uFE21": "\u047E"
9837
"O\uFE20\u0304t\uFE21": "\u047E"
99-
"O\uFE20T\uFE21": "\u047E"
100-
"O\uFE20t\uFE21": "\u047E"
38+
"O\u0304\u0361T": "\u047E"
39+
"O\u0304\u0361t": "\u047E"
10140
"o\u0304\uFE20t\uFE21": "\u047F"
102-
"o\u0304\uFE20T\uFE21": "\u047F"
10341
"o\uFE20\u0304t\uFE21": "\u047F"
104-
"o\uFE20\u0304T\uFE21": "\u047F"
105-
"o\uFE20t\uFE21": "\u047F"
106-
"o\uFE20T\uFE21": "\u047F"
107-
108-
# CONVERSION OF "O/o" WITH MACRON (0304) AND OGONEK (0328)
109-
"O\u0328": "\u046A"
110-
"o\u0328": "\u046B"
42+
"o\u0304\u0361t": "\u047F"
11143
"O\u0304": "\u0460"
11244
"o\u0304": "\u0461"
113-
114-
"P\uFE20S\uFE21": "\u0470"
115-
"P\uFE20s\uFE21": "\u0470"
116-
"p\uFE20s\uFE21": "\u0471"
117-
"p\uFE20S\uFE21": "\u0471"
118-
11945
"SHT": "\u0429"
12046
"SHt": "\u0429"
12147
"Sht": "\u0429"
122-
"sHT": "\u0449"
123-
"shT": "\u0449"
12448
"sht": "\u0449"
125-
126-
"sH": "\u0448"
127-
128-
"T\uFE20S\uFE21": "\u0426"
129-
"T\uFE20s\uFE21": "\u0426"
130-
"t\uFE20s\uFE21": "\u0446"
131-
"t\uFE20S\uFE21": "\u0446"
132-
133-
"U\u0304": "\u0478"
134-
"u\u0304": "\u0479"
135-
136-
"F\u0307": "\u0472"
137-
"f\u0307": "\u0473"
138-
139-
"cH": "\u0447"
140-
49+
"U\u0304": "\uA64A"
50+
"u\u0304": "\uA64B"
51+
"U": "\u0478"
52+
"u": "\u0479"
53+
"Y\u0304": "\u042B"
54+
"y\u0304": "\u044B"
14155
"Y\u0307": "\u0476"
14256
"y\u0307": "\u0477"
143-
"Y": "\u042B"
144-
"y": "\u044B"
145-
146-
# this conversion is ambiguous - \u042C is also theoretically possible
147-
"\u0027": "\u044C"
148-
# this conversion is ambiguous - \u042A is also theoretically possible
149-
"\u02BA": "\u044A"
150-
57+
"Y": "\uA650"
58+
"y": "\uA651"
59+
"Z\u0307": "\u0405"
60+
"z\u0307": "\u0455"
61+
15162
script_to_roman:
15263
map:
153-
# CONVERSION TO "I/i" LIGATED TO "A/a"
154-
"\u0474": "V\u0307"
155-
"\u0475": "v\u0307"
156-
"\u0494": "G\u0301"
157-
"\u0495": "g\u0301"
158-
"\u0413": "G"
159-
"\u0433": "g"
160-
# CONVERSION TO "I/i" LIGATED TO "E/e" WITH DIACRITICS
161-
"\u0464": "I\u0361E\u0304"
162-
"\u0468": "I\u0361E\u0328"
163-
"\u0465": "i\u0361e\u0304"
164-
"\u0469": "i\u0361e\u0328"
165-
# CONVERSION TO "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
166-
"\u0462": "E\u030C"
167-
"\u0404": "E\u0304"
168-
"\u042D": "E\u0307"
169-
"\u0401": "E\u0308"
170-
"\u0466": "E\u0328"
171-
"\u0463": "e\u030C"
172-
"\u0454": "e\u0304"
173-
"\u044D": "e\u0307"
174-
"\u0451": "e\u0308"
175-
"\u0467": "e\u0328"
176-
# CONVERSION TO "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
177-
"\u046C": "I\u0361O\u0328"
178-
"\u046D": "i\u0361o\u0328"
179-
# CONVERSION TO "I/i" LIGATED TO "U/u"
180-
# CONVERSION TO "I/i" WITH MACRON (0304) AND BREVE (0306)
181-
"\u0406": "I\u0304"
182-
"\u0456": "i\u0304"
183-
# CONVERSION TO LONE "I/i"
184-
"\u046E": "K\u0361S"
185-
"\u046F": "k\u0361s"
186-
# CONVERSION TO "O/o" WITH MACRON (0304) LIGATED TO "T/t"
187-
"\u047E": "O\u0361\u0304t"
188-
"\u047F": "o\u0361\u0304t"
189-
# CONVERSION TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
190-
"\u046A": "O\u0328"
191-
"\u046B": "o\u0328"
192-
"\u0460": "O\u0304"
193-
"\u0461": "o\u0304"
194-
# CONVERSION TO LONE "O/o"
195-
"\u0470": "P\u0361S"
196-
"\u0471": "p\u0361s"
64+
65+
"\u0404": "E"
66+
"\u0405": "Z\u0307"
67+
"\u040B": "G\u0301"
68+
"\u0415": "E\u0304"
19769
"\u0429": "Sht"
70+
"\u042B": "Y\u0304"
71+
"\u0435": "e\u0304"
19872
"\u0449": "sht"
199-
"\u0478": "U\u0304"
200-
"\u0479": "u\u0304"
73+
"\u044B": "y\u0304"
74+
"\u0454": "e"
75+
"\u0455": "z\u0307"
76+
"\u045B": "g\u0301"
77+
"\u0460": "O\u0304"
78+
"\u0461": "o\u0304"
79+
"\u0462": "E\u030C"
80+
"\u0463": "e\u030C"
81+
"\u0464": "I\u0361E"
82+
"\u0465": "i\u0361e"
20183
"\u0472": "F\u0307"
20284
"\u0473": "f\u0307"
20385
"\u0476": "Y\u0307"
20486
"\u0477": "y\u0307"
205-
# Uppercase hard sign (ambiguously maps to one Latin character)
206-
"\u042A": "\u02BA"
207-
# Lowercase hard sign (ambiguously maps to one Latin character)
208-
"\u044A": "\u02BA"
209-
# Uppercase soft sign (ambiguously maps to one Latin character)
210-
# Lowercase soft sign (ambiguously maps to one Latin character)
211-
"\u042B": "Y"
212-
"\u044B": "y"
87+
"\u0478": "U"
88+
"\u0479": "u"
89+
"\u047E": "O\u0304\u0361T"
90+
"\u047F": "o\u0304\u0361t"
91+
"\uA64A": "U\u0304"
92+
"\uA64B": "u\u0304"
93+
"\uA650": "Y"
94+
"\uA651": "y"
95+
"\uA656": "I\u0361A"
96+
"\uA657": "i\u0361a"

0 commit comments

Comments
 (0)