feat: confusables show ktiv male, emoji/prep stripping fully upstream
- Confusables deck front now shows shared ktiv male form instead of nikkud variants joined by "/". Back still shows nikkud with definitions.
- Fixed list scraper EMOJI_RE to catch variation selectors (U+FE0F) and ZWJ (U+200D) — cleaned 17 entries with leftover selectors in meaning.
- Removed build-time prep extraction fallback (0 entries relied on it).
- release.py: fix keeshare field name (API_TOKEN → password).

Closes: Pealim #11 (emoji/prep upstream), Pealim #16 (confusables ktiv male)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
138acb06d8
commit
f3496998f5
4 changed files with 76 additions and 51 deletions
|
|
@ -265,6 +265,14 @@ details[open] > .more-header::before { content: "● "; }
|
|||
text-align: center;
|
||||
margin: 0.3em 0;
|
||||
}
|
||||
.plural-direction {
|
||||
font-size: 32px;
|
||||
color: #444;
|
||||
text-align: center;
|
||||
direction: rtl;
|
||||
margin: 8px 0;
|
||||
font-weight: bold;
|
||||
}
|
||||
.card [type="button"], .card button, .replay-button {
|
||||
display: block !important;
|
||||
margin: 4px auto !important;
|
||||
|
|
@ -288,7 +296,26 @@ details[open] > .more-header::before { content: "● "; }
|
|||
.related-header { color: #999; }
|
||||
.rw-word { color: #e0e0e0; }
|
||||
.rw-meaning { color: #999; }
|
||||
.plural-direction { color: #aaa; }
|
||||
}
|
||||
.nightMode .card { color: #e8e8e8; background: #1c1c1e; }
|
||||
.nightMode .hebrew { color: #f0f0f0; }
|
||||
.nightMode .hebrew-sm { color: #e0e0e0; }
|
||||
.nightMode .meaning { color: #82b0ff; }
|
||||
.nightMode .sec-label { color: #e0e0e0; }
|
||||
.nightMode .sec-key { color: #e0e0e0; }
|
||||
.nightMode .sec-val { color: #e0e0e0; }
|
||||
.nightMode .conf-entry { color: #ddd; }
|
||||
.nightMode .hint { color: #777; }
|
||||
.nightMode .voice-label { color: #888; }
|
||||
.nightMode .example { color: #e0e0e0; border-right-color: #555; }
|
||||
.nightMode .divider { border-top-color: #333; }
|
||||
.nightMode .freq-badge { color: #888; border-color: #444; }
|
||||
.nightMode .more-header { color: #bbb; background: #2a2a2e; border-color: #555; }
|
||||
.nightMode .related-header { color: #999; }
|
||||
.nightMode .rw-word { color: #e0e0e0; }
|
||||
.nightMode .rw-meaning { color: #999; }
|
||||
.nightMode .plural-direction { color: #aaa; }
|
||||
"""
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -422,7 +449,7 @@ CONJ_BACK = """
|
|||
<div class="hebrew">{{ConjugatedForm}}{{#Prep}} ({{Prep}}){{/Prep}}</div>
|
||||
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
|
||||
<details class="more-toggle"><summary class="more-header">מידע נוסף</summary>
|
||||
{{#Meaning}}<div class="sec-label" style="text-align:center;display:block;">{{Meaning}}</div>{{/Meaning}}
|
||||
{{#Meaning}}<div class="meaning" style="font-size:28px;">{{Meaning}}</div>{{/Meaning}}
|
||||
<div class="sec-table">
|
||||
<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span><span class="sec-val">{{Root}}</span></div>
|
||||
<div class="sec-label"><span class="sec-key">בִּנְיָן:</span><span class="sec-val">{{Binyan}}</span></div>
|
||||
|
|
@ -860,7 +887,6 @@ def build_vocab_deck(
|
|||
meaning = re.sub(r"\s{2,}", " ", meaning).strip(", ;:")
|
||||
meaning = re.sub(r"(\w)\(", r"\1 (", meaning) # space before opening paren
|
||||
meaning = re.sub(r",(\S)", r", \1", meaning) # space after comma
|
||||
meaning_raw = entry.get("meaning_raw", "") or ""
|
||||
slug = entry.get("slug", "") or ""
|
||||
frequency = entry.get("frequency") or 999_999
|
||||
audio_file = entry.get("audio_file", "") or ""
|
||||
|
|
@ -906,14 +932,9 @@ def build_vocab_deck(
|
|||
emoji_str = emoji_lookup[kw]
|
||||
break
|
||||
|
||||
# Extract Hebrew prepositions: prefer upstream-parsed prep field, fall back to meaning_raw scan
|
||||
# (fallback covers entries scraped before prep was moved upstream)
|
||||
# Hebrew prepositions — extracted upstream by list scraper
|
||||
entry_prep = entry.get("prep")
|
||||
if entry_prep:
|
||||
prep_str = " ".join(f"({p})" for p in entry_prep.split())
|
||||
else:
|
||||
preps = HBPAREN_RE.findall(meaning_raw)
|
||||
prep_str = " ".join(f"({p})" for p in preps)
|
||||
prep_str = " ".join(f"({p})" for p in entry_prep.split()) if entry_prep else ""
|
||||
|
||||
# Audio — use audio_file from entry; for confusables it's already slug-based
|
||||
audio_tag = ""
|
||||
|
|
@ -1123,25 +1144,12 @@ def build_conj_deck(
|
|||
root = ".".join(root_list)
|
||||
voice = VOICE_MAP.get(binyan, "")
|
||||
|
||||
meaning_raw = entry.get("meaning_raw", "") or ""
|
||||
meaning = entry.get("meaning", "") or ""
|
||||
# Extract Hebrew preposition — strip from meaning, show on Hebrew side
|
||||
# Hebrew preposition — extracted upstream by scraper
|
||||
prep_str = ""
|
||||
conj_prep = conj.get("prep")
|
||||
conj_prep = conj.get("prep") or entry.get("prep")
|
||||
if conj_prep:
|
||||
# Strip any parentheses from stored prep value
|
||||
prep_str = conj_prep.strip("() ")
|
||||
elif meaning_raw:
|
||||
preps = HBPAREN_RE.findall(meaning_raw)
|
||||
if preps:
|
||||
prep_str = preps[0]
|
||||
# Strip Hebrew prepositions from English meaning to avoid duplication
|
||||
if prep_str:
|
||||
meaning = HBPAREN_RE.sub("", meaning).strip()
|
||||
# Also strip from meaning_raw patterns like "(על)"
|
||||
meaning = re.sub(r"\(\s*" + re.escape(prep_str) + r"\s*-?\s*\)", "", meaning).strip()
|
||||
# Clean up double spaces and trailing commas
|
||||
meaning = re.sub(r"\s{2,}", " ", meaning).strip(", ")
|
||||
|
||||
related = [(f, w, m) for f, w, m in root_words.get(root, []) if w != infinitive]
|
||||
if related:
|
||||
|
|
@ -1461,7 +1469,7 @@ def build_confusables_deck(
|
|||
continue
|
||||
|
||||
word_no_nik = unique_entries[0]["word"].get("ktiv_male", "")
|
||||
words_display = " / ".join(e["word"]["nikkud"] for e in unique_entries)
|
||||
words_display = word_no_nik # Show ktiv male (shared form) on front
|
||||
|
||||
defs_parts: list[str] = []
|
||||
audio_parts: list[str] = []
|
||||
|
|
@ -1530,8 +1538,8 @@ def write_conf_apkg(
|
|||
PLURAL_FRONT_SG = """
|
||||
<div class="hebrew" style="color:#1a1a8c;">{{Singular}}</div>
|
||||
{{#SingularAudio}}<div>{{SingularAudio}}</div>{{/SingularAudio}}
|
||||
<div class="sec-label">{{Meaning}}</div>
|
||||
<div class="hint" style="font-size:28px;">יָחִיד ← רַבִּים</div>
|
||||
<div class="meaning" style="font-size:28px;">{{Meaning}}</div>
|
||||
<div class="plural-direction">יָחִיד ← רַבִּים</div>
|
||||
"""
|
||||
|
||||
PLURAL_BACK_SG = """
|
||||
|
|
@ -1547,14 +1555,14 @@ PLURAL_BACK_SG = """
|
|||
PLURAL_FRONT_PL = """
|
||||
<div class="hebrew" style="color:#1a1a8c;">{{Plural}}</div>
|
||||
{{#PluralAudio}}<div>{{PluralAudio}}</div>{{/PluralAudio}}
|
||||
<div class="hint" style="font-size:28px;">רַבִּים ← יָחִיד</div>
|
||||
<div class="plural-direction">רַבִּים ← יָחִיד</div>
|
||||
"""
|
||||
|
||||
PLURAL_BACK_PL = """
|
||||
{{FrontSide}}<hr>
|
||||
<div class="hebrew">{{Singular}}</div>
|
||||
{{#SingularAudio}}<div>{{SingularAudio}}</div>{{/SingularAudio}}
|
||||
<div class="sec-label" style="text-align:center;display:block;">{{Meaning}}</div>
|
||||
<div class="meaning" style="font-size:28px;">{{Meaning}}</div>
|
||||
<div class="sec-table">
|
||||
{{#Gender}}<div class="sec-label"><span class="sec-key">מִין:</span><span class="sec-val">{{Gender}}</span></div>{{/Gender}}
|
||||
{{#Mishkal}}<div class="sec-label"><span class="sec-key">מִשְׁקָל:</span><span class="sec-val">{{Mishkal}}</span></div>{{/Mishkal}}
|
||||
|
|
@ -1651,9 +1659,9 @@ def build_plural_deck(
|
|||
irregular_count = len(irregulars)
|
||||
target_regular = irregular_count * 2
|
||||
mishkal_count = len(by_mishkal) or 1
|
||||
per_mishkal = max(2, target_regular // mishkal_count)
|
||||
# Over-sample per mishkal to compensate for small patterns, then trim
|
||||
per_mishkal = max(3, (target_regular * 3) // (mishkal_count * 2))
|
||||
|
||||
selected: list[tuple[str, dict, dict]] = list(irregulars)
|
||||
regular_pool: list[tuple[str, dict, dict]] = []
|
||||
for _mishkal, entries in sorted(by_mishkal.items()):
|
||||
entries.sort(key=lambda e: e[1].get("frequency") or 999_999)
|
||||
|
|
@ -1664,7 +1672,24 @@ def build_plural_deck(
|
|||
regular_pool.sort(key=lambda e: e[1].get("frequency") or 999_999)
|
||||
regular_pool = regular_pool[:target_regular]
|
||||
|
||||
selected.extend(regular_pool)
|
||||
# Sort both pools by frequency, then interleave for homogeneous 2:1 regular:irregular
|
||||
irregulars.sort(key=lambda e: e[1].get("frequency") or 999_999)
|
||||
regular_pool.sort(key=lambda e: e[1].get("frequency") or 999_999)
|
||||
|
||||
# Interleave: for every 1 irregular, insert 2 regulars
|
||||
selected: list[tuple[str, dict, dict]] = []
|
||||
ri = 0 # regular index
|
||||
for _ii, irr in enumerate(irregulars):
|
||||
# Insert 2 regulars before each irregular (when available)
|
||||
for _ in range(2):
|
||||
if ri < len(regular_pool):
|
||||
selected.append(regular_pool[ri])
|
||||
ri += 1
|
||||
selected.append(irr)
|
||||
|
|
|
|||
Loading…
Reference in a new issue