From def2fc1acaa0663e48f146f28cdc2c1cf523a4b7 Mon Sep 17 00:00:00 2001 From: Sochen Date: Sat, 7 Mar 2026 08:45:53 +0000 Subject: [PATCH] fix: card formatting, example sentence homograph protection, plural coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Formatting (#5): - Labels now display with nikkud (שֹׁרֶשׁ, חֵלֶק דִּיבּוּר, רַבִּים, etc.) - Secondary fields below audio 1.6x bigger (20px → 32px) - Label keys styled separately (.sec-key class, smaller/dimmer than values) - Example sentences centered on card (margin: auto, max-width: 90%) - Emoji only on English side (removed duplicate from Eng→Heb back) - Broken images hidden via onerror handler Example sentences (#6): - Confusable words (same consonants, different nikkud) now only match example sentences by exact nikkud form, preventing wrong-word sentences - Same protection applied to cloze sentence and vetted sentence lookups Plural coverage (#3): - Added stripped-nikkud fallback for noun plural matching - 3,918 nouns now show plurals (was ~3,604, +314 from fallback) Co-Authored-By: Claude Opus 4.6 --- apkg_builder.py | 68 ++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/apkg_builder.py b/apkg_builder.py index 6599026..af00216 100644 --- a/apkg_builder.py +++ b/apkg_builder.py @@ -190,7 +190,8 @@ CARD_CSS = """ direction: rtl; text-align: right; font-style: italic; - margin-top: 10px; + margin: 10px auto 0; + max-width: 90%; border-right: 3px solid #aaa; padding-right: 8px; } @@ -211,12 +212,16 @@ CARD_CSS = """ color: #555; } .sec-label { - font-size: 20px; + font-size: 32px; color: #555; direction: rtl; text-align: center; margin-top: 6px; } +.sec-key { + font-size: 24px; + color: #888; +} .related-group { direction: rtl; text-align: right; @@ -235,6 +240,7 @@ CARD_CSS = """ .meaning { color: #82b0ff; } .root-info { color: #aaa; } .sec-label { color: #aaa; } + .sec-key { color: #666; } .hint { color: #777; } .voice-label { color: #888; } .example { color: #bbb; border-right-color: #555; } @@ -257,14 +263,14 @@ VOCAB_BACK_HEB = """
{{Meaning}}
{{#Emoji}}
{{Emoji}}
{{/Emoji}} -{{^Emoji}}{{#Image}}
{{/Image}}{{/Emoji}} -{{#Root}}
שורש: {{Root}}
{{/Root}} -{{#PoS}}
חלק דיבור: {{PoS}}
{{/PoS}} +{{^Emoji}}{{#Image}}
{{/Image}}{{/Emoji}} +{{#Root}}
שֹׁרֶשׁ: {{Root}}
{{/Root}} +{{#PoS}}
חֵלֶק דִּיבּוּר: {{PoS}}
{{/PoS}} {{#SharedRoots}} -
מילים קשורות:
+
מִילִים קְשׁוּרוֹת:
{{SharedRoots}}
{{/SharedRoots}} -{{#Plural}}
רבים: {{Plural}}
{{/Plural}} +{{#Plural}}
רַבִּים: {{Plural}}
{{/Plural}} {{#Example}}
{{Example}}
{{/Example}} @@ -275,7 +281,7 @@ VOCAB_FRONT_ENG = """
{{Meaning}}
{{#Hint}}
{{Hint}}
{{/Hint}} {{#Emoji}}
{{Emoji}}
{{/Emoji}} -{{^Emoji}}{{#Image}}
{{/Image}}{{/Emoji}} +{{^Emoji}}{{#Image}}
{{/Image}}{{/Emoji}} """ VOCAB_BACK_ENG = """ @@ -283,12 +289,10 @@ VOCAB_BACK_ENG = """
{{Word}}{{#Prep}} {{Prep}}{{/Prep}}
{{#Audio}}
{{Audio}}
{{/Audio}} -{{#WordNoNikkud}}
ללא ניקוד: {{WordNoNikkud}}
{{/WordNoNikkud}} -{{#Root}}
שורש: {{Root}}
{{/Root}} -{{#PoS}}
חלק דיבור: {{PoS}}
{{/PoS}} -{{#Emoji}}
{{Emoji}}
{{/Emoji}} -{{^Emoji}}{{#Image}}
{{/Image}}{{/Emoji}} -{{#Plural}}
רבים: {{Plural}}
{{/Plural}} +{{#WordNoNikkud}}
לְלֹא נִיקּוּד: {{WordNoNikkud}}
{{/WordNoNikkud}} +{{#Root}}
שֹׁרֶשׁ: {{Root}}
{{/Root}} +{{#PoS}}
חֵלֶק דִּיבּוּר: {{PoS}}
{{/PoS}} +{{#Plural}}
רַבִּים: {{Plural}}
{{/Plural}} {{#Example}}
{{Example}}
{{/Example}} @@ -449,7 +453,6 @@ VOICE_MAP = { # ────────────────────────────────────────────────────────────────────────────── - def _audio_tag(word_no_nikkud: str, audio_dir: Path = AUDIO_DIR) -> str: """Return [sound:xxx.mp3] if audio file exists, else empty string.""" safe = re.sub(r"[^\u05d0-\u05ea]", "", word_no_nikkud) @@ -738,6 +741,7 @@ def build_vocab_deck( # Load noun plural forms for vocab card back display noun_plural_lookup: dict[str, str] = {} # word (nikkud) → plural (nikkud) + _noun_plural_stripped: dict[str, str] = {} # word (stripped) → plural (nikkud), fallback noun_plural_path = DATA_DIR / "noun_plurals.json" if noun_plural_path.exists(): try: @@ -748,6 +752,9 @@ def build_vocab_deck( pl = _entry.get("plural", "") if sg and pl: noun_plural_lookup[sg] = pl + s = _strip_nikkud(sg) + if s not in _noun_plural_stripped: + _noun_plural_stripped[s] = pl logger.info(f" Noun plurals loaded: {len(noun_plural_lookup)} entries") except (json.JSONDecodeError, OSError): pass @@ -933,21 +940,25 @@ def build_vocab_deck( if mp3_path not in media_files: media_files.append(mp3_path) + # Consonant-only form for confusable detection and cloze matching + word_consonants = _strip_nikkud(word) + is_confusable = word_consonants in _confusable_words + # Example sentences — priority: EPUB (nikkud'd) > Ben Yehuda > none + # For confusable words (same consonants, different nikkud), only match by + # exact nikkud form to avoid showing wrong-word sentences. example_html = "" # 1. EPUB/PDF sentences (full nikkud) - epub_sents = ( - epub_examples.get(word) or epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik)) - ) + epub_sents = epub_examples.get(word) + if not epub_sents and not is_confusable: + epub_sents = epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik)) if epub_sents: example_html = epub_sents[0] else: # 2. Ben Yehuda examples (some have nikkud from nikkud corpus) - by_sents = ( - examples_cache.get(word) - or examples_cache.get(word_no_nik) - or examples_cache.get(_strip_nikkud(word_no_nik)) - ) + by_sents = examples_cache.get(word) + if not by_sents and not is_confusable: + by_sents = examples_cache.get(word_no_nik) or examples_cache.get(_strip_nikkud(word_no_nik)) if by_sents: # Prefer nikkud'd Ben Yehuda sentences (contain combining marks) nikkud_sents = [s for s in by_sents if any("\u0591" <= c <= "\u05c7" for c in s)] @@ -958,13 +969,12 @@ def build_vocab_deck( # Uses stripped (no-nikkud) matching. Skips homographs (confusable words). cloze_example = "" cloze_hint = "" - word_consonants = _strip_nikkud(word) - if word_consonants and word_consonants not in _confusable_words: + if word_consonants and not is_confusable: # Pick best sentence for cloze: vetted first, then example_html cloze_source = None - vetted = ( - vetted_cloze.get(word) or vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik)) - ) + vetted = vetted_cloze.get(word) + if not vetted and not is_confusable: + vetted = vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik)) if vetted: cloze_source = vetted[0] elif example_html: @@ -1041,7 +1051,7 @@ def build_vocab_deck( emoji_str, prep_str, hint_str, - noun_plural_lookup.get(word, ""), + noun_plural_lookup.get(word, "") or _noun_plural_stripped.get(word_consonants, ""), cloze_example, cloze_hint, ],