fix: card formatting, example sentence homograph protection, plural coverage

Formatting (#5):
- Labels now display with nikkud (שֹׁרֶשׁ, חֵלֶק דִּיבּוּר, רַבִּים, etc.)
- Secondary fields below audio 1.6x bigger (20px → 32px)
- Label keys styled separately (.sec-key class, smaller/dimmer than values)
- Example sentences centered on card (margin: auto, max-width: 90%)
- Emoji only on English side (removed duplicate from Eng→Heb back)
- Broken images hidden via onerror handler

Example sentences (#6):
- Confusable words (same consonants, different nikkud) now only match example sentences by exact nikkud form, preventing wrong-word sentences
- Same protection applied to cloze sentence and vetted sentence lookups

Plural coverage (#3):
- Added stripped-nikkud fallback for noun plural matching
- 3,918 nouns now show plurals (was ~3,604, +314 from fallback)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 08:45:53 +00:00 · 2026-03-07 08:45:53 +00:00 · def2fc1aca
commit def2fc1aca
parent 5685270dfa
1 changed files with 39 additions and 29 deletions
--- a/apkg_builder.py
+++ b/apkg_builder.py
@ -190,7 +190,8 @@ CARD_CSS = """
  direction: rtl;
  text-align: right;
  font-style: italic;
-  margin-top: 10px;
+  margin: 10px auto 0;
+  max-width: 90%;
  border-right: 3px solid #aaa;
  padding-right: 8px;
 }
@ -211,12 +212,16 @@ CARD_CSS = """
  color: #555;
 }
 .sec-label {
-  font-size: 20px;
+  font-size: 32px;
  color: #555;
  direction: rtl;
  text-align: center;
  margin-top: 6px;
 }
+.sec-key {
+  font-size: 24px;
+  color: #888;
+}
 .related-group {
  direction: rtl;
  text-align: right;
@ -235,6 +240,7 @@ CARD_CSS = """
  .meaning     { color: #82b0ff; }
  .root-info   { color: #aaa; }
  .sec-label   { color: #aaa; }
+  .sec-key     { color: #666; }
  .hint        { color: #777; }
  .voice-label { color: #888; }
  .example     { color: #bbb; border-right-color: #555; }
@ -257,14 +263,14 @@ VOCAB_BACK_HEB = """
 <div class="divider"></div>
 <div class="meaning">{{Meaning}}</div>
 {{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
-{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
-{{#Root}}<div class="sec-label">שורש: {{Root}}</div>{{/Root}}
-{{#PoS}}<div class="sec-label">חלק דיבור: {{PoS}}</div>{{/PoS}}
+{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
+{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
+{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
 {{#SharedRoots}}
-<div class="sec-label">מילים קשורות:</div>
+<div class="sec-label"><span class="sec-key">מִילִים קְשׁוּרוֹת:</span></div>
 <div class="root-info">{{SharedRoots}}</div>
 {{/SharedRoots}}
-{{#Plural}}<div class="sec-label">רבים: <span class="hebrew-sm">{{Plural}}</span></div>{{/Plural}}
+{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
 {{#Example}}
 <div class="example">{{Example}}</div>
 {{/Example}}
@ -275,7 +281,7 @@ VOCAB_FRONT_ENG = """
 <div class="meaning">{{Meaning}}</div>
 {{#Hint}}<div class="hint">{{Hint}}</div>{{/Hint}}
 {{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
-{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
+{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
 """

 VOCAB_BACK_ENG = """
@ -283,12 +289,10 @@ VOCAB_BACK_ENG = """
 <div class="divider"></div>
 <div class="hebrew">{{Word}}{{#Prep}} <span class="hebrew-sm">{{Prep}}</span>{{/Prep}}</div>
 {{#Audio}}<div>{{Audio}}</div>{{/Audio}}
-{{#WordNoNikkud}}<div class="sec-label">ללא ניקוד: {{WordNoNikkud}}</div>{{/WordNoNikkud}}
-{{#Root}}<div class="sec-label">שורש: {{Root}}</div>{{/Root}}
-{{#PoS}}<div class="sec-label">חלק דיבור: {{PoS}}</div>{{/PoS}}
-{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
-{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
-{{#Plural}}<div class="sec-label">רבים: <span class="hebrew-sm">{{Plural}}</span></div>{{/Plural}}
+{{#WordNoNikkud}}<div class="sec-label"><span class="sec-key">לְלֹא נִיקּוּד:</span> {{WordNoNikkud}}</div>{{/WordNoNikkud}}
+{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
+{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
+{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
 {{#Example}}
 <div class="example">{{Example}}</div>
 {{/Example}}
@ -449,7 +453,6 @@ VOICE_MAP = {
 # ──────────────────────────────────────────────────────────────────────────────


-
 def _audio_tag(word_no_nikkud: str, audio_dir: Path = AUDIO_DIR) -> str:
    """Return [sound:xxx.mp3] if audio file exists, else empty string."""
    safe = re.sub(r"[^\u05d0-\u05ea]", "", word_no_nikkud)
@ -738,6 +741,7 @@ def build_vocab_deck(

    # Load noun plural forms for vocab card back display
    noun_plural_lookup: dict[str, str] = {}  # word (nikkud) → plural (nikkud)
+    _noun_plural_stripped: dict[str, str] = {}  # word (stripped) → plural (nikkud), fallback
    noun_plural_path = DATA_DIR / "noun_plurals.json"
    if noun_plural_path.exists():
        try:
@ -748,6 +752,9 @@ def build_vocab_deck(
                pl = _entry.get("plural", "")
                if sg and pl:
                    noun_plural_lookup[sg] = pl
+                    s = _strip_nikkud(sg)
+                    if s not in _noun_plural_stripped:
+                        _noun_plural_stripped[s] = pl
            logger.info(f"  Noun plurals loaded: {len(noun_plural_lookup)} entries")
        except (json.JSONDecodeError, OSError):
            pass
@ -933,21 +940,25 @@ def build_vocab_deck(
            if mp3_path not in media_files:
                media_files.append(mp3_path)

+        # Consonant-only form for confusable detection and cloze matching
+        word_consonants = _strip_nikkud(word)
+        is_confusable = word_consonants in _confusable_words
+
        # Example sentences — priority: EPUB (nikkud'd) > Ben Yehuda > none
+        # For confusable words (same consonants, different nikkud), only match by
+        # exact nikkud form to avoid showing wrong-word sentences.
        example_html = ""
        # 1. EPUB/PDF sentences (full nikkud)
-        epub_sents = (
-            epub_examples.get(word) or epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik))
-        )
+        epub_sents = epub_examples.get(word)
+        if not epub_sents and not is_confusable:
+            epub_sents = epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik))
        if epub_sents:
            example_html = epub_sents[0]
        else:
            # 2. Ben Yehuda examples (some have nikkud from nikkud corpus)
-            by_sents = (
-                examples_cache.get(word)
-                or examples_cache.get(word_no_nik)
-                or examples_cache.get(_strip_nikkud(word_no_nik))
-            )
+            by_sents = examples_cache.get(word)
+            if not by_sents and not is_confusable:
+                by_sents = examples_cache.get(word_no_nik) or examples_cache.get(_strip_nikkud(word_no_nik))
            if by_sents:
                # Prefer nikkud'd Ben Yehuda sentences (contain combining marks)
                nikkud_sents = [s for s in by_sents if any("\u0591" <= c <= "\u05c7" for c in s)]
@ -958,13 +969,12 @@ def build_vocab_deck(
        # Uses stripped (no-nikkud) matching. Skips homographs (confusable words).
        cloze_example = ""
        cloze_hint = ""
-        word_consonants = _strip_nikkud(word)
-        if word_consonants and word_consonants not in _confusable_words:
+        if word_consonants and not is_confusable:
            # Pick best sentence for cloze: vetted first, then example_html
            cloze_source = None
-            vetted = (
-                vetted_cloze.get(word) or vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik))
-            )
+            vetted = vetted_cloze.get(word)
+            if not vetted and not is_confusable:
+                vetted = vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik))
            if vetted:
                cloze_source = vetted[0]
            elif example_html:
@ -1041,7 +1051,7 @@ def build_vocab_deck(
                emoji_str,
                prep_str,
                hint_str,
-                noun_plural_lookup.get(word, ""),
+                noun_plural_lookup.get(word, "") or _noun_plural_stripped.get(word_consonants, ""),
                cloze_example,
                cloze_hint,
            ],