fix: card formatting, example sentence homograph protection, plural coverage

Formatting (#5):
- Labels now display with nikkud (שֹׁרֶשׁ, חֵלֶק דִּיבּוּר, רַבִּים, etc.)
- Secondary fields below the audio are now 1.6× larger (20px → 32px)
- Label keys styled separately (.sec-key class, smaller/dimmer than values)
- Example sentences centered on card (margin: auto, max-width: 90%)
- Emoji only on English side (removed duplicate from Eng→Heb back)
- Broken images hidden via onerror handler

Example sentences (#6):
- Confusable words (same consonants, different nikkud) now only match
  example sentences by exact nikkud form, preventing wrong-word sentences
- Same exact-nikkud rule applied to the vetted cloze sentence lookup (cloze
  generation itself is still skipped for confusable words)

Plural coverage (#3):
- Added stripped-nikkud fallback for noun plural matching
- 3,918 nouns now show plurals (was ~3,604, +314 from fallback)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sochen 2026-03-07 08:45:53 +00:00
parent 5685270dfa
commit def2fc1aca

View file

@ -190,7 +190,8 @@ CARD_CSS = """
direction: rtl;
text-align: right;
font-style: italic;
margin-top: 10px;
margin: 10px auto 0;
max-width: 90%;
border-right: 3px solid #aaa;
padding-right: 8px;
}
@ -211,12 +212,16 @@ CARD_CSS = """
color: #555;
}
.sec-label {
font-size: 20px;
font-size: 32px;
color: #555;
direction: rtl;
text-align: center;
margin-top: 6px;
}
.sec-key {
font-size: 24px;
color: #888;
}
.related-group {
direction: rtl;
text-align: right;
@ -235,6 +240,7 @@ CARD_CSS = """
.meaning { color: #82b0ff; }
.root-info { color: #aaa; }
.sec-label { color: #aaa; }
.sec-key { color: #666; }
.hint { color: #777; }
.voice-label { color: #888; }
.example { color: #bbb; border-right-color: #555; }
@ -257,14 +263,14 @@ VOCAB_BACK_HEB = """
<div class="divider"></div>
<div class="meaning">{{Meaning}}</div>
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
{{#Root}}<div class="sec-label">שורש: {{Root}}</div>{{/Root}}
{{#PoS}}<div class="sec-label">חלק דיבור: {{PoS}}</div>{{/PoS}}
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
{{#SharedRoots}}
<div class="sec-label">מילים קשורות:</div>
<div class="sec-label"><span class="sec-key">מִילִים קְשׁוּרוֹת:</span></div>
<div class="root-info">{{SharedRoots}}</div>
{{/SharedRoots}}
{{#Plural}}<div class="sec-label">רבים: <span class="hebrew-sm">{{Plural}}</span></div>{{/Plural}}
{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
{{#Example}}
<div class="example">{{Example}}</div>
{{/Example}}
@ -275,7 +281,7 @@ VOCAB_FRONT_ENG = """
<div class="meaning">{{Meaning}}</div>
{{#Hint}}<div class="hint">{{Hint}}</div>{{/Hint}}
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
"""
VOCAB_BACK_ENG = """
@ -283,12 +289,10 @@ VOCAB_BACK_ENG = """
<div class="divider"></div>
<div class="hebrew">{{Word}}{{#Prep}} <span class="hebrew-sm">{{Prep}}</span>{{/Prep}}</div>
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
{{#WordNoNikkud}}<div class="sec-label">ללא ניקוד: {{WordNoNikkud}}</div>{{/WordNoNikkud}}
{{#Root}}<div class="sec-label">שורש: {{Root}}</div>{{/Root}}
{{#PoS}}<div class="sec-label">חלק דיבור: {{PoS}}</div>{{/PoS}}
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
{{#Plural}}<div class="sec-label">רבים: <span class="hebrew-sm">{{Plural}}</span></div>{{/Plural}}
{{#WordNoNikkud}}<div class="sec-label"><span class="sec-key">לְלֹא נִיקּוּד:</span> {{WordNoNikkud}}</div>{{/WordNoNikkud}}
{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
{{#Example}}
<div class="example">{{Example}}</div>
{{/Example}}
@ -449,7 +453,6 @@ VOICE_MAP = {
# ──────────────────────────────────────────────────────────────────────────────
def _audio_tag(word_no_nikkud: str, audio_dir: Path = AUDIO_DIR) -> str:
"""Return [sound:xxx.mp3] if audio file exists, else empty string."""
safe = re.sub(r"[^\u05d0-\u05ea]", "", word_no_nikkud)
@ -738,6 +741,7 @@ def build_vocab_deck(
# Load noun plural forms for vocab card back display
noun_plural_lookup: dict[str, str] = {} # word (nikkud) → plural (nikkud)
_noun_plural_stripped: dict[str, str] = {} # word (stripped) → plural (nikkud), fallback
noun_plural_path = DATA_DIR / "noun_plurals.json"
if noun_plural_path.exists():
try:
@ -748,6 +752,9 @@ def build_vocab_deck(
pl = _entry.get("plural", "")
if sg and pl:
noun_plural_lookup[sg] = pl
s = _strip_nikkud(sg)
if s not in _noun_plural_stripped:
_noun_plural_stripped[s] = pl
logger.info(f" Noun plurals loaded: {len(noun_plural_lookup)} entries")
except (json.JSONDecodeError, OSError):
pass
@ -933,21 +940,25 @@ def build_vocab_deck(
if mp3_path not in media_files:
media_files.append(mp3_path)
# Consonant-only form for confusable detection and cloze matching
word_consonants = _strip_nikkud(word)
is_confusable = word_consonants in _confusable_words
# Example sentences — priority: EPUB (nikkud'd) > Ben Yehuda > none
# For confusable words (same consonants, different nikkud), only match by
# exact nikkud form to avoid showing wrong-word sentences.
example_html = ""
# 1. EPUB/PDF sentences (full nikkud)
epub_sents = (
epub_examples.get(word) or epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik))
)
epub_sents = epub_examples.get(word)
if not epub_sents and not is_confusable:
epub_sents = epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik))
if epub_sents:
example_html = epub_sents[0]
else:
# 2. Ben Yehuda examples (some have nikkud from nikkud corpus)
by_sents = (
examples_cache.get(word)
or examples_cache.get(word_no_nik)
or examples_cache.get(_strip_nikkud(word_no_nik))
)
by_sents = examples_cache.get(word)
if not by_sents and not is_confusable:
by_sents = examples_cache.get(word_no_nik) or examples_cache.get(_strip_nikkud(word_no_nik))
if by_sents:
# Prefer nikkud'd Ben Yehuda sentences (contain combining marks)
nikkud_sents = [s for s in by_sents if any("\u0591" <= c <= "\u05c7" for c in s)]
@ -958,13 +969,12 @@ def build_vocab_deck(
# Uses stripped (no-nikkud) matching. Skips homographs (confusable words).
cloze_example = ""
cloze_hint = ""
word_consonants = _strip_nikkud(word)
if word_consonants and word_consonants not in _confusable_words:
if word_consonants and not is_confusable:
# Pick best sentence for cloze: vetted first, then example_html
cloze_source = None
vetted = (
vetted_cloze.get(word) or vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik))
)
vetted = vetted_cloze.get(word)
if not vetted and not is_confusable:
vetted = vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik))
if vetted:
cloze_source = vetted[0]
elif example_html:
@ -1041,7 +1051,7 @@ def build_vocab_deck(
emoji_str,
prep_str,
hint_str,
noun_plural_lookup.get(word, ""),
noun_plural_lookup.get(word, "") or _noun_plural_stripped.get(word_consonants, ""),
cloze_example,
cloze_hint,
],