fix: card formatting, example sentence homograph protection, plural coverage
Formatting (#5):
- Labels now display with nikkud (שֹׁרֶשׁ, חֵלֶק דִּיבּוּר, רַבִּים, etc.)
- Secondary fields below audio 1.6x bigger (20px → 32px)
- Label keys styled separately (.sec-key class, smaller/dimmer than values)
- Example sentences centered on card (margin: auto, max-width: 90%)
- Emoji only on English side (removed duplicate from Eng→Heb back)
- Broken images hidden via onerror handler

Example sentences (#6):
- Confusable words (same consonants, different nikkud) now only match example sentences by exact nikkud form, preventing wrong-word sentences
- Same protection applied to cloze sentence and vetted sentence lookups

Plural coverage (#3):
- Added stripped-nikkud fallback for noun plural matching
- 3,918 nouns now show plurals (was ~3,604, +314 from fallback)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5685270dfa
commit
def2fc1aca
1 changed file with 39 additions and 29 deletions
|
|
@ -190,7 +190,8 @@ CARD_CSS = """
|
|||
direction: rtl;
|
||||
text-align: right;
|
||||
font-style: italic;
|
||||
margin-top: 10px;
|
||||
margin: 10px auto 0;
|
||||
max-width: 90%;
|
||||
border-right: 3px solid #aaa;
|
||||
padding-right: 8px;
|
||||
}
|
||||
|
|
@ -211,12 +212,16 @@ CARD_CSS = """
|
|||
color: #555;
|
||||
}
|
||||
.sec-label {
|
||||
font-size: 20px;
|
||||
font-size: 32px;
|
||||
color: #555;
|
||||
direction: rtl;
|
||||
text-align: center;
|
||||
margin-top: 6px;
|
||||
}
|
||||
.sec-key {
|
||||
font-size: 24px;
|
||||
color: #888;
|
||||
}
|
||||
.related-group {
|
||||
direction: rtl;
|
||||
text-align: right;
|
||||
|
|
@ -235,6 +240,7 @@ CARD_CSS = """
|
|||
.meaning { color: #82b0ff; }
|
||||
.root-info { color: #aaa; }
|
||||
.sec-label { color: #aaa; }
|
||||
.sec-key { color: #666; }
|
||||
.hint { color: #777; }
|
||||
.voice-label { color: #888; }
|
||||
.example { color: #bbb; border-right-color: #555; }
|
||||
|
|
@ -257,14 +263,14 @@ VOCAB_BACK_HEB = """
|
|||
<div class="divider"></div>
|
||||
<div class="meaning">{{Meaning}}</div>
|
||||
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
|
||||
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
|
||||
{{#Root}}<div class="sec-label">שורש: {{Root}}</div>{{/Root}}
|
||||
{{#PoS}}<div class="sec-label">חלק דיבור: {{PoS}}</div>{{/PoS}}
|
||||
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
|
||||
{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
|
||||
{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
|
||||
{{#SharedRoots}}
|
||||
<div class="sec-label">מילים קשורות:</div>
|
||||
<div class="sec-label"><span class="sec-key">מִילִים קְשׁוּרוֹת:</span></div>
|
||||
<div class="root-info">{{SharedRoots}}</div>
|
||||
{{/SharedRoots}}
|
||||
{{#Plural}}<div class="sec-label">רבים: <span class="hebrew-sm">{{Plural}}</span></div>{{/Plural}}
|
||||
{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
|
||||
{{#Example}}
|
||||
<div class="example">{{Example}}</div>
|
||||
{{/Example}}
|
||||
|
|
@ -275,7 +281,7 @@ VOCAB_FRONT_ENG = """
|
|||
<div class="meaning">{{Meaning}}</div>
|
||||
{{#Hint}}<div class="hint">{{Hint}}</div>{{/Hint}}
|
||||
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
|
||||
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
|
||||
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
|
||||
"""
|
||||
|
||||
VOCAB_BACK_ENG = """
|
||||
|
|
@ -283,12 +289,10 @@ VOCAB_BACK_ENG = """
|
|||
<div class="divider"></div>
|
||||
<div class="hebrew">{{Word}}{{#Prep}} <span class="hebrew-sm">{{Prep}}</span>{{/Prep}}</div>
|
||||
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
|
||||
{{#WordNoNikkud}}<div class="sec-label">ללא ניקוד: {{WordNoNikkud}}</div>{{/WordNoNikkud}}
|
||||
{{#Root}}<div class="sec-label">שורש: {{Root}}</div>{{/Root}}
|
||||
{{#PoS}}<div class="sec-label">חלק דיבור: {{PoS}}</div>{{/PoS}}
|
||||
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
|
||||
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;"></div>{{/Image}}{{/Emoji}}
|
||||
{{#Plural}}<div class="sec-label">רבים: <span class="hebrew-sm">{{Plural}}</span></div>{{/Plural}}
|
||||
{{#WordNoNikkud}}<div class="sec-label"><span class="sec-key">לְלֹא נִיקּוּד:</span> {{WordNoNikkud}}</div>{{/WordNoNikkud}}
|
||||
{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
|
||||
{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
|
||||
{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
|
||||
{{#Example}}
|
||||
<div class="example">{{Example}}</div>
|
||||
{{/Example}}
|
||||
|
|
@ -449,7 +453,6 @@ VOICE_MAP = {
|
|||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
def _audio_tag(word_no_nikkud: str, audio_dir: Path = AUDIO_DIR) -> str:
|
||||
"""Return [sound:xxx.mp3] if audio file exists, else empty string."""
|
||||
safe = re.sub(r"[^\u05d0-\u05ea]", "", word_no_nikkud)
|
||||
|
|
@ -738,6 +741,7 @@ def build_vocab_deck(
|
|||
|
||||
# Load noun plural forms for vocab card back display
|
||||
noun_plural_lookup: dict[str, str] = {} # word (nikkud) → plural (nikkud)
|
||||
_noun_plural_stripped: dict[str, str] = {} # word (stripped) → plural (nikkud), fallback
|
||||
noun_plural_path = DATA_DIR / "noun_plurals.json"
|
||||
if noun_plural_path.exists():
|
||||
try:
|
||||
|
|
@ -748,6 +752,9 @@ def build_vocab_deck(
|
|||
pl = _entry.get("plural", "")
|
||||
if sg and pl:
|
||||
noun_plural_lookup[sg] = pl
|
||||
s = _strip_nikkud(sg)
|
||||
if s not in _noun_plural_stripped:
|
||||
_noun_plural_stripped[s] = pl
|
||||
logger.info(f" Noun plurals loaded: {len(noun_plural_lookup)} entries")
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass
|
||||
|
|
@ -933,21 +940,25 @@ def build_vocab_deck(
|
|||
if mp3_path not in media_files:
|
||||
media_files.append(mp3_path)
|
||||
|
||||
# Consonant-only form for confusable detection and cloze matching
|
||||
word_consonants = _strip_nikkud(word)
|
||||
is_confusable = word_consonants in _confusable_words
|
||||
|
||||
# Example sentences — priority: EPUB (nikkud'd) > Ben Yehuda > none
|
||||
# For confusable words (same consonants, different nikkud), only match by
|
||||
# exact nikkud form to avoid showing wrong-word sentences.
|
||||
example_html = ""
|
||||
# 1. EPUB/PDF sentences (full nikkud)
|
||||
epub_sents = (
|
||||
epub_examples.get(word) or epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik))
|
||||
)
|
||||
epub_sents = epub_examples.get(word)
|
||||
if not epub_sents and not is_confusable:
|
||||
epub_sents = epub_examples.get(word_no_nik) or epub_examples.get(_strip_nikkud(word_no_nik))
|
||||
if epub_sents:
|
||||
example_html = epub_sents[0]
|
||||
else:
|
||||
# 2. Ben Yehuda examples (some have nikkud from nikkud corpus)
|
||||
by_sents = (
|
||||
examples_cache.get(word)
|
||||
or examples_cache.get(word_no_nik)
|
||||
or examples_cache.get(_strip_nikkud(word_no_nik))
|
||||
)
|
||||
by_sents = examples_cache.get(word)
|
||||
if not by_sents and not is_confusable:
|
||||
by_sents = examples_cache.get(word_no_nik) or examples_cache.get(_strip_nikkud(word_no_nik))
|
||||
if by_sents:
|
||||
# Prefer nikkud'd Ben Yehuda sentences (contain combining marks)
|
||||
nikkud_sents = [s for s in by_sents if any("\u0591" <= c <= "\u05c7" for c in s)]
|
||||
|
|
@ -958,13 +969,12 @@ def build_vocab_deck(
|
|||
# Uses stripped (no-nikkud) matching. Skips homographs (confusable words).
|
||||
cloze_example = ""
|
||||
cloze_hint = ""
|
||||
word_consonants = _strip_nikkud(word)
|
||||
if word_consonants and word_consonants not in _confusable_words:
|
||||
if word_consonants and not is_confusable:
|
||||
# Pick best sentence for cloze: vetted first, then example_html
|
||||
cloze_source = None
|
||||
vetted = (
|
||||
vetted_cloze.get(word) or vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik))
|
||||
)
|
||||
vetted = vetted_cloze.get(word)
|
||||
if not vetted and not is_confusable:
|
||||
vetted = vetted_cloze.get(word_no_nik) or vetted_cloze.get(_strip_nikkud(word_no_nik))
|
||||
if vetted:
|
||||
cloze_source = vetted[0]
|
||||
elif example_html:
|
||||
|
|
@ -1041,7 +1051,7 @@ def build_vocab_deck(
|
|||
emoji_str,
|
||||
prep_str,
|
||||
hint_str,
|
||||
noun_plural_lookup.get(word, ""),
|
||||
noun_plural_lookup.get(word, "") or _noun_plural_stripped.get(word_consonants, ""),
|
||||
cloze_example,
|
||||
cloze_hint,
|
||||
],
|
||||
|
|
|
|||
Loading…
Reference in a new issue