v0.14: rescrape vocab, formatting fixes for all decks

- Full pealim.com rescrape: 9,120 words (15 new), all with audio URLs
- Plurals deck: 2:1 regular:irregular ratio (649 notes), RTL arrows, 1.6x hint text
- Conjugation deck: blue infinitive on front, plain meaning on back, nikkud labels
- Confusables deck: larger prompt text (32px), audio only when all words have it
- Validator: non-audio variants no longer false-fail on audio check
- 14 new audio files downloaded

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sochen 2026-03-07 09:26:41 +00:00
parent def2fc1aca
commit 802c369365
6 changed files with 126837 additions and 163568 deletions

View file

@ -39,7 +39,7 @@ COMPLETE_PLURAL_DECK_ID = 1_234_567_903
# Release version tag added to all notes so users can identify which release
# their cards come from (visible in Anki's Browse view and card info).
RELEASE_TAG = "v0.13"
RELEASE_TAG = "v0.14"
# Regex for extracting emoji and Hebrew prepositions from meaning strings
EMOJI_RE = re.compile(r"[\U0001F000-\U0001FFFF\u2600-\u27FF\u2300-\u23FF\uFE00-\uFE0F]+")
@ -362,7 +362,7 @@ VOCAB_MODEL = genanki.Model(
CONJ_FRONT = """
<div class="hebrew">{{Pronoun}}</div>
<div class="hebrew">{{ReferenceForm}}{{#Voice}} <span class="voice-label">({{Voice}})</span>{{/Voice}}</div>
<div class="meaning" style="font-size:28px;">{{ReferenceForm}}{{#Voice}} <span class="voice-label">({{Voice}})</span>{{/Voice}}</div>
<div class="hebrew">{{Tense}}</div>
"""
@ -370,10 +370,10 @@ CONJ_BACK = """
{{FrontSide}}<hr>
<div class="hebrew">{{ConjugatedForm}}</div>
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
{{#Meaning}}<div class="meaning" style="font-size:20px;">{{Meaning}}</div>{{/Meaning}}
<div class="sec-label">שורש: {{Root}}</div>
<div class="sec-label">בניין: {{Binyan}}</div>
{{#RelatedVocab}}<div class="root-info">{{RelatedVocab}}</div>{{/RelatedVocab}}
{{#Meaning}}<div class="sec-label">{{Meaning}}</div>{{/Meaning}}
<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>
<div class="sec-label"><span class="sec-key">בִּנְיָן:</span> {{Binyan}}</div>
{{#RelatedVocab}}<div class="sec-label">{{RelatedVocab}}</div>{{/RelatedVocab}}
"""
CONJ_CSS = CARD_CSS
@ -1245,7 +1245,7 @@ def build_conj_deck(
CONF_FRONT = """
<div class="hebrew" style="font-size:36px;">{{Words}}</div>
<div class="meaning" style="font-size:22px;">מה ההבדל?</div>
<div class="meaning" style="font-size:32px;">מה ההבדל?</div>
"""
CONF_BACK = """
@ -1326,6 +1326,7 @@ def build_confusables_deck(
words_display = " / ".join(w for w, _, _ in unique_entries)
defs_parts = []
audio_parts = []
all_have_audio = True
for w, m, p in unique_entries:
pos_label = f" ({p})" if p else ""
defs_parts.append(
@ -1340,6 +1341,12 @@ def build_confusables_deck(
mp3_path = AUDIO_DIR / mp3_name
if mp3_path not in media_files:
media_files.append(mp3_path)
else:
all_have_audio = False
# Only include audio if every word in the group has it
if not all_have_audio:
audio_parts = []
defs_html = "\n".join(defs_parts)
audio_html = " ".join(audio_parts)
@ -1378,7 +1385,7 @@ PLURAL_FRONT_SG = """
<div class="hebrew">{{Singular}}</div>
{{#SingularAudio}}<div>{{SingularAudio}}</div>{{/SingularAudio}}
<div class="meaning">{{Meaning}}</div>
<div class="hint">יחיד רבים</div>
<div class="hint" style="font-size:28px;">יָחִיד רַבִּים</div>
"""
PLURAL_BACK_SG = """
@ -1391,7 +1398,7 @@ PLURAL_BACK_SG = """
PLURAL_FRONT_PL = """
<div class="hebrew">{{Plural}}</div>
{{#PluralAudio}}<div>{{PluralAudio}}</div>{{/PluralAudio}}
<div class="hint">רבים יחיד</div>
<div class="hint" style="font-size:28px;">רַבִּים יָחִיד</div>
"""
PLURAL_BACK_PL = """
@ -1506,12 +1513,17 @@ def build_plural_deck(
elif mishkal:
by_mishkal.setdefault(mishkal, []).append((word_key, data))
# Select 2-3 exemplars per mishkal, preferring high-frequency words
# Select exemplars per mishkal, preferring high-frequency words.
# Target a ≥2:1 regular:irregular ratio to avoid over-representing
# irregulars — taking up to 6 per mishkal compensates for small groups
# (<6 entries) that can't fill their quota.
per_mishkal = 6
selected: list[tuple[str, dict]] = list(irregulars)
for _mishkal, entries in sorted(by_mishkal.items()):
# Sort by frequency (lower index = more common)
entries.sort(key=lambda e: freq_order.get(e[0], 999999))
selected.extend(entries[:3])
selected.extend(entries[:per_mishkal])
note_count = 0
for _word_key, data in selected:

View file

@ -9104,3 +9104,18 @@
9102,לִתְמֹהַּ,ת - מ - הּ,Verb pa'al,"to be astonished, to be amazed",https://audio.pealim.com/v0/24/24qdaqkg4hhb.mp3,לתמוה
9103,תִּמָּהוֹן,ת - מ - הּ,"Noun kittalonpattern, masculine","amazement, wonder, surprise",https://audio.pealim.com/v0/ru/ru6re1zgrgwe.mp3,תימהון
9104,לְהַתְמִיהַּ,ת - מ - הּ,Verb hif'il,"to amaze, to surprise, to puzzle",https://audio.pealim.com/v0/1t/1t431ihbh20u9.mp3,להתמיה
9105,לְתַמְחֵר,ת - מ - ח - ר,Verb pi'el,"to price, to put a price",https://audio.pealim.com/v0/c1/c1sh9yebge3.mp3,לתמחר
9106,תִּמְחוּר,ת - מ - ח - ר,"Noun kittulpattern, masculine","pricing, price setting",https://audio.pealim.com/v0/xi/xinm4kewoob1.mp3,תמחור
9107,תָּמִיד,-,Adverb,always,https://audio.pealim.com/v0/1p/1pxp0npmvipsg.mp3,תמיד
9108,תְּמִידִי,-,Adjective,"constant, continuous",https://audio.pealim.com/v0/13/13fs1yrbhmwdb.mp3,תמידי
9109,תְּמִיכָה,ת - מ - ך,"Noun ktilapattern, feminine","support, backing, assistance",https://audio.pealim.com/v0/13/13fpxttpeczdf.mp3,תמיכה
9110,לְהִתָּמֵךְ,ת - מ - ך,Verb nif'al,"to rest (on something), to be supported by",https://audio.pealim.com/v0/yz/yznequo62okg.mp3,להיתמך
9111,לִתְמֹךְ,ת - מ - ך,Verb pa'al,"to support, to sustain",https://audio.pealim.com/v0/1r/1r041bgw6gihc.mp3,לתמוך
9112,לְתַמְלֵל,ת - מ - ל - ל,Verb pi'el,to transcribe,https://audio.pealim.com/v0/ti/tifkvr5m5yb7.mp3,לתמלל
9113,תִּמְלוּל,ת - מ - ל - ל,"Noun kittulpattern, masculine","transcription, transcript (of a call, meeting, audio)",https://audio.pealim.com/v0/yh/yhef9kdl90r5.mp3,תימלול
9114,לָתֹם,ת - מ - ם,Verb pa'al,"to be finished, to end",https://audio.pealim.com/v0/bi/bicuzr2g3wh0.mp3,לתום
9115,תָּמִים,ת - מ - ם,Adjective katilpattern,"sincere, innocent, honest; full, complete",https://audio.pealim.com/v0/1p/1pxp0k78sionl.mp3,תמים
9116,תֹּם,ת - מ - ם,"Noun kotelpattern, masculine","end, completion (lit.)",https://audio.pealim.com/v0/wk/wksplskl3zi7.mp3,תום
9117,תֹּם,ת - מ - ם,"Noun kotelpattern, masculine","simplicity, innocence, naivete (lit.)",https://audio.pealim.com/v0/wk/wksplskl3zi7.mp3,תום
9118,לְהִתַּמֵּם,ת - מ - ם,Verb hitpa'el,to play dumb,https://audio.pealim.com/v0/7z/7z00lrx45wxe.mp3,להיתמם
9119,תָּם,ת - מ - ם,Adjective kalpattern,"honest, unsophisticated",https://audio.pealim.com/v0/wk/wksplsz9ik3y.mp3,תם

Can't render this file because it is too large.

View file

@ -9104,3 +9104,18 @@
9102;לִתְמֹהַּ;ת - מ - הּ;Verb pa'al;to be astonished, to be amazed;https://audio.pealim.com/v0/24/24qdaqkg4hhb.mp3;לתמוה;תִּמָּהוֹן לְהַתְמִיהַּ;שורש::תמהּ פעלים
9103;תִּמָּהוֹן;ת - מ - הּ;Noun kittalonpattern, masculine;amazement, wonder, surprise;https://audio.pealim.com/v0/ru/ru6re1zgrgwe.mp3;תימהון;לִתְמֹהַּ לְהַתְמִיהַּ;שורש::תמהּ שם_עצם
9104;לְהַתְמִיהַּ;ת - מ - הּ;Verb hif'il;to amaze, to surprise, to puzzle;https://audio.pealim.com/v0/1t/1t431ihbh20u9.mp3;להתמיה;לִתְמֹהַּ תִּמָּהוֹן;שורש::תמהּ פעלים
9105;לְתַמְחֵר;ת - מ - ח - ר;Verb pi'el;to price, to put a price;https://audio.pealim.com/v0/c1/c1sh9yebge3.mp3;לתמחר;תִּמְחוּר;שורש::תמחר פעלים
9106;תִּמְחוּר;ת - מ - ח - ר;Noun kittulpattern, masculine;pricing, price setting;https://audio.pealim.com/v0/xi/xinm4kewoob1.mp3;תמחור;לְתַמְחֵר;שורש::תמחר שם_עצם
9107;תָּמִיד;-;Adverb;always;https://audio.pealim.com/v0/1p/1pxp0npmvipsg.mp3;תמיד;;תוארי_הפועל
9108;תְּמִידִי;-;Adjective;constant, continuous;https://audio.pealim.com/v0/13/13fs1yrbhmwdb.mp3;תמידי;;שם_תואר
9109;תְּמִיכָה;ת - מ - ך;Noun ktilapattern, feminine;support, backing, assistance;https://audio.pealim.com/v0/13/13fpxttpeczdf.mp3;תמיכה;לְהִתָּמֵךְ לִתְמֹךְ;שורש::תמך שם_עצם
9110;לְהִתָּמֵךְ;ת - מ - ך;Verb nif'al;to rest (on something), to be supported by;https://audio.pealim.com/v0/yz/yznequo62okg.mp3;להיתמך;תְּמִיכָה לִתְמֹךְ;שורש::תמך פעלים
9111;לִתְמֹךְ;ת - מ - ך;Verb pa'al;to support, to sustain;https://audio.pealim.com/v0/1r/1r041bgw6gihc.mp3;לתמוך;תְּמִיכָה לְהִתָּמֵךְ;שורש::תמך פעלים
9112;לְתַמְלֵל;ת - מ - ל - ל;Verb pi'el;to transcribe;https://audio.pealim.com/v0/ti/tifkvr5m5yb7.mp3;לתמלל;תִּמְלוּל;שורש::תמלל פעלים
9113;תִּמְלוּל;ת - מ - ל - ל;Noun kittulpattern, masculine;transcription, transcript (of a call, meeting, audio);https://audio.pealim.com/v0/yh/yhef9kdl90r5.mp3;תימלול;לְתַמְלֵל;שורש::תמלל שם_עצם
9114;לָתֹם;ת - מ - ם;Verb pa'al;to be finished, to end;https://audio.pealim.com/v0/bi/bicuzr2g3wh0.mp3;לתום;תָּמִים תֹּם תֹּם לְהִתַּמֵּם תָּם;שורש::תמם פעלים
9115;תָּמִים;ת - מ - ם;Adjective katilpattern;"sincere, innocent, honest; full, complete";https://audio.pealim.com/v0/1p/1pxp0k78sionl.mp3;תמים;לָתֹם תֹּם תֹּם לְהִתַּמֵּם תָּם;שורש::תמם שם_תואר
9116;תֹּם;ת - מ - ם;Noun kotelpattern, masculine;end, completion (lit.);https://audio.pealim.com/v0/wk/wksplskl3zi7.mp3;תום;לָתֹם תָּמִים לְהִתַּמֵּם תָּם;שורש::תמם שם_עצם
9117;תֹּם;ת - מ - ם;Noun kotelpattern, masculine;simplicity, innocence, naivete (lit.);https://audio.pealim.com/v0/wk/wksplskl3zi7.mp3;תום;לָתֹם תָּמִים לְהִתַּמֵּם תָּם;שורש::תמם שם_עצם
9118;לְהִתַּמֵּם;ת - מ - ם;Verb hitpa'el;to play dumb;https://audio.pealim.com/v0/7z/7z00lrx45wxe.mp3;להיתמם;לָתֹם תָּמִים תֹּם תֹּם תָּם;שורש::תמם פעלים
9119;תָּם;ת - מ - ם;Adjective kalpattern;honest, unsophisticated;https://audio.pealim.com/v0/wk/wksplsz9ik3y.mp3;תם;לָתֹם תָּמִים תֹּם תֹּם לְהִתַּמֵּם;שורש::תמם שם_תואר

Can't render this file because it is too large.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -238,7 +238,11 @@ def validate_apkg(apkg_path: Path) -> int:
notes_with_audio = sum(1 for (flds,) in notes_flds if "[sound:" in flds)
pct = notes_with_audio / note_count * 100 if note_count else 0
check("Notes with audio", notes_with_audio > 0, f"{notes_with_audio:,}/{note_count:,} ({pct:.0f}%)")
if notes_with_audio > 0:
check("Notes with audio", True, f"{notes_with_audio:,}/{note_count:,} ({pct:.0f}%)")
else:
# Non-audio variants intentionally have no audio — not a failure
warn("No audio in this deck variant", f"0/{note_count:,}")
# --- Empty fields check ---
print("\n[Field content]")