Remove strip_nikkud from all pipeline files — use ktiv_male directly. Fix case-insensitive binyan matching in detail scraper (og:description uses UPPERCASE). Fix integration test slugs and test limits. Delete legacy CSVs, stale .apkg, and dead scripts from git. Add vulture to pre-commit hook. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1769 lines
62 KiB
Python
1769 lines
62 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Build Anki .apkg files for both the vocabulary deck and the conjugation deck.
|
|
Uses genanki for reliable, stable deck generation.
|
|
|
|
Deck IDs are hardcoded integers — same ID on re-import updates the existing deck
|
|
in Anki rather than creating a duplicate.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import random
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import genanki
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Stable deck/model IDs — do not change these
|
|
VOCAB_DECK_ID = 1_234_567_890
|
|
VOCAB_MODEL_ID = 1_701_222_017_968 # matches Nevo's original Anki model
|
|
CONJ_DECK_ID = 1_234_567_892
|
|
CONJ_MODEL_ID = 1_234_567_893
|
|
CONF_DECK_ID = 1_234_567_894
|
|
CONF_MODEL_ID = 1_234_567_895
|
|
PLURAL_DECK_ID = 1_234_567_896
|
|
PLURAL_MODEL_ID = 1_234_567_897
|
|
|
|
# Subdeck IDs for combined "Hebrew::*" package — MUST differ from standalone IDs
|
|
COMPLETE_VOCAB_DECK_ID = 1_234_567_900
|
|
COMPLETE_CONJ_DECK_ID = 1_234_567_901
|
|
COMPLETE_CONF_DECK_ID = 1_234_567_902
|
|
COMPLETE_PLURAL_DECK_ID = 1_234_567_903
|
|
|
|
# Release version tag added to all notes so users can identify which release
|
|
# their cards come from (visible in Anki's Browse view and card info).
|
|
RELEASE_TAG = "v0.15"
|
|
|
|
# Regex for extracting emoji and Hebrew prepositions from meaning strings
|
|
EMOJI_RE = re.compile(r"[\U0001F000-\U0001FFFF\u2600-\u27FF\u2300-\u23FF\uFE00-\uFE0F]+")
|
|
HBPAREN_RE = re.compile(r"\(([\u05b0-\u05ea\u05f0-\u05f4]+)\)")
|
|
|
|
DATA_DIR = Path(__file__).parent / "data"
|
|
|
|
AUDIO_DIR = DATA_DIR / "audio"
|
|
AUDIO_CONJ_DIR = DATA_DIR / "audio_conj"
|
|
OUTPUT_DIR = Path(__file__).parent / "output"
|
|
|
|
VOCAB_APKG = OUTPUT_DIR / "hebrew_vocabulary.apkg"
|
|
VOCAB_APKG_AUDIO = OUTPUT_DIR / "hebrew_vocabulary_audio.apkg"
|
|
VOCAB_APKG_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_images.apkg"
|
|
VOCAB_APKG_AUDIO_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_audio_images.apkg"
|
|
CONJ_APKG = OUTPUT_DIR / "hebrew_conjugations.apkg"
|
|
CONJ_APKG_AUDIO = OUTPUT_DIR / "hebrew_conjugations_audio.apkg"
|
|
CONF_APKG = OUTPUT_DIR / "hebrew_confusables.apkg"
|
|
CONF_APKG_AUDIO = OUTPUT_DIR / "hebrew_confusables_audio.apkg"
|
|
PLURAL_APKG = OUTPUT_DIR / "hebrew_plurals.apkg"
|
|
PLURAL_APKG_AUDIO = OUTPUT_DIR / "hebrew_plurals_audio.apkg"
|
|
COMPLETE_APKG = OUTPUT_DIR / "hebrew_complete.apkg"
|
|
COMPLETE_APKG_AUDIO = OUTPUT_DIR / "hebrew_complete_audio.apkg"
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Binyan → Hebrew label mapping (for conjugation card display)
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
BINYAN_TO_HEBREW: dict[str, str] = {
|
|
"Pa'al": "פָּעַל",
|
|
"Nif'al": "נִפְעַל",
|
|
"Pi'el": "פִּעֵל",
|
|
"Pu'al": "פֻּעַל",
|
|
"Hitpa'el": "הִתְפַּעֵל",
|
|
"Hif'il": "הִפְעִיל",
|
|
"Huf'al": "הֻפְעַל",
|
|
}
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# PoS → Hebrew label mapping
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
POS_TO_HEBREW = {
|
|
"Noun": "שם עצם",
|
|
"Verb": "פועל",
|
|
"Adjective": "שם תואר",
|
|
"Adverb": "תואר הפועל",
|
|
"Preposition": "מילת יחס",
|
|
"Conjunction": "מילת חיבור",
|
|
"Pronoun": "כינוי גוף",
|
|
"Particle": "מילית",
|
|
}
|
|
|
|
# PoS category groupings for related-words display
|
|
POS_CATEGORY_LABELS = {
|
|
"Verb": "פעלים",
|
|
"Noun": "שמות עצם",
|
|
"Adjective": "שמות תואר",
|
|
"Adverb": "תוארי הפועל",
|
|
}
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Shared CSS
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
FONTS_DIR = DATA_DIR / "fonts"
|
|
|
|
CARD_CSS = """
|
|
@font-face {
|
|
font-family: 'Heebo';
|
|
src: url('_Heebo-Regular.ttf');
|
|
font-weight: normal;
|
|
}
|
|
@font-face {
|
|
font-family: 'Heebo';
|
|
src: url('_Heebo-Bold.ttf');
|
|
font-weight: bold;
|
|
}
|
|
.card {
|
|
font-family: 'Heebo', 'Arial Hebrew', 'David', Arial, sans-serif;
|
|
font-size: 20px;
|
|
text-align: center;
|
|
color: #222;
|
|
background: #fff;
|
|
padding: 16px;
|
|
}
|
|
.hebrew {
|
|
font-size: 36px;
|
|
font-weight: bold;
|
|
direction: rtl;
|
|
text-align: center;
|
|
line-height: 1.5;
|
|
color: #222;
|
|
}
|
|
.hebrew-sm {
|
|
font-size: 24px;
|
|
font-weight: normal;
|
|
direction: rtl;
|
|
text-align: center;
|
|
color: #333;
|
|
}
|
|
.meaning {
|
|
font-size: 28px;
|
|
color: #1a1a8c;
|
|
margin: 8px 0;
|
|
}
|
|
.hint {
|
|
font-size: 16px;
|
|
color: #888;
|
|
margin: 4px 0;
|
|
direction: rtl;
|
|
}
|
|
.root-info {
|
|
font-size: 18px;
|
|
color: #555;
|
|
margin-top: 6px;
|
|
direction: rtl;
|
|
}
|
|
.example {
|
|
font-size: 18px;
|
|
color: #444;
|
|
direction: rtl;
|
|
text-align: right;
|
|
font-style: italic;
|
|
margin: 10px auto 0;
|
|
max-width: 90%;
|
|
border-right: 3px solid #aaa;
|
|
padding-right: 8px;
|
|
}
|
|
.divider { border-top: 1px solid #ddd; margin: 10px 0; }
|
|
.freq-badge {
|
|
display: inline-block;
|
|
font-size: 11px;
|
|
color: #aaa;
|
|
background: transparent;
|
|
border: 1px solid #eee;
|
|
border-radius: 10px;
|
|
padding: 2px 8px;
|
|
margin-top: 4px;
|
|
}
|
|
.voice-label {
|
|
font-size: 0.6em;
|
|
font-weight: normal;
|
|
color: #555;
|
|
}
|
|
.sec-label {
|
|
font-size: 20px;
|
|
font-weight: normal;
|
|
color: #555;
|
|
direction: rtl;
|
|
text-align: center;
|
|
margin-top: 6px;
|
|
}
|
|
.sec-key {
|
|
font-size: 18px;
|
|
color: #888;
|
|
}
|
|
.definitions {
|
|
direction: rtl;
|
|
text-align: center;
|
|
}
|
|
.conf-entry {
|
|
margin: 8px 0;
|
|
font-size: 20px;
|
|
direction: rtl;
|
|
}
|
|
.related-group {
|
|
direction: rtl;
|
|
text-align: right;
|
|
margin: 2px 0;
|
|
font-size: 18px;
|
|
}
|
|
.emoji-img {
|
|
font-size: 3.5em;
|
|
text-align: center;
|
|
margin: 0.3em 0;
|
|
}
|
|
@media (prefers-color-scheme: dark) {
|
|
.card { color: #e8e8e8; background: #1c1c1e; }
|
|
.hebrew { color: #f0f0f0; }
|
|
.hebrew-sm { color: #ddd; }
|
|
.meaning { color: #82b0ff; }
|
|
.root-info { color: #aaa; }
|
|
.sec-label { color: #aaa; }
|
|
.sec-key { color: #666; }
|
|
.conf-entry { color: #ddd; }
|
|
.hint { color: #777; }
|
|
.voice-label { color: #888; }
|
|
.example { color: #bbb; border-right-color: #555; }
|
|
.divider { border-top-color: #333; }
|
|
.freq-badge { color: #888; border-color: #444; }
|
|
}
|
|
"""
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Vocabulary Deck
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
VOCAB_FRONT_HEB = """
|
|
<div class="hebrew">{{Word}}{{#Prep}} <span class="hebrew-sm">{{Prep}}</span>{{/Prep}}</div>
|
|
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
|
|
"""
|
|
|
|
VOCAB_BACK_HEB = """
|
|
{{FrontSide}}
|
|
<div class="divider"></div>
|
|
<div class="meaning">{{Meaning}}</div>
|
|
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
|
|
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
|
|
{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
|
|
{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
|
|
{{#SharedRoots}}
|
|
<div class="sec-label"><span class="sec-key">מִילִים קְשׁוּרוֹת:</span></div>
|
|
<div class="root-info">{{SharedRoots}}</div>
|
|
{{/SharedRoots}}
|
|
{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
|
|
{{#Example}}
|
|
<div class="example">{{Example}}</div>
|
|
{{/Example}}
|
|
{{#Frequency}}<div class="freq-badge">#{{Frequency}}</div>{{/Frequency}}
|
|
"""
|
|
|
|
VOCAB_FRONT_ENG = """
|
|
<div class="meaning">{{Meaning}}</div>
|
|
{{#Hint}}<div class="hint">{{Hint}}</div>{{/Hint}}
|
|
{{#Emoji}}<div class="emoji-img">{{Emoji}}</div>{{/Emoji}}
|
|
{{^Emoji}}{{#Image}}<div><img src="{{Image}}" style="max-height:150px;margin-top:8px;" onerror="this.parentElement.style.display='none'"></div>{{/Image}}{{/Emoji}}
|
|
"""
|
|
|
|
VOCAB_BACK_ENG = """
|
|
{{FrontSide}}
|
|
<div class="divider"></div>
|
|
<div class="hebrew">{{Word}}{{#Prep}} <span class="hebrew-sm">{{Prep}}</span>{{/Prep}}</div>
|
|
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
|
|
{{#WordNoNikkud}}<div class="sec-label"><span class="sec-key">לְלֹא נִיקּוּד:</span> {{WordNoNikkud}}</div>{{/WordNoNikkud}}
|
|
{{#Root}}<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>{{/Root}}
|
|
{{#PoS}}<div class="sec-label"><span class="sec-key">חֵלֶק דִּיבּוּר:</span> {{PoS}}</div>{{/PoS}}
|
|
{{#Plural}}<div class="sec-label"><span class="sec-key">רַבִּים:</span> {{Plural}}</div>{{/Plural}}
|
|
{{#Example}}
|
|
<div class="example">{{Example}}</div>
|
|
{{/Example}}
|
|
"""
|
|
|
|
VOCAB_FRONT_CLOZE = """
|
|
<div class="example" style="font-size:24px;font-style:normal;border:none;padding:0;text-align:center;">{{ClozeExample}}</div>
|
|
{{#ClozeHint}}<div class="hint">{{ClozeHint}}</div>{{/ClozeHint}}
|
|
"""
|
|
|
|
VOCAB_BACK_CLOZE = """
|
|
{{FrontSide}}
|
|
<div class="divider"></div>
|
|
<div class="hebrew">{{Word}}</div>
|
|
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
|
|
<div class="meaning">{{Meaning}}</div>
|
|
"""
|
|
|
|
VOCAB_MODEL = genanki.Model(
|
|
VOCAB_MODEL_ID,
|
|
"Hebrew Flash Cards",
|
|
fields=[
|
|
{"name": "Word"},
|
|
{"name": "Root"},
|
|
{"name": "PoS"},
|
|
{"name": "Meaning"},
|
|
{"name": "WordNoNikkud"},
|
|
{"name": "SharedRoots"},
|
|
{"name": "Tags"},
|
|
{"name": "Audio"},
|
|
{"name": "Example"},
|
|
{"name": "Frequency"},
|
|
{"name": "Image"},
|
|
{"name": "Emoji"},
|
|
{"name": "Prep"},
|
|
{"name": "Hint"},
|
|
{"name": "Plural"},
|
|
{"name": "ClozeExample"},
|
|
{"name": "ClozeHint"},
|
|
],
|
|
templates=[
|
|
{
|
|
# ord 0 — matches Nevo's original "Card 2" (Eng→Heb)
|
|
"name": "English → Hebrew",
|
|
"qfmt": VOCAB_FRONT_ENG,
|
|
"afmt": VOCAB_BACK_ENG,
|
|
},
|
|
{
|
|
# ord 1 — matches Nevo's original "Card 3" (Heb→Eng)
|
|
"name": "Hebrew → English",
|
|
"qfmt": VOCAB_FRONT_HEB,
|
|
"afmt": VOCAB_BACK_HEB,
|
|
},
|
|
{
|
|
# ord 2 — Cloze-style sentence fill-in (only generated when ClozeExample is non-empty)
|
|
"name": "Sentence Cloze",
|
|
"qfmt": VOCAB_FRONT_CLOZE,
|
|
"afmt": VOCAB_BACK_CLOZE,
|
|
},
|
|
],
|
|
css=CARD_CSS,
|
|
)
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Conjugation Deck
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
CONJ_FRONT = """
|
|
<div class="sec-label">אֵיךְ אוֹמְרִים</div>
|
|
<div class="hebrew" style="color:#1a1a8c;">{{ReferenceForm}}{{#Prep}} ({{Prep}}){{/Prep}}{{#Voice}} <span class="voice-label">({{Voice}})</span>{{/Voice}}</div>
|
|
<div class="hebrew">{{Pronoun}}</div>
|
|
<div class="hebrew">{{Tense}}</div>
|
|
"""
|
|
|
|
CONJ_BACK = """
|
|
{{FrontSide}}<hr>
|
|
<div class="hebrew">{{ConjugatedForm}}{{#Prep}} ({{Prep}}){{/Prep}}</div>
|
|
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
|
|
{{#Meaning}}<div class="sec-label">{{Meaning}}</div>{{/Meaning}}
|
|
<div class="sec-label"><span class="sec-key">שֹׁרֶשׁ:</span> {{Root}}</div>
|
|
<div class="sec-label"><span class="sec-key">בִּנְיָן:</span> {{Binyan}}</div>
|
|
{{#RelatedVocab}}<div class="sec-label"><span class="sec-key">מִילִים קְשׁוּרוֹת:</span></div>
|
|
<div class="sec-label">{{RelatedVocab}}</div>{{/RelatedVocab}}
|
|
"""
|
|
|
|
CONJ_CSS = CARD_CSS
|
|
|
|
CONJ_MODEL = genanki.Model(
|
|
CONJ_MODEL_ID,
|
|
"Pealim Conjugation",
|
|
fields=[
|
|
{"name": "Infinitive"},
|
|
{"name": "ReferenceForm"},
|
|
{"name": "Pronoun"},
|
|
{"name": "Tense"},
|
|
{"name": "ConjugatedForm"},
|
|
{"name": "Root"},
|
|
{"name": "Binyan"},
|
|
{"name": "Voice"},
|
|
{"name": "Audio"},
|
|
{"name": "Meaning"},
|
|
{"name": "RelatedVocab"},
|
|
{"name": "Prep"},
|
|
],
|
|
templates=[
|
|
{
|
|
"name": "Conjugation Drill",
|
|
"qfmt": CONJ_FRONT,
|
|
"afmt": CONJ_BACK,
|
|
}
|
|
],
|
|
css=CONJ_CSS,
|
|
)
|
|
|
|
# Present-tense expansion: each form key → list of (pronoun, tense_label)
|
|
PRESENT_EXPANSION = {
|
|
"present_ms": [
|
|
("אֲנִי (זָכָר)", "הוֹוֶה"),
|
|
("אַתָּה", "הוֹוֶה"),
|
|
("הוּא", "הוֹוֶה"),
|
|
],
|
|
"present_fs": [
|
|
("אֲנִי (נְקֵבָה)", "הוֹוֶה"),
|
|
("אַתְּ", "הוֹוֶה"),
|
|
("הִיא", "הוֹוֶה"),
|
|
],
|
|
"present_mp": [
|
|
("אֲנַחְנוּ (זָכָר)", "הוֹוֶה"),
|
|
("אַתֶּם", "הוֹוֶה"),
|
|
("הֵם", "הוֹוֶה"),
|
|
],
|
|
"present_fp": [
|
|
("אֲנַחְנוּ (נְקֵבָה)", "הוֹוֶה"),
|
|
("אַתֶּן", "הוֹוֶה"),
|
|
("הֵן", "הוֹוֶה"),
|
|
],
|
|
}
|
|
|
|
# Modern Hebrew: 2fp/3fp future and imperative default to mp form (classical in parens)
|
|
FP_MODERN_FALLBACK = {
|
|
"future_2fp": "future_2mp",
|
|
"future_3fp": "future_3mp",
|
|
"imperative_fp": "imperative_mp",
|
|
}
|
|
|
|
# 3rd person plural past: same form for m/f — generate two separate pronoun cards
|
|
PAST_3P_EXPANSION = [
|
|
("הֵם", "עָבָר"),
|
|
("הֵן", "עָבָר"),
|
|
]
|
|
|
|
# Tense labels with "בְּ" prefix for display on cards
|
|
# Maps a bare Hebrew tense label to the prefixed form shown on cards.
# Both nikkud spellings of the imperative label are listed, mirroring
# TENSE_KEY_MAP, so either spelling in the data gets the same prefixed label
# instead of falling through unprefixed.
TENSE_WITH_BE = {
    "עָבָר": "בֶּעָבָר",
    "הוֹוֶה": "בַּהוֹוֶה",
    "עָתִיד": "בֶּעָתִיד",
    "צִוּוּי": "בַּצִּוּוּי",
    "צִיּוּוּי": "בַּצִּוּוּי",
}
|
|
|
|
# Voice field: passive label only (shown inline on card front for Pu'al/Huf'al)
|
|
VOICE_MAP = {
|
|
"Pu'al": "סָבִיל",
|
|
"Huf'al": "סָבִיל",
|
|
}
|
|
|
|
# Tense Hebrew label → English key prefix (for form_key construction)
|
|
TENSE_KEY_MAP = {
|
|
"עָבָר": "past",
|
|
"הוֹוֶה": "present",
|
|
"עָתִיד": "future",
|
|
"צִוּוּי": "imperative",
|
|
"צִיּוּוּי": "imperative", # alternate spelling
|
|
}
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Helpers
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _load_words() -> dict[str, dict]:
    """Read ``words.json`` from the data directory and return the parsed JSON."""
    words_path = DATA_DIR / "words.json"
    with words_path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
|
|
def _audio_tag(word_no_nikkud: str, audio_dir: Path = AUDIO_DIR, slug: str = "") -> str:
    """Build an Anki ``[sound:...]`` tag for the first matching mp3 in *audio_dir*.

    Candidate filenames are checked in order: ``<slug>.mp3`` (only when a slug
    is given), then ``<letters>.mp3`` where *letters* is *word_no_nikkud* with
    every character outside U+05D0–U+05EA removed. Returns an empty string
    when no candidate file exists.
    """
    candidates: list[Path] = []
    if slug:
        candidates.append(audio_dir / f"{slug}.mp3")

    letters_only = re.sub(r"[^\u05d0-\u05ea]", "", word_no_nikkud)
    if letters_only:
        candidates.append(audio_dir / f"{letters_only}.mp3")

    for candidate in candidates:
        if candidate.exists():
            return f"[sound:{candidate.name}]"
    return ""
|
|
|
|
|
|
def _conj_audio_tag(slug: str, form_key: str) -> str:
|
|
"""Return [sound:xxx.mp3] for conjugation audio if downloaded."""
|
|
filename = f"{slug}_{form_key}.mp3"
|
|
mp3_path = AUDIO_CONJ_DIR / filename
|
|
if mp3_path.exists():
|
|
return f"[sound:{filename}]"
|
|
return ""
|
|
|
|
|
|
# Keywords excluded when building emoji lookup AND matching meaning text.
|
|
# Curated from manual review of all 2,261 emoji-word pairs (Sprint 8).
|
|
_EMOJI_STOP = frozenset(
|
|
{
|
|
# Basic stop words
|
|
"to",
|
|
"be",
|
|
"a",
|
|
"an",
|
|
"the",
|
|
"of",
|
|
"in",
|
|
"on",
|
|
"at",
|
|
"for",
|
|
"and",
|
|
"with",
|
|
"by",
|
|
"or",
|
|
"but",
|
|
"not",
|
|
"as",
|
|
"its",
|
|
# Generic emoji description words (too vague)
|
|
"face",
|
|
"hand",
|
|
"sign",
|
|
"symbol",
|
|
"button",
|
|
"small",
|
|
"large",
|
|
"light",
|
|
"dark",
|
|
"open",
|
|
"closed",
|
|
# Numbers → clock emoji (🕐🕑🕒 etc.)
|
|
"one",
|
|
"two",
|
|
"three",
|
|
"four",
|
|
"five",
|
|
"six",
|
|
"seven",
|
|
"eight",
|
|
"nine",
|
|
"ten",
|
|
"hundred",
|
|
"thousand",
|
|
# UI/media buttons (⏭️▶️⏪⏯️🔁🔚🔜⏺️)
|
|
"next",
|
|
"fast",
|
|
"play",
|
|
"pause",
|
|
"repeat",
|
|
"end",
|
|
"soon",
|
|
"record",
|
|
# Abstract words → misleading object emoji
|
|
"part",
|
|
"place",
|
|
"mark",
|
|
"post",
|
|
"department",
|
|
"store",
|
|
"note",
|
|
"control",
|
|
"level",
|
|
"stop",
|
|
"cover",
|
|
"roll",
|
|
"rolling",
|
|
"pick",
|
|
"over",
|
|
"right",
|
|
"way",
|
|
"skin",
|
|
"drop",
|
|
"middle",
|
|
"piece",
|
|
"section",
|
|
# Country/direction words → flag emoji (🇰🇵🇬🇸🇮🇴🇻🇦🇨🇫 etc.)
|
|
"north",
|
|
"south",
|
|
"northern",
|
|
"southern",
|
|
"western",
|
|
"eastern",
|
|
"central",
|
|
"territory",
|
|
"kingdom",
|
|
"united",
|
|
"virgin",
|
|
# Common words producing bad emoji matches
|
|
"new",
|
|
"big",
|
|
"full",
|
|
"last",
|
|
"first",
|
|
"double",
|
|
"slightly",
|
|
"without",
|
|
"from",
|
|
"behind",
|
|
"people",
|
|
"position",
|
|
"status",
|
|
"situation",
|
|
"game",
|
|
"call",
|
|
"trade",
|
|
"male",
|
|
"female",
|
|
"person",
|
|
"letter",
|
|
# Polysemous words → wrong emoji sense
|
|
"french",
|
|
"fried",
|
|
"board",
|
|
"bow",
|
|
"water",
|
|
"union",
|
|
"rock",
|
|
"left",
|
|
"back",
|
|
"crane",
|
|
"dash",
|
|
"bar",
|
|
"wheel",
|
|
"horizontal",
|
|
}
|
|
)
|
|
|
|
|
|
def _load_emoji_lookup() -> dict[str, str]:
    """Load or fetch the Unicode emoji keyword→character lookup.

    On a cache hit (``data/emoji_lookup.json`` exists) the cached mapping is
    returned as-is. Otherwise unicode.org's ``emoji-test.txt`` is downloaded
    and parsed into ``{keyword: emoji_char}``, the result is written to the
    cache file, and the mapping is returned.

    Returns:
        The keyword→emoji mapping, or an empty dict when the download fails
        (in which case nothing is cached).
    """
    cache_file = DATA_DIR / "emoji_lookup.json"
    if cache_file.exists():
        # The cache stores raw emoji (ensure_ascii=False), so it must be read
        # as UTF-8 regardless of the platform's locale encoding.
        with open(cache_file, encoding="utf-8") as f:
            return json.load(f)

    # Only needed on a cache miss, so it is imported here rather than at module top.
    import requests

    try:
        resp = requests.get(
            "https://unicode.org/Public/emoji/latest/emoji-test.txt",
            timeout=30,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        logger.warning(f"Could not fetch emoji data: {e}. Emoji lookup disabled.")
        return {}

    # Decode as UTF-8 explicitly rather than relying on the charset requests
    # infers from the response headers.
    resp.encoding = "utf-8"

    # Matches the trailing comment of a data line: "# <emoji> E<version> <description>".
    entry_re = re.compile(r"#\s+(\S+)\s+E[\d.]+\s+(.+)")

    lookup: dict[str, str] = {}
    for line in resp.text.splitlines():
        if "fully-qualified" not in line:
            continue
        m = entry_re.search(line)
        if not m:
            continue
        emoji_char = m.group(1)
        desc = m.group(2).lower().strip()
        for word in desc.split():
            word = word.strip(".,'\"-")
            # First emoji seen for a keyword wins; short words and stop words are skipped.
            if len(word) > 2 and word not in _EMOJI_STOP and word not in lookup:
                lookup[word] = emoji_char

    cache_file.write_text(json.dumps(lookup, ensure_ascii=False), encoding="utf-8")
    logger.info(f"Built emoji lookup: {len(lookup)} keywords → {cache_file}")
    return lookup
|
|
|
|
|
|
def _categorize_pos(pos_str: str) -> str:
    """Return the canonical PoS category key for grouping.

    The category is the first key of ``POS_CATEGORY_LABELS`` whose lowercase
    name occurs as a substring of the lowercased *pos_str*; ``"Other"`` is
    returned when none does.

    Longer category names are tried first. A plain substring test in dict
    order would classify every adverb as a verb, because ``"verb"`` is a
    substring of ``"adverb"``.
    """
    pos_lower = pos_str.lower()
    # sorted() is stable, so equally long names keep their dict order.
    for cat in sorted(POS_CATEGORY_LABELS, key=len, reverse=True):
        if cat.lower() in pos_lower:
            return cat
    return "Other"
|
|
|
|
|
|
def _forms_list_to_dict(forms_list: list[dict]) -> dict[str, dict]:
    """Re-key a list of conjugation form records by ``<tense>_<person>``.

    The tense part of the key is the record's ``tense`` value translated
    through ``TENSE_KEY_MAP`` (left unchanged when it has no mapping), which
    gives keys such as ``past_1s``, ``present_ms``, ``future_2mp`` or
    ``imperative_fs``. A later record with the same key replaces an earlier one.

    Each value carries: ``form`` (nikkud spelling), ``form_ktiv`` (ktiv male
    spelling, ``""`` if absent), ``pronoun``, ``tense`` (the original,
    untranslated label), ``audio_url``, ``guid`` and ``guid_candidates``.
    """
    keyed: dict[str, dict] = {}
    for record in forms_list:
        tense_label = record["tense"]
        tense_prefix = TENSE_KEY_MAP.get(tense_label, tense_label)
        form_key = f"{tense_prefix}_{record['person']}"
        spelling = record["form"]
        keyed[form_key] = dict(
            form=spelling["nikkud"],
            form_ktiv=spelling.get("ktiv_male", ""),
            pronoun=record.get("pronoun_hebrew", ""),
            tense=tense_label,
            audio_url=record.get("audio_url", ""),
            guid=record.get("guid"),
            guid_candidates=record.get("guid_candidates"),
        )
    return keyed
|
|
|
|
|
|
def _vocab_freq_label(frequency: int) -> str:
    """Return the frequency badge text for a rank (999_999 or above is "Unlisted")."""
    tiers = (
        (500, "Core"),
        (1500, "Essential"),
        (3000, "Intermediate"),
        (5000, "Upper-intermediate"),
        (10000, "Advanced"),
    )
    for upper_bound, tier in tiers:
        if frequency <= upper_bound:
            return f"{tier} #{frequency}"
    if frequency < 999_999:
        return f"Rare #{frequency}"
    return "Unlisted"


def build_vocab_deck(
    words: dict[str, dict],
    limit: int | None = None,
    include_audio: bool = True,
    include_images: bool = True,
    emoji_lookup: dict | None = None,
) -> tuple[genanki.Deck, list[Path]]:
    """Build the vocabulary deck from the unified words dict.

    Entries are processed in ascending ``frequency`` order (missing frequency
    sorts last). Entries without a nikkud word or a meaning are skipped, as
    are repeats of an already-seen (word, meaning) pair.

    Args:
        words: Unified data dict keyed by unique_key (from words.json).
        limit: If truthy, only the first N entries (by frequency) are
            processed. ``None`` and ``0`` both mean "no limit".
        include_audio: Whether to include audio tags in notes.
        include_images: Whether to include image filenames in notes.
        emoji_lookup: Optional keyword→emoji mapping, used as a fallback when
            an entry has no visible emoji of its own.

    Returns:
        ``(deck, media_files)`` where ``media_files`` lists each referenced
        audio/image path once, in first-use order.
    """
    logger.info(f"Building vocabulary deck from {len(words)} words …")

    images_dir = DATA_DIR / "images"
    punctuation_re = re.compile(r"[^\w\s]")

    # Sort entries by frequency (null → 999999), applying limit after sort
    def _freq_key(item: tuple[str, dict]) -> int:
        return item[1].get("frequency") or 999_999

    sorted_entries = sorted(words.items(), key=_freq_key)
    if limit:
        sorted_entries = sorted_entries[:limit]

    deck = genanki.Deck(VOCAB_DECK_ID, "Hebrew Vocabulary")
    media_files: list[Path] = []
    media_seen: set[Path] = set()  # O(1) duplicate check; the list keeps the order
    seen_words: set[tuple[str, str]] = set()

    def _track_media(path: Path) -> None:
        """Record *path* as a media file unless it is already recorded."""
        if path not in media_seen:
            media_seen.add(path)
            media_files.append(path)

    for _unique_key, entry in sorted_entries:
        word_nikkud = entry["word"]["nikkud"]
        word_no_nik = entry["word"].get("ktiv_male", "")
        root = " ".join(entry.get("root") or [])
        pos_raw = entry.get("pos", "")
        pos_heb = entry.get("pos_hebrew", "")
        meaning = entry.get("meaning", "") or ""
        meaning_raw = entry.get("meaning_raw", "") or ""
        slug = entry.get("slug", "") or ""
        frequency = entry.get("frequency") or 999_999
        audio_file = entry.get("audio_file", "") or ""
        tags_str = entry.get("tags", "") or ""
        hint_str = entry.get("hint", "") or ""
        shared_roots_keys = entry.get("shared_roots") or []
        is_confusable = entry.get("confusable_group") is not None

        if not word_nikkud or not meaning:
            continue

        # Skip exact duplicates (same word AND same meaning)
        word_meaning_key = (word_nikkud, meaning)
        if word_meaning_key in seen_words:
            logger.debug(f" Skipping duplicate word+meaning: {word_nikkud}")
            continue
        seen_words.add(word_meaning_key)

        freq_display = _vocab_freq_label(frequency)

        # Emoji: the entry's own emoji when flagged visible, otherwise the
        # first of the meaning's first five keywords found in emoji_lookup.
        emoji_str = ""
        if entry.get("emoji_visible") and entry.get("emoji"):
            emoji_str = entry["emoji"]
        elif emoji_lookup:
            meaning_clean_for_emoji = EMOJI_RE.sub("", meaning).strip()
            keywords = punctuation_re.sub(" ", meaning_clean_for_emoji.lower()).split()[:5]
            for kw in keywords:
                if len(kw) > 2 and kw not in _EMOJI_STOP and kw in emoji_lookup:
                    emoji_str = emoji_lookup[kw]
                    break

        # Extract Hebrew prepositions (parenthesised Hebrew) from meaning_raw
        preps = HBPAREN_RE.findall(meaning_raw)
        prep_str = " ".join(f"({p})" for p in preps)

        # Audio: prefer the entry's audio_file; if it is unset or missing on
        # disk, fall back to _audio_tag (slug-based only for confusables).
        audio_tag = ""
        if include_audio:
            primary_path = AUDIO_DIR / audio_file if audio_file else None
            if primary_path is not None and primary_path.exists():
                audio_tag = f"[sound:{audio_file}]"
                _track_media(primary_path)
            else:
                audio_tag = _audio_tag(word_no_nik, slug=slug if is_confusable else "")
                if audio_tag:
                    mp3_name = audio_tag.removeprefix("[sound:").removesuffix("]")
                    _track_media(AUDIO_DIR / mp3_name)

        # Example sentence: first vetted example, if any
        example_html = ""
        examples = entry.get("examples") or {}
        if examples.get("vetted"):
            example_html = examples["vetted"][0]["text"]

        # Cloze: blank out the pre-computed word span; confusables get no cloze
        cloze_example = ""
        cloze_hint = ""
        if not is_confusable and examples.get("cloze"):
            cloze_data = examples["cloze"]
            cloze_text = cloze_data.get("text", "")
            start = cloze_data.get("cloze_word_start")
            end = cloze_data.get("cloze_word_end")
            if cloze_text and start is not None and end is not None:
                cloze_example = cloze_text[:start] + "_____" + cloze_text[end:]
                raw_hint = cloze_data.get("cloze_hint") or ""
                if raw_hint:
                    cloze_hint = raw_hint
                else:
                    # No stored hint: use the meaning, plus the Hebrew PoS for verbs
                    pos_cat = _categorize_pos(pos_raw) if pos_raw else "Other"
                    cloze_hint = meaning
                    if pos_cat == "Verb" and pos_heb:
                        cloze_hint = f"{meaning} ({pos_heb})"

        # Related words (shared roots) grouped by PoS category
        related_html = ""
        if shared_roots_keys:
            groups: dict[str, list[str]] = {}
            for rw_key in shared_roots_keys:
                rw_entry = words.get(rw_key)
                if rw_entry:
                    rw_nikkud = rw_entry["word"]["nikkud"]
                    cat = _categorize_pos(rw_entry.get("pos", ""))
                else:
                    # Key not found: use the key itself as display text
                    rw_nikkud = rw_key
                    cat = "Other"
                groups.setdefault(cat, []).append(rw_nikkud)
            parts = []
            for cat, rw_words in groups.items():
                if cat == "Other":
                    parts.append(f'<div class="related-group">{" ".join(rw_words)}</div>')
                else:
                    label = POS_CATEGORY_LABELS.get(cat, cat)
                    parts.append(f'<div class="related-group"><b>{label}:</b> {" ".join(rw_words)}</div>')
            related_html = "\n".join(parts)

        # Plural form (for nouns)
        plural_str = ""
        noun_inflection = entry.get("noun_inflection")
        if noun_inflection and noun_inflection.get("plural"):
            plural_str = noun_inflection["plural"].get("nikkud", "")

        # Image: only referenced when the file actually exists on disk
        image_tag = ""
        if include_images:
            image_filename = entry.get("image") or ""
            if image_filename:
                image_path = images_dir / image_filename
                if image_path.exists():
                    image_tag = image_filename
                    _track_media(image_path)

        # GUID: use vocab_legacy_guid from entry, fall back to deterministic
        legacy_guid = entry.get("vocab_legacy_guid")
        note_guid = legacy_guid or genanki.guid_for(word_nikkud, meaning)

        # "Unlisted" has no "#rank" suffix, so its first token is the whole label.
        freq_tag = f"freq::{freq_display.split()[0]}"

        # Field order must match VOCAB_MODEL's field list.
        note = genanki.Note(
            model=VOCAB_MODEL,
            guid=note_guid,
            fields=[
                word_nikkud,
                root,
                pos_heb,
                meaning,
                word_no_nik,
                related_html or "",
                tags_str,
                audio_tag,
                example_html,
                freq_display,
                image_tag,
                emoji_str,
                prep_str,
                hint_str,
                plural_str,
                cloze_example,
                cloze_hint,
            ],
            tags=(tags_str.split() if tags_str else []) + [RELEASE_TAG, freq_tag],
        )
        deck.add_note(note)

    # Diagnostics (indices are positions in the note's field list above)
    emoji_count = sum(1 for n in deck.notes if n.fields[11])
    prep_count = sum(1 for n in deck.notes if n.fields[12])
    hint_count = sum(1 for n in deck.notes if n.fields[13])
    plural_count = sum(1 for n in deck.notes if n.fields[14])
    cloze_count = sum(1 for n in deck.notes if n.fields[15])
    unlisted = sum(1 for _, e in sorted_entries if (e.get("frequency") or 999_999) >= 999_999)
    if emoji_count:
        logger.info(f" Emoji extracted: {emoji_count} words")
    if prep_count:
        logger.info(f" Hebrew prepositions extracted: {prep_count} words")
    if hint_count:
        logger.info(f" Eng→Heb hints: {hint_count} words")
    if plural_count:
        logger.info(f" Noun plurals on vocab cards: {plural_count} words")
    if cloze_count:
        logger.info(f" Sentence cloze cards: {cloze_count} words")
    logger.info(f" Unlisted words (not in frequency corpus): {unlisted}/{len(sorted_entries)}")
    logger.info(f"Vocabulary deck: {len(deck.notes)} notes")
    return deck, media_files
|
|
|
|
|
|
def build_conj_deck(
    words: dict[str, dict],
    audio_dir: Path = AUDIO_CONJ_DIR,
    include_audio: bool = True,
) -> tuple[genanki.Deck, list[Path]]:
    """Build the conjugation drill deck from words with in_conjugation_deck=True.

    A verb contributes notes when its ``conjugation`` record is flagged
    ``in_conjugation_deck`` and has a non-empty ``active_forms`` list. Every
    form except the infinitive becomes one note. When ``hufal_pual_forms`` is
    present, those passive-partner forms are added for the same verb with the
    passive reference form, binyan and voice.

    Args:
        words: Mapping of unique key to word entry.
        audio_dir: Directory used to build ``{slug}_{form_key}.mp3`` media
            paths (``{slug}_passive_{form_key}.mp3`` for passive forms).
        include_audio: When true, request an audio tag per form and collect
            the matching mp3 path for every non-empty tag.

    Returns:
        ``(deck, media_files)`` where ``media_files`` holds each collected
        path once, in first-seen order.
    """
    deck = genanki.Deck(CONJ_DECK_ID, "Hebrew Conjugations")
    media_files: list[Path] = []
    seen_media: set[Path] = set()  # O(1) duplicate check; the list keeps the order
    note_count = 0
    verb_count = 0

    # Build root → [related word nikkud] lookup for cross-linking
    root_words: dict[str, list[str]] = {}
    for entry in words.values():
        root_key = " ".join(entry.get("root") or [])
        if root_key:
            root_words.setdefault(root_key, []).append(entry["word"]["nikkud"])

    for entry in words.values():
        conj = entry.get("conjugation")
        if not conj or not conj.get("in_conjugation_deck"):
            continue

        active_forms_list = conj.get("active_forms") or []
        if not active_forms_list:
            continue

        verb_count += 1
        infinitive = conj["infinitive"]["nikkud"]
        ref_form = conj["reference_form"]["nikkud"]
        binyan = conj.get("binyan", "")
        binyan_heb: str = conj.get("binyan_hebrew") or BINYAN_TO_HEBREW.get(binyan, binyan) or ""
        slug = entry.get("slug", "") or ""
        root = " ".join(entry.get("root") or [])
        voice = VOICE_MAP.get(binyan, "")

        meaning = entry.get("meaning", "") or ""
        # Preposition: an explicit conj["prep"] wins; otherwise fall back to the
        # parenthesised Hebrew fragments found in meaning_raw.
        prep_str = ""
        conj_prep = conj.get("prep")
        if conj_prep:
            prep_str = f"({conj_prep})"
        elif meaning:
            preps = HBPAREN_RE.findall(entry.get("meaning_raw", "") or "")
            prep_str = " ".join(f"({p})" for p in preps)

        # Up to eight other words sharing this verb's root
        related = [w for w in root_words.get(root, []) if w != infinitive]
        related_str = " ".join(related[:8])

        def add_note(
            pronoun: str,
            tense: str,
            conj_form: str,
            audio_tag: str,
            _form_key_for_guid: str,
            guid_val: str | None = None,
            guid_candidates: list[str] | None = None,
            *,
            _infinitive: str = infinitive,
            _ref_form: str = ref_form,
            _root: str = root,
            _binyan_heb: str = binyan_heb,
            _voice: str = voice,
            _meaning: str = meaning,
            _related_str: str = related_str,
            _prep_str: str = prep_str,
        ) -> None:
            """Add one note for a conjugated form; forms without Hebrew letters are dropped.

            The keyword-only defaults bind the current verb's values at
            definition time; the passive pass overrides some of them.
            """
            nonlocal note_count
            if not conj_form or not re.search(r"[\u05d0-\u05ea]", conj_form):
                return
            # Apply tense prefix (בְּ)
            display_tense = TENSE_WITH_BE.get(tense, tense)
            # GUID: use stored guid, then first candidate, then deterministic fallback
            if guid_val:
                note_guid = guid_val
            elif guid_candidates:
                note_guid = guid_candidates[0]
            else:
                note_guid = genanki.guid_for(_infinitive, pronoun, tense)
            note = genanki.Note(
                model=CONJ_MODEL,
                guid=note_guid,
                fields=[
                    _infinitive,
                    _ref_form,
                    pronoun,
                    display_tense,
                    conj_form,
                    _root,
                    _binyan_heb,
                    _voice,
                    audio_tag,
                    _meaning,
                    _related_str,
                    _prep_str,
                ],
                tags=[RELEASE_TAG],
            )
            deck.add_note(note)
            note_count += 1

        # Seeded RNG per verb — deterministic pronoun/gender choices.
        # Seed with the string itself: hash() of a str is salted per process
        # (PYTHONHASHSEED), so hash(infinitive) gave different picks, and
        # different fallback GUIDs, on every run.
        verb_rng = random.Random(infinitive)

        def emit_forms(
            form_table: dict[str, dict],
            audio_prefix: str = "",
            *,
            _slug: str = slug,
            _rng: random.Random = verb_rng,
            _add_note=add_note,
            **overrides: str,
        ) -> None:
            """Emit one note per quizzable form in ``form_table``.

            ``audio_prefix`` is prepended to the form key when looking up audio;
            ``overrides`` are forwarded to ``add_note`` as keyword arguments.
            """
            for form_key, form_data in form_table.items():
                # Infinitive: shown on card front as reference — skip as a quiz form
                if form_key == "infinitive":
                    continue
                conj_form = form_data.get("form", "")

                # Audio tag
                audio_tag = ""
                if include_audio and _slug:
                    audio_key = f"{audio_prefix}{form_key}"
                    audio_tag = _conj_audio_tag(_slug, audio_key)
                    if audio_tag:
                        mp3_path = audio_dir / f"{_slug}_{audio_key}.mp3"
                        if mp3_path not in seen_media:
                            seen_media.add(mp3_path)
                            media_files.append(mp3_path)

                guid_val = form_data.get("guid")
                guid_candidates = form_data.get("guid_candidates")

                if form_key in PRESENT_EXPANSION:
                    # Present tense expansion: 4 form keys → 1 card each (seeded RNG)
                    chosen = _rng.choice(PRESENT_EXPANSION[form_key])
                    pronoun, tense = chosen[0], chosen[1]
                elif form_key == "past_3p":
                    # Past 3rd plural: same form for m/f → 1 card (seeded RNG)
                    chosen = _rng.choice(PAST_3P_EXPANSION)
                    pronoun, tense = chosen[0], chosen[1]
                else:
                    pronoun = form_data.get("pronoun", "")
                    tense = form_data.get("tense", "")
                    if form_key in FP_MODERN_FALLBACK:
                        # 2fp/3fp future and imperative: show modern (mp) form
                        # + classical (fp) in parens
                        mp_form = form_table.get(FP_MODERN_FALLBACK[form_key], {}).get("form", "")
                        if mp_form and mp_form != conj_form:
                            conj_form = f"{mp_form} ({conj_form})"
                    elif form_key in {"past_1s", "past_1p", "future_1s", "future_1p"}:
                        # 1st-person forms get a randomly assigned gender label
                        # (deterministic per verb)
                        gender = _rng.choice(["זָכָר", "נְקֵבָה"])
                        pronoun = f"{pronoun} ({gender})"

                _add_note(
                    pronoun,
                    tense,
                    conj_form,
                    audio_tag,
                    form_key,
                    guid_val,
                    guid_candidates,
                    **overrides,
                )

        emit_forms(_forms_list_to_dict(active_forms_list))

        # Passive partner forms (Huf'al/Pu'al counterpart)
        hufal_forms_list = conj.get("hufal_pual_forms")
        if hufal_forms_list:
            ref_passive = conj.get("reference_form_passive")
            passive_binyan = "Huf'al" if binyan == "Hif'il" else "Pu'al"
            emit_forms(
                _forms_list_to_dict(hufal_forms_list),
                "passive_",
                _ref_form=ref_passive["nikkud"] if ref_passive else ref_form,
                _binyan_heb=BINYAN_TO_HEBREW.get(passive_binyan, passive_binyan),
                _voice=VOICE_MAP.get(passive_binyan, "סָבִיל"),
            )

    logger.info(f"Conjugation deck: {note_count} notes across {verb_count} verbs")
    return deck, media_files
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Confusables deck — words that look identical without nikkud
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
# Front template: shows the Words field followed by the fixed Hebrew prompt
# "מה ההבדל?" ("what is the difference?").
CONF_FRONT = """
<div class="hebrew" style="font-size:36px;">{{Words}}</div>
<div class="meaning" style="font-size:32px; direction:rtl; text-align:center;">מה ההבדל?</div>
"""

# Back template: repeats the front, then the Definitions field; the audio
# <div> sits inside a {{#Audio}} conditional section.
CONF_BACK = """
{{FrontSide}}<hr>
<div class="definitions">{{Definitions}}</div>
{{#Audio}}<div>{{Audio}}</div>{{/Audio}}
"""

# The confusables cards reuse CARD_CSS unchanged.
CONF_CSS = CARD_CSS

# Note type for the confusables deck: four fields and a single card template.
# WordNoNikkud is stored on the note but is not referenced by either template.
CONF_MODEL = genanki.Model(
    CONF_MODEL_ID,
    "Hebrew Confusables",
    fields=[
        {"name": "Words"},
        {"name": "Definitions"},
        {"name": "Audio"},
        {"name": "WordNoNikkud"},
    ],
    templates=[
        {
            "name": "Confusable",
            "qfmt": CONF_FRONT,
            "afmt": CONF_BACK,
        },
    ],
    css=CONF_CSS,
)
|
|
|
|
|
|
def build_confusables_deck(
    words: dict[str, dict],
    include_audio: bool = True,
) -> tuple[genanki.Deck, list[Path]]:
    """Build confusables deck from words dict — groups words by confusable_group.

    Entries with a non-None ``confusable_group`` are bucketed by their
    ``confusables_guid`` (or, when that is missing, by a GUID derived from the
    entry's ``ktiv_male``). Each bucket that still has at least two distinct
    (nikkud, meaning) pairs becomes one note whose GUID is the bucket key.

    Returns:
        ``(deck, media_files)``; media paths are unique, in first-seen order.
    """
    logger.info("Building confusables deck …")

    deck = genanki.Deck(CONF_DECK_ID, "Hebrew Confusables")
    media_files: list[Path] = []
    note_count = 0

    # Bucket the confusable entries by note GUID; dict keys are unique, so each
    # GUID is handled exactly once below.
    buckets: dict[str, list[dict]] = {}
    for unique_key, entry in words.items():
        if entry.get("confusable_group") is None:
            continue
        # Fall back to a ktiv_male-based guid when none is stored
        bucket_guid = entry.get("confusables_guid") or genanki.guid_for(
            "confusable", entry["word"].get("ktiv_male", unique_key)
        )
        buckets.setdefault(bucket_guid, []).append(entry)

    for bucket_guid in sorted(buckets):
        members = buckets[bucket_guid]
        if len(members) < 2:
            continue

        # Keep the first entry for every distinct (word, meaning) pair
        distinct: dict[tuple[str, str], dict] = {}
        for member in members:
            pair = (member["word"]["nikkud"], member.get("meaning", ""))
            distinct.setdefault(pair, member)
        unique_entries = list(distinct.values())
        if len(unique_entries) < 2:
            continue

        word_no_nik = unique_entries[0]["word"].get("ktiv_male", "")
        words_display = " / ".join(member["word"]["nikkud"] for member in unique_entries)

        defs_parts: list[str] = []
        audio_parts: list[str] = []
        for member in unique_entries:
            spelled = member["word"]["nikkud"]
            gloss = member.get("meaning", "")
            pos = member.get("pos_hebrew", "")
            pos_label = f" ({pos})" if pos else ""
            defs_parts.append(
                f'<div class="conf-entry"><span class="hebrew" style="font-size:24px;">{spelled}</span>'
                f" = {gloss}{pos_label}</div>"
            )
            if not include_audio:
                continue

            # Prefer the entry's own audio_file when it exists on disk,
            # otherwise ask _audio_tag for one.
            tag = ""
            audio_name = member.get("audio_file", "") or ""
            if audio_name and (AUDIO_DIR / audio_name).exists():
                tag = f"[sound:{audio_name}]"
            if not tag:
                member_slug = member.get("slug", "") or ""
                member_ktiv = member.get("word", {}).get("ktiv_male", "") or ""
                tag = _audio_tag(member_ktiv, slug=member_slug)
            if not tag or tag in audio_parts:
                continue
            audio_parts.append(tag)
            mp3_path = AUDIO_DIR / tag.removeprefix("[sound:").removesuffix("]")
            if mp3_path not in media_files:
                media_files.append(mp3_path)

        deck.add_note(
            genanki.Note(
                model=CONF_MODEL,
                guid=bucket_guid,
                fields=[words_display, "\n".join(defs_parts), " ".join(audio_parts), word_no_nik],
                tags=[RELEASE_TAG],
            )
        )
        note_count += 1

    logger.info(f"Confusables deck: {note_count} notes")
    return deck, media_files
|
|
|
|
|
|
def write_conf_apkg(
    deck: genanki.Deck,
    media_files: list[Path] | None = None,
    out_path: Path = CONF_APKG,
) -> None:
    """Write the confusables deck to ``out_path`` as an .apkg.

    The output directory is created if needed. Only media paths that exist on
    disk are bundled, followed by the font files from ``_font_media_files()``.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    package = genanki.Package(deck)
    existing_media = [str(path) for path in (media_files or []) if path.exists()]
    package.media_files = [*existing_media, *_font_media_files()]
    package.write_to_file(str(out_path))
    logger.info(f"Confusables deck written → {out_path}")
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
# Noun plurals deck — singular↔plural drilling
|
|
# ──────────────────────────────────────────────────────────────────────────────
|
|
|
|
# Card 1 front (singular → plural): singular form, its optional audio, the
# meaning, and a direction hint reading "singular ← plural" in Hebrew.
PLURAL_FRONT_SG = """
<div class="hebrew" style="color:#1a1a8c;">{{Singular}}</div>
{{#SingularAudio}}<div>{{SingularAudio}}</div>{{/SingularAudio}}
<div class="sec-label">{{Meaning}}</div>
<div class="hint" style="font-size:28px;">יָחִיד ← רַבִּים</div>
"""

# Card 1 back: the front, then the plural form with optional audio and the
# mishkal line (inside a {{#Mishkal}} conditional section).
PLURAL_BACK_SG = """
{{FrontSide}}<hr>
<div class="hebrew">{{Plural}}</div>
{{#PluralAudio}}<div>{{PluralAudio}}</div>{{/PluralAudio}}
{{#Mishkal}}<div class="sec-label"><span class="sec-key">מִשְׁקָל:</span> {{Mishkal}}</div>{{/Mishkal}}
"""

# Card 2 front (plural → singular): plural form and optional audio only; the
# meaning is kept for the back.
PLURAL_FRONT_PL = """
<div class="hebrew" style="color:#1a1a8c;">{{Plural}}</div>
{{#PluralAudio}}<div>{{PluralAudio}}</div>{{/PluralAudio}}
<div class="hint" style="font-size:28px;">רַבִּים ← יָחִיד</div>
"""

# Card 2 back: the front, then the singular form, optional audio, meaning and
# the conditional mishkal line.
PLURAL_BACK_PL = """
{{FrontSide}}<hr>
<div class="hebrew">{{Singular}}</div>
{{#SingularAudio}}<div>{{SingularAudio}}</div>{{/SingularAudio}}
<div class="sec-label">{{Meaning}}</div>
{{#Mishkal}}<div class="sec-label"><span class="sec-key">מִשְׁקָל:</span> {{Mishkal}}</div>{{/Mishkal}}
"""

# The plural cards reuse CARD_CSS unchanged.
PLURAL_CSS = CARD_CSS

# Note type for the plurals deck: eight fields, two card templates (one per
# quiz direction). Root and Gender are stored on the note but are not
# referenced by any of the four templates above.
PLURAL_MODEL = genanki.Model(
    PLURAL_MODEL_ID,
    "Hebrew Plurals",
    fields=[
        {"name": "Singular"},
        {"name": "SingularAudio"},
        {"name": "Plural"},
        {"name": "PluralAudio"},
        {"name": "Meaning"},
        {"name": "Root"},
        {"name": "Mishkal"},
        {"name": "Gender"},
    ],
    templates=[
        {
            "name": "Singular → Plural",
            "qfmt": PLURAL_FRONT_SG,
            "afmt": PLURAL_BACK_SG,
        },
        {
            "name": "Plural → Singular",
            "qfmt": PLURAL_FRONT_PL,
            "afmt": PLURAL_BACK_PL,
        },
    ],
    css=PLURAL_CSS,
)
|
|
|
|
|
|
def _is_irregular_plural(gender: str, plural_ktiv: str) -> bool:
|
|
"""Detect irregular plurals: masc nouns with ות- suffix, fem with ים- suffix.
|
|
|
|
Args:
|
|
gender: ``"masculine"`` or ``"feminine"``.
|
|
plural_ktiv: ktiv male (no nikkud) form of the plural.
|
|
"""
|
|
return (gender == "masculine" and plural_ktiv.endswith("ות")) or (
|
|
gender == "feminine" and plural_ktiv.endswith("ים")
|
|
)
|
|
|
|
|
|
def build_plural_deck(
    words: dict[str, dict],
    include_audio: bool = False,
    per_mishkal: int = 6,
) -> tuple[genanki.Deck, list[Path]]:
    """Build noun plurals deck from words with noun_inflection data.

    Selection: ALL irregular plurals, plus up to ``per_mishkal`` exemplars per
    mishkal pattern for regular nouns, smallest ``frequency`` value first
    (entries without a frequency sort last). Regular nouns that have no
    mishkal are not selected.

    Args:
        words: Mapping of unique key to word entry. Only entries whose ``pos``
            starts with ``"Noun"`` and whose ``noun_inflection`` has both a
            singular and a plural nikkud form are considered.
        include_audio: When true, attach the singular form's audio tag and
            collect its mp3 path.
        per_mishkal: Maximum number of regular exemplars taken per mishkal
            pattern; negative values are treated as 0.

    Returns:
        ``(deck, media_files)``; media paths are unique, in first-seen order.
    """
    logger.info("Building plurals deck …")

    deck = genanki.Deck(PLURAL_DECK_ID, "Hebrew Plurals")
    media_files: list[Path] = []

    # Collect all nouns with both singular and plural
    irregulars: list[tuple[str, dict, dict]] = []  # (unique_key, entry, noun_inflection)
    by_mishkal: dict[str, list[tuple[str, dict, dict]]] = {}

    for unique_key, entry in words.items():
        # `or ""` guards against an explicit null, which .get()'s default
        # does not cover.
        if not (entry.get("pos") or "").startswith("Noun"):
            continue
        noun_inflection = entry.get("noun_inflection")
        if not noun_inflection:
            continue
        singular_data = noun_inflection.get("singular")
        plural_data = noun_inflection.get("plural")
        if not singular_data or not plural_data:
            continue
        if not singular_data.get("nikkud") or not plural_data.get("nikkud"):
            continue

        gender = noun_inflection.get("gender") or ""
        mishkal = noun_inflection.get("mishkal") or ""
        plural_ktiv = plural_data.get("ktiv_male") or ""

        if _is_irregular_plural(gender, plural_ktiv):
            irregulars.append((unique_key, entry, noun_inflection))
        elif mishkal:
            by_mishkal.setdefault(mishkal, []).append((unique_key, entry, noun_inflection))

    # Select exemplars per mishkal, preferring high-frequency words
    exemplar_limit = max(per_mishkal, 0)
    selected: list[tuple[str, dict, dict]] = list(irregulars)
    for _mishkal, entries in sorted(by_mishkal.items()):
        entries.sort(key=lambda e: e[1].get("frequency") or 999_999)
        selected.extend(entries[:exemplar_limit])

    note_count = 0
    for _unique_key, entry, noun_inflection in selected:
        singular = noun_inflection["singular"]["nikkud"]
        singular_ktiv = noun_inflection["singular"].get("ktiv_male") or ""
        plural = noun_inflection["plural"]["nikkud"]
        plural_ktiv = noun_inflection["plural"].get("ktiv_male") or ""
        gender = noun_inflection.get("gender") or ""
        mishkal = noun_inflection.get("mishkal") or ""
        meaning = entry.get("meaning") or ""
        root = " ".join(entry.get("root") or [])

        # GUID from noun_inflection, else derived from the singular form
        note_guid = noun_inflection.get("plurals_guid") or genanki.guid_for("plural", singular)

        # Audio tags
        sg_audio = ""
        # NOTE(review): PluralAudio is never populated — only the singular gets
        # an audio tag. Confirm whether that is intentional.
        pl_audio = ""
        if include_audio:
            sg_tag = _audio_tag(singular_ktiv)
            if sg_tag:
                sg_audio = sg_tag
                mp3_path = AUDIO_DIR / sg_tag.removeprefix("[sound:").removesuffix("]")
                if mp3_path not in media_files:
                    media_files.append(mp3_path)

        tags = [RELEASE_TAG]
        if mishkal:
            tags.append(f"mishkal::{mishkal}")
        if _is_irregular_plural(gender, plural_ktiv):
            tags.append("irregular")

        note = genanki.Note(
            model=PLURAL_MODEL,
            guid=note_guid,
            fields=[
                singular,
                sg_audio,
                plural,
                pl_audio,
                meaning,
                root,
                mishkal,
                gender,
            ],
            tags=tags,
        )
        deck.add_note(note)
        note_count += 1

    irregular_count = len(irregulars)
    regular_count = note_count - irregular_count
    logger.info(
        f"Plurals deck: {note_count} notes "
        f"({irregular_count} irregular + {regular_count} regular exemplars "
        f"from {len(by_mishkal)} mishkal patterns)"
    )
    return deck, media_files
|
|
|
|
|
|
def write_plural_apkg(
    deck: genanki.Deck,
    media_files: list[Path] | None = None,
    out_path: Path = PLURAL_APKG,
) -> None:
    """Write the plurals deck to ``out_path`` as an .apkg.

    The output directory is created if needed. Only media paths that exist on
    disk are bundled, followed by the font files from ``_font_media_files()``.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    package = genanki.Package(deck)
    existing_media = [str(path) for path in (media_files or []) if path.exists()]
    package.media_files = [*existing_media, *_font_media_files()]
    package.write_to_file(str(out_path))
    logger.info(f"Plurals deck written → {out_path}")
|
|
|
|
|
|
def _font_media_files() -> list[str]:
    """Return the paths of the ``_Heebo*.ttf`` files in FONTS_DIR, as strings, for bundling in .apkg."""
    return [str(font) for font in FONTS_DIR.glob("_Heebo*.ttf") if font.exists()]
|
|
|
|
|
|
class _RandomOrderPackage(genanki.Package):
    """genanki.Package variant whose deck options use random new-card order (0) rather than insertion order (1)."""

    def write_to_db(self, cursor, timestamp, id_gen):
        """Write the package as usual, then set ``new.order`` to 0 in every stored deck config."""
        super().write_to_db(cursor, timestamp, id_gen)
        row = cursor.execute("SELECT dconf FROM col").fetchone()
        if not row:
            return
        deck_options = json.loads(row[0])
        for options in deck_options.values():
            # Only touch entries that actually carry a "new" section
            if isinstance(options, dict) and "new" in options:
                options["new"]["order"] = 0
        cursor.execute("UPDATE col SET dconf = ?", [json.dumps(deck_options)])
|
|
|
|
|
|
def write_vocab_apkg(
    deck: genanki.Deck,
    media_files: list[Path],
    out_path: Path = VOCAB_APKG,
) -> None:
    """Write the vocabulary deck to ``out_path`` as an .apkg.

    Uses the plain ``genanki.Package`` so cards keep their insertion order.
    Only media paths that exist on disk are bundled, followed by the fonts.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # insertion order = frequency rank (new.order=1 default)
    package = genanki.Package(deck)
    existing_media = [str(path) for path in media_files if path.exists()]
    package.media_files = [*existing_media, *_font_media_files()]
    package.write_to_file(str(out_path))
    logger.info(f"Vocabulary deck written → {out_path}")
|
|
|
|
|
|
def write_conj_apkg(
    deck: genanki.Deck,
    media_files: list[Path] | None = None,
    out_path: Path = CONJ_APKG,
) -> None:
    """Write the conjugation deck to ``out_path`` as an .apkg via ``_RandomOrderPackage``.

    The output directory is created if needed. Only media paths that exist on
    disk are bundled, followed by the font files from ``_font_media_files()``.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    package = _RandomOrderPackage(deck)
    existing_media = [str(path) for path in (media_files or []) if path.exists()]
    package.media_files = [*existing_media, *_font_media_files()]
    package.write_to_file(str(out_path))
    logger.info(f"Conjugation deck written → {out_path}")
|
|
|
|
|
|
def build_complete_deck(
    words: dict[str, dict],
    limit: int | None = None,
    include_audio: bool = False,
    emoji_lookup: dict | None = None,
) -> tuple[list[genanki.Deck], list[Path]]:
    """Build all subdecks under 'Hebrew::*' for the combined .apkg.

    Each standalone builder is run once and its notes are re-homed into a
    fresh ``Hebrew::<Name>`` deck carrying the COMPLETE_* deck ID. The
    vocabulary subdeck is always built with images.

    Returns (list_of_decks, deduplicated_media_files).
    """
    logger.info(f" Building complete deck (audio={'yes' if include_audio else 'no'}) …")

    def as_subdeck(deck_id: int, name: str, source: genanki.Deck) -> genanki.Deck:
        """Copy the notes of ``source`` into a new deck with the given ID and name."""
        subdeck = genanki.Deck(deck_id, name)
        for note in source.notes:
            subdeck.add_note(note)
        return subdeck

    # Build standalone decks using existing functions
    vocab_deck, vocab_media = build_vocab_deck(
        words,
        limit=limit,
        include_audio=include_audio,
        include_images=True,
        emoji_lookup=emoji_lookup,
    )
    conj_deck, conj_media = build_conj_deck(words, include_audio=include_audio)
    conf_deck, conf_media = build_confusables_deck(words, include_audio=include_audio)
    plural_deck, plural_media = build_plural_deck(words, include_audio=include_audio)

    # Create new Deck objects with subdeck names and different IDs
    complete_vocab = as_subdeck(COMPLETE_VOCAB_DECK_ID, "Hebrew::Vocabulary", vocab_deck)
    complete_conj = as_subdeck(COMPLETE_CONJ_DECK_ID, "Hebrew::Conjugations", conj_deck)
    complete_conf = as_subdeck(COMPLETE_CONF_DECK_ID, "Hebrew::Confusables", conf_deck)
    complete_plural = as_subdeck(COMPLETE_PLURAL_DECK_ID, "Hebrew::Plurals", plural_deck)

    # Deduplicate media files by resolved path; the first occurrence wins and
    # first-seen order is kept.
    unique_media: dict[str, Path] = {}
    for media_path in [*vocab_media, *conj_media, *conf_media, *plural_media]:
        media_key = str(media_path.resolve()) if media_path.exists() else str(media_path)
        unique_media.setdefault(media_key, media_path)
    all_media: list[Path] = list(unique_media.values())

    decks = [complete_vocab, complete_conj, complete_conf, complete_plural]

    plural_info = f" + {len(complete_plural.notes)} plural"
    logger.info(
        f" Complete deck: {len(complete_vocab.notes)} vocab + "
        f"{len(complete_conj.notes)} conj + {len(complete_conf.notes)} conf{plural_info} notes, "
        f"{len(all_media)} media files"
    )
    return decks, all_media
|
|
|
|
|
|
def write_complete_apkg(
    decks: list[genanki.Deck],
    media_files: list[Path],
    out_path: Path = COMPLETE_APKG,
) -> None:
    """Write a combined .apkg with multiple subdecks.

    The output directory is created if needed. Only media paths that exist on
    disk are bundled, followed by the font files from ``_font_media_files()``.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    package = genanki.Package(decks)
    existing_media = [str(path) for path in media_files if path.exists()]
    package.media_files = [*existing_media, *_font_media_files()]
    package.write_to_file(str(out_path))
    logger.info(f"Complete deck written → {out_path}")
|
|
|
|
|
|
def build_all_variants(
    words: dict[str, dict],
    limit: int | None = None,
) -> None:
    """Build all 12 release variants into output/.

    Four vocabulary packages (audio × images), then a silent and an audio
    package each for conjugations, confusables, plurals and the combined
    "Hebrew::*" deck. ``limit`` is forwarded to the vocabulary and combined
    builds only.
    """
    logger.info("Building all release variants …")

    emoji_lookup = _load_emoji_lookup()
    logger.info(f" Emoji lookup: {len(emoji_lookup)} keywords loaded")

    def yes_no(flag: bool) -> str:
        """Render a boolean for the variant log labels."""
        return "yes" if flag else "no"

    for audio, images, path in (
        (False, False, VOCAB_APKG),
        (True, False, VOCAB_APKG_AUDIO),
        (False, True, VOCAB_APKG_IMAGES),
        (True, True, VOCAB_APKG_AUDIO_IMAGES),
    ):
        label = f"audio={yes_no(audio)} images={yes_no(images)}"
        logger.info(f" Vocab variant: {label} → {path.name}")
        deck, media = build_vocab_deck(
            words,
            limit=limit,
            include_audio=audio,
            include_images=images,
            emoji_lookup=emoji_lookup,
        )
        write_vocab_apkg(deck, media, out_path=path)

    # The remaining standalone decks differ only in builder, writer and paths.
    audio_only_families = (
        ("Conj", build_conj_deck, write_conj_apkg, CONJ_APKG, CONJ_APKG_AUDIO),
        ("Conf", build_confusables_deck, write_conf_apkg, CONF_APKG, CONF_APKG_AUDIO),
        ("Plural", build_plural_deck, write_plural_apkg, PLURAL_APKG, PLURAL_APKG_AUDIO),
    )
    for family, build, write, silent_path, audio_path in audio_only_families:
        for audio, path in ((False, silent_path), (True, audio_path)):
            label = f"audio={yes_no(audio)}"
            logger.info(f" {family} variant: {label} → {path.name}")
            deck, media = build(words, include_audio=audio)
            write(deck, media, out_path=path)

    # Combined "Hebrew::*" complete decks
    for audio, path in ((False, COMPLETE_APKG), (True, COMPLETE_APKG_AUDIO)):
        decks, media = build_complete_deck(
            words,
            limit=limit,
            include_audio=audio,
            emoji_lookup=emoji_lookup,
        )
        write_complete_apkg(decks, media, out_path=path)

    logger.info("All variants built.")
|
|
|
|
|
|
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    words = _load_words()
    # Script run: a 20-word vocabulary deck plus the full conjugation deck,
    # each written to its default output path.
    write_vocab_apkg(*build_vocab_deck(words, limit=20))
    write_conj_apkg(*build_conj_deck(words))
|