Add 6-variant release build (4 vocab + 2 conj), bump to v0.12

- build_vocab_deck(): include_audio/include_images flags
- build_conj_deck(): include_audio flag
- build_all_variants(): builds all 6 apkg files in one call
- Variants: hebrew_vocabulary{,_audio,_images,_audio_images}.apkg
            hebrew_conjugations{,_audio}.apkg
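- run.py: step_build_all() replaces step_build_vocab(); step_conjugations()
  now only returns the conjugation data (deck building moved to
  step_build_all()) and reuses the cached conjugations.json unless
  args.refresh_conjugations is set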
- RELEASE_TAG bumped to v0.12

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Sochen 2026-03-05 20:58:06 +00:00
parent 62c92ffae0
commit ccd7d61efb
2 changed files with 102 additions and 41 deletions

View file

@ -28,7 +28,7 @@ CONJ_MODEL_ID = 1_234_567_893
# Release version tag added to all notes so users can identify which release
# their cards come from (visible in Anki's Browse view and card info).
RELEASE_TAG = "v0.11"
RELEASE_TAG = "v0.12"
# Regex for extracting emoji and Hebrew prepositions from meaning strings
EMOJI_RE = re.compile(r'[\U0001F000-\U0001FFFF\u2600-\u27FF\u2300-\u23FF\uFE00-\uFE0F]+')
@ -39,8 +39,12 @@ AUDIO_DIR = DATA_DIR / "audio"
AUDIO_CONJ_DIR = DATA_DIR / "audio_conj"
OUTPUT_DIR = Path(__file__).parent / "output"
VOCAB_APKG = OUTPUT_DIR / "hebrew_vocabulary.apkg"
CONJ_APKG = OUTPUT_DIR / "hebrew_conjugations.apkg"
VOCAB_APKG = OUTPUT_DIR / "hebrew_vocabulary.apkg"
VOCAB_APKG_AUDIO = OUTPUT_DIR / "hebrew_vocabulary_audio.apkg"
VOCAB_APKG_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_images.apkg"
VOCAB_APKG_AUDIO_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_audio_images.apkg"
CONJ_APKG = OUTPUT_DIR / "hebrew_conjugations.apkg"
CONJ_APKG_AUDIO = OUTPUT_DIR / "hebrew_conjugations_audio.apkg"
# ──────────────────────────────────────────────────────────────────────────────
# Binyan → Hebrew label mapping (for conjugation card display)
@ -410,6 +414,8 @@ def build_vocab_deck(
freq_cache: Optional[dict] = None,
image_cache: Optional[dict] = None,
limit: Optional[int] = None,
include_audio: bool = True,
include_images: bool = True,
) -> tuple[genanki.Deck, list[Path]]:
"""
Build the vocabulary deck from pealim_dict_for_anki.csv (or pealim_dict.csv).
@ -505,7 +511,7 @@ def build_vocab_deck(
pos_heb = _translate_pos(pos_raw) if pos_raw else ""
# Audio
audio_tag = _audio_tag(word_no_nik)
audio_tag = _audio_tag(word_no_nik) if include_audio else ""
if audio_tag:
mp3_name = audio_tag.removeprefix("[sound:").removesuffix("]")
mp3_path = AUDIO_DIR / mp3_name
@ -538,14 +544,15 @@ def build_vocab_deck(
related_html = "\n".join(parts)
# Image: look up by stripped word (no-nikkud)
image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
image_tag = ""
if image_filename:
image_path = images_dir / image_filename
if image_path.exists():
image_tag = image_filename
if image_path not in media_files:
media_files.append(image_path)
if include_images:
image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
if image_filename:
image_path = images_dir / image_filename
if image_path.exists():
image_tag = image_filename
if image_path not in media_files:
media_files.append(image_path)
note = genanki.Note(
model=VOCAB_MODEL,
@ -598,6 +605,7 @@ def build_vocab_deck(
def build_conj_deck(
conjugations: dict,
audio_dir: Path = AUDIO_CONJ_DIR,
include_audio: bool = True,
) -> tuple[genanki.Deck, list[Path]]:
"""Build the conjugation drill deck from conjugations.json data."""
deck = genanki.Deck(CONJ_DECK_ID, "Hebrew Conjugations")
@ -659,7 +667,7 @@ def build_conj_deck(
# Audio tag: use downloaded file if present
audio_tag = ""
if slug:
if include_audio and slug:
audio_tag = _conj_audio_tag(slug, form_key)
if audio_tag:
mp3_path = audio_dir / f"{slug}_{form_key}.mp3"
@ -756,6 +764,50 @@ def write_conj_apkg(
logger.info(f"Conjugation deck written → {out_path}")
def build_all_variants(
    dict_csv: Path,
    conjugations: dict,
    examples_cache: Optional[dict] = None,
    freq_cache: Optional[dict] = None,
    image_cache: Optional[dict] = None,
    limit: Optional[int] = None,
) -> None:
    """Build the release variants (4 vocab + 2 conj) into output/.

    Vocabulary variants cover every audio/images combination and are written
    to VOCAB_APKG, VOCAB_APKG_AUDIO, VOCAB_APKG_IMAGES and
    VOCAB_APKG_AUDIO_IMAGES.  Conjugation variants (without / with audio) are
    written to CONJ_APKG and CONJ_APKG_AUDIO.

    Args:
        dict_csv: Dictionary CSV forwarded to build_vocab_deck().
        conjugations: Conjugation data forwarded to build_conj_deck().  When
            it is empty or None the two conjugation variants are skipped
            rather than written as packages with no notes.
        examples_cache: Forwarded unchanged to build_vocab_deck().
        freq_cache: Forwarded unchanged to build_vocab_deck().
        image_cache: Forwarded to build_vocab_deck(); None becomes {}.
        limit: Forwarded unchanged to build_vocab_deck().
    """
    logger.info("Building all release variants …")

    # (include_audio, include_images, output path)
    vocab_variants = [
        (False, False, VOCAB_APKG),
        (True, False, VOCAB_APKG_AUDIO),
        (False, True, VOCAB_APKG_IMAGES),
        (True, True, VOCAB_APKG_AUDIO_IMAGES),
    ]
    for audio, images, path in vocab_variants:
        label = f"audio={'yes' if audio else 'no'} images={'yes' if images else 'no'}"
        # Separator keeps the flag summary and the file name apart in the log.
        logger.info(f" Vocab variant: {label} → {path.name}")
        deck, media = build_vocab_deck(
            dict_csv,
            examples_cache=examples_cache,
            freq_cache=freq_cache,
            image_cache=image_cache or {},
            limit=limit,
            include_audio=audio,
            include_images=images,
        )
        write_vocab_apkg(deck, media, out_path=path)

    if conjugations:
        # (include_audio, output path)
        conj_variants = [
            (False, CONJ_APKG),
            (True, CONJ_APKG_AUDIO),
        ]
        for audio, path in conj_variants:
            label = f"audio={'yes' if audio else 'no'}"
            logger.info(f" Conj variant: {label} → {path.name}")
            deck, media = build_conj_deck(conjugations, include_audio=audio)
            write_conj_apkg(deck, media, out_path=path)
    else:
        # Nothing to put in the decks (e.g. extraction was skipped upstream):
        # do not emit conjugation packages that contain no notes.
        logger.info(" No conjugation data — skipping conjugation variants")

    logger.info("All variants built.")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

67
run.py
View file

@ -358,9 +358,9 @@ def step_images(args) -> dict:
return image_fetch.run(limit=limit)
def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache: dict | None = None):
"""Step 5 — build vocabulary .apkg."""
logger.info("[5] Building vocabulary deck")
def step_build_all(args, examples_cache: dict, freq_cache: dict, conjugations: dict | None, image_cache: dict | None = None):
"""Step 5 — build all 6 release variants (4 vocab + 2 conj)."""
logger.info("[5] Building all deck variants")
import apkg_builder
dict_csv = DATA_DIR / "hebrew_dict_for_anki.csv"
@ -371,20 +371,18 @@ def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache:
if not dict_csv.exists():
dict_csv = DATA_DIR / "pealim_dict.csv"
deck, media = apkg_builder.build_vocab_deck(
apkg_builder.build_all_variants(
dict_csv,
conjugations=conjugations or {},
examples_cache=examples_cache,
freq_cache=freq_cache,
image_cache=image_cache or {},
limit=args.test,
)
apkg_builder.write_vocab_apkg(deck, media)
logger.info(f" Vocabulary .apkg → {apkg_builder.VOCAB_APKG}")
return deck
def step_conjugations(args):
"""Step 6 — extract conjugations and build conjugation deck."""
"""Step 6 — extract conjugations (returns data; building handled by step_build_all)."""
if args.skip_conjugations:
logger.info("[6] Skipping conjugations (--skip-conjugations)")
return None
@ -394,18 +392,21 @@ def step_conjugations(args):
logger.info("[6] verbs_input.txt not found — skipping conjugation deck")
return None
logger.info("[6] Extracting verb conjugations …")
import conjugation_extract
conjugations = conjugation_extract.main(verbs_file)
# Use cached conjugations.json if available (skip re-extraction)
conj_cache = DATA_DIR / "conjugations.json"
if conj_cache.exists() and not getattr(args, 'refresh_conjugations', False):
logger.info("[6] Using cached conjugations.json …")
with open(conj_cache) as f:
import json as _json
conjugations = _json.load(f)
else:
logger.info("[6] Extracting verb conjugations …")
import conjugation_extract
conjugations = conjugation_extract.main(verbs_file)
# Download conjugation audio
step_conj_audio(args, conjugations)
import apkg_builder
conj_deck, conj_media = apkg_builder.build_conj_deck(conjugations)
apkg_builder.write_conj_apkg(conj_deck, conj_media)
logger.info(f" Conjugation .apkg → {apkg_builder.CONJ_APKG}")
return conjugations
@ -453,17 +454,18 @@ def print_summary(args, examples_cache, freq_cache, conjugations):
found_imgs = sum(1 for v in ic.values() if v)
logger.info(f" Images: {found_imgs}/{len(ic)} nouns with images")
vocab_apkg = OUTPUT_DIR / "hebrew_vocabulary.apkg"
conj_apkg = OUTPUT_DIR / "hebrew_conjugations.apkg"
if vocab_apkg.exists():
size_mb = vocab_apkg.stat().st_size / 1e6
logger.info(f" Vocabulary .apkg: {size_mb:.1f} MB → {vocab_apkg}")
if conj_apkg.exists():
size_mb = conj_apkg.stat().st_size / 1e6
logger.info(f" Conjugation .apkg: {size_mb:.1f} MB → {conj_apkg}")
if conjugations:
verb_count = sum(1 for v in conjugations.values() if v)
logger.info(f" Verbs in conjugation deck: {verb_count}")
import apkg_builder as _ab
all_apkgs = [
_ab.VOCAB_APKG, _ab.VOCAB_APKG_AUDIO, _ab.VOCAB_APKG_IMAGES, _ab.VOCAB_APKG_AUDIO_IMAGES,
_ab.CONJ_APKG, _ab.CONJ_APKG_AUDIO,
]
for apkg in all_apkgs:
if apkg.exists():
size_mb = apkg.stat().st_size / 1e6
logger.info(f" {apkg.name}: {size_mb:.1f} MB")
if conjugations:
verb_count = sum(1 for v in conjugations.values() if v)
logger.info(f" Verbs in conjugation deck: {verb_count}")
logger.info("=" * 60)
logger.info("DONE")
@ -485,6 +487,13 @@ def main():
if args.only == "conjugations":
step_fonts(args)
conjugations = step_conjugations(args)
if conjugations:
import apkg_builder
apkg_builder.build_all_variants(
DATA_DIR / "hebrew_dict_for_anki.csv",
conjugations=conjugations,
limit=args.test,
)
print_summary(args, {}, {}, conjugations or {})
return
@ -497,8 +506,8 @@ def main():
step_audio(args)
step_fonts(args)
image_cache = step_images(args)
step_build_vocab(args, examples_cache, freq_cache, image_cache)
conjugations = step_conjugations(args)
conjugations = step_conjugations(args)
step_build_all(args, examples_cache, freq_cache, conjugations, image_cache)
print_summary(args, examples_cache, freq_cache, conjugations or {})