Add 6-variant release build (4 vocab + 2 conj), bump to v0.12

- build_vocab_deck(): include_audio/include_images flags
- build_conj_deck(): include_audio flag
- build_all_variants(): builds all 6 apkg files in one call
- Variants: hebrew_vocabulary{,_audio,_images,_audio_images}.apkg
  hebrew_conjugations{,_audio}.apkg
- run.py: step_build_all() replaces step_build_vocab(); conjugation
  extraction reuses cached conjugations.json unless refreshed
- RELEASE_TAG bumped to v0.12

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-05 20:58:06 +00:00 · 2026-03-05 20:58:06 +00:00 · ccd7d61efb
commit ccd7d61efb
parent 62c92ffae0
2 changed files with 102 additions and 41 deletions
--- a/apkg_builder.py
+++ b/apkg_builder.py
@ -28,7 +28,7 @@ CONJ_MODEL_ID   = 1_234_567_893

 # Release version tag added to all notes so users can identify which release
 # their cards come from (visible in Anki's Browse view and card info).
-RELEASE_TAG = "v0.11"
+RELEASE_TAG = "v0.12"

 # Regex for extracting emoji and Hebrew prepositions from meaning strings
 EMOJI_RE   = re.compile(r'[\U0001F000-\U0001FFFF\u2600-\u27FF\u2300-\u23FF\uFE00-\uFE0F]+')
@ -39,8 +39,12 @@ AUDIO_DIR      = DATA_DIR / "audio"
 AUDIO_CONJ_DIR = DATA_DIR / "audio_conj"
 OUTPUT_DIR     = Path(__file__).parent / "output"

-VOCAB_APKG  = OUTPUT_DIR / "hebrew_vocabulary.apkg"
-CONJ_APKG   = OUTPUT_DIR / "hebrew_conjugations.apkg"
+VOCAB_APKG              = OUTPUT_DIR / "hebrew_vocabulary.apkg"
+VOCAB_APKG_AUDIO        = OUTPUT_DIR / "hebrew_vocabulary_audio.apkg"
+VOCAB_APKG_IMAGES       = OUTPUT_DIR / "hebrew_vocabulary_images.apkg"
+VOCAB_APKG_AUDIO_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_audio_images.apkg"
+CONJ_APKG               = OUTPUT_DIR / "hebrew_conjugations.apkg"
+CONJ_APKG_AUDIO         = OUTPUT_DIR / "hebrew_conjugations_audio.apkg"

 # ──────────────────────────────────────────────────────────────────────────────
 # Binyan → Hebrew label mapping (for conjugation card display)
@ -410,6 +414,8 @@ def build_vocab_deck(
    freq_cache: Optional[dict] = None,
    image_cache: Optional[dict] = None,
    limit: Optional[int] = None,
+    include_audio: bool = True,
+    include_images: bool = True,
 ) -> tuple[genanki.Deck, list[Path]]:
    """
    Build the vocabulary deck from pealim_dict_for_anki.csv (or pealim_dict.csv).
@ -505,7 +511,7 @@ def build_vocab_deck(
        pos_heb = _translate_pos(pos_raw) if pos_raw else ""

        # Audio
-        audio_tag = _audio_tag(word_no_nik)
+        audio_tag = _audio_tag(word_no_nik) if include_audio else ""
        if audio_tag:
            mp3_name = audio_tag.removeprefix("[sound:").removesuffix("]")
            mp3_path = AUDIO_DIR / mp3_name
@ -538,14 +544,15 @@ def build_vocab_deck(
            related_html = "\n".join(parts)

        # Image: look up by stripped word (no-nikkud)
-        image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
        image_tag = ""
-        if image_filename:
-            image_path = images_dir / image_filename
-            if image_path.exists():
-                image_tag = image_filename
-                if image_path not in media_files:
-                    media_files.append(image_path)
+        if include_images:
+            image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
+            if image_filename:
+                image_path = images_dir / image_filename
+                if image_path.exists():
+                    image_tag = image_filename
+                    if image_path not in media_files:
+                        media_files.append(image_path)

        note = genanki.Note(
            model=VOCAB_MODEL,
@ -598,6 +605,7 @@ def build_vocab_deck(
 def build_conj_deck(
    conjugations: dict,
    audio_dir: Path = AUDIO_CONJ_DIR,
+    include_audio: bool = True,
 ) -> tuple[genanki.Deck, list[Path]]:
    """Build the conjugation drill deck from conjugations.json data."""
    deck = genanki.Deck(CONJ_DECK_ID, "Hebrew Conjugations")
@ -659,7 +667,7 @@ def build_conj_deck(

            # Audio tag: use downloaded file if present
            audio_tag = ""
-            if slug:
+            if include_audio and slug:
                audio_tag = _conj_audio_tag(slug, form_key)
                if audio_tag:
                    mp3_path = audio_dir / f"{slug}_{form_key}.mp3"
@ -756,6 +764,50 @@ def write_conj_apkg(
    logger.info(f"Conjugation deck written → {out_path}")


+def build_all_variants(
+    dict_csv: Path,
+    conjugations: dict,
+    examples_cache: Optional[dict] = None,
+    freq_cache: Optional[dict] = None,
+    image_cache: Optional[dict] = None,
+    limit: Optional[int] = None,
+) -> None:
+    """Build all 6 release variants (4 vocab + 2 conj) into output/."""
+    logger.info("Building all release variants …")
+
+    vocab_variants = [
+        (False, False, VOCAB_APKG),
+        (True,  False, VOCAB_APKG_AUDIO),
+        (False, True,  VOCAB_APKG_IMAGES),
+        (True,  True,  VOCAB_APKG_AUDIO_IMAGES),
+    ]
+    for audio, images, path in vocab_variants:
+        label = f"audio={'yes' if audio else 'no'} images={'yes' if images else 'no'}"
+        logger.info(f"  Vocab variant: {label} → {path.name}")
+        deck, media = build_vocab_deck(
+            dict_csv,
+            examples_cache=examples_cache,
+            freq_cache=freq_cache,
+            image_cache=image_cache or {},
+            limit=limit,
+            include_audio=audio,
+            include_images=images,
+        )
+        write_vocab_apkg(deck, media, out_path=path)
+
+    conj_variants = [
+        (False, CONJ_APKG),
+        (True,  CONJ_APKG_AUDIO),
+    ]
+    for audio, path in conj_variants:
+        label = f"audio={'yes' if audio else 'no'}"
+        logger.info(f"  Conj variant: {label} → {path.name}")
+        deck, media = build_conj_deck(conjugations, include_audio=audio)
+        write_conj_apkg(deck, media, out_path=path)
+
+    logger.info("All variants built.")
+
+
 if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

--- a/run.py
+++ b/run.py
@ -358,9 +358,9 @@ def step_images(args) -> dict:
    return image_fetch.run(limit=limit)


-def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache: dict | None = None):
-    """Step 5 — build vocabulary .apkg."""
-    logger.info("[5] Building vocabulary deck …")
+def step_build_all(args, examples_cache: dict, freq_cache: dict, conjugations: dict | None, image_cache: dict | None = None):
+    """Step 5 — build all 6 release variants (4 vocab + 2 conj)."""
+    logger.info("[5] Building all deck variants …")
    import apkg_builder

    dict_csv = DATA_DIR / "hebrew_dict_for_anki.csv"
@ -371,20 +371,18 @@ def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache:
    if not dict_csv.exists():
        dict_csv = DATA_DIR / "pealim_dict.csv"

-    deck, media = apkg_builder.build_vocab_deck(
+    apkg_builder.build_all_variants(
        dict_csv,
+        conjugations=conjugations or {},
        examples_cache=examples_cache,
        freq_cache=freq_cache,
        image_cache=image_cache or {},
        limit=args.test,
    )
-    apkg_builder.write_vocab_apkg(deck, media)
-    logger.info(f"    Vocabulary .apkg → {apkg_builder.VOCAB_APKG}")
-    return deck


 def step_conjugations(args):
-    """Step 6 — extract conjugations and build conjugation deck."""
+    """Step 6 — extract conjugations (returns data; building handled by step_build_all)."""
    if args.skip_conjugations:
        logger.info("[6] Skipping conjugations (--skip-conjugations)")
        return None
@ -394,18 +392,21 @@ def step_conjugations(args):
        logger.info("[6] verbs_input.txt not found — skipping conjugation deck")
        return None

-    logger.info("[6] Extracting verb conjugations …")
-    import conjugation_extract
-    conjugations = conjugation_extract.main(verbs_file)
+    # Use cached conjugations.json if available (skip re-extraction)
+    conj_cache = DATA_DIR / "conjugations.json"
+    if conj_cache.exists() and not getattr(args, 'refresh_conjugations', False):
+        logger.info("[6] Using cached conjugations.json …")
+        with open(conj_cache) as f:
+            import json as _json
+            conjugations = _json.load(f)
+    else:
+        logger.info("[6] Extracting verb conjugations …")
+        import conjugation_extract
+        conjugations = conjugation_extract.main(verbs_file)

    # Download conjugation audio
    step_conj_audio(args, conjugations)

-    import apkg_builder
-    conj_deck, conj_media = apkg_builder.build_conj_deck(conjugations)
-    apkg_builder.write_conj_apkg(conj_deck, conj_media)
-    logger.info(f"    Conjugation .apkg → {apkg_builder.CONJ_APKG}")
-
    return conjugations


@ -453,17 +454,18 @@ def print_summary(args, examples_cache, freq_cache, conjugations):
        found_imgs = sum(1 for v in ic.values() if v)
        logger.info(f"  Images: {found_imgs}/{len(ic)} nouns with images")

-    vocab_apkg = OUTPUT_DIR / "hebrew_vocabulary.apkg"
-    conj_apkg  = OUTPUT_DIR / "hebrew_conjugations.apkg"
-    if vocab_apkg.exists():
-        size_mb = vocab_apkg.stat().st_size / 1e6
-        logger.info(f"  Vocabulary .apkg: {size_mb:.1f} MB → {vocab_apkg}")
-    if conj_apkg.exists():
-        size_mb = conj_apkg.stat().st_size / 1e6
-        logger.info(f"  Conjugation .apkg: {size_mb:.1f} MB → {conj_apkg}")
-        if conjugations:
-            verb_count = sum(1 for v in conjugations.values() if v)
-            logger.info(f"  Verbs in conjugation deck: {verb_count}")
+    import apkg_builder as _ab
+    all_apkgs = [
+        _ab.VOCAB_APKG, _ab.VOCAB_APKG_AUDIO, _ab.VOCAB_APKG_IMAGES, _ab.VOCAB_APKG_AUDIO_IMAGES,
+        _ab.CONJ_APKG, _ab.CONJ_APKG_AUDIO,
+    ]
+    for apkg in all_apkgs:
+        if apkg.exists():
+            size_mb = apkg.stat().st_size / 1e6
+            logger.info(f"  {apkg.name}: {size_mb:.1f} MB")
+    if conjugations:
+        verb_count = sum(1 for v in conjugations.values() if v)
+        logger.info(f"  Verbs in conjugation deck: {verb_count}")

    logger.info("=" * 60)
    logger.info("DONE")
@ -485,6 +487,13 @@ def main():
    if args.only == "conjugations":
        step_fonts(args)
        conjugations = step_conjugations(args)
+        if conjugations:
+            import apkg_builder
+            apkg_builder.build_all_variants(
+                DATA_DIR / "hebrew_dict_for_anki.csv",
+                conjugations=conjugations,
+                limit=args.test,
+            )
        print_summary(args, {}, {}, conjugations or {})
        return

@ -497,8 +506,8 @@ def main():
    step_audio(args)
    step_fonts(args)
    image_cache    = step_images(args)
-    step_build_vocab(args, examples_cache, freq_cache, image_cache)
-    conjugations = step_conjugations(args)
+    conjugations   = step_conjugations(args)
+    step_build_all(args, examples_cache, freq_cache, conjugations, image_cache)

    print_summary(args, examples_cache, freq_cache, conjugations or {})