From ccd7d61efbc3d91b9ac05e1ff52bad95c561660a Mon Sep 17 00:00:00 2001
From: Sochen <sochen@nevo.engineer>
Date: Thu, 5 Mar 2026 20:58:06 +0000
Subject: [PATCH] Add 6-variant release build (4 vocab + 2 conj), bump to v0.12

- build_vocab_deck(): include_audio/include_images flags
- build_conj_deck(): include_audio flag
- build_all_variants(): builds all 6 apkg files in one call
- Variants: hebrew_vocabulary{,_audio,_images,_audio_images}.apkg
            hebrew_conjugations{,_audio}.apkg
- run.py: step_build_all() replaces step_build_vocab(); step_conjugations()
  reuses cached conjugations.json unless args.refresh_conjugations is set
- RELEASE_TAG bumped to v0.12

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 apkg_builder.py | 76 +++++++++++++++++++++++++++++++++++++++++--------
 run.py          | 67 ++++++++++++++++++++++++-------------------
 2 files changed, 102 insertions(+), 41 deletions(-)

diff --git a/apkg_builder.py b/apkg_builder.py
index 3a70619..1572995 100644
--- a/apkg_builder.py
+++ b/apkg_builder.py
@@ -28,7 +28,7 @@ CONJ_MODEL_ID   = 1_234_567_893
 
 # Release version tag added to all notes so users can identify which release
 # their cards come from (visible in Anki's Browse view and card info).
-RELEASE_TAG = "v0.11"
+RELEASE_TAG = "v0.12"
 
 # Regex for extracting emoji and Hebrew prepositions from meaning strings
 EMOJI_RE   = re.compile(r'[\U0001F000-\U0001FFFF\u2600-\u27FF\u2300-\u23FF\uFE00-\uFE0F]+')
@@ -39,8 +39,12 @@ AUDIO_DIR      = DATA_DIR / "audio"
 AUDIO_CONJ_DIR = DATA_DIR / "audio_conj"
 OUTPUT_DIR     = Path(__file__).parent / "output"
 
-VOCAB_APKG  = OUTPUT_DIR / "hebrew_vocabulary.apkg"
-CONJ_APKG   = OUTPUT_DIR / "hebrew_conjugations.apkg"
+VOCAB_APKG              = OUTPUT_DIR / "hebrew_vocabulary.apkg"
+VOCAB_APKG_AUDIO        = OUTPUT_DIR / "hebrew_vocabulary_audio.apkg"
+VOCAB_APKG_IMAGES       = OUTPUT_DIR / "hebrew_vocabulary_images.apkg"
+VOCAB_APKG_AUDIO_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_audio_images.apkg"
+CONJ_APKG               = OUTPUT_DIR / "hebrew_conjugations.apkg"
+CONJ_APKG_AUDIO         = OUTPUT_DIR / "hebrew_conjugations_audio.apkg"
 
 # ──────────────────────────────────────────────────────────────────────────────
 # Binyan → Hebrew label mapping (for conjugation card display)
@@ -410,6 +414,8 @@ def build_vocab_deck(
     freq_cache: Optional[dict] = None,
     image_cache: Optional[dict] = None,
     limit: Optional[int] = None,
+    include_audio: bool = True,
+    include_images: bool = True,
 ) -> tuple[genanki.Deck, list[Path]]:
     """
     Build the vocabulary deck from pealim_dict_for_anki.csv (or pealim_dict.csv).
@@ -505,7 +511,7 @@ def build_vocab_deck(
         pos_heb = _translate_pos(pos_raw) if pos_raw else ""
 
         # Audio
-        audio_tag = _audio_tag(word_no_nik)
+        audio_tag = _audio_tag(word_no_nik) if include_audio else ""
         if audio_tag:
             mp3_name = audio_tag.removeprefix("[sound:").removesuffix("]")
             mp3_path = AUDIO_DIR / mp3_name
@@ -538,14 +544,15 @@ def build_vocab_deck(
             related_html = "\n".join(parts)
 
         # Image: look up by stripped word (no-nikkud)
-        image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
         image_tag = ""
-        if image_filename:
-            image_path = images_dir / image_filename
-            if image_path.exists():
-                image_tag = image_filename
-                if image_path not in media_files:
-                    media_files.append(image_path)
+        if include_images:
+            image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
+            if image_filename:
+                image_path = images_dir / image_filename
+                if image_path.exists():
+                    image_tag = image_filename
+                    if image_path not in media_files:
+                        media_files.append(image_path)
 
         note = genanki.Note(
             model=VOCAB_MODEL,
@@ -598,6 +605,7 @@ def build_vocab_deck(
 def build_conj_deck(
     conjugations: dict,
     audio_dir: Path = AUDIO_CONJ_DIR,
+    include_audio: bool = True,
 ) -> tuple[genanki.Deck, list[Path]]:
     """Build the conjugation drill deck from conjugations.json data."""
     deck = genanki.Deck(CONJ_DECK_ID, "Hebrew Conjugations")
@@ -659,7 +667,7 @@ def build_conj_deck(
 
             # Audio tag: use downloaded file if present
             audio_tag = ""
-            if slug:
+            if include_audio and slug:
                 audio_tag = _conj_audio_tag(slug, form_key)
                 if audio_tag:
                     mp3_path = audio_dir / f"{slug}_{form_key}.mp3"
@@ -756,6 +764,50 @@ def write_conj_apkg(
     logger.info(f"Conjugation deck written → {out_path}")
 
 
+def build_all_variants(
+    dict_csv: Path,
+    conjugations: dict,
+    examples_cache: Optional[dict] = None,
+    freq_cache: Optional[dict] = None,
+    image_cache: Optional[dict] = None,
+    limit: Optional[int] = None,
+) -> None:
+    """Build the release variants (4 vocab + 2 conj) into output/.
+
+    Vocab decks are built for every audio/images combination. The two conj
+    decks are skipped when ``conjugations`` is empty, so a run without
+    conjugation data never writes empty conjugation .apkg files.
+    """
+    logger.info("Building all release variants …")
+
+    vocab_variants = [
+        (False, False, VOCAB_APKG),
+        (True,  False, VOCAB_APKG_AUDIO),
+        (False, True,  VOCAB_APKG_IMAGES),
+        (True,  True,  VOCAB_APKG_AUDIO_IMAGES),
+    ]
+    for audio, images, path in vocab_variants:
+        label = f"audio={'yes' if audio else 'no'} images={'yes' if images else 'no'}"
+        logger.info(f"  Vocab variant: {label} → {path.name}")
+        deck, media = build_vocab_deck(
+            dict_csv, examples_cache=examples_cache, freq_cache=freq_cache,
+            image_cache=image_cache or {}, limit=limit,
+            include_audio=audio, include_images=images,
+        )
+        write_vocab_apkg(deck, media, out_path=path)
+
+    if not conjugations:
+        logger.info("  No conjugation data — skipping conjugation variants")
+    # An empty tuple makes the loop a no-op when there is nothing to build.
+    conj_variants = ((False, CONJ_APKG), (True, CONJ_APKG_AUDIO)) if conjugations else ()
+    for audio, path in conj_variants:
+        logger.info(f"  Conj variant: audio={'yes' if audio else 'no'} → {path.name}")
+        deck, media = build_conj_deck(conjugations, include_audio=audio)
+        write_conj_apkg(deck, media, out_path=path)
+
+    logger.info("All variants built.")
+
+
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
 
diff --git a/run.py b/run.py
index 70b0424..819f440 100644
--- a/run.py
+++ b/run.py
@@ -358,9 +358,9 @@ def step_images(args) -> dict:
     return image_fetch.run(limit=limit)
 
 
-def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache: dict | None = None):
-    """Step 5 — build vocabulary .apkg."""
-    logger.info("[5] Building vocabulary deck …")
+def step_build_all(args, examples_cache: dict, freq_cache: dict, conjugations: dict | None, image_cache: dict | None = None):
+    """Step 5 — build all 6 release variants (4 vocab + 2 conj)."""
+    logger.info("[5] Building all deck variants …")
     import apkg_builder
 
     dict_csv = DATA_DIR / "hebrew_dict_for_anki.csv"
@@ -371,20 +371,18 @@ def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache:
     if not dict_csv.exists():
         dict_csv = DATA_DIR / "pealim_dict.csv"
 
-    deck, media = apkg_builder.build_vocab_deck(
+    apkg_builder.build_all_variants(
         dict_csv,
+        conjugations=conjugations or {},
         examples_cache=examples_cache,
         freq_cache=freq_cache,
         image_cache=image_cache or {},
         limit=args.test,
     )
-    apkg_builder.write_vocab_apkg(deck, media)
-    logger.info(f"    Vocabulary .apkg → {apkg_builder.VOCAB_APKG}")
-    return deck
 
 
 def step_conjugations(args):
-    """Step 6 — extract conjugations and build conjugation deck."""
+    """Step 6 — extract conjugations (returns data; building handled by step_build_all)."""
     if args.skip_conjugations:
         logger.info("[6] Skipping conjugations (--skip-conjugations)")
         return None
@@ -394,18 +392,21 @@ def step_conjugations(args):
         logger.info("[6] verbs_input.txt not found — skipping conjugation deck")
         return None
 
-    logger.info("[6] Extracting verb conjugations …")
-    import conjugation_extract
-    conjugations = conjugation_extract.main(verbs_file)
+    # Use cached conjugations.json if available (skip re-extraction)
+    conj_cache = DATA_DIR / "conjugations.json"
+    if conj_cache.exists() and not getattr(args, 'refresh_conjugations', False):
+        logger.info("[6] Using cached conjugations.json …")
+        with open(conj_cache) as f:
+            import json as _json
+            conjugations = _json.load(f)
+    else:
+        logger.info("[6] Extracting verb conjugations …")
+        import conjugation_extract
+        conjugations = conjugation_extract.main(verbs_file)
 
     # Download conjugation audio
     step_conj_audio(args, conjugations)
 
-    import apkg_builder
-    conj_deck, conj_media = apkg_builder.build_conj_deck(conjugations)
-    apkg_builder.write_conj_apkg(conj_deck, conj_media)
-    logger.info(f"    Conjugation .apkg → {apkg_builder.CONJ_APKG}")
-
     return conjugations
 
 
@@ -453,17 +454,18 @@ def print_summary(args, examples_cache, freq_cache, conjugations):
         found_imgs = sum(1 for v in ic.values() if v)
         logger.info(f"  Images: {found_imgs}/{len(ic)} nouns with images")
 
-    vocab_apkg = OUTPUT_DIR / "hebrew_vocabulary.apkg"
-    conj_apkg  = OUTPUT_DIR / "hebrew_conjugations.apkg"
-    if vocab_apkg.exists():
-        size_mb = vocab_apkg.stat().st_size / 1e6
-        logger.info(f"  Vocabulary .apkg: {size_mb:.1f} MB → {vocab_apkg}")
-    if conj_apkg.exists():
-        size_mb = conj_apkg.stat().st_size / 1e6
-        logger.info(f"  Conjugation .apkg: {size_mb:.1f} MB → {conj_apkg}")
-        if conjugations:
-            verb_count = sum(1 for v in conjugations.values() if v)
-            logger.info(f"  Verbs in conjugation deck: {verb_count}")
+    import apkg_builder as _ab
+    all_apkgs = [
+        _ab.VOCAB_APKG, _ab.VOCAB_APKG_AUDIO, _ab.VOCAB_APKG_IMAGES, _ab.VOCAB_APKG_AUDIO_IMAGES,
+        _ab.CONJ_APKG, _ab.CONJ_APKG_AUDIO,
+    ]
+    for apkg in all_apkgs:
+        if apkg.exists():
+            size_mb = apkg.stat().st_size / 1e6
+            logger.info(f"  {apkg.name}: {size_mb:.1f} MB")
+    if conjugations:
+        verb_count = sum(1 for v in conjugations.values() if v)
+        logger.info(f"  Verbs in conjugation deck: {verb_count}")
 
     logger.info("=" * 60)
     logger.info("DONE")
@@ -485,6 +487,13 @@ def main():
     if args.only == "conjugations":
         step_fonts(args)
         conjugations = step_conjugations(args)
+        if conjugations:
+            import apkg_builder
+            apkg_builder.build_all_variants(
+                DATA_DIR / "hebrew_dict_for_anki.csv",
+                conjugations=conjugations,
+                limit=args.test,
+            )
         print_summary(args, {}, {}, conjugations or {})
         return
 
@@ -497,8 +506,8 @@ def main():
     step_audio(args)
     step_fonts(args)
     image_cache    = step_images(args)
-    step_build_vocab(args, examples_cache, freq_cache, image_cache)
-    conjugations = step_conjugations(args)
+    conjugations   = step_conjugations(args)
+    step_build_all(args, examples_cache, freq_cache, conjugations, image_cache)
 
     print_summary(args, examples_cache, freq_cache, conjugations or {})