Add 6-variant release build (4 vocab + 2 conj), bump to v0.12
- build_vocab_deck(): include_audio/include_images flags
- build_conj_deck(): include_audio flag
- build_all_variants(): builds all 6 apkg files in one call
- Variants: hebrew_vocabulary{,_audio,_images,_audio_images}.apkg
hebrew_conjugations{,_audio}.apkg
- run.py: step_build_all() replaces step_build_vocab(); step_conjugations()
reuses the cached conjugations.json unless args.refresh_conjugations is set
- RELEASE_TAG bumped to v0.12
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
62c92ffae0
commit
ccd7d61efb
2 changed files with 102 additions and 41 deletions
|
|
@ -28,7 +28,7 @@ CONJ_MODEL_ID = 1_234_567_893
|
|||
|
||||
# Release version tag added to all notes so users can identify which release
|
||||
# their cards come from (visible in Anki's Browse view and card info).
|
||||
RELEASE_TAG = "v0.11"
|
||||
RELEASE_TAG = "v0.12"
|
||||
|
||||
# Regex for extracting emoji and Hebrew prepositions from meaning strings
|
||||
EMOJI_RE = re.compile(r'[\U0001F000-\U0001FFFF\u2600-\u27FF\u2300-\u23FF\uFE00-\uFE0F]+')
|
||||
|
|
@ -39,8 +39,12 @@ AUDIO_DIR = DATA_DIR / "audio"
|
|||
AUDIO_CONJ_DIR = DATA_DIR / "audio_conj"
|
||||
OUTPUT_DIR = Path(__file__).parent / "output"
|
||||
|
||||
VOCAB_APKG = OUTPUT_DIR / "hebrew_vocabulary.apkg"
|
||||
CONJ_APKG = OUTPUT_DIR / "hebrew_conjugations.apkg"
|
||||
VOCAB_APKG = OUTPUT_DIR / "hebrew_vocabulary.apkg"
|
||||
VOCAB_APKG_AUDIO = OUTPUT_DIR / "hebrew_vocabulary_audio.apkg"
|
||||
VOCAB_APKG_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_images.apkg"
|
||||
VOCAB_APKG_AUDIO_IMAGES = OUTPUT_DIR / "hebrew_vocabulary_audio_images.apkg"
|
||||
CONJ_APKG = OUTPUT_DIR / "hebrew_conjugations.apkg"
|
||||
CONJ_APKG_AUDIO = OUTPUT_DIR / "hebrew_conjugations_audio.apkg"
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Binyan → Hebrew label mapping (for conjugation card display)
|
||||
|
|
@ -410,6 +414,8 @@ def build_vocab_deck(
|
|||
freq_cache: Optional[dict] = None,
|
||||
image_cache: Optional[dict] = None,
|
||||
limit: Optional[int] = None,
|
||||
include_audio: bool = True,
|
||||
include_images: bool = True,
|
||||
) -> tuple[genanki.Deck, list[Path]]:
|
||||
"""
|
||||
Build the vocabulary deck from pealim_dict_for_anki.csv (or pealim_dict.csv).
|
||||
|
|
@ -505,7 +511,7 @@ def build_vocab_deck(
|
|||
pos_heb = _translate_pos(pos_raw) if pos_raw else ""
|
||||
|
||||
# Audio
|
||||
audio_tag = _audio_tag(word_no_nik)
|
||||
audio_tag = _audio_tag(word_no_nik) if include_audio else ""
|
||||
if audio_tag:
|
||||
mp3_name = audio_tag.removeprefix("[sound:").removesuffix("]")
|
||||
mp3_path = AUDIO_DIR / mp3_name
|
||||
|
|
@ -538,14 +544,15 @@ def build_vocab_deck(
|
|||
related_html = "\n".join(parts)
|
||||
|
||||
# Image: look up by stripped word (no-nikkud)
|
||||
image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
|
||||
image_tag = ""
|
||||
if image_filename:
|
||||
image_path = images_dir / image_filename
|
||||
if image_path.exists():
|
||||
image_tag = image_filename
|
||||
if image_path not in media_files:
|
||||
media_files.append(image_path)
|
||||
if include_images:
|
||||
image_filename = image_cache.get(word_no_nik or _strip_nikkud(word), None)
|
||||
if image_filename:
|
||||
image_path = images_dir / image_filename
|
||||
if image_path.exists():
|
||||
image_tag = image_filename
|
||||
if image_path not in media_files:
|
||||
media_files.append(image_path)
|
||||
|
||||
note = genanki.Note(
|
||||
model=VOCAB_MODEL,
|
||||
|
|
@ -598,6 +605,7 @@ def build_vocab_deck(
|
|||
def build_conj_deck(
|
||||
conjugations: dict,
|
||||
audio_dir: Path = AUDIO_CONJ_DIR,
|
||||
include_audio: bool = True,
|
||||
) -> tuple[genanki.Deck, list[Path]]:
|
||||
"""Build the conjugation drill deck from conjugations.json data."""
|
||||
deck = genanki.Deck(CONJ_DECK_ID, "Hebrew Conjugations")
|
||||
|
|
@ -659,7 +667,7 @@ def build_conj_deck(
|
|||
|
||||
# Audio tag: use downloaded file if present
|
||||
audio_tag = ""
|
||||
if slug:
|
||||
if include_audio and slug:
|
||||
audio_tag = _conj_audio_tag(slug, form_key)
|
||||
if audio_tag:
|
||||
mp3_path = audio_dir / f"{slug}_{form_key}.mp3"
|
||||
|
|
@ -756,6 +764,50 @@ def write_conj_apkg(
|
|||
logger.info(f"Conjugation deck written → {out_path}")
|
||||
|
||||
|
||||
def build_all_variants(
    dict_csv: Path,
    conjugations: dict,
    examples_cache: Optional[dict] = None,
    freq_cache: Optional[dict] = None,
    image_cache: Optional[dict] = None,
    limit: Optional[int] = None,
) -> None:
    """Write every release variant of both decks to its module-level path.

    Four vocabulary packages are produced, one per combination of the audio
    and image switches, followed by two conjugation packages (without and
    with audio). Each variant is built by a fresh call to the corresponding
    ``build_*_deck`` function and written by the matching ``write_*_apkg``.

    Args:
        dict_csv: Dictionary CSV handed to ``build_vocab_deck``.
        conjugations: Conjugation data handed to ``build_conj_deck``.
        examples_cache: Forwarded unchanged to ``build_vocab_deck``.
        freq_cache: Forwarded unchanged to ``build_vocab_deck``.
        image_cache: Forwarded to ``build_vocab_deck``; a falsy value is
            replaced by an empty dict on each call.
        limit: Forwarded unchanged to ``build_vocab_deck``.
    """

    def _yes_no(flag: bool) -> str:
        # Human-readable rendering of an on/off switch for the log lines.
        return "yes" if flag else "no"

    logger.info("Building all release variants …")

    # (output path, include_audio, include_images) — order fixes build order.
    vocab_targets = (
        (VOCAB_APKG, False, False),
        (VOCAB_APKG_AUDIO, True, False),
        (VOCAB_APKG_IMAGES, False, True),
        (VOCAB_APKG_AUDIO_IMAGES, True, True),
    )
    for target, with_audio, with_images in vocab_targets:
        label = f"audio={_yes_no(with_audio)} images={_yes_no(with_images)}"
        logger.info(f" Vocab variant: {label} → {target.name}")
        vocab_deck, vocab_media = build_vocab_deck(
            dict_csv,
            examples_cache=examples_cache,
            freq_cache=freq_cache,
            # Evaluated per call so each build gets its own fallback dict.
            image_cache=image_cache or {},
            limit=limit,
            include_audio=with_audio,
            include_images=with_images,
        )
        write_vocab_apkg(vocab_deck, vocab_media, out_path=target)

    # (output path, include_audio) for the conjugation drill deck.
    conj_targets = (
        (CONJ_APKG, False),
        (CONJ_APKG_AUDIO, True),
    )
    for target, with_audio in conj_targets:
        label = f"audio={_yes_no(with_audio)}"
        logger.info(f" Conj variant: {label} → {target.name}")
        conj_deck, conj_media = build_conj_deck(
            conjugations, include_audio=with_audio
        )
        write_conj_apkg(conj_deck, conj_media, out_path=target)

    logger.info("All variants built.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
||||
|
||||
|
|
|
|||
67
run.py
67
run.py
|
|
@ -358,9 +358,9 @@ def step_images(args) -> dict:
|
|||
return image_fetch.run(limit=limit)
|
||||
|
||||
|
||||
def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache: dict | None = None):
|
||||
"""Step 5 — build vocabulary .apkg."""
|
||||
logger.info("[5] Building vocabulary deck …")
|
||||
def step_build_all(args, examples_cache: dict, freq_cache: dict, conjugations: dict | None, image_cache: dict | None = None):
|
||||
"""Step 5 — build all 6 release variants (4 vocab + 2 conj)."""
|
||||
logger.info("[5] Building all deck variants …")
|
||||
import apkg_builder
|
||||
|
||||
dict_csv = DATA_DIR / "hebrew_dict_for_anki.csv"
|
||||
|
|
@ -371,20 +371,18 @@ def step_build_vocab(args, examples_cache: dict, freq_cache: dict, image_cache:
|
|||
if not dict_csv.exists():
|
||||
dict_csv = DATA_DIR / "pealim_dict.csv"
|
||||
|
||||
deck, media = apkg_builder.build_vocab_deck(
|
||||
apkg_builder.build_all_variants(
|
||||
dict_csv,
|
||||
conjugations=conjugations or {},
|
||||
examples_cache=examples_cache,
|
||||
freq_cache=freq_cache,
|
||||
image_cache=image_cache or {},
|
||||
limit=args.test,
|
||||
)
|
||||
apkg_builder.write_vocab_apkg(deck, media)
|
||||
logger.info(f" Vocabulary .apkg → {apkg_builder.VOCAB_APKG}")
|
||||
return deck
|
||||
|
||||
|
||||
def step_conjugations(args):
|
||||
"""Step 6 — extract conjugations and build conjugation deck."""
|
||||
"""Step 6 — extract conjugations (returns data; building handled by step_build_all)."""
|
||||
if args.skip_conjugations:
|
||||
logger.info("[6] Skipping conjugations (--skip-conjugations)")
|
||||
return None
|
||||
|
|
@ -394,18 +392,21 @@ def step_conjugations(args):
|
|||
logger.info("[6] verbs_input.txt not found — skipping conjugation deck")
|
||||
return None
|
||||
|
||||
logger.info("[6] Extracting verb conjugations …")
|
||||
import conjugation_extract
|
||||
conjugations = conjugation_extract.main(verbs_file)
|
||||
# Use cached conjugations.json if available (skip re-extraction)
|
||||
conj_cache = DATA_DIR / "conjugations.json"
|
||||
if conj_cache.exists() and not getattr(args, 'refresh_conjugations', False):
|
||||
logger.info("[6] Using cached conjugations.json …")
|
||||
with open(conj_cache) as f:
|
||||
import json as _json
|
||||
conjugations = _json.load(f)
|
||||
else:
|
||||
logger.info("[6] Extracting verb conjugations …")
|
||||
import conjugation_extract
|
||||
conjugations = conjugation_extract.main(verbs_file)
|
||||
|
||||
# Download conjugation audio
|
||||
step_conj_audio(args, conjugations)
|
||||
|
||||
import apkg_builder
|
||||
conj_deck, conj_media = apkg_builder.build_conj_deck(conjugations)
|
||||
apkg_builder.write_conj_apkg(conj_deck, conj_media)
|
||||
logger.info(f" Conjugation .apkg → {apkg_builder.CONJ_APKG}")
|
||||
|
||||
return conjugations
|
||||
|
||||
|
||||
|
|
@ -453,17 +454,18 @@ def print_summary(args, examples_cache, freq_cache, conjugations):
|
|||
found_imgs = sum(1 for v in ic.values() if v)
|
||||
logger.info(f" Images: {found_imgs}/{len(ic)} nouns with images")
|
||||
|
||||
vocab_apkg = OUTPUT_DIR / "hebrew_vocabulary.apkg"
|
||||
conj_apkg = OUTPUT_DIR / "hebrew_conjugations.apkg"
|
||||
if vocab_apkg.exists():
|
||||
size_mb = vocab_apkg.stat().st_size / 1e6
|
||||
logger.info(f" Vocabulary .apkg: {size_mb:.1f} MB → {vocab_apkg}")
|
||||
if conj_apkg.exists():
|
||||
size_mb = conj_apkg.stat().st_size / 1e6
|
||||
logger.info(f" Conjugation .apkg: {size_mb:.1f} MB → {conj_apkg}")
|
||||
if conjugations:
|
||||
verb_count = sum(1 for v in conjugations.values() if v)
|
||||
logger.info(f" Verbs in conjugation deck: {verb_count}")
|
||||
import apkg_builder as _ab
|
||||
all_apkgs = [
|
||||
_ab.VOCAB_APKG, _ab.VOCAB_APKG_AUDIO, _ab.VOCAB_APKG_IMAGES, _ab.VOCAB_APKG_AUDIO_IMAGES,
|
||||
_ab.CONJ_APKG, _ab.CONJ_APKG_AUDIO,
|
||||
]
|
||||
for apkg in all_apkgs:
|
||||
if apkg.exists():
|
||||
size_mb = apkg.stat().st_size / 1e6
|
||||
logger.info(f" {apkg.name}: {size_mb:.1f} MB")
|
||||
if conjugations:
|
||||
verb_count = sum(1 for v in conjugations.values() if v)
|
||||
logger.info(f" Verbs in conjugation deck: {verb_count}")
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("DONE")
|
||||
|
|
@ -485,6 +487,13 @@ def main():
|
|||
if args.only == "conjugations":
|
||||
step_fonts(args)
|
||||
conjugations = step_conjugations(args)
|
||||
if conjugations:
|
||||
import apkg_builder
|
||||
apkg_builder.build_all_variants(
|
||||
DATA_DIR / "hebrew_dict_for_anki.csv",
|
||||
conjugations=conjugations,
|
||||
limit=args.test,
|
||||
)
|
||||
print_summary(args, {}, {}, conjugations or {})
|
||||
return
|
||||
|
||||
|
|
@ -497,8 +506,8 @@ def main():
|
|||
step_audio(args)
|
||||
step_fonts(args)
|
||||
image_cache = step_images(args)
|
||||
step_build_vocab(args, examples_cache, freq_cache, image_cache)
|
||||
conjugations = step_conjugations(args)
|
||||
conjugations = step_conjugations(args)
|
||||
step_build_all(args, examples_cache, freq_cache, conjugations, image_cache)
|
||||
|
||||
print_summary(args, examples_cache, freq_cache, conjugations or {})
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue