Sprint 4: fix insertion order, skip infinitive cards, split past_3p, fix empty binyan
- vocab deck uses frequency insertion order (genanki.Package); conjugation deck is random (_RandomOrderPackage)
- skip infinitive form_key in conjugation deck build (reference only, not a quiz target)
- PAST_3P_EXPANSION: split past_3p into separate הֵם and הֵן cards
- SECTION_BINYAN parsing: read section headers from verbs_input.txt as binyan hints
- add binyan_hint param to _extract_conjugations and _extract_passive_from_active_slug
- patch 20 cached entries with empty binyan (Pa'al, Nif'al) using section hints
- result: 2428 notes across 69 verbs, all with populated binyan

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
58dc1b8d9b
commit
0db11b1aa1
3 changed files with 65 additions and 32 deletions
|
|
@ -311,6 +311,12 @@ FP_MODERN_FALLBACK = {
|
|||
"imperative_fp": "imperative_mp",
|
||||
}
|
||||
|
||||
# 3rd person plural past: same form for m/f — generate two separate pronoun cards
|
||||
PAST_3P_EXPANSION = [
|
||||
("הֵם", "עָבָר"),
|
||||
("הֵן", "עָבָר"),
|
||||
]
|
||||
|
||||
# Voice field: passive label only (shown inline on card front for Pu'al/Huf'al)
|
||||
VOICE_MAP = {
|
||||
"Pu'al": "סָבִיל",
|
||||
|
|
@ -573,6 +579,10 @@ def build_conj_deck(
|
|||
conj_form = form_data.get("form", "")
|
||||
audio_url_for_key = form_data.get("audio_url", "")
|
||||
|
||||
# Infinitive: shown on card front as reference — skip as a quiz form
|
||||
if form_key == "infinitive":
|
||||
continue
|
||||
|
||||
# Audio tag: use downloaded file if present
|
||||
audio_tag = ""
|
||||
if slug:
|
||||
|
|
@ -588,6 +598,12 @@ def build_conj_deck(
|
|||
add_note(pronoun, tense_label, conj_form, audio_tag)
|
||||
continue
|
||||
|
||||
# Past 3rd plural: same form for m/f → two separate pronoun cards
|
||||
if form_key == "past_3p":
|
||||
for pronoun, tense_label in PAST_3P_EXPANSION:
|
||||
add_note(pronoun, tense_label, conj_form, audio_tag)
|
||||
continue
|
||||
|
||||
# 2fp/3fp future and imperative: show modern (mp) form + classical (fp) in parens
|
||||
if form_key in FP_MODERN_FALLBACK:
|
||||
mp_key = FP_MODERN_FALLBACK[form_key]
|
||||
|
|
@ -641,7 +657,7 @@ def write_vocab_apkg(
|
|||
out_path: Path = VOCAB_APKG,
|
||||
) -> None:
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
pkg = _RandomOrderPackage(deck)
|
||||
pkg = genanki.Package(deck) # insertion order = frequency rank (new.order=1 default)
|
||||
pkg.media_files = [str(p) for p in media_files if p.exists()] + _font_media_files()
|
||||
pkg.write_to_file(str(out_path))
|
||||
logger.info(f"Vocabulary deck written → {out_path}")
|
||||
|
|
|
|||
|
|
@ -414,7 +414,7 @@ def _extract_passive_binyan_from_page(soup: BeautifulSoup) -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def _extract_conjugations(slug: str, search_term: str, is_3ms_search: bool = False) -> dict | None:
|
||||
def _extract_conjugations(slug: str, search_term: str, is_3ms_search: bool = False, binyan_hint: str = "") -> dict | None:
|
||||
"""Fetch /dict/<slug>/ and parse conjugation table (active + passive)."""
|
||||
url = f"{PEALIM_BASE}/dict/{slug}/"
|
||||
try:
|
||||
|
|
@ -434,10 +434,12 @@ def _extract_conjugations(slug: str, search_term: str, is_3ms_search: bool = Fal
|
|||
root = txt
|
||||
break
|
||||
|
||||
# Extract binyan: try PoS lookup first, then page header
|
||||
# Extract binyan: try PoS lookup first, then page header, then section hint
|
||||
binyan = _binyan_from_pos(search_term) if not is_3ms_search else ""
|
||||
if not binyan:
|
||||
binyan = _extract_binyan_from_page(soup)
|
||||
if not binyan:
|
||||
binyan = binyan_hint
|
||||
|
||||
# Parse active forms table
|
||||
forms_raw = _parse_table(soup, passive=False)
|
||||
|
|
@ -495,7 +497,7 @@ def _save_conjugations(data: dict) -> None:
|
|||
json.dump(data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def _extract_passive_from_active_slug(active_slug: str, search_term: str) -> dict | None:
|
||||
def _extract_passive_from_active_slug(active_slug: str, search_term: str, binyan_hint: str = "") -> dict | None:
|
||||
"""Fetch active verb page and extract only the passive section forms.
|
||||
Used for Pu'al/Huf'al 3ms entries where we know the active verb's slug."""
|
||||
url = f"{PEALIM_BASE}/dict/{active_slug}/"
|
||||
|
|
@ -527,6 +529,8 @@ def _extract_passive_from_active_slug(active_slug: str, search_term: str) -> dic
|
|||
passive_binyan = _extract_passive_binyan_from_page(soup)
|
||||
if not passive_binyan:
|
||||
passive_binyan = "Pu'al" if active_binyan == "Pi'el" else "Huf'al" if active_binyan == "Hif'il" else ""
|
||||
if not passive_binyan:
|
||||
passive_binyan = binyan_hint
|
||||
|
||||
result = {
|
||||
"infinitive": search_term,
|
||||
|
|
@ -567,8 +571,16 @@ def main(verbs_file: Path = VERBS_INPUT) -> dict:
|
|||
if len(parts) >= 2:
|
||||
slug_overrides[parts[0]] = parts[1]
|
||||
|
||||
# Map section header keywords → binyan name (for binyan_hint fallback)
|
||||
SECTION_BINYAN = {
|
||||
"pa'al": "Pa'al", "nif'al": "Nif'al", "pi'el": "Pi'el",
|
||||
"pu'al": "Pu'al", "hitpa'el": "Hitpa'el", "hif'il": "Hif'il", "huf'al": "Huf'al",
|
||||
}
|
||||
|
||||
# Parse: regular verbs and # 3ms: lines (optional active slug on 3ms lines)
|
||||
verbs: list[tuple[str, bool, str | None]] = [] # (search_term, is_3ms_search, active_slug)
|
||||
# Track current section binyan from comment headers for use as a hint
|
||||
verbs: list[tuple[str, bool, str | None, str]] = [] # (search_term, is_3ms_search, active_slug, binyan_hint)
|
||||
current_binyan_hint = ""
|
||||
for line in raw_lines:
|
||||
stripped = line.strip()
|
||||
if not stripped or stripped.startswith("# slug:"):
|
||||
|
|
@ -578,21 +590,26 @@ def main(verbs_file: Path = VERBS_INPUT) -> dict:
|
|||
if parts:
|
||||
form = parts[0]
|
||||
active_slug = parts[1] if len(parts) >= 2 else None
|
||||
verbs.append((form, True, active_slug))
|
||||
verbs.append((form, True, active_slug, current_binyan_hint))
|
||||
elif stripped.startswith("#"):
|
||||
continue
|
||||
# Check if this is a section header setting the binyan context
|
||||
low = stripped.lower()
|
||||
for key, bname in SECTION_BINYAN.items():
|
||||
if key in low:
|
||||
current_binyan_hint = bname
|
||||
break
|
||||
else:
|
||||
verbs.append((stripped, False, None))
|
||||
verbs.append((stripped, False, None, current_binyan_hint))
|
||||
|
||||
logger.info(f"Loaded {len(verbs)} verbs from {verbs_file} "
|
||||
f"({sum(1 for _, p, _ in verbs if p)} passive 3ms)")
|
||||
f"({sum(1 for _, p, _, _ in verbs if p)} passive 3ms)")
|
||||
if slug_overrides:
|
||||
logger.info(f" Slug overrides: {slug_overrides}")
|
||||
|
||||
conjugations = _load_conjugations()
|
||||
new_count = 0
|
||||
|
||||
for verb, is_3ms, active_slug in verbs:
|
||||
for verb, is_3ms, active_slug, binyan_hint in verbs:
|
||||
if verb in conjugations:
|
||||
logger.info(f"Skipping {verb} (cached)")
|
||||
continue
|
||||
|
|
@ -614,7 +631,7 @@ def main(verbs_file: Path = VERBS_INPUT) -> dict:
|
|||
continue
|
||||
logger.info(f" Found active slug {slug} for passive extraction")
|
||||
time.sleep(REQUEST_DELAY)
|
||||
data = _extract_passive_from_active_slug(slug, verb)
|
||||
data = _extract_passive_from_active_slug(slug, verb, binyan_hint=binyan_hint)
|
||||
else:
|
||||
override = slug_overrides.get(verb)
|
||||
if override:
|
||||
|
|
@ -628,7 +645,7 @@ def main(verbs_file: Path = VERBS_INPUT) -> dict:
|
|||
_save_conjugations(conjugations)
|
||||
continue
|
||||
time.sleep(REQUEST_DELAY)
|
||||
data = _extract_conjugations(slug, verb, is_3ms_search=False)
|
||||
data = _extract_conjugations(slug, verb, is_3ms_search=False, binyan_hint=binyan_hint)
|
||||
|
||||
conjugations[verb] = data
|
||||
_save_conjugations(conjugations)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
"infinitive": "לשמור",
|
||||
"slug": "2255-lishmor",
|
||||
"root": "שׁ - מ - ר",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִשְׁמֹר",
|
||||
"forms": {
|
||||
|
|
@ -181,7 +181,7 @@
|
|||
"infinitive": "ללמוד",
|
||||
"slug": "41-lilmod",
|
||||
"root": "ל - מ - ד",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִלְמֹד",
|
||||
"forms": {
|
||||
|
|
@ -359,7 +359,7 @@
|
|||
"infinitive": "לאסוף",
|
||||
"slug": "128-leesof",
|
||||
"root": "א - ס - ף",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לֶאֱסֹף",
|
||||
"forms": {
|
||||
|
|
@ -537,7 +537,7 @@
|
|||
"infinitive": "לעבוד",
|
||||
"slug": "51-laavod",
|
||||
"root": "ע - ב - ד",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לַעֲבֹד",
|
||||
"forms": {
|
||||
|
|
@ -715,7 +715,7 @@
|
|||
"infinitive": "לחבוש",
|
||||
"slug": "553-lachbosh",
|
||||
"root": "ח - ב - שׁ",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לַחְבֹּשׁ",
|
||||
"forms": {
|
||||
|
|
@ -893,7 +893,7 @@
|
|||
"infinitive": "לאכול",
|
||||
"slug": "30-leechol",
|
||||
"root": "א - כ - ל",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לֶאֱכֹל",
|
||||
"forms": {
|
||||
|
|
@ -1071,7 +1071,7 @@
|
|||
"infinitive": "לשאול",
|
||||
"slug": "39-lishol",
|
||||
"root": "שׁ - א - ל",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִשְׁאֹל",
|
||||
"forms": {
|
||||
|
|
@ -1249,7 +1249,7 @@
|
|||
"infinitive": "לשלוח",
|
||||
"slug": "2220-lishloach",
|
||||
"root": "שׁ - ל - ח",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִשְׁלֹחַ",
|
||||
"forms": {
|
||||
|
|
@ -1427,7 +1427,7 @@
|
|||
"infinitive": "לגבוה",
|
||||
"slug": "286-ligboah",
|
||||
"root": "ג - ב - הּ",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִגְבֹּהַּ",
|
||||
"forms": {
|
||||
|
|
@ -1961,7 +1961,7 @@
|
|||
"infinitive": "לִיפּוֹל",
|
||||
"slug": "1230-lipol",
|
||||
"root": "נ - פ - ל",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִפֹּל",
|
||||
"forms": {
|
||||
|
|
@ -2317,7 +2317,7 @@
|
|||
"infinitive": "לחון",
|
||||
"slug": "634-lachon",
|
||||
"root": "ח - נ - ן",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לָחֹן",
|
||||
"forms": {
|
||||
|
|
@ -2495,7 +2495,7 @@
|
|||
"infinitive": "לקרוא",
|
||||
"slug": "13-likro",
|
||||
"root": "ק - ר - א",
|
||||
"binyan": "",
|
||||
"binyan": "Pa'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לִקְרֹא",
|
||||
"forms": {
|
||||
|
|
@ -2851,7 +2851,7 @@
|
|||
"infinitive": "להיבדק",
|
||||
"slug": "178-lehibadek",
|
||||
"root": "ב - ד - ק",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהִבָּדֵק",
|
||||
"forms": {
|
||||
|
|
@ -3207,7 +3207,7 @@
|
|||
"infinitive": "להיהרג",
|
||||
"slug": "474-lehehareg",
|
||||
"root": "ה - ר - ג",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהֵהָרֵג",
|
||||
"forms": {
|
||||
|
|
@ -3563,7 +3563,7 @@
|
|||
"infinitive": "להישאר",
|
||||
"slug": "47-lehishaer",
|
||||
"root": "שׁ - א - ר",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהִשָּׁאֵר",
|
||||
"forms": {
|
||||
|
|
@ -3741,7 +3741,7 @@
|
|||
"infinitive": "להיפגע",
|
||||
"slug": "1591-lehipagea",
|
||||
"root": "פ - ג - ע",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהִפָּגֵעַ",
|
||||
"forms": {
|
||||
|
|
@ -3919,7 +3919,7 @@
|
|||
"infinitive": "להיוולד",
|
||||
"slug": "800-lehivaled",
|
||||
"root": "י - ל - ד",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהִוָּלֵד",
|
||||
"forms": {
|
||||
|
|
@ -4275,7 +4275,7 @@
|
|||
"infinitive": "להיסוג",
|
||||
"slug": "1323-laseget",
|
||||
"root": "ס - ו - ג",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לָסֶגֶת",
|
||||
"forms": {
|
||||
|
|
@ -4453,7 +4453,7 @@
|
|||
"infinitive": "להימצא",
|
||||
"slug": "1084-lehimatze",
|
||||
"root": "מ - צ - א",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהִמָּצֵא",
|
||||
"forms": {
|
||||
|
|
@ -4631,7 +4631,7 @@
|
|||
"infinitive": "להיבנות",
|
||||
"slug": "235-lehibanot",
|
||||
"root": "ב - נ - ה",
|
||||
"binyan": "",
|
||||
"binyan": "Nif'al",
|
||||
"is_passive": false,
|
||||
"reference_form": "לְהִבָּנוֹת",
|
||||
"forms": {
|
||||
|
|
|
|||
Loading…
Reference in a new issue