@@ -860,7 +887,6 @@ def build_vocab_deck(
meaning = re.sub(r"\s{2,}", " ", meaning).strip(", ;:")
meaning = re.sub(r"(\w)\(", r"\1 (", meaning) # space before opening paren
meaning = re.sub(r",(\S)", r", \1", meaning) # space after comma
- meaning_raw = entry.get("meaning_raw", "") or ""
slug = entry.get("slug", "") or ""
frequency = entry.get("frequency") or 999_999
audio_file = entry.get("audio_file", "") or ""
@@ -906,14 +932,9 @@ def build_vocab_deck(
emoji_str = emoji_lookup[kw]
break
- # Extract Hebrew prepositions: prefer upstream-parsed prep field, fall back to meaning_raw scan
- # (fallback covers entries scraped before prep was moved upstream)
+ # Hebrew prepositions — extracted upstream by list scraper
entry_prep = entry.get("prep")
- if entry_prep:
- prep_str = " ".join(f"({p})" for p in entry_prep.split())
- else:
- preps = HBPAREN_RE.findall(meaning_raw)
- prep_str = " ".join(f"({p})" for p in preps)
+ prep_str = " ".join(f"({p})" for p in entry_prep.split()) if entry_prep else ""
# Audio — use audio_file from entry; for confusables it's already slug-based
audio_tag = ""
@@ -1123,25 +1144,12 @@ def build_conj_deck(
root = ".".join(root_list)
voice = VOICE_MAP.get(binyan, "")
- meaning_raw = entry.get("meaning_raw", "") or ""
meaning = entry.get("meaning", "") or ""
- # Extract Hebrew preposition — strip from meaning, show on Hebrew side
+ # Hebrew preposition — extracted upstream by scraper
prep_str = ""
- conj_prep = conj.get("prep")
+ conj_prep = conj.get("prep") or entry.get("prep")
if conj_prep:
- # Strip any parentheses from stored prep value
prep_str = conj_prep.strip("() ")
- elif meaning_raw:
- preps = HBPAREN_RE.findall(meaning_raw)
- if preps:
- prep_str = preps[0]
- # Strip Hebrew prepositions from English meaning to avoid duplication
- if prep_str:
- meaning = HBPAREN_RE.sub("", meaning).strip()
- # Also strip from meaning_raw patterns like "(על)"
- meaning = re.sub(r"\(\s*" + re.escape(prep_str) + r"\s*-?\s*\)", "", meaning).strip()
- # Clean up double spaces and trailing commas
- meaning = re.sub(r"\s{2,}", " ", meaning).strip(", ")
related = [(f, w, m) for f, w, m in root_words.get(root, []) if w != infinitive]
if related:
@@ -1461,7 +1469,7 @@ def build_confusables_deck(
continue
word_no_nik = unique_entries[0]["word"].get("ktiv_male", "")
- words_display = " / ".join(e["word"]["nikkud"] for e in unique_entries)
+ words_display = word_no_nik # Show ktiv male (shared form) on front
defs_parts: list[str] = []
audio_parts: list[str] = []
@@ -1530,8 +1538,8 @@ def write_conf_apkg(
PLURAL_FRONT_SG = """
"""
PLURAL_BACK_SG = """
@@ -1547,14 +1555,14 @@ PLURAL_BACK_SG = """
PLURAL_FRONT_PL = """
{{#Gender}}
מִין:{{Gender}}
{{/Gender}}
{{#Mishkal}}
מִשְׁקָל:{{Mishkal}}
{{/Mishkal}}
@@ -1651,9 +1659,9 @@ def build_plural_deck(
irregular_count = len(irregulars)
target_regular = irregular_count * 2
mishkal_count = len(by_mishkal) or 1
- per_mishkal = max(2, target_regular // mishkal_count)
+ # Over-sample each mishkal (1.5x its even share, minimum 3) to compensate for small patterns; the pool is trimmed to target_regular below
+ per_mishkal = max(3, (target_regular * 3) // (mishkal_count * 2))
- selected: list[tuple[str, dict, dict]] = list(irregulars)
regular_pool: list[tuple[str, dict, dict]] = []
for _mishkal, entries in sorted(by_mishkal.items()):
entries.sort(key=lambda e: e[1].get("frequency") or 999_999)
@@ -1664,7 +1672,24 @@ def build_plural_deck(
regular_pool.sort(key=lambda e: e[1].get("frequency") or 999_999)
regular_pool = regular_pool[:target_regular]
- selected.extend(regular_pool)
+ # Sort both pools by frequency, then interleave them so irregulars are spread evenly among regulars at a 2:1 regular:irregular ratio
+ irregulars.sort(key=lambda e: e[1].get("frequency") or 999_999)
+ regular_pool.sort(key=lambda e: e[1].get("frequency") or 999_999)
+
+ # Interleave: for every 1 irregular, insert 2 regulars
+ selected: list[tuple[str, dict, dict]] = []
+ ri = 0 # regular index
+ for _ii, irr in enumerate(irregulars):
+ # Insert 2 regulars before each irregular (when available)
+ for _ in range(2):
+ if ri < len(regular_pool):
+ selected.append(regular_pool[ri])
+ ri += 1
+ selected.append(irr)
+ # Append remaining regulars
+ while ri < len(regular_pool):
+ selected.append(regular_pool[ri])
+ ri += 1
note_count = 0
for _unique_key, entry, noun_inflection in selected:
diff --git a/data/words.json b/data/words.json
index 09973ae..0a707fc 100644
--- a/data/words.json
+++ b/data/words.json
@@ -28003,7 +28003,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ motorbike, motorcycle",
+ "meaning": "motorbike, motorcycle",
"meaning_raw": "🏍️ motorbike, motorcycle",
"audio_url": "https://audio.pealim.com/v0/1s/1s8poqw78fk8v.mp3",
"audio_file": "אופנוע.mp3",
@@ -107000,7 +107000,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ stadium",
+ "meaning": "stadium",
"meaning_raw": "🏟️ stadium",
"audio_url": "https://audio.pealim.com/v0/vl/vln4sqr0ez7n.mp3",
"audio_file": "אצטדיון.mp3",
@@ -197835,7 +197835,7 @@
],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "recreation, leisure, pleasure️; spending (time, resource)",
+ "meaning": "recreation, leisure, pleasure; spending (time, resource)",
"meaning_raw": "recreation, leisure, pleasure🏖️; spending (time, resource)",
"audio_url": "https://audio.pealim.com/v0/12/12s7w66rute00.mp3",
"audio_file": "בילוי.mp3",
@@ -573280,7 +573280,7 @@
],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ smile",
+ "meaning": "smile",
"meaning_raw": "☺️ smile",
"audio_url": "https://audio.pealim.com/v0/1o/1o0qkdlzzgz5y.mp3",
"audio_file": "חיוך.mp3",
@@ -723335,7 +723335,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ telephone; (coll.) telephone call",
+ "meaning": "telephone; (coll.) telephone call",
"meaning_raw": "☎️ telephone; (coll.) telephone call",
"audio_url": "https://audio.pealim.com/v0/q4/q4vlmpn3xv25.mp3",
"audio_file": "טלפון.mp3",
@@ -768548,7 +768548,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "diamond; ️ diamonds (cards)",
+ "meaning": "diamond; diamonds (cards)",
"meaning_raw": "💎 diamond; ♦️ diamonds (cards)",
"audio_url": "https://audio.pealim.com/v0/2n/2ng3y19ptzo6.mp3",
"audio_file": "יהלום.mp3",
@@ -778091,7 +778091,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ yacht",
+ "meaning": "yacht",
"meaning_raw": "🛥️ yacht",
"audio_url": "https://audio.pealim.com/v0/fn/fnqancplf3p9.mp3",
"audio_file": "יכטה.mp3",
@@ -927031,7 +927031,7 @@
],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "ability, fitness, capability; ️ physical fitness",
+ "meaning": "ability, fitness, capability; physical fitness",
"meaning_raw": "ability, fitness, capability; 🏋️ physical fitness",
"audio_url": "https://audio.pealim.com/v0/15/15dggrlz3ui39.mp3",
"audio_file": "כושר.mp3",
@@ -1003336,7 +1003336,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ meditation",
+ "meaning": "meditation",
"meaning_raw": "🧘♂️ meditation",
"audio_url": "https://audio.pealim.com/v0/3w/3wecs1tujod3.mp3",
"audio_file": "מדיטציה.mp3",
@@ -1019869,7 +1019869,7 @@
],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ recycling",
+ "meaning": "recycling",
"meaning_raw": "♻️ recycling",
"audio_url": "https://audio.pealim.com/v0/1y/1y0idioumf0oj.mp3",
"audio_file": "מיחזור.mp3",
@@ -1107382,7 +1107382,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ sunglasses",
+ "meaning": "sunglasses",
"meaning_raw": "🕶️ sunglasses",
"audio_url": "https://audio.pealim.com/v0/18/18evs0abnwrg0.mp3",
"audio_file": "משקפי שמש.mp3",
@@ -1384284,7 +1384284,7 @@
],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ scissors",
+ "meaning": "scissors",
"meaning_raw": "✂️ scissors",
"audio_url": "https://audio.pealim.com/v0/gl/gllnqtowivaa.mp3",
"audio_file": "מספריים.mp3",
@@ -1451640,7 +1451640,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ spider",
+ "meaning": "spider",
"meaning_raw": "🕷️ spider",
"audio_url": "https://audio.pealim.com/v0/1u/1umo4y75g98kf.mp3",
"audio_file": "עכביש.mp3",
@@ -1451701,7 +1451701,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "mouse ️",
+ "meaning": "mouse",
"meaning_raw": "mouse 🐀🖱️",
"audio_url": "https://audio.pealim.com/v0/eu/euem3vbl9inl.mp3",
"audio_file": "עכבר.mp3",
@@ -1548300,7 +1548300,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ park",
+ "meaning": "park",
"meaning_raw": "🏞️ park",
"audio_url": "https://audio.pealim.com/v0/fb/fb2nrwhv75ib.mp3",
"audio_file": "פארק.mp3",
@@ -1597759,7 +1597759,7 @@
"root": [],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ pepper",
+ "meaning": "pepper",
"meaning_raw": "🌶️ pepper",
"audio_url": "https://audio.pealim.com/v0/1w/1wnt7hycmb6re.mp3",
"audio_file": "פלפל.mp3",
@@ -1891786,7 +1891786,7 @@
],
"pos": "Noun",
"pos_hebrew": "שֵׁם עֶצֶם",
- "meaning": "️ slide projector; radiator; barrel jacket (weaponry)",
+ "meaning": "slide projector; radiator; barrel jacket (weaponry)",
"meaning_raw": "📽️ slide projector; radiator; barrel jacket (weaponry)",
"audio_url": "https://audio.pealim.com/v0/ur/urcm907agh5r.mp3",
"audio_file": "מקרן.mp3",
diff --git a/pealim_list_scrape.py b/pealim_list_scrape.py
index 5bb1190..011e8d5 100644
--- a/pealim_list_scrape.py
+++ b/pealim_list_scrape.py
@@ -82,7 +82,7 @@ BINYAN_HEBREW: dict[str, str] = {
# Regex for extracting emoji characters
EMOJI_RE = re.compile(
- r"[\U0001F300-\U0001FFFF\U00002600-\U000027BF\U0001F000-\U0001F9FF\u2600-\u26FF\u2700-\u27BF]+",
+ r"[\U0001F300-\U0001FFFF\U00002600-\U000027BF\U0001F000-\U0001F9FF\u2600-\u26FF\u2700-\u27BF\uFE0E\uFE0F\u200D]+",
re.UNICODE,
)
diff --git a/release.py b/release.py
index 082176b..bd7e306 100644
--- a/release.py
+++ b/release.py
@@ -24,7 +24,7 @@ sys.path.insert(0, "/home/node/projects")
import load_keeshare
REPO_API = "https://git.nevo.engineer/api/v1/repos/nevo/hebrew_flash_cards"
-FORGEJO_TOKEN: str = load_keeshare.get_entry("git.nevo.engineer")["API_TOKEN"]
+FORGEJO_TOKEN: str = load_keeshare.get_entry("git.nevo.engineer")["password"]
OUTPUT_DIR = Path(__file__).parent / "output"
# All deck variants to include in release