# Hebrew Flash Cards — Unified Data Schema (words.json)
# Revised based on Nevo's feedback (2026-03-08)
#
# Top-level: dict keyed by unique_key
#   Unique key: nikkud word for most entries (e.g. "אָב")
#   For 146 homographs (same nikkud, different meaning): "word|pos" e.g. "אָח|Noun"
#   For same nikkud AND same pos: "word|pos|meaning" e.g. "אָח|Noun|brother"
#
# Hebrew text fields use nikkud/ktiv_male subfields:
#   field:
#     nikkud: "אָב"  # with nikkud (hebstyle=mo)
#     ktiv_male: "אב"  # plene spelling (hebstyle=vl)
# This pattern applies to: word, singular, plural, construct forms,
# conjugated forms, etc.
#
# Pronoun notation for conjugation forms uses grammatical codes:
#   1s, 1p, 2ms, 2fs, 2mp, 2fp, 3ms, 3fs, 3mp, 3fp
#   (not Hebrew pronoun strings, which are ambiguous for gender in some persons)

entry:
  # --- Core Identity ---
  word:
    nikkud: "אָב"
    ktiv_male: "אב"
  slug: "6009-av"  # Pealim URL slug (e.g. pealim.com/dict/6009-av/)
  root: ["א", "ב"]  # Shoresh as list of consonant chars
  pos: "Noun"  # Part of speech in English (as from pealim)
  pos_hebrew: "שֵׁם עֶצֶם"  # Part of speech in Hebrew (with nikkud)
  # English meaning (cleaned — no inline emoji, no Hebrew prepositions)
  meaning: "father"
  # Original meaning as scraped (may contain emoji and/or Hebrew preps)
  meaning_raw: "father 👨"
  # Hebrew preposition(s) governing this word, extracted from meaning_raw
  # (e.g. "(על)" → "על"); null if none
  prep: "על"
  audio_url: "https://..."  # Pealim audio URL
  # Local filename (slug-based for confusables, consonant-based otherwise)
  audio_file: "6009-av.mp3"
  tags: ""  # Pealim tags if any
  # ISO date of most recent pealim.com scrape for this entry
  last_scrape_date: "2026-03-08"

  # --- Identity & Progress ---
  vocab_legacy_guid: "abc123..."  # Vocab note GUID from legacy_guid_map.json
  # Other note GUIDs stored in their respective sections
  # (cloze, plurals, conjugation)

  # --- Frequency ---
  # Hebrew frequency rank from hermitdave/FrequencyWords he_50k (ktiv male based)
  frequency: 412
  # Adjusted frequency for confusable homographs (deferred to future sprint)
  pseudo_frequency: null

  # --- Display Enrichment ---
  emoji: "👨"
  emoji_source: "ai_vetted"  # One of: ai_vetted, from_pealim, null
  # Whether to show on cards (false until emoji vetting is done)
  emoji_visible: false
  image: "father.jpg"  # Wikipedia/Commons image filename, or null
  image_source: "wikipedia"  # One of: wikipedia, commons, null
  hint: ""  # Eng→Heb disambiguation hint (from refined_meanings.json)

  # --- Shared Roots ---
  # List of unique_keys of other words sharing the same root.
  # Computed by iterating all entries and grouping by root.
  shared_roots: []

  # --- Confusables ---
  # List of unique_keys sharing same ktiv_male, or null
  # e.g. ["אָח|Noun|brother", "אָח|Noun|fireplace"]
  confusable_group: null

  # --- Example Sentences ---
  examples:
    vetted:  # AI-vetted sentences from Ben Yehuda / EPUB corpus
      - text: "הָאָב הָלַךְ לַעֲבוֹדָה"
        # One of: ben_yehuda, epub_little_prince, epub_alice, ...
        source: "ben_yehuda"
        vetted: true
    cloze:  # Best sentence for cloze card, or null
      text: "הָאָב הָלַךְ לַעֲבוֹדָה"
      cloze_word_start: 0  # Character offset of the clozed word in text
      # End offset — enables exact extraction regardless of nikkud changes
      cloze_word_end: 4
      cloze_hint: "family member"
      cloze_guid: "def456..."  # GUID for the cloze note
      # Median frequency rank of context words (lower = easier); optional
      difficulty_score: 234
    # NOTE(review): nesting was reconstructed from a flattened file; confirm
    # rejected_count belongs to `examples` rather than to `cloze`.
    rejected_count: 0

  # --- Noun-specific: Inflection Forms ---
  noun_inflection: null  # null for non-nouns
  # When populated:
  #   plurals_guid: "ghi789..."  # GUID for plurals deck note
  #   singular:  # null if noun is inherently plural (e.g. bicycle/אופניים)
  #     nikkud: "אָב"
  #     ktiv_male: "אב"
  #   plural:
  #     nikkud: "אָבוֹת"
  #     ktiv_male: "אבות"
  #   singular_audio: "6009-av.mp3"
  #   plural_audio: null  # TODO: scrape from detail pages
  #   construct_singular:
  #     nikkud: "אֲבִי"
  #     ktiv_male: "אבי"
  #   construct_plural:
  #     nikkud: "אֲבוֹת"
  #     ktiv_male: "אבות"
  #   # Scraped from pealim "forms with pronominal affixes" section
  #   pronominal_suffixes:
  #     1s:
  #       nikkud: "אָבִי"
  #       ktiv_male: "אבי"
  #     1p:
  #       nikkud: "אָבִינוּ"
  #       ktiv_male: "אבינו"
  #     2ms: ...
  #     2fs: ...
  #     2mp: ...
  #     2fp: ...
  #     3ms: ...
  #     3fs: ...
  #     3mp: ...
  #     3fp: ...
  #   gender: "masculine"
  #   gender_hebrew:
  #     nikkud: "זָכָר"
  #     ktiv_male: "זכר"
  #   mishkal: "CaCaC"  # English mishkal name (scraped from pealim PoS section)
  #   mishkal_hebrew: "קָטָל"  # Hebrew mishkal name (computed via mapping)

  # --- Verb-specific: Conjugation Data ---
  conjugation: null  # null for non-verbs
  # When populated:
  #   # Whether this verb is in the 71-verb conjugation deck
  #   in_conjugation_deck: true
  #   infinitive:
  #     nikkud: "לִשְׁמֹר"
  #     ktiv_male: "לשמור"
  #   reference_form:  # 3ms past (the citation form)
  #     nikkud: "שָׁמַר"
  #     ktiv_male: "שמר"
  #   binyan: "Pa'al"  # English binyan name
  #   binyan_hebrew: "פָּעַל"  # Hebrew binyan name (with nikkud)
  #   prep: "על"  # Hebrew preposition the verb takes, or null
  #   active_forms:
  #     # person is a grammatical code:
  #     # 1s, 1p, 2ms, 2fs, 2mp, 2fp, 3ms, 3fs, 3mp, 3fp
  #     - person: "1s"
  #       tense: "עָבָר"
  #       form:
  #         nikkud: "שָׁמַרְתִּי"
  #         ktiv_male: "שמרתי"
  #       audio_url: "https://..."
  #       audio_file: null  # For future use
  #   # Same structure as active_forms; non-null only for hif'il/pi'el verbs.
  #   # When non-null, binyan MUST be Hif'il or Pi'el (validated)
  #   hufal_pual_forms: null
  #   # 3ms past of the huf'al/pu'al counterpart, or null
  #   reference_form_passive:
  #     nikkud: "שֻׁמַּר"
  #     ktiv_male: "שומר"

  # --- Adjective-specific ---
  adjective_inflection: null  # null for non-adjectives
  # When populated:
  #   ms:
  #     nikkud: "גָּדוֹל"
  #     ktiv_male: "גדול"
  #   fs:
  #     nikkud: "גְּדוֹלָה"
  #     ktiv_male: "גדולה"
  #   mp:
  #     nikkud: "גְּדוֹלִים"
  #     ktiv_male: "גדולים"
  #   fp:
  #     nikkud: "גְּדוֹלוֹת"
  #     ktiv_male: "גדולות"
  #   mishkal: "CaCaC"  # English mishkal name (scraped from pealim PoS section)
  #   mishkal_hebrew: "קָטָל"  # Hebrew mishkal name (computed via mapping)

  # --- Preposition-specific ---
  preposition_inflection: null  # null for non-prepositions
  # When populated:
  #   1s:
  #     nikkud: "שֶׁלִּי"
  #     ktiv_male: "שלי"
  #   1p:
  #     nikkud: "שֶׁלָּנוּ"
  #     ktiv_male: "שלנו"
  #   2ms:
  #     nikkud: "שֶׁלְּךָ"
  #     ktiv_male: "שלך"
  #   2fs:
  #     nikkud: "שֶׁלָּךְ"
  #     ktiv_male: "שלך"
  #   2mp:
  #     nikkud: "שֶׁלָּכֶם"
  #     ktiv_male: "שלכם"
  #   2fp:
  #     nikkud: "שֶׁלָּכֶן"
  #     ktiv_male: "שלכן"
  #   3ms:
  #     nikkud: "שֶׁלּוֹ"
  #     ktiv_male: "שלו"
  #   3fs:
  #     nikkud: "שֶׁלָּהּ"
  #     ktiv_male: "שלה"
  #   3mp:
  #     nikkud: "שֶׁלָּהֶם"
  #     ktiv_male: "שלהם"
  #   3fp:
  #     nikkud: "שֶׁלָּהֶן"
  #     ktiv_male: "שלהן"