From 14d567a261df5e0a950d9b1a6a9fafdff7b48761 Mon Sep 17 00:00:00 2001
From: Sochen <sochen@nevo.engineer>
Date: Sun, 15 Mar 2026 13:30:13 +0000
Subject: [PATCH] schema: add difficulty_score field + update spec with
 MIN_WORDS=3

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 SCHEMA.yaml                                                     | 1 +
 .../specs/2026-03-15-adaptive-sentence-difficulty-design.md     | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/SCHEMA.yaml b/SCHEMA.yaml
index f3b7450..2ae43d2 100644
--- a/SCHEMA.yaml
+++ b/SCHEMA.yaml
@@ -69,6 +69,7 @@ entry:
       cloze_word_end: 4            # End offset — enables exact extraction regardless of nikkud changes
       cloze_hint: "family member"
       cloze_guid: "def456..."      # GUID for the cloze note
+      difficulty_score: 234        # Median frequency rank of context words (lower = easier); optional
     rejected_count: 0
 
   # --- Noun-specific: Inflection Forms ---
diff --git a/docs/superpowers/specs/2026-03-15-adaptive-sentence-difficulty-design.md b/docs/superpowers/specs/2026-03-15-adaptive-sentence-difficulty-design.md
index 7ab5e58..3cffe9c 100644
--- a/docs/superpowers/specs/2026-03-15-adaptive-sentence-difficulty-design.md
+++ b/docs/superpowers/specs/2026-03-15-adaptive-sentence-difficulty-design.md
@@ -54,6 +54,8 @@ def _score(s: dict) -> tuple[int,]:
 
 New scoring replaces length with frequency-based difficulty. The `_score` function gains access to the frequency pipeline via closure over the nikkud_map, nikkud_index, and freq_data built once at the start of `update_words_json()`.
 
+**Minimum sentence length:** The minimum is reduced from 4 words to 3 words (`MIN_WORDS = 3` in `epub_examples.py`). Hebrew is more concise than English, so 3-word sentences are valid and common. Lowering the threshold expands the candidate pool for cloze selection.
+
 **Behavioral change:** Because `pool.sort(key=_score)` determines which 3 sentences are selected as `best = pool[:3]`, changing the scoring function changes **which sentences are selected**, not just their order. This is intentional — we want the easiest sentences as cloze candidates, not the closest-to-9-words ones. Existing cloze GUIDs will be preserved when the same sentence text is re-selected; entries where a different sentence wins will get new GUIDs.
 
 ## Data Model Changes