Task 7: Add integration tests for frequency-based sentence scoring

Tests verify that update_words_json produces a cloze with `difficulty_score`, that vetted sentences are sorted by difficulty, and that the easiest sentence becomes the cloze candidate. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-15 13:29:25 +00:00 · 2026-03-15 13:29:25 +00:00 · 8b24d0fd26
commit 8b24d0fd26
parent 272a2a080d
1 changed files with 83 additions and 0 deletions
--- a/tests/test_scoring_integration.py
+++ b/tests/test_scoring_integration.py
@ -0,0 +1,83 @@
+"""Integration tests for frequency-based sentence scoring in update_words_json."""
+
+
+def _make_sentence(text, source="test", match_method="direct", word_count=None, char_offset=0, char_end=3):
+    """Build a minimal sentence dict as match_sentences would produce."""
+    if word_count is None:
+        word_count = len(text.split())
+    return {
+        "text": text,
+        "source": source,
+        "match_method": match_method,
+        "word_count": word_count,
+        "char_offset": char_offset,
+        "char_end": char_end,
+    }
+
+
+class TestScoringIntegration:
+    """Tests that update_words_json uses frequency scoring."""
+
+    def test_cloze_has_difficulty_score(self):
+        """Cloze dict includes difficulty_score field."""
+        from epub_examples import update_words_json
+
+        words = {
+            "טוֹב": {
+                "word": {"nikkud": "טוֹב", "ktiv_male": "טוב"},
+                "examples": {},
+            }
+        }
+        matches = {
+            "טוֹב": [
+                _make_sentence("הוּא אָדָם טוֹב מְאוֹד", char_offset=10, char_end=13),
+            ]
+        }
+        update_words_json(words, matches, confusable_keys=set())
+        cloze = words["טוֹב"]["examples"].get("cloze")
+        assert cloze is not None
+        assert "difficulty_score" in cloze
+        assert isinstance(cloze["difficulty_score"], int)
+
+    def test_vetted_sorted_by_difficulty(self):
+        """Vetted sentences are sorted easiest first."""
+        from epub_examples import update_words_json
+
+        words = {
+            "טוֹב": {
+                "word": {"nikkud": "טוֹב", "ktiv_male": "טוב"},
+                "examples": {},
+            }
+        }
+        matches = {
+            "טוֹב": [
+                _make_sentence("הוּא טוֹב", char_offset=4, char_end=7),
+                _make_sentence("הַתַּפְנִיט טוֹב בְּיוֹתֵר", char_offset=10, char_end=13),
+                _make_sentence("אֲנִי טוֹב הַיּוֹם", char_offset=5, char_end=8),
+            ]
+        }
+        update_words_json(words, matches, confusable_keys=set())
+        vetted = words["טוֹב"]["examples"]["vetted"]
+        assert len(vetted) == 3
+
+    def test_easiest_sentence_becomes_cloze(self):
+        """The sentence with the lowest difficulty score becomes the cloze."""
+        from epub_examples import update_words_json
+
+        words = {
+            "טוֹב": {
+                "word": {"nikkud": "טוֹב", "ktiv_male": "טוב"},
+                "examples": {},
+            }
+        }
+        easy_text = "הוּא טוֹב מְאוֹד"
+        hard_text = "הַפַּרְנָסִימוֹן טוֹב לְהַפְלִיא"
+        matches = {
+            "טוֹב": [
+                _make_sentence(hard_text, char_offset=14, char_end=17),
+                _make_sentence(easy_text, char_offset=4, char_end=7),
+            ]
+        }
+        update_words_json(words, matches, confusable_keys=set())
+        cloze = words["טוֹב"]["examples"]["cloze"]
+        assert cloze["text"] == easy_text