diff --git a/tests/test_scoring_integration.py b/tests/test_scoring_integration.py new file mode 100644 index 0000000..4a329f0 --- /dev/null +++ b/tests/test_scoring_integration.py @@ -0,0 +1,83 @@ +"""Integration tests for frequency-based sentence scoring in update_words_json.""" + + +def _make_sentence(text, source="test", match_method="direct", word_count=None, char_offset=0, char_end=3): + """Build a minimal sentence dict as match_sentences would produce.""" + if word_count is None: + word_count = len(text.split()) + return { + "text": text, + "source": source, + "match_method": match_method, + "word_count": word_count, + "char_offset": char_offset, + "char_end": char_end, + } + + +class TestScoringIntegration: + """Tests that update_words_json uses frequency scoring.""" + + def test_cloze_has_difficulty_score(self): + """Cloze dict includes difficulty_score field.""" + from epub_examples import update_words_json + + words = { + "טוֹב": { + "word": {"nikkud": "טוֹב", "ktiv_male": "טוב"}, + "examples": {}, + } + } + matches = { + "טוֹב": [ + _make_sentence("הוּא אָדָם טוֹב מְאוֹד", char_offset=10, char_end=13), + ] + } + update_words_json(words, matches, confusable_keys=set()) + cloze = words["טוֹב"]["examples"].get("cloze") + assert cloze is not None + assert "difficulty_score" in cloze + assert isinstance(cloze["difficulty_score"], int) + + def test_vetted_sorted_by_difficulty(self): + """Vetted sentences are sorted easiest first.""" + from epub_examples import update_words_json + + words = { + "טוֹב": { + "word": {"nikkud": "טוֹב", "ktiv_male": "טוב"}, + "examples": {}, + } + } + matches = { + "טוֹב": [ + _make_sentence("הוּא טוֹב", char_offset=4, char_end=7), + _make_sentence("הַתַּפְנִיט טוֹב בְּיוֹתֵר", char_offset=10, char_end=13), + _make_sentence("אֲנִי טוֹב הַיּוֹם", char_offset=5, char_end=8), + ] + } + update_words_json(words, matches, confusable_keys=set()) + vetted = words["טוֹב"]["examples"]["vetted"] + assert len(vetted) == 3 + + def test_easiest_sentence_becomes_cloze(self): + """The sentence with the lowest difficulty score becomes the cloze.""" + from epub_examples import update_words_json + + words = { + "טוֹב": { + "word": {"nikkud": "טוֹב", "ktiv_male": "טוב"}, + "examples": {}, + } + } + easy_text = "הוּא טוֹב מְאוֹד" + hard_text = "הַפַּרְנָסִימוֹן טוֹב לְהַפְלִיא" + matches = { + "טוֹב": [ + _make_sentence(hard_text, char_offset=14, char_end=17), + _make_sentence(easy_text, char_offset=4, char_end=7), + ] + } + update_words_json(words, matches, confusable_keys=set()) + cloze = words["טוֹב"]["examples"]["cloze"] + assert cloze["text"] == easy_text