"""Tests for adjective and preposition detail page parsing in pealim_detail_scrape.py.""" import sys from pathlib import Path import pytest sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from pealim_detail_scrape import ( _parse_adjective_table, _parse_adjective_table_vl, _parse_preposition_table, _parse_preposition_table_vl, _scrape_adjective_detail, _scrape_preposition_detail, ) # --------------------------------------------------------------------------- # Fixtures — real HTML snippets from pealim.com # --------------------------------------------------------------------------- ADJECTIVE_MO_TABLE = """
Singular Plural
Masculine Feminine Masculine Feminine
🔊 אֲבִיבִי
spring-like, vernal
🔊 אֲבִיבִית
spring-like, vernal
🔊 אֲבִיבִיִּים
spring-like, vernal
🔊 אֲבִיבִיּוֹת
spring-like, vernal
""" # VL version: menukad spans contain unvowelled text (hebstyle=vl) ADJECTIVE_VL_TABLE = """
אביבי
אביבית
אביביים
אביביות
""" PREPOSITION_MO_TABLE = """
Person Singular Plural
Masculine Feminine Masculine Feminine
1st
🔊 שֶׁלִּי
of mine
🔊 שֶׁלָּנוּ
of ours
2nd
🔊 שֶׁלְּךָ
of yours m. sg.
🔊 שֶׁלָּךְ
of yours f. sg.
🔊 שֶׁלָּכֶם
of yours m. pl.
🔊 שֶׁלָּכֶן
of yours f. pl.
3rd
🔊 שֶׁלּוֹ
of his
🔊 שֶׁלָּהּ
of hers
🔊 שֶׁלָּהֶם
of theirs m.
🔊 שֶׁלָּהֶן
of theirs f.
""" PREPOSITION_VL_TABLE = """
1st
שלי
שלנו
2nd
שלך
שלך
שלכם
שלכן
3rd
שלו
שלה
שלהם
שלהן
""" # Minimal full-page wrappers so _scrape_*_detail() can parse them _ADJECTIVE_MO_PAGE = f"{ADJECTIVE_MO_TABLE}" _ADJECTIVE_VL_PAGE = f"{ADJECTIVE_VL_TABLE}" _PREPOSITION_MO_PAGE = f"{PREPOSITION_MO_TABLE}" _PREPOSITION_VL_PAGE = f"{PREPOSITION_VL_TABLE}" # --------------------------------------------------------------------------- # Adjective table tests # --------------------------------------------------------------------------- class TestParseAdjectiveTable: """Tests for _parse_adjective_table (mo/nikkud page).""" def test_returns_four_form_keys(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml")) assert set(result.keys()) == {"ms", "fs", "mp", "fp"} def test_ms_nikkud(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml")) assert result["ms"]["nikkud"] == "אֲבִיבִי" def test_fs_nikkud(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml")) assert result["fs"]["nikkud"] == "אֲבִיבִית" def test_mp_nikkud(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml")) assert result["mp"]["nikkud"] == "אֲבִיבִיִּים" def test_fp_nikkud(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml")) assert result["fp"]["nikkud"] == "אֲבִיבִיּוֹת" def test_audio_url_present(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml")) assert result["ms"]["audio_url"].startswith("https://audio.pealim.com/") def test_empty_on_missing_table(self) -> None: result = _parse_adjective_table(__import__("bs4").BeautifulSoup("", "lxml")) assert result == {} class TestParseAdjectiveTableVl: """Tests for _parse_adjective_table_vl (ktiv male page).""" def test_returns_four_form_keys(self) -> None: result = _parse_adjective_table_vl(__import__("bs4").BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml")) assert set(result.keys()) == {"ms", "fs", "mp", "fp"} def test_ms_ktiv(self) -> None: result = _parse_adjective_table_vl(__import__("bs4").BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml")) assert result["ms"] == "אביבי" def test_fs_ktiv(self) -> None: result = _parse_adjective_table_vl(__import__("bs4").BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml")) assert result["fs"] == "אביבית" def test_mp_ktiv(self) -> None: result = _parse_adjective_table_vl(__import__("bs4").BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml")) assert result["mp"] == "אביביים" def test_fp_ktiv(self) -> None: result = _parse_adjective_table_vl(__import__("bs4").BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml")) assert result["fp"] == "אביביות" # --------------------------------------------------------------------------- # _scrape_adjective_detail tests # --------------------------------------------------------------------------- class TestScrapeAdjectiveDetail: """Tests for _scrape_adjective_detail — schema compliance.""" @pytest.fixture() def result(self) -> dict: return _scrape_adjective_detail("9098-avivi", _ADJECTIVE_MO_PAGE, _ADJECTIVE_VL_PAGE) def test_returns_non_empty_dict(self, result: dict) -> None: assert result def test_ms_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["ms"]["nikkud"] == "אֲבִיבִי" assert result["ms"]["ktiv_male"] == "אביבי" def test_fs_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["fs"]["nikkud"] == "אֲבִיבִית" assert result["fs"]["ktiv_male"] == "אביבית" def test_mp_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["mp"]["nikkud"] == "אֲבִיבִיִּים" assert result["mp"]["ktiv_male"] == "אביביים" def test_fp_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["fp"]["nikkud"] == "אֲבִיבִיּוֹת" assert result["fp"]["ktiv_male"] == "אביביות" def test_mishkal_key_present(self, result: dict) -> None: # mishkal may be None since no PoS section is in our minimal fixture assert "mishkal" in result def test_mishkal_hebrew_key_present(self, result: dict) -> None: assert "mishkal_hebrew" in result def test_all_schema_keys_present(self, result: dict) -> None: expected = {"ms", "fs", "mp", "fp", "mishkal", "mishkal_hebrew"} assert expected.issubset(result.keys()) def test_empty_on_no_table(self) -> None: result = _scrape_adjective_detail("missing", "", "") assert result == {} # --------------------------------------------------------------------------- # Preposition table tests # --------------------------------------------------------------------------- class TestParsePrepositionTable: """Tests for _parse_preposition_table (mo/nikkud page).""" @pytest.fixture() def result(self) -> dict: return _parse_preposition_table(__import__("bs4").BeautifulSoup(PREPOSITION_MO_TABLE, "lxml")) def test_returns_ten_form_keys(self, result: dict) -> None: expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"} assert set(result.keys()) == expected def test_1s_nikkud(self, result: dict) -> None: assert result["1s"]["nikkud"] == "שֶׁלִּי" def test_1p_nikkud(self, result: dict) -> None: assert result["1p"]["nikkud"] == "שֶׁלָּנוּ" def test_2ms_nikkud(self, result: dict) -> None: assert result["2ms"]["nikkud"] == "שֶׁלְּךָ" def test_2fs_nikkud(self, result: dict) -> None: assert result["2fs"]["nikkud"] == "שֶׁלָּךְ" def test_2mp_nikkud(self, result: dict) -> None: assert result["2mp"]["nikkud"] == "שֶׁלָּכֶם" def test_2fp_nikkud(self, result: dict) -> None: assert result["2fp"]["nikkud"] == "שֶׁלָּכֶן" def test_3ms_nikkud(self, result: dict) -> None: assert result["3ms"]["nikkud"] == "שֶׁלּוֹ" def test_3fs_nikkud(self, result: dict) -> None: assert result["3fs"]["nikkud"] == "שֶׁלָּהּ" def test_3mp_nikkud(self, result: dict) -> None: assert result["3mp"]["nikkud"] == "שֶׁלָּהֶם" def test_3fp_nikkud(self, result: dict) -> None: assert result["3fp"]["nikkud"] == "שֶׁלָּהֶן" def test_audio_url_present(self, result: dict) -> None: assert result["1s"]["audio_url"].startswith("https://audio.pealim.com/") def test_empty_on_missing_table(self) -> None: result = _parse_preposition_table(__import__("bs4").BeautifulSoup("", "lxml")) assert result == {} class TestParsePrepositionTableVl: """Tests for _parse_preposition_table_vl (ktiv male page).""" @pytest.fixture() def result(self) -> dict: return _parse_preposition_table_vl(__import__("bs4").BeautifulSoup(PREPOSITION_VL_TABLE, "lxml")) def test_returns_ten_form_keys(self, result: dict) -> None: expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"} assert set(result.keys()) == expected def test_1s_ktiv(self, result: dict) -> None: assert result["1s"] == "שלי" def test_1p_ktiv(self, result: dict) -> None: assert result["1p"] == "שלנו" def test_2ms_ktiv(self, result: dict) -> None: assert result["2ms"] == "שלך" def test_3ms_ktiv(self, result: dict) -> None: assert result["3ms"] == "שלו" def test_3fp_ktiv(self, result: dict) -> None: assert result["3fp"] == "שלהן" # --------------------------------------------------------------------------- # _scrape_preposition_detail tests # --------------------------------------------------------------------------- class TestScrapePrepositionDetail: """Tests for _scrape_preposition_detail — schema compliance.""" @pytest.fixture() def result(self) -> dict: return _scrape_preposition_detail("2643-shel", _PREPOSITION_MO_PAGE, _PREPOSITION_VL_PAGE) def test_returns_non_empty_dict(self, result: dict) -> None: assert result def test_all_ten_person_keys_present(self, result: dict) -> None: expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"} assert expected.issubset(result.keys()) def test_1s_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["1s"]["nikkud"] == "שֶׁלִּי" assert result["1s"]["ktiv_male"] == "שלי" def test_1p_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["1p"]["nikkud"] == "שֶׁלָּנוּ" assert result["1p"]["ktiv_male"] == "שלנו" def test_2ms_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["2ms"]["nikkud"] == "שֶׁלְּךָ" assert result["2ms"]["ktiv_male"] == "שלך" def test_3ms_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["3ms"]["nikkud"] == "שֶׁלּוֹ" assert result["3ms"]["ktiv_male"] == "שלו" def test_3fs_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["3fs"]["nikkud"] == "שֶׁלָּהּ" assert result["3fs"]["ktiv_male"] == "שלה" def test_3fp_has_nikkud_and_ktiv(self, result: dict) -> None: assert result["3fp"]["nikkud"] == "שֶׁלָּהֶן" assert result["3fp"]["ktiv_male"] == "שלהן" def test_empty_on_no_table(self) -> None: result = _scrape_preposition_detail("missing", "", "") assert result == {} # --------------------------------------------------------------------------- # Tests for _parse_noun_gender_mishkal mishkal extraction # --------------------------------------------------------------------------- from bs4 import BeautifulSoup # noqa: E402 from pealim_detail_scrape import _parse_noun_gender_mishkal # noqa: E402 class TestNounGenderMishkal: def test_noun_with_mishkal(self): html = '

Noun – ketel pattern, masculine

' soup = BeautifulSoup(html, "html.parser") gender, mishkal = _parse_noun_gender_mishkal(soup) assert gender == "masculine" assert mishkal == "ketel" def test_noun_without_mishkal(self): html = "

Noun – masculine

" soup = BeautifulSoup(html, "html.parser") gender, mishkal = _parse_noun_gender_mishkal(soup) assert gender == "masculine" assert mishkal == "" def test_adjective_mishkal(self): html = '

Adjective – katul pattern

' soup = BeautifulSoup(html, "html.parser") _, mishkal = _parse_noun_gender_mishkal(soup) assert mishkal == "katul" def test_feminine_noun(self): html = '

Noun – ketel pattern, feminine

' soup = BeautifulSoup(html, "html.parser") gender, mishkal = _parse_noun_gender_mishkal(soup) assert gender == "feminine" assert mishkal == "ketel"