"""Tests for adjective and preposition detail page parsing in pealim_detail_scrape.py."""
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from pealim_detail_scrape import (
_parse_adjective_table,
_parse_adjective_table_vl,
_parse_preposition_table,
_parse_preposition_table_vl,
_scrape_adjective_detail,
_scrape_preposition_detail,
)
# ---------------------------------------------------------------------------
# Fixtures — real HTML snippets from pealim.com
# ---------------------------------------------------------------------------
# Adjective inflection fixture for the nikkud ("mo") page. It is parsed directly
# by the _parse_adjective_table tests and, through _ADJECTIVE_MO_PAGE, by the
# _scrape_adjective_detail tests, which look up the ms/fs/mp/fp forms.
# NOTE(review): as shown here the literal holds only header text
# (Singular/Plural, Masculine/Feminine) and empty cells — presumably the table
# markup and the Hebrew forms were lost; confirm against the real snapshot.
ADJECTIVE_MO_TABLE = """
| Singular |
Plural |
| Masculine |
Feminine |
Masculine |
Feminine |
|
|
|
|
|
"""
# VL version: menukad spans contain unvowelled text (hebstyle=vl)
# Parsed by the _parse_adjective_table_vl tests and, through _ADJECTIVE_VL_PAGE,
# passed as the third argument to _scrape_adjective_detail.
# NOTE(review): the literal is a single newline as shown here, yet the tests
# expect four ktiv male forms from it — presumably the markup was lost; confirm.
ADJECTIVE_VL_TABLE = """
"""
# Preposition inflection fixture for the nikkud ("mo") page. It is parsed
# directly by the _parse_preposition_table tests and, through
# _PREPOSITION_MO_PAGE, by the _scrape_preposition_detail tests, which look up
# ten person/gender/number keys ("1s" through "3fp").
# NOTE(review): as shown here the literal holds only header text and empty
# cells — presumably the table markup and the Hebrew forms were lost; confirm
# against the real snapshot.
PREPOSITION_MO_TABLE = """
| Person |
Singular |
Plural |
| Masculine |
Feminine |
Masculine |
Feminine |
| 1st |
|
|
| 2nd |
|
|
|
|
| 3rd |
|
|
|
|
"""
# Ktiv male ("vl") counterpart of PREPOSITION_MO_TABLE. Parsed by the
# _parse_preposition_table_vl tests and, through _PREPOSITION_VL_PAGE, passed as
# the third argument to _scrape_preposition_detail.
# NOTE(review): the literal is a single newline as shown here, yet the tests
# expect ten forms from it — presumably the markup was lost; confirm.
PREPOSITION_VL_TABLE = """
"""
# Minimal full-page wrappers so _scrape_*_detail() can parse them
# Each page constant is the matching table fixture interpolated into an
# f-string; the MO/VL pairs are passed together to the _scrape_*_detail tests.
# NOTE(review): as shown here nothing surrounds the interpolated table, so each
# page equals its table text — presumably wrapper markup was lost; confirm.
_ADJECTIVE_MO_PAGE = f"{ADJECTIVE_MO_TABLE}"
_ADJECTIVE_VL_PAGE = f"{ADJECTIVE_VL_TABLE}"
_PREPOSITION_MO_PAGE = f"{PREPOSITION_MO_TABLE}"
_PREPOSITION_VL_PAGE = f"{PREPOSITION_VL_TABLE}"
# ---------------------------------------------------------------------------
# Adjective table tests
# ---------------------------------------------------------------------------
class TestParseAdjectiveTable:
    """Tests for _parse_adjective_table (mo/nikkud page).

    Soup construction lives in ``_parse`` so each test states only the input
    it feeds the parser and the value it expects back.
    """

    @staticmethod
    def _parse(html: str) -> dict:
        """Parse *html* with the lxml backend and run _parse_adjective_table on it."""
        # BeautifulSoup is the module-level name bound by this file's
        # ``from bs4 import BeautifulSoup``; it replaces the inline
        # ``__import__("bs4")`` lookup that every test used to repeat.
        return _parse_adjective_table(BeautifulSoup(html, "lxml"))

    def test_returns_four_form_keys(self) -> None:
        result = self._parse(ADJECTIVE_MO_TABLE)
        assert set(result.keys()) == {"ms", "fs", "mp", "fp"}

    def test_ms_nikkud(self) -> None:
        result = self._parse(ADJECTIVE_MO_TABLE)
        assert result["ms"]["nikkud"] == "אֲבִיבִי"

    def test_fs_nikkud(self) -> None:
        result = self._parse(ADJECTIVE_MO_TABLE)
        assert result["fs"]["nikkud"] == "אֲבִיבִית"

    def test_mp_nikkud(self) -> None:
        result = self._parse(ADJECTIVE_MO_TABLE)
        assert result["mp"]["nikkud"] == "אֲבִיבִיִּים"

    def test_fp_nikkud(self) -> None:
        result = self._parse(ADJECTIVE_MO_TABLE)
        assert result["fp"]["nikkud"] == "אֲבִיבִיּוֹת"

    def test_audio_url_present(self) -> None:
        result = self._parse(ADJECTIVE_MO_TABLE)
        assert result["ms"]["audio_url"].startswith("https://audio.pealim.com/")

    def test_empty_on_missing_table(self) -> None:
        # An empty document has no table to read, so no forms are expected.
        result = self._parse("")
        assert result == {}
class TestParseAdjectiveTableVl:
    """Tests for _parse_adjective_table_vl (ktiv male page).

    Each test reads one form from a fresh parse of ``ADJECTIVE_VL_TABLE``;
    the parse itself is done once, in ``_parse_fixture``.
    """

    @staticmethod
    def _parse_fixture() -> dict:
        """Run _parse_adjective_table_vl on the ktiv male adjective fixture."""
        # BeautifulSoup is the module-level name bound by this file's
        # ``from bs4 import BeautifulSoup``; it replaces the inline
        # ``__import__("bs4")`` lookup that every test used to repeat.
        return _parse_adjective_table_vl(BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml"))

    def test_returns_four_form_keys(self) -> None:
        result = self._parse_fixture()
        assert set(result.keys()) == {"ms", "fs", "mp", "fp"}

    def test_ms_ktiv(self) -> None:
        result = self._parse_fixture()
        assert result["ms"] == "אביבי"

    def test_fs_ktiv(self) -> None:
        result = self._parse_fixture()
        assert result["fs"] == "אביבית"

    def test_mp_ktiv(self) -> None:
        result = self._parse_fixture()
        assert result["mp"] == "אביביים"

    def test_fp_ktiv(self) -> None:
        result = self._parse_fixture()
        assert result["fp"] == "אביביות"
# ---------------------------------------------------------------------------
# _scrape_adjective_detail tests
# ---------------------------------------------------------------------------
class TestScrapeAdjectiveDetail:
    """Tests for _scrape_adjective_detail — schema compliance."""

    @pytest.fixture()
    def result(self) -> dict:
        """Scrape the adjective fixture pages under the "9098-avivi" id."""
        scraped = _scrape_adjective_detail(
            "9098-avivi",
            _ADJECTIVE_MO_PAGE,
            _ADJECTIVE_VL_PAGE,
        )
        return scraped

    def test_returns_non_empty_dict(self, result: dict) -> None:
        assert result

    def test_ms_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["ms"]
        assert form["nikkud"] == "אֲבִיבִי"
        assert form["ktiv_male"] == "אביבי"

    def test_fs_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["fs"]
        assert form["nikkud"] == "אֲבִיבִית"
        assert form["ktiv_male"] == "אביבית"

    def test_mp_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["mp"]
        assert form["nikkud"] == "אֲבִיבִיִּים"
        assert form["ktiv_male"] == "אביביים"

    def test_fp_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["fp"]
        assert form["nikkud"] == "אֲבִיבִיּוֹת"
        assert form["ktiv_male"] == "אביביות"

    def test_mishkal_key_present(self, result: dict) -> None:
        # Only the key is checked: the value may be None because the minimal
        # fixture carries no PoS section.
        assert "mishkal" in result

    def test_mishkal_hebrew_key_present(self, result: dict) -> None:
        assert "mishkal_hebrew" in result

    def test_all_schema_keys_present(self, result: dict) -> None:
        schema_keys = ("ms", "fs", "mp", "fp", "mishkal", "mishkal_hebrew")
        assert all(key in result for key in schema_keys)

    def test_empty_on_no_table(self) -> None:
        # Empty pages for both spellings should produce an empty mapping.
        scraped = _scrape_adjective_detail("missing", "", "")
        assert scraped == {}
# ---------------------------------------------------------------------------
# Preposition table tests
# ---------------------------------------------------------------------------
class TestParsePrepositionTable:
    """Tests for _parse_preposition_table (mo/nikkud page)."""

    @pytest.fixture()
    def result(self) -> dict:
        """Return the parser output for the nikkud preposition fixture."""
        # BeautifulSoup is the module-level name bound by this file's
        # ``from bs4 import BeautifulSoup``; it replaces the inline
        # ``__import__("bs4")`` lookup.
        return _parse_preposition_table(BeautifulSoup(PREPOSITION_MO_TABLE, "lxml"))

    def test_returns_ten_form_keys(self, result: dict) -> None:
        expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"}
        assert set(result.keys()) == expected

    def test_1s_nikkud(self, result: dict) -> None:
        assert result["1s"]["nikkud"] == "שֶׁלִּי"

    def test_1p_nikkud(self, result: dict) -> None:
        assert result["1p"]["nikkud"] == "שֶׁלָּנוּ"

    def test_2ms_nikkud(self, result: dict) -> None:
        assert result["2ms"]["nikkud"] == "שֶׁלְּךָ"

    def test_2fs_nikkud(self, result: dict) -> None:
        assert result["2fs"]["nikkud"] == "שֶׁלָּךְ"

    def test_2mp_nikkud(self, result: dict) -> None:
        assert result["2mp"]["nikkud"] == "שֶׁלָּכֶם"

    def test_2fp_nikkud(self, result: dict) -> None:
        assert result["2fp"]["nikkud"] == "שֶׁלָּכֶן"

    def test_3ms_nikkud(self, result: dict) -> None:
        assert result["3ms"]["nikkud"] == "שֶׁלּוֹ"

    def test_3fs_nikkud(self, result: dict) -> None:
        assert result["3fs"]["nikkud"] == "שֶׁלָּהּ"

    def test_3mp_nikkud(self, result: dict) -> None:
        assert result["3mp"]["nikkud"] == "שֶׁלָּהֶם"

    def test_3fp_nikkud(self, result: dict) -> None:
        assert result["3fp"]["nikkud"] == "שֶׁלָּהֶן"

    def test_audio_url_present(self, result: dict) -> None:
        assert result["1s"]["audio_url"].startswith("https://audio.pealim.com/")

    def test_empty_on_missing_table(self) -> None:
        # Bypasses the fixture on purpose: an empty document has no table.
        result = _parse_preposition_table(BeautifulSoup("", "lxml"))
        assert result == {}
class TestParsePrepositionTableVl:
    """Tests for _parse_preposition_table_vl (ktiv male page)."""

    @pytest.fixture()
    def result(self) -> dict:
        """Return the parser output for the ktiv male preposition fixture."""
        # BeautifulSoup is the module-level name bound by this file's
        # ``from bs4 import BeautifulSoup``; it replaces the inline
        # ``__import__("bs4")`` lookup.
        return _parse_preposition_table_vl(BeautifulSoup(PREPOSITION_VL_TABLE, "lxml"))

    def test_returns_ten_form_keys(self, result: dict) -> None:
        expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"}
        assert set(result.keys()) == expected

    def test_1s_ktiv(self, result: dict) -> None:
        assert result["1s"] == "שלי"

    def test_1p_ktiv(self, result: dict) -> None:
        assert result["1p"] == "שלנו"

    def test_2ms_ktiv(self, result: dict) -> None:
        assert result["2ms"] == "שלך"

    def test_3ms_ktiv(self, result: dict) -> None:
        assert result["3ms"] == "שלו"

    def test_3fp_ktiv(self, result: dict) -> None:
        assert result["3fp"] == "שלהן"
# ---------------------------------------------------------------------------
# _scrape_preposition_detail tests
# ---------------------------------------------------------------------------
class TestScrapePrepositionDetail:
    """Tests for _scrape_preposition_detail — schema compliance."""

    @pytest.fixture()
    def result(self) -> dict:
        """Scrape the preposition fixture pages under the "2643-shel" id."""
        scraped = _scrape_preposition_detail(
            "2643-shel",
            _PREPOSITION_MO_PAGE,
            _PREPOSITION_VL_PAGE,
        )
        return scraped

    def test_returns_non_empty_dict(self, result: dict) -> None:
        assert result

    def test_all_ten_person_keys_present(self, result: dict) -> None:
        person_keys = ("1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp")
        assert all(key in result for key in person_keys)

    def test_1s_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["1s"]
        assert form["nikkud"] == "שֶׁלִּי"
        assert form["ktiv_male"] == "שלי"

    def test_1p_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["1p"]
        assert form["nikkud"] == "שֶׁלָּנוּ"
        assert form["ktiv_male"] == "שלנו"

    def test_2ms_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["2ms"]
        assert form["nikkud"] == "שֶׁלְּךָ"
        assert form["ktiv_male"] == "שלך"

    def test_3ms_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["3ms"]
        assert form["nikkud"] == "שֶׁלּוֹ"
        assert form["ktiv_male"] == "שלו"

    def test_3fs_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["3fs"]
        assert form["nikkud"] == "שֶׁלָּהּ"
        assert form["ktiv_male"] == "שלה"

    def test_3fp_has_nikkud_and_ktiv(self, result: dict) -> None:
        form = result["3fp"]
        assert form["nikkud"] == "שֶׁלָּהֶן"
        assert form["ktiv_male"] == "שלהן"

    def test_empty_on_no_table(self) -> None:
        # Empty pages for both spellings should produce an empty mapping.
        scraped = _scrape_preposition_detail("missing", "", "")
        assert scraped == {}
# ---------------------------------------------------------------------------
# Tests for _parse_noun_gender_mishkal gender and mishkal extraction
# ---------------------------------------------------------------------------
from bs4 import BeautifulSoup # noqa: E402
from pealim_detail_scrape import _parse_noun_gender_mishkal # noqa: E402
class TestNounGenderMishkal:
# Checks the (gender, mishkal) pair that _parse_noun_gender_mishkal returns for
# short part-of-speech descriptions parsed with BeautifulSoup's html.parser.
# NOTE(review): each html literal below breaks across two lines and carries no
# tags as shown here — presumably the markup was lost; confirm against the
# real file before relying on these fixtures.
# Pattern name plus gender: expects gender "masculine" and mishkal "ketel".
def test_noun_with_mishkal(self):
html = 'Noun – ketel pattern, masculine
'
soup = BeautifulSoup(html, "html.parser")
gender, mishkal = _parse_noun_gender_mishkal(soup)
assert gender == "masculine"
assert mishkal == "ketel"
# No pattern in the description: mishkal is expected to be the empty string.
def test_noun_without_mishkal(self):
html = "Noun – masculine
"
soup = BeautifulSoup(html, "html.parser")
gender, mishkal = _parse_noun_gender_mishkal(soup)
assert gender == "masculine"
assert mishkal == ""
# Adjective description: only the mishkal is asserted, the gender is discarded.
def test_adjective_mishkal(self):
html = 'Adjective – katul pattern
'
soup = BeautifulSoup(html, "html.parser")
_, mishkal = _parse_noun_gender_mishkal(soup)
assert mishkal == "katul"
# Same pattern as the first test but with gender "feminine".
def test_feminine_noun(self):
html = 'Noun – ketel pattern, feminine
'
soup = BeautifulSoup(html, "html.parser")
gender, mishkal = _parse_noun_gender_mishkal(soup)
assert gender == "feminine"
assert mishkal == "ketel"