hebrew_flash_cards/tests/test_detail_scrape.py
Sochen 0d92451271 Sprint 16: collapsible card details + related words table
- All secondary fields (shoresh, PoS, ktiv male, plural, related words)
  behind a "מידע נוסף" toggle button using HTML <details>/<summary>
- Conjugation back: English meaning, binyan also behind toggle
- Related words: table format with word + meaning, sorted by frequency
- Hebrew words not bold, English meanings 24px gray (#555)
- "מִילִים קְשׁוּרוֹת" sub-header with nikkud inside toggle
- "אֵיךְ אוֹמְרִים" prompt centered using hint class
- New CSS: .more-toggle, .more-header, .related-header, .rw-word, .rw-meaning
- Dark mode support for all new classes
- Bump to v0.18

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 01:34:14 +00:00

524 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for adjective and preposition detail page parsing in pealim_detail_scrape.py."""
import sys
from pathlib import Path

import pytest
from bs4 import BeautifulSoup

# Make the project root importable before pulling in the module under test.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from pealim_detail_scrape import (  # noqa: E402
    _parse_adjective_table,
    _parse_adjective_table_vl,
    _parse_preposition_table,
    _parse_preposition_table_vl,
    _scrape_adjective_detail,
    _scrape_preposition_detail,
)
# ---------------------------------------------------------------------------
# Fixtures — real HTML snippets from pealim.com
# ---------------------------------------------------------------------------
# Nikkud ("mo") adjective table: a two-row header (Singular/Plural over
# Masculine/Feminine) and one body row whose four cells carry the div ids
# ms-a, fs-a, mp-a and fp-a. Each cell holds an audio-play span with a
# data-audio URL, a menukad span with the vowelled form, and a meaning div.
ADJECTIVE_MO_TABLE = """
<table class="table table-condensed conjugation-table">
<thead>
<tr>
<th class="column-header" colspan="2">Singular</th>
<th class="column-header" colspan="2">Plural</th>
</tr>
<tr>
<th class="column-header">Masculine</th>
<th class="column-header">Feminine</th>
<th class="column-header">Masculine</th>
<th class="column-header">Feminine</th>
</tr>
</thead>
<tbody>
<tr>
<td class="conj-td">
<div id="ms-a">
<div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/dn/dngfpnovmytc.mp3">&#128266;</span>
<span class="menukad">אֲבִיבִי</span>
</div></div>
<div class="meaning">spring-like, vernal</div>
</div>
</td>
<td class="conj-td">
<div id="fs-a">
<div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/1j/1j6srg3do7n5k.mp3">&#128266;</span>
<span class="menukad">אֲבִיבִית</span>
</div></div>
<div class="meaning">spring-like, vernal</div>
</div>
</td>
<td class="conj-td">
<div id="mp-a">
<div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/tj/tjrhw0b5dkhc.mp3">&#128266;</span>
<span class="menukad">אֲבִיבִיִּים</span>
</div></div>
<div class="meaning">spring-like, vernal</div>
</div>
</td>
<td class="conj-td">
<div id="fp-a">
<div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/h3/h3u1ml5a4xcf.mp3">&#128266;</span>
<span class="menukad">אֲבִיבִיּוֹת</span>
</div></div>
<div class="meaning">spring-like, vernal</div>
</div>
</td>
</tr>
</tbody>
</table>
"""
# VL version: menukad spans contain unvowelled text (hebstyle=vl).
# Same four div ids (ms-a, fs-a, mp-a, fp-a) as the nikkud fixture, but this
# snippet has no <thead>, no audio-play spans and no meaning divs.
ADJECTIVE_VL_TABLE = """
<table class="table table-condensed conjugation-table">
<tbody>
<tr>
<td class="conj-td">
<div id="ms-a"><div><div>
<span class="menukad">אביבי</span>
</div></div></div>
</td>
<td class="conj-td">
<div id="fs-a"><div><div>
<span class="menukad">אביבית</span>
</div></div></div>
</td>
<td class="conj-td">
<div id="mp-a"><div><div>
<span class="menukad">אביביים</span>
</div></div></div>
</td>
<td class="conj-td">
<div id="fp-a"><div><div>
<span class="menukad">אביביות</span>
</div></div></div>
</td>
</tr>
</tbody>
</table>
"""
# Nikkud ("mo") preposition table with one body row per person (1st/2nd/3rd).
# The 1st-person row has two colspan=2 cells (P-1s, P-1p); the 2nd- and
# 3rd-person rows have four cells each (P-2ms .. P-3fp). Every cell holds an
# audio-play span with a data-audio URL, a menukad span and a meaning div.
PREPOSITION_MO_TABLE = """
<table class="table table-condensed conjugation-table">
<thead>
<tr>
<th rowspan="2">Person</th>
<th class="column-header" colspan="2">Singular</th>
<th class="column-header" colspan="2">Plural</th>
</tr>
<tr>
<th class="column-header">Masculine</th>
<th class="column-header">Feminine</th>
<th class="column-header">Masculine</th>
<th class="column-header">Feminine</th>
</tr>
</thead>
<tbody>
<tr>
<th>1st</th>
<td class="conj-td" colspan="2">
<div id="P-1s"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/fk/fkp5faeteecr.mp3">&#128266;</span>
<span class="menukad">שֶׁלִּי</span>
</div></div><div class="meaning"><strong>of mine</strong></div></div>
</td>
<td class="conj-td" colspan="2">
<div id="P-1p"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/13/13uvi0dz6tgcc.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּנוּ</span>
</div></div><div class="meaning"><strong>of ours</strong></div></div>
</td>
</tr>
<tr>
<th>2nd</th>
<td class="conj-td">
<div id="P-2ms"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/sh/shbxafq8ietx.mp3">&#128266;</span>
<span class="menukad">שֶׁלְּךָ</span>
</div></div><div class="meaning"><strong>of yours</strong> <em>m. sg.</em></div></div>
</td>
<td class="conj-td">
<div id="P-2fs"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/sh/sh9ue3a8buo3.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּךְ</span>
</div></div><div class="meaning"><strong>of yours</strong> <em>f. sg.</em></div></div>
</td>
<td class="conj-td">
<div id="P-2mp"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/ol/olx8vzsctlzn.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּכֶם</span>
</div></div><div class="meaning"><strong>of yours</strong> <em>m. pl.</em></div></div>
</td>
<td class="conj-td">
<div id="P-2fp"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/ol/olxrms6dl8eq.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּכֶן</span>
</div></div><div class="meaning"><strong>of yours</strong> <em>f. pl.</em></div></div>
</td>
</tr>
<tr>
<th>3rd</th>
<td class="conj-td">
<div id="P-3ms"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/fk/fkp5qigelthg.mp3">&#128266;</span>
<span class="menukad">שֶׁלּוֹ</span>
</div></div><div class="meaning"><strong>of his</strong></div></div>
</td>
<td class="conj-td">
<div id="P-3fs"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/sh/sh9w36hojm5w.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּהּ</span>
</div></div><div class="meaning"><strong>of hers</strong></div></div>
</td>
<td class="conj-td">
<div id="P-3mp"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/n9/n99z0jr8pint.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּהֶם</span>
</div></div><div class="meaning"><strong>of theirs</strong> <em>m.</em></div></div>
</td>
<td class="conj-td">
<div id="P-3fp"><div><div>
<span class="audio-play" data-audio="https://audio.pealim.com/v0/n9/n9ahrc59h52w.mp3">&#128266;</span>
<span class="menukad">שֶׁלָּהֶן</span>
</div></div><div class="meaning"><strong>of theirs</strong> <em>f.</em></div></div>
</td>
</tr>
</tbody>
</table>
"""
# Unvowelled counterpart of the nikkud preposition fixture: the same ten div
# ids (P-1s .. P-3fp), with only a menukad span per cell. There is no <thead>,
# no audio-play span and no meaning div. Note that P-2ms and P-2fs carry the
# same unvowelled text.
PREPOSITION_VL_TABLE = """
<table class="table table-condensed conjugation-table">
<tbody>
<tr>
<th>1st</th>
<td colspan="2"><div id="P-1s"><div><div>
<span class="menukad">שלי</span>
</div></div></div></td>
<td colspan="2"><div id="P-1p"><div><div>
<span class="menukad">שלנו</span>
</div></div></div></td>
</tr>
<tr>
<th>2nd</th>
<td><div id="P-2ms"><div><div>
<span class="menukad">שלך</span>
</div></div></div></td>
<td><div id="P-2fs"><div><div>
<span class="menukad">שלך</span>
</div></div></div></td>
<td><div id="P-2mp"><div><div>
<span class="menukad">שלכם</span>
</div></div></div></td>
<td><div id="P-2fp"><div><div>
<span class="menukad">שלכן</span>
</div></div></div></td>
</tr>
<tr>
<th>3rd</th>
<td><div id="P-3ms"><div><div>
<span class="menukad">שלו</span>
</div></div></div></td>
<td><div id="P-3fs"><div><div>
<span class="menukad">שלה</span>
</div></div></div></td>
<td><div id="P-3mp"><div><div>
<span class="menukad">שלהם</span>
</div></div></div></td>
<td><div id="P-3fp"><div><div>
<span class="menukad">שלהן</span>
</div></div></div></td>
</tr>
</tbody>
</table>
"""
# Each table fixture is embedded in a bare <html><body> document so that the
# _scrape_*_detail() functions can be handed whole-page markup.
_PAGE_SHELL = "<html><body>{}</body></html>"
_ADJECTIVE_MO_PAGE = _PAGE_SHELL.format(ADJECTIVE_MO_TABLE)
_ADJECTIVE_VL_PAGE = _PAGE_SHELL.format(ADJECTIVE_VL_TABLE)
_PREPOSITION_MO_PAGE = _PAGE_SHELL.format(PREPOSITION_MO_TABLE)
_PREPOSITION_VL_PAGE = _PAGE_SHELL.format(PREPOSITION_VL_TABLE)
# ---------------------------------------------------------------------------
# Adjective table tests
# ---------------------------------------------------------------------------
class TestParseAdjectiveTable:
    """Tests for _parse_adjective_table (mo/nikkud page)."""

    @pytest.fixture()
    def result(self) -> dict:
        """Return the parser output for the nikkud adjective fixture."""
        return _parse_adjective_table(BeautifulSoup(ADJECTIVE_MO_TABLE, "lxml"))

    def test_returns_four_form_keys(self, result: dict) -> None:
        """Exactly the four gender/number form keys are produced."""
        assert set(result.keys()) == {"ms", "fs", "mp", "fp"}

    def test_ms_nikkud(self, result: dict) -> None:
        assert result["ms"]["nikkud"] == "אֲבִיבִי"

    def test_fs_nikkud(self, result: dict) -> None:
        assert result["fs"]["nikkud"] == "אֲבִיבִית"

    def test_mp_nikkud(self, result: dict) -> None:
        assert result["mp"]["nikkud"] == "אֲבִיבִיִּים"

    def test_fp_nikkud(self, result: dict) -> None:
        assert result["fp"]["nikkud"] == "אֲבִיבִיּוֹת"

    def test_audio_url_present(self, result: dict) -> None:
        """The masculine-singular entry exposes the pealim audio URL."""
        assert result["ms"]["audio_url"].startswith("https://audio.pealim.com/")

    def test_empty_on_missing_table(self) -> None:
        """A page without any table yields an empty dict."""
        soup = BeautifulSoup("<html><body></body></html>", "lxml")
        assert _parse_adjective_table(soup) == {}
class TestParseAdjectiveTableVl:
    """Tests for _parse_adjective_table_vl (ktiv male page)."""

    @pytest.fixture()
    def result(self) -> dict:
        """Return the parser output for the unvowelled adjective fixture."""
        return _parse_adjective_table_vl(BeautifulSoup(ADJECTIVE_VL_TABLE, "lxml"))

    def test_returns_four_form_keys(self, result: dict) -> None:
        """Exactly the four gender/number form keys are produced."""
        assert set(result.keys()) == {"ms", "fs", "mp", "fp"}

    def test_ms_ktiv(self, result: dict) -> None:
        assert result["ms"] == "אביבי"

    def test_fs_ktiv(self, result: dict) -> None:
        assert result["fs"] == "אביבית"

    def test_mp_ktiv(self, result: dict) -> None:
        assert result["mp"] == "אביביים"

    def test_fp_ktiv(self, result: dict) -> None:
        assert result["fp"] == "אביביות"
# ---------------------------------------------------------------------------
# _scrape_adjective_detail tests
# ---------------------------------------------------------------------------
class TestScrapeAdjectiveDetail:
    """Schema-compliance checks for _scrape_adjective_detail."""

    @pytest.fixture()
    def detail(self) -> dict:
        """Scrape output for the minimal nikkud and unvowelled adjective pages."""
        return _scrape_adjective_detail(
            "9098-avivi",
            _ADJECTIVE_MO_PAGE,
            _ADJECTIVE_VL_PAGE,
        )

    def test_returns_non_empty_dict(self, detail: dict) -> None:
        assert detail

    def test_ms_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["ms"]
        assert form["nikkud"] == "אֲבִיבִי"
        assert form["ktiv_male"] == "אביבי"

    def test_fs_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["fs"]
        assert form["nikkud"] == "אֲבִיבִית"
        assert form["ktiv_male"] == "אביבית"

    def test_mp_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["mp"]
        assert form["nikkud"] == "אֲבִיבִיִּים"
        assert form["ktiv_male"] == "אביביים"

    def test_fp_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["fp"]
        assert form["nikkud"] == "אֲבִיבִיּוֹת"
        assert form["ktiv_male"] == "אביביות"

    def test_mishkal_key_present(self, detail: dict) -> None:
        # Only the key is checked: the minimal fixture has no PoS section,
        # so the value itself may be None.
        assert "mishkal" in detail

    def test_mishkal_hebrew_key_present(self, detail: dict) -> None:
        assert "mishkal_hebrew" in detail

    def test_all_schema_keys_present(self, detail: dict) -> None:
        required = {"ms", "fs", "mp", "fp", "mishkal", "mishkal_hebrew"}
        assert required <= set(detail.keys())

    def test_empty_on_no_table(self) -> None:
        """Pages without a table produce an empty dict."""
        blank_page = "<html><body></body></html>"
        assert _scrape_adjective_detail("missing", blank_page, blank_page) == {}
# ---------------------------------------------------------------------------
# Preposition table tests
# ---------------------------------------------------------------------------
class TestParsePrepositionTable:
    """Tests for _parse_preposition_table (mo/nikkud page)."""

    @pytest.fixture()
    def result(self) -> dict:
        """Return the parser output for the nikkud preposition fixture."""
        return _parse_preposition_table(BeautifulSoup(PREPOSITION_MO_TABLE, "lxml"))

    def test_returns_ten_form_keys(self, result: dict) -> None:
        """Exactly the ten person/gender/number keys are produced."""
        expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"}
        assert set(result.keys()) == expected

    def test_1s_nikkud(self, result: dict) -> None:
        assert result["1s"]["nikkud"] == "שֶׁלִּי"

    def test_1p_nikkud(self, result: dict) -> None:
        assert result["1p"]["nikkud"] == "שֶׁלָּנוּ"

    def test_2ms_nikkud(self, result: dict) -> None:
        assert result["2ms"]["nikkud"] == "שֶׁלְּךָ"

    def test_2fs_nikkud(self, result: dict) -> None:
        assert result["2fs"]["nikkud"] == "שֶׁלָּךְ"

    def test_2mp_nikkud(self, result: dict) -> None:
        assert result["2mp"]["nikkud"] == "שֶׁלָּכֶם"

    def test_2fp_nikkud(self, result: dict) -> None:
        assert result["2fp"]["nikkud"] == "שֶׁלָּכֶן"

    def test_3ms_nikkud(self, result: dict) -> None:
        assert result["3ms"]["nikkud"] == "שֶׁלּוֹ"

    def test_3fs_nikkud(self, result: dict) -> None:
        assert result["3fs"]["nikkud"] == "שֶׁלָּהּ"

    def test_3mp_nikkud(self, result: dict) -> None:
        assert result["3mp"]["nikkud"] == "שֶׁלָּהֶם"

    def test_3fp_nikkud(self, result: dict) -> None:
        assert result["3fp"]["nikkud"] == "שֶׁלָּהֶן"

    def test_audio_url_present(self, result: dict) -> None:
        """The first-person-singular entry exposes the pealim audio URL."""
        assert result["1s"]["audio_url"].startswith("https://audio.pealim.com/")

    def test_empty_on_missing_table(self) -> None:
        """A page without any table yields an empty dict."""
        soup = BeautifulSoup("<html><body></body></html>", "lxml")
        assert _parse_preposition_table(soup) == {}
class TestParsePrepositionTableVl:
    """Tests for _parse_preposition_table_vl (ktiv male page)."""

    @pytest.fixture()
    def result(self) -> dict:
        """Return the parser output for the unvowelled preposition fixture."""
        return _parse_preposition_table_vl(BeautifulSoup(PREPOSITION_VL_TABLE, "lxml"))

    def test_returns_ten_form_keys(self, result: dict) -> None:
        """Exactly the ten person/gender/number keys are produced."""
        expected = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"}
        assert set(result.keys()) == expected

    def test_1s_ktiv(self, result: dict) -> None:
        assert result["1s"] == "שלי"

    def test_1p_ktiv(self, result: dict) -> None:
        assert result["1p"] == "שלנו"

    def test_2ms_ktiv(self, result: dict) -> None:
        assert result["2ms"] == "שלך"

    def test_2fs_ktiv(self, result: dict) -> None:
        # The fixture gives 2fs the same unvowelled spelling as 2ms.
        assert result["2fs"] == "שלך"

    def test_2mp_ktiv(self, result: dict) -> None:
        assert result["2mp"] == "שלכם"

    def test_2fp_ktiv(self, result: dict) -> None:
        assert result["2fp"] == "שלכן"

    def test_3ms_ktiv(self, result: dict) -> None:
        assert result["3ms"] == "שלו"

    def test_3fs_ktiv(self, result: dict) -> None:
        assert result["3fs"] == "שלה"

    def test_3mp_ktiv(self, result: dict) -> None:
        assert result["3mp"] == "שלהם"

    def test_3fp_ktiv(self, result: dict) -> None:
        assert result["3fp"] == "שלהן"
# ---------------------------------------------------------------------------
# _scrape_preposition_detail tests
# ---------------------------------------------------------------------------
class TestScrapePrepositionDetail:
    """Schema-compliance checks for _scrape_preposition_detail."""

    @pytest.fixture()
    def detail(self) -> dict:
        """Scrape output for the minimal nikkud and unvowelled preposition pages."""
        return _scrape_preposition_detail(
            "2643-shel",
            _PREPOSITION_MO_PAGE,
            _PREPOSITION_VL_PAGE,
        )

    def test_returns_non_empty_dict(self, detail: dict) -> None:
        assert detail

    def test_all_ten_person_keys_present(self, detail: dict) -> None:
        required = {"1s", "1p", "2ms", "2fs", "2mp", "2fp", "3ms", "3fs", "3mp", "3fp"}
        assert required <= set(detail.keys())

    def test_1s_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["1s"]
        assert form["nikkud"] == "שֶׁלִּי"
        assert form["ktiv_male"] == "שלי"

    def test_1p_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["1p"]
        assert form["nikkud"] == "שֶׁלָּנוּ"
        assert form["ktiv_male"] == "שלנו"

    def test_2ms_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["2ms"]
        assert form["nikkud"] == "שֶׁלְּךָ"
        assert form["ktiv_male"] == "שלך"

    def test_3ms_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["3ms"]
        assert form["nikkud"] == "שֶׁלּוֹ"
        assert form["ktiv_male"] == "שלו"

    def test_3fs_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["3fs"]
        assert form["nikkud"] == "שֶׁלָּהּ"
        assert form["ktiv_male"] == "שלה"

    def test_3fp_has_nikkud_and_ktiv(self, detail: dict) -> None:
        form = detail["3fp"]
        assert form["nikkud"] == "שֶׁלָּהֶן"
        assert form["ktiv_male"] == "שלהן"

    def test_empty_on_no_table(self) -> None:
        """Pages without a table produce an empty dict."""
        blank_page = "<html><body></body></html>"
        assert _scrape_preposition_detail("missing", blank_page, blank_page) == {}
# ---------------------------------------------------------------------------
# Tests for _parse_noun_gender_mishkal mishkal extraction
# ---------------------------------------------------------------------------
from bs4 import BeautifulSoup # noqa: E402
from pealim_detail_scrape import _parse_noun_gender_mishkal # noqa: E402
class TestNounGenderMishkal:
    """Tests for _parse_noun_gender_mishkal on part-of-speech paragraphs."""

    @staticmethod
    def _parse(html: str):
        """Parse *html* with html.parser and return the function's result.

        The tests unpack that result as a ``(gender, mishkal)`` pair.
        """
        return _parse_noun_gender_mishkal(BeautifulSoup(html, "html.parser"))

    def test_noun_with_mishkal(self) -> None:
        """Gender and pattern name are both extracted when a pattern link exists."""
        html = '<p>Noun <a href="/dict/?pos=noun&amp;nm=qetel"><i>ketel</i> pattern</a>, masculine</p>'
        gender, mishkal = self._parse(html)
        assert gender == "masculine"
        assert mishkal == "ketel"

    def test_noun_without_mishkal(self) -> None:
        """Without a pattern link, mishkal is the empty string."""
        html = "<p>Noun masculine</p>"
        gender, mishkal = self._parse(html)
        assert gender == "masculine"
        assert mishkal == ""

    def test_adjective_mishkal(self) -> None:
        """The pattern name is also extracted from an adjective paragraph."""
        html = '<p>Adjective <a href="/dict/?pos=adjective&amp;am=qatul"><i>katul</i> pattern</a></p>'
        _, mishkal = self._parse(html)
        assert mishkal == "katul"

    def test_feminine_noun(self) -> None:
        """A feminine noun reports gender "feminine" alongside its pattern."""
        html = '<p>Noun <a href="/dict/?pos=noun&amp;nm=qetel"><i>ketel</i> pattern</a>, feminine</p>'
        gender, mishkal = self._parse(html)
        assert gender == "feminine"
        assert mishkal == "ketel"