hebrew_flash_cards/test_scrape.py

31 lines
972 B
Python

#!/usr/bin/env python3
import traceback
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
# Word looked up on pealim.com when the script is run directly.
word = 'אבל'

# User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}


def build_search_url(query):
    """Return the pealim.com search URL for *query*.

    The query is percent-encoded with ``urlencode`` so that non-ASCII text
    and reserved characters (``&``, ``#``, ``+``, spaces) cannot corrupt
    the query string.
    """
    return 'https://www.pealim.com/search/?' + urlencode({'q': query})


url = build_search_url(word)

# (label printed, tag name, CSS class) for each element the script probes.
_PROBES = (
    ('word_elem', 'h1', 'word-title'),
    ('pos_elem', 'span', 'pos'),
    ('definition_elem', 'div', 'definition'),
)


def main():
    """Fetch the search page for ``word`` and print what the selectors find.

    Prints the HTTP status code, one ``<label> found: <bool>`` line per
    entry in ``_PROBES``, and the first 3000 characters of the prettified
    HTML. Any exception is reported with its message and full traceback
    instead of propagating, since this is a best-effort debugging aid.
    """
    try:
        response = requests.get(url, headers=headers, timeout=10)
        print(f'Status: {response.status_code}')
        soup = BeautifulSoup(response.content, 'html.parser')

        # Debug: check what we find
        for label, tag, css_class in _PROBES:
            found = soup.find(tag, class_=css_class) is not None
            print(f'{label} found: {found}')

        print('\n--- HTML snippet (first 3000 chars) ---')
        print(soup.prettify()[:3000])
    except Exception as e:
        # Outermost boundary of a debug script: report everything, never crash.
        print(f'Error: {e}')
        traceback.print_exc()


# Guarded so that importing this module (e.g. pytest collecting a file named
# test_*.py) does not trigger a live network request.
if __name__ == '__main__':
    main()