diff --git a/apkg_builder.py b/apkg_builder.py index 97dea4f..39e58e2 100644 --- a/apkg_builder.py +++ b/apkg_builder.py @@ -720,6 +720,116 @@ _EMOJI_STOP = frozenset( "bar", "wheel", "horizontal", + # Polysemous keywords producing wrong-sense emoji (Sprint 17 audit) + "high", # ⚡ high voltage, not "tall" + "down", # 🫳 palm down, not "descend" + "off", # 📴 phone off, not "remove" + "away", # 💨 dashing away, not "depart" + "together", # 🤲 palms together, not "unite" + "top", # 🎩 top hat, not "upper" + "low", # 🔈 low volume, not "short" + "flat", # 🥿 ballet flat, not "apartment" + "soft", # 🍦 soft serve, not "quiet" + "broken", # 💔 broken heart, not "damaged" + "round", # 📍 round pushpin, not "circular" + "cool", # 🆒 COOL button, not "cold" + "free", # 🆓 FREE button, not "liberated" + "long", # 🪘 long drum, not "lengthy" + "straight", # 📏 straight ruler, not "direct" + "empty", # 🪹 empty nest, not "void" + "hot", # 🥵 hot face, not "warm" + "cross", # ✝️ latin cross, not "intersect" + "bright", # 🔆 bright button, not "luminous" + "old", # 👴 old man, not "aged" + "head", # 🙂‍↔️ shaking head, not "leader" + # Category words that match generic emoji + "military", # 🎖️ military medal for any military term + "sports", # 🏅 sports medal for any sports term + "food", # 😋 yummy face for any food term + "city", # 🇻🇦 Vatican flag for any city + "china", # 🇨🇳 China flag for "porcelain" + "polish", # 💅 nail polish for "to polish/shine" + "aid", # 🦻 hearing aid for "to help" + "office", # 🧑‍💼 office worker for "bureau" + "construction", # 🏛️ classical building, not construction + "cinema", # 🎦 cinema emoji for any film term + "ceremony", # 🎑 moon ceremony for any ceremony + "building", # 🏛️ classical building for any structure + # Body parts / human features → wrong emoji + "arm", # 🦾 mechanical arm for "to arm" + "hair", # 👱 blond person for "hair" + 
"nose", # 😤 steam from nose + "tongue", # 😛 tongue-out face + "chest", # 🪎 not a chest + "eyes", # 😃 face with eyes + # Abstract/vague words + "fear", # 😱 screaming face + "anger", # 💢 anger symbol + "angry", # 😠 angry face + "tired", # 😫 tired face + "sad", # 😥 sad face + "joy", # 😂 tears of joy + "love", # 💌 love letter + "cold", # 🥶 cold face + "pile", # 💩 pile of poo + "man", # 👨 man + "woman", # 👩 woman + "boy", # 👦 boy + "girl", # 👧 girl + "baby", # 👶 baby + "children", # 🚸 children crossing + "student", # 🧑‍🎓 student + "adult", # 🧑‍🧑‍🧒 family + "name", # 📛 name badge + "check", # ✅ check mark + "line", # 🫥 dotted line face + "floor", # 🤣 ROFL (rolling on floor) + "room", # 🧖 person in steamy room + "bubble", # 👁️‍🗨️ speech bubble + "car", # 🚃 railway car, not automobile + "bullet", # 🚅 bullet train + "steam", # 😤 face with steam + "fly", # 🪰 the insect, not the verb + "plant", # 🪴 potted plant for all "X (plant)" entries + "tree", # 🌲 evergreen for all "X (tree)" entries + "ball", # ⛹️ person bouncing ball + "bag", # 👝 clutch bag + "fight", # 🫯 not a fight + "cloud", # 🫯 not a cloud + "video", # 🎮 video game, not video + "rescue", # ⛑️ rescue worker helmet + "exchange", # 💱 currency exchange + "cut", # 🥩 cut of meat, not "to cut" + "key", # 🔐 locked with key + "walking", # 🚶 person walking + "running", # 🏃 person running + "climbing", # 🧗 person climbing + "speaking", # 🗣️ speaking head + "playing", # 🤽 person playing + "feeding", # 👩‍🍼 person feeding + "shooting", # 🌠 shooting star + "clapping", # 👏 clapping hands + "cooking", # 🍳 cooking emoji + "holding", # 🥹 face holding back tears + # More wrong-sense matches from remaining audit + "paper", # 🏮 red lantern for "paper" + "track", # 🛤️ railroad for "track record" + "vertical", # 🚦 traffic light for "vertical" 
+ "speaker", # 🔇 muted speaker for "speaker (person)" + "square", # 🟥 red square for "plaza" + "wrapped", # 🎁 gift for "wrapped, bound" + "volume", # 🔈 speaker for "volume (book)" + "mobile", # 📱 phone for "mobile, moveable" + "flash", # 📸 camera flash for "to shine" + "identification", # 🪪 ID card for "locating" + "service", # 🐕‍🦺 service dog for "service, term" + "ground", # ⛱️ umbrella on ground + "machine", # 🎰 slot machine for "mechanism" + "liquid", # 🫗 pouring for "liquid, drop" + "vehicle", # 🚙 SUV for any vehicle mention + "window", # 🪟 window pane for "window, gap" + "information", # ℹ️ info symbol + "child", # 🧒 child emoji } ) @@ -921,11 +1031,13 @@ def build_vocab_deck( else: freq_display = "Unlisted" - # Emoji: use entry's emoji if emoji_visible, else fall back to emoji_lookup + # Emoji: use entry's emoji if emoji_visible, else fall back to emoji_lookup. + # Skip fallback for verbs — keyword matching on verb definitions produces + # wrong-sense emoji (e.g. "to cut" → 🥩, "to arm" → 🦾). emoji_str = "" if entry.get("emoji_visible") and entry.get("emoji"): emoji_str = entry["emoji"] - elif not emoji_str and emoji_lookup: + elif emoji_lookup and not meaning.startswith("to "): meaning_clean_for_emoji = EMOJI_RE.sub("", meaning).strip() for kw in re.sub(r"[^\w\s]", " ", meaning_clean_for_emoji.lower()).split()[:5]: if len(kw) > 2 and kw not in _EMOJI_STOP and kw in emoji_lookup: