From 1cd602e718f946c5dcde1b6eefc3d7fa7a7f1f9e Mon Sep 17 00:00:00 2001 From: flowerwallpaper <103080777+flowerwallpaper@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:33:11 -0800 Subject: [PATCH 1/3] Update _dictionary.py --- PyMultiDictionary/_dictionary.py | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/PyMultiDictionary/_dictionary.py b/PyMultiDictionary/_dictionary.py index b5eadaa..3f0d0a4 100644 --- a/PyMultiDictionary/_dictionary.py +++ b/PyMultiDictionary/_dictionary.py @@ -10,7 +10,7 @@ 'DICT_EDUCALINGO', 'DICT_SYNONYMCOM', 'DICT_THESAURUS', - 'DICT_WORDNET', + 'DICT_MW', 'MultiDictionary' ] @@ -33,7 +33,7 @@ DICT_EDUCALINGO = 'educalingo' DICT_SYNONYMCOM = 'synonym' DICT_THESAURUS = 'thesaurus' -DICT_WORDNET = 'wordnet' +DICT_MW = 'merriam-webster' # Cache _CACHED_SOUPS: Dict[str, 'BeautifulSoup'] = {} # Stores cached web @@ -362,7 +362,7 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me types, words, wiki = [], '', '' word = self._process(word) - assert dictionary in (DICT_EDUCALINGO, DICT_WORDNET), 'Unsupported dictionary' + assert dictionary in (DICT_EDUCALINGO, DICT_MW), 'Unsupported dictionary' if lang not in self._langs.keys() or not self._langs[lang][1]: raise InvalidLangCode(f'{lang} code is not supported for meanings') elif word == '': @@ -397,27 +397,29 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me return types, words, wiki - elif dictionary == DICT_WORDNET and lang == 'en': - if word == '': + elif dictionary == DICT_MW and lang == 'en': + if not word.strip(): return {} - word = word.replace(' ', '+') - # noinspection HttpUrlsUsage - html = self._bsoup(f'http://wordnetweb.princeton.edu/perl/webwn?s={word}') - types = html.findAll('h3') - lists = html.findAll('ul') - out = {} - for a in types: - reg = str(lists[types.index(a)]) - meanings = [] - for x in re.findall(r'\((.*?)\)', reg): - if 'often followed by' in x: - pass - elif len(x) > 5 or ' ' in str(x): - meanings.append(x.strip()) - name = a.text.strip() - out[name] = meanings - return out - + url = f'https://www.merriam-webster.com/dictionary/{word}' + response = requests.get(url) + soup = BeautifulSoup(response.text, 'html.parser') + definitions = {} + pos_entries = soup.find_all('h2', class_='parts-of-speech') + for pos_tag in pos_entries: + part_of_speech = pos_tag.get_text(strip=True) + if part_of_speech in definitions: + continue + definitions[part_of_speech] = [] + definition_section = pos_tag.find_next('div', class_='vg') + if not definition_section: + continue + for sense in definition_section.find_all('div', class_='sb'): + definition_texts = sense.find_all('span', class_='dtText') + for def_text in definition_texts: + definition = def_text.get_text().lstrip(": ") + if definition: + definitions[part_of_speech].append(definition) + return definitions else: raise InvalidDictionary(f'Dictionary {dictionary} cannot handle language {lang}') From d67a2995f6d8e7283316fcf9596faea9ae8bcf94 Mon Sep 17 00:00:00 2001 From: flowerwallpaper <103080777+flowerwallpaper@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:41:24 -0800 Subject: [PATCH 2/3] Update _dictionary.py --- PyMultiDictionary/_dictionary.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/PyMultiDictionary/_dictionary.py b/PyMultiDictionary/_dictionary.py index 3f0d0a4..7abd81c 100644 --- a/PyMultiDictionary/_dictionary.py +++ b/PyMultiDictionary/_dictionary.py @@ -26,6 +26,8 @@ from typing import Dict, Tuple, Optional, List, Union from warnings import warn +import requests + # Dicts _EDUCALINGO_LANGS = ('bn', 'de', 'en', 'es', 'fr', 'hi', 'it', 'ja', 'jv', 'ko', 'mr', 'ms', 'pl', 'pt', 'ro', 'ru', 'ta', 'tr', 'uk', 'zh') @@ -33,7 +35,7 @@ DICT_EDUCALINGO = 'educalingo' DICT_SYNONYMCOM = 'synonym' DICT_THESAURUS = 'thesaurus' -DICT_MW = 'merriam-webster' +DICT_MW = 'Merriam-Webster' # Cache _CACHED_SOUPS: Dict[str, 'BeautifulSoup'] = {} # Stores cached web @@ -400,26 +402,36 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me elif dictionary == DICT_MW and lang == 'en': if not word.strip(): return {} + url = f'https://www.merriam-webster.com/dictionary/{word}' response = requests.get(url) soup = BeautifulSoup(response.text, 'html.parser') + definitions = {} + pos_entries = soup.find_all('h2', class_='parts-of-speech') + for pos_tag in pos_entries: part_of_speech = pos_tag.get_text(strip=True) + if part_of_speech in definitions: continue + definitions[part_of_speech] = [] + definition_section = pos_tag.find_next('div', class_='vg') if not definition_section: continue + for sense in definition_section.find_all('div', class_='sb'): definition_texts = sense.find_all('span', class_='dtText') for def_text in definition_texts: definition = def_text.get_text().lstrip(": ") if definition: definitions[part_of_speech].append(definition) + return definitions + else: raise InvalidDictionary(f'Dictionary {dictionary} cannot handle language {lang}') From c3c6ed49a09b92338c71d5b046c0fff6ed82387a Mon Sep 17 00:00:00 2001 From: flowerwallpaper <103080777+flowerwallpaper@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:44:59 -0800 Subject: [PATCH 3/3] Update _dictionary.py