Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 36 additions & 22 deletions PyMultiDictionary/_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
'DICT_EDUCALINGO',
'DICT_SYNONYMCOM',
'DICT_THESAURUS',
'DICT_WORDNET',
'DICT_MW',
'MultiDictionary'
]

Expand All @@ -26,14 +26,16 @@
from typing import Dict, Tuple, Optional, List, Union
from warnings import warn

import requests

# Dicts
_EDUCALINGO_LANGS = ('bn', 'de', 'en', 'es', 'fr', 'hi', 'it', 'ja', 'jv', 'ko', 'mr',
'ms', 'pl', 'pt', 'ro', 'ru', 'ta', 'tr', 'uk', 'zh')

DICT_EDUCALINGO = 'educalingo'
DICT_SYNONYMCOM = 'synonym'
DICT_THESAURUS = 'thesaurus'
DICT_WORDNET = 'wordnet'
DICT_MW = 'Merriam-Webster'

# Cache
_CACHED_SOUPS: Dict[str, 'BeautifulSoup'] = {} # Stores cached web
Expand Down Expand Up @@ -362,7 +364,7 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me
types, words, wiki = [], '', ''
word = self._process(word)

assert dictionary in (DICT_EDUCALINGO, DICT_WORDNET), 'Unsupported dictionary'
assert dictionary in (DICT_EDUCALINGO, DICT_MW), 'Unsupported dictionary'
if lang not in self._langs.keys() or not self._langs[lang][1]:
raise InvalidLangCode(f'{lang} code is not supported for meanings')
elif word == '':
Expand Down Expand Up @@ -397,26 +399,38 @@ def meaning(self, lang: str, word: str, dictionary: str = DICT_EDUCALINGO) -> Me

return types, words, wiki

elif dictionary == DICT_WORDNET and lang == 'en':
if word == '':
elif dictionary == DICT_MW and lang == 'en':
if not word.strip():
return {}
word = word.replace(' ', '+')
# noinspection HttpUrlsUsage
html = self._bsoup(f'http://wordnetweb.princeton.edu/perl/webwn?s={word}')
types = html.findAll('h3')
lists = html.findAll('ul')
out = {}
for a in types:
reg = str(lists[types.index(a)])
meanings = []
for x in re.findall(r'\((.*?)\)', reg):
if 'often followed by' in x:
pass
elif len(x) > 5 or ' ' in str(x):
meanings.append(x.strip())
name = a.text.strip()
out[name] = meanings
return out

url = f'https://www.merriam-webster.com/dictionary/{word}'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

definitions = {}

pos_entries = soup.find_all('h2', class_='parts-of-speech')

for pos_tag in pos_entries:
part_of_speech = pos_tag.get_text(strip=True)

if part_of_speech in definitions:
continue

definitions[part_of_speech] = []

definition_section = pos_tag.find_next('div', class_='vg')
if not definition_section:
continue

for sense in definition_section.find_all('div', class_='sb'):
definition_texts = sense.find_all('span', class_='dtText')
for def_text in definition_texts:
definition = def_text.get_text().lstrip(": ")
if definition:
definitions[part_of_speech].append(definition)

return definitions

else:
raise InvalidDictionary(f'Dictionary {dictionary} cannot handle language {lang}')
Expand Down
Loading