-
Notifications
You must be signed in to change notification settings - Fork 2
chore: update Python dependencies and add test suite #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| name: Compile & Test | ||
|
|
||
| on: | ||
| push: | ||
| pull_request: | ||
|
|
||
| jobs: | ||
| compile: | ||
| runs-on: ubuntu-latest | ||
| strategy: | ||
| matrix: | ||
| python-version: ["3.10", "3.11", "3.12"] | ||
|
|
||
| steps: | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Set up Python ${{ matrix.python-version }} | ||
| uses: actions/setup-python@v5 | ||
| with: | ||
| python-version: ${{ matrix.python-version }} | ||
|
|
||
| - name: Install dependencies | ||
| run: | | ||
| python -m pip install --upgrade pip | ||
| python -m pip install -r requirements.txt | ||
|
|
||
| - name: Run tests | ||
| run: | | ||
| python -m pytest -q | ||
|
|
||
| - name: Validate bytecode compilation | ||
| run: | | ||
| python -m compileall -q larousse_api tests |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,23 +1,31 @@ | ||
| name: Pylint | ||
|
|
||
| on: [push] | ||
| on: | ||
| push: | ||
| pull_request: | ||
|
|
||
| jobs: | ||
| build: | ||
| lint: | ||
| runs-on: ubuntu-latest | ||
| strategy: | ||
| matrix: | ||
| python-version: ["3.8", "3.9", "3.10"] | ||
| python-version: ["3.10", "3.11", "3.12"] | ||
|
|
||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up Python ${{ matrix.python-version }} | ||
| uses: actions/setup-python@v3 | ||
| with: | ||
| python-version: ${{ matrix.python-version }} | ||
| - name: Install dependencies | ||
| run: | | ||
| python -m pip install --upgrade pip | ||
| pip install pylint | ||
| - name: Analysing the code with pylint | ||
| run: | | ||
| pylint $(git ls-files '*.py') | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Set up Python ${{ matrix.python-version }} | ||
| uses: actions/setup-python@v5 | ||
| with: | ||
| python-version: ${{ matrix.python-version }} | ||
|
|
||
| - name: Install dependencies | ||
| run: | | ||
| python -m pip install --upgrade pip | ||
| python -m pip install -r requirements.txt | ||
| python -m pip install pylint | ||
|
|
||
| - name: Analyse package with pylint | ||
| run: | | ||
| pylint larousse_api |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1,3 @@ | ||
| name = "larousse_api" | ||
| """Python package for the Larousse API scraper.""" | ||
|
|
||
| NAME = "larousse_api" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,61 +1,54 @@ | ||
| import requests | ||
| """Larousse dictionary scraping client.""" | ||
|
|
||
| import re | ||
| import unicodedata | ||
|
|
||
| import requests | ||
| from bs4 import BeautifulSoup | ||
|
|
||
|
|
||
| class LarousseError(RuntimeError): | ||
| """Raised when Larousse content cannot be retrieved.""" | ||
|
|
||
|
|
||
| class Larousse: | ||
| """Simple client used to fetch and parse Larousse dictionary pages.""" | ||
|
|
||
| def __init__(self, word): | ||
| self.word = word | ||
| self.soup = self.__get_content() | ||
|
|
||
| def get_definitions(self): | ||
| """ | ||
| :return: A list containing all definitions of word | ||
| """ | ||
|
|
||
| for ul in self.soup.find_all('ul'): | ||
| if ul.get('class') is not None and 'Definitions' in ul.get('class'): | ||
| return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in | ||
| ul.find_all('li')], ul.find_all('li') | ||
| return None, None | ||
| """Return normalized definitions and original `<li>` nodes.""" | ||
| return self._extract_items("Definitions") | ||
|
|
||
| def get_synonymes(self): | ||
| """ | ||
| :return: A list containing all synonymes of word | ||
| """ | ||
|
|
||
| for ul in self.soup.find_all('ul'): | ||
| if ul.get('class') is not None and 'Synonymes' in ul.get('class'): | ||
| return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in | ||
| ul.find_all('li')], ul.find_all('li') | ||
| return None, None | ||
| """Return normalized synonymes and original `<li>` nodes.""" | ||
| return self._extract_items("Synonymes") | ||
|
|
||
| def get_citations(self): | ||
| """ | ||
| :return: A list containing all citations of word | ||
| """ | ||
|
|
||
| for ul in self.soup.find_all('ul'): | ||
| if ul.get('class') is not None and 'ListeCitations' in ul.get('class'): | ||
| return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in | ||
| ul.find_all('li')], ul.find_all('li') | ||
| return None, None | ||
| """Return normalized citations and original `<li>` nodes.""" | ||
| return self._extract_items("ListeCitations") | ||
|
|
||
| def get_locutions(self): | ||
| """ | ||
| :return: A list containing all locutions of word | ||
| """ | ||
| for ul in self.soup.find_all('ul'): | ||
| if ul.get('class') is not None and 'ListeCitations' in ul.get('class'): | ||
| return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in | ||
| ul.find_all('li')], ul.find_all('li') | ||
| """Return normalized locutions and original `<li>` nodes.""" | ||
| return self._extract_items("ListeCitations") | ||
|
|
||
| def _extract_items(self, list_class): | ||
| for ul in self.soup.find_all("ul"): | ||
| classes = ul.get("class") | ||
| if classes is not None and list_class in classes: | ||
| items = ul.find_all("li") | ||
| normalized_items = [ | ||
| unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(item))) | ||
| for item in items | ||
| ] | ||
| return normalized_items, items | ||
| return None, None | ||
|
|
||
| def __get_content(self): | ||
| url = "https://www.larousse.fr/dictionnaires/francais/" + self.word.lower() | ||
| rq = requests.get(url=url) | ||
| if rq.status_code != 200: | ||
| raise Exception("Status code return an error") | ||
| return BeautifulSoup(rq.text, 'html.parser') | ||
| url = f"https://www.larousse.fr/dictionnaires/francais/{self.word.lower()}" | ||
| response = requests.get(url=url, timeout=10) | ||
| if response.status_code != 200: | ||
| raise LarousseError("Status code return an error") | ||
| return BeautifulSoup(response.text, "html.parser") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,8 @@ | ||
| beautifulsoup4==4.9.1 | ||
| certifi==2020.6.20 | ||
| chardet==3.0.4 | ||
| idna==2.10 | ||
| requests==2.24.0 | ||
| soupsieve==2.0.1 | ||
| urllib3==1.26.5 | ||
| beautifulsoup4==4.13.4 | ||
| certifi==2025.1.31 | ||
| charset-normalizer==3.4.1 | ||
| idna==3.10 | ||
| requests==2.32.3 | ||
| soupsieve==2.6 | ||
| urllib3==2.3.0 | ||
| pytest==8.3.4 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| import sys | ||
| import types | ||
| from unittest.mock import Mock, patch | ||
|
|
||
| import pytest | ||
|
|
||
| # Provide lightweight fallback modules so imports succeed in restricted environments. | ||
| if "requests" not in sys.modules: | ||
| fake_requests = types.ModuleType("requests") | ||
| fake_requests.get = lambda **kwargs: None | ||
| sys.modules["requests"] = fake_requests | ||
|
Comment on lines
+8
to
+11
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
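The fallback bootstrap checks `"requests" not in sys.modules`, which only tells whether `requests` has already been imported in this process, not whether it is installed. In a fresh test run the real package has typically not been imported yet, so the stub is installed even when `requests` is available; consider attempting the import and falling back only on `ImportError`. Useful? React with 👍 / 👎. |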
||
|
|
||
| if "bs4" not in sys.modules: | ||
| fake_bs4 = types.ModuleType("bs4") | ||
|
|
||
| class _PlaceholderBeautifulSoup: | ||
| def __init__(self, *args, **kwargs): | ||
| pass | ||
|
|
||
| fake_bs4.BeautifulSoup = _PlaceholderBeautifulSoup | ||
| sys.modules["bs4"] = fake_bs4 | ||
|
|
||
| from larousse_api.larousse import Larousse, LarousseError | ||
|
|
||
|
|
||
| class FakeListNode: | ||
| def __init__(self, text): | ||
| self.text = text | ||
|
|
||
| def __str__(self): | ||
| return f"<li>{self.text}</li>" | ||
|
|
||
|
|
||
| class FakeUl: | ||
| def __init__(self, classes, texts): | ||
| self._classes = classes | ||
| self._items = [FakeListNode(text) for text in texts] | ||
|
|
||
| def get(self, key): | ||
| if key == "class": | ||
| return self._classes | ||
| return None | ||
|
|
||
| def find_all(self, tag): | ||
| if tag == "li": | ||
| return self._items | ||
| return [] | ||
|
|
||
|
|
||
| class FakeSoup: | ||
| def __init__(self, uls): | ||
| self._uls = uls | ||
|
|
||
| def find_all(self, tag): | ||
| if tag == "ul": | ||
| return self._uls | ||
| return [] | ||
|
|
||
|
|
||
| @patch.object(Larousse, "_Larousse__get_content") | ||
| def test_get_definitions_returns_entries(mock_get_content): | ||
| mock_get_content.return_value = FakeSoup([FakeUl(["Definitions"], ["Définition 1", "Définition 2"])]) | ||
|
|
||
| larousse = Larousse("Fromage") | ||
| definitions, definition_nodes = larousse.get_definitions() | ||
|
|
||
| assert definitions == ["De\u0301finition 1", "De\u0301finition 2"] | ||
| assert len(definition_nodes) == 2 | ||
|
|
||
|
|
||
| @patch.object(Larousse, "_Larousse__get_content") | ||
| def test_get_synonymes_returns_entries(mock_get_content): | ||
| mock_get_content.return_value = FakeSoup([FakeUl(["Synonymes"], ["Synonyme A", "Synonyme B"])]) | ||
|
|
||
| larousse = Larousse("Fromage") | ||
| synonymes, synonymes_nodes = larousse.get_synonymes() | ||
|
|
||
| assert synonymes == ["Synonyme A", "Synonyme B"] | ||
| assert len(synonymes_nodes) == 2 | ||
|
|
||
|
|
||
| @patch.object(Larousse, "_Larousse__get_content") | ||
| def test_get_citations_returns_entries(mock_get_content): | ||
| mock_get_content.return_value = FakeSoup([FakeUl(["ListeCitations"], ["Citation 1", "Citation 2"])]) | ||
|
|
||
| larousse = Larousse("Fromage") | ||
| citations, citation_nodes = larousse.get_citations() | ||
|
|
||
| assert citations == ["Citation 1", "Citation 2"] | ||
| assert len(citation_nodes) == 2 | ||
|
|
||
|
|
||
| @patch("larousse_api.larousse.requests.get") | ||
| def test_get_content_raises_exception_when_status_code_is_not_200(mock_get): | ||
| mock_get.return_value = Mock(status_code=500, text="Server error") | ||
|
|
||
| with pytest.raises(LarousseError, match="Status code return an error"): | ||
| Larousse("Fromage") | ||
|
|
||
|
|
||
| @patch("larousse_api.larousse.BeautifulSoup") | ||
| @patch("larousse_api.larousse.requests.get") | ||
| def test_request_url_uses_lowercase_word(mock_get, mock_beautiful_soup): | ||
| mock_get.return_value = Mock(status_code=200, text="<html></html>") | ||
| mock_beautiful_soup.return_value = Mock() | ||
|
|
||
| Larousse("FrOmAgE") | ||
|
|
||
| mock_get.assert_called_once_with( | ||
| url="https://www.larousse.fr/dictionnaires/francais/fromage", | ||
| timeout=10, | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`setup.py` now requires `requests>=2.32.3`, but the package still declares support for `python_requires>=3.7`; requests 2.32.x dropped Python 3.7 support, so installs on 3.7 will fail dependency resolution even though this package advertises compatibility. Please align the declared Python minimum with the new dependency floor (or relax the requests constraint). Useful? React with 👍 / 👎.