diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
new file mode 100644
index 0000000..b0a25c6
--- /dev/null
+++ b/.github/workflows/compile.yml
@@ -0,0 +1,34 @@
+name: Compile & Test
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  compile:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements.txt
+
+      - name: Run tests
+        run: |
+          python -m pytest -q
+
+      - name: Validate bytecode compilation
+        run: |
+          python -m compileall -q larousse_api tests
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index c73e032..7040ffd 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -1,23 +1,31 @@
 name: Pylint
 
-on: [push]
+on:
+  push:
+  pull_request:
 
 jobs:
-  build:
+  lint:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.10", "3.11", "3.12"]
+
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pylint
-    - name: Analysing the code with pylint
-      run: |
-        pylint $(git ls-files '*.py')
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements.txt
+          python -m pip install pylint
+
+      - name: Analyse package with pylint
+        run: |
+          pylint larousse_api
diff --git a/larousse_api/__init__.py b/larousse_api/__init__.py
index 0b099b9..3c6774a 100644
--- a/larousse_api/__init__.py
+++ b/larousse_api/__init__.py
@@ -1 +1,3 @@
-name = "larousse_api"
+"""Python package for the Larousse API scraper."""
+
+NAME = "larousse_api"
diff --git a/larousse_api/larousse.py b/larousse_api/larousse.py
index 794f717..ab8db0e 100644
--- a/larousse_api/larousse.py
+++ b/larousse_api/larousse.py
@@ -1,61 +1,55 @@
-import requests
+"""Larousse dictionary scraping client."""
+
 import re
 import unicodedata
+
+import requests
 from bs4 import BeautifulSoup
 
 
+class LarousseError(RuntimeError):
+    """Raised when Larousse content cannot be retrieved."""
+
+
 class Larousse:
+    """Simple client used to fetch and parse Larousse dictionary pages."""
 
     def __init__(self, word):
         self.word = word
         self.soup = self.__get_content()
 
     def get_definitions(self):
-        """
-        :return: A list containing all definitions of word
-        """
-
-        for ul in self.soup.find_all('ul'):
-            if ul.get('class') is not None and 'Definitions' in ul.get('class'):
-                return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
-                        ul.find_all('li')], ul.find_all('li')
-        return None, None
+        """Return normalized definitions and original `<li>` nodes."""
+        return self._extract_items("Definitions")
 
     def get_synonymes(self):
-        """
-        :return: A list containing all synonymes of word
-        """
-
-        for ul in self.soup.find_all('ul'):
-            if ul.get('class') is not None and 'Synonymes' in ul.get('class'):
-                return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
-                        ul.find_all('li')], ul.find_all('li')
-        return None, None
+        """Return normalized synonymes and original `<li>` nodes."""
+        return self._extract_items("Synonymes")
 
     def get_citations(self):
-        """
-        :return: A list containing all citations of word
-        """
-
-        for ul in self.soup.find_all('ul'):
-            if ul.get('class') is not None and 'ListeCitations' in ul.get('class'):
-                return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
-                        ul.find_all('li')], ul.find_all('li')
-        return None, None
+        """Return normalized citations and original `<li>` nodes."""
+        return self._extract_items("ListeCitations")
 
     def get_locutions(self):
-        """
-        :return: A list containing all locutions of word
-        """
-        for ul in self.soup.find_all('ul'):
-            if ul.get('class') is not None and 'ListeCitations' in ul.get('class'):
-                return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
-                        ul.find_all('li')], ul.find_all('li')
+        """Return normalized locutions and original `<li>` nodes."""
+        return self._extract_items("ListeCitations")
+
+    def _extract_items(self, list_class):
+        """Return (texts, nodes) for the first `<ul>` whose class matches, else (None, None)."""
+        for ul in self.soup.find_all("ul"):
+            classes = ul.get("class")
+            if classes is not None and list_class in classes:
+                items = ul.find_all("li")
+                normalized_items = [
+                    unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(item)))
+                    for item in items
+                ]
+                return normalized_items, items
         return None, None
 
     def __get_content(self):
-        url = "https://www.larousse.fr/dictionnaires/francais/" + self.word.lower()
-        rq = requests.get(url=url)
-        if rq.status_code != 200:
-            raise Exception("Status code return an error")
-        return BeautifulSoup(rq.text, 'html.parser')
+        url = f"https://www.larousse.fr/dictionnaires/francais/{self.word.lower()}"
+        response = requests.get(url=url, timeout=10)
+        if response.status_code != 200:
+            raise LarousseError("Status code return an error")
+        return BeautifulSoup(response.text, "html.parser")
diff --git a/requirements.txt b/requirements.txt
index 47f5239..8c7a858 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,8 @@
-beautifulsoup4==4.9.1
-certifi==2020.6.20
-chardet==3.0.4
-idna==2.10
-requests==2.24.0
-soupsieve==2.0.1
-urllib3==1.26.5
+beautifulsoup4==4.13.4
+certifi==2025.1.31
+charset-normalizer==3.4.1
+idna==3.10
+requests==2.32.3
+soupsieve==2.6
+urllib3==2.3.0
+pytest==8.3.4
diff --git a/setup.py b/setup.py
index 61c93eb..7f128d0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,8 @@
+"""Package setup configuration."""
+
 import setuptools
 
-with open("README.md", "r") as fh:
+with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 
 setuptools.setup(
@@ -13,13 +15,14 @@
     url="https://github.com/hugo291/larousse_api",
     packages=setuptools.find_packages(),
     install_requires=[
-        "requests>=2.22.0",
-        "bs4>=0.0.1",
+        "requests>=2.32.3",
+        "beautifulsoup4>=4.13.4",
     ],
+    extras_require={"test": ["pytest>=8.3.4"]},
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
-    python_requires='>=3.7',
+    python_requires=">=3.8",
 )
diff --git a/tests/test_larousse.py b/tests/test_larousse.py
new file mode 100644
index 0000000..967b2df
--- /dev/null
+++ b/tests/test_larousse.py
@@ -0,0 +1,113 @@
+import importlib.util
+import sys
+import types
+from unittest.mock import Mock, patch
+
+import pytest
+
+# Provide lightweight fallback modules so imports succeed in restricted environments.
+if importlib.util.find_spec("requests") is None:
+    fake_requests = types.ModuleType("requests")
+    fake_requests.get = lambda **kwargs: None
+    sys.modules["requests"] = fake_requests
+
+if importlib.util.find_spec("bs4") is None:
+    fake_bs4 = types.ModuleType("bs4")
+
+    class _PlaceholderBeautifulSoup:
+        def __init__(self, *args, **kwargs):
+            pass
+
+    fake_bs4.BeautifulSoup = _PlaceholderBeautifulSoup
+    sys.modules["bs4"] = fake_bs4
+
+from larousse_api.larousse import Larousse, LarousseError
+
+
+class FakeListNode:
+    def __init__(self, text):
+        self.text = text
+
+    def __str__(self):
+        return f"<li>{self.text}</li>"
+
+
+class FakeUl:
+    def __init__(self, classes, texts):
+        self._classes = classes
+        self._items = [FakeListNode(text) for text in texts]
+
+    def get(self, key):
+        if key == "class":
+            return self._classes
+        return None
+
+    def find_all(self, tag):
+        if tag == "li":
+            return self._items
+        return []
+
+
+class FakeSoup:
+    def __init__(self, uls):
+        self._uls = uls
+
+    def find_all(self, tag):
+        if tag == "ul":
+            return self._uls
+        return []
+
+
+@patch.object(Larousse, "_Larousse__get_content")
+def test_get_definitions_returns_entries(mock_get_content):
+    mock_get_content.return_value = FakeSoup([FakeUl(["Definitions"], ["Définition 1", "Définition 2"])])
+
+    larousse = Larousse("Fromage")
+    definitions, definition_nodes = larousse.get_definitions()
+
+    assert definitions == ["De\u0301finition 1", "De\u0301finition 2"]
+    assert len(definition_nodes) == 2
+
+
+@patch.object(Larousse, "_Larousse__get_content")
+def test_get_synonymes_returns_entries(mock_get_content):
+    mock_get_content.return_value = FakeSoup([FakeUl(["Synonymes"], ["Synonyme A", "Synonyme B"])])
+
+    larousse = Larousse("Fromage")
+    synonymes, synonymes_nodes = larousse.get_synonymes()
+
+    assert synonymes == ["Synonyme A", "Synonyme B"]
+    assert len(synonymes_nodes) == 2
+
+
+@patch.object(Larousse, "_Larousse__get_content")
+def test_get_citations_returns_entries(mock_get_content):
+    mock_get_content.return_value = FakeSoup([FakeUl(["ListeCitations"], ["Citation 1", "Citation 2"])])
+
+    larousse = Larousse("Fromage")
+    citations, citation_nodes = larousse.get_citations()
+
+    assert citations == ["Citation 1", "Citation 2"]
+    assert len(citation_nodes) == 2
+
+
+@patch("larousse_api.larousse.requests.get")
+def test_get_content_raises_exception_when_status_code_is_not_200(mock_get):
+    mock_get.return_value = Mock(status_code=500, text="Server error")
+
+    with pytest.raises(LarousseError, match="Status code return an error"):
+        Larousse("Fromage")
+
+
+@patch("larousse_api.larousse.BeautifulSoup")
+@patch("larousse_api.larousse.requests.get")
+def test_request_url_uses_lowercase_word(mock_get, mock_beautiful_soup):
+    mock_get.return_value = Mock(status_code=200, text="")
+    mock_beautiful_soup.return_value = Mock()
+
+    Larousse("FrOmAgE")
+
+    mock_get.assert_called_once_with(
+        url="https://www.larousse.fr/dictionnaires/francais/fromage",
+        timeout=10,
+    )