diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
new file mode 100644
index 0000000..b0a25c6
--- /dev/null
+++ b/.github/workflows/compile.yml
@@ -0,0 +1,34 @@
+name: Compile & Test
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ compile:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.10", "3.11", "3.12"]
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install -r requirements.txt
+
+ - name: Run tests
+ run: |
+ python -m pytest -q
+
+ - name: Validate bytecode compilation
+ run: |
+ python -m compileall -q larousse_api tests
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index c73e032..7040ffd 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -1,23 +1,31 @@
name: Pylint
-on: [push]
+on:
+ push:
+ pull_request:
jobs:
- build:
+ lint:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.8", "3.9", "3.10"]
+ python-version: ["3.10", "3.11", "3.12"]
+
steps:
- - uses: actions/checkout@v4
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v3
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install pylint
- - name: Analysing the code with pylint
- run: |
- pylint $(git ls-files '*.py')
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install -r requirements.txt
+ python -m pip install pylint
+
+ - name: Analyse package with pylint
+ run: |
+ pylint larousse_api
diff --git a/larousse_api/__init__.py b/larousse_api/__init__.py
index 0b099b9..3c6774a 100644
--- a/larousse_api/__init__.py
+++ b/larousse_api/__init__.py
@@ -1 +1,3 @@
-name = "larousse_api"
+"""Python package for the Larousse API scraper."""
+
+NAME = "larousse_api"
diff --git a/larousse_api/larousse.py b/larousse_api/larousse.py
index 794f717..ab8db0e 100644
--- a/larousse_api/larousse.py
+++ b/larousse_api/larousse.py
@@ -1,61 +1,54 @@
-import requests
+"""Larousse dictionary scraping client."""
+
import re
import unicodedata
+
+import requests
from bs4 import BeautifulSoup
+class LarousseError(RuntimeError):
+ """Raised when Larousse content cannot be retrieved."""
+
+
class Larousse:
+ """Simple client used to fetch and parse Larousse dictionary pages."""
def __init__(self, word):
self.word = word
self.soup = self.__get_content()
def get_definitions(self):
- """
- :return: A list containing all definitions of word
- """
-
- for ul in self.soup.find_all('ul'):
- if ul.get('class') is not None and 'Definitions' in ul.get('class'):
- return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
- ul.find_all('li')], ul.find_all('li')
- return None, None
+ """Return normalized definitions and original `<li>` nodes."""
+ return self._extract_items("Definitions")
def get_synonymes(self):
- """
- :return: A list containing all synonymes of word
- """
-
- for ul in self.soup.find_all('ul'):
- if ul.get('class') is not None and 'Synonymes' in ul.get('class'):
- return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
- ul.find_all('li')], ul.find_all('li')
- return None, None
+ """Return normalized synonymes and original `<li>` nodes."""
+ return self._extract_items("Synonymes")
def get_citations(self):
- """
- :return: A list containing all citations of word
- """
-
- for ul in self.soup.find_all('ul'):
- if ul.get('class') is not None and 'ListeCitations' in ul.get('class'):
- return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
- ul.find_all('li')], ul.find_all('li')
- return None, None
+ """Return normalized citations and original `<li>` nodes."""
+ return self._extract_items("ListeCitations")
def get_locutions(self):
- """
- :return: A list containing all locutions of word
- """
- for ul in self.soup.find_all('ul'):
- if ul.get('class') is not None and 'ListeCitations' in ul.get('class'):
- return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
- ul.find_all('li')], ul.find_all('li')
+ """Return normalized locutions and original `<li>` nodes."""
+ return self._extract_items("ListeCitations")
+
+ def _extract_items(self, list_class):
+ for ul in self.soup.find_all("ul"):
+ classes = ul.get("class")
+ if classes is not None and list_class in classes:
+ items = ul.find_all("li")
+ normalized_items = [
+ unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(item)))
+ for item in items
+ ]
+ return normalized_items, items
return None, None
def __get_content(self):
- url = "https://www.larousse.fr/dictionnaires/francais/" + self.word.lower()
- rq = requests.get(url=url)
- if rq.status_code != 200:
- raise Exception("Status code return an error")
- return BeautifulSoup(rq.text, 'html.parser')
+ url = f"https://www.larousse.fr/dictionnaires/francais/{self.word.lower()}"
+ response = requests.get(url=url, timeout=10)
+ if response.status_code != 200:
+ raise LarousseError("Status code return an error")
+ return BeautifulSoup(response.text, "html.parser")
diff --git a/requirements.txt b/requirements.txt
index 47f5239..8c7a858 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,8 @@
-beautifulsoup4==4.9.1
-certifi==2020.6.20
-chardet==3.0.4
-idna==2.10
-requests==2.24.0
-soupsieve==2.0.1
-urllib3==1.26.5
+beautifulsoup4==4.13.4
+certifi==2025.1.31
+charset-normalizer==3.4.1
+idna==3.10
+requests==2.32.3
+soupsieve==2.6
+urllib3==2.3.0
+pytest==8.3.4
diff --git a/setup.py b/setup.py
index 61c93eb..7f128d0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,8 @@
+"""Package setup configuration."""
+
import setuptools
-with open("README.md", "r") as fh:
+with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
setuptools.setup(
@@ -13,13 +15,14 @@
url="https://github.com/hugo291/larousse_api",
packages=setuptools.find_packages(),
install_requires=[
- "requests>=2.22.0",
- "bs4>=0.0.1",
+ "requests>=2.32.3",
+ "beautifulsoup4>=4.13.4",
],
+ extras_require={"test": ["pytest>=8.3.4"]},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
- python_requires='>=3.7',
+ python_requires=">=3.7",
)
diff --git a/tests/test_larousse.py b/tests/test_larousse.py
new file mode 100644
index 0000000..967b2df
--- /dev/null
+++ b/tests/test_larousse.py
@@ -0,0 +1,112 @@
+import sys
+import types
+from unittest.mock import Mock, patch
+
+import pytest
+
+# Stub out requests/bs4 only when they are genuinely not installed; a
+# `not in sys.modules` check would also shadow installed-but-unimported packages.
+try:
+    import requests  # noqa: F401
+except ImportError:
+    sys.modules["requests"] = types.ModuleType("requests")
+    sys.modules["requests"].get = lambda **kwargs: None
+try:
+    import bs4  # noqa: F401
+except ImportError:
+    class _PlaceholderBeautifulSoup:
+        def __init__(self, *args, **kwargs):
+            pass
+    sys.modules["bs4"] = types.ModuleType("bs4")
+    sys.modules["bs4"].BeautifulSoup = _PlaceholderBeautifulSoup
+
+from larousse_api.larousse import Larousse, LarousseError
+
+
+class FakeListNode:
+    """Fake `<li>` tag whose str() is real markup, so tag stripping is exercised."""
+    def __init__(self, text):
+        self.text = text
+    def __str__(self):
+        return f"<li>{self.text}</li>"
+
+
+class FakeUl:
+ def __init__(self, classes, texts):
+ self._classes = classes
+ self._items = [FakeListNode(text) for text in texts]
+
+ def get(self, key):
+ if key == "class":
+ return self._classes
+ return None
+
+ def find_all(self, tag):
+ if tag == "li":
+ return self._items
+ return []
+
+
+class FakeSoup:
+ def __init__(self, uls):
+ self._uls = uls
+
+ def find_all(self, tag):
+ if tag == "ul":
+ return self._uls
+ return []
+
+
+@patch.object(Larousse, "_Larousse__get_content")
+def test_get_definitions_returns_entries(mock_get_content):
+ mock_get_content.return_value = FakeSoup([FakeUl(["Definitions"], ["Définition 1", "Définition 2"])])
+
+ larousse = Larousse("Fromage")
+ definitions, definition_nodes = larousse.get_definitions()
+
+ assert definitions == ["De\u0301finition 1", "De\u0301finition 2"]
+ assert len(definition_nodes) == 2
+
+
+@patch.object(Larousse, "_Larousse__get_content")
+def test_get_synonymes_returns_entries(mock_get_content):
+ mock_get_content.return_value = FakeSoup([FakeUl(["Synonymes"], ["Synonyme A", "Synonyme B"])])
+
+ larousse = Larousse("Fromage")
+ synonymes, synonymes_nodes = larousse.get_synonymes()
+
+ assert synonymes == ["Synonyme A", "Synonyme B"]
+ assert len(synonymes_nodes) == 2
+
+
+@patch.object(Larousse, "_Larousse__get_content")
+def test_get_citations_returns_entries(mock_get_content):
+ mock_get_content.return_value = FakeSoup([FakeUl(["ListeCitations"], ["Citation 1", "Citation 2"])])
+
+ larousse = Larousse("Fromage")
+ citations, citation_nodes = larousse.get_citations()
+
+ assert citations == ["Citation 1", "Citation 2"]
+ assert len(citation_nodes) == 2
+
+
+@patch("larousse_api.larousse.requests.get")
+def test_get_content_raises_exception_when_status_code_is_not_200(mock_get):
+ mock_get.return_value = Mock(status_code=500, text="Server error")
+
+ with pytest.raises(LarousseError, match="Status code return an error"):
+ Larousse("Fromage")
+
+
+@patch("larousse_api.larousse.BeautifulSoup")
+@patch("larousse_api.larousse.requests.get")
+def test_request_url_uses_lowercase_word(mock_get, mock_beautiful_soup):
+ mock_get.return_value = Mock(status_code=200, text="")
+ mock_beautiful_soup.return_value = Mock()
+
+ Larousse("FrOmAgE")
+
+ mock_get.assert_called_once_with(
+ url="https://www.larousse.fr/dictionnaires/francais/fromage",
+ timeout=10,
+ )