Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/compile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Compile & Test

on:
push:
pull_request:

jobs:
compile:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt

- name: Run tests
run: |
python -m pytest -q

- name: Validate bytecode compilation
run: |
python -m compileall -q larousse_api tests
38 changes: 23 additions & 15 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
name: Pylint

on: [push]
on:
push:
pull_request:

jobs:
build:
lint:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
- name: Analysing the code with pylint
run: |
pylint $(git ls-files '*.py')
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
python -m pip install pylint

- name: Analyse package with pylint
run: |
pylint larousse_api
4 changes: 3 additions & 1 deletion larousse_api/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
name = "larousse_api"
"""Python package for the Larousse API scraper."""

NAME = "larousse_api"
73 changes: 33 additions & 40 deletions larousse_api/larousse.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,54 @@
import requests
"""Larousse dictionary scraping client."""

import re
import unicodedata

import requests
from bs4 import BeautifulSoup


class LarousseError(RuntimeError):
    """Signal that a Larousse page could not be retrieved.

    The client raises this when the HTTP response status is not 200.
    """


class Larousse:
"""Simple client used to fetch and parse Larousse dictionary pages."""

def __init__(self, word):
self.word = word
self.soup = self.__get_content()

def get_definitions(self):
"""
:return: A list containing all definitions of word
"""

for ul in self.soup.find_all('ul'):
if ul.get('class') is not None and 'Definitions' in ul.get('class'):
return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
ul.find_all('li')], ul.find_all('li')
return None, None
"""Return normalized definitions and original `<li>` nodes."""
return self._extract_items("Definitions")

def get_synonymes(self):
"""
:return: A list containing all synonymes of word
"""

for ul in self.soup.find_all('ul'):
if ul.get('class') is not None and 'Synonymes' in ul.get('class'):
return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
ul.find_all('li')], ul.find_all('li')
return None, None
"""Return normalized synonymes and original `<li>` nodes."""
return self._extract_items("Synonymes")

def get_citations(self):
"""
:return: A list containing all citations of word
"""

for ul in self.soup.find_all('ul'):
if ul.get('class') is not None and 'ListeCitations' in ul.get('class'):
return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
ul.find_all('li')], ul.find_all('li')
return None, None
"""Return normalized citations and original `<li>` nodes."""
return self._extract_items("ListeCitations")

def get_locutions(self):
"""
:return: A list containing all locutions of word
"""
for ul in self.soup.find_all('ul'):
if ul.get('class') is not None and 'ListeCitations' in ul.get('class'):
return [unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(li))) for li in
ul.find_all('li')], ul.find_all('li')
"""Return normalized locutions and original `<li>` nodes."""
return self._extract_items("ListeCitations")

def _extract_items(self, list_class):
for ul in self.soup.find_all("ul"):
classes = ul.get("class")
if classes is not None and list_class in classes:
items = ul.find_all("li")
normalized_items = [
unicodedata.normalize("NFKD", re.sub("<.*?>", "", str(item)))
for item in items
]
return normalized_items, items
return None, None

def __get_content(self):
url = "https://www.larousse.fr/dictionnaires/francais/" + self.word.lower()
rq = requests.get(url=url)
if rq.status_code != 200:
raise Exception("Status code return an error")
return BeautifulSoup(rq.text, 'html.parser')
url = f"https://www.larousse.fr/dictionnaires/francais/{self.word.lower()}"
response = requests.get(url=url, timeout=10)
if response.status_code != 200:
raise LarousseError("Status code return an error")
return BeautifulSoup(response.text, "html.parser")
15 changes: 8 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
beautifulsoup4==4.9.1
certifi==2020.6.20
chardet==3.0.4
idna==2.10
requests==2.24.0
soupsieve==2.0.1
urllib3==1.26.5
beautifulsoup4==4.13.4
certifi==2025.1.31
charset-normalizer==3.4.1
idna==3.10
requests==2.32.3
soupsieve==2.6
urllib3==2.3.0
pytest==8.3.4
11 changes: 7 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Package setup configuration."""

import setuptools

with open("README.md", "r") as fh:
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()

setuptools.setup(
Expand All @@ -13,13 +15,14 @@
url="https://github.com/hugo291/larousse_api",
packages=setuptools.find_packages(),
install_requires=[
"requests>=2.22.0",
"bs4>=0.0.1",
"requests>=2.32.3",

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep install_requires compatible with declared Python floor

setup.py now requires requests>=2.32.3, but the package still declares python_requires>=3.7. requests 2.32.x dropped Python 3.7 support, so installs on 3.7 will fail dependency resolution even though this package advertises compatibility. Please align the declared Python minimum with the new dependency floor (or relax the requests constraint).

Useful? React with 👍 / 👎.

"beautifulsoup4>=4.13.4",
],
extras_require={"test": ["pytest>=8.3.4"]},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
python_requires='>=3.7',
python_requires=">=3.7",
)
112 changes: 112 additions & 0 deletions tests/test_larousse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import sys
import types
from unittest.mock import Mock, patch

import pytest

# Provide a lightweight fallback so imports succeed in restricted environments.
# The stub is installed only when the real ``requests`` package cannot be
# imported; checking ``sys.modules`` alone would shadow an installed package
# that simply has not been imported yet.
try:
    import requests  # noqa: F401  pylint: disable=unused-import
except ImportError:
    fake_requests = types.ModuleType("requests")
    fake_requests.get = lambda **kwargs: None
    sys.modules["requests"] = fake_requests
Comment on lines +8 to +11

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Stop shadowing real deps from test module import

The fallback bootstrap checks sys.modules instead of import availability, so this file injects fake requests/bs4 modules whenever they have not been imported yet (the common case), even if those packages are actually installed. Once this test module is imported, the stubs remain in sys.modules, so later tests or helpers that rely on real APIs (for example requests.Session or bs4.element) can fail or silently avoid exercising real integration paths; this makes the new suite order-dependent and can mask dependency regressions.

Useful? React with 👍 / 👎.


# Stub ``bs4`` only when the real package cannot be imported, so an installed
# BeautifulSoup is never shadowed by the placeholder.
try:
    import bs4  # noqa: F401  pylint: disable=unused-import
except ImportError:
    fake_bs4 = types.ModuleType("bs4")

    class _PlaceholderBeautifulSoup:
        """Inert stand-in that accepts and ignores any constructor arguments."""

        def __init__(self, *args, **kwargs):
            pass

    fake_bs4.BeautifulSoup = _PlaceholderBeautifulSoup
    sys.modules["bs4"] = fake_bs4

from larousse_api.larousse import Larousse, LarousseError


class FakeListNode:
    """Minimal ``<li>`` stand-in: its string form is the text wrapped in ``<li>`` tags."""

    def __init__(self, text):
        self.text = text

    def __str__(self):
        return "<li>{}</li>".format(self.text)


class FakeUl:
    """Stand-in for a ``<ul>`` tag exposing only ``get`` and ``find_all``."""

    def __init__(self, classes, texts):
        self._classes = classes
        self._items = list(map(FakeListNode, texts))

    def get(self, key):
        """Return the class list for ``"class"``; ``None`` for any other key."""
        return self._classes if key == "class" else None

    def find_all(self, tag):
        """Return the fake ``<li>`` nodes for ``"li"``; an empty list otherwise."""
        return self._items if tag == "li" else []


class FakeSoup:
    """Stand-in for a parsed document that only answers ``find_all("ul")``."""

    def __init__(self, uls):
        self._uls = uls

    def find_all(self, tag):
        """Return the stored ``<ul>`` fakes for ``"ul"``; an empty list otherwise."""
        if tag != "ul":
            return []
        return self._uls


@patch.object(Larousse, "_Larousse__get_content")
def test_get_definitions_returns_entries(mock_get_content):
    """Definitions come back NFKD-normalized together with their nodes."""
    definitions_list = FakeUl(["Definitions"], ["Définition 1", "Définition 2"])
    mock_get_content.return_value = FakeSoup([definitions_list])

    texts, nodes = Larousse("Fromage").get_definitions()

    assert len(nodes) == 2
    assert texts == ["De\u0301finition 1", "De\u0301finition 2"]


@patch.object(Larousse, "_Larousse__get_content")
def test_get_synonymes_returns_entries(mock_get_content):
    """Synonymes are returned as plain texts together with their nodes."""
    synonymes_list = FakeUl(["Synonymes"], ["Synonyme A", "Synonyme B"])
    mock_get_content.return_value = FakeSoup([synonymes_list])

    texts, nodes = Larousse("Fromage").get_synonymes()

    assert len(nodes) == 2
    assert texts == ["Synonyme A", "Synonyme B"]


@patch.object(Larousse, "_Larousse__get_content")
def test_get_citations_returns_entries(mock_get_content):
    """Citations are returned as plain texts together with their nodes."""
    citations_list = FakeUl(["ListeCitations"], ["Citation 1", "Citation 2"])
    mock_get_content.return_value = FakeSoup([citations_list])

    texts, nodes = Larousse("Fromage").get_citations()

    assert len(nodes) == 2
    assert texts == ["Citation 1", "Citation 2"]


@patch("larousse_api.larousse.requests.get")
def test_get_content_raises_exception_when_status_code_is_not_200(mock_get):
    """A non-200 response makes the constructor raise ``LarousseError``."""
    failing_response = Mock(status_code=500, text="Server error")
    mock_get.return_value = failing_response

    with pytest.raises(LarousseError, match="Status code return an error"):
        Larousse("Fromage")


@patch("larousse_api.larousse.BeautifulSoup")
@patch("larousse_api.larousse.requests.get")
def test_request_url_uses_lowercase_word(mock_get, mock_beautiful_soup):
    """The request targets the lower-cased word and passes ``timeout=10``."""
    mock_beautiful_soup.return_value = Mock()
    mock_get.return_value = Mock(status_code=200, text="<html></html>")

    Larousse("FrOmAgE")

    expected_url = "https://www.larousse.fr/dictionnaires/francais/fromage"
    mock_get.assert_called_once_with(url=expected_url, timeout=10)