From fb8be4281fba1069fac82a3024f1023520c1ec3b Mon Sep 17 00:00:00 2001 From: Oscar Arbelaez Date: Thu, 20 Feb 2025 22:29:41 +0000 Subject: [PATCH 1/2] Skip articles that don't have authors or year --- src/bibx/article.py | 8 ++++---- src/bibx/builders/scopus_csv.py | 9 +++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/bibx/article.py b/src/bibx/article.py index 08fb3e4..c276984 100644 --- a/src/bibx/article.py +++ b/src/bibx/article.py @@ -87,10 +87,10 @@ def permalink(self) -> Optional[str]: @property def simple_id(self) -> Optional[str]: """Return a simple ID for the article.""" - if self.authors and self.year is not None: - author = self.authors[0].split(" ")[0].replace(",", "") - return f"{author}{self.year}".lower() - return None + if not self.authors or self.year is None: + return None + author = self.authors[0].split(" ")[0].replace(",", "") + return f"{author}{self.year}".lower() def __repr__(self) -> str: """Return a string representation of the article.""" diff --git a/src/bibx/builders/scopus_csv.py b/src/bibx/builders/scopus_csv.py index 66a5d9e..509807b 100644 --- a/src/bibx/builders/scopus_csv.py +++ b/src/bibx/builders/scopus_csv.py @@ -99,6 +99,12 @@ def _parse_file(self, file: TextIO) -> Generator[Article]: reader = csv.DictReader(file) for row in reader: datum = Row.model_validate(row) + if not datum.authors or not datum.year: + logger.info( + "skipping row with missing authors or year: %s", + datum.model_dump_json(indent=2), + ) + continue yield ( Article( label="", @@ -131,6 +137,9 @@ def _parse_file(self, file: TextIO) -> Generator[Article]: def _article_from_reference(self, reference: str) -> Optional[Article]: try: *authors, journal, issue, year = reference.split(", ") + if not authors: + message = "a minimum of one author is required" + raise ValueError(message) _year = int(year.lstrip("(").rstrip(")")) return Article( label=reference, From ed8870f69fd9b809fafa93d5cf10c2747dda81d7 Mon Sep 17 00:00:00 2001 From: Oscar Arbelaez Date: Thu, 20 Feb 2025 22:32:16 +0000 Subject: [PATCH 2/2] Prepare release --- src/bibx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bibx/__init__.py b/src/bibx/__init__.py index 6e0a0e9..f1adfc4 100644 --- a/src/bibx/__init__.py +++ b/src/bibx/__init__.py @@ -27,7 +27,7 @@ "read_wos", ] -__version__ = "0.6.0" +__version__ = "0.6.1" def query_openalex(