diff --git a/src/evaluation/PresentQuiz/docling/backend/asciidoc_backend.py b/src/evaluation/PresentQuiz/docling/backend/asciidoc_backend.py index 397bfc4..dd6ebac 100644 --- a/src/evaluation/PresentQuiz/docling/backend/asciidoc_backend.py +++ b/src/evaluation/PresentQuiz/docling/backend/asciidoc_backend.py @@ -273,14 +273,14 @@ def _parse(self, doc: DoclingDocument): def _get_current_level(self, parents): for k, v in parents.items(): - if v == None and k > 0: + if v is None and k > 0: return k - 1 return 0 def _get_current_parent(self, parents): for k, v in parents.items(): - if v == None and k > 0: + if v is None and k > 0: return parents[k - 1] return None @@ -328,7 +328,7 @@ def _parse_list_item(self, line): "marker": marker, "text": text.strip(), "numbered": False, - "indent": 0 if indent == None else len(indent), + "indent": 0 if indent is None else len(indent), } else: return { @@ -336,7 +336,7 @@ def _parse_list_item(self, line): "marker": marker, "text": text.strip(), "numbered": True, - "indent": 0 if indent == None else len(indent), + "indent": 0 if indent is None else len(indent), } else: # Fallback if no match diff --git a/src/evaluation/PresentQuiz/docling/backend/html_backend.py b/src/evaluation/PresentQuiz/docling/backend/html_backend.py index 286dfbf..822c28b 100644 --- a/src/evaluation/PresentQuiz/docling/backend/html_backend.py +++ b/src/evaluation/PresentQuiz/docling/backend/html_backend.py @@ -158,9 +158,9 @@ def extract_text_recursively(self, item: Tag): try: # Recursively get the child's text content result.extend(self.extract_text_recursively(child)) - except: + except Exception: pass - except: + except Exception: _log.warn("item has no children") pass diff --git a/src/evaluation/PresentQuiz/docling/backend/msexcel_backend.py b/src/evaluation/PresentQuiz/docling/backend/msexcel_backend.py index 19c2534..a34af8c 100644 --- a/src/evaluation/PresentQuiz/docling/backend/msexcel_backend.py +++ b/src/evaluation/PresentQuiz/docling/backend/msexcel_backend.py @@ -337,7 +337,7 @@ def _find_images_in_sheet( image=ImageRef.from_pil(image=pil_image, dpi=72), caption=None, ) - except: + except Exception: _log.error("could not extract the image from excel sheets") """ diff --git a/src/evaluation/PresentQuiz/docling/backend/msword_backend.py b/src/evaluation/PresentQuiz/docling/backend/msword_backend.py index 1a504bc..6dd9fc4 100644 --- a/src/evaluation/PresentQuiz/docling/backend/msword_backend.py +++ b/src/evaluation/PresentQuiz/docling/backend/msword_backend.py @@ -151,7 +151,7 @@ def prev_indent(self) -> Optional[int]: def get_level(self) -> int: """Return the first None index.""" for k, v in self.parents.items(): - if k >= 0 and v == None: + if k >= 0 and v is None: return k return 0 diff --git a/src/evaluation/PresentQuiz/docling/models/tesseract_ocr_model.py b/src/evaluation/PresentQuiz/docling/models/tesseract_ocr_model.py index 5b70155..560998a 100644 --- a/src/evaluation/PresentQuiz/docling/models/tesseract_ocr_model.py +++ b/src/evaluation/PresentQuiz/docling/models/tesseract_ocr_model.py @@ -46,7 +46,7 @@ def __init__(self, enabled: bool, options: TesseractOcrOptions): raise ImportError(install_errmsg) try: tesseract_version = tesserocr.tesseract_version() - except: + except Exception: raise ImportError(install_errmsg) _, self._tesserocr_languages = tesserocr.get_languages() diff --git a/src/evaluation/PresentQuiz/docling/utils/glm_utils.py b/src/evaluation/PresentQuiz/docling/utils/glm_utils.py index c3c4353..50e0c5b 100644 --- a/src/evaluation/PresentQuiz/docling/utils/glm_utils.py +++ b/src/evaluation/PresentQuiz/docling/utils/glm_utils.py @@ -29,7 +29,7 @@ def resolve_item(paths, obj): try: key = int(paths[0]) - except: + except Exception: key = paths[0] if len(paths) == 1: diff --git a/src/evaluation/PresentQuiz/utils/src/experiment/baseline_docpres.py b/src/evaluation/PresentQuiz/utils/src/experiment/baseline_docpres.py index 79cb44e..7d583c6 100644 --- a/src/evaluation/PresentQuiz/utils/src/experiment/baseline_docpres.py +++ b/src/evaluation/PresentQuiz/utils/src/experiment/baseline_docpres.py @@ -175,7 +175,8 @@ def generate(model: Literal["Qwen2.5", "gpt"]): progress = tqdm(total=len(folders)) def process_folder(pdf_folder, model, processor): - source_text = open(f"{pdf_folder}/source.md").read() + with open(f"{pdf_folder}/source.md") as f: + source_text = f.read() bird_eye = json.load(open(f"{pdf_folder}/refined_doc.json")) images = json.load(open(f"{pdf_folder}/image_caption.json")).keys() output_dir = f"{pdf_folder}/docpres/{llm_name}"