From d2e705e02ad8e4cd0f4d49b6a8d5530baacb8712 Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Mon, 30 May 2016 12:17:01 +0200 Subject: [PATCH 1/9] Fix tests on python 3.5 --- src/slate/classes.py | 5 ++++- src/slate/test_slate.py | 22 +++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/slate/classes.py b/src/slate/classes.py index 580b096..27be417 100644 --- a/src/slate/classes.py +++ b/src/slate/classes.py @@ -22,7 +22,10 @@ from pdfminer.pdfparser import PDFPage except ImportError: from pdfminer.pdfpage import PDFPage -import utils +if PYTHON_3: + import slate.utils as utils +else: + import utils __all__ = ['PDF'] diff --git a/src/slate/test_slate.py b/src/slate/test_slate.py index df83e50..35bd6aa 100644 --- a/src/slate/test_slate.py +++ b/src/slate/test_slate.py @@ -6,14 +6,23 @@ http://codespeak.net/py/dist/test/index.html """ -from classes import PDF +import sys +PYTHON_3 = sys.version_info[0] == 3 + +import os + +if PYTHON_3: + from .classes import PDF +else: + from classes import PDF + def pytest_funcarg__doc(request): - with open('example.pdf', 'rb') as f: + with open(get_pdf_path('example.pdf'), 'rb') as f: return PDF(f) def pytest_funcarg__passwd(request): - with open('protected.pdf') as f: + with open(get_pdf_path('protected.pdf'), 'rb') as f: return PDF(f, 'a') def test_basic(doc): @@ -30,3 +39,10 @@ def test_text_method_unclean(doc): def test_password(passwd): assert passwd[0] == "Chamber of secrets.\n\n\x0c" + +def get_pdf_path(pdf_file): + return os.path.join( + os.path.dirname(__file__), + pdf_file) + + From 2cec7ba66cf46a7c0f5624f9a6170c5cd83313f5 Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Mon, 30 May 2016 12:18:07 +0200 Subject: [PATCH 2/9] add a .gitignore file --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 03d16e8..2d1740f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ dist/* *.pyc *.pyo slate.egg-info +.cache +.*.swp From 37b9e2e0579c3a99fbb81c5678cf7a32efa88877 Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Mon, 30 May 2016 12:21:47 +0200 Subject: [PATCH 3/9] Convert README to reStructuredText so it looks better on github --- README => README.rst | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) rename README => README.rst (52%) diff --git a/README b/README.rst similarity index 52% rename from README rename to README.rst index 5fa8792..8d438d6 100644 --- a/README +++ b/README.rst @@ -8,24 +8,24 @@ text from PDF files. It depends on the PDFMiner package. Slate provides one class, PDF. PDF takes a file-like object and will extract all text from the document, presentating each page -as a string of text: +as a string of text:: - >>> with open('example.pdf') as f: - ... doc = slate.PDF(f) - ... - >>> doc - [..., ..., ...] - >>> doc[1] - 'Text from page 2...' + >>> with open('example.pdf') as f: + ... doc = slate.PDF(f) + ... + >>> doc + [..., ..., ...] + >>> doc[1] + 'Text from page 2...' If your pdf is password protected, pass the password as the -second argument: +second argument:: - >>> with open('secrets.pdf') as f: - ... doc = slate.PDF(f, 'password') - ... - >>> doc[0] - "My mother doesn't know this, but..." + >>> with open('secrets.pdf') as f: + ... doc = slate.PDF(f, 'password') + ... + >>> doc[0] + "My mother doesn't know this, but..." More complex operations ----------------------- @@ -37,10 +37,10 @@ information, then take some time to learn the PDFMiner API. What is wrong with PDFMiner? ---------------------------- - 1. Getting simple things done, like extracting the text - is quite complex. The program is not designed to return - Python objects, which makes interfacing things irritating. - 2. It's an extremely complete set of tools, with multiple - and moderately steep learning curves. - 3. It's not written with hackability in mind. +1. Getting simple things done, like extracting the text + is quite complex. The program is not designed to return + Python objects, which makes interfacing things irritating. +2. It's an extremely complete set of tools, with multiple + and moderately steep learning curves. +3. It's not written with hackability in mind. From e53c0e5715a83aeea7dbc243e0fe67b488968ddc Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Mon, 30 May 2016 12:30:25 +0200 Subject: [PATCH 4/9] Fix unit tests Fix both unit test files so that they can be run with python (not only py.test) from any directory. --- src/slate/test_slate.py | 2 +- src/slate/unittests.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/slate/test_slate.py b/src/slate/test_slate.py index 35bd6aa..8416da2 100644 --- a/src/slate/test_slate.py +++ b/src/slate/test_slate.py @@ -12,7 +12,7 @@ import os if PYTHON_3: - from .classes import PDF + from slate.classes import PDF else: from classes import PDF diff --git a/src/slate/unittests.py b/src/slate/unittests.py index 83c07d5..29c2b6b 100644 --- a/src/slate/unittests.py +++ b/src/slate/unittests.py @@ -1,12 +1,13 @@ import unittest +import os from slate import PDF class TestSlate(unittest.TestCase): def setUp(self): - with open('example.pdf', 'rb') as f: + with open(get_pdf_path('example.pdf'), 'rb') as f: self.doc = PDF(f) - with open('protected.pdf', 'rb') as f: + with open(get_pdf_path('protected.pdf'), 'rb') as f: self.passwd = PDF(f, 'a') def test_basic(self): @@ -27,5 +28,11 @@ def test_text_method_unclean(self): def test_password(self): assert self.passwd[0] == "Chamber of secrets.\n\n\x0c" + +def get_pdf_path(pdf_file): + return os.path.join( + os.path.dirname(__file__), + pdf_file) + if __name__ == '__main__': unittest.main() From 713018db09269abdaf627f78f381740ac9386e95 Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Mon, 30 May 2016 12:31:43 +0200 Subject: [PATCH 5/9] Fix demo code --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 8d438d6..ce78d78 100644 --- a/README.rst +++ b/README.rst @@ -10,7 +10,7 @@ Slate provides one class, PDF. PDF takes a file-like object and will extract all text from the document, presentating each page as a string of text:: - >>> with open('example.pdf') as f: + >>> with open('example.pdf', 'rb') as f: ... doc = slate.PDF(f) ... >>> doc @@ -21,7 +21,7 @@ as a string of text:: If your pdf is password protected, pass the password as the second argument:: - >>> with open('secrets.pdf') as f: + >>> with open('secrets.pdf', 'rb') as f: ... doc = slate.PDF(f, 'password') ... >>> doc[0] @@ -38,9 +38,9 @@ What is wrong with PDFMiner? ---------------------------- 1. Getting simple things done, like extracting the text - is quite complex. The program is not designed to return - Python objects, which makes interfacing things irritating. + is quite complex. The program is not designed to return + Python objects, which makes interfacing things irritating. 2. It's an extremely complete set of tools, with multiple - and moderately steep learning curves. + and moderately steep learning curves. 3. It's not written with hackability in mind. From 07e4b7806adce8130e38a240dc6120d1432fb9b1 Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Mon, 30 May 2016 12:34:24 +0200 Subject: [PATCH 6/9] Fix setup.py so it uses README.rst as readme file --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0fd921f..2a89881 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ else: pdfminer = 'pdfminer' -with open('README') as f: +with open('README.rst') as f: long_description = f.read() setup(name='slate', From c890c8c39e1b1b7695c719d3755055e298516b16 Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Fri, 5 May 2017 00:12:46 +0200 Subject: [PATCH 7/9] Apply remarks from PR review and fix py.test warnings --- src/slate/classes.py | 5 +---- src/slate/test_slate.py | 16 ++++++---------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/slate/classes.py b/src/slate/classes.py index 27be417..7784d23 100644 --- a/src/slate/classes.py +++ b/src/slate/classes.py @@ -22,10 +22,7 @@ from pdfminer.pdfparser import PDFPage except ImportError: from pdfminer.pdfpage import PDFPage -if PYTHON_3: - import slate.utils as utils -else: - import utils +from . import utils __all__ = ['PDF'] diff --git a/src/slate/test_slate.py b/src/slate/test_slate.py index 8416da2..1a54e60 100644 --- a/src/slate/test_slate.py +++ b/src/slate/test_slate.py @@ -6,22 +6,18 @@ http://codespeak.net/py/dist/test/index.html """ -import sys -PYTHON_3 = sys.version_info[0] == 3 - import os +import pytest -if PYTHON_3: - from slate.classes import PDF -else: - from classes import PDF - +from .classes import PDF -def pytest_funcarg__doc(request): +@pytest.fixture +def doc(): with open(get_pdf_path('example.pdf'), 'rb') as f: return PDF(f) -def pytest_funcarg__passwd(request): +@pytest.fixture +def passwd(): with open(get_pdf_path('protected.pdf'), 'rb') as f: return PDF(f, 'a') From f149d945b5f752440d91225ef9068034ac0114ce Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Fri, 5 May 2017 09:14:58 +0200 Subject: [PATCH 8/9] Fix extra space --- src/slate/classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/slate/classes.py b/src/slate/classes.py index 7784d23..29591fe 100644 --- a/src/slate/classes.py +++ b/src/slate/classes.py @@ -22,7 +22,7 @@ from pdfminer.pdfparser import PDFPage except ImportError: from pdfminer.pdfpage import PDFPage -from . import utils +from . import utils __all__ = ['PDF'] From 24f85169dfc109226dd97e2c96105df6577b431e Mon Sep 17 00:00:00 2001 From: Romain TOUZE Date: Wed, 14 Feb 2018 08:36:08 +0100 Subject: [PATCH 9/9] Appli martinmare fix on setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2a89881..859adbd 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ packages=find_packages('src'), package_dir={'': 'src'}, requires=[pdfminer], - install_requires=['distribute', pdfminer], + install_requires=[pdfminer], classifiers= [ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers',