diff --git a/.gitignore b/.gitignore index 03d16e8..2d1740f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ dist/* *.pyc *.pyo slate.egg-info +.cache +.*.swp diff --git a/README b/README.rst similarity index 51% rename from README rename to README.rst index 5fa8792..ce78d78 100644 --- a/README +++ b/README.rst @@ -8,24 +8,24 @@ text from PDF files. It depends on the PDFMiner package. Slate provides one class, PDF. PDF takes a file-like object and will extract all text from the document, presentating each page -as a string of text: +as a string of text:: - >>> with open('example.pdf') as f: - ... doc = slate.PDF(f) - ... - >>> doc - [..., ..., ...] - >>> doc[1] - 'Text from page 2...' + >>> with open('example.pdf', 'rb') as f: + ... doc = slate.PDF(f) + ... + >>> doc + [..., ..., ...] + >>> doc[1] + 'Text from page 2...' If your pdf is password protected, pass the password as the -second argument: +second argument:: - >>> with open('secrets.pdf') as f: - ... doc = slate.PDF(f, 'password') - ... - >>> doc[0] - "My mother doesn't know this, but..." + >>> with open('secrets.pdf', 'rb') as f: + ... doc = slate.PDF(f, 'password') + ... + >>> doc[0] + "My mother doesn't know this, but..." More complex operations ----------------------- @@ -37,10 +37,10 @@ information, then take some time to learn the PDFMiner API. What is wrong with PDFMiner? ---------------------------- - 1. Getting simple things done, like extracting the text - is quite complex. The program is not designed to return - Python objects, which makes interfacing things irritating. - 2. It's an extremely complete set of tools, with multiple - and moderately steep learning curves. - 3. It's not written with hackability in mind. +1. Getting simple things done, like extracting the text + is quite complex. The program is not designed to return + Python objects, which makes interfacing things irritating. +2. It's an extremely complete set of tools, with multiple + and moderately steep learning curves. +3. It's not written with hackability in mind. diff --git a/setup.py b/setup.py index 2386fad..6a53932 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ else: pdfminer = 'pdfminer' -with open('README') as f: +with open('README.rst') as f: long_description = f.read() setup(name='slate', diff --git a/src/slate/classes.py b/src/slate/classes.py index 580b096..29591fe 100644 --- a/src/slate/classes.py +++ b/src/slate/classes.py @@ -22,7 +22,7 @@ from pdfminer.pdfparser import PDFPage except ImportError: from pdfminer.pdfpage import PDFPage -import utils +from . import utils __all__ = ['PDF'] diff --git a/src/slate/test_slate.py b/src/slate/test_slate.py index df83e50..1a54e60 100644 --- a/src/slate/test_slate.py +++ b/src/slate/test_slate.py @@ -6,14 +6,19 @@ http://codespeak.net/py/dist/test/index.html """ -from classes import PDF +import os +import pytest -def pytest_funcarg__doc(request): - with open('example.pdf', 'rb') as f: +from .classes import PDF + +@pytest.fixture +def doc(): + with open(get_pdf_path('example.pdf'), 'rb') as f: return PDF(f) -def pytest_funcarg__passwd(request): - with open('protected.pdf') as f: +@pytest.fixture +def passwd(): + with open(get_pdf_path('protected.pdf'), 'rb') as f: return PDF(f, 'a') def test_basic(doc): @@ -30,3 +35,10 @@ def test_text_method_unclean(doc): def test_password(passwd): assert passwd[0] == "Chamber of secrets.\n\n\x0c" + +def get_pdf_path(pdf_file): + return os.path.join( + os.path.dirname(__file__), + pdf_file) + + diff --git a/src/slate/unittests.py b/src/slate/unittests.py index 83c07d5..29c2b6b 100644 --- a/src/slate/unittests.py +++ b/src/slate/unittests.py @@ -1,12 +1,13 @@ import unittest +import os from slate import PDF class TestSlate(unittest.TestCase): def setUp(self): - with open('example.pdf', 'rb') as f: + with open(get_pdf_path('example.pdf'), 'rb') as f: self.doc = PDF(f) - with open('protected.pdf', 'rb') as f: + with open(get_pdf_path('protected.pdf'), 'rb') as f: self.passwd = PDF(f, 'a') def test_basic(self): @@ -27,5 +28,11 @@ def test_text_method_unclean(self): def test_password(self): assert self.passwd[0] == "Chamber of secrets.\n\n\x0c" + +def get_pdf_path(pdf_file): + return os.path.join( + os.path.dirname(__file__), + pdf_file) + if __name__ == '__main__': unittest.main()