-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharticle_loader.py
More file actions
executable file
·45 lines (36 loc) · 1.15 KB
/
article_loader.py
File metadata and controls
executable file
·45 lines (36 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/local/anaconda3/bin/python
# -*- coding:utf-8 -*-
from ma import MA
import os
import codecs
import logging
class ArticleLoader:
    """Load UTF-8 text articles from disk and run each line through ``MA``.

    Every line of an article is stripped of surrounding whitespace, passed to
    ``self.ma.parse`` and the per-line results are joined with single spaces.
    NOTE(review): ``MA`` comes from the project-local ``ma`` module; its
    ``parse`` is expected to return ``str`` (the results are fed to
    ``" ".join``) -- confirm against that module.
    """

    def __init__(self):
        """Create the loader together with its ``MA`` parser instance."""
        self.ma = MA()

    def load_article(self, filename):
        """Read one article and return its parsed lines joined by spaces.

        Args:
            filename: Path of a UTF-8 encoded text file.

        Returns:
            The parsed lines joined with ``" "``.  Loading is best-effort:
            if reading or parsing fails, the error is logged and whatever
            was parsed before the failure is returned (possibly ``""``).
        """
        article = []
        try:
            with codecs.open(filename, "r", "utf-8") as f:
                for line in f:
                    article.append(self.parse(line.strip()))
        except Exception as e:
            # Python 3 exceptions have no ``.message`` attribute; reading it
            # here would raise AttributeError and hide the original error.
            logging.error("failed to load article %s: %s", filename, e)
        return " ".join(article)

    def load_articles(self, dirname):
        """Load every ``*.txt`` file found directly inside ``dirname``.

        Args:
            dirname: Directory to scan (not recursive).

        Returns:
            A ``(doc2idx, articles)`` tuple of parallel lists: ``doc2idx[i]``
            is the file name without its ``.txt`` suffix and ``articles[i]``
            is the text returned by ``load_article`` for that file.  Entries
            follow ``os.listdir`` order.
        """
        suffix = ".txt"
        doc2idx = []
        articles = []
        for basename in os.listdir(dirname):
            filepath = os.path.join(dirname, basename)
            if not filepath.endswith(suffix):
                continue
            article = self.load_article(filepath)
            if article is not None:
                # Slice off exactly the suffix. ``str.rstrip(".txt")`` strips
                # any trailing '.', 't' or 'x' characters, so "text.txt"
                # would become "te".
                doc2idx.append(basename[:-len(suffix)])
                articles.append(article)
        return (doc2idx, articles)

    def parse(self, sentence):
        """Return ``self.ma.parse(sentence)`` for a single line of text."""
        return self.ma.parse(sentence)
if __name__ == '__main__':
    # Script entry point: build a loader and read every .txt article under
    # the scrape directory. The returned (doc2idx, articles) pair is unused.
    ArticleLoader().load_articles("/var/pti/scrape")