-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathtests.py
More file actions
104 lines (88 loc) · 3.25 KB
/
tests.py
File metadata and controls
104 lines (88 loc) · 3.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import unittest
from crawler import Crawler
import datetime
# Unix-epoch datetime (1970-01-01) passed to every Crawler under test;
# presumably acts as the "crawl everything since" cutoff — confirm in crawler.Crawler.
d = datetime.datetime.fromtimestamp(0)
class test_crawler(unittest.TestCase):
    """Tests for Crawler's OSF API crawling and page scraping.

    NOTE(review): these tests construct a real Crawler and call its
    crawl_* methods, so they appear to require network access and the
    local `crawler` module.
    """

    def test_page_limit_too_high(self):
        # A page_limit beyond the available pages must not crash the crawl.
        c = Crawler(d)
        try:
            c.crawl_institutions_api(page_limit=3)
        except Exception as e:
            # BUG FIX: the original used a bare `except:` that merely
            # print()ed, so this test passed even when the crawl crashed.
            self.fail("crawl_institutions_api crashed: %r" % (e,))

    def test_node_urls_updated_by_crawl(self):
        # Crawling one page of the nodes API must populate node_url_tuples,
        # keep them sorted by their second element, and yield non-empty URLs.
        c = Crawler(d)
        l1 = c.node_url_tuples.copy()
        c.crawl_nodes_api(page_limit=1)
        l2 = c.node_url_tuples.copy()
        self.assertEqual(len(l1), 0)
        self.assertGreater(len(l2), len(l1))
        self.assertNotEqual(l1, l2)
        for x in range(0, len(l2) - 1):
            # Tuples must be strictly ordered on their sort key (index 1).
            self.assertLess(c.node_url_tuples[x][1], c.node_url_tuples[x + 1][1])
        for t in c.node_url_tuples:
            self.assertTrue(is_valid_url(t[0]))

    def test_registration_urls_updated_by_crawl(self):
        # Crawling one page of the registrations API must add URL tuples.
        c = Crawler(d)
        l1 = c.registration_url_tuples.copy()
        c.crawl_registrations_api(page_limit=1)
        l2 = c.registration_url_tuples.copy()
        self.assertEqual(len(l1), 0)
        self.assertGreater(len(l2), len(l1))
        self.assertNotEqual(l1, l2)

    def test_institutions_urls_updated_by_crawl(self):
        # institution_urls starts with one entry (see assertEqual below)
        # and must grow after a one-page crawl.
        c = Crawler(d)
        l1 = c.institution_urls.copy()
        c.crawl_institutions_api(page_limit=1)
        l2 = c.institution_urls.copy()
        self.assertEqual(len(l1), 1)
        self.assertGreater(len(l2), len(l1))
        self.assertNotEqual(l1, l2)

    def test_profile_urls_updated_by_crawl(self):
        # Crawling one page of the users API must populate user_urls.
        c = Crawler(d)
        l1 = c.user_urls.copy()
        c.crawl_users_api(page_limit=1)
        l2 = c.user_urls.copy()
        self.assertEqual(len(l1), 0)
        self.assertGreater(len(l2), len(l1))
        self.assertNotEqual(l1, l2)

    def test_node_wiki_urls_updated_by_crawl(self):  # needs node_url_tuples to work
        # Wiki crawling depends on a prior nodes-API crawl.
        c = Crawler(d)
        l1 = c._node_wikis_by_parent_guid.copy()
        self.assertEqual(len(l1), 0)
        c.crawl_nodes_api(page_limit=1)
        c.crawl_node_wiki()
        l2 = c._node_wikis_by_parent_guid.copy()
        self.assertGreater(len(l2), len(l1))
        self.assertNotEqual(l1, l2)

    def test_registration_wiki_urls_updated_by_crawl(self):  # needs node_url_tuples to work
        # Registration wiki crawling depends on a prior registrations-API crawl.
        c = Crawler(d)
        l1 = c._registration_wikis_by_parent_guid.copy()
        self.assertEqual(len(l1), 0)
        c.crawl_registrations_api(page_limit=1)
        c.crawl_registration_wiki()
        l2 = c._registration_wikis_by_parent_guid.copy()
        self.assertGreater(len(l2), len(l1))
        self.assertNotEqual(l1, l2)

    def test_generate_node_urls(self):
        # generate_node_urls must not raise after a one-page nodes crawl.
        c = Crawler(d)
        c.crawl_nodes_api(page_limit=1)
        try:
            c.generate_node_urls()
        except Exception:  # was a bare `except:` — keep the failure report, narrow the catch
            self.fail("crawler.generate_node_urls() failed")

    def test_scrape_url(self):
        # Scraping a page must save it to disk as <host>/index.html.
        c = Crawler(d)
        try:
            c._scrape_pages(['http://google.com', 'http://google.com/'])
            # Context manager ensures the handle is closed even on failure
            # (the original leaked the handle if close() was never reached).
            with open('google.com/index.html'):
                pass
        except Exception:  # was a bare `except:`
            self.fail("page didn't save / get scraped at all")
        # if there isn't one already
def is_valid_url(url):
    """Return True when *url* is non-empty, False otherwise.

    This is a minimal sanity check (length only), not a full URL parse.
    """
    return len(url) > 0
# Run the full test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()