-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathxml_process_test.py
More file actions
23 lines (18 loc) · 927 Bytes
/
xml_process_test.py
File metadata and controls
23 lines (18 loc) · 927 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import xml.etree.ElementTree as ET
import re
# Build a term index from the mail records in the XML dump.
#
# For every <mail> child of the document root, two text fields are tokenized
# and one line per term is written to terms.txt in the form
# "<prefix><term>:<id>", where <id> is the text of the record's first child.
# NOTE(review): child positions 0, 4 and 7 are presumably the row id, subject
# and body (going by the "s-"/"b-" prefixes) -- confirm against the XML schema.

# A term is a run of at least three ASCII letters, digits, "_" or "-".
# Matching the runs directly yields the same tokens as splitting the text on
# every other character and then discarding pieces of length <= 2.
TERM_PATTERN = re.compile(r"[0-9a-zA-Z_-]{3,}")

# Position of the child whose text identifies the record in the output.
ID_FIELD = 0

# (child position within a record, prefix written before each of its terms)
INDEXED_FIELDS = ((4, "s-"), (7, "b-"))


def _extract_terms(text):
    """Return the lower-cased index terms found in *text*, in document order.

    Args:
        text: Element text to tokenize, or None when the element is empty.

    Returns:
        A list of terms; empty when *text* is None or holds no run of three
        or more allowed characters.
    """
    if text is None:
        return []
    return [term.lower() for term in TERM_PATTERN.findall(text)]


tree = ET.parse('eClass Data/1k_records.xml')
root = tree.getroot()

# The context manager guarantees terms.txt is flushed and closed, including
# when a malformed record raises part-way through.
with open("terms.txt", "w+") as terms:
    # Iterate the <mail> elements themselves instead of indexing the root by
    # position, so a non-<mail> sibling cannot shift which records are read.
    for mail in root.findall("mail"):
        for field_index, prefix in INDEXED_FIELDS:
            for term in _extract_terms(mail[field_index].text):
                # Plain concatenation on purpose: a record without id text
                # still fails loudly instead of writing a bogus "None" id.
                terms.write(prefix + term + ":" + mail[ID_FIELD].text + "\n")