-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunner.py
More file actions
103 lines (88 loc) · 4.31 KB
/
runner.py
File metadata and controls
103 lines (88 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# runner.py
# 03-04-2021
# This file is the command-line entry point: it parses the options, loads the
# input files through DataProcessing, runs the solver and prints the results.
#
# https://github.com/Yebulabula/String-Sanitization-Project
#
# Author Ye Mao
# King's College London
# Version 1.0
import time
import sys
from optparse import OptionParser
from model import solver
import warnings
import DataProcessing
def default(str):
    """
    Build an option help message that also shows the option's default.

    Returns `str` followed by ' [Default: %default]'. The result is used as
    the `help=` text of optparse options, where optparse substitutes
    `%default` with the option's default value.
    """
    suffix = ' [Default: %default]'
    return str + suffix
def readCommand(argv):
    """
    Parse the command-line options and load the input files they name.

    Args:
        argv: The argument list to parse, without the program name
            (for example `sys.argv[1:]`).

    Returns:
        A dict with the keys 'w', 'k', 'delta', 'z', 'sensitive_patterns',
        'tau', 'omega', 'c', 'max_simulations' and 'tolerance'. 'w' and 'z'
        hold the results of DataProcessing.readFile, 'sensitive_patterns'
        the result of DataProcessing.readMultiLineFile, and the remaining
        keys the parsed option values.

    Raises:
        Exception: If positional arguments are left over after parsing, or
            if DataProcessing.readMultiLineFile returns None for the
            sensitive-patterns file.
    """
    usageStr = """
USAGE: python runner.py <options>
EXAMPLES: (1) python runner.py
- starts deletion strategy test case
(2) python runner.py -m 2000
- select best deleted symbol by 2000 iterations.
"""
    parser = OptionParser(usageStr)

    parser.add_option('-w', '--originalFile', dest='w_filename', type='string',
                      help=default('The string for sanitization(W)'), default='test/test_w.txt')
    parser.add_option('-z', '--sanitizedFile', dest='z_filename', type='string',
                      help=default('The string for sanitization(Z)'), default='test/test_z.txt')
    parser.add_option('-t', '--tau', dest='tau', type='int',
                      help=default('The tau value to identify spurious pattern'), default=1)
    parser.add_option('-o', '--omega', dest='omega', type='float',
                      help=default('The weight of non-spurious pattern'), default=1)
    parser.add_option('-s', '--sensitivePatterns', dest='sensitive_pat', type='string',
                      help=default('A file that consists of all sensitive patterns in W'),
                      default='test/sen_pattern_test.txt')
    parser.add_option('-k', dest='k', type='int',
                      help=default('The length of each pattern'), default=4)
    parser.add_option('-c', dest='c', type='int',
                      help=default('The exploration parameter for UCB1 formula'), default=20)
    parser.add_option('-d', '--delta', dest='delta', type='int',
                      help=default('The number of deletions'), default=5)
    parser.add_option('-e', '--E', dest='tolerance', type='int',
                      help=default('The pruning parameter for ELLS-ALGO'), default=10)
    parser.add_option('-m', '--max', dest='max_simulations', type='int',
                      help=default('The number of iterations per selection in ELLS-ALGO'), default=3)

    options, otherjunk = parser.parse_args(argv)
    # Every input is passed as an option, so any positional argument is a mistake.
    if otherjunk:
        raise Exception('Command line input not understood: ' + str(otherjunk))

    # The files are read in the same order as before: W, Z, sensitive patterns.
    args = {
        'w': DataProcessing.readFile(options.w_filename),
        'k': options.k,
        'delta': options.delta,
        'z': DataProcessing.readFile(options.z_filename),
        'sensitive_patterns': DataProcessing.readMultiLineFile(options.sensitive_pat),
    }
    if args['sensitive_patterns'] is None:
        # Name the file that failed to load; the message used to name the W
        # file, which sent the user looking at the wrong path.
        raise Exception(
            "The file " + options.sensitive_pat + " cannot be found")

    args['tau'] = options.tau
    args['omega'] = options.omega
    args['c'] = options.c
    args['max_simulations'] = options.max_simulations
    args['tolerance'] = options.tolerance
    return args
def main():
    """
    Run the solver on the command-line inputs and print the results.

    Parses `sys.argv[1:]` with readCommand, builds the solver from the
    resulting arguments, runs it, and prints the distortion and the number
    of ghost/lost patterns for both the sanitized string (Z) and the
    solver's result (H), followed by the time spent in `run()`.
    """
    args = readCommand(sys.argv[1:])
    # Give the instance its own name instead of rebinding the imported `solver`.
    csd_solver = solver(**args)
    ghosts_origin = csd_solver._get_number_of_spurious(csd_solver.Z)

    warnings.filterwarnings(action='ignore', category=DeprecationWarning)
    print('---------------------------')

    # Time only the run itself, using a monotonic clock, so the reported
    # figure does not include the pattern counting done afterwards.
    tick = time.perf_counter()
    result = csd_solver.run()
    tock = time.perf_counter()

    ghosts_after = csd_solver._get_number_of_spurious(result)

    print('Distortion in sanitized string (Z)', csd_solver._get_distortion(csd_solver.Z))
    print('Distortion in the CSD-PLUS result (H)', csd_solver._get_distortion(result))
    print('The number of ghost/lost patterns in Z:', ghosts_origin)
    print('The number of ghost/lost patterns in H:', ghosts_after)
    print('Time consumption in CSD-Plus is', tock - tick)

    # Optional exhaustive-search comparison, disabled by default:
    # csd_solver._exhaustive_search(lst=list(range(len(csd_solver.Z))), n=csd_solver.delta)
    # print('Exhaustive Search:', min(csd_solver.EX))


if __name__ == '__main__':
    main()