Sense/test.py at master · CSR-Group/Sense · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from csense.benchmark.cqa import getSplitDataSet
from CommonSenseQA import parseQuestion
from csense.parser.sentence import parse
from csense.mining.conceptnet import search
from concurrent.futures import ThreadPoolExecutor
import threading
import spacy
import nltk

# Shared spaCy pipeline, loaded once at import time; getChunkSet() calls it
# to parse sentences and read their noun chunks.
nlp = spacy.load("en_core_web_sm")

def task(rawQuestion, index):
    """Score a single question and append the outcome to ``outfile_temp2.txt``.

    Args:
        rawQuestion: Raw question record, passed unchanged to
            ``scoreCandidates``.
        index: Position of the question in the data set; used only to label
            the log lines.

    Returns:
        1 if ``scoreCandidates`` reports a hit (``count == 1``), otherwise 0.
        Any exception raised while scoring or logging is reported and also
        yields 0, so one bad question does not abort the whole run.

    Raises:
        OSError: If the log file itself cannot be opened.
    """
    # Open the log *before* the try block and let the context manager close
    # it. Previously the open() lived inside the try, so a failed open made
    # the except/finally clauses touch an unassigned ``outfile`` and raise
    # UnboundLocalError, hiding the real error.
    # NOTE(review): main() runs this from a thread pool and every call appends
    # through its own handle, so the order of lines across questions is not
    # guaranteed.
    with open("outfile_temp2.txt", mode="a+") as outfile:
        try:
            count, _count1, guessAnswer, guessAnswer1, correctAnswer = scoreCandidates(rawQuestion)
            print(index, " - ", correctAnswer, " - ", guessAnswer, " - ", guessAnswer1, file=outfile)
            print(index, " - ", correctAnswer, " - ", guessAnswer, " - ", guessAnswer1)
            if count == 1:
                print("#RES:1:", index, file=outfile)
                return 1
            print("#RES:0:", index, file=outfile)
            return 0
        except Exception as e:
            # Best-effort: record the failure and count the question as a miss.
            print(e)
            print(index, ":FAILED")
            print("#RES:F:", index, file=outfile)
            return 0

def _pickGuess(candidates, scores, setSizes):
    """Return the best-scoring candidate, breaking ties by smallest chunk set.

    Among the candidates sharing the highest score, the one with the smallest
    ``setSizes`` entry wins; if that is still a tie, the candidate that comes
    first in ``candidates`` is kept.
    """
    bestScore = max(scores.values())
    tied = [candidate for candidate in candidates if scores[candidate] == bestScore]
    # min() returns the first minimal element, preserving candidate order.
    return min(tied, key=setSizes.get)


def scoreCandidates(rawQuestion):
    """Guess the answer to a question by chunk overlap with each candidate.

    The question is reduced to a chunk set with ``getChunkSet``. Each
    candidate is expanded with ``search`` (imported from
    ``csense.mining.conceptnet``) plus the candidate itself and its last
    word. Two overlap scores are computed per candidate: one against the
    plain question chunks and one against the question chunks expanded with
    their own ``search`` results.

    Args:
        rawQuestion: Raw question record understood by ``parseQuestion``.

    Returns:
        A tuple ``(count, count1, guessAnswer, guessAnswer1, correctAnswer)``:
        ``count`` is 1 if either guess equals the correct answer, else 0;
        ``count1`` is 1 if the correct answer's score ties with the chosen
        guess under either scoring, else 0; ``guessAnswer`` and
        ``guessAnswer1`` are the guesses from the plain and expanded question
        chunks respectively.
    """
    question, candidates, correctAnswer = parseQuestion(rawQuestion)
    print(question)
    questionChunks = getChunkSet(question)
    print(questionChunks)

    # Expand the question chunks with everything search() relates to them.
    # A dedicated loop variable is used so the parsed question is not shadowed.
    relatedChunks = set()
    for questionChunk in questionChunks:
        relatedChunks.update(search(questionChunk))
    expandedQuestionChunks = questionChunks.union(relatedChunks)

    print("questionChunks")
    print(questionChunks)

    candidateSetSize = {}
    candidateMap = {}
    candidateMap1 = {}
    for candidate in candidates:
        # Copy into a fresh set so the additions below never mutate the
        # object handed back by search().
        candidateChunks = set(search(candidate.replace(' ', '_')))
        candidateChunks.add(candidate)
        candidateChunks.add(candidate.split(' ')[-1])
        candidateSetSize[candidate] = len(candidateChunks)
        # Score = number of question chunks that also occur for the candidate.
        candidateMap[candidate] = len(questionChunks & candidateChunks)
        candidateMap1[candidate] = len(expandedQuestionChunks & candidateChunks)

    print(candidateMap)
    print(correctAnswer)

    guessAnswer = _pickGuess(candidates, candidateMap, candidateSetSize)
    print(guessAnswer)

    guessAnswer1 = _pickGuess(candidates, candidateMap1, candidateSetSize)
    print(guessAnswer1)

    if correctAnswer == guessAnswer1 or correctAnswer == guessAnswer:
        count = 1
    else:
        count = 0

    # count1 is the looser measure: the correct answer merely has to tie with
    # the chosen guess on score, even if the tie-break picked another one.
    scoreTied = (
        candidateMap1[correctAnswer] == candidateMap1[guessAnswer1]
        or candidateMap[correctAnswer] == candidateMap[guessAnswer]
    )
    count1 = 1 if scoreTied else 0
    print("*********")

    return count, count1, guessAnswer, guessAnswer1, correctAnswer

def getChunkSet(sentence):
    """Build a set of lookup terms from the noun chunks of ``sentence``.

    For every noun chunk found by the module-level spaCy pipeline ``nlp``,
    the result contains:

    * each lower-cased word of the chunk, stripped of non-alphanumeric
      characters, except the articles ``a``, ``an`` and ``the``;
    * every underscore-joined prefix of those kept words
      (``w1``, ``w1_w2``, ``w1_w2_w3``, ...);
    * the lower-cased chunk root and its Porter stem.

    Args:
        sentence: Text to analyse.

    Returns:
        A ``set`` of strings.
    """
    doc = nlp(sentence)
    stemmer = nltk.PorterStemmer()
    articles = ('a', 'the', 'an')
    chunks = set()
    for chunk in doc.noun_chunks:
        keptWords = []
        for rawWord in chunk.text.lower().split(' '):
            word = ''.join(filter(str.isalnum, rawWord))
            # Skip articles, and also tokens that are empty once punctuation
            # is stripped (e.g. a standalone "-"). Previously such tokens put
            # "" into the result and produced terms with stray underscores
            # such as "well__known".
            if not word or word in articles:
                continue
            keptWords.append(word)
            chunks.add(word)
            chunks.add('_'.join(keptWords))

        rootText = chunk.root.text.lower()
        chunks.add(rootText)
        chunks.add(stemmer.stem(rootText))
    return chunks

def main():
    """Score every "where" question concurrently and print the hit rate.

    Loads the split data set with ``getSplitDataSet``, submits one ``task``
    per question in ``dataset["where"]`` to a pool of 10 worker threads, sums
    the 0/1 results and prints the fraction of hits.
    """
    dataset = getSplitDataSet()
    questions = dataset["where"]
    total = len(questions)

    # An empty split would otherwise end in a ZeroDivisionError below.
    if total == 0:
        print("No questions to evaluate")
        return

    # The context manager shuts the pool down once all results are collected;
    # previously the executor was never shut down explicitly.
    # For debugging a single question, submit only that index (e.g. 1619).
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(task, questions[index], index)
            for index in range(total)
        ]
        count = sum(future.result() for future in futures)

    print(count / total)

# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()