diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/coco_evaluation.py b/coco_evaluation.py index 7fb9084..5d119de 100644 --- a/coco_evaluation.py +++ b/coco_evaluation.py @@ -1,352 +1,359 @@ -__author__ = 'andreasveit' -__version__ = '1.3' - -# Interface for evaluating with the COCO-Text dataset. - -# COCO-Text is a large dataset designed for text detection and recognition. -# This is a Python API that assists in evaluating text detection and recognition results -# on COCO-Text. The format of the COCO-Text annotations is described on -# the project website http://vision.cornell.edu/se3/coco-text/. In addition to this evaluation API, please download -# the COCO-Text tool API, both the COCO images and annotations. -# This dataset is based on Microsoft COCO. Please visit http://mscoco.org/ -# for more information on COCO, including for the image data, object annotatins -# and caption annotations. - -# The following functions are defined: -# getDetections - Compute TP, FN and FP -# evaluateAttribute - Evaluates accuracy for classifying text attributes -# evaluateTranscription - Evaluates accuracy of transcriptions -# area, intersect, iou_score, decode, inter - small helper functions -# printDetailedResults - Prints detailed results as reported in COCO-Text paper - -# COCO-Text Evaluation Toolbox. Version 1.3 -# Data, Data API and paper available at: http://vision.cornell.edu/se3/coco-text/ -# Code written by Andreas Veit, 2016. -# Licensed under the Simplified BSD License [see bsd.txt] - -import editdistance -import copy -import re - -# Compute detections -def getDetections(groundtruth, evaluation, imgIds = None, annIds = [], detection_threshold = 0.5): - """ - A box is a match iff the intersection of union score is >= 0.5. 
- Params - ------ - Input dicts have the format of annotation dictionaries - """ - #parameters - - detectRes = {} - # results are lists of dicts {gt_id: xxx, eval_id: yyy} - detectRes['true_positives'] = [] - detectRes['false_negatives'] = [] - detectRes['false_positives'] = [] - - # the default is set to evaluate on the validation set - if imgIds == None: - imgIds = groundtruth.val - - imgIds = imgIds if len(imgIds)>0 else inter(groundtruth.imgToAnns.keys(), evaluation.imgToAnns.keys()) - - for cocoid in imgIds: - gt_bboxes = groundtruth.imgToAnns[cocoid] if cocoid in groundtruth.imgToAnns else [] - eval_bboxes = copy.copy(evaluation.imgToAnns[cocoid]) if cocoid in evaluation.imgToAnns else [] - - for gt_box_id in gt_bboxes: - gt_box = groundtruth.anns[gt_box_id]['bbox'] - max_iou = 0.0 - match = None - for eval_box_id in eval_bboxes: - eval_box = evaluation.anns[eval_box_id]['bbox'] - iou = iou_score(gt_box,eval_box) - if iou >= detection_threshold and iou > max_iou: - max_iou = iou - match = eval_box_id - if match is not None: - detectRes['true_positives'].append({'gt_id': gt_box_id, 'eval_id': match}) - eval_bboxes.remove(match) - else: - detectRes['false_negatives'].append({'gt_id': gt_box_id}) - if len(eval_bboxes)>0: - detectRes['false_positives'].extend([{'eval_id': eval_box_id} for eval_box_id in eval_bboxes]) - - return detectRes - - -def evaluateAttribute(groundtruth, evaluation, resultDict, attributes): - ''' - Input: - groundtruth_Dict: dict, AnnFile format - evalDict: dict, AnnFile format - resultDict: dict, output from getDetections - attributes : list of strings, attribute categories - ----- - Output: - - ''' - assert 'utf8_string' not in attributes, 'there is a separate function for utf8_string' - res = {} - for attribute in attributes: - correct = [] - incorrect = [] - for detection in resultDict['true_positives']: - gt_val = groundtruth.anns[detection['gt_id']][attribute] - eval_val = evaluation.anns[detection['eval_id']][attribute] - if 
gt_val==eval_val: - correct.append(detection) - else: - if gt_val!='na': - incorrect.append(detection) - res[attribute] = {'attribute': attribute, 'correct':len(correct), 'incorrect':len(incorrect), 'accuracy':len(correct)*1.0/len(correct+incorrect)} - return res - -def evaluateEndToEnd(groundtruth, evaluation, imgIds = None, annIds = [], detection_threshold = 0.5): - """ - A box is a match iff the intersection of union score is >= 0.5. - Params - ------ - Input dicts have the format of annotation dictionaries - """ - #parameters - - detectRes = {} - # results are lists of dicts {gt_id: xxx, eval_id: yyy} - detectRes['true_positives'] = [] - detectRes['false_negatives'] = [] - detectRes['false_positives'] = [] - - # the default is set to evaluate on the validation set - if imgIds == None: - imgIds = groundtruth.val - - imgIds = imgIds if len(imgIds)>0 else inter(groundtruth.imgToAnns.keys(), evaluation.imgToAnns.keys()) - - for cocoid in imgIds: - gt_bboxes = groundtruth.imgToAnns[cocoid] if cocoid in groundtruth.imgToAnns else [] - eval_bboxes = copy.copy(evaluation.imgToAnns[cocoid]) if cocoid in evaluation.imgToAnns else [] - - for gt_box_id in gt_bboxes: - - gt_box = groundtruth.anns[gt_box_id]['bbox'] - if 'utf8_string' not in groundtruth.anns[gt_box_id]: - continue - gt_val = decode(groundtruth.anns[gt_box_id]['utf8_string']) - - max_iou = 0.0 - - match = None - for eval_box_id in eval_bboxes: - eval_box = evaluation.anns[eval_box_id]['bbox'] - iou = iou_score(gt_box,eval_box) - - if iou >=detection_threshold and iou > max_iou: - max_iou = iou - match = eval_box_id - if 'utf8_string' in evaluation.anns[eval_box_id]: - eval_val = decode(evaluation.anns[eval_box_id]['utf8_string']) - if editdistance.eval(gt_val, eval_val)==0: - break - if match is not None: - detectRes['true_positives'].append({'gt_id': gt_box_id, 'eval_id': match}) - eval_bboxes.remove(match) - else: - detectRes['false_negatives'].append({'gt_id': gt_box_id}) - if len(eval_bboxes)>0: - 
detectRes['false_positives'].extend([{'eval_id': eval_box_id} for eval_box_id in eval_bboxes]) - - resultDict = detectRes - - res = {} - for setting, threshold in zip(['exact', 'distance1'],[0,1]): - correct = [] - incorrect = [] - ignore = [] - for detection in resultDict['true_positives']: - if 'utf8_string' not in groundtruth.anns[detection['gt_id']]: - ignore.append(detection) - continue - - gt_val = decode(groundtruth.anns[detection['gt_id']]['utf8_string']) - if len(gt_val)<3: - ignore.append(detection) - continue - - if 'utf8_string' not in evaluation.anns[detection['eval_id']]: - incorrect.append(detection) - continue - - eval_val = decode(evaluation.anns[detection['eval_id']]['utf8_string']) - - detection['gt_string'] = gt_val - detection['eval_string'] = eval_val - if editdistance.eval(gt_val, eval_val)<=threshold: - correct.append(detection) - else: - incorrect.append(detection) - - res[setting] = {'setting': setting, 'correct':correct, 'incorrect':incorrect, 'ignore':ignore, 'accuracy':len(correct)*1.0/len(correct+incorrect)} - return res - -def area(bbox): - return bbox[2] * 1.0 * bbox[3] # width * height - -def intersect(bboxA, bboxB): - """Return a new bounding box that contains the intersection of - 'self' and 'other', or None if there is no intersection - """ - new_top = max(bboxA[1], bboxB[1]) - new_left = max(bboxA[0], bboxB[0]) - new_right = min(bboxA[0]+bboxA[2], bboxB[0]+bboxB[2]) - new_bottom = min(bboxA[1]+bboxA[3], bboxB[1]+bboxB[3]) - if new_top < new_bottom and new_left < new_right: - return [new_left, new_top, new_right - new_left, new_bottom - new_top] - return None - -def iou_score(bboxA, bboxB): - """Returns the Intersection-over-Union score, defined as the area of - the intersection divided by the intersection over the union of - the two bounding boxes. This measure is symmetric. 
- """ - if intersect(bboxA, bboxB): - intersection_area = area(intersect(bboxA, bboxB)) - else: - intersection_area = 0 - union_area = area(bboxA) + area(bboxB) - intersection_area - if union_area > 0: - return float(intersection_area) / float(union_area) - else: - return 0 - -def decode(trans): - trans = trans.encode("ascii" ,'ignore') - trans = trans.replace('\n', ' ') - trans2 = re.sub('[^a-zA-Z0-9!?@\_\-\+\*\:\&\/ \.]', '', trans) - return trans2.lower() - -def inter(list1, list2): - return list(set(list1).intersection(set(list2))) - -def printDetailedResults(c_text, detection_results, transcription_results, name): - print name - #detected coco-text annids - found = [x['gt_id'] for x in detection_results['true_positives']] - n_found = [x['gt_id'] for x in detection_results['false_negatives']] - fp = [x['eval_id'] for x in detection_results['false_positives']] - - leg_eng_mp = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('language','english'),('class','machine printed')], areaRng=[]) - leg_eng_hw = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('language','english'),('class','handwritten')], areaRng=[]) - leg_mp = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('class','machine printed')], areaRng=[]) - ileg_mp = c_text.getAnnIds(imgIds=[], catIds=[('legibility','illegible'),('class','machine printed')], areaRng=[]) - leg_hw = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('class','handwritten')], areaRng=[]) - ileg_hw = c_text.getAnnIds(imgIds=[], catIds=[('legibility','illegible'),('class','handwritten')], areaRng=[]) - leg_ot = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('class','others')], areaRng=[]) - ileg_ot = c_text.getAnnIds(imgIds=[], catIds=[('legibility','illegible'),('class','others')], areaRng=[]) - - #Detection - print - print "Detection" - print "Recall" - - if (len(inter(found+n_found, leg_mp)))>0: - lm = "%.2f"%(100*len(inter(found, 
leg_mp))*1.0/(len(inter(found+n_found, leg_mp)))) - else: - lm = 0 - print 'legible & machine printed: ', lm - - if (len(inter(found+n_found, leg_hw)))>0: - lh = "%.2f"%(100*len(inter(found, leg_hw))*1.0/(len(inter(found+n_found, leg_hw)))) - else: - lh = 0 - print 'legible & handwritten: ', lh - - if (len(inter(found+n_found, leg_ot)))>0: - lo = "%.2f"%(100*len(inter(found, leg_ot))*1.0/(len(inter(found+n_found, leg_ot)))) - else: - lo = 0 - # print 'legible & others: ', lo - - if (len(inter(found+n_found, leg_mp+leg_hw)))>0: - lto = "%.2f"%(100*len(inter(found, leg_mp+leg_hw))*1.0/(len(inter(found+n_found, leg_mp+leg_hw)))) - else: - lto = 0 - print 'legible overall: ', lto - - if (len(inter(found+n_found, ileg_mp)))>0: - ilm = "%.2f"%(100*len(inter(found, ileg_mp))*1.0/(len(inter(found+n_found, ileg_mp)))) - else: - ilm = 0 - print 'illegible & machine printed: ', ilm - - if (len(inter(found+n_found, ileg_hw)))>0: - ilh = "%.2f"%(100*len(inter(found, ileg_hw))*1.0/(len(inter(found+n_found, ileg_hw)))) - else: - ilh = 0 - print 'illegible & handwritten: ', ilh - - if (len(inter(found+n_found, ileg_ot)))>0: - ilo = "%.2f"%(100*len(inter(found, ileg_ot))*1.0/(len(inter(found+n_found, ileg_ot)))) - else: - ilo = 0 - # print 'illegible & others: ', ilo - - if (len(inter(found+n_found, ileg_mp+ileg_hw)))>0: - ilto = "%.2f"%(100*len(inter(found, ileg_mp+ileg_hw))*1.0/(len(inter(found+n_found, ileg_mp+ileg_hw)))) - else: - ilto = 0 - print 'illegible overall: ', ilto - - #total = "%.1f"%(100*len(found)*1.0/(len(found)+len(n_found))) - t_recall = 100*len(found)*1.0/(len(inter(found+n_found, leg_mp+leg_hw+ileg_mp+ileg_hw))) - total = "%.1f"%(t_recall) - print 'total recall: ', total - - print "Precision" - - t_precision = 100*len(found)*1.0/(len(found+fp)) - precision = "%.2f"%(t_precision) - print 'total precision: ', precision - - print "f-score" - - f_score = "%.2f"%(2 * t_recall * t_precision / (t_recall + t_precision)) if (t_recall + t_precision)>0 else 0 - print 
'f-score localization: ', f_score - - print - print "Transcription" - transAcc = "%.2f"%(100*transcription_results['exact']['accuracy']) - transAcc1 = "%.2f"%(100*transcription_results['distance1']['accuracy']) - print 'accuracy for exact matches: ', transAcc - print 'accuracy for matches with edit distance<=1: ', transAcc1 - - print - print 'End-to-end' - TP_new = len(inter(found, leg_eng_mp+leg_eng_hw)) * transcription_results['exact']['accuracy'] - FP_new = len(fp) + len(inter(found, leg_eng_mp+leg_eng_hw))*(1-transcription_results['exact']['accuracy']) - FN_new = len(inter(n_found, leg_eng_mp+leg_eng_hw)) + len(inter(found, leg_eng_mp+leg_eng_hw))*(1-transcription_results['exact']['accuracy']) - t_recall_new = 100 * TP_new / (TP_new + FN_new) - t_precision_new = 100 * TP_new / (TP_new + FP_new) if (TP_new + FP_new)>0 else 0 - fscore = "%.2f"%(2 * t_recall_new * t_precision_new / (t_recall_new + t_precision_new)) if (t_recall_new + t_precision_new)>0 else 0 - - recall_new = "%.2f"%(t_recall_new) - precision_new = "%.2f"%(t_precision_new) - print 'recall: ', recall_new, - print 'precision: ', precision_new - print 'End-to-end f-score: ', fscore - - print - #print lm, ' & ', lh, ' & ', lto, ' & ', ilm, ' & ', ilh, ' & ', ilto, '&', total, ' & ', precision, ' & ', transAcc, ' & ', transAcc1, ' & ', fscore - print lm, ' & ', lh, ' & ', ilm, ' & ', ilh, '&', total, ' & ', precision, ' & ', f_score, ' & ', transAcc, ' & ', recall_new, ' & ', precision_new, ' & ', fscore - print - - - - - - - - - - - +from __future__ import print_function +from __future__ import division +from __future__ import print_function + +__author__ = 'andreasveit' +__version__ = '1.3' + +# Interface for evaluating with the COCO-Text dataset. + +# COCO-Text is a large dataset designed for text detection and recognition. +# This is a Python API that assists in evaluating text detection and recognition results +# on COCO-Text. 
The format of the COCO-Text annotations is described on +# the project website http://vision.cornell.edu/se3/coco-text/. In addition to this evaluation API, please download +# the COCO-Text tool API, both the COCO images and annotations. +# This dataset is based on Microsoft COCO. Please visit http://mscoco.org/ +# for more information on COCO, including for the image data, object annotations +# and caption annotations. + +# The following functions are defined: +# getDetections - Compute TP, FN and FP +# evaluateAttribute - Evaluates accuracy for classifying text attributes +# evaluateEndToEnd - Evaluates accuracy of transcriptions +# area, intersect, iou_score, decode, inter - small helper functions +# printDetailedResults - Prints detailed results as reported in COCO-Text paper + +# COCO-Text Evaluation Toolbox. Version 1.3 +# Data, Data API and paper available at: http://vision.cornell.edu/se3/coco-text/ +# Code written by Andreas Veit, 2016. +# Licensed under the Simplified BSD License [see bsd.txt] + +import editdistance +import copy +import re +import sys + +# Compute detections +def getDetections(groundtruth, evaluation, imgIds = None, annIds = [], detection_threshold = 0.5): + """ + A box is a match iff the intersection of union score is >= 0.5. 
+ Params + ------ + Input dicts have the format of annotation dictionaries + """ + #parameters + + detectRes = {} + # results are lists of dicts {gt_id: xxx, eval_id: yyy} + detectRes['true_positives'] = [] + detectRes['false_negatives'] = [] + detectRes['false_positives'] = [] + + # the default is set to evaluate on the validation set + if imgIds == None: + imgIds = groundtruth.val + + imgIds = imgIds if len(imgIds)>0 else inter(list(groundtruth.imgToAnns.keys()), list(evaluation.imgToAnns.keys())) + + for cocoid in imgIds: + gt_bboxes = groundtruth.imgToAnns[cocoid] if cocoid in groundtruth.imgToAnns else [] + eval_bboxes = copy.copy(evaluation.imgToAnns[cocoid]) if cocoid in evaluation.imgToAnns else [] + + for gt_box_id in gt_bboxes: + gt_box = groundtruth.anns[gt_box_id]['bbox'] + max_iou = 0.0 + match = None + for eval_box_id in eval_bboxes: + eval_box = evaluation.anns[eval_box_id]['bbox'] + iou = iou_score(gt_box,eval_box) + if iou >= detection_threshold and iou > max_iou: + max_iou = iou + match = eval_box_id + if match is not None: + detectRes['true_positives'].append({'gt_id': gt_box_id, 'eval_id': match}) + eval_bboxes.remove(match) + else: + detectRes['false_negatives'].append({'gt_id': gt_box_id}) + if len(eval_bboxes)>0: + detectRes['false_positives'].extend([{'eval_id': eval_box_id} for eval_box_id in eval_bboxes]) + + return detectRes + + +def evaluateAttribute(groundtruth, evaluation, resultDict, attributes): + ''' + Input: + groundtruth_Dict: dict, AnnFile format + evalDict: dict, AnnFile format + resultDict: dict, output from getDetections + attributes : list of strings, attribute categories + ----- + Output: + + ''' + assert 'utf8_string' not in attributes, 'there is a separate function for utf8_string' + res = {} + for attribute in attributes: + correct = [] + incorrect = [] + for detection in resultDict['true_positives']: + gt_val = groundtruth.anns[detection['gt_id']][attribute] + eval_val = evaluation.anns[detection['eval_id']][attribute] + 
if gt_val==eval_val: + correct.append(detection) + else: + if gt_val!='na': + incorrect.append(detection) + res[attribute] = {'attribute': attribute, 'correct':len(correct), 'incorrect':len(incorrect), 'accuracy':len(correct)*1.0/len(correct+incorrect)} + return res + +def evaluateEndToEnd(groundtruth, evaluation, imgIds = None, annIds = [], detection_threshold = 0.5): + """ + A box is a match iff the intersection of union score is >= 0.5. + Params + ------ + Input dicts have the format of annotation dictionaries + """ + #parameters + + detectRes = {} + # results are lists of dicts {gt_id: xxx, eval_id: yyy} + detectRes['true_positives'] = [] + detectRes['false_negatives'] = [] + detectRes['false_positives'] = [] + + # the default is set to evaluate on the validation set + if imgIds == None: + imgIds = groundtruth.val + + imgIds = imgIds if len(imgIds)>0 else inter(list(groundtruth.imgToAnns.keys()), list(evaluation.imgToAnns.keys())) + + for cocoid in imgIds: + gt_bboxes = groundtruth.imgToAnns[cocoid] if cocoid in groundtruth.imgToAnns else [] + eval_bboxes = copy.copy(evaluation.imgToAnns[cocoid]) if cocoid in evaluation.imgToAnns else [] + + for gt_box_id in gt_bboxes: + + gt_box = groundtruth.anns[gt_box_id]['bbox'] + if 'utf8_string' not in groundtruth.anns[gt_box_id]: + continue + gt_val = decode(groundtruth.anns[gt_box_id]['utf8_string']) + + max_iou = 0.0 + + match = None + for eval_box_id in eval_bboxes: + eval_box = evaluation.anns[eval_box_id]['bbox'] + iou = iou_score(gt_box,eval_box) + + if iou >=detection_threshold and iou > max_iou: + max_iou = iou + match = eval_box_id + if 'utf8_string' in evaluation.anns[eval_box_id]: + eval_val = decode(evaluation.anns[eval_box_id]['utf8_string']) + if editdistance.eval(gt_val, eval_val)==0: + break + if match is not None: + detectRes['true_positives'].append({'gt_id': gt_box_id, 'eval_id': match}) + eval_bboxes.remove(match) + else: + detectRes['false_negatives'].append({'gt_id': gt_box_id}) + if 
len(eval_bboxes)>0: + detectRes['false_positives'].extend([{'eval_id': eval_box_id} for eval_box_id in eval_bboxes]) + + resultDict = detectRes + + res = {} + for setting, threshold in zip(['exact', 'distance1'],[0,1]): + correct = [] + incorrect = [] + ignore = [] + for detection in resultDict['true_positives']: + if 'utf8_string' not in groundtruth.anns[detection['gt_id']]: + ignore.append(detection) + continue + + gt_val = decode(groundtruth.anns[detection['gt_id']]['utf8_string']) + if len(gt_val)<3: + ignore.append(detection) + continue + + if 'utf8_string' not in evaluation.anns[detection['eval_id']]: + incorrect.append(detection) + continue + + eval_val = decode(evaluation.anns[detection['eval_id']]['utf8_string']) + + detection['gt_string'] = gt_val + detection['eval_string'] = eval_val + if editdistance.eval(gt_val, eval_val)<=threshold: + correct.append(detection) + else: + incorrect.append(detection) + + res[setting] = {'setting': setting, 'correct':correct, 'incorrect':incorrect, 'ignore':ignore, 'accuracy':len(correct)*1.0/len(correct+incorrect)} + return res + +def area(bbox): + return bbox[2] * 1.0 * bbox[3] # width * height + +def intersect(bboxA, bboxB): + """Return a new bounding box that contains the intersection of + 'self' and 'other', or None if there is no intersection + """ + new_top = max(bboxA[1], bboxB[1]) + new_left = max(bboxA[0], bboxB[0]) + new_right = min(bboxA[0]+bboxA[2], bboxB[0]+bboxB[2]) + new_bottom = min(bboxA[1]+bboxA[3], bboxB[1]+bboxB[3]) + if new_top < new_bottom and new_left < new_right: + return [new_left, new_top, new_right - new_left, new_bottom - new_top] + return None + +def iou_score(bboxA, bboxB): + """Returns the Intersection-over-Union score, defined as the area of + the intersection divided by the intersection over the union of + the two bounding boxes. This measure is symmetric. 
+ """ + if intersect(bboxA, bboxB): + intersection_area = area(intersect(bboxA, bboxB)) + else: + intersection_area = 0 + union_area = area(bboxA) + area(bboxB) - intersection_area + if union_area > 0: + return float(intersection_area) / float(union_area) + else: + return 0 + +def decode(trans): + if sys.version[0] == '2': + trans = trans.encode("ascii" ,'ignore') + + trans = trans.replace('\n', ' ') + trans2 = re.sub('[^a-zA-Z0-9!?@\_\-\+\*\:\&\/ \.]', '', trans) + return trans2.lower() + +def inter(list1, list2): + return list(set(list1).intersection(set(list2))) + +def printDetailedResults(c_text, detection_results, transcription_results, name): + print(name) + #detected coco-text annids + found = [x['gt_id'] for x in detection_results['true_positives']] + n_found = [x['gt_id'] for x in detection_results['false_negatives']] + fp = [x['eval_id'] for x in detection_results['false_positives']] + + leg_eng_mp = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('language','english'),('class','machine printed')], areaRng=[]) + leg_eng_hw = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('language','english'),('class','handwritten')], areaRng=[]) + leg_mp = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('class','machine printed')], areaRng=[]) + ileg_mp = c_text.getAnnIds(imgIds=[], catIds=[('legibility','illegible'),('class','machine printed')], areaRng=[]) + leg_hw = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('class','handwritten')], areaRng=[]) + ileg_hw = c_text.getAnnIds(imgIds=[], catIds=[('legibility','illegible'),('class','handwritten')], areaRng=[]) + leg_ot = c_text.getAnnIds(imgIds=[], catIds=[('legibility','legible'),('class','others')], areaRng=[]) + ileg_ot = c_text.getAnnIds(imgIds=[], catIds=[('legibility','illegible'),('class','others')], areaRng=[]) + + #Detection + print() + print("Detection") + print("Recall") + + if (len(inter(found+n_found, leg_mp)))>0: + lm = "%.2f"%(100*len(inter(found, 
leg_mp))*1.0/(len(inter(found+n_found, leg_mp)))) + else: + lm = 0 + print('legible & machine printed: ', lm) + + if (len(inter(found+n_found, leg_hw)))>0: + lh = "%.2f"%(100*len(inter(found, leg_hw))*1.0/(len(inter(found+n_found, leg_hw)))) + else: + lh = 0 + print('legible & handwritten: ', lh) + + if (len(inter(found+n_found, leg_ot)))>0: + lo = "%.2f"%(100*len(inter(found, leg_ot))*1.0/(len(inter(found+n_found, leg_ot)))) + else: + lo = 0 + # print 'legible & others: ', lo + + if (len(inter(found+n_found, leg_mp+leg_hw)))>0: + lto = "%.2f"%(100*len(inter(found, leg_mp+leg_hw))*1.0/(len(inter(found+n_found, leg_mp+leg_hw)))) + else: + lto = 0 + print('legible overall: ', lto) + + if (len(inter(found+n_found, ileg_mp)))>0: + ilm = "%.2f"%(100*len(inter(found, ileg_mp))*1.0/(len(inter(found+n_found, ileg_mp)))) + else: + ilm = 0 + print('illegible & machine printed: ', ilm) + + if (len(inter(found+n_found, ileg_hw)))>0: + ilh = "%.2f"%(100*len(inter(found, ileg_hw))*1.0/(len(inter(found+n_found, ileg_hw)))) + else: + ilh = 0 + print('illegible & handwritten: ', ilh) + + if (len(inter(found+n_found, ileg_ot)))>0: + ilo = "%.2f"%(100*len(inter(found, ileg_ot))*1.0/(len(inter(found+n_found, ileg_ot)))) + else: + ilo = 0 + # print 'illegible & others: ', ilo + + if (len(inter(found+n_found, ileg_mp+ileg_hw)))>0: + ilto = "%.2f"%(100*len(inter(found, ileg_mp+ileg_hw))*1.0/(len(inter(found+n_found, ileg_mp+ileg_hw)))) + else: + ilto = 0 + print('illegible overall: ', ilto) + + #total = "%.1f"%(100*len(found)*1.0/(len(found)+len(n_found))) + t_recall = 100*len(found)*1.0/(len(inter(found+n_found, leg_mp+leg_hw+ileg_mp+ileg_hw))) + total = "%.1f"%(t_recall) + print('total recall: ', total) + + print("Precision") + + t_precision = 100*len(found)*1.0/(len(found+fp)) + precision = "%.2f"%(t_precision) + print('total precision: ', precision) + + print("f-score") + + f_score = "%.2f"%(2 * t_recall * t_precision / (t_recall + t_precision)) if (t_recall + t_precision)>0 else 0 + 
print('f-score localization: ', f_score) + + print() + print("Transcription") + transAcc = "%.2f"%(100*transcription_results['exact']['accuracy']) + transAcc1 = "%.2f"%(100*transcription_results['distance1']['accuracy']) + print('accuracy for exact matches: ', transAcc) + print('accuracy for matches with edit distance<=1: ', transAcc1) + + print() + print('End-to-end') + TP_new = len(inter(found, leg_eng_mp+leg_eng_hw)) * transcription_results['exact']['accuracy'] + FP_new = len(fp) + len(inter(found, leg_eng_mp+leg_eng_hw))*(1-transcription_results['exact']['accuracy']) + FN_new = len(inter(n_found, leg_eng_mp+leg_eng_hw)) + len(inter(found, leg_eng_mp+leg_eng_hw))*(1-transcription_results['exact']['accuracy']) + t_recall_new = 100 * TP_new / (TP_new + FN_new) + t_precision_new = 100 * TP_new / (TP_new + FP_new) if (TP_new + FP_new)>0 else 0 + fscore = "%.2f"%(2 * t_recall_new * t_precision_new / (t_recall_new + t_precision_new)) if (t_recall_new + t_precision_new)>0 else 0 + + recall_new = "%.2f"%(t_recall_new) + precision_new = "%.2f"%(t_precision_new) + print('recall: ', recall_new, end=' ') + print('precision: ', precision_new) + print('End-to-end f-score: ', fscore) + print() + # print("{} & {} & {} & {} & {} & {} & {} & {} & {} & {} & {}".format(lm, lh, ilm, ilh, total, precision, f_score, transAcc, recall_new, precision_new, fscore)) + print("{} & {} & {} & {} & {} & {} & {} & {} & {} & {} & {}".format(lm, lh, ilm, ilh, total, precision, f_score, transAcc, recall_new, precision_new, fscore)) + # print('{} & {} & {} & {} & {} & {} & {} & {} & {} & {} & {}'.format(lm, lh, ilm, ilh, total, precision, f_score, transAcc, recall_new, precision_new, fscore)) + print() + + + + + + + + + + + diff --git a/coco_text.py b/coco_text.py index 533a2f7..1c2ae9e 100644 --- a/coco_text.py +++ b/coco_text.py @@ -1,3 +1,6 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function __author__ = 'andreasveit' __version__ = 
'1.1' # Interface for accessing the COCO-Text dataset. @@ -62,16 +65,16 @@ def __init__(self, annotation_file=None): self.train = [] if not annotation_file == None: assert os.path.isfile(annotation_file), "file does not exist" - print 'loading annotations into memory...' + print('loading annotations into memory...') time_t = datetime.datetime.utcnow() dataset = json.load(open(annotation_file, 'r')) - print datetime.datetime.utcnow() - time_t + print(datetime.datetime.utcnow() - time_t) self.dataset = dataset self.createIndex() def createIndex(self): # create index - print 'creating index...' + print('creating index...') self.imgToAnns = {int(cocoid): self.dataset['imgToAnns'][cocoid] for cocoid in self.dataset['imgToAnns']} self.imgs = {int(cocoid): self.dataset['imgs'][cocoid] for cocoid in self.dataset['imgs']} self.anns = {int(annid): self.dataset['anns'][annid] for annid in self.dataset['anns']} @@ -79,7 +82,7 @@ def createIndex(self): self.val = [int(cocoid) for cocoid in self.dataset['imgs'] if self.dataset['imgs'][cocoid]['set'] == 'val'] self.test = [int(cocoid) for cocoid in self.dataset['imgs'] if self.dataset['imgs'][cocoid]['set'] == 'test'] self.train = [int(cocoid) for cocoid in self.dataset['imgs'] if self.dataset['imgs'][cocoid]['set'] == 'train'] - print 'index created!' 
+ print('index created!') def info(self): """ @@ -87,7 +90,7 @@ def info(self): :return: """ for key, value in self.dataset['info'].items(): - print '%s: %s'%(key, value) + print('%s: %s'%(key, value)) def filtering(self, filterDict, criteria): return [key for key in filterDict if all(criterion(filterDict[key]) for criterion in criteria)] @@ -114,12 +117,12 @@ def getAnnIds(self, imgIds=[], catIds=[], areaRng=[]): catIds = catIds if type(catIds) == list else [catIds] if len(imgIds) == len(catIds) == len(areaRng) == 0: - anns = self.anns.keys() + anns = list(self.anns.keys()) else: if not len(imgIds) == 0: anns = sum([self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns],[]) else: - anns = self.anns.keys() + anns = list(self.anns.keys()) anns = anns if len(catIds) == 0 else list(set(anns).intersection(set(self.getAnnByCat(catIds)))) anns = anns if len(areaRng) == 0 else [ann for ann in anns if self.anns[ann]['area'] > areaRng[0] and self.anns[ann]['area'] < areaRng[1]] return anns @@ -135,7 +138,7 @@ def getImgIds(self, imgIds=[], catIds=[]): catIds = catIds if type(catIds) == list else [catIds] if len(imgIds) == len(catIds) == 0: - ids = self.imgs.keys() + ids = list(self.imgs.keys()) else: ids = set(imgIds) if not len(catIds) == 0: @@ -189,7 +192,7 @@ def showAnns(self, anns, show_polygon=False): left, top, width, height = ann['bbox'] boxes.append(Rectangle([left,top],width,height,alpha=0.4)) color.append(c) - if 'utf8_string' in ann.keys(): + if 'utf8_string' in list(ann.keys()): ax.annotate(ann['utf8_string'],(left,top-4),color=c) p = PatchCollection(boxes, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4) ax.add_collection(p) @@ -203,7 +206,7 @@ def loadRes(self, resFile): res = COCO_Text() res.dataset['imgs'] = [img for img in self.dataset['imgs']] - print 'Loading and preparing results... ' + print('Loading and preparing results... 
') time_t = datetime.datetime.utcnow() if type(resFile) == str: anns = json.load(open(resFile)) @@ -213,8 +216,8 @@ def loadRes(self, resFile): annsImgIds = [int(ann['image_id']) for ann in anns] if set(annsImgIds) != (set(annsImgIds) & set(self.getImgIds())): - print 'Results do not correspond to current coco set' - print 'skipping ', str(len(set(annsImgIds)) - len(set(annsImgIds) & set(self.getImgIds()))), ' images' + print('Results do not correspond to current coco set') + print('skipping ', str(len(set(annsImgIds)) - len(set(annsImgIds) & set(self.getImgIds()))), ' images') annsImgIds = list(set(annsImgIds) & set(self.getImgIds())) res.imgToAnns = {cocoid : [] for cocoid in annsImgIds} @@ -229,6 +232,6 @@ def loadRes(self, resFile): ann['id'] = id res.anns[id] = ann res.imgToAnns[ann['image_id']].append(id) - print 'DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds()) + print('DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds())) return res