gpu-issues/results_loader.py at main · bubblepipe/gpu-issues · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""Functions for loading categorized results from files"""

import glob
import json
import json5
import sys
from cates import (
    IsReallyBug, UserPerspective, DeveloperPerspective, AcceleratorSpecific, PlatformSpecificity,
    IS_REALLY_BUG_LOOKUP, USER_PERSPECTIVE_LOOKUP,
    DEVELOPER_PERSPECTIVE_LOOKUP, ACCELERATOR_SPECIFIC_LOOKUP, PLATFORM_SPECIFICITY_LOOKUP
)


def _enum_from_value(lookup, raw_value):
    """
    Find the enum object in a lookup table by its serialized value.

    Args:
        lookup: Mapping whose values are objects exposing a ``.value`` attribute
            (the mapping's keys are not used)
        raw_value: Value read from a result file

    Returns:
        The first object in ``lookup`` whose ``.value`` equals ``raw_value``,
        or None if there is no such object
    """
    return next(
        (member for member in lookup.values() if member.value == raw_value),
        None,
    )


def load_categorized_results(pattern):
    """
    Load categorized issues from JSON result files.

    Every file matching ``pattern`` is parsed with the standard ``json`` module
    and each entry's category strings are converted back to enum objects via the
    ``*_LOOKUP`` tables. A category that is missing, empty, or does not match any
    enum value is returned as None.

    Errors are handled per file: a file that cannot be opened, decoded, or
    parsed, or that contains an entry without 'title' or 'url', is reported with
    ``print`` and processing continues with the next file. Entries of that file
    converted before the error occurred are kept.

    Args:
        pattern: Glob pattern for finding JSON result files (e.g., 'categorized_issues_*.json')

    Returns:
        List of tuples containing (title, url, is_really_bug, user_perspective, developer_perspective, accelerator_specific, platform_specificity)
    """
    categorized_issues = []

    for path in glob.glob(pattern):
        try:
            # JSON text is UTF-8; do not depend on the locale's default encoding,
            # which can fail on non-ASCII issue titles.
            with open(path, 'r', encoding='utf-8') as f:
                json_data = json.load(f)

            # Convert JSON data back to tuples with enum objects
            for item in json_data:
                bug_value = item.get('is_really_bug')
                user_value = item.get('user_perspective')
                developer_value = item.get('developer_perspective')
                accelerator_value = item.get('accelerator_specific')
                # Handle both old and new field names for backwards compatibility
                platform_value = item.get('platform_specificity') or item.get('user_expertise')

                # Note: a 'confidence' key may be present in old files; it is ignored.

                # Missing or falsy fields stay None and are never matched
                # against the lookup tables.
                categorized_issues.append((
                    item['title'],
                    item['url'],
                    _enum_from_value(IS_REALLY_BUG_LOOKUP, bug_value) if bug_value else None,
                    _enum_from_value(USER_PERSPECTIVE_LOOKUP, user_value) if user_value else None,
                    _enum_from_value(DEVELOPER_PERSPECTIVE_LOOKUP, developer_value) if developer_value else None,
                    _enum_from_value(ACCELERATOR_SPECIFIC_LOOKUP, accelerator_value) if accelerator_value else None,
                    _enum_from_value(PLATFORM_SPECIFICITY_LOOKUP, platform_value) if platform_value else None,
                ))

        except FileNotFoundError:
            print(f"File not found: {path}")
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON from file {path}: {e}")
        except Exception as e:
            # Best-effort loader: report the problem and move on to the next file.
            print(f"Error processing file {path}: {e}")

    return categorized_issues


def get_categorized_urls(categorized_issues):
    """
    Collect the URL of every categorized issue into a set for fast lookup.

    Args:
        categorized_issues: Iterable of categorized issue tuples; the URL is
            read from index 1 of each entry

    Returns:
        Set of URLs that have been categorized (duplicates collapse to one entry)
    """
    seen_urls = set()
    for record in categorized_issues:
        seen_urls.add(record[1])
    return seen_urls


def load_categorized_json_files(pattern):
    """
    Load categorized issues directly as dictionaries from JSON files.

    Every file matching ``pattern`` is parsed with ``json5`` and its entries are
    appended to the result exactly as parsed (no conversion to enum objects).
    A file that cannot be opened, decoded, or parsed is reported with ``print``
    and skipped; the remaining files are still loaded.

    Args:
        pattern: Glob pattern for finding JSON result files

    Returns:
        List of dictionaries with keys: title, url, is_really_bug, user_perspective,
        developer_perspective, accelerator_specific, platform_specificity (or user_expertise for backwards compatibility)
    """
    all_issues = []

    for path in glob.glob(pattern):
        try:
            # JSON text is UTF-8; do not depend on the locale's default encoding.
            with open(path, 'r', encoding='utf-8') as f:
                json_data = json5.load(f)
            all_issues.extend(json_data)
        except FileNotFoundError:
            print(f"File not found: {path}")
        except ValueError as e:
            # json5 signals malformed input with ValueError rather than
            # json.JSONDecodeError; JSONDecodeError and UnicodeDecodeError are
            # ValueError subclasses, so all parse/decode failures land here.
            print(f"Error parsing JSON from file {path}: {e}")
        except Exception as e:
            # Best-effort loader: report the problem and move on to the next file.
            print(f"Error processing file {path}: {e}")

    return all_issues