-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathjson2sqlite.py
More file actions
112 lines (98 loc) · 3.64 KB
/
json2sqlite.py
File metadata and controls
112 lines (98 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import sqlite_utils, json
from datetime import datetime, timezone
import sys
import re
dbpath = sys.argv[1]
db = sqlite_utils.Database(dbpath)
def format_datetime(ts):
if ts is None:
return None
return datetime.fromtimestamp(round(ts), tz=timezone.utc).strftime("%d-%m-%Y %H:%M:%S GMT")
def safe_slug(title):
return re.sub(r"\W+", "_", title.strip().lower())[:50] or "chat"
# Create the unified message table if it doesn't exist
if "messages" not in db.table_names():
db["messages"].create({
"chat_id": str,
"node_id": str,
"title": str,
"blocked_urls": str,
"safe_urls": str,
"role": str,
"content": str,
"create_time": str, # formatted string
"create_time_ts": float, # raw timestamp
"update_time": float,
"weight": float,
"status": str,
"parent": str,
"children": str,
"model_slug": str,
"default_model_slug": str,
"finish_type": str,
"citations": str,
"content_references": str
}, pk=("chat_id", "node_id"))
# Populate the messages table
for chat in db["chats"].rows:
chat_id = chat["id"]
mapping_str = chat.get("mapping")
if not mapping_str:
continue
try:
mapping = json.loads(mapping_str)
except Exception:
continue
blocked_urls = json.dumps(chat.get("blocked_urls")) if chat.get("blocked_urls") else None
safe_urls = json.dumps(chat.get("safe_urls")) if chat.get("safe_urls") else None
title = chat.get("title")
for node_id, node in mapping.items():
msg = node.get("message") or {}
meta = msg.get("metadata") or {}
content_parts = ((msg.get("content") or {}).get("parts") or [])
content = content_parts[0] if content_parts else None
finish = meta.get("finish_details") or {}
ts = msg.get("create_time")
# if ts is None:
# print(f"⚠️ Missing create_time for node {node_id} in chat {chat_id}")
db["messages"].insert({
"chat_id": chat_id,
"node_id": node_id,
"title": title,
"blocked_urls": blocked_urls,
"safe_urls": safe_urls,
"role": ((msg.get("author") or {}).get("role")),
"content": content,
"create_time": format_datetime(ts),
"create_time_ts": ts,
"update_time": msg.get("update_time"),
"weight": msg.get("weight"),
"status": msg.get("status"),
"parent": node.get("parent"),
"children": json.dumps(node.get("children")) if node.get("children") else None,
"model_slug": meta.get("model_slug"),
"default_model_slug": meta.get("default_model_slug"),
"finish_type": finish.get("type"),
"citations": json.dumps(meta.get("citations")) if meta.get("citations") else None,
"content_references": json.dumps(meta.get("content_references")) if meta.get("content_references") else None
}, alter=True)
# Create views for each conversation
for row in db.query("""
SELECT chat_id, title, MIN(create_time_ts) AS ts
FROM messages
GROUP BY chat_id
"""):
chat_id = row["chat_id"]
title = row["title"] or "chat"
slug = safe_slug(title)
ts = int(row["ts"]) if row["ts"] else "unknown"
view_name = f"chat_{ts}_{slug}"
sql = f"""
CREATE VIEW IF NOT EXISTS "{view_name}" AS
SELECT * FROM messages
WHERE chat_id = '{chat_id}'
ORDER BY create_time_ts ASC;
"""
print("Attempting view for", chat_id, "| title:", title, "| ts:", ts)
db.executescript(sql)
print(f"✅ Created view: {view_name}")