diff --git a/DHIS2/database_comparator/Readme.md b/DHIS2/database_comparator/Readme.md
new file mode 100644
index 00000000..8b873b94
--- /dev/null
+++ b/DHIS2/database_comparator/Readme.md
@@ -0,0 +1,84 @@
+# DHIS2 Organisation Units Comparison
+
+This project allows you to **download and compare organisation units (orgUnits)** across multiple DHIS2 instances (e.g. DEV, INDIV, PROD) using authenticated API access.
+
+## Step 1: Download OrgUnits from Each Instance
+
+Run the script with `python download_all_orgunits.py`.
+
+Before executing, **hardcode the following variables** in the script to match each instance:
+
+```python
+BASE_URL = "https://play/dhis2"
+OUTPUT_FILE = "organisation_units.json"
+USERNAME = ""
+PASSWORD = ""
+```
+
+Repeat this step for each instance (DEV, INDIV, PROD, etc.), saving the result to a different JSON file each time:
+
+- `organisation_units_dev.json`
+
+- `organisation_units_indiv.json`
+
+- `organisation_units_prod.json`
+
+You can also modify the fields being collected from the DHIS2 API in the script if needed.
+
+## Step 2: Merge JSON Files into a Local SQLite Database
+Edit the file `merge_all_jsons_in_a_single_db.py` to define which JSON files to load and what table name each will have:
+
+```python
+json_files = {
+    "organisation_units_dev.json": "orgunits_dev",
+    "organisation_units_indiv.json": "orgunits_indiv",
+    "organisation_units_prod.json": "orgunits_prod",
+}
+```
+
+Then run:
+
+```bash
+python merge_all_jsons_in_a_single_db.py
+```
+
+This will create a local SQLite database with one table per orgUnit file.
+
+## Step 3: Create Views for Comparing OrgUnits Across Instances
+Use the following SQL statements to create views that help identify discrepancies between the datasets:
+
+```sql
+CREATE VIEW exists_in_dev_but_not_in_indiv AS
+SELECT *
+FROM orgunits_dev i
+WHERE NOT EXISTS (
+    SELECT 1
+    FROM orgunits_indiv p
+    WHERE p.id = i.id
+);
+
+CREATE VIEW exists_in_indiv_but_not_in_prod AS
+SELECT *
+FROM orgunits_indiv i
+WHERE NOT EXISTS (
+    SELECT 1
+    FROM orgunits_prod p
+    WHERE p.id = i.id
+);
+
+CREATE VIEW exists_in_prod_but_not_in_indiv AS
+SELECT *
+FROM orgunits_prod i
+WHERE NOT EXISTS (
+    SELECT 1
+    FROM orgunits_indiv p
+    WHERE p.id = i.id
+);
+```
+
+These views allow you to detect missing or extra orgUnits between pairs of instances.
+
+## Optional Adjustments
+You may adjust the fields collected from the DHIS2 API in download_all_orgunits.py.
+
+You can also change the table structure or filtering logic in merge_all_jsons_in_a_single_db.py as needed for your comparison.
diff --git a/DHIS2/database_comparator/download_all_orgunits.py b/DHIS2/database_comparator/download_all_orgunits.py
new file mode 100644
index 00000000..1ae000c3
--- /dev/null
+++ b/DHIS2/database_comparator/download_all_orgunits.py
@@ -0,0 +1,94 @@
+"""Download all organisation units from a DHIS2 instance into a JSON file.
+
+Edit the constants below to match the target instance, then run the script.
+The session cookie is used when COOKIE carries a value; otherwise requests
+fall back to HTTP basic auth with USERNAME and PASSWORD.
+"""
+import json
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+# Base URL of the DHIS2 instance
+BASE_URL = "https://play/dhis2"
+OUTPUT_FILE = "organisation_units.json"
+USERNAME = ""
+PASSWORD = ""
+# Pagination parameters
+PAGE_SIZE = 5000
+# Fields requested for every organisation unit
+FIELDS = "name,code,shortName,created,path,level,geometry,id,children::size"
+# Seconds to wait for the server before a request is abandoned
+REQUEST_TIMEOUT = 300
+
+# If using cookie authentication, provide the cookie here, e.g.
+# "JSESSIONID=abc123;". Leave the value empty to use USERNAME/PASSWORD.
+COOKIE = "JSESSIONID=;"
+
+
+def cookie_is_set(cookie):
+    """Return True when the cookie string carries a non-empty value."""
+    _, _, value = cookie.partition("=")
+    return bool(value.strip(" ;"))
+
+
+def build_auth():
+    """Return the (headers, auth) pair for the configured authentication."""
+    # Common headers for both authentication methods
+    headers = {"Accept": "application/json"}
+    if cookie_is_set(COOKIE):
+        headers["Cookie"] = COOKIE
+        return headers, None
+    return headers, HTTPBasicAuth(USERNAME, PASSWORD)
+
+
+def fetch_page(page, headers, auth):
+    """Request one page of organisation units and return it as a list."""
+    response = requests.get(
+        f"{BASE_URL}/api/organisationUnits",
+        params={"fields": FIELDS, "pageSize": PAGE_SIZE, "page": page},
+        headers=headers,
+        auth=auth,
+        timeout=REQUEST_TIMEOUT,
+    )
+    # Handle unauthorized access
+    if response.status_code == 401:
+        raise RuntimeError("Unauthorized access. Check your credentials or cookie.")
+    response.raise_for_status()
+    return response.json().get("organisationUnits", [])
+
+
+def strip_coordinates(org_units):
+    """Remove geometry["coordinates"] in place, keeping the other geometry keys."""
+    for unit in org_units:
+        geometry = unit.get("geometry")
+        if isinstance(geometry, dict):
+            geometry.pop("coordinates", None)
+
+
+def main():
+    """Download every page and save the combined result to OUTPUT_FILE."""
+    headers, auth = build_auth()
+    # Container to hold all organisation units
+    all_org_units = []
+    page = 1
+
+    while True:
+        print(f"Downloading page {page}...")
+        org_units = fetch_page(page, headers, auth)
+        # An empty page marks the end of the listing
+        if not org_units:
+            break
+        strip_coordinates(org_units)
+        all_org_units.extend(org_units)
+        page += 1
+
+    print(f"Total downloaded organisation units: {len(all_org_units)}")
+
+    # Save all organisation units to a JSON file
+    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
+        json.dump({"organisationUnits": all_org_units}, f, indent=2, ensure_ascii=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/DHIS2/database_comparator/merge_all_jsons_in_a_single_db.py b/DHIS2/database_comparator/merge_all_jsons_in_a_single_db.py
new file mode 100644
index 00000000..b593dc16
--- /dev/null
+++ b/DHIS2/database_comparator/merge_all_jsons_in_a_single_db.py
@@ -0,0 +1,88 @@
+"""Load downloaded organisation-unit JSON files into one SQLite database.
+
+Every file listed in json_files becomes its own table, which is dropped and
+recreated on each run.
+"""
+import json
+import os
+import sqlite3
+from contextlib import closing
+
+# Files and tables
+json_files = {
+    "organisation_units_dev.json": "orgunits_dev",
+    "organisation_units_indiv.json": "orgunits_indiv",
+    "organisation_units_prod.json": "orgunits_prod",
+}
+
+DB_FILE = "orgunits_comparison.db"
+
+
+def to_row(ou):
+    """Flatten one organisation unit dict into a tuple in table column order."""
+    geometry = ou.get("geometry")
+    return (
+        ou.get("id"),
+        ou.get("code"),
+        ou.get("name"),
+        ou.get("shortName"),
+        ou.get("created"),
+        ou.get("path"),
+        ou.get("level"),
+        # Only the geometry type is stored; a missing geometry becomes NULL
+        geometry.get("type") if isinstance(geometry, dict) else None,
+        ou.get("children"),
+    )
+
+
+def load_table(cur, file, table):
+    """Recreate `table` and fill it with the organisation units in `file`."""
+    # Table names cannot be bound as SQL parameters, so validate before
+    # interpolating them into the statements below.
+    if not table.isidentifier():
+        raise ValueError(f"Invalid table name: {table!r}")
+
+    with open(file, "r", encoding="utf-8") as f:
+        data = json.load(f).get("organisationUnits", [])
+
+    # Create tables by expected fields
+    cur.execute(f'DROP TABLE IF EXISTS "{table}"')
+    cur.execute(f"""
+        CREATE TABLE "{table}" (
+            id TEXT PRIMARY KEY,
+            code TEXT,
+            name TEXT,
+            shortName TEXT,
+            created TEXT,
+            path TEXT,
+            level INTEGER,
+            geometry_type TEXT,
+            children INTEGER
+        )
+    """)
+
+    # Insert data
+    cur.executemany(f"""
+        INSERT OR REPLACE INTO "{table}" (id, code, name, shortName, created, path, level, geometry_type, children)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+    """, (to_row(ou) for ou in data))
+
+
+def main():
+    """Import every available JSON file into the comparison database."""
+    # closing() guarantees the connection is released even if a file fails
+    with closing(sqlite3.connect(DB_FILE)) as conn:
+        cur = conn.cursor()
+        for file, table in json_files.items():
+            if not os.path.exists(file):
+                print(f"File not found: {file}")
+                continue
+
+            print(f"Processing {file} → {table}")
+            load_table(cur, file, table)
+
+        conn.commit()
+
+
+if __name__ == "__main__":
+    main()