diff --git a/galaxykit/client.py b/galaxykit/client.py
index 420aa02..4083863 100644
--- a/galaxykit/client.py
+++ b/galaxykit/client.py
@@ -159,14 +159,18 @@ def _refresh_jwt_token(self):
             "User-Agent": user_agent(),
             "Content-Type": "application/x-www-form-urlencoded",
         }
-        json = self._http(
+        json, resp = self._http(
             "post",
             self.auth_url,
             data=payload,
             headers=headers,
+            include_response=True,
         )
-        self.token = json["access_token"]
-        self.token_type = "Bearer"
+        try:
+            self.token = json["access_token"]
+            self.token_type = "Bearer"
+        except KeyError as exc:
+            raise GalaxyClientError(resp, "Unexpected error in JWT token refresh.") from exc
 
     def _update_auth_headers(self):
         self.headers.update(
@@ -187,6 +191,10 @@ def _http(self, method, path, *args, **kwargs):
         url = urljoin(self.galaxy_root, path)
         headers = kwargs.pop("headers", self.headers)
         parse_json = kwargs.pop("parse_json", True)
+        include_response = kwargs.pop("include_response", False)
+
+        if include_response and not parse_json:
+            raise ValueError("GalaxyClient._http() called with include_response=True only valid when parse_json=True!")
 
         resp = requests.request(
             method, url, headers=headers, verify=self.https_verify, *args, **kwargs
@@ -209,9 +217,13 @@ def _http(self, method, path, *args, **kwargs):
                 raise ValueError("Failed to parse JSON response from API") from exc
             if "errors" in json:
                 raise GalaxyClientError(resp, *json["errors"])
+
             if resp.status_code >= 400:
                 raise GalaxyClientError(resp, resp.status_code)
-            return json
+            elif include_response:
+                return json, resp
+            else:
+                return json
         else:
             if resp.status_code >= 400:
                 raise GalaxyClientError(resp, resp.status_code)
@@ -234,6 +246,9 @@ def _payload(self, method, path, body, *args, **kwargs):
         return self._http(method, path, *args, **kwargs)
 
     def get(self, path, *args, **kwargs):
+        # The first positional argument after the path is forwarded as ``params``.
+        if args:
+            kwargs["params"], *args = args
         return self._http("get", path, *args, **kwargs)
 
     def post(self, *args, **kwargs):
diff --git a/galaxykit/collections.py b/galaxykit/collections.py
index 
53e698a..df890f1 100644
--- a/galaxykit/collections.py
+++ b/galaxykit/collections.py
@@ -4,6 +4,7 @@
 
 import uuid
 import os
+import sys
 import json
 from time import sleep
 from urllib.parse import urljoin
@@ -24,9 +25,23 @@ def get_collection(client, namespace, collection_name, version):
     return client.get(collection_url)
 
 
-def get_collection_list(client):
-    url = "_ui/v1/collection-versions/?limit=999999"
-    return client.get(url)
+def get_collection_list(client, repo="published", limit=None, offset=None, keywords=None):
+    """List collections in ``repo``; limit/offset page the results, keywords filter them."""
+    url = f"_ui/v1/repo/{repo}/"
+    params = {}
+
+    if limit is not None:
+        params["limit"] = limit
+    if offset is not None:
+        params["offset"] = offset
+
+    if isinstance(keywords, str):
+        keywords = [keywords]
+    elif keywords is None:
+        keywords = []
+    if keywords:
+        params["keywords"] = keywords
+    return client.get(url, params)
 
 
 def upload_test_collection(
@@ -201,19 +216,81 @@ def move_or_copy_collection(
 
 
 def delete_collection(
-    client, namespace, collection, version=None, repository="published"
+    client, namespace, collection, version=None, repositories=("published",), dependents=False
 ):
     """
     Delete collection version
     """
+    if isinstance(repositories, str):
+        # Accept a bare repository name, as the old ``repository`` argument did.
+        repositories = (repositories,)
     logger.debug(f"Deleting {collection} from {namespace} on {client.galaxy_root}")
-    if version == None:
-        delete_url = f"v3/plugin/ansible/content/{repository}/collections/index/{namespace}/{collection}/"
-    else:
-        delete_url = f"v3/plugin/ansible/content/{repository}/collections/index/{namespace}/{collection}/versions/{version}/"
-    resp = client.delete(delete_url)
-    wait_for_task(client, resp)
-    return resp
+
+    completed_repositories = set()
+
+    final_resp = {
+        "repositories": list(repositories),
+        "responses": [],
+        "delete_count": 0,
+    }
+
+    for repository in repositories:
+        if repository in completed_repositories:
+            continue
+        else:
+            completed_repositories.add(repository)
+
+        coll_name = f"{namespace}.{collection}"
+        if not version:
+            delete_url = f"v3/plugin/ansible/content/{repository}/collections/index/{namespace}/{collection}/"
+        else:
+            delete_url = f"v3/plugin/ansible/content/{repository}/collections/index/{namespace}/{collection}/versions/{version}/"
+            coll_name += f":{version}"
+        try:
+            resp = client.delete(delete_url)
+        except GalaxyClientError as e:
+            if e.response.status_code == 404:
+                logger.debug(f"Ignoring (maybe) already deleted {coll_name}")
+                resp = {
+                    "collection": coll_name,
+                    "response_body": e.response.text,
+                    "response_status_code": e.response.status_code,
+                }
+                # Nothing was deleted here: record the miss, but there is no task
+                # to wait for and nothing to add to delete_count.
+                final_resp["responses"].append({"collection": coll_name, "response": resp})
+                continue
+            else:
+                raise
+        else:
+            if "dependent_collection_versions" in resp:
+                dep_count = len(resp["dependent_collection_versions"])
+                logger.debug(f"Deleting {dep_count} dependents first...")
+                if dependents:
+                    combined = {
+                        "original": None,
+                        "dependents": []
+                    }
+                    for dep in resp["dependent_collection_versions"]:
+                        dep_coll, dep_ver = dep.split(" ")
+                        dep_ns, dep_name = dep_coll.split(".")
+                        dep_resp = delete_collection(client, dep_ns, dep_name, dep_ver, repositories, dependents=True)
+                        combined["dependents"].append(dep_resp)
+                    resp = client.delete(delete_url)
+                    if "delete_count" in resp:
+                        final_resp["delete_count"] += resp["delete_count"]
+                    combined["original"] = resp
+                    return combined
+                else:
+                    raise GalaxyClientError(resp)
+
+        wait_for_task(client, resp)
+        final_resp["delete_count"] += 1
+        final_resp["responses"].append({
+            "collection": coll_name,
+            "response": resp,
+        })
+    return final_resp
 
 
 def deprecate_collection(client, namespace, collection, repository):
diff --git a/galaxykit/command.py b/galaxykit/command.py
index 120d6c6..0eafe08 100644
--- a/galaxykit/command.py
+++ b/galaxykit/command.py
@@ -67,7 +67,34 @@ def report_error(resp):
         "ops": {
             "list": {
                 "help": "List all collections",
-                "args": None,
+                "args": {
+                    "repository": {
+                        "help": "Repository to list collections from (defaults to 'published')",
+                        "nargs": "?",
+                        "default": "published",
+                    },
+                    "limit": {
+                        "help": "Maximum number of collections to show.",
+                        "nargs": "?",
+                        "default": None,
+                        "short": "-l",
+                        "long": "--limit",
+                    },
+                    "offset": {
+                        "help": "Collection offset to begin list at. Used for paging.",
+                        "nargs": "?",
+                        "default": None,
+                        "short": "-o",
+                        "long": "--offset",
+                    },
+                    "keywords": {
+                        "help": "Filter by results that contain one or more keywords",
+                        "nargs": "*",
+                        "default": None,
+                        "short": "-k",
+                        "long": "--keywords",
+                    },
+                },
             },
             "upload": {
                 "help": "Create and upload a new collection",
@@ -117,7 +144,17 @@ def report_error(resp):
                     "namespace": {},
                     "collection": {},
                     "version": {"nargs": "?", "default": None},
-                    "repository": {"nargs": "?", "default": "published"},
+                    "repository": {
+                        "short": "-r",
+                        "long": "--repository",
+                        "default": "published,staging,rejected",
+                    },
+                    "dependents": {
+                        "short": "-d",
+                        "long": "--dependents",
+                        "default": False,
+                        "action": "store_true",
+                    },
                 },
             },
             "download": None,
@@ -489,7 +526,15 @@ def report_error(resp):
 
 def parse_args(parser, args):
     for arg in args:
-        parser.add_argument(arg, **(args[arg]))
+        options = dict(args[arg])  # copy: popping from the shared spec would break a second parse
+        flag_args = []
+        if "short" in options:
+            flag_args.append("-" + options.pop("short").strip("-"))
+        if "long" in options:
+            flag_args.append("--" + options.pop("long").strip("-"))
+        if not flag_args:
+            flag_args.append(arg)
+        parser.add_argument(*flag_args, **options)
 
 
 def parse_subop(subparsers, subop, subop_params):
@@ -1001,7 +1046,14 @@ def main():
 
         elif args.kind == "collection":
             if args.operation == "list":
-                print(json.dumps(collections.get_collection_list(client)))
+                resp = collections.get_collection_list(
+                    client,
+                    args.repository,
+                    keywords=args.keywords,
+                    limit=args.limit,
+                    offset=args.offset,
+                )
+                print(json.dumps(resp))
             elif args.operation == "upload":
                 namespace, collection_name, version, path = (
                     args.namespace or client.username,
@@ -1053,18 +1105,20 @@ def main():
                     "copy",
                 )
             elif args.operation == "delete":
-                namespace, collection, version, repository = (
+                namespace, collection, version, repository, dependents = (
                     args.namespace,
                     args.collection,
                     args.version,
-                    args.repository or "published",
+                    args.repository.split(","),
+                    args.dependents
                 )
                 try:
                     if version == "None":
                         version = None
                     resp = collections.delete_collection(
-                        client, namespace, collection, version, repository
+                        client, namespace, collection, version, repository, dependents
                     )
+                    print(json.dumps(resp))
                 except ValueError as e:
                     if not args.ignore:
                         logger.error(e)
diff --git a/scripts/galaxy-cleaner.sh b/scripts/galaxy-cleaner.sh
new file mode 100644
index 0000000..66eb21d
--- /dev/null
+++ b/scripts/galaxy-cleaner.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+
+# set -xv
+
+# LIMIT=100
+# BATCH=$LIMIT
+WAIT=0
+
+IFS='' read -r -d '' VAR <<'EOF'
+galaxy-cleaner.sh
+script usage: ./galaxy-cleaner.sh [-t TOKEN_FILE | -T TOKEN] [-l LIMIT] [-b BATCH_SIZE] [-w WAIT] [-p PROXY] [-d]
+
+    -l  Limit. Number of collections to remove.
+    -b  Batch size. Number of collections to fetch in each batch
+        to then delete. Adjust this for performance and/or rate limit
+        problems.
+    -d  Debug flag. Enables bash -xv flags and displays extra debug
+        information while running.
+    -t  Token file. Path to a file to read the access token from.
+    -T  Token. Directly pass the access token as a parameter.
+    -p  Proxy. Set the HTTPS_PROXY value to access the Galaxy API via.
+    -w  Wait. Pause between steps to reduce server impact or avoid rate
+        limiting.
+
+EOF
+
+while getopts 't:T:l:b:w:p:d' OPTION; do
+    case "$OPTION" in
+        t)
+            TOKEN=$(cat "$OPTARG")
+            ;;
+        T)
+            TOKEN=$OPTARG
+            ;;
+        l)
+            LIMIT=$OPTARG
+            ;;
+        b)
+            BATCH=$OPTARG
+            ;;
+        w)
+            WAIT=$OPTARG
+            ;;
+        p)
+            export HTTPS_PROXY="${OPTARG}"
+            ;;
+        d)
+            DEBUG=true
+            set -xv
+            ;;
+        ?)
+            echo "script usage: ./galaxy-cleaner.sh [-t TOKEN_FILE | -T TOKEN] [-l LIMIT] [-b BATCH_SIZE] [-w WAIT] [-p PROXY] [-d]" >&2
+            exit 1
+            ;;
+    esac
+done
+shift "$(($OPTIND -1))"
+
+TOKEN="${TOKEN:-}"  # keep the value parsed from -t/-T; default to empty otherwise
+
+read -r -d '' GKIT << EOM
+galaxykit
+    -s https://console.stage.redhat.com/api/automation-hub/
+    -a https://sso.stage.redhat.com/auth/realms/redhat-external/protocol/openid-connect/token
+    -t $TOKEN
+EOM
+
+anywait() {
+    for pid in "$@"; do
+        while kill -0 "$pid"; do
+            sleep 0.5
+        done
+    done
+}
+
+progress_update() {
+    V=$1
+    T=$2
+    R=$3
+    SCALE=$((100 / $T))
+    V=$(($SCALE * $V))
+    T=$(($SCALE * $T))
+    if [[ "$V" == "$T" ]] || [[ "$R" == "" ]]; then
+        # 00:00:00/00:00:00
+        R="................."
+    fi
+    printf "%-*s" $(($V + 1)) '[' | tr ' ' '#'
+    printf "%*s%3d%%\r" $(($T - $V)) "] $R " "$V"
+}
+
+if [[ ! -v LIMIT ]]; then
+    LIMIT=`$GKIT collection list --limit 1 | jq '.meta.count' -r`
+fi
+if [[ ! -v BATCH ]]; then
+    BATCH=$LIMIT
+fi
+
+SEEN_COUNT=0
+DELETE_COUNT=0
+BATCH_NUMBER=1
+REMAIN_TIME=""
+TOTAL_TO_DELETE=$LIMIT
+BATCH_TOTAL=$(($LIMIT / $BATCH))
+SKIPPED=0
+
+if [[ $(($LIMIT % $BATCH)) != 0 ]]; then
+    BATCH_TOTAL=$(($BATCH_TOTAL + 1))
+fi
+
+SECONDS=0
+while [[ $TOTAL_TO_DELETE -gt 0 ]]; do
+    COUNTER=0
+    BATCH_LABEL="batch #$BATCH_NUMBER/$BATCH_TOTAL"
+    BATCH_START=$SECONDS
+
+    echo $"Fetching and Deleting collection $BATCH_LABEL. $TOTAL_TO_DELETE remain. $DELETE_COUNT deleted. (Total time est: ${REMAIN_TIME:-unknown})"
+    progress_update $COUNTER $BATCH
+
+    COLL_LIST=`$GKIT collection list --limit $BATCH | jq '.data[] | "\(.namespace.name) \(.name)"' -r`
+    [[ -n "$COLL_LIST" ]] || break  # nothing left to list: stop instead of looping forever
+    while read COLLECTION; do
+        COUNTER=$(($COUNTER + 1))
+        DEL_OUTPUT=$($GKIT collection delete --dependents --repository published,staging,rejected $COLLECTION)
+        DC=$(
+            jq -r '.delete_count // empty' < <(echo "$DEL_OUTPUT")
+        )
+        if [[ "$DC" == "" ]]; then
+            echo "Delete operation failed for $COLLECTION. Continuing after 30 seconds..."
+            echo "$DEL_OUTPUT"
+            sleep 30
+            continue
+        fi
+        DELETE_COUNT=$(($DELETE_COUNT + $DC))
+        TOTAL_TO_DELETE=$(($TOTAL_TO_DELETE - $DC))
+        SEEN_COUNT=$(($SEEN_COUNT + 1))
+
+        if [[ "$DC" == "0" ]]; then
+            SKIPPED=$(( $SKIPPED + 1 ))
+            BATCH_TOTAL=$(( ($LIMIT + $SKIPPED) / $BATCH ))
+        fi
+
+        DURATION=$SECONDS
+        if [[ $DELETE_COUNT == 0 ]]; then
+            REMAIN_TIME=""
+        else
+            RATE=$(($DURATION / $DELETE_COUNT))
+            REMAIN_SEC=$(($TOTAL_TO_DELETE * $RATE))
+            REMAIN_TIME=$(date -d@$REMAIN_SEC -u +%H:%M:%S)
+        fi
+
+        BATCH_DURATION=$(($SECONDS - $BATCH_START))
+        if [[ $DELETE_COUNT == 0 ]]; then
+            BATCH_REMAIN_TIME=""
+        else
+            RATE=$(($DURATION / $SEEN_COUNT))
+            REMAIN_SEC=$(( ($BATCH - $COUNTER) * $RATE))
+            BATCH_REMAIN_TIME=$(date -d@$REMAIN_SEC -u +%H:%M:%S)
+        fi
+
+        progress_update $COUNTER $BATCH "$BATCH_REMAIN_TIME/$REMAIN_TIME"
+
+        sleep $WAIT
+    done < <(echo "$COLL_LIST")
+
+    BATCH_NUMBER=$(($BATCH_NUMBER + 1))
+    echo
+done
+
+echo "Done. ${DELETE_COUNT} deletions completed successfully."
+
+if [[ $DEBUG == true ]]; then
+    set +xv
+fi
\ No newline at end of file