Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ CLI Help output::
--throttle-limit to be set)
--exclude [EXCLUDE ...]
names of repositories to exclude

--retries MAX_RETRIES
maximum number of retries for API calls (default: 5)

Usage Details
=============
Expand Down
72 changes: 48 additions & 24 deletions github_backup/github_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ def __init__(self, message, dmca_url=None):
" 3. Debian/Ubuntu: apt-get install ca-certificates\n\n"
)

# Retry configuration
MAX_RETRIES = 5


def logging_subprocess(
popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs
Expand Down Expand Up @@ -144,6 +141,17 @@ def mask_password(url, secret="*****"):
return url.replace(parsed.password, secret)


def non_negative_int(value):
"""Argparse type validator for non-negative integers."""
try:
ivalue = int(value)
except ValueError:
raise argparse.ArgumentTypeError(f"'{value}' is not a valid integer")
if ivalue < 0:
raise argparse.ArgumentTypeError(f"{value} must be 0 or greater")
return ivalue


def parse_args(args=None):
parser = argparse.ArgumentParser(description="Backup a github account")
parser.add_argument("user", metavar="USER", type=str, help="github username")
Expand Down Expand Up @@ -468,6 +476,13 @@ def parse_args(args=None):
parser.add_argument(
"--exclude", dest="exclude", help="names of repositories to exclude", nargs="*"
)
parser.add_argument(
"--retries",
dest="max_retries",
type=non_negative_int,
default=5,
help="maximum number of retries for API calls (default: 5)",
)
return parser.parse_args(args)


Expand Down Expand Up @@ -622,7 +637,7 @@ def retrieve_data(args, template, query_args=None, paginated=True):
def _extract_next_page_url(link_header):
for link in link_header.split(","):
if 'rel="next"' in link:
return link[link.find("<") + 1:link.find(">")]
return link[link.find("<") + 1 : link.find(">")]
return None

def fetch_all() -> Generator[dict, None, None]:
Expand All @@ -631,7 +646,7 @@ def fetch_all() -> Generator[dict, None, None]:
while True:
# FIRST: Fetch response

for attempt in range(MAX_RETRIES):
for attempt in range(args.max_retries + 1):
request = _construct_request(
per_page=per_page if paginated else None,
query_args=query_args,
Expand All @@ -640,7 +655,7 @@ def fetch_all() -> Generator[dict, None, None]:
as_app=args.as_app,
fine=args.token_fine is not None,
)
http_response = make_request_with_retry(request, auth)
http_response = make_request_with_retry(request, auth, args.max_retries)

match http_response.getcode():
case 200:
Expand All @@ -654,10 +669,10 @@ def fetch_all() -> Generator[dict, None, None]:
TimeoutError,
) as e:
logger.warning(f"{type(e).__name__} reading response")
if attempt < MAX_RETRIES - 1:
if attempt < args.max_retries:
delay = calculate_retry_delay(attempt, {})
logger.warning(
f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})"
f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries + 1})"
)
time.sleep(delay)
continue # Next retry attempt
Expand All @@ -683,10 +698,10 @@ def fetch_all() -> Generator[dict, None, None]:
)
else:
logger.error(
f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}"
)
raise Exception(
f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}"
)

# SECOND: Process and paginate
Expand Down Expand Up @@ -718,7 +733,7 @@ def fetch_all() -> Generator[dict, None, None]:
return list(fetch_all())


def make_request_with_retry(request, auth):
def make_request_with_retry(request, auth, max_retries=5):
"""Make HTTP request with automatic retry for transient errors."""

def is_retryable_status(status_code, headers):
Expand All @@ -730,40 +745,49 @@ def is_retryable_status(status_code, headers):
return int(headers.get("x-ratelimit-remaining", 1)) < 1
return False

for attempt in range(MAX_RETRIES):
for attempt in range(max_retries + 1):
try:
return urlopen(request, context=https_ctx)

except HTTPError as exc:
# HTTPError can be used as a response-like object
if not is_retryable_status(exc.code, exc.headers):
logger.error(
f"API Error: {exc.code} {exc.reason} for {request.full_url}"
)
raise # Non-retryable error

if attempt >= MAX_RETRIES - 1:
logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts")
if attempt >= max_retries:
logger.error(
f"HTTP {exc.code} failed after {max_retries + 1} attempts for {request.full_url}"
)
raise

delay = calculate_retry_delay(attempt, exc.headers)
logger.warning(
f"HTTP {exc.code}, retrying in {delay:.1f}s "
f"(attempt {attempt + 1}/{MAX_RETRIES})"
f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s "
f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}"
)
if auth is None and exc.code in (403, 429):
logger.info("Hint: Authenticate to raise your GitHub rate limit")
time.sleep(delay)

except (URLError, socket.error) as e:
if attempt >= MAX_RETRIES - 1:
logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}")
if attempt >= max_retries:
logger.error(
f"Connection error failed after {max_retries + 1} attempts: {e} for {request.full_url}"
)
raise
delay = calculate_retry_delay(attempt, {})
logger.warning(
f"Connection error: {e}, retrying in {delay:.1f}s "
f"(attempt {attempt + 1}/{MAX_RETRIES})"
f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}"
)
time.sleep(delay)

raise Exception(f"Request failed after {MAX_RETRIES} attempts") # pragma: no cover
raise Exception(
f"Request failed after {max_retries + 1} attempts"
) # pragma: no cover


def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
Expand Down Expand Up @@ -1579,9 +1603,7 @@ def filter_repositories(args, unfiltered_repositories):
repositories = [r for r in repositories if not r.get("archived")]
if args.starred_skip_size_over is not None:
if args.starred_skip_size_over <= 0:
logger.warning(
"--starred-skip-size-over must be greater than 0, ignoring"
)
logger.warning("--starred-skip-size-over must be greater than 0, ignoring")
else:
size_limit_kb = args.starred_skip_size_over * 1024
filtered = []
Expand All @@ -1590,7 +1612,9 @@ def filter_repositories(args, unfiltered_repositories):
size_mb = r.get("size", 0) / 1024
logger.info(
"Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format(
r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over
r.get("full_name", r.get("name")),
size_mb,
args.starred_skip_size_over,
)
)
else:
Expand Down
41 changes: 31 additions & 10 deletions tests/test_http_451.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_repository_unavailable_error_raised(self):
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
args.max_retries = 5

mock_response = Mock()
mock_response.getcode.return_value = 451
Expand All @@ -30,18 +31,26 @@ def test_repository_unavailable_error_raised(self):
"block": {
"reason": "dmca",
"created_at": "2024-11-12T14:38:04Z",
"html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
}
"html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md",
},
}
mock_response.read.return_value = json.dumps(dmca_data).encode("utf-8")
mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Unavailable For Legal Reasons"

with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
with patch(
"github_backup.github_backup.make_request_with_retry",
return_value=mock_response,
):
with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")

assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
github_backup.retrieve_data(
args, "https://api.github.com/repos/test/dmca/issues"
)

assert (
exc_info.value.dmca_url
== "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
)
assert "451" in str(exc_info.value)

def test_repository_unavailable_error_without_dmca_url(self):
Expand All @@ -54,16 +63,22 @@ def test_repository_unavailable_error_without_dmca_url(self):
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
args.max_retries = 5

mock_response = Mock()
mock_response.getcode.return_value = 451
mock_response.read.return_value = b'{"message": "Blocked"}'
mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Unavailable For Legal Reasons"

with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
with patch(
"github_backup.github_backup.make_request_with_retry",
return_value=mock_response,
):
with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
github_backup.retrieve_data(
args, "https://api.github.com/repos/test/dmca/issues"
)

assert exc_info.value.dmca_url is None
assert "451" in str(exc_info.value)
Expand All @@ -78,16 +93,22 @@ def test_repository_unavailable_error_with_malformed_json(self):
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
args.max_retries = 5

mock_response = Mock()
mock_response.getcode.return_value = 451
mock_response.read.return_value = b"invalid json {"
mock_response.headers = {"x-ratelimit-remaining": "5000"}
mock_response.reason = "Unavailable For Legal Reasons"

with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
with patch(
"github_backup.github_backup.make_request_with_retry",
return_value=mock_response,
):
with pytest.raises(github_backup.RepositoryUnavailableError):
github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
github_backup.retrieve_data(
args, "https://api.github.com/repos/test/dmca/issues"
)


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions tests/test_pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def mock_args():
args.osx_keychain_item_account = None
args.throttle_limit = None
args.throttle_pause = 0
args.max_retries = 5
return args


Expand Down
Loading