Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ def parse_args():
parser.add_argument(
"-p", "--pull_requests", help="log pull requests", action="store_true"
)
parser.add_argument(
"--graphql", help="use graphql for requesting data (work only with --pull_requests) ", action="store_true"
)
parser.add_argument("-i", "--issues", help="log issues", action="store_true")
parser.add_argument("-w", "--wikis", help="log wikis", action="store_true")
parser.add_argument("--contributors", help="log contributors", action="store_true")
Expand Down Expand Up @@ -150,14 +153,20 @@ def run(args, binded_repos, repos_for_wiki=None):
binded_repos, args.out, start, finish, args.branch, args.forks_include
)
if args.pull_requests:
pull_requests_parser.log_pull_requests(
binded_repos,
args.out,
start,
finish,
args.forks_include,
args.pr_comments,
)
if args.graphql:
pull_requests_parser.log_pull_requests_by_graphql(
binded_repos=binded_repos,
csv_name=args.out
)
else:
pull_requests_parser.log_pull_requests(
binded_repos,
args.out,
start,
finish,
args.forks_include,
args.pr_comments,
)
if args.issues:
issues_parser.log_issues(
binded_repos, args.out, start, finish, args.forks_include, args.base_url,
Expand Down
202 changes: 202 additions & 0 deletions src/graphql/pull_request_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
from dataclasses import asdict
from typing import Generator
from time import sleep

import requests

from src.constants import TIMEDELTA
from src.repo_dataclasses import PullRequestData
from src.interface_wrapper import IRepositoryAPI, Repository
from src.utils import logger


# -----------GraphQLAPI block--------------

def log_repositories_pr_by_graphql(owner, repo_name, token, csv_name, first_n=100):
    """Fetch every pull request of one repository via the GitHub GraphQL API
    and append one CSV row (plus a stdout echo) per pull request.

    Pages through ``repository.pullRequests`` with cursor pagination and maps
    each node onto a :class:`PullRequestData` row. Comment-, assignee-,
    issue- and milestone-related columns are intentionally left ``None`` —
    the GraphQL path does not collect them.

    :param owner: repository owner login.
    :param repo_name: repository name.
    :param token: GitHub token sent as a Bearer credential.
    :param csv_name: path of the CSV file rows are appended to.
    :param first_n: page size for pagination (GitHub caps this at 100).
    """
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    query = """
    query GetPRData($owner: String!, $repo: String!, $first: Int!, $after: String) {
      repository(owner: $owner, name: $repo) {
        nameWithOwner
        pullRequests(first: $first, after: $after, states: [OPEN, CLOSED, MERGED], orderBy: {field: CREATED_AT, direction: DESC}) {
          totalCount
          pageInfo {
            hasNextPage
            endCursor
          }
          nodes {
            title
            number
            state
            createdAt

            author {
              login
              ... on User {
                name
                email
              }
            }

            baseRef {
              name
              target {
                oid
              }
            }

            headRef {
              name
              target {
                oid
              }
            }

            changedFiles
            additions
            deletions

            mergedAt
            mergedBy {
              login
              ... on User {
                name
                email
              }
            }

            assignees(first: 10) {
              nodes {
                login
                name
              }
            }

            labels(first: 20) {
              nodes {
                name
                color
              }
            }
          }
        }
      }
    }
    """

    def actor_field(actor, field):
        # The ``... on User`` fragments add name/email only for User actors
        # (bots lack them), and the actor itself may be null for deleted
        # accounts — both cases must yield None, never raise.
        return actor.get(field) if actor else None

    has_next_page = True
    after_cursor = None
    processed_count = 0

    while has_next_page:
        variables = {
            "owner": owner,
            "repo": repo_name,
            "first": first_n,
            "after": after_cursor,
        }

        response = requests.post(
            "https://api.github.com/graphql",
            headers=headers,
            json={"query": query, "variables": variables},
        )

        # NOTE(review): both failure branches retry forever with a fixed
        # back-off; consider capping retries so a revoked token cannot spin
        # this loop indefinitely.
        if response.status_code != 200:
            logger.log_error(f"GraphQL request failed: {response.status_code} - {response.text}")
            logger.log_to_stdout(f"Sleep to {100*TIMEDELTA} and retry")
            sleep(100 * TIMEDELTA)
            continue

        graphql_data = response.json()

        if "errors" in graphql_data:
            logger.log_error(f"GraphQL errors: {graphql_data['errors']}")
            logger.log_to_stdout(f"Sleep to {100*TIMEDELTA} and retry")
            sleep(100 * TIMEDELTA)
            continue

        repo_data = graphql_data["data"]["repository"]
        pull_requests = repo_data["pullRequests"]

        page_info = pull_requests["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        after_cursor = page_info["endCursor"]

        prs = pull_requests["nodes"]

        processed_count += len(prs)
        # FIX: the original nested double quotes inside a double-quoted
        # f-string, which is a SyntaxError on Python < 3.12.
        logger.log_to_stdout(f"Processing {processed_count} / {pull_requests['totalCount']}")

        for pr in prs:
            # Refs may be null (e.g. the source branch was deleted); treat a
            # missing ref or missing target commit uniformly as None.
            base_ref = pr["baseRef"] or {}
            head_ref = pr["headRef"] or {}

            pr_data = PullRequestData(
                repository_name=repo_data["nameWithOwner"],
                title=pr["title"],
                id=pr["number"],
                state=str(pr["state"]).lower(),
                commit_into=(base_ref.get("target") or {}).get("oid"),
                commit_from=(head_ref.get("target") or {}).get("oid"),
                created_at=pr["createdAt"],
                creator_name=actor_field(pr["author"], "name"),
                creator_login=actor_field(pr["author"], "login"),
                creator_email=actor_field(pr["author"], "email"),
                changed_files=pr["changedFiles"],
                comment_body=None,
                comment_created_at=None,
                comment_author_name=None,
                comment_author_login=None,
                comment_author_email=None,
                merger_name=actor_field(pr["mergedBy"], "name"),
                merger_login=actor_field(pr["mergedBy"], "login"),
                merger_email=actor_field(pr["mergedBy"], "email"),
                source_branch=head_ref.get("name"),
                target_branch=base_ref.get("name"),
                assignee_story=None,
                related_issues=None,
                labels=", ".join(label["name"] for label in pr["labels"]["nodes"]),
                milestone=None,
            )

            pr_info = asdict(pr_data)
            logger.log_to_csv(csv_name, list(pr_info.keys()), pr_info)
            logger.log_to_stdout(pr_info)


def log_pull_requests_by_graphql(
    binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None],
    csv_name: str,
):
    """Log pull-request data for every bound repository via GraphQL.

    Writes the CSV header once up front, then walks the bound repositories,
    sleeping between them before the next repository is processed.

    :param binded_repos: generator of (api_client, repository, token) tuples.
    :param csv_name: path of the CSV file to write to.
    """
    # The dataclass field names define the CSV schema, so a default instance
    # yields exactly the header row.
    header = list(asdict(PullRequestData()).keys())
    logger.log_to_csv(csv_name, header)

    for _api, repository, token in binded_repos:
        logger.log_title(repository.name)
        log_repositories_pr_by_graphql(
            owner=repository.owner.login,
            repo_name=repository.name,
            token=token,
            csv_name=csv_name,
        )
        sleep(100 * TIMEDELTA)
34 changes: 5 additions & 29 deletions src/pull_requests_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,8 @@
from src.git_logger import get_assignee_story
from src.interface_wrapper import IRepositoryAPI, Repository
from src.utils import logger


@dataclass(kw_only=True, frozen=True)
class PullRequestData:
repository_name: str = ''
title: str = ''
id: int = 0
state: str = ''
commit_into: str = ''
commit_from: str = ''
created_at: str = ''
creator_name: str = ''
creator_login: str = ''
creator_email: str = ''
changed_files: str = ''
comment_body: str = ''
comment_created_at: str = ''
comment_author_name: str = ''
comment_author_login: str = ''
comment_author_email: str = ''
merger_name: str | None = None
merger_login: str | None = None
merger_email: str | None = None
source_branch: str = ''
target_branch: str = ''
assignee_story: str = ''
related_issues: str = ''
labels: str = ''
milestone: str = ''
from src.graphql.pull_request_parser import log_pull_requests_by_graphql # for using in main.py
from src.repo_dataclasses import PullRequestData


def get_related_issues(pull_request_number, repo_owner, repo_name, token):
Expand Down Expand Up @@ -106,6 +79,9 @@ def get_info(obj, attr):
return EMPTY_FIELD if obj is None else getattr(obj, attr)


# -----------GithubAPI block--------------


def log_repositories_pr(
client: IRepositoryAPI,
repository: Repository,
Expand Down
30 changes: 30 additions & 0 deletions src/repo_dataclasses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dataclasses import dataclass


@dataclass(kw_only=True, frozen=True)
class PullRequestData:
    """Immutable record of one pull-request row.

    The declaration order of the fields defines the column order of the CSV
    output (rows are produced via ``dataclasses.asdict``), so do not reorder
    fields without updating consumers of the CSV.
    """

    repository_name: str = ''   # "owner/name" of the repository
    title: str = ''             # pull-request title
    id: int = 0                 # pull-request number within the repository
    state: str = ''             # lower-cased state, e.g. "open" / "closed" / "merged"
    commit_into: str = ''       # head commit SHA of the base (target) branch
    commit_from: str = ''       # head commit SHA of the source branch
    created_at: str = ''        # creation timestamp as provided by the API
    creator_name: str = ''      # display name of the PR author
    creator_login: str = ''     # login of the PR author
    creator_email: str = ''     # email of the PR author
    changed_files: str = ''     # number of files changed by the PR
    comment_body: str = ''      # text of one PR comment (when comments are logged)
    comment_created_at: str = ''
    comment_author_name: str = ''
    comment_author_login: str = ''
    comment_author_email: str = ''
    # Merger fields are None (rather than '') when the PR was never merged.
    merger_name: str | None = None
    merger_login: str | None = None
    merger_email: str | None = None
    source_branch: str = ''     # name of the head branch
    target_branch: str = ''     # name of the base branch
    assignee_story: str = ''
    related_issues: str = ''
    labels: str = ''            # comma-separated label names
    milestone: str = ''
Loading