From 522d5bf23afd94f433863c0e2e77f894b3d5f02f Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Mon, 13 Oct 2025 11:49:29 +0100 Subject: [PATCH 01/13] Create ReviewCommentReactionsStream --- tap_github/repository_streams.py | 24 ++++++++++++++++++++++++ tap_github/streams.py | 2 ++ 2 files changed, 26 insertions(+) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index e8a57ec8..cf625ee6 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1673,6 +1673,30 @@ class ReviewCommentsStream(GitHubRestStream): ).to_dict() +class ReviewCommentReactionsStream(GitHubRestStream): + name = "review_comment_reactions" + path = "/repos/{org}/{repo}/pulls/comments/{comment_id}/reactions" + primary_keys: ClassVar[list[str]] = ["id"] + replication_key = None + parent_stream_type = ReviewCommentsStream + ignore_parent_replication_key = True + state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] + + schema = th.PropertiesList( + # Parent keys + th.Property("org", th.StringType), + th.Property("repo", th.StringType), + th.Property("repo_id", th.IntegerType), + th.Property("comment_id", th.IntegerType), + # Reaction properties + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("user", user_object), + th.Property("content", th.StringType), + th.Property("created_at", th.DateTimeType), + ).to_dict() + + class ContributorsStream(GitHubRestStream): """Defines 'Contributors' stream. Fetching User & Bot contributors.""" diff --git a/tap_github/streams.py b/tap_github/streams.py index bda8f74d..ed004630 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -48,6 +48,7 @@ ReadmeStream, ReleasesStream, RepositoryStream, + ReviewCommentReactionsStream, ReviewCommentsStream, ReviewsStream, StargazersGraphqlStream, @@ -116,6 +117,7 @@ def __init__(self, valid_queries: set[str], streams: list[type[Stream]]) -> None ReleasesStream, ExtraMetricsStream, RepositoryStream, + ReviewCommentReactionsStream, ReviewCommentsStream, ReviewsStream, StargazersGraphqlStream, From 4cd924fbe98f1ff7e07735e4b39001959bb05d10 Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 14 Oct 2025 11:21:25 +0100 Subject: [PATCH 02/13] Add get_child_context to parent stream --- tap_github/repository_streams.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index cf625ee6..92851234 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1672,6 +1672,15 @@ class ReviewCommentsStream(GitHubRestStream): th.Property("side", th.StringType), ).to_dict() + def get_child_context(self, record: dict, context: Context | None) -> dict: + """Return a child context object from the record and optional provided context.""" + return { + "org": context["org"] if context else None, + "repo": context["repo"] if context else None, + "repo_id": context["repo_id"] if context else None, + "comment_id": record["id"], + } + class ReviewCommentReactionsStream(GitHubRestStream): name = "review_comment_reactions" From 98e258c0f75ab74262c77273199ca75004162f9b Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 14 Oct 2025 17:11:16 +0100 Subject: [PATCH 03/13] Change replication key to created_at --- tap_github/repository_streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 92851234..e80bc0fe 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1686,7 +1686,7 @@ class ReviewCommentReactionsStream(GitHubRestStream): name = "review_comment_reactions" path = "/repos/{org}/{repo}/pulls/comments/{comment_id}/reactions" primary_keys: ClassVar[list[str]] = ["id"] - replication_key = None + replication_key = "created_at" parent_stream_type = ReviewCommentsStream ignore_parent_replication_key = True state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] From 8098b7ec4af3068bd22b79f3e6c07e91b22dd559 Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 28 Oct 2025 15:21:59 +0000 Subject: [PATCH 04/13] Change ignore_parent_replication_key to False --- tap_github/repository_streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index e80bc0fe..cf68f70c 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1688,7 +1688,7 @@ class ReviewCommentReactionsStream(GitHubRestStream): primary_keys: ClassVar[list[str]] = ["id"] replication_key = "created_at" parent_stream_type = ReviewCommentsStream - ignore_parent_replication_key = True + ignore_parent_replication_key = False state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] schema = th.PropertiesList( From 604c858da3fab3ff9acb730f13baa3c12081dfdf Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 28 Oct 2025 15:37:59 +0000 Subject: [PATCH 05/13] Add pull_request_url and comment_url to schema --- tap_github/repository_streams.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index cf68f70c..2682d467 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1678,7 +1678,9 @@ def get_child_context(self, record: dict, context: Context | None) -> dict: "org": context["org"] if context else None, "repo": context["repo"] if context else None, "repo_id": context["repo_id"] if context else None, + "pull_request_url": record["pull_request_url"], "comment_id": record["id"], + "comment_url": record["html_url"], } @@ -1696,7 +1698,9 @@ class ReviewCommentReactionsStream(GitHubRestStream): th.Property("org", th.StringType), th.Property("repo", th.StringType), th.Property("repo_id", th.IntegerType), + th.Property("pull_request_url", th.StringType), th.Property("comment_id", th.IntegerType), + th.Property("comment_url", th.StringType), # Reaction properties th.Property("id", th.IntegerType), th.Property("node_id", th.StringType), From 1eb314e4b0ba42164fcbbead436f8d07f38b7726 Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 28 Oct 2025 15:52:28 +0000 Subject: [PATCH 06/13] Add if context else None to new fields --- tap_github/repository_streams.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 2682d467..92263703 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1678,9 +1678,9 @@ def get_child_context(self, record: dict, context: Context | None) -> dict: "org": context["org"] if context else None, "repo": context["repo"] if context else None, "repo_id": context["repo_id"] if context else None, - "pull_request_url": record["pull_request_url"], - "comment_id": record["id"], - "comment_url": record["html_url"], + "pull_request_url": record["pull_request_url"] if context else None, + "comment_id": record["id"] if context else None, + "comment_url": record["html_url"] if context else None, } From 50d01871637f818bae604167f7180f15f2a365cb Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 28 Oct 2025 16:00:24 +0000 Subject: [PATCH 07/13] Add logging --- tap_github/repository_streams.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 92263703..a9b28864 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1693,6 +1693,15 @@ class ReviewCommentReactionsStream(GitHubRestStream): ignore_parent_replication_key = False state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] + + def post_process(self, row: dict, context: Context | None = None) -> dict: + self.logger.info(f"Context: {context}") + self.logger.info(f"Unprocessed row: {row}") + + row = super().post_process(row, context) + self.logger.info(f"Processed row: {row}") + return row + schema = th.PropertiesList( # Parent keys th.Property("org", th.StringType), From afca45eea75196f99406f3973c4ba202928d5f09 Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Tue, 28 Oct 2025 16:13:42 +0000 Subject: [PATCH 08/13] Explicitly set row keys using context --- tap_github/repository_streams.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index a9b28864..87ad4dcd 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1700,6 +1700,13 @@ def post_process(self, row: dict, context: Context | None = None) -> dict: row = super().post_process(row, context) self.logger.info(f"Processed row: {row}") + + if context: + row["pull_request_url"] = context.get("pull_request_url") + row["comment_id"] = context.get("comment_id") + row["comment_url"] = context.get("comment_url") + self.logger.info(f"Post-Processed row: {row}") + return row schema = th.PropertiesList( From eba8c99cb6552070d1e70c5b849ad627c4539049 Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Thu, 30 Oct 2025 12:12:37 +0000 Subject: [PATCH 09/13] Remove newline --- tap_github/repository_streams.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 87ad4dcd..87d8795b 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1692,7 +1692,6 @@ class ReviewCommentReactionsStream(GitHubRestStream): parent_stream_type = ReviewCommentsStream ignore_parent_replication_key = False state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] - def post_process(self, row: dict, context: Context | None = None) -> dict: self.logger.info(f"Context: {context}") From d792551340aa6858661ba1aa245d8afffbaae81d Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Thu, 30 Oct 2025 12:21:31 +0000 Subject: [PATCH 10/13] Remove logging --- tap_github/repository_streams.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 87d8795b..50c7a32c 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1694,17 +1694,12 @@ class ReviewCommentReactionsStream(GitHubRestStream): state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] def post_process(self, row: dict, context: Context | None = None) -> dict: - self.logger.info(f"Context: {context}") - self.logger.info(f"Unprocessed row: {row}") - row = super().post_process(row, context) - self.logger.info(f"Processed row: {row}") if context: row["pull_request_url"] = context.get("pull_request_url") row["comment_id"] = context.get("comment_id") row["comment_url"] = context.get("comment_url") - self.logger.info(f"Post-Processed row: {row}") return row From e201e76b22004f2ca4229cf9bd9be8d32b44a2e9 Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Thu, 30 Oct 2025 14:43:10 +0000 Subject: [PATCH 11/13] Remove pull_request_url as it's the API url --- tap_github/repository_streams.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 50c7a32c..568a9d1b 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1678,7 +1678,6 @@ def get_child_context(self, record: dict, context: Context | None) -> dict: "org": context["org"] if context else None, "repo": context["repo"] if context else None, "repo_id": context["repo_id"] if context else None, - "pull_request_url": record["pull_request_url"] if context else None, "comment_id": record["id"] if context else None, "comment_url": record["html_url"] if context else None, } @@ -1697,7 +1696,6 @@ def post_process(self, row: dict, context: Context | None = None) -> dict: row = super().post_process(row, context) if context: - row["pull_request_url"] = context.get("pull_request_url") row["comment_id"] = context.get("comment_id") row["comment_url"] = context.get("comment_url") @@ -1708,7 +1706,6 @@ def post_process(self, row: dict, context: Context | None = None) -> dict: th.Property("org", th.StringType), th.Property("repo", th.StringType), th.Property("repo_id", th.IntegerType), - th.Property("pull_request_url", th.StringType), th.Property("comment_id", th.IntegerType), th.Property("comment_url", th.StringType), # Reaction properties From 325b54df970e23cd7ff279985e8e70d4c9b54bce Mon Sep 17 00:00:00 2001 From: Lin Taylor Date: Fri, 31 Oct 2025 10:55:18 +0000 Subject: [PATCH 12/13] Remove docstring that is causing E501 Line too long (90 > 88) --- tap_github/repository_streams.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index 568a9d1b..da6e249c 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1673,7 +1673,6 @@ class ReviewCommentsStream(GitHubRestStream): ).to_dict() def get_child_context(self, record: dict, context: Context | None) -> dict: - """Return a child context object from the record and optional provided context.""" return { "org": context["org"] if context else None, "repo": context["repo"] if context else None, From 0649323f976226585c6d17158e645bab1f6aa9a0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 10:56:06 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tap_github/repository_streams.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index da6e249c..f61065aa 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -1690,7 +1690,7 @@ class ReviewCommentReactionsStream(GitHubRestStream): parent_stream_type = ReviewCommentsStream ignore_parent_replication_key = False state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] - + def post_process(self, row: dict, context: Context | None = None) -> dict: row = super().post_process(row, context) @@ -1699,7 +1699,7 @@ def post_process(self, row: dict, context: Context | None = None) -> dict: row["comment_url"] = context.get("comment_url") return row - + schema = th.PropertiesList( # Parent keys th.Property("org", th.StringType),