def get_pull_request_review_comments(token, pull_number, since=None):
    """https://docs.github.com/en/rest/pulls/comments#list-review-comments-on-a-pull-request

    Fetches the review comments of a pull request, requesting successive pages
    until a page comes back with fewer than `per_page` entries.

    Args:
        token: GitHub token sent in the `Authorization` header.
        pull_number: Number of the pull request whose comments are listed.
        since: Optional value forwarded unchanged as the `since` query
            parameter; omitted from the request when falsy.

    Returns:
        A list with the decoded JSON entries of every page fetched. This is
        best-effort: if a request fails, the error is logged and whatever was
        collected so far (possibly an empty list) is returned instead of
        raising.
    """
    url = f'{GITHUB_API_URL}/pulls/{pull_number}/comments'
    headers = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {token}'}

    per_page = 100
    base_params = {'per_page': per_page}
    if since:
        base_params['since'] = since

    results = []
    page = 1
    while True:
        page_params = dict(base_params, page=page)

        # Only the HTTP exchange and JSON decoding sit inside the try block, so
        # the except clause cannot mask an unrelated failure.
        try:
            with requests_retry_session().get(url, headers=headers, params=page_params,
                                              stream=True, timeout=TIMEOUT) as response:
                response.raise_for_status()
                logging.info("get_pull_request_review_comments: %s params %s response: %s",
                             url, page_params, response)
                page_results = response.json()
        except requests.exceptions.RequestException as e:
            # Lazy %-style arguments, matching the logging.info call above.
            logging.error("Error fetching review comments (page %s, params: %s) for PR %s: %s",
                          page, page_params, pull_number, e)
            break  # Give up on further pages; return what was gathered.

        results.extend(page_results)

        # A short (or empty) page means there is nothing left to fetch.
        if len(page_results) < per_page:
            break
        page += 1

    return results
"""https://docs.github.com/en/rest/reference/actions#create-a-workflow-dispatch-event""" url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/dispatches' diff --git a/scripts/gha/get_pr_review_comments.py b/scripts/gha/get_pr_review_comments.py new file mode 100755 index 0000000000..0eae324858 --- /dev/null +++ b/scripts/gha/get_pr_review_comments.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Fetches and formats review comments from a GitHub Pull Request.""" + +import argparse +import os +import sys +import firebase_github +import datetime +from datetime import timezone, timedelta + + +def main(): + STATUS_IRRELEVANT = "[IRRELEVANT]" + STATUS_OLD = "[OLD]" + STATUS_CURRENT = "[CURRENT]" + + default_owner = firebase_github.OWNER + default_repo = firebase_github.REPO + + parser = argparse.ArgumentParser( + description="Fetch review comments from a GitHub PR and format into simple text output.", + formatter_class=argparse.RawTextHelpFormatter + ) + parser.add_argument( + "--pull_number", + type=int, + required=True, + help="Pull request number." + ) + parser.add_argument( + "--owner", + type=str, + default=default_owner, + help=f"Repository owner. Defaults to '{default_owner}'." + ) + parser.add_argument( + "--repo", + type=str, + default=default_repo, + help=f"Repository name. Defaults to '{default_repo}'." 
+ ) + parser.add_argument( + "--token", + type=str, + default=os.environ.get("GITHUB_TOKEN"), + help="GitHub token. Can also be set via GITHUB_TOKEN env var." + ) + parser.add_argument( + "--context-lines", + type=int, + default=10, + help="Number of context lines from the diff hunk. 0 for full hunk. If > 0, shows header (if any) and last N lines of the remaining hunk. Default: 10." + ) + parser.add_argument( + "--since", + type=str, + default=None, + help="Only show comments updated at or after this ISO 8601 timestamp (e.g., YYYY-MM-DDTHH:MM:SSZ)." + ) + parser.add_argument( + "--exclude-old", + action="store_true", + default=False, + help="Exclude comments marked [OLD] (where line number has changed due to code updates but position is still valid)." + ) + parser.add_argument( + "--include-irrelevant", + action="store_true", + default=False, + help="Include comments marked [IRRELEVANT] (where GitHub can no longer anchor the comment to the diff, i.e., position is null)." + ) + + args = parser.parse_args() + + if not args.token: + sys.stderr.write("Error: GitHub token not provided. Set GITHUB_TOKEN or use --token.\n") + sys.exit(1) + + if args.owner != firebase_github.OWNER or args.repo != firebase_github.REPO: + repo_url = f"https://github.com/{args.owner}/{args.repo}" + if not firebase_github.set_repo_url(repo_url): + sys.stderr.write(f"Error: Invalid repo URL: {args.owner}/{args.repo}. 
Expected https://github.com/owner/repo\n") + sys.exit(1) + sys.stderr.write(f"Targeting repository: {firebase_github.OWNER}/{firebase_github.REPO}\n") + + sys.stderr.write(f"Fetching comments for PR #{args.pull_number} from {firebase_github.OWNER}/{firebase_github.REPO}...\n") + if args.since: + sys.stderr.write(f"Filtering comments updated since: {args.since}\n") + + + comments = firebase_github.get_pull_request_review_comments( + args.token, + args.pull_number, + since=args.since + ) + + if not comments: + sys.stderr.write(f"No review comments found for PR #{args.pull_number} (or matching filters), or an error occurred.\n") + return + + latest_activity_timestamp_obj = None + processed_comments_count = 0 + print("# Review Comments\n\n") + for comment in comments: + created_at_str = comment.get("created_at") + + current_pos = comment.get("position") + current_line = comment.get("line") + original_line = comment.get("original_line") + + status_text = "" + line_to_display = None + + if current_pos is None: + status_text = STATUS_IRRELEVANT + line_to_display = original_line + elif original_line is not None and current_line != original_line: + status_text = STATUS_OLD + line_to_display = current_line + else: + status_text = STATUS_CURRENT + line_to_display = current_line + + if line_to_display is None: + line_to_display = "N/A" + + if status_text == STATUS_IRRELEVANT and not args.include_irrelevant: + continue + if status_text == STATUS_OLD and args.exclude_old: + continue + + # Track latest 'updated_at' for '--since' suggestion; 'created_at' is for display. 
+ updated_at_str = comment.get("updated_at") + if updated_at_str: # Check if updated_at_str is not None and not empty + try: + if sys.version_info < (3, 11): + dt_str_updated = updated_at_str.replace("Z", "+00:00") + else: + dt_str_updated = updated_at_str + current_comment_activity_dt = datetime.datetime.fromisoformat(dt_str_updated) + if latest_activity_timestamp_obj is None or current_comment_activity_dt > latest_activity_timestamp_obj: + latest_activity_timestamp_obj = current_comment_activity_dt + except ValueError: + sys.stderr.write(f"Warning: Could not parse updated_at timestamp: {updated_at_str}\n") + + # Get other comment details + user = comment.get("user", {}).get("login", "Unknown user") + path = comment.get("path", "N/A") + body = comment.get("body", "").strip() + + if not body: + continue + + processed_comments_count += 1 + + diff_hunk = comment.get("diff_hunk") + html_url = comment.get("html_url", "N/A") + comment_id = comment.get("id") + in_reply_to_id = comment.get("in_reply_to_id") + + print(f"## Comment by: **{user}** (ID: `{comment_id}`){f' (In Reply To: `{in_reply_to_id}`)' if in_reply_to_id else ''}\n") + if created_at_str: + print(f"* **Timestamp**: `{created_at_str}`") + print(f"* **Status**: `{status_text}`") + print(f"* **File**: `{path}`") + print(f"* **Line**: `{line_to_display}`") + print(f"* **URL**: <{html_url}>\n") + + print("\n### Context:") + print("```") # Start of Markdown code block + if diff_hunk and diff_hunk.strip(): + if args.context_lines == 0: # User wants the full hunk + print(diff_hunk) + else: # User wants N lines of context (args.context_lines > 0) + hunk_lines = diff_hunk.split('\n') + if hunk_lines and hunk_lines[0].startswith("@@ "): + print(hunk_lines[0]) + hunk_lines = hunk_lines[1:] # Modify list in place for remaining operations + + # Proceed with the (potentially modified) hunk_lines + # If hunk_lines is empty here (e.g. 
original hunk was only a header that was removed), + # hunk_lines[-args.context_lines:] will be [], and "\n".join([]) is "", + # so print("") will effectively print a newline. This is acceptable. + print("\n".join(hunk_lines[-args.context_lines:])) + else: # diff_hunk was None or empty + print("(No diff hunk available for this comment)") + print("```") # End of Markdown code block + + print("\n### Comment:") + print(body) + print("\n---") + + sys.stderr.write(f"\nPrinted {processed_comments_count} comments to stdout.\n") + + if latest_activity_timestamp_obj: + try: + # Ensure it's UTC before adding timedelta, then format + next_since_dt = latest_activity_timestamp_obj.astimezone(timezone.utc) + timedelta(seconds=2) + next_since_str = next_since_dt.strftime('%Y-%m-%dT%H:%M:%SZ') + + new_cmd_args = [sys.executable, sys.argv[0]] # Start with interpreter and script path + i = 1 # Start checking from actual arguments in sys.argv + while i < len(sys.argv): + if sys.argv[i] == "--since": + i += 2 # Skip --since and its value + continue + new_cmd_args.append(sys.argv[i]) + i += 1 + + new_cmd_args.extend(["--since", next_since_str]) + suggested_cmd = " ".join(new_cmd_args) + sys.stderr.write(f"\nTo get comments created after the last one in this batch, try:\n{suggested_cmd}\n") + except Exception as e: + sys.stderr.write(f"\nWarning: Could not generate next command suggestion: {e}\n") + +if __name__ == "__main__": + main()