From 99868000b38133982d181add2e495cd5a8c57d11 Mon Sep 17 00:00:00 2001
From: Peter Jaszkowiak
Date: Sun, 29 Jan 2023 23:09:33 -0700
Subject: [PATCH] Track review and author response latency
---
.gitignore | 2 +
.../rust-clippy/pr-author-latency.csv | 2 +
.../rust-clippy/pr-review-latency.csv | 2 +
data/rust-lang/rust/pr-author-latency.csv | 2 +
data/rust-lang/rust/pr-review-latency.csv | 2 +
index.md | 2 +
rust-clippy.md | 2 +
updater.py | 225 ++++++++++++++++--
8 files changed, 224 insertions(+), 15 deletions(-)
create mode 100644 data/rust-lang/rust-clippy/pr-author-latency.csv
create mode 100644 data/rust-lang/rust-clippy/pr-review-latency.csv
create mode 100644 data/rust-lang/rust/pr-author-latency.csv
create mode 100644 data/rust-lang/rust/pr-review-latency.csv
diff --git a/.gitignore b/.gitignore
index 0baf01522..99a7d043b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
/_site
+*.env
+*.tmp
diff --git a/data/rust-lang/rust-clippy/pr-author-latency.csv b/data/rust-lang/rust-clippy/pr-author-latency.csv
new file mode 100644
index 000000000..b5b3623ea
--- /dev/null
+++ b/data/rust-lang/rust-clippy/pr-author-latency.csv
@@ -0,0 +1,2 @@
+is:open draft:false -label:S-blocked label:S-waiting-on-author __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
+2023-01-30,0,0,0,1,0,2,3,0
diff --git a/data/rust-lang/rust-clippy/pr-review-latency.csv b/data/rust-lang/rust-clippy/pr-review-latency.csv
new file mode 100644
index 000000000..d23b4d970
--- /dev/null
+++ b/data/rust-lang/rust-clippy/pr-review-latency.csv
@@ -0,0 +1,2 @@
+is:open draft:false -label:S-blocked label:S-waiting-on-review __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
+2023-01-30,0,5,4,0,7,7,5,7
diff --git a/data/rust-lang/rust/pr-author-latency.csv b/data/rust-lang/rust/pr-author-latency.csv
new file mode 100644
index 000000000..a8968ddaa
--- /dev/null
+++ b/data/rust-lang/rust/pr-author-latency.csv
@@ -0,0 +1,2 @@
+is:open draft:false -label:S-blocked label:S-waiting-on-author __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
+2023-01-30,1,12,7,15,24,29,12,0
diff --git a/data/rust-lang/rust/pr-review-latency.csv b/data/rust-lang/rust/pr-review-latency.csv
new file mode 100644
index 000000000..300bc9198
--- /dev/null
+++ b/data/rust-lang/rust/pr-review-latency.csv
@@ -0,0 +1,2 @@
+is:open draft:false -label:S-blocked label:S-waiting-on-review __status_updated:{{param|relative_date}},>1|today,3..1|last 3 days,7..4|last week,14..8|last 2 weeks,30..15|last month,90..31|last 3 months,180..91|last 6 months,<180|more than 6 months
+2023-01-30,1,24,18,35,46,49,27,0
diff --git a/index.md b/index.md
index 276b6fef6..d1a93e274 100644
--- a/index.md
+++ b/index.md
@@ -6,5 +6,7 @@ graphs:
pr-activity: Last activity on pull requests
pr-age: Pull requests creation dates
pr-merged: Pull requests merged
+ pr-review-latency: Duration waiting on review
+ pr-author-latency: Duration waiting on author
layout: graphs
---
diff --git a/rust-clippy.md b/rust-clippy.md
index 7ce43f14a..19c6aa9ca 100644
--- a/rust-clippy.md
+++ b/rust-clippy.md
@@ -5,5 +5,7 @@ graphs:
pr-status: Pull requests status
pr-activity: Last activity on pull requests
pr-age: Pull requests creation dates
+ pr-review-latency: Duration waiting on review
+ pr-author-latency: Duration waiting on author
layout: graphs
---
diff --git a/updater.py b/updater.py
index 90828f7e7..424054d08 100755
--- a/updater.py
+++ b/updater.py
@@ -19,26 +19,212 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+# pyright: strict
+
import csv
-import datetime
-import json
+from datetime import datetime, timedelta, timezone
import os
-import subprocess
import sys
import time
-import jinja2
+from typing import Callable
+from urllib.parse import urlparse, parse_qs
+import jinja2
import requests
API_URL = "https://api.github.com/search/issues"
+# Various insignificant comments from triage, merge conflicts, etc
+# count as "updates" on GitHub, so `updated:` isn't an ideal way to
+# gauge the last activity on a PR.
+#
+# Instead, use the Issue Events API to find when the
+# status label (`S-*`) was most recently changed.
+def get_pr_status_updated(http_session: requests.Session, repo: str, events_url: str, pr_number: int) -> str | None:
+ """Get the timestamp of the last status label change for the given PR"""
+
+ page = 1
+
+ while True:
+ print(f"Fetching events for {repo}#{pr_number}")
+ res = http_session.get(events_url, params={"per_page": 100, "page": page})
+
+ # Properly handle rate limits
+ if res.status_code == 403:
+ wait = float(res.headers["X-RateLimit-Reset"]) - time.time() + 1
+ print("Rate limit reached, waiting %s seconds..." % int(wait), flush=True)
+ time.sleep(wait)
+ continue
+
+ # Make sure we got the last page
+ #
+ # In most cases, the `per_page` of 100 should avoid
+ # needing to issue another request.
+ last = res.links.get("last")
+ if last is not None and "url" in last:
+ parsed = urlparse(last["url"])
+ parsed_query = parse_qs(parsed.query)
+ last_page = int(parsed_query["page"][0])
+
+ if last_page > page:
+ page = last_page
+ continue
+
+ data = res.json()
+ print(data)
+ if "errors" in data:
+ for error in data["errors"]:
+ print("Error while fetching events for '%s': %s" % (f"{repo}#{pr_number}", error["message"]))
+ exit(1)
+ else:
+ break
+
+ # Process events
+ data = list(data) # data is a list of events
+
+ # Find last 'labeled' event with label name 'S-*'
+ for i, event in enumerate(reversed(data)):
+ event_index = len(data) - i # because enumerating reversed
+
+ if event["event"] == "labeled":
+ label = str(event["label"]["name"])
+ if label.startswith("S-"):
+ # Continue iterating backwards to see if this label was the last one removed
+ found_prev_event = False
+ for prev_event in reversed(data[:event_index]):
+ if prev_event["event"] == "unlabeled":
+ prev_label = str(prev_event["label"]["name"])
+ if prev_label == label:
+ found_prev_event = True
+ break
+                        elif prev_label.startswith("S-"):
+ break
+
+ if found_prev_event:
+ # Same label was just removed and added back, so keep searching
+ continue
+ else:
+ return event["created_at"]
+
+ return None
+
+
+# Convert ">1" to a predicate matching day counts below 1
+# Convert "7..4" to a predicate matching day counts from 4 through 7
+# Convert "<30" to a predicate matching day counts above 30
+def relative_date_to_range(param: str) -> Callable[[int], int]:
+ """Get a lambda that returns whether or not the argument
+ is within the relative date range"""
+
+ if "|" in param:
+ value = param.split("|")[0]
+ else:
+ value = param
+
+ # Support date ranges
+ if ".." in value:
+ end, start = value.split("..", 1)
+ end, start = int(end), int(start)
+
+ return lambda x : (x >= start and x <= end)
+ else:
+ # Properly handle comparison operators
+        if value.startswith(">="):
+            value = value[len(">="):]
+            value = int(value)
+            return lambda x : (x <= value)
+        if value.startswith(">"):
+            value = value[len(">"):]
+            value = int(value)
+            return lambda x : (x < value)
+        if value.startswith("<="):
+            value = value[len("<="):]
+            value = int(value)
+            return lambda x : (x >= value)
+        if value.startswith("<"):
+            value = value[len("<"):]
+            value = int(value)
+            return lambda x : (x > value)
+
+ value = int(value)
+ return lambda x : (x == value)
+
+
+def status_updated(http_session: requests.Session, now: datetime, repo: str, query: str, params: list[str]):
+ """Fetch the time the status was last updated for each PR,
+ and compile into a histogram with bins defined by params"""
+
+ # `__status_updated:` must come last in the query
+ query = query.split("__status_updated:{{param|relative_date}}", 1)[0]
+ query = f"is:pr repo:{repo} {query}".strip()
+
+ bin_ranges = [relative_date_to_range(param) for param in params]
+ bins = [0 for _ in params]
+
+ # Iterate through list of all PRs
+ page = 1
+ last_page = 1
+
+ while page <= last_page:
+ print(f"Querying {query}", flush=True)
+ res = http_session.get(API_URL, params={"q": query, "per_page": 100, "page": page})
+
+ # Properly handle rate limits
+ if res.status_code == 403:
+ wait = float(res.headers["X-RateLimit-Reset"]) - time.time() + 1
+ print("Rate limit reached, waiting %s seconds..." % int(wait), flush=True)
+ time.sleep(wait)
+ continue
+
+ data = res.json()
+ print(data)
+ if "errors" in data:
+ for error in data["errors"]:
+ print("Error while searching for '%s': %s" % (query, error["message"]))
+ exit(1)
+
+ # Calculate last page
+        last_page = -(-int(data["total_count"]) // 100)
+
+ # Process each PR
+ for pr in data["items"]:
+ pr_number = int(pr["number"])
+ updated = get_pr_status_updated(http_session, repo, pr["events_url"], pr_number)
+
+ if updated is None:
+ updated = str(pr["updated_at"])
+ print(f"{repo}#{pr_number} status updated not found, using updated field instead: {updated}")
+ else:
+ print(f"{repo}#{pr_number} last updated at {updated}")
+
+ # Correct for older pythons
+ updated = updated.replace("Z", "+00:00")
+
+ # Get the relative time period
+ diff = now.date() - datetime.fromisoformat(updated).date()
+
+ print(f"{repo}#{pr_number} not updated in {diff.days} days")
+
+ # Increment the bin that this diff fits in
+ for i, r in enumerate(bin_ranges):
+ if r(diff.days):
+ param = params[i]
+ print(f"{repo}#{pr_number} added to bin #{i} for '{param}'")
+ bins[i] += 1
+ break
+
+ page += 1
+
+ return bins
+
+
# GitHub doesn't support relative dates on `created:` and `updated:`, so this
# allows the CSV files to use `{{param|relative_date}}`
-def filter_relative_date(value):
- def format_relative_date(date):
- return str(datetime.date.today() - datetime.timedelta(days=int(date))) + "T00:00:00+00:00"
+def filter_relative_date(now: datetime, value: str):
+ def format_relative_date(date: str):
+ return str(now.date() - timedelta(days=int(date))) + "T00:00:00+00:00"
# Support date ranges
if ".." in value:
@@ -60,7 +246,7 @@ def format_relative_date(date):
return cmp+format_relative_date(value)
-def get_issues_count(http_session, repo, jinja_env, query, param):
+def get_issues_count(http_session: requests.Session, repo: str, jinja_env: jinja2.Environment, query: str, param: str):
"""Get the number of issues with the provided label"""
# Strip pretty labels from the query
if "|" in param:
@@ -79,7 +265,7 @@ def get_issues_count(http_session, repo, jinja_env, query, param):
# Properly handle rate limits
if res.status_code == 403:
wait = float(res.headers["X-RateLimit-Reset"]) - time.time() + 1
- print("Rate limit reached, waiting %s seconds..." % int(wait))
+ print("Rate limit reached, waiting %s seconds..." % int(wait), flush=True)
time.sleep(wait)
continue
@@ -93,9 +279,10 @@ def get_issues_count(http_session, repo, jinja_env, query, param):
return data["total_count"]
-def update_csv_file(http_session, repo, path):
+def update_csv_file(http_session: requests.Session, repo: str, path: str):
"""Add today's records to the provided csv file"""
- today = str(datetime.date.today())
+ now = datetime.now(tz=timezone.utc)
+ today = str(now.date())
# Load the CSV file in memory
with open(path) as f:
@@ -103,16 +290,22 @@ def update_csv_file(http_session, repo, path):
# If today already has its own row don't add another one
if len(content) == 1 or content[1][0] != today:
- content.insert(1, None)
+ content.insert(1, [])
content[1] = [today]
# Setup the Jinja2 environment
jinja_env = jinja2.Environment()
- jinja_env.filters["relative_date"] = filter_relative_date
+ jinja_env.filters["relative_date"] = lambda value : filter_relative_date(now, str(value)) # type: ignore
query = content[0][0]
- for param in content[0][1:]:
- content[1].append(str(get_issues_count(http_session, repo, jinja_env, query, param)))
+
+    # Custom query requiring custom logic
+ if "__status_updated:{{param|relative_date}}" in query:
+ for bin in status_updated(http_session, now, repo, query, content[0][1:]):
+ content[1].append(str(bin))
+ else:
+ for param in content[0][1:]:
+ content[1].append(str(get_issues_count(http_session, repo, jinja_env, query, param)))
with open(path, "w") as f:
writer = csv.writer(f, lineterminator="\n")
@@ -121,6 +314,8 @@ def update_csv_file(http_session, repo, path):
if __name__ == "__main__":
http_session = requests.Session()
+ http_session.headers["Accept"] = "application/vnd.github+json"
+ http_session.headers["X-GitHub-Api-Version"] = "2022-11-28"
if "GITHUB_TOKEN" in os.environ:
http_session.auth = ('x-token', os.environ["GITHUB_TOKEN"])