Skip to content

Commit 6176464

Browse files
authored
[Queue Time Histogran][Fetch Github Config File -> find closest commit config file (#6508)
[Bug] when call github apiwith path parameter. Only commits containing this file path will be returned (have changes) `all_commits = self.repo.get_commits(path=self.path)` This causes issue since the config file fetch will always return empty if there is no cofig file change happen after our time to take snapshot. Proposal Change 1. find the first commit that is later than start_time -> found, fetch config 2. if nothing find, fetch commits that are before/equal to commit time -> get the closest commit and fetch config
1 parent 7754e8e commit 6176464

File tree

2 files changed

+48
-23
lines changed

2 files changed

+48
-23
lines changed

aws/lambda/oss_ci_job_queue_time/lambda_function.py

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ def get_clickhouse_client(
3131
host: str, user: str, password: str
3232
) -> clickhouse_connect.driver.client.Client:
3333
# for local testing only, disable SSL verification
34-
# return clickhouse_connect.get_client(host=host, user=user, password=password, secure=True, verify=False)
34+
return clickhouse_connect.get_client(
35+
host=host, user=user, password=password, secure=True, verify=False
36+
)
3537

3638
return clickhouse_connect.get_client(
3739
host=host, user=user, password=password, secure=True
@@ -98,7 +100,7 @@ def write_to_file(data: Any, filename="", path=""):
98100
# --------- Github Config File Methods Start----
99101
class LazyFileHistory:
100102
"""
101-
Reads the content of a file from a GitHub repository on the version that it was on a specific time and date provided. It then caches the commits and file contents avoiding unnecessary requests to the GitHub API.
103+
Reads the content of a file from a GitHub repository on the version that it was closest to a specific time and date provided. It then caches the commits and file contents avoiding unnecessary requests to the GitHub API.
102104
All public methods are thread-safe.
103105
"""
104106

@@ -110,7 +112,9 @@ def __init__(self, repo: Any, path: str) -> None:
110112
self._fetched_all_commits = False
111113
self._lock = threading.RLock()
112114

113-
def get_version_after_timestamp(self, timestamp: str | datetime) -> Optional[str]:
115+
def get_version_close_to_timestamp(
116+
self, timestamp: str | datetime
117+
) -> Optional[str]:
114118
try:
115119
with self._lock:
116120
if not isinstance(timestamp, datetime):
@@ -120,31 +124,34 @@ def get_version_after_timestamp(self, timestamp: str | datetime) -> Optional[str
120124
)
121125
else:
122126
timestamp = parse(timestamp)
123-
commit = self._find_earliest_after_in_cache(timestamp)
127+
128+
info(
129+
f" [LazyFileHistory] Try fetch cached content for {self.repo} : {self.path} at {timestamp.isoformat()}"
130+
)
131+
commit = self._find_closest_before_or_equal_in_cache(timestamp)
124132
if commit:
125133
return self._fetch_content_for_commit(commit)
126134

127135
if not self._fetched_all_commits:
136+
info(
137+
f" [LazyFileHistory] Nothing found in cache, fetching all commit includes {self.repo}/{self.path} close/equal to {timestamp.isoformat()}"
138+
)
128139
commit = self._fetch_until_timestamp(timestamp)
129140
if commit:
130141
return self._fetch_content_for_commit(commit)
142+
info(
143+
f" [LazyFileHistory] No config file found in cache and in commits for {self.repo}/{self.path}."
144+
)
145+
return None
131146
except Exception as e:
132147
warning(
133-
f"Error fetching content for {self.repo} : {self.path} at {timestamp}: {e}"
148+
f" [LazyFileHistory] Error fetching content for {self.repo} : {self.path} at {timestamp}: {e}"
134149
)
135-
136-
return None
137-
138-
def _find_earliest_after_in_cache(self, timestamp: datetime) -> Optional[str]:
139-
commits_after = [
140-
c for c in self._commits_cache if c.commit.author.date > timestamp
141-
]
142-
143-
if not commits_after:
144150
return None
145-
return commits_after[-1]
146151

147152
def _fetch_until_timestamp(self, timestamp: datetime) -> Optional[str]:
153+
# call github api, with path parameter
154+
# Only commits containing this file path will be returned.
148155
all_commits = self.repo.get_commits(path=self.path)
149156
known_shas = {c.sha for c in self._commits_cache}
150157

@@ -158,13 +165,29 @@ def _fetch_until_timestamp(self, timestamp: datetime) -> Optional[str]:
158165
if commit.commit.author.date <= timestamp:
159166
break
160167

168+
info(f" [LazyFileHistory] Fetched new commits {len(newly_fetched)}")
169+
if len(newly_fetched) > 0:
170+
newly_fetched.sort(key=lambda c: c.commit.author.date)
171+
info(
172+
f" [LazyFileHistory] Fetched new commits with latest: {newly_fetched[-1].commit.author.date}, oldest:{newly_fetched[-1].commit.author.date}"
173+
)
174+
161175
self._commits_cache.extend(newly_fetched)
162-
self._commits_cache.sort(key=lambda c: c.commit.author.date, reverse=True)
176+
self._commits_cache.sort(key=lambda c: c.commit.author.date)
163177

164178
if not newly_fetched:
165179
self._fetched_all_commits = True
166180

167-
return self._find_earliest_after_in_cache(timestamp)
181+
return self._find_closest_before_or_equal_in_cache(timestamp)
182+
183+
def _find_closest_before_or_equal_in_cache(
184+
self, timestamp: datetime
185+
) -> Optional[str]:
186+
commits_before_equal = [
187+
c for c in self._commits_cache if c.commit.author.date <= timestamp
188+
]
189+
commits_before_equal.sort(key=lambda c: c.commit.author.date)
190+
return commits_before_equal[-1] if commits_before_equal else None
168191

169192
def _fetch_content_for_commit(self, commit: Any) -> str:
170193
if commit.sha not in self._content_cache:
@@ -286,7 +309,7 @@ def create_runner_labels(
286309
)
287310

288311
if len(all_runners_configs.keys()) == 0:
289-
warning(
312+
raise ValueError(
290313
" [method: create_runner_labels] No runner labels found in the github config files, something is wrong, please investigate."
291314
)
292315

@@ -324,7 +347,7 @@ def create_runner_labels(
324347
def get_runner_config(
325348
retriever: LazyFileHistory, start_time: str | datetime
326349
) -> Dict[str, Dict[str, Any]]:
327-
contents = retriever.get_version_after_timestamp(start_time)
350+
contents = retriever.get_version_close_to_timestamp(start_time)
328351
if contents:
329352
return explode_runner_variants(yaml.safe_load(contents))
330353
return {"runner_types": {}}
@@ -590,7 +613,6 @@ def _add_runner_labels(
590613
old_lf_lf_runner_config_retriever,
591614
) -> None:
592615
# create dictionary of tags with set of targeting machine types
593-
594616
lf_runner_config = get_runner_config(lf_runner_config_retriever, start_time)
595617
if not lf_runner_config or not lf_runner_config["runner_types"]:
596618
lf_runner_config = get_runner_config(
@@ -1170,7 +1192,9 @@ def get_latest_queue_time_histogram_table(
11701192
query = """
11711193
SELECT toUnixTimestamp(MAX(time)) as latest FROM fortesting.oss_ci_queue_time_histogram
11721194
"""
1173-
info(" Getting lastest timestamp from misc.oss_ci_queue_time_histogram....")
1195+
info(
1196+
" Getting lastest timestamp from fortesting.oss_ci_queue_time_histogram...."
1197+
)
11741198
res = cc.query(query, {})
11751199

11761200
if res.row_count != 1:

aws/lambda/tests/test_lambda_oss_ci_job_queue_time.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,13 +238,14 @@ def setUp(self) -> None:
238238
self.mock_get_config_retrievers = patcher4.start()
239239
self.mock_envs = envs_patcher.start()
240240

241-
self.mock_get_runner_config.return_value = {"runner_types": {}}
241+
self.mock_get_runner_config.return_value = {
242+
"runner_types": {"pet": {"os": "linux", "is_ephemeral": "false"}}
243+
}
242244
self.mock_get_config_retrievers.return_value = {
243245
"meta": MagicMock(),
244246
"lf": MagicMock(),
245247
"old_lf": MagicMock(),
246248
}
247-
248249
self.addCleanup(patcher2.stop)
249250
self.addCleanup(patcher3.stop)
250251
self.addCleanup(patcher4.stop)

0 commit comments

Comments
 (0)