Skip to content

Commit db9c2da

Browse files
jmberg authored and kuba-moo committed
pw_poller: poll events, not series
It's much easier since events get a timestamp from the server, not from the email. It should also be cheaper for the server, since we can limit the query to events newer than the last one we saw.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
1 parent 79af2ee commit db9c2da

File tree

2 files changed

+22
-53
lines changed

2 files changed

+22
-53
lines changed

pw/patchwork.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,10 +178,21 @@ def get_patches_all(self, delegate=None, project=None, since=None, action_requir
178178
query['archived'] = 'false'
179179
return self.get_all('patches', query)
180180

181-
def get_series_all(self, project=None, since=None):
181+
def get_new_series(self, project=None, since=None):
182182
if project is None:
183183
project = self._project
184-
return self.get_all('series', {'project': project, 'since': since})
184+
event_params = {
185+
'project': project,
186+
'since': since,
187+
'order': 'date',
188+
'category': 'series-created',
189+
}
190+
events = self.get_all('events', event_params)
191+
if not events:
192+
return [], since
193+
since = events[-1]['date']
194+
series = [self.get('series', e['payload']['series']['id']) for e in events]
195+
return series, since
185196

186197
def post_check(self, patch, name, state, url, desc):
187198
headers = {}

pw_poller.py

Lines changed: 9 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -70,12 +70,10 @@ def __init__(self, config) -> None:
7070
self._pw = Patchwork(config)
7171

7272
self._state = {
73-
'last_poll': (datetime.datetime.now() - datetime.timedelta(hours=2)).timestamp(),
74-
'done_series': [],
73+
'last_event_ts': (datetime.datetime.now() -
74+
datetime.timedelta(hours=2)).strftime('%Y-%m-%dT%H:%M:%S'),
7575
}
7676
self.init_state_from_disk()
77-
self.seen_series = set(self._state['done_series'])
78-
self.done_series = self.seen_series.copy()
7977

8078
self._recheck_period = config.getint('poller', 'recheck_period', fallback=3)
8179
self._recheck_lookback = config.getint('poller', 'recheck_lookback', fallback=9)
@@ -152,10 +150,6 @@ def series_determine_tree(self, s: PwSeries) -> str:
152150
return ret
153151

154152
def _process_series(self, pw_series) -> None:
155-
if pw_series['id'] in self.seen_series:
156-
log(f"Already seen {pw_series['id']}", "")
157-
return
158-
159153
s = PwSeries(self._pw, pw_series)
160154

161155
log("Series info",
@@ -184,8 +178,6 @@ def _process_series(self, pw_series) -> None:
184178
core.write_tree_selection_result(self.result_dir, s, comment)
185179
core.mark_done(self.result_dir, s)
186180

187-
self.seen_series.add(s['id'])
188-
189181
def process_series(self, pw_series) -> None:
190182
log_open_sec(f"Checking series {pw_series['id']} with {pw_series['total']} patches")
191183
try:
@@ -194,59 +186,26 @@ def process_series(self, pw_series) -> None:
194186
log_end_sec()
195187

196188
def run(self, life) -> None:
197-
partial_series = {}
189+
since = self._state['last_event_ts']
198190

199-
prev_big_scan = datetime.datetime.fromtimestamp(self._state['last_poll'])
200-
prev_req_time = datetime.datetime.now()
201-
202-
# We poll every 2 minutes, for series from last 10 minutes
203-
# Every 3 hours we do a larger check of series of last 12 hours to make sure we didn't miss anything
204-
# apparently patchwork uses the time from the email headers and people back date their emails, a lot
205-
# We keep a history of the series we've seen in and since the last big poll to not process twice
206191
try:
192+
# We poll every 2 minutes after this
207193
secs = 0
208194
while life.next_poll(secs):
209-
this_poll_seen = set()
210195
req_time = datetime.datetime.now()
211-
212-
# Decide if this is a normal 4 minute history poll or big scan of last 12 hours
213-
if prev_big_scan + datetime.timedelta(hours=self._recheck_period) < req_time:
214-
big_scan = True
215-
since = prev_big_scan - datetime.timedelta(hours=self._recheck_lookback)
216-
log_open_sec(f"Big scan of last 12 hours at {req_time} since {since}")
217-
else:
218-
big_scan = False
219-
since = prev_req_time - datetime.timedelta(minutes=10)
220-
log_open_sec(f"Checking at {req_time} since {since}")
221-
222-
json_resp = self._pw.get_series_all(since=since)
196+
json_resp, since = self._pw.get_new_series(since=since)
223197
log(f"Loaded {len(json_resp)} series", "")
224198

225-
had_partial_series = False
226199
for pw_series in json_resp:
227200
try:
228201
self.process_series(pw_series)
229-
this_poll_seen.add(pw_series['id'])
230202
except IncompleteSeries:
231-
partial_series.setdefault(pw_series['id'], 0)
232-
if partial_series[pw_series['id']] < 5:
233-
had_partial_series = True
234-
partial_series[pw_series['id']] += 1
235-
236-
if big_scan:
237-
prev_req_time = req_time
238-
prev_big_scan = req_time
239-
# Shorten the history of series we've seen to just the last 12 hours
240-
self.seen_series = this_poll_seen
241-
self.done_series &= self.seen_series
242-
elif had_partial_series:
243-
log("Partial series, not moving time forward", "")
244-
else:
245-
prev_req_time = req_time
203+
# didn't make it to the list fully, patchwork
204+
# shouldn't have had this event at all though
205+
pass
246206

247207
while not self._done_queue.empty():
248208
s = self._done_queue.get()
249-
self.done_series.add(s['id'])
250209
log(f"Testing complete for series {s['id']}", "")
251210

252211
secs = 120 - (datetime.datetime.now() - req_time).total_seconds()
@@ -257,8 +216,7 @@ def run(self, life) -> None:
257216
pass # finally will still run, but don't splat
258217
finally:
259218
# Dump state before trying to stop workers, in case they hang
260-
self._state['last_poll'] = prev_big_scan.timestamp()
261-
self._state['done_series'] = list(self.seen_series)
219+
self._state['last_event_ts'] = since
262220
with open('poller.state', 'w') as f:
263221
json.dump(self._state, f)
264222

0 commit comments

Comments
 (0)