@@ -70,12 +70,10 @@ def __init__(self, config) -> None:
         self._pw = Patchwork(config)
 
         self._state = {
-            'last_poll': (datetime.datetime.now() - datetime.timedelta(hours=2)).timestamp(),
-            'done_series': [],
+            'last_event_ts': (datetime.datetime.now() -
+                              datetime.timedelta(hours=2)).strftime('%Y-%m-%dT%H:%M:%S'),
         }
         self.init_state_from_disk()
-        self.seen_series = set(self._state['done_series'])
-        self.done_series = self.seen_series.copy()
 
         self._recheck_period = config.getint('poller', 'recheck_period', fallback=3)
         self._recheck_lookback = config.getint('poller', 'recheck_lookback', fallback=9)
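For context, init_state_from_disk() (whose body is not part of this diff) only needs to overlay any previously persisted state on top of the defaults built above. A minimal sketch of that idea, assuming it reads back the same poller.state JSON file that the finally: block at the end of run() writes; the helper name load_poller_state is made up here for illustration:

import json
import os

def load_poller_state(path='poller.state'):
    # Return the previously persisted poller state, or an empty dict on
    # the first run when no state file exists yet.
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as f:
        return json.load(f)

# Inside __init__() this would be applied roughly as:
#     self._state.update(load_poller_state())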
@@ -152,10 +150,6 @@ def series_determine_tree(self, s: PwSeries) -> str:
         return ret
 
     def _process_series(self, pw_series) -> None:
-        if pw_series['id'] in self.seen_series:
-            log(f"Already seen {pw_series['id']}", "")
-            return
-
         s = PwSeries(self._pw, pw_series)
 
         log("Series info",
@@ -184,8 +178,6 @@ def _process_series(self, pw_series) -> None:
         core.write_tree_selection_result(self.result_dir, s, comment)
         core.mark_done(self.result_dir, s)
 
-        self.seen_series.add(s['id'])
-
     def process_series(self, pw_series) -> None:
         log_open_sec(f"Checking series {pw_series['id']} with {pw_series['total']} patches")
         try:
@@ -194,59 +186,26 @@ def process_series(self, pw_series) -> None:
             log_end_sec()
 
     def run(self, life) -> None:
-        partial_series = {}
+        since = self._state['last_event_ts']
 
-        prev_big_scan = datetime.datetime.fromtimestamp(self._state['last_poll'])
-        prev_req_time = datetime.datetime.now()
-
-        # We poll every 2 minutes, for series from last 10 minutes
-        # Every 3 hours we do a larger check of series of last 12 hours to make sure we didn't miss anything
-        # apparently patchwork uses the time from the email headers and people back date their emails, a lot
-        # We keep a history of the series we've seen in and since the last big poll to not process twice
         try:
+            # We poll every 2 minutes after this
             secs = 0
             while life.next_poll(secs):
-                this_poll_seen = set()
                 req_time = datetime.datetime.now()
-
-                # Decide if this is a normal 4 minute history poll or big scan of last 12 hours
-                if prev_big_scan + datetime.timedelta(hours=self._recheck_period) < req_time:
-                    big_scan = True
-                    since = prev_big_scan - datetime.timedelta(hours=self._recheck_lookback)
-                    log_open_sec(f"Big scan of last 12 hours at {req_time} since {since}")
-                else:
-                    big_scan = False
-                    since = prev_req_time - datetime.timedelta(minutes=10)
-                    log_open_sec(f"Checking at {req_time} since {since}")
-
-                json_resp = self._pw.get_series_all(since=since)
+                json_resp, since = self._pw.get_new_series(since=since)
                 log(f"Loaded {len(json_resp)} series", "")
 
-                had_partial_series = False
                 for pw_series in json_resp:
                     try:
                         self.process_series(pw_series)
-                        this_poll_seen.add(pw_series['id'])
                     except IncompleteSeries:
-                        partial_series.setdefault(pw_series['id'], 0)
-                        if partial_series[pw_series['id']] < 5:
-                            had_partial_series = True
-                        partial_series[pw_series['id']] += 1
-
-                if big_scan:
-                    prev_req_time = req_time
-                    prev_big_scan = req_time
-                    # Shorten the history of series we've seen to just the last 12 hours
-                    self.seen_series = this_poll_seen
-                    self.done_series &= self.seen_series
-                elif had_partial_series:
-                    log("Partial series, not moving time forward", "")
-                else:
-                    prev_req_time = req_time
+                        # didn't make it to the list fully, patchwork
+                        # shouldn't have had this event at all though
+                        pass
 
                 while not self._done_queue.empty():
                     s = self._done_queue.get()
-                    self.done_series.add(s['id'])
                     log(f"Testing complete for series {s['id']}", "")
 
                 secs = 120 - (datetime.datetime.now() - req_time).total_seconds()
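The big-scan and seen/done bookkeeping above can go away because get_new_series() is expected to hand back only series the poller has not processed yet, together with an updated cursor to feed into the next poll. A minimal sketch of what such a helper could look like, assuming it is backed by Patchwork's events endpoint; the function shape, endpoint parameters, and event category below are illustrative assumptions, not code from this patch:

import requests

def get_new_series(pw_url, since):
    # Fetch series events newer than 'since' (an ISO-8601 timestamp) and
    # return the series payloads plus the newest event timestamp, so the
    # caller can persist it as 'last_event_ts' and pass it back next poll.
    resp = requests.get(f"{pw_url}/api/events/",
                        params={"category": "series-completed", "since": since})
    resp.raise_for_status()
    series = []
    newest = since
    for event in resp.json():
        series.append(event["payload"]["series"])
        if event["date"] > newest:
            newest = event["date"]
    return series, newest

Driving the poll off an event cursor rather than mail dates sidesteps the back-dated-email problem the removed comments describe, which is why the recheck machinery and the seen/done sets are no longer needed.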
@@ -257,8 +216,7 @@ def run(self, life) -> None:
             pass  # finally will still run, but don't splat
         finally:
             # Dump state before trying to stop workers, in case they hang
-            self._state['last_poll'] = prev_big_scan.timestamp()
-            self._state['done_series'] = list(self.seen_series)
+            self._state['last_event_ts'] = since
             with open('poller.state', 'w') as f:
                 json.dump(self._state, f)
 
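With this change the persisted state shrinks to just the event cursor, so poller.state ends up holding a single key, roughly of the form {"last_event_ts": "2024-01-01T00:00:00"} (the timestamp value here is illustrative).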