diff --git a/scripts/data_collector/base.py b/scripts/data_collector/base.py index 2efc2feadcb..5f5076fa4d9 100644 --- a/scripts/data_collector/base.py +++ b/scripts/data_collector/base.py @@ -305,7 +305,7 @@ def _executor(self, file_path: Path): df = self._normalize_obj.normalize(df) if df is not None and not df.empty: if self._end_date is not None: - _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date) + _mask = pd.to_datetime(df[self._date_field_name], utc=True).dt.tz_convert(None) <= pd.Timestamp(self._end_date) df = df[_mask] df.to_csv(self._target_dir.joinpath(file_path.name), index=False) diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 82660f1112b..67d136adb33 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -392,8 +392,8 @@ def normalize_yahoo( columns = copy.deepcopy(YahooNormalize.COLUMNS) df = df.copy() df.set_index(date_field_name, inplace=True) - df.index = pd.to_datetime(df.index) - df.index = df.index.tz_localize(None) + df.index = pd.to_datetime(df.index, utc=True) + df.index = df.index.tz_convert(None) df = df[~df.index.duplicated(keep="first")] if calendar_list is not None: df = df.reindex( @@ -458,6 +458,8 @@ def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame: df = df.copy() df.set_index(self._date_field_name, inplace=True) if "adjclose" in df: + df["adjclose"] = pd.to_numeric(df["adjclose"], errors="coerce") + df["close"] = pd.to_numeric(df["close"], errors="coerce") df["factor"] = df["adjclose"] / df["close"] df["factor"] = df["factor"].ffill() else: @@ -465,6 +467,7 @@ def adjusted_price(self, df: pd.DataFrame) -> pd.DataFrame: for _col in self.COLUMNS: if _col not in df.columns: continue + df[_col] = pd.to_numeric(df[_col], errors="coerce") if _col == "volume": df[_col] = df[_col] / df["factor"] else: @@ -500,6 +503,7 @@ def _manual_adj_data(self, df: pd.DataFrame) -> pd.DataFrame: # NOTE: retain original adjclose, required for incremental updates if _col in [self._symbol_field_name, "adjclose", "change"]: continue + df[_col] = pd.to_numeric(df[_col], errors="coerce") if _col == "volume": df[_col] = df[_col] * _close else: