Skip to content
Merged

Sst #33

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 17 additions & 22 deletions scripts/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,58 +46,53 @@ def main():

args = parser.parse_args()

# remove file if exists
if os.path.exists(args.output_file):
os.remove(args.output_file)
download_function(args.output_file, args.start_date, args.end_date, args.datatypes, args.loc_name, args.station_id, args.token)

# Make query string
dtypes_string = '&'.join([f'datatypeid={dt}' for dt in args.datatypes])

def download_function(output_file, start_date, end_date, datatypes, loc_name, station_id, noaa_api_token):
    """Download NOAA climate data via the NOAA API and write it to a CSV file.

    Parameters
    ----------
    output_file : str
        Path of the CSV to write. An existing file at this path is removed
        first so a rerun never appends to stale data.
    start_date, end_date : str
        Inclusive date range in ``%Y-%m-%d`` format.
    datatypes : list[str]
        NOAA datatype ids to request (e.g. ``PRCP``, ``TMAX``, ``TMIN``).
        Must be non-empty.
    loc_name : str
        Human-readable location name stored in the ``NAME`` column.
    station_id : str
        NOAA station identifier stored in the ``STATION`` column.
    noaa_api_token : str
        API token forwarded to the NOAA endpoint.

    Returns
    -------
    int
        0 on success.

    Raises
    ------
    ValueError
        If ``datatypes`` is empty (the original code raised a less helpful
        ``ZeroDivisionError`` in that case).
    """
    if not datatypes:
        raise ValueError('datatypes must contain at least one NOAA datatype id')

    # Remove a previous output file so a failed run cannot leave mixed data.
    if os.path.exists(output_file):
        os.remove(output_file)

    # Build the repeated datatypeid fragment of the API query string.
    dtypes_string = '&'.join([f'datatypeid={dt}' for dt in datatypes])

    # Convert the date strings to datetimes to size the request windows.
    # NOTE(review): assumes end_date is after start_date — an inverted range
    # yields an empty split_range and pd.concat([]) would fail below.
    dt_start = datetime.strptime(start_date, '%Y-%m-%d')
    dt_end = datetime.strptime(end_date, '%Y-%m-%d')
    n_days = (dt_end - dt_start).days

    # The NOAA API caps a response at 1000 rows; each day produces one row
    # per datatype, so split the range into chunks that stay under the cap.
    split_size = np.floor(1000 / len(datatypes))
    split_range = np.arange(0, n_days, split_size)

    # Fetch the chunks in parallel (4 workers) with a progress bar.
    print('Downloading data through NOAA API')
    datasets_list = Parallel(n_jobs=4)(
        delayed(dl_noaa_api)(i, dtypes_string, station_id, noaa_api_token,
                             start_date, end_date, split_size)
        for i in tqdm.tqdm(split_range[:])
    )

    # Merge the chunks and pivot to one column per datatype.
    df = pd.concat(datasets_list)
    df_pivot = df.pivot(index='date', columns='datatype', values='value')
    # NOAA reports values in tenths of units; scale to whole units.
    df_pivot.loc[:, :] /= 10

    df_pivot = df_pivot.reset_index(drop=False)
    df_pivot['DATE'] = df_pivot.apply(
        lambda x: datetime.fromisoformat(x['date']).strftime('%Y-%m-%d'), axis=1)

    # Outer-join against the full calendar range so days with no API data
    # still appear as (empty) rows in the output.
    dr = pd.DataFrame(pd.date_range(start=start_date, end=end_date), columns=['DATE'])
    dr['DATE'] = dr['DATE'].astype(str)
    df_merged = pd.concat([df_pivot.set_index('DATE'), dr.set_index('DATE')],
                          join='outer', axis=1, sort=True)
    df_merged['DATE'] = df_merged.index
    df_merged['STATION'] = station_id
    df_merged['NAME'] = loc_name
    # Placeholder columns expected downstream. NOTE(review): this clobbers
    # any TAVG/SNWD values actually fetched — confirm that is intentional.
    df_merged['TAVG'] = None
    df_merged['SNWD'] = None

    final_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"]
    df_final = df_merged[final_cols]
    # Normalize NaN to None so the CSV contains empty fields, not 'nan'.
    df_final = df_final.replace({np.nan: None})

    print(f'Saving data to {output_file}')
    df_final.to_csv(output_file, index=False, quoting=csv.QUOTE_ALL)
    return 0


if __name__ == "__main__":
Expand Down