diff --git a/.gitignore b/.gitignore
index 3a5da06..d1dda52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .DS_Store
 SPICE/
+__pycache__
\ No newline at end of file
diff --git a/make_Metakernel.py b/make_Metakernel.py
index 1e45a7b..77d2343 100644
--- a/make_Metakernel.py
+++ b/make_Metakernel.py
@@ -11,6 +11,14 @@
 import glob
 import pandas as pd
 from pathlib import Path
+import requests
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+from datetime import datetime, timedelta, timezone
+from contextlib import nullcontext
+import tempfile
+import logging
+import fnmatch
 
 
 def make_SPICEDirectories(spacecraft, basedir=''):
@@ -95,7 +103,7 @@
         for savedir in column['savedir']:
             for namepattern in column['namepattern']:
                 # Get the file with wget
-                run_wgetForSPICE(url, savedir, namepattern, force_update=force_update)
+                run_requestsForSPICE(url, savedir, namepattern, force_update=force_update)
 
                 # Look for the files
                 retrieved_files.extend(list(savedir.glob(namepattern)))
@@ -165,7 +173,7 @@
     print(savedir)
 
     # Get the file with wget
-    run_wgetForSPICE(url, savedir, namepattern, force_update=force_update)
+    run_requestsForSPICE(url, savedir, namepattern, force_update=force_update)
 
     # Look for the files
     retrieved_files.extend(list(savedir.glob(namepattern)))
@@ -204,8 +212,83 @@
     commandline = commandname + flags + filepattern_flags + host_url
 
+    logging.info(f"Download with wget: {' '.join(commandline)}")
     subprocess.run(commandline)
+
+def run_requestsForSPICE(url, savedir, namepattern, show_progress=True, force_update=False):
+    '''
+    Use requests lib to download data. This doesn't depend on wget.
+
+    Scrapes the server's HTML index page at `url`, downloads every linked
+    file whose name matches the glob `namepattern` into `savedir`, and uses
+    If-Modified-Since to skip files already present (unless force_update).
+
+    @author: RibomBalt
+    '''
+    # In case savedir is being handled as a pathlib Path, which subprocess
+    # doesn't like
+    savedir = str(savedir)
+    # make directory for download first
+    os.makedirs(savedir, exist_ok=True)
+
+    # get index page
+    r = requests.get(url)
+    html = BeautifulSoup(r.text, 'lxml')
+    # get all a tag in html (NAIF-style index: links live in td > pre > a)
+    hrefs = html.select('td>pre>a')
+
+    for href in hrefs:
+        filename = href.get('href')
+        local_path = os.path.join(savedir, filename)
+        # rstrip avoids a double slash when url already ends with '/'
+        remote_path = f"{url.rstrip('/')}/{filename}"
+
+        logging.debug(filename)
+
+        if fnmatch.fnmatch(filename, namepattern):
+            logging.info(f"File Matched: {filename}")
+
+            # =====================
+            # The following is generic download code built on requests
+            # TODO: add retry
+            # TODO: maybe download to temp file first then move to target path, so not likely to download incomplete files
+
+            headers = {}
+            if (not force_update) and os.path.isfile(local_path):
+                # mtime is seconds since epoch; HTTP dates are always GMT/UTC
+                headers['If-Modified-Since'] = datetime.fromtimestamp(os.path.getmtime(local_path), tz=timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
+                logging.debug(f"headers: {headers}")
+
+            else:
+                pass
+
+            r = requests.get(remote_path, headers=headers, stream=True)
+            if r.status_code == 304:
+                # server says our copy is current: skip the download
+                logging.info(f"File Skipped: {local_path}, {remote_path}")
+            elif r.status_code == 200:
+                # actually downloading data
+                logging.info(f"Start downloading...: {local_path} <- {remote_path}")
+                if show_progress:
+                    guess_len = int(r.headers['Content-Length']) if 'Content-Length' in r.headers else 1024 ** 2 * 5
+                    pbar = tqdm(desc=filename, total=guess_len, unit='B')
+                else:
+                    pbar = nullcontext()
+                    # implement a null update method so the loop below works
+                    pbar.update = lambda n: None
+
+
+                with pbar:
+                    with open(local_path, 'wb') as fp:
+                        for chunk in r.iter_content(chunk_size=8192):
+                            fp.write(chunk)
+                            pbar.update(len(chunk))
+
+                logging.info(f"File Downloaded: {local_path}")
+
+            else:
+                raise ConnectionError(f"{remote_path} not downloaded, {r.status_code}, {r.text}, {r.headers}")
+            # ==============
+
+
+
+
 
 
 
 # def run_urllibForSPICE(url, savedir, namepattern, show_progress=True, force_update=False):
 # # In case savedir is being handled as a pathlib Path, which subprocess
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9c221b0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+spiceypy
+requests
+BeautifulSoup4
+lxml
+pandas
+matplotlib
+tqdm
diff --git a/test_make_metakernel.py b/test_make_metakernel.py
new file mode 100644
index 0000000..ef9dfd3
--- /dev/null
+++ b/test_make_metakernel.py
@@ -0,0 +1,38 @@
+import unittest
+
+from make_Metakernel import *
+
+class TestMakeKernel(unittest.TestCase):
+
+    def test_requests(self):
+        # no force update
+        run_requestsForSPICE('https://naif.jpl.nasa.gov/pub/naif/generic_kernels/lsk/',
+                             './SPICE/generic/kernels/lsk',
+                             'naif????.tls',
+                             show_progress=True,
+                             force_update=False,
+                             )
+
+        # force update
+        run_requestsForSPICE('https://naif.jpl.nasa.gov/pub/naif/generic_kernels/lsk/',
+                             './SPICE/generic/kernels/lsk',
+                             'naif????.tls',
+                             show_progress=True,
+                             force_update=True,
+                             )
+
+        # no progress bar
+        run_requestsForSPICE('https://naif.jpl.nasa.gov/pub/naif/generic_kernels/lsk/',
+                             './SPICE/generic/kernels/lsk',
+                             'naif????.tls',
+                             show_progress=False,
+                             force_update=True,
+                             )
+
+    def test_Metakernels(self):
+        make_Metakernel('juno')
+
+if __name__ == '__main__':
+    import logging
+    logging.getLogger().setLevel(logging.INFO)
+    unittest.main()
\ No newline at end of file