Source code for sunpy.data.data_manager.manager

import pathlib
import functools
from contextlib import contextmanager
from urllib.parse import urlparse

from sunpy.util.exceptions import warn_user
from sunpy.util.util import hash_file

# Names exported by ``from <this module> import *``.
__all__ = ['DataManager']


class DataManager:
    """
    This class provides a remote data manager for managing remote files.

    Parameters
    ----------
    cache: `sunpy.data.data_manager.cache.Cache`
        Cache object to be used by `~sunpy.data.data_manager.manager.DataManager`.
    """

    def __init__(self, cache):
        self._cache = cache
        # name -> {namespace: local file path} for every file resolved so far.
        self._file_cache = {}
        # Namespace of the required function that is currently executing
        # (see `_get_module`); `None` when no such function is running.
        self._namespace = None
        # When true, files are redownloaded and their hashes are not verified.
        self._skip_hash_check = False
        # name -> {'uri': ..., 'hash': ...} replacements set by `override_file`.
        self._skip_file = {}
        # name -> {'urls': ..., 'sha_hash': ...} for downloads deferred to `get`.
        self._require_files = {}

    def require(self, name, urls, sha_hash, defer_download=False):
        """
        Decorator for informing the data manager about the requirement of
        a file by a function. Optionally defer downloading the file until
        it's requested in `~sunpy.data.data_manager.manager.DataManager.get`.

        Parameters
        ----------
        name : `str`
            The name to reference the file with.
        urls : `list` or `str`
            A list of urls to download the file from.
        sha_hash : `str`
            SHA-256 hash of file.
        defer_download : `bool`, optional
            If `True`, the file download is deferred until it is requested
            via `~sunpy.data.data_manager.manager.DataManager.get`.

        Returns
        -------
        callable
            A decorator which wraps the function so that the file is made
            available (or registered for deferred download) on every call.
        """
        if isinstance(urls, str):
            urls = [urls]

        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                # Remember the namespace that was active before this call so
                # that it can be put back afterwards. This keeps the manager
                # consistent when required functions call each other and when
                # the wrapped function raises.
                previous_namespace = self._namespace
                self._namespace = self._get_module(func)
                try:
                    if defer_download:
                        self._require_files[name] = {
                            'urls': urls,
                            'sha_hash': sha_hash,
                        }
                    else:
                        self._download_and_cache_file(name, urls, sha_hash)
                    return func(*args, **kwargs)
                finally:
                    self._namespace = previous_namespace
            return wrapper
        return decorator

    def _download_and_cache_file(self, name, urls, sha_hash):
        """
        Internal method to handle the downloading and caching logic.

        The resolved path is stored in ``self._file_cache`` under ``name``
        and the current namespace.

        Parameters
        ----------
        name : `str`
            The name the file is referenced with.
        urls : `list`
            URLs the file can be downloaded from.
        sha_hash : `str`
            Expected hash of the file.

        Raises
        ------
        `ValueError`
            If an override hash does not match, or if one of ``urls`` is
            already known to the cache but no file with ``sha_hash`` is.
        `RuntimeError`
            If a downloaded file does not have the expected hash.
        """
        replace = self._skip_file.get(name)
        if replace:
            # An active `override_file` takes precedence over everything else.
            file_path = self._resolve_override(replace)
        elif self._skip_hash_check:
            file_path = self._cache.download(urls, self._namespace, redownload=True)
        else:
            file_path = self._resolve_verified(urls, sha_hash)

        self._file_cache.setdefault(name, {})[self._namespace] = file_path

    def _resolve_override(self, replace):
        """
        Return the path of the file described by an `override_file` entry.
        """
        uri_parse = urlparse(replace['uri'])
        if uri_parse.scheme in ("", "file"):
            # Local file: no download needed, only hash it.
            file_path = uri_parse.netloc + uri_parse.path
            file_hash = hash_file(file_path)
        else:
            file_path, file_hash, _ = self._cache._download_and_hash(
                [replace['uri']], self._namespace
            )
        # The hash is optional; only compare when the caller supplied one.
        if replace['hash'] and file_hash != replace['hash']:
            raise ValueError("Hash provided to override_file does not match hash of the file.")
        return file_path

    def _resolve_verified(self, urls, sha_hash):
        """
        Return the path of a file whose hash has been checked against ``sha_hash``.
        """
        details = self._cache.get_by_hash(sha_hash)
        if not details:
            # Nothing with this hash is cached. If one of the URLs is cached
            # anyway, the cached copy must have a different hash.
            if self._cache_has_file(urls):
                raise ValueError(f"{urls} has already been downloaded, but no file matching the hash {sha_hash} can be found.")
            file_path = self._cache.download(urls, self._namespace)
            file_hash = hash_file(file_path)
            if file_hash != sha_hash:
                raise RuntimeError(f"Hash of local file ({file_hash}) does not match expected hash ({sha_hash}). File may have changed on the remote server.")
            return file_path

        # The cache knows the file; make sure the copy on disk is still
        # present and intact before handing it out.
        if not pathlib.Path(details["file_path"]).is_file():
            warn_user("Requested file appears to missing and will be redownloaded.")
            self._cache._download_and_hash(urls, self._namespace)
        if hash_file(details['file_path']) == details['file_hash']:
            return details['file_path']

        warn_user("Hashes do not match, the file will be redownloaded (could be tampered/corrupted)")
        file_path = self._cache.download(urls, self._namespace, redownload=True)
        # Check the hash once more; if it still fails, give up.
        if hash_file(file_path) != details['file_hash']:
            raise RuntimeError("Redownloaded file also has the incorrect hash. The remote file on the server might have changed.")
        return file_path

    def get(self, name):
        """
        Get the file by name, and download it if deferred.

        Parameters
        ----------
        name : `str`
            Name of the file given to the data manager, same as the one provided
            in `~sunpy.data.data_manager.manager.DataManager.require`.

        Returns
        -------
        `pathlib.Path`
            Path of the file.

        Raises
        ------
        `KeyError`
            If ``name`` is not in the cache.
        """
        file_info = self._require_files.get(name)
        if file_info is not None:
            self._download_and_cache_file(name, file_info['urls'], file_info['sha_hash'])
            # Forget the deferred requirement only after the download worked,
            # so that a failed attempt can be retried by calling `get` again.
            self._require_files.pop(name, None)
        return pathlib.Path(self._file_cache[name][self._namespace])

    @contextmanager
    def override_file(self, name, uri, sha_hash=None):
        """
        Replaces the file by the name with the file provided by the url/path.

        Parameters
        ----------
        name : `str`
            Name of the file provided in the `require` decorator.
        uri : `str`
            URI of the file which replaces original file. Scheme should be one
            of ``http``, ``https``, ``ftp`` or ``file``. If no scheme is given
            the uri will be interpreted as a local path. i.e.
            ``file:///tmp/test`` and ``/tmp/test`` are the same.
        sha_hash : `str`, optional
            SHA256 hash of the file to compared to after downloading.
        """
        # A private sentinel distinguishes "no override was active" from any
        # stored value, so an enclosing override of the same name is restored
        # on exit instead of being discarded.
        missing = object()
        previous = self._skip_file.get(name, missing)
        self._skip_file[name] = {
            'uri': uri,
            'hash': sha_hash,
        }
        try:
            yield
        finally:
            if previous is missing:
                self._skip_file.pop(name, None)
            else:
                self._skip_file[name] = previous

    @contextmanager
    def skip_hash_check(self):
        """
        Disables hash checking temporarily.

        Examples
        --------
        >>> with remote_data_manager.skip_hash_check():  # doctest: +SKIP
        ...     myfunction()  # doctest: +SKIP
        """
        # Restore whatever was set before, so nested use does not re-enable
        # hash checking while an outer context is still active.
        previous = self._skip_hash_check
        self._skip_hash_check = True
        try:
            yield
        finally:
            self._skip_hash_check = previous

    def _cache_has_file(self, urls):
        """
        Return `True` if the cache has an entry for at least one of ``urls``.
        """
        return any(self._cache._get_by_url(url) for url in urls)

    def _get_module(self, func):
        """
        Returns the name of the module (appended with a dot) that a function
        belongs to.

        Parameters
        ----------
        func : function
            A function whose module is to be found.

        Returns
        -------
        A `str` that represents the module name appended with a dot.
        """
        # Only the top-level package name is kept, e.g. "pkg.sub.mod" -> "pkg.".
        module = func.__module__.lstrip('.').split('.')[0] + '.'
        # Functions defined in a script get an empty namespace.
        if module == '__main__.':
            module = ''
        return module