Source code for sunpy.net.base_client

import re
import string
import importlib
from abc import ABC, abstractmethod
from textwrap import dedent
from functools import wraps
from collections.abc import Sequence

import numpy as np

import astropy.units as u
from astropy.table import Column, QTable, Row, Table, TableAttribute

from sunpy.util.util import get_width

# Public names exported by this module (what ``import *`` brings in).
__all__ = ['QueryResponseColumn', 'BaseQueryResponse',
           'QueryResponseRow', 'QueryResponseTable', 'BaseClient',
           'convert_row_to_table']



[docs]
class BaseQueryResponse(Sequence):
    """
    An Abstract Base Class for results returned from BaseClient.

    Notes
    -----
    * A QueryResponse object must be able to be instantiated with only one
      iterable argument. (i.e. the ``__init__`` must only have one required
      argument).
    * The `client` property must be settable.
    * The base class does not prescribe how you store the results from your
      client, only that it must be possible to represent them as an astropy
      table in the ``build_table`` method.
    * ``__getitem__`` **must** return an instance of the type it was called on.
      I.e. it must always return an object of ``type(self)``.

    """

    @abstractmethod
    def build_table(self):
        """
        Return an `astropy.table.Table` representation of the query response.
        """

    @property
    @abstractmethod
    def client(self):
        """
        An instance of `BaseClient` used to generate the results.

        Generally this is used to fetch the results later.

        .. note::

            In general, this doesn't have to be the same instance of
            ``BaseClient``, this is left to the client developer. If there is a
            significant connection overhead in creating an instance of a client
            you might want it to be the same instance as used for the search.
        """

    @client.setter
    @abstractmethod
    def client(self, value):
        pass

    @property
    @abstractmethod
    def blocks(self):
        """
        A `collections.abc.Sequence` object which contains the records
        contained within the Query Response.
        """

    def response_block_properties(self):
        """
        Returns a set of class attributes on all the response blocks.

        The base implementation reports no attributes; subclasses may
        override it.

        Returns
        -------
        s : `set`
            Set of strings, containing attribute names in the response blocks.
        """
        return set()

    def __str__(self):
        """Print out human-readable summary of records retrieved"""
        return '\n'.join(self.build_table().pformat(show_dtype=False))

    def __repr__(self):
        """Print out human-readable summary of records retrieved"""
        # Default object repr on the first line, followed by the table text.
        return object.__repr__(self) + "\n" + str(self)

    def _repr_html_(self):
        # Delegate HTML rendering to the table built from the response.
        return self.build_table()._repr_html_()

    def show(self, *cols):
        """
        Returns response tables with desired columns for the Query.

        Parameters
        ----------
        \\*cols : `tuple`
            Name of columns to be shown. Names that are not columns of the
            built table are ignored.

        Returns
        -------
        `astropy.table.Table`
            A table showing values for specified columns, or the full table
            when no column names are given.
        """
        table = self.build_table()
        if len(cols) == 0:
            return table
        tablecols = table.columns
        # Keep the caller's ordering, dropping names the table does not have.
        valid_cols = [col for col in cols if col in tablecols]
        return table[valid_cols]





[docs]
class QueryResponseRow(Row):
    """
    A row subclass which knows about the client of the parent table.
    """

    def as_table(self):
        """
        Return this Row as a length one Table.

        The result is a slice of the parent table covering only this row.
        """
        return self.table[self.index:self.index + 1]

    def get(self, key, default=None):
        """
        Extract a value from the row if the key is present otherwise return the value of ``default``
        """
        if key in self.colnames:
            return self[key]
        return default

    @property
    def response_block_map(self):
        """
        A dictionary designed to be used to format a filename.

        This takes all the columns in this Row and lower cases them and
        replaces punctuation and spaces with underscores. Any remaining
        characters not allowed in Python identifiers are removed.
        """
        def key_clean(key):
            # Punctuation and spaces both become underscores ...
            key = re.sub('[%s]' % re.escape(string.punctuation), '_', key)
            key = key.replace(' ', '_')
            # ... and whatever is still neither identifier-like nor numeric
            # is dropped.
            key = ''.join(char for char in key
                          if char.isidentifier() or char.isnumeric())
            return key.lower()

        return {key_clean(key): value for key, value in zip(self.colnames, self)}




[docs]
class QueryResponseColumn(Column):
    """
    A column subclass which knows about the client of the parent table.
    """

    def as_table(self):
        """
        Return this Column as a Table containing only this column.

        The result is the parent table indexed by a one-element tuple holding
        this column's name.
        """
        return self.parent_table[(self.name,)]





[docs]
class QueryResponseTable(QTable):
    __doc__ = QTable.__doc__

    # Row and column classes used by this table.
    Row = QueryResponseRow
    Column = QueryResponseColumn

    # Table-level attributes: the client associated with the results, and the
    # column names to show / hide when the table is rendered.
    client = TableAttribute()
    display_keys = TableAttribute(default=slice(None))
    hide_keys = TableAttribute()

    # Name of the column holding file sizes; read by ``total_size``.
    size_column = None

    def unhide_columns(self):
        """
        Modify this table so that all columns are displayed.

        Returns
        -------
        QueryResponseTable
            This same table instance, to allow chaining.
        """
        self.display_keys = slice(None)
        self.hide_keys = None
        return self

    def _reorder_columns(self, first_columns, remove_empty=True):
        """
        Generate a new version of this table with ``first_columns`` at the start.

        Parameters
        ----------
        first_columns : list
            The column names to put at the start of the table. Names which are
            not columns of this table are ignored.
        remove_empty : bool, optional
            Remove columns where all values are `None`.
            Defaults to ``True``.

        Returns
        -------
        new_table : QueryResponseTable
            A sliced version of this table instance so that the columns are
            reordered.
        """
        all_cols = list(self.colnames)
        first_names = [n for n in first_columns if n in all_cols]
        extra_cols = [col for col in all_cols if col not in first_names]
        new_table = self[first_names + extra_cols]

        if remove_empty:
            # Only object-dtype columns can hold ``None`` in every entry.
            empty_cols = [col.info.name for col in self.itercols()
                          if col.info.dtype.kind == 'O' and all(val is None for val in col)]
            new_table.remove_columns(empty_cols)

        return new_table

    @property
    def _display_table(self):
        """
        Apply the display_keys and hide_keys attributes to the table.

        This removes any keys in hide keys and then slices by any keys in
        display_keys to return the correct table.
        """
        keys = list(self.colnames)
        if self.hide_keys:
            # Drop the hidden keys, keeping the remaining ones in table order.
            for key in self.hide_keys:
                if key in keys:
                    keys.remove(key)

        if self.display_keys != slice(None):
            keys = [dk for dk in self.display_keys if dk in keys]

        table = self[keys]
        # The slicing operation resets display and hide keys to default, but we
        # have already applied it
        table.unhide_columns()

        return table

    def __str__(self):
        """Print out human-readable summary of records retrieved"""
        return '\n'.join(self._display_table.pformat(show_dtype=False))

    def __repr__(self):
        """Print out human-readable summary of records retrieved"""
        return object.__repr__(self) + "\n" + str(self._display_table)

    def _repr_html_(self):
        return QTable._repr_html_(self._display_table)

    def show(self, *cols):
        """
        Return a table with only ``cols`` present.

        If no ``cols`` are specified, all columns will be shown, including any
        hidden by default.

        This differs slightly from ``QueryResponseTable[cols]`` as it allows
        keys which are not in the table to be requested.
        """
        table = self.copy()
        table.unhide_columns()

        if len(cols) == 0:
            return table

        valid_cols = [col for col in cols if col in table.colnames]
        table = table[valid_cols]

        # The slicing operation resets display and hide keys to default, but we
        # want to bypass it here.
        table.unhide_columns()
        return table

    def path_format_keys(self):
        """
        Returns all the names that can be used to format filenames.

        Each one corresponds to a single column in the table, and the format
        syntax should match the dtype of that column, i.e. for a ``Time``
        object or a ``Quantity``.

        Returns
        -------
        `set`
            The keys of ``response_block_map`` that are common to every row.

        Notes
        -----
        The first row is used as the starting point, so the table must
        contain at least one row.
        """
        rbp = set(self[0].response_block_map.keys())
        for row in self[1:]:
            # ``set.intersection`` returns a new set and leaves ``rbp``
            # untouched; the in-place variant is needed to narrow the result.
            rbp.intersection_update(row.response_block_map.keys())
        return rbp

    def total_size(self):
        """
        Returns the total size of all files in a query.

        Derived classes must set the 'size_column' class attribute to make use
        of this.

        Returns
        -------
        `astropy.units.Quantity`
            The sum of all positive sizes, rounded to three decimals and
            expressed in a byte unit scaled by a power of 1000. NaN bytes is
            returned when the size column is absent or the sum is not
            positive.
        """
        if self.size_column not in self.colnames:
            return np.nan * u.byte
        sizes = self[self.size_column]
        # Strip negative filesizes
        total = np.nansum(sizes[sizes > 0])
        if not (total > 0 * u.byte):
            return np.nan * u.byte
        # Largest power of 1000 that does not exceed the total filesize
        power = 10**(np.floor(np.log10(total.to_value(u.byte)) // 3) * 3)
        # Create mapping from prefix value to prefix name
        prefix_dict = {p[2]: p[0][0] for p in u.si_prefixes}
        # The prefix list has no entry for a factor of 1, so totals from 1 to
        # 999 bytes are reported in plain bytes.
        prefix_unit = u.Unit(f'{prefix_dict.get(power, "")}byte')
        return total.to(prefix_unit).round(3)




# Register QueryResponseTable as a virtual subclass of BaseQueryResponse, so
# ``isinstance``/``issubclass`` checks against BaseQueryResponse accept it even
# though it derives from QTable rather than from BaseQueryResponse.
BaseQueryResponse.register(QueryResponseTable)



[docs]
def convert_row_to_table(func):
    """
    Decorator which turns a `~.QueryResponseRow` argument into a `~.QueryResponseTable`.

    The decorated callable is invoked as ``func(self, query_results, **kwargs)``.
    When ``query_results`` is a single row it is replaced by the result of its
    ``as_table()`` method before the call; anything else is passed through
    unchanged.
    """
    @wraps(func)
    def wrapper(self, query_results, **kwargs):
        is_single_row = isinstance(query_results, QueryResponseRow)
        results = query_results.as_table() if is_single_row else query_results
        return func(self, results, **kwargs)

    return wrapper



def _print_client(client, html=False, visible_entries=None):
    """
    Given a BaseClient instance, build a summary of each registered attribute.

    The summary consists of the client's qualified class name, the first
    paragraph of its docstring and a table of the values returned by
    ``client.register_values()``.

    Parameters
    ----------
    client : BaseClient
        The instance class to print for.
    html : bool
        Will return a html table instead.
    visible_entries : int, optional
        Passed to the table formatter as ``max_lines``.

    Returns
    -------
    `str`
        String with the client.
    """
    width = -1 if html else get_width()
    class_name = f"{client.__module__}.{client.__class__.__name__}"
    attrs = client.register_values()
    t = Table(names=["Attr Type", "Name", "Description"],
              dtype=["U80", "U80", "U80"])
    for client_key, values in attrs.items():
        # Work around for * attrs having one length.
        if len(values) == 1 and values[0] == "*":
            t.add_row((client_key.__name__, "All", "All valid values"))
            continue
        for name, desc in values:
            t.add_row((client_key.__name__, name, desc))
    # ``__doc__`` is None for a class without a docstring (or under ``-OO``);
    # fall back to an empty summary instead of failing inside repr/str.
    summary = dedent((client.__doc__ or "").partition("\n\n")[0])
    lines = [class_name, summary]
    if html:
        lines = [f"<p>{line}</p>" for line in lines]
    lines.extend(t.pformat_all(max_lines=visible_entries, show_dtype=False,
                               max_width=width, align="<", html=html))
    return '\n'.join(lines)



[docs]
class BaseClient(ABC):
    """
    This defines the Abstract Base Class for each download client.

    The BaseClient has several abstract methods that ensure that any subclass enforces the bare minimum API.
    These are `search`, `fetch` and `_can_handle_query`.
    The last one ensures that each download client can be registered with Fido.

    Most download clients should subclass `~sunpy.net.dataretriever.GenericClient`.
    If the structure of `~sunpy.net.dataretriever.GenericClient`
    is not useful you should use `BaseClient`.
    `~sunpy.net.vso.VSOClient` and `~sunpy.net.jsoc.JSOCClient`
    are examples of download clients that subclass ``BaseClient``.
    """

    _registry = dict()

    def __init_subclass__(cls, *args, **kwargs):
        """
        An __init_subclass__ hook initializes all of the subclasses of a given class.
        So for each subclass, it will call this block of code on import.
        This replicates some metaclass magic without the need to be aware of metaclasses.
        Here we use this to register each subclass in a dict that has the `_can_handle_query` attribute.
        This is then passed into the UnifiedDownloaderFactory so we can register them.
        This means that Fido can use the clients internally.
        """
        super().__init_subclass__(**kwargs)

        # We do not want to register GenericClient since its a dummy client.
        if cls.__name__ in ('GenericClient'):
            return

        cls._registry[cls] = cls._can_handle_query

        if hasattr(cls, "_attrs_module"):
            from sunpy.net import attrs

            name, module = cls._attrs_module()
            module_obj = importlib.import_module(module)

            existing_mod = getattr(attrs, name, None)
            if existing_mod and existing_mod is not module_obj:
                raise NameError(f"{name} has already been registered as an attrs name.")

            setattr(attrs, name, module_obj)

            if name not in attrs.__all__:
                attrs.__all__.append(name)

        # Register client attrs after it has registered its own attrs
        from sunpy.net import attr
        values = cls.register_values()
        # If the client has no support, we won't try to register attrs
        if values:
            attr.Attr.update_values({cls: values})

    def __repr__(self):
        """
        Returns the normal repr plus the pretty client __str__.
        """
        return object.__repr__(self) + "\n" + _print_client(visible_entries=15, client=self)

    def __str__(self):
        """
        This enables the "pretty" printing of BaseClient.
        """
        return _print_client(client=self)

    def _repr_html_(self):
        """
        This enables the "pretty" printing of the BaseClient with html.
        """
        return _print_client(visible_entries=15, client=self, html=True)


[docs]
    @abstractmethod
    def search(self, *args, **kwargs):
        """
        This enables the user to search for data using the client.

        Must return a subclass of `BaseQueryResponse`.
        """



[docs]
    @abstractmethod
    def fetch(self, query_results, *, path, downloader, **kwargs):
        """
        This enables the user to fetch the data using the client, after a search.

        Parameters
        ----------
        query_results:
            Results to download.
        path : `str` or `pathlib.Path`, optional
            Path to the download directory
        downloader : `parfive.Downloader`
            The download manager to use.

        Returns
        -------
        `parfive.Results`
            The results object, can be `None` if ``wait`` is `False` and
            ``downloader`` is not None.
        """


    @classmethod
    @abstractmethod
    def _can_handle_query(cls, *query):
        """
        This enables the client to register what kind of searches it can handle, to prevent Fido
        using the incorrect client.
        """

    @property
    def info_url(self):
        """
        This should return a string that is a URL to the data server or
        documentation on the data being served.
        """


[docs]
    @staticmethod
    def check_attr_types_in_query(query, required_attrs={}, optional_attrs={}):
        """
        Check a query againsted required and optional attributes.

        Returns `True` if *query* contains all the attrs in *required_attrs*,
        and if *query* contains only attrs in both *required_attrs* and *optional_attrs*.
        """
        query_attrs = {type(x) for x in query}
        all_attrs = required_attrs.union(optional_attrs)

        return required_attrs.issubset(query_attrs) and query_attrs.issubset(all_attrs)



[docs]
    @classmethod
    def register_values(cls, *query):
        """
        This enables the client to register what kind of Attrs it can use directly.

        Returns
        -------
        `dict`
            A dictionary with key values of Attrs and the values are a tuple of
            ("Attr Type", "Name", "Description").
        """
        return {}