Source code for sunpy.io._cdf

"""
This module provides a CDF file reader for internal use.
"""
import cdflib
import numpy as np
import pandas as pd
from cdflib.epochs import CDFepoch
from packaging.version import Version

import astropy.units as u

from sunpy import log
from sunpy.timeseries import GenericTimeSeries
from sunpy.util.exceptions import warn_user

# Names exported by ``from sunpy.io._cdf import *``: only the reader function.
__all__ = ['read_cdf']


def read_cdf(fname):
    """
    Read a CDF file that follows the ISTP/IACG guidelines.

    Parameters
    ----------
    fname : path-like
        Location of single CDF file to read.

    Returns
    -------
    list[GenericTimeSeries]
        A list of time series objects, one for each unique time index within
        the CDF file. The list is empty if no time index could be read.

    Notes
    -----
    * Only variables with a non-``None`` ``DEPEND_0`` attribute are read.
    * A time index whose read raises `ValueError` is skipped, and variables
      whose ``Last_Rec`` is ``-1`` are skipped with a debug log message.
    * For floating point variables, values equal to the ``FILLVAL`` attribute
      (when one is present) are replaced by NaN. Other dtypes are unchanged.
    * Unit strings are parsed with `astropy.units.Unit`, falling back to the
      module-level ``_known_units`` table and finally, with a warning, to
      dimensionless.
    * Two-dimensional variables are split into one column per trailing index,
      named ``<variable>_<i>``. Variables with more than two dimensions are
      skipped with a warning.

    References
    ----------
    Space Physics Guidelines for CDF https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html
    """
    cdf = cdflib.CDF(str(fname))

    # From cdflib 1.0.0 the results of ``cdf_info``/``varinq`` are read with
    # attribute access; older releases use item access. The installed version
    # cannot change while we read, so decide once instead of per variable.
    attribute_api = (hasattr(cdflib, "__version__")
                     and Version(cdflib.__version__) >= Version("1.0.0"))

    # Extract the time varying variables
    cdf_info = cdf.cdf_info()
    meta = cdf.globalattsget()
    if attribute_api:
        all_var_keys = cdf_info.rVariables + cdf_info.zVariables
    else:
        all_var_keys = cdf_info['rVariables'] + cdf_info['zVariables']
    var_attrs = {key: cdf.varattsget(key) for key in all_var_keys}

    # Get keys that depend on time, sorted once so that columns are added in
    # a deterministic order for every time index
    var_keys = sorted(var for var, attrs in var_attrs.items()
                      if attrs.get('DEPEND_0') is not None)

    # Get unique time index keys
    time_index_keys = sorted({var_attrs[var]['DEPEND_0'] for var in var_keys})

    all_ts = []
    # For each time index, construct a GenericTimeSeries
    for index_key in time_index_keys:
        try:
            index = cdf.varget(index_key)
        except ValueError:
            # Empty index for cdflib >= 0.3.20
            continue
        # TODO: use to_astropy_time() instead here when we drop pandas in timeseries
        index = CDFepoch.to_datetime(index)
        df_dict = {}
        units = {}

        for var_key in var_keys:
            attrs = var_attrs[var_key]
            # If this variable doesn't depend on this index, continue
            if attrs['DEPEND_0'] != index_key:
                continue

            # Get data
            var_info = cdf.varinq(var_key)
            var_last_rec = var_info.Last_Rec if attribute_api else var_info['Last_Rec']
            if var_last_rec == -1:
                log.debug(f'Skipping {var_key} in {fname} as it has zero elements')
                continue

            data = cdf.varget(var_key)

            # Set fillval values to NaN
            # It would be nice to properly mask these values to work with
            # non-floating point (ie. int) dtypes, but this is not possible with pandas
            if np.issubdtype(data.dtype, np.floating):
                # Not every file defines FILLVAL; without one there is nothing to mask
                fillval = attrs.get('FILLVAL')
                if fillval is not None:
                    data[data == fillval] = np.nan

            # Get units
            if 'UNITS' in attrs:
                unit_str = attrs['UNITS']
                try:
                    unit = u.Unit(unit_str)
                except ValueError:
                    if unit_str in _known_units:
                        unit = _known_units[unit_str]
                    else:
                        warn_user(f'astropy did not recognize units of "{unit_str}". '
                                  'Assigning dimensionless units. '
                                  'If you think this unit should not be dimensionless, '
                                  'please raise an issue at https://github.com/sunpy/sunpy/issues')
                        unit = u.dimensionless_unscaled
            else:
                warn_user(f'No units provided for variable "{var_key}". '
                          'Assigning dimensionless units.')
                unit = u.dimensionless_unscaled

            if data.ndim > 2:
                # Skip data with dimensions >= 3 and give user warning
                warn_user(f'The variable "{var_key}" has been skipped because it has more than 2 dimensions, which is unsupported.')
            elif data.ndim == 2:
                # Multiple columns, give each column a unique label
                for i, col in enumerate(data.T):
                    col_name = f'{var_key}_{i}'
                    df_dict[col_name] = col
                    units[col_name] = unit
            else:
                # Single column
                df_dict[var_key] = data
                units[var_key] = unit

        df = pd.DataFrame(df_dict, index=pd.DatetimeIndex(name=index_key, data=index))
        all_ts.append(GenericTimeSeries(data=df, units=units, meta=meta))

    if not all_ts:
        log.debug(f'No data found in file {fname}')
    return all_ts
# Unfortunately (unlike e.g. FITS), there is no standard for the strings that
# CDF files use to represent units. To allow for this we maintain a dictionary
# mapping unit strings to their astropy unit equivalents.
#
# The table is only consulted by ``read_cdf`` after ``astropy.units.Unit``
# has failed to parse the string, and the lookup is an exact match: case and
# whitespace in the keys below are significant.
#
# Please only add new entries if
#   1. A user identifies which specific mission/data source they are needed for
#   2. The mapping from the string to unit is un-ambiguous. If we get this
#      wrong then users will silently have the wrong units in their data!
_known_units = {
    'ratio': u.dimensionless_unscaled,
    'NOTEXIST': u.dimensionless_unscaled,
    'Unitless': u.dimensionless_unscaled,
    'unitless': u.dimensionless_unscaled,
    'Quality_Flag': u.dimensionless_unscaled,
    'None': u.dimensionless_unscaled,
    'none': u.dimensionless_unscaled,
    ' none': u.dimensionless_unscaled,
    'counts': u.dimensionless_unscaled,
    'cnts': u.dimensionless_unscaled,

    # "microW" is micro-watt, i.e. astropy's ``uW`` (``mW`` is milli-watt)
    'microW m^-2': u.uW * u.m**-2,

    'years': u.yr,
    'days': u.d,

    '#/cc': u.cm**-3,
    '#/cm^3': u.cm**-3,
    'cm^{-3}': u.cm**-3,
    'particles cm^-3': u.cm**-3,
    'n/cc (from moments)': u.cm**-3,
    'n/cc (from fits)': u.cm**-3,
    'Per cc': u.cm**-3,
    '#/cm3': u.cm**-3,
    'n/cc': u.cm**-3,

    'km/sec': u.km / u.s,
    'km/sec (from fits)': u.km / u.s,
    'km/sec (from moments)': u.km / u.s,
    'Km/s': u.km / u.s,

    'Volts': u.V,

    'earth radii': u.earthRad,
    'Re': u.earthRad,
    'Earth Radii': u.earthRad,
    'Re (1min)': u.earthRad,
    'Re (1hr)': u.earthRad,

    'Degrees': u.deg,
    'degrees': u.deg,
    'Deg': u.deg,
    'deg (from fits)': u.deg,
    'deg (from moments)': u.deg,
    'deg (>200)': u.deg,

    'Deg K': u.K,
    'deg_K': u.K,
    '#/{cc*(cm/s)^3}': (u.cm**3 * (u.cm / u.s)**3)**-1,

    'sec': u.s,
    'Samples/s': 1 / u.s,
    'seconds': u.s,

    'nT GSE': u.nT,
    'nT GSM': u.nT,
    'nT DSL': u.nT,
    'nT SSL': u.nT,
    'nT (1min)': u.nT,
    'nT (3sec)': u.nT,
    'nT (1hr)': u.nT,
    'nT (>200)': u.nT,

    'msec': u.ms,
    'milliseconds': u.ms,

    # Particle fluxes; the "per nucleon" qualifiers in the strings are not
    # carried into the astropy unit
    '#/cm2-ster-eV-sec': 1 / (u.cm**2 * u.sr * u.eV * u.s),
    '#/(cm^2*s*sr*MeV/nuc)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '#/(cm^2*s*sr*Mev/nuc)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '#/(cm^2*s*sr*Mev/nucleon)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '#/(cm2-steradian-second-MeV/nucleon) ': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '1/(cm2 Sr sec MeV/nucleon)': 1 / (u.cm**2 * u.sr * u.s * u.MeV),
    '1/(cm**2-s-sr-MeV)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '1/(cm**2-s-sr-MeV/nuc.)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '(cm^2 s sr MeV/n)^-1': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    'cm!E-2!Nsr!E-1!Nsec!E-1!N(MeV/nuc)!E-1!N': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    'cm!E-2!Nsr!E-1!Nsec!E-1!NMeV!E-1!N': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '1/(cm^2 sec ster MeV)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    '(cm^2 s sr MeV)^-1': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    'cnts/sec/sr/cm^2/MeV': 1 / (u.cm**2 * u.s * u.sr * u.MeV),

    'particles / (s cm^2 sr MeV)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    'particles / (s cm^2 sr MeV/n)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),
    'particles/(s cm2 sr MeV/n)': 1 / (u.cm**2 * u.s * u.sr * u.MeV),

    '1/(cm**2-s-sr)': 1 / (u.cm**2 * u.s * u.sr),
    '1/(SQcm-ster-s)': 1 / (u.cm**2 * u.s * u.sr),
    '1/(SQcm-ster-s)..': 1 / (u.cm**2 * u.s * u.sr),

    'photons cm^-2 s^-1': 1 / (u.cm**2 * u.s),

    'Counts/256sec': 1 / (256 * u.s),
    'Counts/hour': 1 / u.hr,
    'counts/min': 1 / u.min,
    'counts / s': 1/u.s,
    'counts/s': 1/u.s,
    'cnts/sec': 1/u.s,
    'counts s!E-1!N': 1/u.s,
}