Source code for sunpy.net.dataretriever.sources.goes

# Author: Rishabh Sharma <rishabh.sharma.gunner@gmail.com>
# This module was developed under funding provided by
# Google Summer of Code 2014

from datetime import datetime
from collections import OrderedDict

import astropy.units as u
from astropy.time import Time

from sunpy import config
from sunpy.net import attrs as a
from sunpy.net.dataretriever import GenericClient, QueryResponse
from sunpy.net.scraper import Scraper, get_timerange_from_exdict
from sunpy.time import TimeRange, parse_time

# Time format string read from the "time_format" option in the "general"
# section of the sunpy configuration.
TIME_FORMAT = config.get("general", "time_format")

# Names exported by ``from <this module> import *``.
__all__ = ["XRSClient", "SUVIClient"]


class XRSClient(GenericClient):
    """
    Provides access to several GOES XRS files archive.

    For older GOES satellites (7 and below), NASA servers are used.
    For newer GOES satellites (8 and above), NOAA servers are used.

    For GOES 8-15, the data is the re-processed science-quality data.

    .. note::

        The new science quality data have the scaling factors removed for GOES 8-15
        and they are not added to GOES 16 AND 17 data products.
        This means the peak flux will be different to the older version of the data,
        such as those collected from the NASA servers.

    See the following readmes about these data:

    * GOES 1 - 7: https://umbra.nascom.nasa.gov/goes/fits/goes_fits_files_notes.txt

    * Reprocessed 8 - 15: https://www.ncei.noaa.gov/data/goes-space-environment-monitor/access/science/xrs/GOES_1-15_XRS_Science-Quality_Data_Readme.pdf

    * GOES-R 16 - 17: https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes16/l1b/docs/GOES-R_EXIS_XRS_L1b_Science-Quality_Data_ReadMe.pdf

    Examples
    --------
    >>> from sunpy.net import Fido, attrs as a
    >>> results = Fido.search(a.Time("2016/1/1", "2016/1/2"),
    ...                       a.Instrument.xrs)  # doctest: +REMOTE_DATA
    >>> results  # doctest: +REMOTE_DATA
    <sunpy.net.fido_factory.UnifiedResponse object at ...>
    Results from 1 Provider:
    <BLANKLINE>
    8 Results from the XRSClient:
    Source: <8: https://umbra.nascom.nasa.gov/goes/fits
    8-15: https://www.ncei.noaa.gov/data/goes-space-environment-monitor/access/science/
    16-17: https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/
    <BLANKLINE>
           Start Time               End Time        ... Provider Resolution
    ----------------------- ----------------------- ... -------- ----------
    2016-01-01 00:00:00.000 2016-01-01 23:59:59.999 ...     NOAA      flx1s
    2016-01-02 00:00:00.000 2016-01-02 23:59:59.999 ...     NOAA      flx1s
    2016-01-01 00:00:00.000 2016-01-01 23:59:59.999 ...     NOAA      avg1m
    2016-01-02 00:00:00.000 2016-01-02 23:59:59.999 ...     NOAA      avg1m
    2016-01-01 00:00:00.000 2016-01-01 23:59:59.999 ...     NOAA      flx1s
    2016-01-02 00:00:00.000 2016-01-02 23:59:59.999 ...     NOAA      flx1s
    2016-01-01 00:00:00.000 2016-01-01 23:59:59.999 ...     NOAA      avg1m
    2016-01-02 00:00:00.000 2016-01-02 23:59:59.999 ...     NOAA      avg1m
    <BLANKLINE>
    <BLANKLINE>

    """
    # GOES XRS data from NASA servers up to GOES 7.
    baseurl_old = r'https://umbra.nascom.nasa.gov/goes/fits/%Y/go(\d){2}(\d){6,8}\.fits'
    pattern_old = '{}/fits/{year:4d}/go{SatelliteNumber:02d}{}{month:2d}{day:2d}.fits'

    # The reprocessed 8-15 data should be taken from NOAA.
    baseurl_new = (r"https://www.ncei.noaa.gov/data/goes-space-environment-monitor/access/science/xrs/"
                   r"goes{SatelliteNumber:02d}/{filename_res}-l2-{resolution}_science/%Y/%m/sci_{filename_res}-l2-{resolution}_g{SatelliteNumber:02d}_d%Y%m%d_.*\.nc")
    pattern_new = ("{}/goes{SatelliteNumber:02d}/{filename_res}-l2-{resolution}_science/{year:4d}/"
                   "{month:2d}/sci_{filename_res}-l2-{resolution}_g{SatelliteNumber:02d}_d{year:4d}{month:2d}{day:2d}_{}.nc")

    # GOES-R Series 16-17 XRS data from NOAA.
    baseurl_r = (r"https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes{SatelliteNumber}"
                 r"/l2/data/xrsf-l2-{Resolution}_science/%Y/%m/sci_xrsf-l2-{Resolution}_g{SatelliteNumber}_d%Y%m%d_.*\.nc")
    pattern_r = ("{}/goes/goes{SatelliteNumber:02d}/l2/data/xrsf-l2-{Resolution}_science/{year:4d}/"
                 "{month:2d}/sci_xrsf-l2-{Resolution}_g{SatelliteNumber:02d}_d{year:4d}{month:2d}{day:2d}_{}.nc")

    @property
    def info_url(self):
        """
        Text listing the archive root used for each range of GOES satellites.
        """
        return ("<8: https://umbra.nascom.nasa.gov/goes/fits \n"
                "8-15: https://www.ncei.noaa.gov/data/goes-space-environment-monitor/access/science/ \n"
                "16-17: https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/")

    def post_search_hook(self, i, matchdict):
        """
        Build one result row from the metadata extracted for a single file.

        Parameters
        ----------
        i : dict
            Metadata extracted for one file; ``"url"`` and ``"SatelliteNumber"``
            are read here and the whole mapping is passed to
            ``get_timerange_from_exdict``.
        matchdict : dict
            Search parameters; the ``"Instrument"``, ``"Physobs"``, ``"Source"``
            and ``"Provider"`` lists are read.

        Returns
        -------
        `collections.OrderedDict`
            The row describing the file. ``"Resolution"`` is only present for
            files whose URL does not end in ``.fits``.

        Raises
        ------
        RuntimeError
            If the URL of a non-FITS file contains none of the known
            resolution markers.
        """
        tr = get_timerange_from_exdict(i)
        url = i["url"]
        providers = matchdict["Provider"]

        rowdict = OrderedDict()
        rowdict['Start Time'] = tr.start
        rowdict['Start Time'].format = 'iso'
        rowdict['End Time'] = tr.end
        rowdict['End Time'].format = 'iso'
        rowdict["Instrument"] = matchdict["Instrument"][0].upper()
        rowdict["SatelliteNumber"] = i["SatelliteNumber"]
        rowdict["Physobs"] = matchdict["Physobs"][0]
        rowdict["url"] = url
        rowdict["Source"] = matchdict["Source"][0]

        if url.endswith(".fits"):
            # for older FITS files
            rowdict["Provider"] = providers[0]
            return rowdict

        # only Resolution attrs for the netcdf files
        # The provider list can hold a single entry (``_get_metalist`` compares
        # it against ``["sdac"]``), so index from the end: a fixed ``[1]`` would
        # raise IndexError when only one provider is present.
        rowdict["Provider"] = providers[-1]
        if "avg1m" in url:
            rowdict["Resolution"] = "avg1m"
        elif ("flx1s" in url) or ("irrad" in url):
            rowdict["Resolution"] = "flx1s"
        else:
            raise RuntimeError("Could not parse resolution from URL")
        return rowdict

    def search(self, *args, **kwargs):
        """
        Search the GOES XRS archives.

        The arguments are forwarded unchanged to ``self._get_match_dict``.

        Returns
        -------
        `QueryResponse`
            The rows found, built with this client as ``client``.
        """
        matchdict = self._get_match_dict(*args, **kwargs)
        # this is for the case when the timerange overlaps with the provider change.
        if matchdict["Start Time"] < "2001-03-01" and matchdict["End Time"] >= "2001-03-01":
            matchdict_before, matchdict_after = matchdict.copy(), matchdict.copy()
            matchdict_after["Start Time"] = parse_time('2001-03-01')
            # NOTE(review): this end time equals 2001-03-01 exactly, so it does
            # not satisfy the strict ``<`` test in ``_get_metalist``; confirm
            # which archive the "before" half is meant to query.
            matchdict_before["End Time"] = parse_time('2001-03-01')
            metalist = self._get_metalist(matchdict_before) + self._get_metalist(matchdict_after)
        else:
            metalist = self._get_metalist(matchdict)
        return QueryResponse(metalist, client=self)

    def _get_metalist_fn(self, matchdict, baseurl, pattern):
        """
        Function to help get list of OrderedDicts.

        Scrapes ``baseurl`` over the time range in ``matchdict``, extracts the
        file metadata with ``pattern`` and turns every entry into a row with
        `post_search_hook`.
        """
        scraper = Scraper(baseurl, regex=True)
        tr = TimeRange(matchdict["Start Time"], matchdict["End Time"])
        filemeta = scraper._extract_files_meta(tr, extractor=pattern, matcher=matchdict)
        return [self.post_search_hook(i, matchdict) for i in filemeta]

    def _get_metalist(self, matchdict):
        """
        Function to get the list of OrderDicts.

        This makes it easier for when searching for overlapping providers.

        Raises
        ------
        RuntimeError
            If a requested resolution is not known for the GOES 8-15 archive.
        """
        # The data before the re-processed GOES 8-15 data.
        if matchdict["End Time"] < "2001-03-01" or matchdict["Provider"] == ["sdac"]:
            return self._get_metalist_fn(matchdict, self.baseurl_old, self.pattern_old)

        # New data from NOAA. It searches for both the high cadence and 1 minute average data.
        metalist = []
        if matchdict["End Time"] >= "2017-02-07":
            for sat in matchdict["SatelliteNumber"]:
                if int(sat) >= 16:  # here check for GOES 16 and 17
                    for res in matchdict["Resolution"]:
                        baseurl = self.baseurl_r.format(SatelliteNumber=int(sat), Resolution=res)
                        metalist += self._get_metalist_fn(matchdict, baseurl, self.pattern_r)

        # The 1 minute average data is at a different base URL to that of the
        # high cadence data: resolution attr -> (filename prefix, URL resolution).
        science_products = {"avg1m": ("xrsf", "avg1m"), "flx1s": ("gxrs", "irrad")}
        # The requested range overlaps this archive when it *starts* on or before
        # the cut-off date; testing the end time would drop GOES 8-15 results
        # for any range that merely extends past that date.
        if matchdict["Start Time"] <= "2020-03-04":
            for sat in matchdict["SatelliteNumber"]:
                if 8 <= int(sat) <= 15:  # here check for GOES 8-15
                    for res in matchdict["Resolution"]:
                        if res not in science_products:
                            raise RuntimeError(f"`{res}` is not an accepted resolution attrs for the XRSClient")
                        filename_res, resolution = science_products[res]
                        baseurl = self.baseurl_new.format(SatelliteNumber=int(sat),
                                                          filename_res=filename_res,
                                                          resolution=resolution)
                        metalist += self._get_metalist_fn(matchdict, baseurl, self.pattern_new)
        return metalist

    @classmethod
    def _attrs_module(cls):
        """
        Return the attrs module name and its import path for this client.
        """
        return 'goes', 'sunpy.net.dataretriever.attrs.goes'

    @classmethod
    def register_values(cls):
        """
        Return the attr values this client registers, keyed by attr class.

        Each value is a list of ``(value, description)`` tuples.
        """
        from sunpy.net import attrs
        goes_number = [2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
        adict = {attrs.Instrument: [
            ("GOES", "The Geostationary Operational Environmental Satellite Program."),
            ("XRS", "GOES X-ray Sensor")],
            attrs.Physobs: [('irradiance', 'the flux of radiant energy per unit area.')],
            attrs.Source: [("GOES", "The Geostationary Operational Environmental Satellite Program.")],
            attrs.Provider: [('SDAC', 'The Solar Data Analysis Center.'),
                             ('NOAA', 'The National Oceanic and Atmospheric Administration.')],
            attrs.goes.SatelliteNumber: [(str(x), f"GOES Satellite Number {x}") for x in goes_number],
            attrs.Resolution: [('flx1s', 'High-cadence measurements XRS observation, 1s for GOES-R, 2s for GOES 13-15, 3s for GOES<13'),
                               ('avg1m', '1-minute averages of XRS measurements')]}
        return adict
class SUVIClient(GenericClient):
    """
    Provides access to data from the GOES Solar Ultraviolet Imager (SUVI).

    `SUVI data are provided by NOAA <https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/>`__.

    The SUVI instrument was first included on GOES-16. It produces level 1b as well as
    level-2 data products. Level 2 data products are a weighted average of
    level 1b product files and therefore provide higher imaging dynamic range
    than individual images. The exposure time of level 1b images range from 1 s to 0.005 s.

    SUVI supports the following wavelengths;
    94, 131, 171, 195, 284, 304 angstrom.
    If no wavelength is specified, images from all wavelengths are returned.

    Examples
    --------
    >>> from sunpy.net import Fido, attrs as a
    >>> import astropy.units as u
    >>> results = Fido.search(a.Time("2020/7/10", "2020/7/10 00:10"), a.Instrument('suvi'),a.Level.two,
    ...                       a.goes.SatelliteNumber(16), a.Wavelength(304*u.Angstrom))  # doctest: +REMOTE_DATA
    >>> results  # doctest: +REMOTE_DATA
    <sunpy.net.fido_factory.UnifiedResponse object at ...>
    Results from 1 Provider:
    <BLANKLINE>
    3 Results from the SUVIClient:
    Source: https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes
    <BLANKLINE>
           Start Time               End Time        Instrument ... Level Wavelength
                                                               ...        Angstrom
    ----------------------- ----------------------- ---------- ... ----- ----------
    2020-07-10 00:00:00.000 2020-07-10 00:04:00.000       SUVI ...     2      304.0
    2020-07-10 00:04:00.000 2020-07-10 00:08:00.000       SUVI ...     2      304.0
    2020-07-10 00:08:00.000 2020-07-10 00:12:00.000       SUVI ...     2      304.0
    <BLANKLINE>
    <BLANKLINE>

    """
    # Level 1b files: URL template to scrape and the pattern used to extract metadata.
    baseurl1b = (r'https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes'
                 r'{SatelliteNumber}/l1b/suvi-l1b-{elem:2}{wave:03}/%Y/%m/%d/OR_SUVI-L1b.*\.fits.gz')
    pattern1b = ('{}/goes/goes{SatelliteNumber:2d}/l{Level:2w}/suvi-l1b-{}{Wavelength:03d}/'
                 '{year:4d}/{month:2d}/{day:2d}/{}_s{:7d}{hour:2d}{minute:2d}{second:2d}'
                 '{:1d}_e{:7d}{ehour:2d}{eminute:2d}{esecond:2d}{:1d}_{}')

    # Level 2 files: URL template to scrape and the pattern used to extract metadata.
    baseurl2 = (r'https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes{SatelliteNumber}/'
                r'l2/data/suvi-l2-ci{wave:03}/%Y/%m/%d/dr_suvi-l2-ci{wave:03}_g{SatelliteNumber}_s%Y%m%dT%H%M%SZ_.*\.fits')
    pattern2 = ('{}/goes/goes{SatelliteNumber:2d}/{}/dr_suvi-l{Level}-ci{Wavelength:03d}_g{SatelliteNumber:2d}_s'
                '{year:4d}{month:2d}{day:2d}T{hour:2d}{minute:2d}{second:2d}Z_e'
                '{eyear:4d}{emonth:2d}{eday:2d}T{ehour:2d}{eminute:2d}{esecond:2d}Z_{}')

    @property
    def info_url(self):
        """
        URL of the archive root the SUVI data are served from.
        """
        return 'https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes'

    def post_search_hook(self, i, matchdict):
        """
        Build one result row from the metadata extracted for a single file.

        Parameters
        ----------
        i : dict
            Metadata extracted for one file: the start date and time fields,
            the end time fields (plus ``eyear``/``emonth``/``eday`` when the
            extraction pattern names them), ``"SatelliteNumber"``,
            ``"Level"``, ``"Wavelength"`` and ``"url"``.
        matchdict : dict
            Search parameters; the ``"Instrument"``, ``"Physobs"``, ``"Source"``
            and ``"Provider"`` lists are read.

        Returns
        -------
        `collections.OrderedDict`
            The row describing the file.
        """
        from datetime import timedelta

        # Extracting start times and end times
        start_dt = datetime(i['year'], i['month'], i['day'],
                            i['hour'], i['minute'], i['second'])
        if 'eyear' in i:
            # ``pattern2`` names the end date explicitly; use it so that a
            # file ending after midnight gets the correct end day.
            end_dt = datetime(i['eyear'], i['emonth'], i['eday'],
                              i['ehour'], i['eminute'], i['esecond'])
        else:
            # ``pattern1b`` names only the end clock time, so reuse the start
            # date and roll over one day if the end would precede the start.
            end_dt = datetime(i['year'], i['month'], i['day'],
                              i['ehour'], i['eminute'], i['esecond'])
            if end_dt < start_dt:
                end_dt += timedelta(days=1)

        start = Time(start_dt)
        start.format = 'iso'
        end = Time(end_dt)
        end.format = 'iso'

        rowdict = OrderedDict()
        rowdict['Start Time'] = start
        rowdict['End Time'] = end
        rowdict['Instrument'] = matchdict['Instrument'][0].upper()
        rowdict['Physobs'] = matchdict['Physobs'][0]
        rowdict['Source'] = matchdict['Source'][0]
        rowdict['Provider'] = matchdict['Provider'][0]
        rowdict['SatelliteNumber'] = i['SatelliteNumber']
        rowdict['Level'] = i['Level']
        rowdict['Wavelength'] = i['Wavelength']*u.Angstrom
        rowdict['url'] = i['url']
        return rowdict

    def search(self, *args, **kwargs):
        """
        Search the SUVI archive.

        The arguments are forwarded unchanged to ``self._get_match_dict``.
        Every combination of satellite number, level and wavelength is scraped.

        Returns
        -------
        `QueryResponse`
            The rows found, built with this client as ``client``.

        Raises
        ------
        ValueError
            If a requested level is neither ``1b`` nor ``2``.
        """
        supported_waves = [94, 131, 171, 195, 284, 304]*u.Angstrom
        matchdict = self._get_match_dict(*args, **kwargs)

        req_wave = matchdict.get('Wavelength', None)
        if req_wave is not None:
            wmin = req_wave.min.to(u.Angstrom, equivalencies=u.spectral())
            wmax = req_wave.max.to(u.Angstrom, equivalencies=u.spectral())
            req_wave = a.Wavelength(wmin, wmax)
            all_waves = [int(wave.value) for wave in supported_waves if wave in req_wave]
        else:
            all_waves = [int(i.value) for i in supported_waves]

        all_satnos = matchdict.get('SatelliteNumber')
        all_levels = matchdict.get('Level')
        # The time range is the same for every combination, so build it once.
        tr = TimeRange(matchdict['Start Time'], matchdict['End Time'])
        metalist = []

        # iterating over all possible Attr values through loops
        for satno in all_satnos:
            for level in all_levels:
                for wave in all_waves:
                    formdict = {'wave': wave, 'SatelliteNumber': satno}
                    if str(level) == '1b':
                        # The level 1b directory name carries an element prefix.
                        formdict['elem'] = 'he' if wave == 304 else 'fe'
                        baseurl = self.baseurl1b
                        pattern = self.pattern1b
                    elif str(level) == '2':
                        baseurl = self.baseurl2
                        pattern = self.pattern2
                    else:
                        raise ValueError(f"Level {level} is not supported.")

                    # formatting baseurl using Level, SatelliteNumber and Wavelength
                    urlpattern = baseurl.format(**formdict)

                    scraper = Scraper(urlpattern)
                    filesmeta = scraper._extract_files_meta(tr, extractor=pattern)
                    metalist.extend(self.post_search_hook(i, matchdict) for i in filesmeta)

        return QueryResponse(metalist, client=self)

    @classmethod
    def _attrs_module(cls):
        """
        Return the attrs module name and its import path for this client.
        """
        return 'goes', 'sunpy.net.dataretriever.attrs.goes'

    @classmethod
    def register_values(cls):
        """
        Return the attr values this client registers, keyed by attr class.
        """
        from sunpy.net import attrs
        goes_number = [16, 17, 18]
        adict = {attrs.Instrument: [
            ("SUVI", "GOES Solar Ultraviolet Imager.")],
            attrs.goes.SatelliteNumber: [(str(x), f"GOES Satellite Number {x}") for x in goes_number],
            attrs.Source: [('GOES', 'The Geostationary Operational Environmental Satellite Program.')],
            attrs.Physobs: [('flux', 'a measure of the amount of radiation received by an object from a given source.')],
            attrs.Provider: [('NOAA', 'The National Oceanic and Atmospheric Administration.')],
            attrs.Level: [('1b', 'Solar images at six wavelengths with image exposures 10 msec or 1 sec.'),
                          ('2', 'Weighted average of level-1b product files of SUVI.')],
            attrs.Wavelength: [('*')]}
        return adict