'wx_data': {}, # placeholder key, values to come later
}
- data = dict()
- data['provider'] = config['station']['type']
+ station = dict()
+ station['provider'] = config['station']['type']
- if data['provider'] not in ['nrcs', 'mesowest']:
+ if station['provider'] not in ['nrcs', 'mesowest']:
print("Please specify either nrcs or mesowest as the station type.")
sys.exit(1)
- if data['provider'] == 'nrcs':
- data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
- data['station_id'] = config['station']['station_id']
+ if station['provider'] == 'nrcs':
+ station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
+ station['station_id'] = config['station']['station_id']
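+ # NOTE: for NRCS, station_id is the station triplet that gets passed
+ # straight to the web service (as stationTriplets) later on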
try:
- data['desired_data'] = config['station']['desired_data'].split(',')
+ station['desired_data'] = config['station']['desired_data'].split(',')
except:
# desired_data malformed or missing, setting default
- data['desired_data'] = [
+ station['desired_data'] = [
'TOBS', # AIR TEMPERATURE OBSERVED (degF)
'SNWD', # SNOW DEPTH (in)
'PREC' # PRECIPITATION ACCUMULATION (in)
]
# XXX: For NRCS, we're manually overriding units for now! Once
# unit conversion is supported for NRCS, REMOVE THIS!
- if 'units' not in data:
- data['units'] = 'imperial'
+ if 'units' not in station:
+ station['units'] = 'imperial'
- if data['provider'] == 'mesowest':
- data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
- data['station_id'] = config['station']['station_id']
- data['units'] = config['station']['units']
+ if station['provider'] == 'mesowest':
+ station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
+ station['station_id'] = config['station']['station_id']
+ station['units'] = config['station']['units']
try:
- data['desired_data'] = config['station']['desired_data']
+ station['desired_data'] = config['station']['desired_data']
except:
# desired_data malformed or missing, setting default
- data['desired_data'] = 'air_temp,snow_depth'
+ station['desired_data'] = 'air_temp,snow_depth'
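+ # NOTE: unlike NRCS, desired_data stays a comma-separated string here,
+ # since it gets concatenated directly into the API URL below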
# construct full API URL (sans start/end time, added later)
- data['source'] = data['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['station_id'] + '&vars=' + data['desired_data']
+ station['source'] = station['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + station['units'] + '&stid=' + station['station_id'] + '&vars=' + station['desired_data']
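+ # at this point, station['source'] has the form:
+ # <source>?token=<token>&within=60&units=<units>&stid=<station_id>&vars=<desired_data>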
except KeyError as e:
LOG.critical("%s not defined in %s" % (e, options.config))
LOG.critical("Config value '%s.%s' is empty" % (key, subkey))
exit(1)
- return (infoex, data)
+ return (infoex, station)
def setup_logging(log_level):
"""Setup our logging infrastructure"""
print("\nPlease select an appropriate log level or remove the switch (--log-level).")
sys.exit(1)
- (infoex, data) = setup_config(config)
+ (infoex, station) = setup_config(config)
LOG.debug('Config parsed, starting up')
# create mappings
(fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
- iemap = setup_infoex_counterparts_mapping(data['provider'])
+ iemap = setup_infoex_counterparts_mapping(station['provider'])
# override units if user selected metric
#
# NOTE: the MesoWest API itself handles the unit conversion; in the
# future, we will also support NRCS unit conversion, but that must be
# done by this program.
- if data['units'] == 'metric':
+ if station['units'] == 'metric':
final_data[fmap['tempPresUnit']] = 'C'
final_data[fmap['hsUnit']] = 'm'
final_data[fmap['windSpeedUnit']] = 'm/s'
begin_date = end_date - datetime.timedelta(hours=3)
# get the data
- LOG.debug("Getting %s data from %s to %s" % (str(data['desired_data']),
+ LOG.debug("Getting %s data from %s to %s" % (str(station['desired_data']),
str(begin_date), str(end_date)))
time_all_elements = time.time()
- # NRCS-specific code
- if data['provider'] == 'nrcs':
- transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
- client = zeep.Client(wsdl=data['source'], transport=transport)
-
- for elementCd in data['desired_data']:
- time_element = time.time()
-
- # get the last three hours of data for this elementCd
- tmp = client.service.getHourlyData(
- stationTriplets=[data['station_id']],
- elementCd=elementCd,
- ordinal=1,
- beginDate=begin_date,
- endDate=end_date)
-
- LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
- time.time() - time_element))
-
- values = tmp[0]['values']
-
- # sort and isolate the most recent
- #
- # NOTE: we do this because sometimes there are gaps in hourly data
- # in NRCS; yes, we may end up with slightly inaccurate data,
- # so perhaps this decision will be re-evaluated in the future
- if values:
- ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
- infoex['wx_data'][elementCd] = ordered[0]['value']
- else:
- infoex['wx_data'][elementCd] = None
-
- # MesoWest-specific code
- elif data['provider'] == 'mesowest':
- # massage begin/end date format
- begin_date_str = begin_date.strftime('%Y%m%d%H%M')
- end_date_str = end_date.strftime('%Y%m%d%H%M')
-
- # construct final, completed API URL
- api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
- req = requests.get(api_req_url)
-
- try:
- json = req.json()
- except ValueError:
- LOG.error("Bad JSON in MesoWest response")
- sys.exit(1)
-
- try:
- observations = json['STATION'][0]['OBSERVATIONS']
- except ValueError:
- LOG.error("Bad JSON in MesoWest response")
- sys.exit(1)
-
- pos = len(observations['date_time']) - 1
-
- for elementCd in data['desired_data'].split(','):
- # sort and isolate the most recent, see note above in NRCS for how and
- # why this is done
- #
- # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
- # data (whereas with NRCS, we have to make a separate request for
- # each element we want). This is nice for network efficiency but
- # it means we have to handle this part differently for each.
- #
- # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
- # provides hourly data, but MesoWest can often provide data every
- # 10 minutes -- though this provides more opportunity for
- # irregularities
-
- # we may not have the data at all
- key_name = elementCd + '_set_1'
- if key_name in observations:
- if observations[key_name][pos]:
- infoex['wx_data'][elementCd] = observations[key_name][pos]
- else:
- infoex['wx_data'][elementCd] = None
- else:
- infoex['wx_data'][elementCd] = None
+ # get the data
+ if station['provider'] == 'nrcs':
+ infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
+ elif station['provider'] == 'mesowest':
+ infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
+ station)
- LOG.info("Time to get all data : %.3f sec" % (time.time() -
+ LOG.info("Time taken to get all data : %.3f sec" % (time.time() -
time_all_elements))
LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))
LOG.debug('DONE')
return 0
-# Data structure operations
+# data structure operations
def setup_infoex_fields_mapping(location_uuid):
"""
Create a mapping of InfoEx fields to the local data's indexing scheme.
return iemap
+# provider-specific operations
+def get_nrcs_data(begin, end, station):
+ """get the data we're after from the NRCS WSDL"""
+ transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
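+ # (SqliteCache should let zeep reuse the fetched WSDL between runs)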
+ client = zeep.Client(wsdl=station['source'], transport=transport)
+ remote_data = {}
+
+ for elementCd in station['desired_data']:
+ time_element = time.time()
+
+ # get the last three hours of data for this elementCd
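+ # (ordinal=1 is assumed to select the station's primary sensor)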
+ tmp = client.service.getHourlyData(
+ stationTriplets=[station['station_id']],
+ elementCd=elementCd,
+ ordinal=1,
+ beginDate=begin,
+ endDate=end)
+
+ LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
+ time.time() - time_element))
+
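+ # we only request a single station triplet above, so the first (and
+ # presumably only) result is the one we want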
+ values = tmp[0]['values']
+
+ # sort and isolate the most recent
+ #
+ # NOTE: we do this because sometimes there are gaps in hourly data
+ # in NRCS; yes, we may end up with slightly inaccurate data,
+ # so perhaps this decision will be re-evaluated in the future
+ if values:
+ ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
+ remote_data[elementCd] = ordered[0]['value']
+ else:
+ remote_data[elementCd] = None
+
+ return remote_data
+
+def get_mesowest_data(begin, end, station):
+ """get the data we're after from the MesoWest/Synoptic API"""
+ remote_data = {}
+
+ # massage begin/end date format
+ begin_date_str = begin.strftime('%Y%m%d%H%M')
+ end_date_str = end.strftime('%Y%m%d%H%M')
+
+ # construct final, completed API URL
+ api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
+ req = requests.get(api_req_url)
+
+ try:
+ json = req.json()
+ except ValueError:
+ LOG.error("Bad JSON in MesoWest response")
+ sys.exit(1)
+
+ try:
+ observations = json['STATION'][0]['OBSERVATIONS']
+ except (KeyError, IndexError):
+ LOG.error("Unexpected response structure from MesoWest")
+ sys.exit(1)
+
+ pos = len(observations['date_time']) - 1
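+ # pos indexes the most recent observation (the last entry in the
+ # date_time array)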
+
+ for elementCd in station['desired_data'].split(','):
+ # sort and isolate the most recent, see note above in NRCS for how and
+ # why this is done
+ #
+ # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
+ # data (whereas with NRCS, we have to make a separate request for
+ # each element we want). This is nice for network efficiency but
+ # it means we have to handle this part differently for each.
+ #
+ # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
+ # provides hourly data, but MesoWest can often provide data every
+ # 10 minutes -- though this provides more opportunity for
+ # irregularities
+
+ # we may not have the data at all
+ key_name = elementCd + '_set_1'
+ if key_name in observations:
+ if observations[key_name][pos] is not None:
+ remote_data[elementCd] = observations[key_name][pos]
+ else:
+ remote_data[elementCd] = None
+ else:
+ remote_data[elementCd] = None
+
+ return remote_data
+
# CSV operations
def write_local_csv(path_to_file, data):
"""Write the specified CSV file to disk"""