X-Git-Url: https://wylark.com/src/infoex-autowx.git/blobdiff_plain/84b9766b747b9310e73cda1d746b7a52250cb67c..517f048d9255bb05dc8a4fc04aef3bd0859ed74f:/infoex-autowx.py?ds=inline diff --git a/infoex-autowx.py b/infoex-autowx.py index d66683c..48a9063 100755 --- a/infoex-autowx.py +++ b/infoex-autowx.py @@ -78,22 +78,22 @@ def setup_config(config): 'wx_data': {}, # placeholder key, values to come later } - data = dict() - data['provider'] = config['station']['type'] + station = dict() + station['provider'] = config['station']['type'] - if data['provider'] not in ['nrcs', 'mesowest']: + if station['provider'] not in ['nrcs', 'mesowest']: print("Please specify either nrcs or mesowest as the station type.") sys.exit(1) - if data['provider'] == 'nrcs': - data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL' - data['station_id'] = config['station']['station_id'] + if station['provider'] == 'nrcs': + station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL' + station['station_id'] = config['station']['station_id'] try: - data['desired_data'] = config['station']['desired_data'].split(',') + station['desired_data'] = config['station']['desired_data'].split(',') except: # desired_data malformed or missing, setting default - data['desired_data'] = [ + station['desired_data'] = [ 'TOBS', # AIR TEMPERATURE OBSERVED (degF) 'SNWD', # SNOW DEPTH (in) 'PREC' # PRECIPITATION ACCUMULATION (in) @@ -101,22 +101,22 @@ def setup_config(config): # XXX: For NRCS, we're manually overriding units for now! Once # unit conversion is supported for NRCS, REMOVE THIS! - if 'units' not in data: - data['units'] = 'imperial' + if 'units' not in station: + station['units'] = 'imperial' - if data['provider'] == 'mesowest': - data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries' - data['station_id'] = config['station']['station_id'] - data['units'] = config['station']['units'] + if station['provider'] == 'mesowest': + station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries' + station['station_id'] = config['station']['station_id'] + station['units'] = config['station']['units'] try: - data['desired_data'] = config['station']['desired_data'] + station['desired_data'] = config['station']['desired_data'] except: # desired_data malformed or missing, setting default - data['desired_data'] = 'air_temp,snow_depth' + station['desired_data'] = 'air_temp,snow_depth' # construct full API URL (sans start/end time, added later) - data['source'] = data['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['station_id'] + '&vars=' + data['desired_data'] + station['source'] = station['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + station['units'] + '&stid=' + station['station_id'] + '&vars=' + station['desired_data'] except KeyError as e: LOG.critical("%s not defined in %s" % (e, options.config)) @@ -135,7 +135,7 @@ def setup_config(config): LOG.critical("Config value '%s.%s' is empty" % (key, subkey)) exit(1) - return (infoex, data) + return (infoex, station) def setup_logging(log_level): """Setup our logging infrastructure""" @@ -184,122 +184,34 @@ def main(): print("\nPlease select an appropriate log level or remove the switch (--log-level).") sys.exit(1) - (infoex, data) = setup_config(config) + (infoex, station) = setup_config(config) LOG.debug('Config parsed, starting up') # create mappings (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid']) - iemap = setup_infoex_counterparts_mapping(data['provider']) + iemap = setup_infoex_counterparts_mapping(station['provider']) # override units if user selected metric - # - # NOTE: to update this, use the fmap<->final_data mapping laid out above - # - # NOTE: this only 'works' with MesoWest for now, as the MesoWest API - # itself handles the unit conversion; in the future, we will also - # support NRCS unit conversion, but this must be done by this - # program. - if data['units'] == 'metric': - final_data[fmap['tempPresUnit']] = 'C' - final_data[fmap['hsUnit']] = 'm' - final_data[fmap['windSpeedUnit']] = 'm/s' - final_data[fmap['windGustSpeedNumUnit']] = 'm/s' + if station['units'] == 'metric': + final_data = switch_units_to_metric(final_data, fmap) - # floor time to nearest hour - dt = datetime.datetime.now() - end_date = dt - datetime.timedelta(minutes=dt.minute % 60, - seconds=dt.second, - microseconds=dt.microsecond) - begin_date = end_date - datetime.timedelta(hours=3) + (begin_date, end_date) = setup_time_values() # get the data - LOG.debug("Getting %s data from %s to %s" % (str(data['desired_data']), + LOG.debug("Getting %s data from %s to %s" % (str(station['desired_data']), str(begin_date), str(end_date))) time_all_elements = time.time() - # NRCS-specific code - if data['provider'] == 'nrcs': - transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache()) - client = zeep.Client(wsdl=data['source'], transport=transport) - - for elementCd in data['desired_data']: - time_element = time.time() - - # get the last three hours of data for this elementCd - tmp = client.service.getHourlyData( - stationTriplets=[data['station_id']], - elementCd=elementCd, - ordinal=1, - beginDate=begin_date, - endDate=end_date) - - LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd, - time.time() - time_element)) - - values = tmp[0]['values'] - - # sort and isolate the most recent - # - # NOTE: we do this because sometimes there are gaps in hourly data - # in NRCS; yes, we may end up with slightly inaccurate data, - # so perhaps this decision will be re-evaluated in the future - if values: - ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True) - infoex['wx_data'][elementCd] = ordered[0]['value'] - else: - infoex['wx_data'][elementCd] = None - - # MesoWest-specific code - elif data['provider'] == 'mesowest': - # massage begin/end date format - begin_date_str = begin_date.strftime('%Y%m%d%H%M') - end_date_str = end_date.strftime('%Y%m%d%H%M') - - # construct final, completed API URL - api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str - req = requests.get(api_req_url) - - try: - json = req.json() - except ValueError: - LOG.error("Bad JSON in MesoWest response") - sys.exit(1) - - try: - observations = json['STATION'][0]['OBSERVATIONS'] - except ValueError: - LOG.error("Bad JSON in MesoWest response") - sys.exit(1) - - pos = len(observations['date_time']) - 1 - - for elementCd in data['desired_data'].split(','): - # sort and isolate the most recent, see note above in NRCS for how and - # why this is done - # - # NOTE: Unlike in the NRCS case, the MesoWest API response contains all - # data (whereas with NRCS, we have to make a separate request for - # each element we want). This is nice for network efficiency but - # it means we have to handle this part differently for each. - # - # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS - # provides hourly data, but MesoWest can often provide data every - # 10 minutes -- though this provides more opportunity for - # irregularities - - # we may not have the data at all - key_name = elementCd + '_set_1' - if key_name in observations: - if observations[key_name][pos]: - infoex['wx_data'][elementCd] = observations[key_name][pos] - else: - infoex['wx_data'][elementCd] = None - else: - infoex['wx_data'][elementCd] = None + # get the data + if station['provider'] == 'nrcs': + infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station) + elif station['provider'] == 'mesowest': + infoex['wx_data'] = get_mesowest_data(begin_date, end_date, + station) - LOG.info("Time to get all data : %.3f sec" % (time.time() - + LOG.info("Time taken to get all data : %.3f sec" % (time.time() - time_all_elements)) LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data'])) @@ -339,7 +251,7 @@ def main(): LOG.debug('DONE') return 0 -# Data structure operations +# data structure operations def setup_infoex_fields_mapping(location_uuid): """ Create a mapping of InfoEx fields to the local data's indexing scheme. @@ -417,6 +329,111 @@ def setup_infoex_counterparts_mapping(provider): return iemap +# provider-specific operations +def get_nrcs_data(begin, end, station): + """get the data we're after from the NRCS WSDL""" + transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache()) + client = zeep.Client(wsdl=station['source'], transport=transport) + remote_data = {} + + for elementCd in station['desired_data']: + time_element = time.time() + + # get the last three hours of data for this elementCd + tmp = client.service.getHourlyData( + stationTriplets=[station['station_id']], + elementCd=elementCd, + ordinal=1, + beginDate=begin, + endDate=end) + + LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd, + time.time() - time_element)) + + values = tmp[0]['values'] + + # sort and isolate the most recent + # + # NOTE: we do this because sometimes there are gaps in hourly data + # in NRCS; yes, we may end up with slightly inaccurate data, + # so perhaps this decision will be re-evaluated in the future + if values: + ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True) + remote_data[elementCd] = ordered[0]['value'] + else: + remote_data[elementCd] = None + + return remote_data + +def get_mesowest_data(begin, end, station): + """get the data we're after from the MesoWest/Synoptic API""" + remote_data = {} + + # massage begin/end date format + begin_date_str = begin.strftime('%Y%m%d%H%M') + end_date_str = end.strftime('%Y%m%d%H%M') + + # construct final, completed API URL + api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str + req = requests.get(api_req_url) + + try: + json = req.json() + except ValueError: + LOG.error("Bad JSON in MesoWest response") + sys.exit(1) + + try: + observations = json['STATION'][0]['OBSERVATIONS'] + except ValueError: + LOG.error("Bad JSON in MesoWest response") + sys.exit(1) + + pos = len(observations['date_time']) - 1 + + for elementCd in station['desired_data'].split(','): + # sort and isolate the most recent, see note above in NRCS for how and + # why this is done + # + # NOTE: Unlike in the NRCS case, the MesoWest API response contains all + # data (whereas with NRCS, we have to make a separate request for + # each element we want). This is nice for network efficiency but + # it means we have to handle this part differently for each. + # + # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS + # provides hourly data, but MesoWest can often provide data every + # 10 minutes -- though this provides more opportunity for + # irregularities + + # we may not have the data at all + key_name = elementCd + '_set_1' + if key_name in observations: + if observations[key_name][pos]: + remote_data[elementCd] = observations[key_name][pos] + else: + remote_data[elementCd] = None + else: + remote_data[elementCd] = None + + return remote_data + +def switch_units_to_metric(data_map, mapping): + """replace units with metric counterparts""" + + # NOTE: to update this, use the fmap<->final_data mapping laid out + # in setup_infoex_fields_mapping () + # + # NOTE: this only 'works' with MesoWest for now, as the MesoWest API + # itself handles the unit conversion; in the future, we will also + # support NRCS unit conversion, but this must be done by this + # program. + data_map[mapping['tempPresUnit']] = 'C' + data_map[mapping['hsUnit']] = 'm' + data_map[mapping['windSpeedUnit']] = 'm/s' + data_map[mapping['windGustSpeedNumUnit']] = 'm/s' + + return data_map + # CSV operations def write_local_csv(path_to_file, data): """Write the specified CSV file to disk""" @@ -440,5 +457,16 @@ def upload_csv(path_to_file, infoex_data): file_object.close() os.remove(path_to_file) +# other miscellaneous routines +def setup_time_values(): + """establish time bounds of data request(s)""" + # floor time to nearest hour + dt = datetime.datetime.now() + end_date = dt - datetime.timedelta(minutes=dt.minute % 60, + seconds=dt.second, + microseconds=dt.microsecond) + begin_date = end_date - datetime.timedelta(hours=3) + return (begin_date, end_date) + if __name__ == "__main__": sys.exit(main())