From 3b4c907fb7437a25efaa6d15b333d742a95491a4 Mon Sep 17 00:00:00 2001
From: Alexander Vasarab
Date: Thu, 2 Jul 2020 11:20:11 -0700
Subject: [PATCH] Break out data fetching operations into routines

Move the provider-specific fetching code out of main() and into
get_nrcs_data() and get_mesowest_data(). Each routine returns a dict
keyed by element code whose value is the most recent reading, or None
when no reading is available; main() assigns that dict to
infoex['wx_data'].

While moving the MesoWest code:
- catch KeyError/IndexError/TypeError rather than ValueError when
  pulling OBSERVATIONS out of the decoded response, since subscripting
  never raises ValueError and the error path was unreachable
- keep a reading of 0 instead of reporting it as missing
---
 infoex-autowx.py | 171 ++++++++++++++++++++++++++---------------------
 1 file changed, 93 insertions(+), 78 deletions(-)

diff --git a/infoex-autowx.py b/infoex-autowx.py
index d66683c..63989b9 100755
--- a/infoex-autowx.py
+++ b/infoex-autowx.py
@@ -219,85 +219,12 @@ def main():
 
     time_all_elements = time.time()
 
-    # NRCS-specific code
+    # get the data
     if data['provider'] == 'nrcs':
-        transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
-        client = zeep.Client(wsdl=data['source'], transport=transport)
-
-        for elementCd in data['desired_data']:
-            time_element = time.time()
-
-            # get the last three hours of data for this elementCd
-            tmp = client.service.getHourlyData(
-                stationTriplets=[data['station_id']],
-                elementCd=elementCd,
-                ordinal=1,
-                beginDate=begin_date,
-                endDate=end_date)
-
-            LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
-                     time.time() - time_element))
-
-            values = tmp[0]['values']
-
-            # sort and isolate the most recent
-            #
-            # NOTE: we do this because sometimes there are gaps in hourly data
-            #       in NRCS; yes, we may end up with slightly inaccurate data,
-            #       so perhaps this decision will be re-evaluated in the future
-            if values:
-                ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
-                infoex['wx_data'][elementCd] = ordered[0]['value']
-            else:
-                infoex['wx_data'][elementCd] = None
-
-    # MesoWest-specific code
+        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, data)
     elif data['provider'] == 'mesowest':
-        # massage begin/end date format
-        begin_date_str = begin_date.strftime('%Y%m%d%H%M')
-        end_date_str = end_date.strftime('%Y%m%d%H%M')
-
-        # construct final, completed API URL
-        api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
-        req = requests.get(api_req_url)
-
-        try:
-            json = req.json()
-        except ValueError:
-            LOG.error("Bad JSON in MesoWest response")
-            sys.exit(1)
-
-        try:
-            observations = json['STATION'][0]['OBSERVATIONS']
-        except ValueError:
-            LOG.error("Bad JSON in MesoWest response")
-            sys.exit(1)
-
-        pos = len(observations['date_time']) - 1
-
-        for elementCd in data['desired_data'].split(','):
-            # sort and isolate the most recent, see note above in NRCS for how and
-            # why this is done
-            #
-            # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
-            #       data (whereas with NRCS, we have to make a separate request for
-            #       each element we want). This is nice for network efficiency but
-            #       it means we have to handle this part differently for each.
-            #
-            # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
-            #       provides hourly data, but MesoWest can often provide data every
-            #       10 minutes -- though this provides more opportunity for
-            #       irregularities
-
-            # we may not have the data at all
-            key_name = elementCd + '_set_1'
-            if key_name in observations:
-                if observations[key_name][pos]:
-                    infoex['wx_data'][elementCd] = observations[key_name][pos]
-                else:
-                    infoex['wx_data'][elementCd] = None
-            else:
-                infoex['wx_data'][elementCd] = None
+        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
+                                              data)
 
     LOG.info("Time to get all data : %.3f sec" % (time.time() -
              time_all_elements))
@@ -339,7 +266,7 @@ def main():
     LOG.debug('DONE')
     return 0
 
-# Data structure operations
+# data structure operations
 def setup_infoex_fields_mapping(location_uuid):
     """
     Create a mapping of InfoEx fields to the local data's indexing scheme.
@@ -417,6 +344,94 @@ def setup_infoex_counterparts_mapping(provider):
 
     return iemap
 
+# provider-specific operations
+def get_nrcs_data(begin, end, data):
+    """get the latest value of each desired element from the NRCS WSDL"""
+    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
+    client = zeep.Client(wsdl=data['source'], transport=transport)
+    remote_data = {}
+
+    for elementCd in data['desired_data']:
+        time_element = time.time()
+
+        # get the last three hours of data for this elementCd
+        tmp = client.service.getHourlyData(
+            stationTriplets=[data['station_id']],
+            elementCd=elementCd,
+            ordinal=1,
+            beginDate=begin,
+            endDate=end)
+
+        LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
+                 time.time() - time_element))
+
+        values = tmp[0]['values']
+
+        # sort and isolate the most recent
+        #
+        # NOTE: we do this because sometimes there are gaps in hourly data
+        #       in NRCS; yes, we may end up with slightly inaccurate data,
+        #       so perhaps this decision will be re-evaluated in the future
+        if values:
+            ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
+            remote_data[elementCd] = ordered[0]['value']
+        else:
+            remote_data[elementCd] = None
+
+    return remote_data
+
+def get_mesowest_data(begin, end, data):
+    """get the latest value of each desired element from MesoWest/Synoptic"""
+    remote_data = {}
+
+    # massage begin/end date format
+    begin_date_str = begin.strftime('%Y%m%d%H%M')
+    end_date_str = end.strftime('%Y%m%d%H%M')
+
+    # construct final, completed API URL
+    api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
+    req = requests.get(api_req_url)
+
+    try:
+        json = req.json()
+    except ValueError:
+        LOG.error("Bad JSON in MesoWest response")
+        sys.exit(1)
+
+    try:
+        observations = json['STATION'][0]['OBSERVATIONS']
+    except (KeyError, IndexError, TypeError):  # unexpected response shape
+        LOG.error("Bad JSON in MesoWest response")
+        sys.exit(1)
+
+    pos = len(observations['date_time']) - 1
+
+    for elementCd in data['desired_data'].split(','):
+        # sort and isolate the most recent, see note above in NRCS for how and
+        # why this is done
+        #
+        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
+        #       data (whereas with NRCS, we have to make a separate request for
+        #       each element we want). This is nice for network efficiency but
+        #       it means we have to handle this part differently for each.
+        #
+        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
+        #       provides hourly data, but MesoWest can often provide data every
+        #       10 minutes -- though this provides more opportunity for
+        #       irregularities
+
+        # we may not have the data at all (but 0 is a legitimate reading)
+        key_name = elementCd + '_set_1'
+        if key_name in observations:
+            if observations[key_name][pos] is not None:
+                remote_data[elementCd] = observations[key_name][pos]
+            else:
+                remote_data[elementCd] = None
+        else:
+            remote_data[elementCd] = None
+
+    return remote_data
+
 # CSV operations
 def write_local_csv(path_to_file, data):
     """Write the specified CSV file to disk"""
-- 
2.30.2