X-Git-Url: https://wylark.com/src/infoex-autowx.git/blobdiff_plain/9eb3f3d88ae216472fdcf6db4bcf1d2542477686..f41647a44c410ccf016776389d07b89d65519889:/infoex-autowx.py diff --git a/infoex-autowx.py b/infoex-autowx.py index 4f6c9dc..5f35d0e 100755 --- a/infoex-autowx.py +++ b/infoex-autowx.py @@ -27,12 +27,16 @@ import configparser import csv import datetime import logging +import os +import sys import time from collections import OrderedDict from ftplib import FTP from optparse import OptionParser +import requests + import zeep import zeep.cache import zeep.transports @@ -44,43 +48,84 @@ try: from systemd.journal import JournalHandler log.addHandler(JournalHandler()) except: - # fallback to syslog - import logging.handlers - log.addHandler(logging.handlers.SysLogHandler()) + ## fallback to syslog + #import logging.handlers + #log.addHandler(logging.handlers.SysLogHandler()) + # fallback to stdout + handler = logging.StreamHandler(sys.stdout) + log.addHandler(handler) parser = OptionParser() -parser.add_option("--config", dest="config", metavar="FILE", help="location of config file") +parser.add_option("--config", + dest="config", + metavar="FILE", + help="location of config file") +parser.add_option("--dry-run", + action="store_true", + dest="dry_run", + default=False, + help="fetch data but don't upload to InfoEx") (options, args) = parser.parse_args() config = configparser.ConfigParser(allow_no_value=False) + +if not options.config: + print("Please specify a configuration file via --config") + sys.exit(1) + config.read(options.config) -log.debug('STARTING UP') +if 'nrcs' in config and 'mesowest' in config: + print("Both MesoWest and NRCS configuration option specified, " + "please choose just one.") + sys.exit(1) -wsdl = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL' +log.debug('STARTING UP') try: infoex = { - 'host': config['ftp']['host'], - 'uuid': config['ftp']['uuid'], - 'api_key': config['ftp']['api_key'], - 'location_uuid': 
config['wxsite']['location_uuid'], + 'host': config['infoex']['host'], + 'uuid': config['infoex']['uuid'], + 'api_key': config['infoex']['api_key'], + 'csv_filename': config['infoex']['csv_filename'], + 'location_uuid': config['infoex']['location_uuid'], 'wx_data': {}, # placeholder key, values to come later - 'csv_filename': config['wxsite']['csv_filename'] } - station_triplet = config['wxsite']['station_triplet'] + data = dict() + + if 'nrcs' in config: + data['provider'] = 'nrcs' + data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL' + data['stationID'] = config['nrcs']['station_triplet'] + + try: + desired_data = config['nrcs']['desired_data'].split(',') + except: + # desired_data malformed or missing, setting default + desired_data = [ + 'TOBS', # AIR TEMPERATURE OBSERVED (degF) + 'SNWD', # SNOW DEPTH (in) + 'PREC' # PRECIPITATION ACCUMULATION (in) + ] + + if 'mesowest' in config: + data['provider'] = 'mesowest' + #data['source'] = 'https://api.synopticdata.com/v2/stations/latest' + data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries' + data['stationID'] = config['mesowest']['stid'] + data['units'] = config['mesowest']['units'] + + try: + desired_data = config['mesowest']['desired_data'] + except: + # desired_data malformed or missing, setting default + desired_data = 'air_temp,snow_depth' + + # construct full API URL + data['source'] = data['source'] + '?token=' + config['mesowest']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['stationID'] + '&vars=' + desired_data - try: - desired_data = config['wxsite']['desired_data'].split(',') - except: - # desired_data malformed or missing, setting default - desired_data = [ - 'TOBS', # AIR TEMPERATURE OBSERVED (degF) - 'SNWD', # SNOW DEPTH (in) - 'PREC' # PRECIPITATION ACCUMULATION (in) - ] except KeyError as e: log.critical("%s not defined in %s" % (e, options.config)) exit(1) @@ -142,9 +187,15 @@ fmap['hstAutoUnit'] = 28 ; final_data[28] = 'in' # one 
final mapping, the NRCS fields that this program supports to # their InfoEx counterpart iemap = {} -iemap['PREC'] = 'precipitationGauge' -iemap['TOBS'] = 'tempPres' -iemap['SNWD'] = 'hS' + +if data['provider'] == 'nrcs': + iemap['PREC'] = 'precipitationGauge' + iemap['TOBS'] = 'tempPres' + iemap['SNWD'] = 'hS' +elif data['provider'] == 'mesowest': + iemap['precip_accum'] = 'precipitationGauge' + iemap['air_temp'] = 'tempPres' + iemap['snow_depth'] = 'hS' # floor time to nearest hour dt = datetime.datetime.now() @@ -153,41 +204,93 @@ end_date = dt - datetime.timedelta(minutes=dt.minute % 60, microseconds=dt.microsecond) begin_date = end_date - datetime.timedelta(hours=3) -transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache()) -client = zeep.Client(wsdl=wsdl, transport=transport) -time_all_elements = time.time() - +# get the data log.debug("Getting %s data from %s to %s" % (str(desired_data), str(begin_date), str(end_date))) -for elementCd in desired_data: - time_element = time.time() - - # get the last three hours of data for this elementCd - tmp = client.service.getHourlyData( - stationTriplets=[station_triplet], - elementCd=elementCd, - ordinal=1, - beginDate=begin_date, - endDate=end_date) +time_all_elements = time.time() - log.info("Time to get elementCd '%s': %.3f sec" % (elementCd, - time.time() - time_element)) +# NRCS-specific code +if data['provider'] == 'nrcs': + transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache()) + client = zeep.Client(wsdl=data['source'], transport=transport) + + for elementCd in desired_data: + time_element = time.time() + + # get the last three hours of data for this elementCd + tmp = client.service.getHourlyData( + stationTriplets=[data['stationID']], + elementCd=elementCd, + ordinal=1, + beginDate=begin_date, + endDate=end_date) + + log.info("Time to get elementCd '%s': %.3f sec" % (elementCd, + time.time() - time_element)) + + values = tmp[0]['values'] + + # sort and isolate the most recent + # + # 
NOTE: we do this because sometimes there are gaps in hourly data + # in NRCS; yes, we may end up with slightly inaccurate data, + # so perhaps this decision will be re-evaluated in the future + if values: + ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True) + infoex['wx_data'][elementCd] = ordered[0]['value'] + else: + infoex['wx_data'][elementCd] = None + +# MesoWest-specific code +elif data['provider'] == 'mesowest': + # massage begin/end date format + begin_date_str = begin_date.strftime('%Y%m%d%H%M') + end_date_str = end_date.strftime('%Y%m%d%H%M') + + # construct final, completed API URL + api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str + req = requests.get(api_req_url) - values = tmp[0]['values'] + try: + json = req.json() + except ValueError: + log.error("Bad JSON in MesoWest response") + sys.exit(1) - # sort and isolate the most recent - # - # NOTE: we do this because sometimes there are gaps in hourly data - # in NRCS; yes, we may end up with slightly inaccurate data, - # so perhaps this decision will be re-evaluated in the future - if values: - ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True) - infoex['wx_data'][elementCd] = ordered[0]['value'] - else: - infoex['wx_data'][elementCd] = None - -log.info("Time to get all elementCds : %.3f sec" % (time.time() - + try: + observations = json['STATION'][0]['OBSERVATIONS'] + except ValueError: + log.error("Bad JSON in MesoWest response") + sys.exit(1) + + pos = len(observations['date_time']) - 1 + + for elementCd in desired_data.split(','): + # sort and isolate the most recent, see note above in NRCS for how and + # why this is done + # + # NOTE: Unlike in the NRCS case, the MesoWest API response contains all + # data (whereas with NRCS, we have to make a separate request for + # each element we want). This is nice for network efficiency but + # it means we have to handle this part differently for each. 
+ # + # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS + # provides hourly data, but MesoWest can often provide data every + # 10 minutes -- though this provides more opportunity for + # irregularities + + # we may not have the data at all + key_name = elementCd + '_set_1' + if key_name in observations: + if observations[key_name][pos]: + infoex['wx_data'][elementCd] = observations[key_name][pos] + else: + infoex['wx_data'][elementCd] = None + else: + infoex['wx_data'][elementCd] = None + +log.info("Time to get all data : %.3f sec" % (time.time() - time_all_elements)) log.debug("infoex[wx_data]: %s", str(infoex['wx_data'])) @@ -224,11 +327,14 @@ with open(infoex['csv_filename'], 'w') as f: writer.writerow(final_data) f.close() -with open(infoex['csv_filename'], 'rb') as f: - log.debug("uploading FTP file '%s'" % (infoex['host'])) - ftp = FTP(infoex['host'], infoex['uuid'], infoex['api_key']) - ftp.storlines('STOR ' + infoex['csv_filename'], f) - ftp.close() - f.close() +if not options.dry_run: + # not a dry run + with open(infoex['csv_filename'], 'rb') as f: + log.debug("uploading FTP file '%s'" % (infoex['host'])) + ftp = FTP(infoex['host'], infoex['uuid'], infoex['api_key']) + ftp.storlines('STOR ' + infoex['csv_filename'], f) + ftp.close() + f.close() + os.remove(infoex['csv_filename']) log.debug('DONE')