#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
InfoEx <-> NRCS Auto Wx implementation
Alexander Vasarab
Wylark Mountaineering LLC

Version 1.0.0

This program fetches data from an NRCS SNOTEL site (or, alternatively,
from a MesoWest station) and pushes it to InfoEx using the new automated
weather system implementation.

It is designed to be run hourly, and it asks for the last three hours
of data of each desired type, and selects the most recent one. This
lends some resiliency to the process and helps ensure that we have a
value to send, but it can lead to somewhat inconsistent/untruthful
data if e.g. the HS is from the last hour but the tempPres is from two
hours ago because the instrumentation had a hiccup. It's worth
considering if this is a bug or a feature.

For more information, see file: README
For licensing, see file: LICENSE
"""

import configparser
import csv
import datetime
import logging
import os
import sys
import time

from collections import OrderedDict
from ftplib import FTP
from optparse import OptionParser

import requests

import zeep
import zeep.cache
import zeep.transports

# Seconds to wait for the MesoWest HTTP request before giving up, so that
# an unresponsive server cannot hang an hourly job forever.
HTTP_TIMEOUT = 30


def _desired_elements(section, defaults):
    """Return the element codes listed in section['desired_data'].

    The option is a comma-separated string; surrounding whitespace and
    empty entries are discarded. When the option is missing, cannot be
    read, or contains no usable entry, a copy of `defaults` is returned.
    """
    try:
        raw = section['desired_data']
    except (KeyError, configparser.Error):
        # desired_data malformed or missing, setting default
        return list(defaults)

    elements = [code.strip() for code in raw.split(',') if code.strip()]
    return elements or list(defaults)


log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

try:
    from systemd.journal import JournalHandler
    log.addHandler(JournalHandler())
except Exception:
    # Best effort: if the systemd journal handler cannot be imported or
    # set up, fall back to logging on stdout.
    log.addHandler(logging.StreamHandler(sys.stdout))

parser = OptionParser()
parser.add_option("--config",
                  dest="config",
                  metavar="FILE",
                  help="location of config file")
parser.add_option("--dry-run",
                  action="store_true",
                  dest="dry_run",
                  default=False,
                  help="fetch data but don't upload to InfoEx")

(options, args) = parser.parse_args()

config = configparser.ConfigParser(allow_no_value=False)

if not options.config:
    print("Please specify a configuration file via --config")
    sys.exit(1)

# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it did read, so an empty result means nothing was loaded.
if not config.read(options.config):
    print("Could not read configuration file '%s'" % options.config)
    sys.exit(1)

if 'nrcs' in config and 'mesowest' in config:
    print("Both MesoWest and NRCS configuration option specified, "
          "please choose just one.")
    sys.exit(1)

log.debug('STARTING UP')

try:
    infoex = {
        'host': config['infoex']['host'],
        'uuid': config['infoex']['uuid'],
        'api_key': config['infoex']['api_key'],
        'csv_filename': config['infoex']['csv_filename'],
        'location_uuid': config['infoex']['location_uuid'],
        'wx_data': {},  # placeholder key, values to come later
    }

    data = {}

    if 'nrcs' in config:
        data['provider'] = 'nrcs'
        data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
        data['stationID'] = config['nrcs']['station_triplet']

        desired_data = _desired_elements(config['nrcs'], [
            'TOBS',  # AIR TEMPERATURE OBSERVED (degF)
            'SNWD',  # SNOW DEPTH (in)
            'PREC',  # PRECIPITATION ACCUMULATION (in)
        ])
    elif 'mesowest' in config:
        data['provider'] = 'mesowest'
        data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
        data['stationID'] = config['mesowest']['stid']
        data['units'] = config['mesowest']['units']

        desired_data = _desired_elements(config['mesowest'],
                                         ['air_temp', 'snow_depth'])

        # construct full API URL (start/end are appended at request time)
        data['source'] = (data['source']
                          + '?token=' + config['mesowest']['token']
                          + '&within=60&units=' + data['units']
                          + '&stid=' + data['stationID']
                          + '&vars=' + ','.join(desired_data))
    else:
        # Without a provider section there is nothing to fetch; stop here
        # rather than failing later on a missing data['provider'] key.
        log.critical("No weather data provider configured in %s; "
                     "add an [nrcs] or [mesowest] section", options.config)
        sys.exit(1)
except KeyError as e:
    log.critical("%s not defined in %s", e, options.config)
    sys.exit(1)
except Exception as exc:
    log.critical("Exception occurred in config parsing: '%s'", exc)
    sys.exit(1)

# all sections/values present in config file, final sanity check
for key in config.sections():
    for subkey in config[key]:
        if not config[key][subkey]:
            log.critical("Config value '%s.%s' is empty", key, subkey)
            sys.exit(1)

# INFOEX FIELDS
#
# The end result is simply an ordered set, the CSV file, but we still may
# want to manipulate the values arbitrarily before writing that file. So
# the columns are declared once, in order, as (name, default value) pairs
# and two helpers are derived from that list:
#
#   fmap       -- column name -> position in the CSV row
#   final_data -- the CSV row itself, pre-filled with the defaults
#
# Also note that the current Auto Wx InfoEx documentation shows these
# keys in a graphical table with the "index" beginning at 1, but here we
# are sanely indexing beginning at 0.
#
# NOTE(review): the unit columns are fixed to imperial values while the
# MesoWest request uses the configured 'units' option -- confirm that the
# configuration always asks for matching units.
infoex_fields = [
    ('Location UUID', infoex['location_uuid']),
    ('obDate', None),
    ('obTime', None),
    ('timeZone', 'Pacific'),
    ('tempMaxHour', None),
    ('tempMaxHourUnit', 'F'),
    ('tempMinHour', None),
    ('tempMinHourUnit', 'F'),
    ('tempPres', None),
    ('tempPresUnit', 'F'),
    ('precipitationGauge', None),
    ('precipitationGaugeUnit', 'in'),
    ('windSpeedNum', None),
    ('windSpeedUnit', 'mph'),
    ('windDirectionNum', None),
    ('hS', None),
    ('hsUnit', 'in'),
    ('baro', None),
    ('baroUnit', 'inHg'),
    ('rH', None),
    ('windGustSpeedNum', None),
    ('windGustSpeedNumUnit', 'mph'),
    ('windGustDirNum', None),
    ('dewPoint', None),
    ('dewPointUnit', 'F'),
    ('hn24Auto', None),
    ('hn24AutoUnit', 'in'),
    ('hstAuto', None),
    ('hstAutoUnit', 'in'),
]

fmap = {name: index for index, (name, _default) in enumerate(infoex_fields)}
final_data = [default for _name, default in infoex_fields]

# one final mapping, the provider fields that this program supports to
# their InfoEx counterpart
if data['provider'] == 'nrcs':
    iemap = {
        'PREC': 'precipitationGauge',
        'TOBS': 'tempPres',
        'SNWD': 'hS',
    }
else:
    iemap = {
        'precip_accum': 'precipitationGauge',
        'air_temp': 'tempPres',
        'snow_depth': 'hS',
    }

# floor time to nearest hour
end_date = datetime.datetime.now().replace(minute=0, second=0, microsecond=0)
begin_date = end_date - datetime.timedelta(hours=3)

# get the data
log.debug("Getting %s data from %s to %s", desired_data, begin_date, end_date)

time_all_elements = time.time()

# NRCS-specific code
if data['provider'] == 'nrcs':
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=data['source'], transport=transport)

    for elementCd in desired_data:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[data['stationID']],
            elementCd=elementCd,
            ordinal=1,
            beginDate=begin_date,
            endDate=end_date)

        log.info("Time to get elementCd '%s': %.3f sec",
                 elementCd, time.time() - time_element)

        # an empty result means there is no station entry to look into
        values = tmp[0]['values'] if tmp else None

        # isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda t: t['dateTime'])
            infoex['wx_data'][elementCd] = latest['value']
        else:
            infoex['wx_data'][elementCd] = None

# MesoWest-specific code
elif data['provider'] == 'mesowest':
    # massage begin/end date format
    #
    # NOTE(review): begin_date/end_date are naive local times -- confirm
    # that the API interprets start/end in the same time zone.
    begin_date_str = begin_date.strftime('%Y%m%d%H%M')
    end_date_str = end_date.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = (data['source']
                   + '&start=' + begin_date_str
                   + '&end=' + end_date_str)

    try:
        req = requests.get(api_req_url, timeout=HTTP_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        # Only the exception type is logged: the message would contain
        # the request URL and, with it, the API token.
        log.error("MesoWest request failed (%s)", type(exc).__name__)
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # Valid JSON may still lack the expected structure (e.g. an empty
    # STATION list), which surfaces as a lookup error, not a ValueError.
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    for elementCd in desired_data:
        # isolate the most recent, see note above in NRCS for how and
        # why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains
        #       all data (whereas with NRCS, we have to make a separate
        #       request for each element we want). This is nice for network
        #       efficiency but it means we have to handle this part
        #       differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data
        #       every 10 minutes -- though this provides more opportunity
        #       for irregularities

        # we may not have the data at all
        key_name = elementCd + '_set_1'
        series = observations.get(key_name)

        if series is not None and 0 <= pos < len(series):
            # Keep the value as-is: a reading of 0 is valid data, and a
            # missing reading is already represented as None.
            infoex['wx_data'][elementCd] = series[pos]
        else:
            infoex['wx_data'][elementCd] = None

log.info("Time to get all data : %.3f sec", time.time() - time_all_elements)

log.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

# Now we only need to add in what we want to change; every other column
# already holds its default from the field table.
final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
final_data[fmap['obTime']] = end_date.strftime('%H:%M')

for elementCd, value in infoex['wx_data'].items():
    if elementCd not in iemap:
        log.warning("BAD KEY wx_data['%s']", elementCd)
        continue

    # CONSIDER: Casting every value to Float() -- need to investigate if
    #           any possible elementCds we may want are any other data
    #           type than float.
    #
    #           Another possibility is to query the API with
    #           getStationElements and temporarily store the
    #           storedUnitCd. But that's pretty network-intensive and
    #           may not even be worth it if there's only e.g. one or two
    #           exceptions to any otherwise uniformly Float value set.
    final_data[fmap[iemap[elementCd]]] = value

log.debug("final_data: %s", str(final_data))

# newline='' lets the csv module control line endings itself, as its
# documentation requires for files handed to csv.writer.
with open(infoex['csv_filename'], 'w', newline='') as f:
    # The requirement is that empty values are represented in the CSV
    # file as "", csv.QUOTE_NONNUMERIC achieves that
    log.debug("writing CSV file '%s'", infoex['csv_filename'])
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(final_data)

if not options.dry_run:
    # not a dry run
    with open(infoex['csv_filename'], 'rb') as f:
        log.debug("uploading FTP file '%s'", infoex['host'])
        # The context manager closes the connection even if the upload
        # raises.
        with FTP(infoex['host'], infoex['uuid'], infoex['api_key']) as ftp:
            ftp.storlines('STOR ' + infoex['csv_filename'], f)
    os.remove(infoex['csv_filename'])

log.debug('DONE')