Flesh out all available/supported measurements
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 Version 2.0.0
10
11 This program fetches data from either an NRCS SNOTEL site or MesoWest
12 weather station and pushes it to InfoEx using the new automated weather
13 system implementation.
14
15 It is designed to be run hourly, and it asks for the last three hours
16 of data of each desired type, and selects the most recent one. This
17 lends some resiliency to the process and helps ensure that we have a
18 value to send, but it can lead to somewhat inconsistent/untruthful
19 data if e.g. the HS is from the last hour but the tempPres is from two
20 hours ago because the instrumentation had a hiccup. It's worth
21 considering if this is a bug or a feature.
22
23 For more information, see file: README
24 For licensing, see file: LICENSE
25 """
26
27 import configparser
28 import csv
29 import datetime
30 import logging
31 import os
32 import sys
33 import time
34
35 from collections import OrderedDict
36 from ftplib import FTP
37 from optparse import OptionParser
38
39 import requests
40
41 import zeep
42 import zeep.cache
43 import zeep.transports
44
# Module-wide logger; everything at DEBUG and above is emitted.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

# Prefer the systemd journal when its Python bindings are usable, otherwise
# fall back to plain stdout.
try:
    from systemd.journal import JournalHandler
    log.addHandler(JournalHandler())
except Exception:
    # Deliberately broad: a missing module *or* a handler that fails to
    # initialise should both end up on the stdout fallback. Unlike a bare
    # "except:", this does not swallow SystemExit or KeyboardInterrupt.
    #
    # A syslog fallback was considered here at one point:
    #   import logging.handlers
    #   log.addHandler(logging.handlers.SysLogHandler())
    handler = logging.StreamHandler(sys.stdout)
    log.addHandler(handler)
58
# Command-line interface: --config FILE is mandatory, --dry-run is optional.
parser = OptionParser()

parser.add_option("--config",
                  dest="config",
                  metavar="FILE",
                  help="location of config file")

parser.add_option("--dry-run",
                  action="store_true",
                  dest="dry_run",
                  default=False,
                  help="fetch data but don't upload to InfoEx")

(options, args) = parser.parse_args()

config = configparser.ConfigParser(allow_no_value=False)

if not options.config:
    print("Please specify a configuration file via --config.")
    sys.exit(1)

# ConfigParser.read() skips files it cannot open and returns the list of
# files it actually parsed, so an empty result means the given path was
# missing or unreadable. Bail out here rather than letting that surface
# later as a confusing "key not defined" error.
if not config.read(options.config):
    print("Could not read configuration file '%s'." % (options.config))
    sys.exit(1)
81
log.debug('STARTING UP')

# Pull every setting the rest of the script needs out of the parsed config.
# Any missing section/option raises KeyError, which is reported and turned
# into a non-zero exit below.
try:
    infoex = {
        'host': config['infoex']['host'],
        'uuid': config['infoex']['uuid'],
        'api_key': config['infoex']['api_key'],
        'csv_filename': config['infoex']['csv_filename'],
        'location_uuid': config['infoex']['location_uuid'],
        'wx_data': {},  # placeholder key, values to come later
    }

    data = dict()
    data['provider'] = config['station']['type']

    if data['provider'] not in ['nrcs', 'mesowest']:
        print("Please specify either nrcs or mesowest as the station type.")
        sys.exit(1)

    if data['provider'] == 'nrcs':
        data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
        data['station_id'] = config['station']['station_id']

        # For NRCS, desired_data is kept as a list of element codes.
        try:
            desired_data = config['station']['desired_data'].split(',')
        except (KeyError, configparser.Error):
            # desired_data malformed or missing, setting default
            desired_data = [
                'TOBS',  # AIR TEMPERATURE OBSERVED (degF)
                'SNWD',  # SNOW DEPTH (in)
                'PREC'   # PRECIPITATION ACCUMULATION (in)
            ]

    if data['provider'] == 'mesowest':
        data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
        data['station_id'] = config['station']['station_id']
        data['units'] = config['station']['units']

        # For MesoWest, desired_data stays a comma-separated string because
        # it is pasted straight into the request URL below.
        try:
            desired_data = config['station']['desired_data']
        except (KeyError, configparser.Error):
            # desired_data malformed or missing, setting default
            desired_data = 'air_temp,snow_depth'

        # construct full API URL (sans start/end time, added later)
        data['source'] = data['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['station_id'] + '&vars=' + desired_data

except KeyError as e:
    log.critical("%s not defined in %s" % (e, options.config))
    # sys.exit() rather than the site-provided exit(), which is meant for
    # the interactive interpreter and is absent when site is not loaded.
    sys.exit(1)
except Exception as exc:
    log.critical("Exception occurred in config parsing: '%s'" % (exc))
    sys.exit(1)
135
# all sections/values present in config file, final sanity check:
# refuse to continue if any option in any section is an empty string.
for key in config.sections():
    for subkey in config[key]:
        if not config[key][subkey]:
            log.critical("Config value '%s.%s' is empty" % (key, subkey))
            # sys.exit() rather than the site-provided exit() helper
            sys.exit(1)
145
# INFOEX FIELDS
#
# The CSV handed to InfoEx is nothing more than an ordered row, but we
# still want to address individual columns by name before writing it.
# The table below lists every column in order together with its initial
# value; from it we derive:
#
#   fmap        column name -> position in the row
#   final_data  the row itself, pre-populated with the initial values
#
# Note that the current Auto Wx InfoEx documentation shows these columns
# in a graphical table with the "index" beginning at 1, whereas positions
# here begin at 0.
_INFOEX_FIELDS = (
    ('Location UUID', infoex['location_uuid']),
    ('obDate', None),
    ('obTime', None),
    ('timeZone', 'Pacific'),
    ('tempMaxHour', None),
    ('tempMaxHourUnit', 'F'),
    ('tempMinHour', None),
    ('tempMinHourUnit', 'F'),
    ('tempPres', None),
    ('tempPresUnit', 'F'),
    ('precipitationGauge', None),
    ('precipitationGaugeUnit', 'in'),
    ('windSpeedNum', None),
    ('windSpeedUnit', 'mph'),
    ('windDirectionNum', None),
    ('hS', None),
    ('hsUnit', 'in'),
    ('baro', None),
    ('baroUnit', 'inHg'),
    ('rH', None),
    ('windGustSpeedNum', None),
    ('windGustSpeedNumUnit', 'mph'),
    ('windGustDirNum', None),
    ('dewPoint', None),
    ('dewPointUnit', 'F'),
    ('hn24Auto', None),
    ('hn24AutoUnit', 'in'),
    ('hstAuto', None),
    ('hstAutoUnit', 'in'),
)

fmap = {name: position for position, (name, _) in enumerate(_INFOEX_FIELDS)}
final_data = [initial for _, initial in _INFOEX_FIELDS]
186
# one final mapping: for each provider, the NRCS/MesoWest field names this
# program supports, keyed to their InfoEx counterpart
_PROVIDER_FIELD_MAPS = {
    'nrcs': {
        'PREC': 'precipitationGauge',
        'TOBS': 'tempPres',
        'SNWD': 'hS',
        'PRES': 'baro',
        'RHUM': 'rH',
        'WSPD': 'windSpeedNum',
        'WDIR': 'windDirectionNum',
        # unsupported by NRCS:
        # windGustSpeedNum
    },
    'mesowest': {
        'precip_accum': 'precipitationGauge',
        'air_temp': 'tempPres',
        'snow_depth': 'hS',
        'pressure': 'baro',
        'relative_humidity': 'rH',
        'wind_speed': 'windSpeedNum',
        'wind_direction': 'windDirectionNum',
        'wind_gust': 'windGustSpeedNum',
    },
}

# Copy so iemap is its own dict; an unrecognised provider yields an empty map.
iemap = dict(_PROVIDER_FIELD_MAPS.get(data['provider'], {}))
210
# floor time to nearest hour; the query window is the three hours that end
# at that hour boundary
#
# NOTE(review): datetime.now() is naive local time -- confirm it matches the
# time zone reported to InfoEx.
dt = datetime.datetime.now()
end_date = dt.replace(minute=0, second=0, microsecond=0)
begin_date = end_date - datetime.timedelta(hours=3)
217
# get the data
log.debug("Getting %s data from %s to %s" % (str(desired_data),
                                             str(begin_date), str(end_date)))

time_all_elements = time.time()

# NRCS-specific code
if data['provider'] == 'nrcs':
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=data['source'], transport=transport)

    # NRCS needs one request per element
    for elementCd in desired_data:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[data['station_id']],
            elementCd=elementCd,
            ordinal=1,
            beginDate=begin_date,
            endDate=end_date)

        log.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
                                                           time.time() - time_element))

        # guard against an empty result before indexing into it
        values = tmp[0]['values'] if tmp else None

        # isolate the most recent reading
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            newest = max(values, key=lambda t: t['dateTime'])
            infoex['wx_data'][elementCd] = newest['value']
        else:
            infoex['wx_data'][elementCd] = None

# MesoWest-specific code
elif data['provider'] == 'mesowest':
    # massage begin/end date format
    begin_date_str = begin_date.strftime('%Y%m%d%H%M')
    end_date_str = end_date.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str

    # Always pass a timeout: without one, requests can wait forever on an
    # unresponsive server. Only the exception type is logged because the
    # exception text may include the request URL, which carries the token.
    try:
        req = requests.get(api_req_url, timeout=60)
    except requests.exceptions.RequestException as exc:
        log.error("MesoWest request failed: %s" % (type(exc).__name__))
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # A response that parses as JSON but lacks the expected structure raises
    # KeyError/IndexError/TypeError here (never ValueError).
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        # index of the most recent observation
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    for elementCd in desired_data.split(','):
        # isolate the most recent reading, see note above in NRCS for how
        # and why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains
        #       all data (whereas with NRCS, we have to make a separate
        #       request for each element we want). This is nice for network
        #       efficiency but it means we have to handle this part
        #       differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data
        #       every 10 minutes -- though this provides more opportunity
        #       for irregularities

        # we may not have the data at all
        key_name = elementCd + '_set_1'
        series = observations.get(key_name)

        # Store the reading as-is: a missing/empty series or a null reading
        # becomes None, while a legitimate zero (e.g. no snow on the ground)
        # is kept instead of being discarded by a truthiness test.
        infoex['wx_data'][elementCd] = series[pos] if series else None

log.info("Time to get all data : %.3f sec" % (time.time() -
                                              time_all_elements))

log.debug("infoex[wx_data]: %s", str(infoex['wx_data']))
308
# final_data already carries a default for every column, so only the
# per-run values have to be filled in: location, observation date/time
# and whatever readings were fetched.
final_data[fmap['Location UUID']] = infoex['location_uuid']
final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
final_data[fmap['obTime']] = end_date.strftime('%H:%M')

for element_cd, reading in infoex['wx_data'].items():
    infoex_field = iemap.get(element_cd)
    if infoex_field is None:
        # fetched something we have no InfoEx column for
        log.warning("BAD KEY wx_data['%s']", element_cd)
        continue

    # CONSIDER: Casting every value to Float() -- need to investigate if
    #           any possible elementCds we may want are any other data
    #           type than float.
    #
    #           Another possibility is to query the API with
    #           getStationElements and temporarily store the
    #           storedUnitCd. But that's pretty network-intensive and
    #           may not even be worth it if there's only e.g. one or two
    #           exceptions to any otherwise uniformly Float value set.
    final_data[fmap[infoex_field]] = reading

log.debug("final_data: %s", final_data)
332
# Write the single-row CSV. newline='' is what the csv module asks for on
# files it writes to, so the writer's own line terminator is not translated
# a second time on platforms that rewrite '\n'.
with open(infoex['csv_filename'], 'w', newline='') as f:
    # The requirement is that empty values are represented in the CSV
    # file as "", csv.QUOTE_NONNUMERIC achieves that
    log.debug("writing CSV file '%s'" % (infoex['csv_filename']))
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(final_data)

if not options.dry_run:
    # not a dry run: upload the file, then remove the local copy
    with open(infoex['csv_filename'], 'rb') as f:
        log.debug("uploading FTP file '%s'" % (infoex['host']))
        # The context manager closes the connection even if the upload
        # raises, so a failed transfer does not leak the socket.
        #
        # NOTE(review): the remote name is csv_filename verbatim -- confirm
        # the config never puts a directory component in it.
        with FTP(infoex['host'], infoex['uuid'], infoex['api_key']) as ftp:
            ftp.storlines('STOR ' + infoex['csv_filename'], f)
    os.remove(infoex['csv_filename'])

log.debug('DONE')