e4befadd20e8206f1ceb8d216a8c989eac657545
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 Version 2.0.0
10
11 This program fetches data from either an NRCS SNOTEL site or MesoWest
12 weather station and pushes it to InfoEx using the new automated weather
13 system implementation.
14
15 It is designed to be run hourly, and it asks for the last three hours
16 of data of each desired type, and selects the most recent one. This
17 lends some resiliency to the process and helps ensure that we have a
18 value to send, but it can lead to somewhat inconsistent/untruthful
19 data if e.g. the HS is from the last hour but the tempPres is from two
20 hours ago because the instrumentation had a hiccup. It's worth
21 considering if this is a bug or a feature.
22
23 For more information, see file: README
24 For licensing, see file: LICENSE
25 """
26
27 import configparser
28 import csv
29 import datetime
30 import logging
31 import os
32 import sys
33 import time
34
35 from collections import OrderedDict
36 from ftplib import FTP
37 from optparse import OptionParser
38
39 import requests
40
41 import zeep
42 import zeep.cache
43 import zeep.transports
44
__version__ = '2.0.0'

# Module-level logger. NOTSET on a non-root logger defers the effective
# level to its ancestors; the --log-level switch may override it later.
log = logging.getLogger(__name__)
log.setLevel(logging.NOTSET)

# Prefer the systemd journal when its Python bindings are installed;
# otherwise write log records to stdout.
try:
    from systemd.journal import JournalHandler
    log.addHandler(JournalHandler())
except ImportError:
    # Only a missing systemd module should trigger the fallback; a bare
    # "except:" would also swallow KeyboardInterrupt and SystemExit.
    # (logging.handlers.SysLogHandler would be an alternative fallback.)
    handler = logging.StreamHandler(sys.stdout)
    log.addHandler(handler)
60
# Command-line interface. Each entry pairs a switch with the keyword
# arguments handed to add_option(); entries are registered in the order
# listed here.
parser = OptionParser(version=__version__)

for switch, settings in (
        ("--config", dict(dest="config",
                          metavar="FILE",
                          help="location of config file")),
        ("--log-level", dict(dest="log_level",
                             default=None,
                             help="set the log level (debug, info, warning)")),
        ("--dry-run", dict(action="store_true",
                           dest="dry_run",
                           default=False,
                           help="fetch data but don't upload to InfoEx")),
):
    parser.add_option(switch, **settings)

# 'options' carries the parsed switches, 'args' any leftover positionals
options, args = parser.parse_args()
80
# A configuration file is mandatory; bail out with usage help if the
# --config switch was omitted.
if not options.config:
    parser.print_help()
    print("\nPlease specify a configuration file via --config.")
    sys.exit(1)

config = configparser.ConfigParser(allow_no_value=False)

# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed, so an empty result means the given
# path was missing or unreadable. Report that directly instead of letting
# it surface later as a confusing "section not defined" error.
if not config.read(options.config):
    print("\nCould not read configuration file '%s'." % (options.config))
    sys.exit(1)
89
# Translate the --log-level switch into a logging constant. None means the
# switch was not given, which leaves the logger at NOTSET.
level_by_switch = {
    None: logging.NOTSET,
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
}

# anything outside the table is a usage error
if options.log_level not in level_by_switch:
    parser.print_help()
    print("\nPlease select an appropriate log level or remove the switch (--log-level).")
    sys.exit(1)

log.setLevel(level_by_switch[options.log_level])

log.debug('STARTING UP')
106
# Gather everything needed from the config file into two dictionaries:
# 'infoex' (upload target and credentials) and 'data' (weather provider
# settings). 'desired_data' names the elements to fetch: a list of codes
# for NRCS, a comma-separated string for MesoWest.
#
# Any missing mandatory option raises KeyError, which is reported below.
try:
    infoex = {
        'host': config['infoex']['host'],
        'uuid': config['infoex']['uuid'],
        'api_key': config['infoex']['api_key'],
        'csv_filename': config['infoex']['csv_filename'],
        'location_uuid': config['infoex']['location_uuid'],
        'wx_data': {},  # placeholder key, values to come later
    }

    data = dict()
    data['provider'] = config['station']['type']

    if data['provider'] not in ['nrcs', 'mesowest']:
        print("Please specify either nrcs or mesowest as the station type.")
        sys.exit(1)

    # Element codes exactly as configured, minus stray whitespace and
    # empty entries ("TOBS, SNWD" would otherwise yield the unknown code
    # " SNWD"). The option is optional, hence the fallback.
    requested = [code.strip() for code in
                 config['station'].get('desired_data', fallback='').split(',')
                 if code.strip()]

    if data['provider'] == 'nrcs':
        data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
        data['station_id'] = config['station']['station_id']

        # desired_data missing or empty, setting default
        desired_data = requested or [
            'TOBS',  # AIR TEMPERATURE OBSERVED (degF)
            'SNWD',  # SNOW DEPTH (in)
            'PREC',  # PRECIPITATION ACCUMULATION (in)
        ]

        # XXX: For NRCS, we're manually overriding units for now! Once
        #      unit conversion is supported for NRCS, REMOVE THIS!
        data['units'] = 'imperial'

    if data['provider'] == 'mesowest':
        data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
        data['station_id'] = config['station']['station_id']
        data['units'] = config['station']['units']

        # desired_data missing or empty, setting default
        desired_data = ','.join(requested) or 'air_temp,snow_depth'

        # construct full API URL (sans start/end time, added later)
        data['source'] = (data['source']
                          + '?token=' + config['station']['token']
                          + '&within=60&units=' + data['units']
                          + '&stid=' + data['station_id']
                          + '&vars=' + desired_data)

except KeyError as e:
    log.critical("%s not defined in %s" % (e, options.config))
    sys.exit(1)
except Exception as exc:
    # top-level boundary: report anything unexpected and stop
    log.critical("Exception occurred in config parsing: '%s'" % (exc))
    sys.exit(1)
163
# all sections/values present in config file, final sanity check: no
# option in any section may be left empty
for key in config.sections():
    for subkey in config[key]:
        if not config[key][subkey]:
            log.critical("Config value '%s.%s' is empty" % (key, subkey))
            # sys.exit rather than the site-provided exit(), which is
            # meant for interactive use
            sys.exit(1)
173
# INFOEX FIELDS
#
# The end result of this program is simply an ordered row in a CSV file,
# but we still want to address and manipulate individual values by name
# before writing it. Hence two structures derived from one table:
#
#   fmap        field name -> column index
#   final_data  the row itself, pre-filled with each column's default
#
# Also note that the current Auto Wx InfoEx documentation shows these
# keys in a graphical table with the "index" beginning at 1, but here we
# are sanely indexing beginning at 0.
infoex_fields = (
    ('Location UUID', infoex['location_uuid']),
    ('obDate', None),
    ('obTime', None),
    ('timeZone', 'Pacific'),
    ('tempMaxHour', None),
    ('tempMaxHourUnit', 'F'),
    ('tempMinHour', None),
    ('tempMinHourUnit', 'F'),
    ('tempPres', None),
    ('tempPresUnit', 'F'),
    ('precipitationGauge', None),
    ('precipitationGaugeUnit', 'in'),
    ('windSpeedNum', None),
    ('windSpeedUnit', 'mph'),
    ('windDirectionNum', None),
    ('hS', None),
    ('hsUnit', 'in'),
    ('baro', None),
    ('baroUnit', 'inHg'),
    ('rH', None),
    ('windGustSpeedNum', None),
    ('windGustSpeedNumUnit', 'mph'),
    ('windGustDirNum', None),
    ('dewPoint', None),
    ('dewPointUnit', 'F'),
    ('hn24Auto', None),
    ('hn24AutoUnit', 'in'),
    ('hstAuto', None),
    ('hstAutoUnit', 'in'),
)

fmap = {name: position
        for position, (name, _default) in enumerate(infoex_fields)}
final_data = [default for _name, default in infoex_fields]
214
# one final mapping: the NRCS/MesoWest fields that this program supports,
# keyed by provider, each pointing at its InfoEx counterpart
provider_field_maps = {
    'nrcs': {
        'PREC': 'precipitationGauge',
        'TOBS': 'tempPres',
        'SNWD': 'hS',
        'PRES': 'baro',
        'RHUM': 'rH',
        'WSPD': 'windSpeedNum',
        'WDIR': 'windDirectionNum',
        # unsupported by NRCS:
        # windGustSpeedNum
    },
    'mesowest': {
        'precip_accum': 'precipitationGauge',
        'air_temp': 'tempPres',
        'snow_depth': 'hS',
        'pressure': 'baro',
        'relative_humidity': 'rH',
        'wind_speed': 'windSpeedNum',
        'wind_direction': 'windDirectionNum',
        'wind_gust': 'windGustSpeedNum',
    },
}

# an unrecognised provider leaves the mapping empty
iemap = dict(provider_field_maps.get(data['provider'], {}))
238
# override units if user selected metric
#
# NOTE: to update this, add a (unit field, unit) pair below; unit fields
#       are the '...Unit' names of the fmap<->final_data mapping
#
# NOTE: this only 'works' with MesoWest for now, as the MesoWest API
#       itself handles the unit conversion; in the future, we will also
#       support NRCS unit conversion, but this must be done by this
#       program.
#
# NOTE(review): precipitation and pressure units are left at their
#       defaults here -- confirm that matches what the API returns for
#       metric requests.
if data['units'] == 'metric':
    for unit_field, metric_unit in (
            ('tempPresUnit', 'C'),
            ('hsUnit', 'm'),
            ('windSpeedUnit', 'm/s'),
            ('windGustSpeedNumUnit', 'm/s'),
    ):
        final_data[fmap[unit_field]] = metric_unit
252
# Observation window: the three hours ending at the top of the current
# hour (end_date is "now" with minutes and smaller units zeroed out).
dt = datetime.datetime.now()
end_date = dt.replace(minute=0, second=0, microsecond=0)
begin_date = end_date - datetime.timedelta(hours=3)

# get the data
window_message = "Getting %s data from %s to %s" % (
    str(desired_data), str(begin_date), str(end_date))
log.debug(window_message)
263
# Fetch the most recent value of every desired element from the configured
# provider and store it in infoex['wx_data'], keyed by element code.
# Elements for which nothing usable came back are stored as None.
time_all_elements = time.time()

# NRCS-specific code
if data['provider'] == 'nrcs':
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=data['source'], transport=transport)

    # NRCS needs one request per element
    for elementCd in desired_data:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[data['station_id']],
            elementCd=elementCd,
            ordinal=1,
            beginDate=begin_date,
            endDate=end_date)

        log.info("Time to get elementCd '%s': %.3f sec" % (
            elementCd, time.time() - time_element))

        # an empty response is treated like a station record that
        # carries no values, instead of crashing on tmp[0]
        values = tmp[0]['values'] if tmp else None

        # isolate the most recent value
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda t: t['dateTime'])
            infoex['wx_data'][elementCd] = latest['value']
        else:
            infoex['wx_data'][elementCd] = None

# MesoWest-specific code
elif data['provider'] == 'mesowest':
    # massage begin/end date format
    begin_date_str = begin_date.strftime('%Y%m%d%H%M')
    end_date_str = end_date.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str

    # Without a timeout requests waits indefinitely on a stalled server,
    # which would wedge an unattended hourly run.
    try:
        req = requests.get(api_req_url, timeout=30)
    except requests.exceptions.RequestException as exc:
        log.error("MesoWest request failed: %s" % (exc))
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # A well-formed but unexpected payload (e.g. an error document without
    # STATION) fails with KeyError/IndexError/TypeError, not ValueError.
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    for elementCd in desired_data.split(','):
        # isolate the most recent observation, see note above in NRCS for
        # how and why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains
        #       all data (whereas with NRCS, we have to make a separate
        #       request for each element we want). This is nice for network
        #       efficiency but it means we have to handle this part
        #       differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data
        #       every 10 minutes -- though this provides more opportunity
        #       for irregularities
        key_name = elementCd + '_set_1'

        # we may not have the data at all: either the element is absent
        # from the response or the window held no observations (pos < 0)
        reading = None
        if pos >= 0 and key_name in observations:
            # Keep the reading as-is: a value of 0 (calm wind, no snow,
            # freezing point) is real data and must not be dropped.
            reading = observations[key_name][pos]

        infoex['wx_data'][elementCd] = reading

log.info("Time to get all data : %.3f sec" % (time.time() -
                                              time_all_elements))
348
log.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

# The row was pre-filled with defaults when final_data was declared, so
# only the observation-specific columns need to be written here.
final_data[fmap['Location UUID']] = infoex['location_uuid']
final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
final_data[fmap['obTime']] = end_date.strftime('%H:%M')

# Copy each fetched reading into the column of its InfoEx counterpart.
#
# CONSIDER: Casting every value to Float() -- need to investigate if
#           any possible elementCds we may want are any other data
#           type than float.
#
#           Another possibility is to query the API with
#           getStationElements and temporarily store the
#           storedUnitCd. But that's pretty network-intensive and
#           may not even be worth it if there's only e.g. one or two
#           exceptions to any otherwise uniformly Float value set.
for elementCd, reading in infoex['wx_data'].items():
    infoex_field = iemap.get(elementCd)

    # element codes without an InfoEx counterpart are reported and skipped
    if infoex_field is None:
        log.warning("BAD KEY wx_data['%s']" % (elementCd))
        continue

    final_data[fmap[infoex_field]] = reading

log.debug("final_data: %s" % (str(final_data)))
374
# Write the single-row CSV file. newline='' leaves line endings to the csv
# module, as its documentation asks for files handed to csv.writer. The
# with-statement closes the file, so no explicit close() is needed.
with open(infoex['csv_filename'], 'w', newline='') as f:
    # The requirement is that empty values are represented in the CSV
    # file as "", csv.QUOTE_NONNUMERIC achieves that
    log.debug("writing CSV file '%s'" % (infoex['csv_filename']))
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(final_data)

if not options.dry_run:
    # not a dry run: upload the file, then remove the local copy
    #
    # Both the file and the FTP session are context-managed so that they
    # are released even if the transfer raises.
    #
    # NOTE(review): the remote name is csv_filename verbatim, so a value
    # containing directories is sent as a remote path -- confirm intended.
    with open(infoex['csv_filename'], 'rb') as f:
        log.debug("uploading FTP file '%s'" % (infoex['host']))
        with FTP(infoex['host'], infoex['uuid'], infoex['api_key']) as ftp:
            ftp.storlines('STOR ' + infoex['csv_filename'], f)
    os.remove(infoex['csv_filename'])

log.debug('DONE')