9bd77c1f8315d1ad4528bd11859cad8eadeb3ae3
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32
33 from ftplib import FTP
34 from argparse import ArgumentParser
35
36 import pytz
37
38 import requests
39
40 import zeep
41 import zeep.cache
42 import zeep.transports
43
# Program version, reported by the --version command-line switch.
__version__ = '3.1.0'

# Module-wide logger. It starts at NOTSET so that the effective level is
# inherited until setup_logging() applies the user's choice.
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)
48
def get_parser():
    """Build the command-line ArgumentParser for this program.

    Recognised switches are --version, --config FILE, --log-level LEVEL
    and --dry-run.
    """
    # Each entry is (flag, keyword arguments for add_argument), kept in
    # the order the switches should appear in the help output.
    switches = (
        ("--version", {
            "action": "version",
            "version": __version__,
        }),
        ("--config", {
            "dest": "config",
            "metavar": "FILE",
            "help": "location of config file",
        }),
        ("--log-level", {
            "dest": "log_level",
            "default": None,
            "help": "set the log level (debug, info, warning)",
        }),
        ("--dry-run", {
            "action": "store_true",
            "dest": "dry_run",
            "default": False,
            "help": "fetch data but don't upload to InfoEx",
        }),
    )

    cli = ArgumentParser()
    for flag, settings in switches:
        cli.add_argument(flag, **settings)

    return cli
74
def setup_config(config):
    """Translate the parsed ini file into the program's working settings.

    Args:
        config: a ConfigParser that has already read the ini file. It must
            provide an [infoex] section and a [station] section.

    Returns:
        A tuple (infoex, station) of plain dicts. `infoex` holds the
        InfoEx credentials, CSV filename, location UUID and an empty
        'wx_data' placeholder. `station` holds the provider name, its
        provider-specific settings and a pytz timezone under 'tz'.

    Exits the program with status 1 (after logging at CRITICAL level) when
    a required key is missing, the station type or timezone is not
    recognised, or any configured value is empty.
    """
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest', 'python']:
            # 'python' is an accepted type as well, so name it, and
            # report through LOG like every other configuration error.
            LOG.critical("Please specify nrcs, mesowest or python as the "
                         "station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']
            # Tolerate "TOBS, SNWD": a padded element code would not
            # match any known key later on.
            station['desired_data'] = _split_element_list(
                config['station']['desired_data'])

            # XXX: For NRCS, we're manually overriding units for now! Once
            #      unit conversion is supported for NRCS, REMOVE THIS!
            if 'units' not in station:
                station['units'] = 'imperial'

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']
            # Kept as a comma-separated string (it is split again when
            # the response is read), but without stray whitespace so
            # the request URL and the later key lookups stay clean.
            station['desired_data'] = ','.join(_split_element_list(
                config['station']['desired_data']))

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                config['station']['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

        if station['provider'] == 'python':
            station['path'] = config['station']['path']

        # Default timezone, used when the ini file does not set one.
        tz = 'America/Los_Angeles'

        if 'tz' in config['station']:
            tz = config['station']['tz']

        try:
            station['tz'] = pytz.timezone(tz)
        except pytz.exceptions.UnknownTimeZoneError:
            LOG.critical("%s is not a valid timezone", tz)
            sys.exit(1)

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check
    try:
        for key in config.sections():
            for subkey in config[key]:
                if not config[key][subkey]:
                    raise ValueError
    except ValueError:
        LOG.critical("Config value '%s.%s' is empty", key, subkey)
        sys.exit(1)

    return (infoex, station)


def _split_element_list(raw):
    """Split a comma-separated config value into stripped, non-empty items."""
    return [item.strip() for item in raw.split(',') if item.strip()]
146
def setup_logging(log_level):
    """Attach a log handler and apply the requested verbosity.

    The systemd journal is used when its Python bindings can be imported;
    otherwise log records go to standard output.

    Args:
        log_level: None, 'debug', 'info' or 'warning'.

    Returns:
        True when the level was recognised and applied, False otherwise.
        The handler is attached in either case.
    """
    try:
        from systemd.journal import JournalHandler
        sink = JournalHandler()
    except ImportError:
        # No journal bindings available: fall back to stdout.
        sink = logging.StreamHandler(sys.stdout)
    LOG.addHandler(sink)

    # Accepted spellings and the logging level each one selects; None
    # means the switch was not given.
    accepted = (
        ('debug', logging.DEBUG),
        ('info', logging.INFO),
        ('warning', logging.WARNING),
        (None, logging.NOTSET),
    )

    for spelling, level in accepted:
        if log_level == spelling:
            LOG.setLevel(level)
            return True

    return False
174
def main():
    """Main routine: sort through args, decide what to do, then do it.

    Reads the configuration, fetches the weather data from the configured
    provider (NRCS, MesoWest or a custom Python program), copies the
    values into the InfoEx row, writes that row to the local CSV file and,
    unless --dry-run was given, uploads the file.

    Returns:
        0 on success, 1 when the local CSV file could not be written.
        Configuration and data-retrieval problems terminate the program
        through sys.exit(1).
    """
    parser = get_parser()
    options = parser.parse_args()

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config = configparser.ConfigParser(allow_no_value=False)

    # ConfigParser.read() silently skips files it cannot open and returns
    # the list of files it actually parsed, so an empty result means the
    # given file is missing or unreadable. Logging is not set up yet,
    # hence print().
    if not config.read(options.config):
        print("\nCould not read configuration file: " + options.config)
        sys.exit(1)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    try:
        if station['units'] == 'metric':
            final_data = switch_units_to_metric(final_data, fmap)
    except KeyError:
        # custom Python programs are not required to configure units
        if station['provider'] != 'python':
            LOG.error("Please specify the units in the configuration "
                      "file")
            sys.exit(1)

    (begin_date, end_date) = setup_time_values(station)

    if station['provider'] == 'python':
        LOG.debug("Getting custom data from external Python program")
    else:
        LOG.debug("Getting %s data from %s to %s (%s)",
                  str(station['desired_data']),
                  str(begin_date), str(end_date), end_date.tzinfo.zone)

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)
    elif station['provider'] == 'python':
        infoex['wx_data'] = _load_custom_wx_data(station['path'])

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # timezone massaging
    final_end_date = end_date.astimezone(station['tz'])

    # Only the fields that differ from the defaults prepared by
    # setup_infoex_fields_mapping() need to be filled in here.
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = final_end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = final_end_date.strftime('%H:%M')

    for element_cd in infoex['wx_data']:
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        # Massage precision of certain values to fit InfoEx's
        # expectations
        #
        # 0 decimal places: wind speed, wind direction, wind gust, snow depth
        # 1 decimal place:  air temp, relative humidity, baro
        # Avoid transforming None values
        if infoex['wx_data'][element_cd] is None:
            continue
        elif element_cd in ['wind_speed', 'WSPD', 'wind_direction',
                            'WDIR', 'wind_gust', 'SNWD', 'snow_depth']:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd])
        elif element_cd in ['TOBS', 'air_temp', 'RHUM',
                            'relative_humidity', 'PRES', 'pressure']:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd], 1)

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        final_data[fmap[iemap[element_cd]]] = infoex['wx_data'][element_cd]

    LOG.debug("final_data: %s", str(final_data))

    # nothing is written or uploaded when no data was retrieved at all
    if infoex['wx_data']:
        if not write_local_csv(infoex['csv_filename'], final_data):
            LOG.warning('Could not write local CSV file: %s',
                        infoex['csv_filename'])
            return 1

        if not options.dry_run:
            upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0


def _load_custom_wx_data(path):
    """Run the user's custom Wx program at `path` and return its data.

    The program is loaded as module 'custom_wx', receives this program's
    logger as its LOG attribute and must provide get_custom_data(). A
    None result is turned into an empty list. Any failure is logged and
    ends the program with status 1.
    """
    # Only the import of importlib itself says anything about the running
    # interpreter. An ImportError raised while executing the custom
    # program (e.g. a package it needs is missing) must not be reported
    # as a Python version problem, so it is handled with the other load
    # errors further down.
    try:
        import importlib.util
    except ImportError:
        LOG.error("Please upgrade to Python 3.3 or later")
        sys.exit(1)

    try:
        spec = importlib.util.spec_from_file_location('custom_wx', path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        mod.LOG = LOG
    except FileNotFoundError:
        LOG.error("Specified Python program for custom Wx data "
                  "was not found")
        sys.exit(1)
    except Exception as exc:
        LOG.error("A problem was encountered when attempting to "
                  "load your custom Wx program: %s", str(exc))
        sys.exit(1)

    try:
        wx_data = mod.get_custom_data()
    except Exception as exc:
        LOG.error("Python program for custom Wx data failed in "
                  "execution: %s", str(exc))
        sys.exit(1)

    LOG.info("Successfully executed external Python program")

    return [] if wx_data is None else wx_data
320
321 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Describe the InfoEx CSV row: which column each field lives in and what
    the row looks like before any observation is filled in.

    The CSV file is simply an ordered list of values, but the rest of the
    program wants to address those values by field name. This routine
    therefore returns two things:

    fmap        dict of InfoEx field name -> column index
    final_data  list of 29 column values, pre-filled with the location
                UUID and the default unit labels, None everywhere else

    Note that the Auto Wx InfoEx documentation numbers these columns
    starting at 1, whereas the indices here start at 0.
    """
    # One (field name, initial value) pair per CSV column, in column order.
    columns = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {name: index for index, (name, _) in enumerate(columns)}
    final_data = [initial for _, initial in columns]

    return (fmap, final_data)
370
def setup_infoex_counterparts_mapping(provider):
    """
    Return the dictionary that translates the given provider's element
    names into the InfoEx field names this program supports.

    Known providers are 'nrcs', 'mesowest' and 'python'; any other value
    yields an empty mapping.
    """
    if provider == 'nrcs':
        # NRCS has no counterpart for windGustSpeedNum.
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'TMAX': 'tempMaxHour',
            'TMIN': 'tempMinHour',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'air_temp_high_24_hour': 'tempMaxHour',
            'air_temp_low_24_hour': 'tempMinHour',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    if provider == 'python':
        # Custom Python programs are expected to use the InfoEx names
        # directly, so every supported name simply maps to itself.
        supported = ('precipitationGauge', 'tempPres', 'tempMaxHour',
                     'tempMinHour', 'hS', 'baro', 'rH', 'windSpeedNum',
                     'windDirectionNum', 'windGustSpeedNum')
        return {name: name for name in supported}

    return {}
415
416 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """Fetch the newest hourly value of every desired element from NRCS.

    One getHourlyData request is made per element code in
    station['desired_data'], covering the period from `begin` to `end`.

    Returns a dict of element code -> value of the most recent reading in
    that period, or None when the service returned no readings.
    """
    cache_transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    soap = zeep.Client(wsdl=station['source'], transport=cache_transport)

    # the service wants its period bounds as 'YYYY-MM-DD HH:MM:00' strings
    period_start = begin.strftime('%Y-%m-%d %H:%M:00')
    period_end = end.strftime('%Y-%m-%d %H:%M:00')

    newest_values = {}

    for element_cd in station['desired_data']:
        request_started = time.time()

        response = soap.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=period_start,
            endDate=period_end)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - request_started)

        readings = response[0]['values']

        if not readings:
            newest_values[element_cd] = None
            continue

        # NRCS hourly data sometimes has gaps, so rather than insisting
        # on the latest hour we take whichever reading is most recent.
        # That may be slightly stale, which is an accepted trade-off.
        most_recent = max(readings, key=lambda reading: reading['dateTime'])
        newest_values[element_cd] = most_recent['value']

    return newest_values
455
def get_mesowest_data(begin, end, station):
    """Get the data we're after from the MesoWest/Synoptic API.

    A single request covers all elements listed (comma-separated) in
    station['desired_data'] for the period from `begin` to `end`.

    Returns:
        A dict of element name -> value at the last time step of the
        response, or None when the element is absent or has no value
        there. Wind speed and gust are converted from knots to mph unless
        metric units were requested.

    Exits the program with status 1 (after logging the reason) when the
    request fails or the response does not have the expected layout.
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str

    # Without a timeout a stalled connection would block this (typically
    # unattended, hourly) run forever.
    try:
        req = requests.get(api_req_url, timeout=60)
    except requests.exceptions.RequestException as exc:
        # Only the exception type is logged: the full message can contain
        # the request URL, and with it the API token.
        LOG.error("Could not retrieve data from MesoWest (%s)",
                  type(exc).__name__)
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        # index of the most recent time step in every data series
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        # also covers an empty STATION list and non-dict payloads
        LOG.error("Unexpected JSON in MesoWest response")
        sys.exit(1)

    # With 'metric' units the wind values are labelled m/s in the InfoEx
    # row, so they must be passed through untouched. Otherwise they are
    # treated as knots, and InfoEx is given mph.
    wind_needs_conversion = station.get('units') != 'metric'

    for element_cd in station['desired_data'].split(','):
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains
        #       all data (whereas with NRCS, we have to make a separate
        #       request for each element we want), so every element is
        #       read from the same response here.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data
        #       every 10 minutes -- though this provides more opportunity
        #       for irregularities

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1')

        value = None
        if series is not None and 0 <= pos < len(series):
            value = series[pos]

        # Compare against None rather than testing truthiness: a reading
        # of 0 (calm wind, no snow, zero degrees) is valid data.
        if (value is not None and wind_needs_conversion
                and element_cd in ('wind_speed', 'wind_gust')):
            value = kn_to_mph(value)

        remote_data[element_cd] = value

    return remote_data
517
def switch_units_to_metric(data_map, mapping):
    """Replace the default unit labels with their metric counterparts.

    Args:
        data_map: the list of InfoEx column values; it is modified in
            place.
        mapping: dict of InfoEx field name -> index into `data_map`.

    Returns:
        The same `data_map` object, for convenience.
    """

    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping ()
    #
    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
    #       itself handles the unit conversion; in the future, we will also
    #       support NRCS unit conversion, but this must be done by this
    #       program.

    # All temperatures come from the same request and therefore share one
    # unit; the hourly max/min must not keep their 'F' label when the
    # present temperature is labelled 'C'.
    for temperature_unit in ('tempMaxHourUnit', 'tempMinHourUnit',
                             'tempPresUnit'):
        data_map[mapping[temperature_unit]] = 'C'

    data_map[mapping['hsUnit']] = 'm'
    data_map[mapping['windSpeedUnit']] = 'm/s'
    data_map[mapping['windGustSpeedNumUnit']] = 'm/s'

    # NOTE(review): the precipitation gauge and baro labels are left at
    # their defaults ('in', 'inHg'); confirm which unit the provider
    # returns for them in metric mode and which labels InfoEx accepts.

    return data_map
534
535 # CSV operations
def write_local_csv(path_to_file, data):
    """Write `data` as a single CSV row to the file at `path_to_file`.

    Args:
        path_to_file: where to create (or overwrite) the CSV file.
        data: the sequence of column values making up the row.

    Returns:
        True when the file was written, False when it could not be
        opened or written (the reason is logged).
    """
    LOG.debug("writing CSV file '%s'", path_to_file)

    try:
        # newline='' lets the csv module control line endings itself, as
        # its documentation requires for files handed to csv.writer.
        with open(path_to_file, 'w', newline='') as file_object:
            # The requirement is that empty values are represented in the
            # CSV file as "", csv.QUOTE_NONNUMERIC achieves that
            writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as exc:
        # Report failure through the return value, which is what the
        # caller checks, instead of letting the exception escape.
        LOG.error("unable to write CSV file '%s': %s", path_to_file, exc)
        return False

    return True
546
def upload_csv(path_to_file, infoex_data):
    """Upload the specified CSV file to InfoEx FTP and remove the file.

    Args:
        path_to_file: the local CSV file; the same string is used as the
            name in the FTP STOR command.
        infoex_data: dict providing 'host', 'uuid' (FTP user name) and
            'api_key' (FTP password).

    The local file is removed only after the upload succeeded; if the
    upload raises, the file is left in place and the exception propagates.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])

        # Using the connection as a context manager guarantees it is shut
        # down even when the transfer fails part-way.
        with FTP(infoex_data['host'], infoex_data['uuid'],
                 infoex_data['api_key']) as ftp:
            ftp.storlines('STOR ' + path_to_file, file_object)

    os.remove(path_to_file)
557
558 # other miscellaneous routines
def setup_time_values(station):
    """Work out the period to request data for.

    The period ends at the start of the current hour and begins three
    hours before that. Times are expressed in the station's configured
    timezone for NRCS and in UTC for every other provider.

    Returns the tuple (begin_date, end_date).
    """
    # NRCS is queried in the configured timezone; everything else
    # (i.e. MesoWest) in UTC
    zone = station['tz'] if station['provider'] == 'nrcs' else pytz.utc

    now = datetime.datetime.now(tz=zone)

    # strip minutes, seconds and microseconds to land on the full hour
    past_the_hour = datetime.timedelta(minutes=now.minute,
                                       seconds=now.second,
                                       microseconds=now.microsecond)
    end_date = now - past_the_hour
    begin_date = end_date - datetime.timedelta(hours=3)

    return (begin_date, end_date)
576
def ms_to_mph(ms):
    """Return the speed `ms`, given in meters per second, in miles per hour."""
    mph_per_ms = 2.236936
    return ms * mph_per_ms
580
def kn_to_mph(kn):
    """Return the speed `kn`, given in knots, in miles per hour."""
    mph_per_knot = 1.150779
    return kn * mph_per_knot
584
if __name__ == "__main__":
    # Run the program and hand main()'s return value to the interpreter
    # as the process exit status.
    raise SystemExit(main())