Merge branch 'release-3.2.4'
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32 import urllib3
33 import importlib.util
34
35 from ftplib import FTP
36 from argparse import ArgumentParser
37
38 import pytz
39
40 import requests
41
42 import zeep
43 import zeep.cache
44 import zeep.transports
45
# program version, reported by the --version switch
__version__ = '3.2.4'

# module-wide logger; handlers and the effective level are attached later
# by setup_logging()
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)

# silence urllib3 warnings for the whole process
# NOTE(review): presumably this targets the InsecureRequestWarning caused
# by get_nrcs_data() turning off certificate verification -- confirm
urllib3.disable_warnings()
52
def get_parser():
    """Build and return the ArgumentParser describing this program's CLI"""
    cli = ArgumentParser()

    # (switch, settings) pairs, registered in the order they are listed
    switches = (
        ("--version", {"action": "version",
                       "version": __version__}),
        ("--config", {"dest": "config",
                      "metavar": "FILE",
                      "help": "location of config file"}),
        ("--log-level", {"dest": "log_level",
                         "default": None,
                         "help": "set the log level (debug, info, warning)"}),
        ("--dry-run", {"action": "store_true",
                       "dest": "dry_run",
                       "default": False,
                       "help": "fetch data but don't upload to InfoEx"}),
    )

    for switch, settings in switches:
        cli.add_argument(switch, **settings)

    return cli
78
def setup_config(config):
    """
    Setup config variable based on values specified in the ini file

    config -- the parsed ini file (a configparser.ConfigParser, or any
              mapping of section name to a mapping of option values)

    Returns the tuple (infoex, station), two dicts holding the InfoEx
    upload settings and the weather station settings respectively.

    Exits the program with status 1 if the station type is unknown, a
    required option is missing, the timezone is not known to pytz, or any
    option in the file is empty.
    """
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest', 'python']:
            print("Please specify either nrcs, mesowest or python as the "
                  "station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']
            # tolerate "TOBS, SNWD" and trailing commas: surrounding
            # whitespace and empty entries are not valid element codes
            station['desired_data'] = [
                element.strip()
                for element in config['station']['desired_data'].split(',')
                if element.strip()
            ]
            station['units'] = config['station']['units']

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']
            # kept as a comma-separated string (it goes straight into the
            # URL), but normalized the same way as for NRCS
            station['desired_data'] = ','.join(
                element.strip()
                for element in config['station']['desired_data'].split(',')
                if element.strip()
            )

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                config['station']['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

        if station['provider'] == 'python':
            station['path'] = config['station']['path']

        # the timezone is optional and defaults to US Pacific
        tz = 'America/Los_Angeles'

        if 'tz' in config['station']:
            tz = config['station']['tz']

        try:
            station['tz'] = pytz.timezone(tz)
        except pytz.exceptions.UnknownTimeZoneError:
            LOG.critical("%s is not a valid timezone", tz)
            sys.exit(1)

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check:
    # no option anywhere in the file may be empty
    for key in config.sections():
        for subkey in config[key]:
            if not config[key][subkey]:
                LOG.critical("Config value '%s.%s' is empty", key, subkey)
                sys.exit(1)

    return (infoex, station)
146
def setup_logging(log_level):
    """
    Attach a handler to the module logger and apply the requested level.

    log_level -- None, 'debug', 'info' or 'warning'

    Returns True on success, False if log_level is not one of the above
    (the handler has already been attached by then).
    """
    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except ImportError:
        # no systemd bindings available: log to stdout instead
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setFormatter(
            logging.Formatter('%(asctime)s.%(msecs)03d '
                              '%(levelname)s %(module)s - '
                              '%(funcName)s: %(message)s',
                              '%Y-%m-%d %H:%M:%S'))
        LOG.addHandler(stream_handler)

    # reject anything but the supported names (or no name at all)
    if log_level not in (None, 'debug', 'info', 'warning'):
        return False

    named_levels = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }

    # no switch given means NOTSET
    LOG.setLevel(named_levels.get(log_level, logging.NOTSET))
    return True
179
def _load_custom_wx_data(path):
    """Execute the external Python program at path and return its Wx data"""
    try:
        spec = importlib.util.spec_from_file_location('custom_wx', path)
        custom = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(custom)
        # share our logger with the external program
        custom.LOG = LOG

        try:
            wx_data = custom.get_custom_data()

            if wx_data is None:
                wx_data = []
        except Exception as exc:
            LOG.error("Python program for custom Wx data failed in "
                      "execution: %s", str(exc))
            sys.exit(1)

        LOG.info("Successfully executed external Python program")
        return wx_data
    except ImportError:
        LOG.error("Please upgrade to Python 3.3 or later")
        sys.exit(1)
    except FileNotFoundError:
        LOG.error("Specified Python program for custom Wx data "
                  "was not found")
        sys.exit(1)
    except Exception as exc:
        LOG.error("A problem was encountered when attempting to "
                  "load your custom Wx program: %s", str(exc))
        sys.exit(1)

def main():
    """
    Program entry point.

    Parses the command line and the config file, fetches the Wx data from
    the configured provider, maps it onto the InfoEx CSV row, writes the
    CSV and (unless --dry-run was given) uploads it.

    Returns 0 on success and 1 if the local CSV could not be written;
    configuration and fetch problems exit the program with status 1.
    """
    parser = get_parser()
    options = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config.read(options.config)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)
    provider = station['provider']

    LOG.debug('Config parsed, starting up')

    # field name -> CSV column, plus the CSV row pre-filled with defaults
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    # provider element name -> InfoEx field name
    iemap = setup_infoex_counterparts_mapping(provider)

    # the row defaults to English units; swap the labels for metric users
    if provider != 'python' and station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values(station)

    if provider == 'python':
        LOG.debug("Getting custom data from external Python program")
    else:
        LOG.debug("Getting %s data from %s to %s (%s)",
                  str(station['desired_data']),
                  str(begin_date), str(end_date), end_date.tzinfo.zone)

    fetch_started = time.time()

    # fetch from whichever provider was configured
    if provider == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif provider == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)
    elif provider == 'python':
        infoex['wx_data'] = _load_custom_wx_data(station['path'])

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             fetch_started)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # express the observation time in the station's timezone
    final_end_date = end_date.astimezone(station['tz'])

    # only the columns that differ from the pre-filled defaults are set
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = final_end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = final_end_date.strftime('%H:%M')
    final_data[fmap['timeZone']] = station['tz'].zone

    # InfoEx precision expectations, keyed by provider element name
    whole_number_elements = ('wind_speed', 'WSPD', 'wind_direction',
                             'RHUM', 'relative_humidity', 'WDIR',
                             'wind_gust', 'SNWD', 'snow_depth')
    one_decimal_elements = ('TOBS', 'air_temp', 'PRES', 'pressure')
    two_decimal_elements = ('PREC', 'precip_accum')

    nrcs_metric = provider == 'nrcs' and station['units'] == 'metric'
    wx_data = infoex['wx_data']

    for element_cd in wx_data:
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        value = wx_data[element_cd]

        # nothing to transform or report for a missing value
        if value is None:
            continue

        # convert first, round afterwards
        if nrcs_metric:
            value = convert_nrcs_units_to_metric(element_cd, value)

        if element_cd in whole_number_elements:
            value = round(value)
        elif element_cd in one_decimal_elements:
            value = round(value, 1)
        elif element_cd in two_decimal_elements:
            value = round(value, 2)

        # CONSIDER: casting every value to float -- it is not yet known
        #           whether any desired elementCd is of another type
        wx_data[element_cd] = value
        final_data[fmap[iemap[element_cd]]] = value

    LOG.debug("final_data: %s", str(final_data))

    if infoex['wx_data']:
        if not write_local_csv(infoex['csv_filename'], final_data):
            LOG.warning('Could not write local CSV file: %s',
                        infoex['csv_filename'])
            return 1

        if not options.dry_run:
            upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
326
327 # data structure operations
328 def setup_infoex_fields_mapping(location_uuid):
329 """
330 Create a mapping of InfoEx fields to the local data's indexing scheme.
331
332 INFOEX FIELDS
333
334 This won't earn style points in Python, but here we establish a couple
335 of helpful mappings variables. The reason this is helpful is that the
336 end result is simply an ordered set, the CSV file. But we still may
337 want to manipulate the values arbitrarily before writing that file.
338
339 Also note that the current Auto Wx InfoEx documentation shows these
340 keys in a graphical table with the "index" beginning at 1, but here we
341 sanely index beginning at 0.
342 """
343 # pylint: disable=too-many-statements,multiple-statements,bad-whitespace
344 fmap = {} ; final_data = [None] * 29
345 fmap['Location UUID'] = 0 ; final_data[0] = location_uuid
346 fmap['obDate'] = 1 ; final_data[1] = None
347 fmap['obTime'] = 2 ; final_data[2] = None
348 fmap['timeZone'] = 3 ; final_data[3] = 'Pacific'
349 fmap['tempMaxHour'] = 4 ; final_data[4] = None
350 fmap['tempMaxHourUnit'] = 5 ; final_data[5] = 'F'
351 fmap['tempMinHour'] = 6 ; final_data[6] = None
352 fmap['tempMinHourUnit'] = 7 ; final_data[7] = 'F'
353 fmap['tempPres'] = 8 ; final_data[8] = None
354 fmap['tempPresUnit'] = 9 ; final_data[9] = 'F'
355 fmap['precipitationGauge'] = 10 ; final_data[10] = None
356 fmap['precipitationGaugeUnit'] = 11 ; final_data[11] = 'in'
357 fmap['windSpeedNum'] = 12 ; final_data[12] = None
358 fmap['windSpeedUnit'] = 13 ; final_data[13] = 'mph'
359 fmap['windDirectionNum'] = 14 ; final_data[14] = None
360 fmap['hS'] = 15 ; final_data[15] = None
361 fmap['hsUnit'] = 16 ; final_data[16] = 'in'
362 fmap['baro'] = 17 ; final_data[17] = None
363 fmap['baroUnit'] = 18 ; final_data[18] = 'inHg'
364 fmap['rH'] = 19 ; final_data[19] = None
365 fmap['windGustSpeedNum'] = 20 ; final_data[20] = None
366 fmap['windGustSpeedNumUnit'] = 21 ; final_data[21] = 'mph'
367 fmap['windGustDirNum'] = 22 ; final_data[22] = None
368 fmap['dewPoint'] = 23 ; final_data[23] = None
369 fmap['dewPointUnit'] = 24 ; final_data[24] = 'F'
370 fmap['hn24Auto'] = 25 ; final_data[25] = None
371 fmap['hn24AutoUnit'] = 26 ; final_data[26] = 'in'
372 fmap['hstAuto'] = 27 ; final_data[27] = None
373 fmap['hstAutoUnit'] = 28 ; final_data[28] = 'in'
374
375 return (fmap, final_data)
376
def setup_infoex_counterparts_mapping(provider):
    """
    Return the mapping of a provider's element names to the names of
    their InfoEx counterparts.

    provider -- 'nrcs', 'mesowest' or 'python'; any other value yields an
                empty mapping
    """
    if provider == 'nrcs':
        # NRCS has no counterpart for windGustSpeedNum
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'TMAX': 'tempMaxHour',
            'TMIN': 'tempMinHour',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'air_temp_high_24_hour': 'tempMaxHour',
            'air_temp_low_24_hour': 'tempMinHour',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    if provider == 'python':
        # external Python programs already speak in InfoEx names, so
        # every supported name simply maps to itself
        infoex_names = ('precipitationGauge', 'tempPres', 'tempMaxHour',
                        'tempMinHour', 'hS', 'baro', 'rH', 'windSpeedNum',
                        'windDirectionNum', 'windGustSpeedNum')
        return {name: name for name in infoex_names}

    return {}
421
422 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """
    get the data we're after from the NRCS WSDL

    begin, end -- datetimes bounding the request
    station    -- station settings; reads 'source' (the WSDL URL),
                  'station_id' and 'desired_data' (list of elementCds)

    Returns a dict mapping each requested elementCd to its most recent
    value in the requested window, or to None if nothing was returned.
    """
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    # NOTE(review): certificate verification is switched off for the NRCS
    # endpoint; confirm this is still required before relying on the data
    transport.session.verify = False
    client = zeep.Client(wsdl=station['source'], transport=transport)
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y-%m-%d %H:%M:00')
    end_date_str = end.strftime('%Y-%m-%d %H:%M:00')

    # NRCS needs one request per element
    for element_cd in station['desired_data']:
        time_element = time.time()

        # get the data in the requested window for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin_date_str,
            endDate=end_date_str)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - time_element)

        # an empty response carries no entry for our station at all
        values = tmp[0]['values'] if tmp else None

        # isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda t: t['dateTime'])
            remote_data[element_cd] = latest['value']
        else:
            remote_data[element_cd] = None

    return remote_data
462
def get_mesowest_data(begin, end, station):
    """
    get the data we're after from the MesoWest/Synoptic API

    begin, end -- datetimes bounding the request
    station    -- station settings; reads 'source' (the API URL without
                  start/end), 'desired_data' (comma-separated variable
                  names) and 'units'

    Returns a dict mapping each requested variable to the value at the
    last timestamp of the response, or to None if it is unavailable.

    Exits the program with status 1 if the request fails or the response
    is not the JSON we expect.
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str

    try:
        # without a timeout a stalled server would hang the run forever
        req = requests.get(api_req_url, timeout=60)
    except requests.exceptions.RequestException:
        LOG.error("Could not connect to '%s'", api_req_url)
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
    except KeyError as exc:
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        sys.exit(1)
    except IndexError as exc:
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        try:
            LOG.error("Detailed MesoWest response: '%s'",
                      payload['SUMMARY']['RESPONSE_MESSAGE'])
        except KeyError:
            pass
        sys.exit(1)
    except ValueError as exc:
        LOG.error("Bad JSON in MesoWest response: '%s'", exc)
        sys.exit(1)

    # position of the most recent observation; -1 if there are none
    pos = len(observations.get('date_time') or []) - 1

    if pos < 0:
        LOG.warning("MesoWest response contains no observations")

    # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
    #       data (whereas with NRCS, we have to make a separate request for
    #       each element we want), so every element is read from the same
    #       response.
    #
    # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
    #       provides hourly data, but MesoWest can often provide data every
    #       10 minutes -- though this provides more opportunity for
    #       irregularities
    for element_cd in station['desired_data'].split(','):
        element_cd = element_cd.strip()

        if not element_cd:
            continue

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1')
        value = None

        if pos >= 0 and series and pos < len(series):
            value = series[pos]

        # only a missing reading is "no data"; 0 is a legitimate value
        if value is None or value == '':
            remote_data[element_cd] = None
            continue

        # the wind columns are labeled mph for English units, so convert
        # the knots we get; for metric units the column is labeled m/s and
        # the value must be left alone
        # NOTE(review): assumes the API reports knots for units=english and
        # m/s for units=metric -- confirm against the API documentation
        if element_cd in ('wind_speed', 'wind_gust') and \
           station['units'] != 'metric':
            value = kn_to_mph(value)

        # mesowest provides HS in mm, not cm; we want cm
        if element_cd == 'snow_depth' and station['units'] == 'metric':
            value = mm_to_cm(value)

        remote_data[element_cd] = value

    return remote_data
541
def switch_units_to_metric(data_map, mapping):
    """
    Overwrite the unit labels of the CSV row with their metric versions.

    data_map -- the CSV row, modified in place
    mapping  -- InfoEx field name -> column of data_map

    Returns data_map.
    """
    # NOTE: keep this in step with the columns laid out in
    #       setup_infoex_fields_mapping ()
    metric_labels = (
        ('tempMaxHourUnit', 'C'),
        ('tempMinHourUnit', 'C'),
        ('tempPresUnit', 'C'),
        ('precipitationGaugeUnit', 'mm'),
        ('hsUnit', 'cm'),
        ('windSpeedUnit', 'm/s'),
        ('windGustSpeedNumUnit', 'm/s'),
        ('dewPointUnit', 'C'),
        ('hn24AutoUnit', 'cm'),
        ('hstAutoUnit', 'cm'),
    )

    for field, label in metric_labels:
        data_map[mapping[field]] = label

    return data_map
559
def convert_nrcs_units_to_metric(element_cd, value):
    """
    convert NRCS values from English to metric

    element_cd -- the NRCS elementCd the value belongs to
    value      -- the value as reported by NRCS

    Returns the converted value for the elements whose InfoEx unit label
    is switched to metric (temperatures, snow depth, precipitation and
    wind speed); any other element is returned unchanged.
    """
    # all three temperatures are labeled 'C' in metric mode, not just TOBS
    if element_cd in ('TOBS', 'TMAX', 'TMIN'):
        return f_to_c(value)

    if element_cd == 'SNWD':
        return in_to_cm(value)

    if element_cd == 'PREC':
        return in_to_mm(value)

    # wind speed is labeled 'm/s' in metric mode
    # NOTE(review): assumes NRCS reports WSPD in mph -- confirm
    if element_cd == 'WSPD':
        return float(value) * 0.44704

    return value
569
570 # CSV operations
def write_local_csv(path_to_file, data):
    """
    Write the specified CSV file to disk

    path_to_file -- where to write; an existing file is overwritten
    data         -- the single row to write

    Returns True once the row has been written; I/O errors propagate to
    the caller.
    """
    # newline='' lets the csv module control line endings itself, as its
    # documentation requires (otherwise some platforms emit '\r\r\n')
    with open(path_to_file, 'w', newline='') as file_object:
        # The requirement is that empty values are represented in the CSV
        # file as "", csv.QUOTE_NONNUMERIC achieves that
        LOG.debug("writing CSV file '%s'", path_to_file)
        writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(data)
    return True
581
def upload_csv(path_to_file, infoex_data):
    """
    Upload the specified CSV file to InfoEx FTP and remove the file

    path_to_file -- the local CSV file; also used as the remote name
    infoex_data  -- InfoEx settings; reads 'host', 'uuid' (the FTP user)
                    and 'api_key' (the FTP password)

    The local file is only removed after a successful upload; FTP and I/O
    errors propagate to the caller.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
        ftp = FTP(infoex_data['host'], infoex_data['uuid'],
                  infoex_data['api_key'])
        try:
            ftp.storlines('STOR ' + path_to_file, file_object)
        finally:
            # release the connection even when the transfer fails
            ftp.close()
    os.remove(path_to_file)
592
593 # other miscellaneous routines
def setup_time_values(station):
    """
    Work out the time window of the data request(s).

    Returns the tuple (begin_date, end_date): end_date is the current time
    floored to the hour and begin_date lies three hours before it. NRCS
    stations use the config-specified timezone, everything else UTC.
    """
    zone = station['tz'] if station['provider'] == 'nrcs' else pytz.utc

    now = datetime.datetime.now(tz=zone)

    # whatever has elapsed since the top of the current hour
    past_the_hour = datetime.timedelta(minutes=now.minute,
                                       seconds=now.second,
                                       microseconds=now.microsecond)
    end_date = now - past_the_hour
    begin_date = end_date - datetime.timedelta(hours=3)

    return (begin_date, end_date)
611
def f_to_c(f):
    """Return the Celsius equivalent of a Fahrenheit temperature"""
    above_freezing = float(f) - 32
    return above_freezing * 5.0 / 9.0
615
def in_to_cm(inches):
    """Return the length in centimeters of a length given in inches"""
    cm_per_inch = 2.54
    return float(inches) * cm_per_inch
619
def in_to_mm(inches):
    """Return the length in millimeters of a length given in inches"""
    centimeters = float(inches) * 2.54
    return centimeters * 10.0
623
def ms_to_mph(ms):
    """Return the speed in miles per hour of a speed given in m/s"""
    mph_per_ms = 2.236936
    return ms * mph_per_ms
627
def kn_to_mph(kn):
    """Return the speed in miles per hour of a speed given in knots"""
    mph_per_knot = 1.150779
    return kn * mph_per_knot
631
def mm_to_cm(mm):
    """Return the length in centimeters of a length given in millimeters"""
    mm_per_cm = 10.0
    return mm / mm_per_cm
635
# when run as a script, use main()'s return value as the exit status
if __name__ == "__main__":
    sys.exit(main())