Update docs
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32 import urllib3
33 import importlib.util
34
35 from ftplib import FTP
36 from argparse import ArgumentParser
37
38 import pytz
39
40 import requests
41
42 import zeep
43 import zeep.cache
44 import zeep.transports
45
# Program version; reported by the --version switch built in get_parser().
__version__ = '3.1.1'

# Module-wide logger. Handlers and the effective level are attached later
# by setup_logging(); NOTSET here means no level filtering is applied by
# this logger itself until then.
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)

# Silence urllib3 warnings for the whole process.
# NOTE(review): presumably needed because get_nrcs_data() turns off TLS
# certificate verification (transport.session.verify = False) -- confirm.
urllib3.disable_warnings()
52
def get_parser():
    """Build and return the ArgumentParser describing the command line."""
    cli = ArgumentParser()

    # (switch, keyword arguments) pairs, registered in the order in which
    # they should show up in the --help output
    switches = (
        ("--version", dict(action="version",
                           version=__version__)),
        ("--config", dict(dest="config",
                          metavar="FILE",
                          help="location of config file")),
        ("--log-level", dict(dest="log_level",
                             default=None,
                             help="set the log level (debug, info, warning)")),
        ("--dry-run", dict(action="store_true",
                           dest="dry_run",
                           default=False,
                           help="fetch data but don't upload to InfoEx")),
    )

    for switch, settings in switches:
        cli.add_argument(switch, **settings)

    return cli
78
def setup_config(config):
    """
    Setup config variables based on values specified in the ini file.

    config -- a parsed configparser.ConfigParser (any object supporting
              config[section][option], `in` and .sections() works)

    Returns a tuple (infoex, station):

    infoex  -- dict holding 'host', 'uuid', 'api_key', 'csv_filename' and
               'location_uuid' from the [infoex] section, plus an empty
               'wx_data' placeholder that is filled in later.
    station -- dict holding 'provider' (one of 'nrcs', 'mesowest',
               'python'), 'tz' (a pytz timezone, default
               America/Los_Angeles) and the provider-specific keys:
                 nrcs     : 'source', 'station_id', 'desired_data' (list),
                            'units'
                 mesowest : 'source' (API URL incl. query string),
                            'station_id', 'desired_data' (comma-separated
                            string), 'units'
                 python   : 'path'

    The program exits with status 1 when the station type is unknown, a
    required option is missing, the timezone is invalid, or any option in
    the file has an empty value.
    """
    try:
        infoex_cfg = config['infoex']
        infoex = {
            'host': infoex_cfg['host'],
            'uuid': infoex_cfg['uuid'],
            'api_key': infoex_cfg['api_key'],
            'csv_filename': infoex_cfg['csv_filename'],
            'location_uuid': infoex_cfg['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station_cfg = config['station']
        station = dict()
        station['provider'] = station_cfg['type']

        if station['provider'] not in ['nrcs', 'mesowest', 'python']:
            # the message must list every accepted provider, including
            # 'python'
            print("Please specify either nrcs, mesowest, or python as the "
                  "station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = station_cfg['station_id']
            # tolerate "TOBS, SNWD": stray blanks would otherwise become
            # part of the element code and never match anything
            station['desired_data'] = [
                element.strip()
                for element in station_cfg['desired_data'].split(',')
            ]
            station['units'] = station_cfg['units']

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = station_cfg['station_id']
            station['units'] = station_cfg['units']
            # kept as a comma-separated string because it is pasted into
            # the URL below; blanks around the commas are dropped
            station['desired_data'] = ','.join(
                element.strip()
                for element in station_cfg['desired_data'].split(',')
            )

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                station_cfg['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

        if station['provider'] == 'python':
            station['path'] = station_cfg['path']

        tz = 'America/Los_Angeles'

        if 'tz' in station_cfg:
            tz = station_cfg['tz']

        # UnknownTimeZoneError derives from KeyError, so it has to be
        # handled here, before the generic "missing option" handler below
        try:
            station['tz'] = pytz.timezone(tz)
        except pytz.exceptions.UnknownTimeZoneError:
            LOG.critical("%s is not a valid timezone", tz)
            sys.exit(1)

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check:
    # no option anywhere in the file may be empty
    for section in config.sections():
        for option in config[section]:
            if not config[section][option]:
                LOG.critical("Config value '%s.%s' is empty", section, option)
                sys.exit(1)

    return (infoex, station)
146
def setup_logging(log_level):
    """
    Attach a handler to the module logger and apply the requested level.

    The systemd journal is used when its Python bindings can be imported;
    otherwise log records go to stdout.

    log_level -- None, 'debug', 'info' or 'warning'; None selects NOTSET.

    Returns True when the level was applied, False when log_level is not
    one of the accepted values (the handler is attached either way).
    """
    try:
        from systemd.journal import JournalHandler
        sink = JournalHandler()
    except ImportError:
        # no systemd bindings available: fall back to stdout
        sink = logging.StreamHandler(sys.stdout)

    LOG.addHandler(sink)

    if log_level not in (None, 'debug', 'info', 'warning'):
        return False

    level_for = {
        None: logging.NOTSET,
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }
    LOG.setLevel(level_for[log_level])

    return True
174
def main():
    """
    Main routine: sort through args, decide what to do, then do it.

    In order: parse the command line, read the config file, set up
    logging, fetch the latest observations from the configured provider
    (NRCS, MesoWest or a custom Python program), adjust units/precision,
    write the single-row CSV file and, unless --dry-run was given, upload
    it to InfoEx.

    Returns 0 on success and 1 when the local CSV file could not be
    written. Usage/configuration errors and failures of the custom Python
    program terminate the process through sys.exit(1).
    """
    parser = get_parser()
    options = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config.read(options.config)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # only the 'python' provider is allowed to come without units
    if 'units' not in station and station['provider'] != 'python':
        LOG.error("Please specify the units in the configuration "
                  "file")
        sys.exit(1)

    # .get() rather than []: the 'python' provider has no 'units' key at
    # all, and indexing would raise KeyError for it
    use_metric = station.get('units') == 'metric'

    # override units if user selected metric
    if use_metric:
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values(station)

    if station['provider'] == 'python':
        LOG.debug("Getting custom data from external Python program")
    else:
        LOG.debug("Getting %s data from %s to %s (%s)",
                  str(station['desired_data']),
                  str(begin_date), str(end_date), end_date.tzinfo.zone)

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)
    elif station['provider'] == 'python':
        try:
            spec = importlib.util.spec_from_file_location('custom_wx',
                                                          station['path'])
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            # hand our logger to the custom program
            mod.LOG = LOG

            try:
                infoex['wx_data'] = mod.get_custom_data()

                if infoex['wx_data'] is None:
                    infoex['wx_data'] = []
            except Exception as exc:
                LOG.error("Python program for custom Wx data failed in "
                          "execution: %s", str(exc))
                sys.exit(1)

            LOG.info("Successfully executed external Python program")
        except FileNotFoundError:
            LOG.error("Specified Python program for custom Wx data "
                      "was not found")
            sys.exit(1)
        except Exception as exc:
            # Also covers ImportError: importlib.util is imported at
            # module level, so an ImportError here can only come from the
            # custom program's own imports and is reported as such.
            LOG.error("A problem was encountered when attempting to "
                      "load your custom Wx program: %s", str(exc))
            sys.exit(1)

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # timezone massaging
    final_end_date = end_date.astimezone(station['tz'])

    # Only the fields that change per run need to be filled in; every
    # other column keeps the default from setup_infoex_fields_mapping()
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = final_end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = final_end_date.strftime('%H:%M')
    final_data[fmap['timeZone']] = station['tz'].zone

    # Massage precision of certain values to fit InfoEx's expectations
    #
    # 0 decimal places: relative humidity, wind speed, wind
    #                   direction, wind gust, snow depth
    # 1 decimal place:  air temp, baro
    whole_number_elements = ('wind_speed', 'WSPD', 'wind_direction',
                             'RHUM', 'relative_humidity', 'WDIR',
                             'wind_gust', 'SNWD', 'snow_depth')
    one_decimal_elements = ('TOBS', 'air_temp', 'PRES', 'pressure')

    for element_cd in infoex['wx_data']:
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        value = infoex['wx_data'][element_cd]

        # A missing observation stays None in the CSV row. This check has
        # to come before the unit conversion, which calls float() on the
        # value.
        if value is None:
            continue

        # do the conversion before the rounding
        if station['provider'] == 'nrcs' and use_metric:
            value = convert_nrcs_units_to_metric(element_cd, value)

        if element_cd in whole_number_elements:
            value = round(value)
        elif element_cd in one_decimal_elements:
            value = round(value, 1)

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        infoex['wx_data'][element_cd] = value
        final_data[fmap[iemap[element_cd]]] = value

    LOG.debug("final_data: %s", str(final_data))

    # nothing is written or uploaded when the provider returned no data
    if infoex['wx_data']:
        if not write_local_csv(infoex['csv_filename'], final_data):
            LOG.warning('Could not write local CSV file: %s',
                        infoex['csv_filename'])
            return 1

        if not options.dry_run:
            upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
324
325 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Create a mapping of InfoEx fields to the local data's indexing scheme.

    The CSV file handed to InfoEx is just one ordered row, but values
    still have to be addressed by name before that row is written. Two
    parallel structures make that possible:

    fmap       -- dict: InfoEx field name -> column index (0-based; the
                  InfoEx Auto Wx documentation numbers the same columns
                  starting at 1)
    final_data -- list of 29 column values, pre-filled with defaults
                  (English units, 'Pacific' timezone, None for every
                  measurement) and location_uuid in column 0

    Returns the tuple (fmap, final_data).
    """
    # (field name, default value), listed in InfoEx column order
    columns = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {}
    final_data = []

    for position, (field, default) in enumerate(columns):
        fmap[field] = position
        final_data.append(default)

    return (fmap, final_data)
374
def setup_infoex_counterparts_mapping(provider):
    """
    Return a dict translating the element names used by `provider` into
    the InfoEx field names this program supports.

    provider -- 'nrcs', 'mesowest' or 'python'; any other value yields an
                empty dict.
    """
    if provider == 'nrcs':
        # NRCS has no counterpart for windGustSpeedNum
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'TMAX': 'tempMaxHour',
            'TMIN': 'tempMinHour',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'air_temp_high_24_hour': 'tempMaxHour',
            'air_temp_low_24_hour': 'tempMinHour',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    if provider == 'python':
        # custom Python programs are expected to use the InfoEx names
        # directly, so every supported field maps onto itself
        infoex_fields = (
            'precipitationGauge', 'tempPres', 'tempMaxHour', 'tempMinHour',
            'hS', 'baro', 'rH', 'windSpeedNum', 'windDirectionNum',
            'windGustSpeedNum',
        )
        return {field: field for field in infoex_fields}

    return {}
419
420 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """
    Get the data we're after from the NRCS WSDL.

    begin, end -- datetime objects bounding the request; formatted as
                  'YYYY-MM-DD HH:MM:00' for the service
    station    -- dict providing 'source' (WSDL URL), 'station_id' and
                  'desired_data' (iterable of NRCS element codes)

    One getHourlyData request is made per element code. Returns a dict
    mapping each element code to the value carrying the latest 'dateTime'
    in the response, or to None when the service returned nothing for it.
    """
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    # NOTE(review): TLS certificate verification is switched off for every
    # request made through this transport -- confirm this is still
    # required for the NRCS endpoint.
    transport.session.verify = False
    client = zeep.Client(wsdl=station['source'], transport=transport)
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y-%m-%d %H:%M:00')
    end_date_str = end.strftime('%Y-%m-%d %H:%M:00')

    for element_cd in station['desired_data']:
        time_element = time.time()

        # get the data for this elementCd/element_cd over the whole
        # begin..end window
        response = client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin_date_str,
            endDate=end_date_str)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - time_element)

        # an empty response is treated like "no values" instead of
        # failing with IndexError on response[0]
        values = response[0]['values'] if response else None

        # isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda reading: reading['dateTime'])
            remote_data[element_cd] = latest['value']
        else:
            remote_data[element_cd] = None

    return remote_data
460
def get_mesowest_data(begin, end, station):
    """
    Get the data we're after from the MesoWest/Synoptic API.

    begin, end -- datetime objects bounding the request; formatted as
                  'YYYYMMDDHHMM' and appended to the URL
    station    -- dict providing 'source' (API URL with query string),
                  'desired_data' (comma-separated variable names) and
                  optionally 'units'

    A single request returns every variable. Returns a dict mapping each
    desired variable to its value at the last reported timestamp, or to
    None when the variable is absent or has no value there.

    Exits the program with status 1 when the response is not valid JSON
    or lacks the expected STATION/OBSERVATIONS structure.
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
    except (KeyError, TypeError) as exc:
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        sys.exit(1)
    except IndexError as exc:
        # an empty STATION list: log the API's own explanation if present
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        try:
            LOG.error("Detailed MesoWest response: '%s'",
                      payload['SUMMARY']['RESPONSE_MESSAGE'])
        except KeyError:
            # the detail is best-effort only; the error is already logged
            pass
        sys.exit(1)

    # index of the most recent observation; -1 when none were reported
    pos = len(observations.get('date_time') or []) - 1

    # main() labels wind values 'm/s' when metric units are configured,
    # so the knots->mph conversion below must not touch them in that case
    wants_mph = station.get('units') != 'metric'

    for element_cd in station['desired_data'].split(','):
        # isolate the most recent observation
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
        #       data (whereas with NRCS, we have to make a separate request for
        #       each element we want). This is nice for network efficiency but
        #       it means we have to handle this part differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data every
        #       10 minutes -- though this provides more opportunity for
        #       irregularities

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1')

        if series and 0 <= pos < len(series):
            latest = series[pos]
        else:
            latest = None

        # Compare against None explicitly: 0 is a perfectly valid reading
        # (calm wind, 0 degrees, no snow) and must not be discarded.
        if latest is not None and wants_mph \
                and element_cd in ('wind_speed', 'wind_gust'):
            # NOTE(review): values requested in non-metric units are
            # treated as knots here -- confirm against the API's unit
            # documentation.
            latest = kn_to_mph(latest)

        remote_data[element_cd] = latest

    return remote_data
530
def switch_units_to_metric(data_map, mapping):
    """
    Replace the unit columns of an InfoEx row with metric counterparts.

    data_map -- the row (list) to modify in place
    mapping  -- dict: InfoEx field name -> index into data_map

    Returns data_map (the same object that was passed in).
    """
    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping ()
    metric_units = {
        'tempMaxHourUnit': 'C',
        'tempMinHourUnit': 'C',
        'tempPresUnit': 'C',
        # A length, not a temperature: 'cm' matches the inch->centimetre
        # conversion that convert_nrcs_units_to_metric applies to PREC.
        # NOTE(review): confirm which precipitation units InfoEx accepts.
        'precipitationGaugeUnit': 'cm',
        'hsUnit': 'cm',
        'windSpeedUnit': 'm/s',
        'windGustSpeedNumUnit': 'm/s',
        'dewPointUnit': 'C',
        'hn24AutoUnit': 'cm',
        'hstAutoUnit': 'cm',
    }

    for field, unit in metric_units.items():
        data_map[mapping[field]] = unit

    return data_map
548
def convert_nrcs_units_to_metric(element_cd, value):
    """
    Convert an NRCS value from English to metric units.

    element_cd -- NRCS element code the value belongs to
    value      -- the reading; None (no data) is passed through untouched

    Temperatures (TOBS, TMAX, TMIN) go from Fahrenheit to Celsius, snow
    depth and precipitation (SNWD, PREC) from inches to centimetres.
    Values of any other element are returned unchanged.
    """
    # a missing reading has nothing to convert; float(None) would raise
    if value is None:
        return None

    # TMAX/TMIN feed tempMaxHour/tempMinHour, whose unit columns are
    # switched to 'C' together with tempPres, so they need the same
    # conversion as TOBS
    if element_cd in ('TOBS', 'TMAX', 'TMIN'):
        return f_to_c(value)

    if element_cd in ('SNWD', 'PREC'):
        return in_to_cm(value)

    # NOTE(review): WSPD is passed through unconverted although
    # switch_units_to_metric labels windSpeedUnit 'm/s' -- confirm the
    # unit NRCS reports wind speed in and convert here if needed.
    return value
558
559 # CSV operations
def write_local_csv(path_to_file, data):
    """
    Write the specified CSV file to disk.

    path_to_file -- destination path; an existing file is overwritten
    data         -- sequence of column values forming the single row

    Returns True when the row was written, False when the file could not
    be opened or written (the reason is logged).
    """
    LOG.debug("writing CSV file '%s'", path_to_file)

    try:
        # newline='' is what the csv module asks for, so that it alone
        # controls the line terminator
        with open(path_to_file, 'w', newline='') as file_object:
            # The requirement is that empty values are represented in the
            # CSV file as "", csv.QUOTE_NONNUMERIC achieves that
            writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as exc:
        # report failure through the return value, which the caller checks
        LOG.error("Unable to write CSV file '%s': %s", path_to_file, exc)
        return False

    return True
570
def upload_csv(path_to_file, infoex_data):
    """
    Upload the specified CSV file to InfoEx FTP and remove the file.

    path_to_file -- local file to send; the same string is used as the
                    remote name in the STOR command
    infoex_data  -- dict providing 'host', 'uuid' (FTP user) and
                    'api_key' (FTP password)

    The local file is removed only after a successful upload; any error
    from opening the file or from the FTP session propagates to the
    caller and leaves the file in place.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
        # the context manager shuts the connection down even when the
        # transfer raises
        with FTP(infoex_data['host'], infoex_data['uuid'],
                 infoex_data['api_key']) as ftp:
            ftp.storlines('STOR ' + path_to_file, file_object)

    os.remove(path_to_file)
581
582 # other miscellaneous routines
def setup_time_values(station):
    """
    Establish the time bounds of the data request(s).

    station -- dict providing 'provider' and, for the 'nrcs' provider,
               'tz' (the configured timezone)

    Returns (begin_date, end_date): end_date is the current time floored
    to the full hour and begin_date lies three hours before it. Both are
    timezone-aware -- in the station's timezone for NRCS, in UTC for
    every other provider.
    """
    # NRCS is queried in the config-specified timezone, everything else
    # (i.e. MesoWest) in UTC
    zone = station['tz'] if station['provider'] == 'nrcs' else pytz.utc

    # floor time to nearest hour
    now = datetime.datetime.now(tz=zone)
    end_date = now.replace(minute=0, second=0, microsecond=0)

    window = datetime.timedelta(hours=3)
    begin_date = end_date - window

    return (begin_date, end_date)
600
def f_to_c(f):
    """Return the temperature `f` (degrees Fahrenheit) in degrees Celsius.

    `f` may be anything float() accepts; the result is always a float.
    """
    above_freezing = float(f) - 32
    return above_freezing * 5.0 / 9.0
604
def in_to_cm(inches):
    """Return the length `inches` expressed in centimetres.

    `inches` may be anything float() accepts; the result is always a float.
    """
    cm_per_inch = 2.54
    return float(inches) * cm_per_inch
608
def ms_to_mph(ms):
    """Return the speed `ms` (meters per second) in miles per hour."""
    mph_per_ms = 2.236936
    return ms * mph_per_ms
612
def kn_to_mph(kn):
    """Return the speed `kn` (knots) in miles per hour."""
    mph_per_knot = 1.150779
    return kn * mph_per_knot
616
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())