Add important info to stdout log output
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32 import urllib3
33 import importlib.util
34
35 from ftplib import FTP
36 from argparse import ArgumentParser
37
38 import pytz
39
40 import requests
41
42 import zeep
43 import zeep.cache
44 import zeep.transports
45
# Program version, reported by the --version command line switch
__version__ = "3.2.1"

# NOTE(review): presumably silences the certificate warnings triggered by
# the unverified HTTPS session used for the NRCS requests -- confirm
urllib3.disable_warnings()

# Module-wide logger; handlers and the effective level are configured
# at startup by setup_logging()
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)
52
def get_parser():
    """Build and return the ArgumentParser describing this program's CLI"""
    # one (flags, keyword arguments) entry per supported switch, in the
    # order they should appear in the help output
    switches = (
        (("--version",),
         dict(action="version",
              version=__version__)),
        (("--config",),
         dict(dest="config",
              metavar="FILE",
              help="location of config file")),
        (("--log-level",),
         dict(dest="log_level",
              default=None,
              help="set the log level (debug, info, warning)")),
        (("--dry-run",),
         dict(action="store_true",
              dest="dry_run",
              default=False,
              help="fetch data but don't upload to InfoEx")),
    )

    cli = ArgumentParser()

    for flags, settings in switches:
        cli.add_argument(*flags, **settings)

    return cli
78
def setup_config(config):
    """
    Setup config variables based on values specified in the ini file.

    config is the parsed ConfigParser object. The [infoex] section
    supplies the upload settings and the [station] section describes the
    data provider ('nrcs', 'mesowest' or 'python').

    Returns a tuple (infoex, station) of plain dicts. Any missing key,
    unsupported station type, unknown timezone or empty value is reported
    and terminates the program via sys.exit(1).
    """
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest', 'python']:
            # 'python' is an accepted provider too, so mention it
            print("Please specify one of nrcs, mesowest or python as the "
                  "station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']
            # tolerate "TOBS, SNWD" and trailing commas: stray whitespace
            # or empty entries would otherwise be requested verbatim
            station['desired_data'] = [
                element.strip()
                for element in config['station']['desired_data'].split(',')
                if element.strip()
            ]
            station['units'] = config['station']['units']

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']
            station['desired_data'] = config['station']['desired_data']

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                config['station']['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

        if station['provider'] == 'python':
            station['path'] = config['station']['path']

        # the timezone is optional in the config file
        tz = 'America/Los_Angeles'

        if 'tz' in config['station']:
            tz = config['station']['tz']

        try:
            station['tz'] = pytz.timezone(tz)
        except pytz.exceptions.UnknownTimeZoneError:
            LOG.critical("%s is not a valid timezone", tz)
            sys.exit(1)

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check:
    # no option may be left empty
    for section in config.sections():
        for option in config[section]:
            if not config[section][option]:
                LOG.critical("Config value '%s.%s' is empty", section, option)
                sys.exit(1)

    return (infoex, station)
146
def setup_logging(log_level):
    """
    Attach a log handler to LOG and apply the requested verbosity.

    The systemd journal is used when its Python bindings can be imported;
    otherwise records are written to stdout with a timestamped format.

    log_level may be None (the logger is left at NOTSET) or one of
    'debug', 'info' or 'warning'. Returns True when the level was
    recognised and applied, False otherwise. Note that the handler is
    attached before the level is validated.
    """
    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except ImportError:
        # no journal bindings available, fall back to stdout
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(logging.Formatter(
            '%(asctime)s.%(msecs)03d '
            '%(levelname)s %(module)s - %(funcName)s: %(message)s',
            '%Y-%m-%d %H:%M:%S'))
        LOG.addHandler(stdout_handler)

    # accepted --log-level values and the logging level each one selects
    known_levels = (
        (None, logging.NOTSET),
        ('debug', logging.DEBUG),
        ('info', logging.INFO),
        ('warning', logging.WARNING),
    )

    for name, level in known_levels:
        if name == log_level:
            LOG.setLevel(level)
            return True

    return False
178
def main():
    """
    Main routine: sort through args, decide what to do, then do it.

    Parses the command line, loads the config file, fetches the latest
    weather values from the configured provider (NRCS, MesoWest or a
    custom Python program), massages them into the InfoEx CSV layout,
    writes that CSV locally and, unless --dry-run was given, uploads it.

    Returns 0 on success and 1 if the local CSV file could not be
    written. Several fatal conditions (bad arguments, bad configuration,
    failing custom program) terminate via sys.exit(1) instead.
    """
    parser = get_parser()
    options = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    # ConfigParser.read() silently skips files it cannot open and returns
    # the list of files it did parse, so an empty result means the
    # specified file was not usable
    if not config.read(options.config):
        print("\nCould not read configuration file '%s'." % options.config)
        sys.exit(1)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    if station['provider'] != 'python' and station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values(station)

    if station['provider'] == 'python':
        LOG.debug("Getting custom data from external Python program")
    else:
        LOG.debug("Getting %s data from %s to %s (%s)",
                  str(station['desired_data']),
                  str(begin_date), str(end_date), end_date.tzinfo.zone)

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)
    elif station['provider'] == 'python':
        try:
            spec = importlib.util.spec_from_file_location('custom_wx',
                                                          station['path'])
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            # hand our logger to the custom program
            mod.LOG = LOG

            try:
                infoex['wx_data'] = mod.get_custom_data()

                if infoex['wx_data'] is None:
                    infoex['wx_data'] = []
            except Exception as exc:
                LOG.error("Python program for custom Wx data failed in "
                          "execution: %s", str(exc))
                # SystemExit is not an Exception subclass, so the outer
                # handlers below do not intercept this exit
                sys.exit(1)

            LOG.info("Successfully executed external Python program")
        except ImportError:
            LOG.error("Please upgrade to Python 3.3 or later")
            sys.exit(1)
        except FileNotFoundError:
            LOG.error("Specified Python program for custom Wx data "
                      "was not found")
            sys.exit(1)
        except Exception as exc:
            LOG.error("A problem was encountered when attempting to "
                      "load your custom Wx program: %s", str(exc))
            sys.exit(1)

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # timezone massaging
    final_end_date = end_date.astimezone(station['tz'])

    # Only the fields that differ from the defaults prepared by
    # setup_infoex_fields_mapping() need to be filled in from here on
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = final_end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = final_end_date.strftime('%H:%M')
    final_data[fmap['timeZone']] = station['tz'].zone

    # Precision InfoEx expects for certain values:
    #
    # 0 decimal places: relative humidity, wind speed, wind
    #                   direction, wind gust, snow depth
    # 1 decimal place:  air temp, baro
    whole_number_elements = ('wind_speed', 'WSPD', 'wind_direction',
                             'RHUM', 'relative_humidity', 'WDIR',
                             'wind_gust', 'SNWD', 'snow_depth')
    one_decimal_elements = ('TOBS', 'air_temp', 'PRES', 'pressure')

    convert_to_metric = (station['provider'] == 'nrcs' and
                         station['units'] == 'metric')

    for element_cd in infoex['wx_data']:
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        value = infoex['wx_data'][element_cd]

        # Avoid transforming None values. This has to happen before the
        # unit conversion, which cannot cope with a missing reading; the
        # corresponding CSV field simply keeps its empty default.
        if value is None:
            continue

        # do the conversion before the rounding
        if convert_to_metric:
            value = convert_nrcs_units_to_metric(element_cd, value)

        if element_cd in whole_number_elements:
            value = round(value)
        elif element_cd in one_decimal_elements:
            value = round(value, 1)

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        infoex['wx_data'][element_cd] = value
        final_data[fmap[iemap[element_cd]]] = value

    LOG.debug("final_data: %s", str(final_data))

    # nothing is written or uploaded when no data was obtained at all
    if infoex['wx_data']:
        if not write_local_csv(infoex['csv_filename'], final_data):
            LOG.warning('Could not write local CSV file: %s',
                        infoex['csv_filename'])
            return 1

        if not options.dry_run:
            upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
322
323 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Create a mapping of InfoEx fields to the local data's indexing scheme.

    INFOEX FIELDS

    The end result of this program is simply an ordered set, the CSV
    file, but the values may still need to be manipulated arbitrarily
    before that file is written. To make that convenient, two structures
    are returned as a tuple (fmap, final_data):

    fmap maps each InfoEx field name to its column index, and final_data
    is the row itself, pre-filled with the default for each column (None
    for observations, English unit labels for the unit columns).

    Also note that the current Auto Wx InfoEx documentation shows these
    keys in a graphical table with the "index" beginning at 1, but here we
    sanely index beginning at 0.
    """
    # (field name, default value), listed in CSV column order
    columns = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {}
    final_data = []

    for index, (field, default) in enumerate(columns):
        fmap[field] = index
        final_data.append(default)

    return (fmap, final_data)
372
def setup_infoex_counterparts_mapping(provider):
    """
    Create a mapping of the provider's field names that this program
    supports to their InfoEx counterparts.

    provider is 'nrcs', 'mesowest' or 'python'; any other value yields an
    empty mapping.
    """
    if provider == 'nrcs':
        # unsupported by NRCS:
        # windGustSpeedNum
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'TMAX': 'tempMaxHour',
            'TMIN': 'tempMinHour',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'air_temp_high_24_hour': 'tempMaxHour',
            'air_temp_low_24_hour': 'tempMinHour',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    if provider == 'python':
        # we expect Python programs to use the InfoEx data type names,
        # so every name simply maps to itself
        infoex_names = (
            'precipitationGauge',
            'tempPres',
            'tempMaxHour',
            'tempMinHour',
            'hS',
            'baro',
            'rH',
            'windSpeedNum',
            'windDirectionNum',
            'windGustSpeedNum',
        )
        return {name: name for name in infoex_names}

    return {}
417
418 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """
    Get the data we're after from the NRCS WSDL.

    begin and end are datetimes bounding the request window. station
    supplies 'source' (the WSDL URL), 'station_id' and 'desired_data'
    (an iterable of element codes).

    Returns a dict mapping each requested element code to the value of
    its most recent reading in the window, or None when the service
    returned no values for it.
    """
    soap_transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    # certificate verification is switched off for this session
    soap_transport.session.verify = False
    soap_client = zeep.Client(wsdl=station['source'],
                              transport=soap_transport)

    # massage begin/end date format
    timestamp_format = '%Y-%m-%d %H:%M:00'
    window_start = begin.strftime(timestamp_format)
    window_end = end.strftime(timestamp_format)

    latest = {}

    # a separate request is made for every element code
    for element_cd in station['desired_data']:
        started = time.time()

        response = soap_client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=window_start,
            endDate=window_end)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - started)

        readings = response[0]['values']

        if not readings:
            latest[element_cd] = None
            continue

        # isolate the most recent reading
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        newest = max(readings, key=lambda reading: reading['dateTime'])
        latest[element_cd] = newest['value']

    return latest
458
def get_mesowest_data(begin, end, station):
    """
    Get the data we're after from the MesoWest/Synoptic API.

    begin and end are datetimes bounding the request window. station
    supplies 'source' (the API URL prepared during configuration),
    'desired_data' (comma-separated variable names) and 'units'.

    Returns a dict mapping each requested variable name to the value at
    the last reported observation time, or None when the response holds
    no value for it. An unusable response is logged and terminates the
    program via sys.exit(1).
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
    except (KeyError, TypeError) as exc:
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        sys.exit(1)
    except IndexError as exc:
        # an empty STATION list; the summary may say why
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        try:
            LOG.error("Detailed MesoWest response: '%s'",
                      payload['SUMMARY']['RESPONSE_MESSAGE'])
        except KeyError:
            pass
        sys.exit(1)

    # index of the most recent observation; -1 if none were reported
    pos = len(observations.get('date_time') or []) - 1

    if pos < 0:
        LOG.warning("MesoWest response contains no observation times")

    # The wind values are passed through kn_to_mph, i.e. treated as knots.
    # NOTE(review): presumably that is what the API reports for
    # units=english -- confirm against the Synoptic documentation. When
    # metric units were requested the values are labelled m/s downstream,
    # so they must be left untouched.
    convert_wind = station.get('units') != 'metric'

    for raw_name in station['desired_data'].split(','):
        element_cd = raw_name.strip()

        if not element_cd:
            continue

        # isolate the most recent, see the note in get_nrcs_data for how
        # and why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
        #       data (whereas with NRCS, we have to make a separate request for
        #       each element we want). This is nice for network efficiency but
        #       it means we have to handle this part differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data every
        #       10 minutes -- though this provides more opportunity for
        #       irregularities

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1')
        value = None

        if series is not None and 0 <= pos < len(series):
            value = series[pos]

        # compare against None explicitly: a reading of 0 (calm wind,
        # no snow, 0 degrees) is valid data, not a missing value
        if (value is not None and convert_wind and
                element_cd in ('wind_speed', 'wind_gust')):
            value = kn_to_mph(value)

        remote_data[element_cd] = value

    return remote_data
528
def switch_units_to_metric(data_map, mapping):
    """
    Replace units with metric counterparts.

    data_map is the CSV row (modified in place and also returned) and
    mapping translates InfoEx field names to positions in that row.
    """
    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping ()
    metric_units = (
        ('tempMaxHourUnit', 'C'),
        ('tempMinHourUnit', 'C'),
        ('tempPresUnit', 'C'),
        ('precipitationGaugeUnit', 'mm'),
        ('hsUnit', 'cm'),
        ('windSpeedUnit', 'm/s'),
        ('windGustSpeedNumUnit', 'm/s'),
        ('dewPointUnit', 'C'),
        ('hn24AutoUnit', 'cm'),
        ('hstAutoUnit', 'cm'),
    )

    for unit_field, unit in metric_units:
        data_map[mapping[unit_field]] = unit

    return data_map
546
def convert_nrcs_units_to_metric(element_cd, value):
    """
    Convert NRCS values from English to metric.

    element_cd is the NRCS element code the value belongs to. Temperature
    codes (TOBS, TMAX, TMIN) are converted to Celsius, SNWD to
    centimeters and PREC to millimeters; values of any other element are
    returned unchanged. A missing reading (None) is passed through as
    None rather than converted.
    """
    # a gap in the station data is reported as None; there is nothing to
    # convert in that case
    if value is None:
        return None

    # all temperature fields are labelled 'C' in metric mode, so the
    # hourly max/min need the same treatment as the present temperature
    if element_cd in ('TOBS', 'TMAX', 'TMIN'):
        return f_to_c(value)

    if element_cd == 'SNWD':
        return in_to_cm(value)

    if element_cd == 'PREC':
        return in_to_mm(value)

    return value
556
557 # CSV operations
def write_local_csv(path_to_file, data):
    """
    Write the specified CSV file to disk.

    data is the single row to write. Returns True when the file was
    written and False when it could not be opened or written, so that
    the caller can report the failure.
    """
    try:
        # newline='' is what the csv module asks for so that it alone
        # controls the line endings
        with open(path_to_file, 'w', newline='') as file_object:
            # The requirement is that empty values are represented in the CSV
            # file as "", csv.QUOTE_NONNUMERIC achieves that
            LOG.debug("writing CSV file '%s'", path_to_file)
            writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as exc:
        LOG.error("Unable to write CSV file '%s': %s", path_to_file, exc)
        return False

    return True
568
def upload_csv(path_to_file, infoex_data):
    """
    Upload the specified CSV file to InfoEx FTP and remove the file.

    infoex_data supplies the FTP 'host' plus the 'uuid' and 'api_key'
    used as login credentials. The local file is only removed after the
    upload went through; any FTP or file error propagates to the caller.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
        ftp = FTP(infoex_data['host'], infoex_data['uuid'],
                  infoex_data['api_key'])

        # make sure the connection is released even if the transfer fails
        try:
            ftp.storlines('STOR ' + path_to_file, file_object)
        finally:
            ftp.close()

    os.remove(path_to_file)
579
580 # other miscellaneous routines
def setup_time_values(station):
    """
    Establish time bounds of data request(s).

    Returns a tuple (begin_date, end_date) of timezone-aware datetimes:
    end_date is the current time floored to the hour and begin_date lies
    three hours before it.
    """
    # NRCS requests use the config-specified timezone, everything else
    # (i.e. MesoWest) defaults to UTC
    tz = station['tz'] if station['provider'] == 'nrcs' else pytz.utc

    # floor time to nearest hour by taking off whatever has elapsed
    # since the hour began
    now = datetime.datetime.now(tz=tz)
    into_the_hour = datetime.timedelta(minutes=now.minute,
                                       seconds=now.second,
                                       microseconds=now.microsecond)
    end_date = now - into_the_hour

    three_hours = datetime.timedelta(hours=3)
    begin_date = end_date - three_hours

    return (begin_date, end_date)
598
def f_to_c(f):
    """Convert a temperature from Fahrenheit to Celsius"""
    above_freezing = float(f) - 32
    return above_freezing * 5.0 / 9.0
602
def in_to_cm(inches):
    """Convert a length from inches to centimeters"""
    cm_per_inch = 2.54
    return float(inches) * cm_per_inch
606
def in_to_mm(inches):
    """Convert a length from inches to millimeters"""
    centimeters = float(inches) * 2.54
    return centimeters * 10.0
610
def ms_to_mph(ms):
    """Convert a speed from meters per second to miles per hour"""
    mph_per_ms = 2.236936
    return ms * mph_per_ms
614
def kn_to_mph(kn):
    """Convert a speed from knots to miles per hour"""
    mph_per_knot = 1.150779
    return kn * mph_per_knot
618
if __name__ == "__main__":
    # hand main()'s return value to the shell as the exit status
    EXIT_STATUS = main()
    sys.exit(EXIT_STATUS)