Fix possibility of a NoneType raising a TypeError
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32 import urllib3
33 import importlib.util
34
35 from ftplib import FTP
36 from argparse import ArgumentParser
37
38 import pytz
39
40 import requests
41
42 import zeep
43 import zeep.cache
44 import zeep.transports
45
# Program version, reported by the --version command-line switch
__version__ = '3.2.2'

# Module-wide logger; its handlers and effective level are configured
# later by setup_logging()
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)

# Suppress urllib3 warnings process-wide (get_nrcs_data() turns off
# certificate verification on its HTTP session)
urllib3.disable_warnings()
52
def get_parser():
    """Build and return the ArgumentParser describing the command line"""
    cli = ArgumentParser()

    # one (flag, keyword arguments) entry per supported switch, in the
    # order they should appear in the help output
    switches = (
        ("--version", {"action": "version",
                       "version": __version__}),
        ("--config", {"dest": "config",
                      "metavar": "FILE",
                      "help": "location of config file"}),
        ("--log-level", {"dest": "log_level",
                         "default": None,
                         "help": "set the log level (debug, info, warning)"}),
        ("--dry-run", {"action": "store_true",
                       "dest": "dry_run",
                       "default": False,
                       "help": "fetch data but don't upload to InfoEx"}),
    )

    for flag, settings in switches:
        cli.add_argument(flag, **settings)

    return cli
78
def setup_config(config):
    """
    Build the runtime configuration from the parsed ini file.

    config is the parsed configuration (a configparser.ConfigParser; its
    sections() method is used for the final sanity check).

    Returns a tuple (infoex, station) of plain dicts: infoex holds the
    InfoEx connection settings plus an empty 'wx_data' placeholder, and
    station holds the provider-specific settings and the pytz timezone.

    A missing key, an unsupported station type, an unknown timezone or
    an empty value is reported and ends the program via sys.exit(1).
    """
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest', 'python']:
            # all three accepted providers are named so the hint matches
            # the check above
            print("Please specify nrcs, mesowest or python as the station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']
            # tolerate whitespace around the commas ("TOBS, SNWD") and
            # drop empty entries left by a stray comma
            station['desired_data'] = [
                item.strip()
                for item in config['station']['desired_data'].split(',')
                if item.strip()
            ]
            station['units'] = config['station']['units']

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']
            # kept as a comma-separated string (it is split again when
            # the response is read), but normalised the same way as the
            # NRCS list so no whitespace ends up in the URL
            station['desired_data'] = ','.join(
                item.strip()
                for item in config['station']['desired_data'].split(',')
                if item.strip()
            )

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                config['station']['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

        if station['provider'] == 'python':
            station['path'] = config['station']['path']

        # timezone used when none is configured
        tz = 'America/Los_Angeles'

        if 'tz' in config['station']:
            tz = config['station']['tz']

        try:
            station['tz'] = pytz.timezone(tz)
        except pytz.exceptions.UnknownTimeZoneError:
            LOG.critical("%s is not a valid timezone", tz)
            sys.exit(1)

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check:
    # no value anywhere in the file may be empty
    for key in config.sections():
        for subkey in config[key]:
            if not config[key][subkey]:
                LOG.critical("Config value '%s.%s' is empty", key, subkey)
                sys.exit(1)

    return (infoex, station)
146
def setup_logging(log_level):
    """
    Attach a log handler and apply the requested verbosity.

    The systemd journal is used when its Python bindings can be
    imported; otherwise records go to stdout. Returns True when
    log_level is None, 'debug', 'info' or 'warning', and False for
    anything else (the handler has already been attached by then).
    """
    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except ImportError:
        # no journal bindings available: log to stdout instead
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(
            logging.Formatter('%(asctime)s.%(msecs)03d '
                              '%(levelname)s %(module)s - %(funcName)s: %(message)s',
                              '%Y-%m-%d %H:%M:%S'))
        LOG.addHandler(stdout_handler)

    # accepted switch values and the logging level each one selects;
    # an absent switch (None) leaves the logger at NOTSET
    accepted_levels = (
        (None, logging.NOTSET),
        ('debug', logging.DEBUG),
        ('info', logging.INFO),
        ('warning', logging.WARNING),
    )

    for name, level in accepted_levels:
        if log_level == name:
            LOG.setLevel(level)
            return True

    return False
178
def main():
    """
    Main routine: sort through args, decide what to do, then do it.

    Parses the command line, loads the configuration, fetches the most
    recent observations from the configured provider (NRCS, MesoWest or
    an external Python program), maps them onto the InfoEx CSV layout,
    writes the CSV locally and, unless --dry-run was given, uploads it.

    Returns 0 on success and 1 when the local CSV could not be written.
    Command-line, configuration and custom-program failures end the
    program via sys.exit(1).
    """
    parser = get_parser()
    options = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    # read() silently skips files it cannot open and returns the list of
    # files it did parse; keep it so the failure can be reported once
    # logging is available
    files_read = config.read(options.config)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    if not files_read:
        LOG.critical("Could not read configuration file '%s'", options.config)
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    if station['provider'] != 'python' and station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values(station)

    if station['provider'] == 'python':
        LOG.debug("Getting custom data from external Python program")
    else:
        LOG.debug("Getting %s data from %s to %s (%s)",
                  str(station['desired_data']),
                  str(begin_date), str(end_date), end_date.tzinfo.zone)

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)
    elif station['provider'] == 'python':
        try:
            spec = importlib.util.spec_from_file_location('custom_wx',
                                                          station['path'])
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            # hand our logger to the custom program
            mod.LOG = LOG

            try:
                infoex['wx_data'] = mod.get_custom_data()

                # wx_data is a mapping of element -> value everywhere
                # else, so "nothing returned" becomes an empty mapping
                if infoex['wx_data'] is None:
                    infoex['wx_data'] = {}
            except Exception as exc:
                LOG.error("Python program for custom Wx data failed in "
                          "execution: %s", str(exc))
                sys.exit(1)

            LOG.info("Successfully executed external Python program")
        except ImportError:
            LOG.error("Please upgrade to Python 3.3 or later")
            sys.exit(1)
        except FileNotFoundError:
            LOG.error("Specified Python program for custom Wx data "
                      "was not found")
            sys.exit(1)
        except Exception as exc:
            LOG.error("A problem was encountered when attempting to "
                      "load your custom Wx program: %s", str(exc))
            sys.exit(1)

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # timezone massaging
    final_end_date = end_date.astimezone(station['tz'])

    # Only the fields that differ from the defaults set up by
    # setup_infoex_fields_mapping() need to be filled in
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = final_end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = final_end_date.strftime('%H:%M')
    final_data[fmap['timeZone']] = station['tz'].zone

    # Precision InfoEx expects, by provider element name:
    #
    # 0 decimal places: relative humidity, wind speed, wind
    #                   direction, wind gust, snow depth
    # 1 decimal place:  air temp (present, max and min), baro
    # 2 decimal places: precipitation
    zero_places = ('wind_speed', 'WSPD', 'wind_direction',
                   'RHUM', 'relative_humidity', 'WDIR',
                   'wind_gust', 'SNWD', 'snow_depth')
    one_place = ('TOBS', 'TMAX', 'TMIN',
                 'air_temp', 'air_temp_high_24_hour',
                 'air_temp_low_24_hour', 'PRES', 'pressure')
    two_places = ('PREC', 'precip_accum')

    for element_cd in infoex['wx_data']:
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        # nothing to convert, round or store for a missing reading
        if infoex['wx_data'][element_cd] is None:
            continue

        # do the conversion before the rounding
        if station['provider'] == 'nrcs' and station['units'] == 'metric':
            infoex['wx_data'][element_cd] = convert_nrcs_units_to_metric(
                element_cd, infoex['wx_data'][element_cd])

        if element_cd in zero_places:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd])
        elif element_cd in one_place:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd], 1)
        elif element_cd in two_places:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd], 2)

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        final_data[fmap[iemap[element_cd]]] = infoex['wx_data'][element_cd]

    LOG.debug("final_data: %s", str(final_data))

    # without any fetched elements there is nothing to write or upload
    if infoex['wx_data']:
        if not write_local_csv(infoex['csv_filename'], final_data):
            LOG.warning('Could not write local CSV file: %s',
                        infoex['csv_filename'])
            return 1

        if not options.dry_run:
            upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
325
326 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Describe the InfoEx CSV row: which column each field lives in and
    what each column holds before any observation is filled in.

    Returns a tuple (fmap, final_data). fmap maps an InfoEx field name
    to its 0-based column index; final_data is the 29-element row of
    defaults, with location_uuid in the first column, English units in
    the unit columns and None everywhere a value is still to come.

    Note that the Auto Wx InfoEx documentation numbers these columns
    from 1, whereas the indexes here start at 0.
    """
    # (field name, default value), listed in CSV column order
    columns = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {name: index for index, (name, _) in enumerate(columns)}
    final_data = [default for _, default in columns]

    return (fmap, final_data)
375
def setup_infoex_counterparts_mapping(provider):
    """
    Return the translation table from the given provider's element
    names to InfoEx field names.

    Supported providers are 'nrcs', 'mesowest' and 'python'; any other
    value yields an empty mapping. A new dict is returned on each call.
    """
    if provider == 'nrcs':
        # NRCS has no counterpart for windGustSpeedNum
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'TMAX': 'tempMaxHour',
            'TMIN': 'tempMinHour',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'air_temp_high_24_hour': 'tempMaxHour',
            'air_temp_low_24_hour': 'tempMinHour',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    if provider == 'python':
        # custom Python programs are expected to use the InfoEx names
        # directly, so every supported field maps onto itself
        return {
            'precipitationGauge': 'precipitationGauge',
            'tempPres': 'tempPres',
            'tempMaxHour': 'tempMaxHour',
            'tempMinHour': 'tempMinHour',
            'hS': 'hS',
            'baro': 'baro',
            'rH': 'rH',
            'windSpeedNum': 'windSpeedNum',
            'windDirectionNum': 'windDirectionNum',
            'windGustSpeedNum': 'windGustSpeedNum',
        }

    return {}
420
421 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """
    Get the data we're after from the NRCS WSDL.

    begin and end are datetimes bounding the request; station supplies
    'source' (the WSDL URL), 'station_id' and 'desired_data' (an
    iterable of NRCS element codes).

    Returns a dict mapping each requested element code to the value of
    its most recent hourly record in the window, or None when the
    service returned no records for that element.
    """
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    # NOTE(review): certificate verification is switched off for this
    # session -- confirm this is still required for the NRCS endpoint
    transport.session.verify = False
    client = zeep.Client(wsdl=station['source'], transport=transport)
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y-%m-%d %H:%M:00')
    end_date_str = end.strftime('%Y-%m-%d %H:%M:00')

    for element_cd in station['desired_data']:
        time_element = time.time()

        # one request per element, covering the whole begin..end window
        response = client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin_date_str,
            endDate=end_date_str)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - time_element)

        # an empty response has no first entry to look into; treat it
        # the same as an entry without values
        values = response[0]['values'] if response else None

        # isolate the most recent record
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            newest = max(values, key=lambda record: record['dateTime'])
            remote_data[element_cd] = newest['value']
        else:
            remote_data[element_cd] = None

    return remote_data
461
def get_mesowest_data(begin, end, station):
    """
    Get the data we're after from the MesoWest/Synoptic API.

    begin and end are datetimes bounding the request; station supplies
    'source' (the API URL without the time bounds) and 'desired_data'
    (a comma-separated string of variable names).

    Returns a dict mapping each requested variable to its reading at
    the most recent timestamp in the response, or None when that
    reading is absent. A reading of zero is a valid value and is kept.

    Connection failures and malformed responses are logged and end the
    program via sys.exit(1).
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str

    try:
        req = requests.get(api_req_url)
    except requests.exceptions.ConnectionError:
        LOG.error("Could not connect to '%s'", api_req_url)
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
    except KeyError as exc:
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        sys.exit(1)
    except IndexError as exc:
        LOG.error("Unexpected JSON in MesoWest response: '%s'", exc)
        # the station list was empty; log the API's own explanation
        # when the response carries one
        try:
            LOG.error("Detailed MesoWest response: '%s'",
                      payload['SUMMARY']['RESPONSE_MESSAGE'])
        except KeyError:
            pass
        sys.exit(1)
    except ValueError as exc:
        LOG.error("Bad JSON in MesoWest response: '%s'", exc)
        sys.exit(1)

    # index of the most recent timestamp; -1 when the response carries
    # no timestamps at all, in which case every element ends up None
    pos = len(observations.get('date_time') or []) - 1

    for element_cd in station['desired_data'].split(','):
        # isolate the most recent reading
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
        #       data (whereas with NRCS, we have to make a separate request for
        #       each element we want). This is nice for network efficiency but
        #       it means we have to handle this part differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data every
        #       10 minutes -- though this provides more opportunity for
        #       irregularities

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1')

        value = None
        if series is not None and 0 <= pos < len(series):
            value = series[pos]

        # Only a missing reading (None) is skipped: 0 is a legitimate
        # observation (calm wind, bare ground, a zero-degree reading)
        # and must not be discarded.
        #
        # NOTE(review): the conversion assumes the API delivered knots,
        # which presumably holds only when 'english' units were
        # requested -- confirm the behaviour for units=metric
        if value is not None and element_cd in ('wind_speed', 'wind_gust'):
            value = kn_to_mph(value)

        remote_data[element_cd] = value

    return remote_data
536
def switch_units_to_metric(data_map, mapping):
    """
    Overwrite the unit columns of data_map with metric unit labels.

    mapping translates an InfoEx field name into its position within
    data_map. data_map is modified in place and also returned.
    """
    # NOTE: keep in step with the field list laid out in
    #       setup_infoex_fields_mapping ()
    metric_labels = (
        ('tempMaxHourUnit', 'C'),
        ('tempMinHourUnit', 'C'),
        ('tempPresUnit', 'C'),
        ('precipitationGaugeUnit', 'mm'),
        ('hsUnit', 'cm'),
        ('windSpeedUnit', 'm/s'),
        ('windGustSpeedNumUnit', 'm/s'),
        ('dewPointUnit', 'C'),
        ('hn24AutoUnit', 'cm'),
        ('hstAutoUnit', 'cm'),
    )

    for field, label in metric_labels:
        data_map[mapping[field]] = label

    return data_map
554
def convert_nrcs_units_to_metric(element_cd, value):
    """
    Convert an NRCS value from English to metric units.

    element_cd is the NRCS element code the value belongs to. Observed,
    maximum and minimum air temperature (TOBS, TMAX, TMIN) go from
    Fahrenheit to Celsius, snow depth (SNWD) from inches to
    centimetres and precipitation (PREC) from inches to millimetres.
    Values of any other element are returned unchanged.
    """
    # TMAX/TMIN are converted alongside TOBS: in metric mode their unit
    # columns are labelled 'C' just like the present temperature's
    if element_cd in ('TOBS', 'TMAX', 'TMIN'):
        value = f_to_c(value)
    elif element_cd == 'SNWD':
        value = in_to_cm(value)
    elif element_cd == 'PREC':
        value = in_to_mm(value)
    # NOTE(review): WSPD is passed through untouched although the wind
    # speed unit column is labelled 'm/s' in metric mode -- confirm the
    # unit NRCS reports wind speed in and convert here if needed
    return value
564
565 # CSV operations
def write_local_csv(path_to_file, data):
    """
    Write data as a single CSV row to path_to_file.

    Returns True when the file was written and False when it could not
    be opened or written (the reason is logged), so callers can test
    the result instead of handling an exception.
    """
    try:
        # newline='' is what the csv module asks for, so that it alone
        # controls the line endings
        with open(path_to_file, 'w', newline='') as file_object:
            # The requirement is that empty values are represented in the CSV
            # file as "", csv.QUOTE_NONNUMERIC achieves that
            LOG.debug("writing CSV file '%s'", path_to_file)
            writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as exc:
        LOG.error("Failed writing CSV file '%s': %s", path_to_file, exc)
        return False

    return True
576
def upload_csv(path_to_file, infoex_data):
    """
    Upload the specified CSV file to InfoEx FTP and remove the file.

    infoex_data supplies 'host', 'uuid' (used as the FTP user) and
    'api_key' (used as the FTP password). The local file is removed
    only after a successful upload; any error from opening the file or
    from the FTP session propagates to the caller and leaves the file
    in place.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
        # the context manager ends the FTP session even when the
        # transfer raises
        with FTP(infoex_data['host'], infoex_data['uuid'],
                 infoex_data['api_key']) as ftp:
            # NOTE(review): the local path is sent verbatim as the
            # remote name -- confirm csv_filename is a bare filename
            ftp.storlines('STOR ' + path_to_file, file_object)

    # remove once the file handle is closed
    os.remove(path_to_file)
587
588 # other miscellaneous routines
def setup_time_values(station):
    """
    Work out the time window for the data request(s).

    Returns a tuple (begin_date, end_date): end_date is the current
    time floored to the start of the hour and begin_date is three
    hours earlier. The times are in the station's configured timezone
    for NRCS and in UTC for every other provider.
    """
    # NRCS gets the config-specified timezone, everything else
    # (i.e. MesoWest) gets UTC
    zone = station['tz'] if station['provider'] == 'nrcs' else pytz.utc

    now = datetime.datetime.now(tz=zone)

    # whatever has elapsed since the top of the current hour
    past_the_hour = datetime.timedelta(minutes=now.minute,
                                       seconds=now.second,
                                       microseconds=now.microsecond)

    end_date = now - past_the_hour
    begin_date = end_date - datetime.timedelta(hours=3)

    return (begin_date, end_date)
606
def f_to_c(f):
    """Return the Celsius equivalent of a Fahrenheit temperature"""
    above_freezing = float(f) - 32
    return above_freezing * 5.0 / 9.0
610
def in_to_cm(inches):
    """Return the length in centimeters for a length given in inches"""
    cm_per_inch = 2.54
    return float(inches) * cm_per_inch
614
def in_to_mm(inches):
    """Return the length in millimeters for a length given in inches"""
    centimeters = float(inches) * 2.54
    return centimeters * 10.0
618
def ms_to_mph(ms):
    """Return the speed in miles per hour for a speed in meters per second"""
    mph_per_ms = 2.236936
    return ms * mph_per_ms
622
def kn_to_mph(kn):
    """Return the speed in miles per hour for a speed given in knots"""
    mph_per_knot = 1.150779
    return kn * mph_per_knot
626
# Run main() only when executed as a script; its return value becomes
# the process exit status
if __name__ == "__main__":
    sys.exit(main())