Merge branch 'custom_wx' into develop
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32
33 from ftplib import FTP
34 from argparse import ArgumentParser
35
36 import requests
37
38 import zeep
39 import zeep.cache
40 import zeep.transports
41
# Program version string; get_parser() reports it for the --version switch.
__version__ = '2.2.0'

# Module-wide logger. Handlers and the effective level are installed later
# by setup_logging(); until then the level is left unset.
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)
46
def get_parser():
    """Build and return the ArgumentParser describing this program's switches"""
    cli = ArgumentParser()

    # One entry per command-line switch: (flag, keyword arguments handed to
    # add_argument). Registration order determines the --help ordering.
    option_table = (
        ("--version", {
            "action": "version",
            "version": __version__,
        }),
        ("--config", {
            "dest": "config",
            "metavar": "FILE",
            "help": "location of config file",
        }),
        ("--log-level", {
            "dest": "log_level",
            "default": None,
            "help": "set the log level (debug, info, warning)",
        }),
        ("--dry-run", {
            "action": "store_true",
            "dest": "dry_run",
            "default": False,
            "help": "fetch data but don't upload to InfoEx",
        }),
    )

    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli
72
def _normalize_csv_option(raw_value):
    """Split a comma-separated config value into trimmed, non-empty items"""
    return [item.strip() for item in raw_value.split(',') if item.strip()]


def setup_config(config):
    """Setup config variable based on values specified in the ini file

    Parameters:
        config: a configparser.ConfigParser (or compatible mapping that
                also offers sections()) already loaded from the ini file.

    Returns:
        A tuple (infoex, station) of plain dicts. 'infoex' holds the
        InfoEx connection settings plus an empty 'wx_data' placeholder;
        'station' holds the provider name and the provider-specific
        settings (source URL, station id, units, desired data or path).

    The program is terminated with exit status 1 when the station type
    is not recognized, when a required key is missing, or when any
    value in the file is empty.
    """
    try:
        infoex_section = config['infoex']
        station_section = config['station']

        infoex = {
            'host': infoex_section['host'],
            'uuid': infoex_section['uuid'],
            'api_key': infoex_section['api_key'],
            'csv_filename': infoex_section['csv_filename'],
            'location_uuid': infoex_section['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = {'provider': station_section['type']}

        if station['provider'] not in ('nrcs', 'mesowest', 'python'):
            # the message lists every accepted type, including 'python'
            print("Please specify either nrcs, mesowest, or python as "
                  "the station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = station_section['station_id']
            # Trim whitespace around each element code so that a value
            # such as "TOBS, SNWD" still matches the field mapping, and
            # drop empty items left behind by stray commas.
            station['desired_data'] = _normalize_csv_option(
                station_section['desired_data'])

            # XXX: For NRCS, we're manually overriding units for now! Once
            #      unit conversion is supported for NRCS, REMOVE THIS!
            station['units'] = 'imperial'

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = station_section['station_id']
            station['units'] = station_section['units']
            # Kept as a comma-separated string (it is placed in the URL
            # and split again by the MesoWest fetcher), but with the
            # whitespace around each variable name removed.
            station['desired_data'] = ','.join(
                _normalize_csv_option(station_section['desired_data']))

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                station_section['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

        if station['provider'] == 'python':
            station['path'] = station_section['path']

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check
    for section in config.sections():
        for option in config[section]:
            if not config[section][option]:
                LOG.critical("Config value '%s.%s' is empty", section, option)
                sys.exit(1)

    return (infoex, station)
133
def setup_logging(log_level):
    """Attach a log handler and apply the requested verbosity

    The systemd journal is used when its Python bindings can be
    imported; otherwise log records go to standard output.

    Returns True when log_level is None or one of 'debug', 'info' or
    'warning', and False for anything else. The handler is attached in
    either case.
    """
    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except ImportError:
        # no journal bindings available, fall back to stdout
        LOG.addHandler(logging.StreamHandler(sys.stdout))

    # accepted switch values and the logging level each one selects;
    # None means the switch was not given at all
    known_levels = {
        None: logging.NOTSET,
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }

    if log_level not in known_levels:
        return False

    LOG.setLevel(known_levels[log_level])
    return True
161
def main():
    """Main routine: sort through args, decide what to do, then do it

    Parses the command line, reads the configuration file, fetches the
    weather data from the configured provider (NRCS, MesoWest, or an
    external Python program), copies the values into the InfoEx row,
    writes that row to the local CSV file and, unless --dry-run was
    given, uploads the file to InfoEx.

    Returns 0 when the run completes and 1 when write_local_csv()
    reports that the CSV file could not be written. Missing switches,
    an invalid log level, missing units, and failures while loading or
    running the custom Python program end the process via sys.exit(1).
    """
    parser = get_parser()
    options = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    # the configuration file is mandatory
    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config.read(options.config)

    # setup_logging() returns False for an unrecognized --log-level value
    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    try:
        if station['units'] == 'metric':
            final_data = switch_units_to_metric(final_data, fmap)
    except KeyError:
        # setup_config() stores no 'units' key for the 'python' provider,
        # so its absence is only an error for the other providers
        if station['provider'] != 'python':
            LOG.error("Please specify the units in the configuration "
                      "file")
            sys.exit(1)

    (begin_date, end_date) = setup_time_values()

    if station['provider'] == 'python':
        LOG.debug("Getting custom data from external Python program")
    else:
        LOG.debug("Getting %s data from %s to %s",
                  str(station['desired_data']),
                  str(begin_date), str(end_date))

    # start of the wall-clock measurement reported after the fetch
    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)
    elif station['provider'] == 'python':
        try:
            import importlib.util

            # load the file named by station['path'] as a module called
            # 'custom_wx' and hand it this program's logger
            spec = importlib.util.spec_from_file_location('custom_wx',
                                                          station['path'])
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            mod.LOG = LOG

            try:
                infoex['wx_data'] = mod.get_custom_data()

                # a None result is treated as "no data"
                if infoex['wx_data'] is None:
                    infoex['wx_data'] = []
            except Exception as exc:
                LOG.error("Python program for custom Wx data failed in "
                          "execution: " + str(exc))
                sys.exit(1)

            LOG.info("Successfully executed external Python program")
        except ImportError:
            LOG.error("Please upgrade to Python 3.3 or later")
            sys.exit(1)
        except FileNotFoundError:
            LOG.error("Specified Python program for custom Wx data "
                      "was not found")
            sys.exit(1)
        except Exception as exc:
            LOG.error("A problem was encountered when attempting to "
                      "load your custom Wx program: " + str(exc))
            sys.exit(1)

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # Now we only need to add in what we want to change thanks to that
    # abomination of a variable declaration earlier
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = end_date.strftime('%H:%M')

    for element_cd in infoex['wx_data']:
        # skip anything the provider returned that has no InfoEx counterpart
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        # Massage precision of certain values to fit InfoEx's
        # expectations
        #
        # 0 decimal places: wind speed, wind direction, wind gust, snow depth
        # 1 decimal place:  air temp, baro
        # Avoid transforming None values
        if infoex['wx_data'][element_cd] is None:
            continue
        elif element_cd in ['wind_speed', 'WSPD', 'wind_direction',
                            'WDIR', 'wind_gust', 'SNWD', 'snow_depth']:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd])
        elif element_cd in ['TOBS', 'air_temp', 'PRES', 'pressure']:
            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd], 1)

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        final_data[fmap[iemap[element_cd]]] = infoex['wx_data'][element_cd]

    LOG.debug("final_data: %s", str(final_data))

    # only write (and possibly upload) a CSV file when data was returned
    if len(infoex['wx_data']) > 0:
        if not write_local_csv(infoex['csv_filename'], final_data):
            LOG.warning('Could not write local CSV file: %s',
                        infoex['csv_filename'])
            return 1

        # --dry-run leaves the CSV file on disk and skips the upload
        if not options.dry_run:
            upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
303
304 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Build the InfoEx column layout for one CSV row.

    Returns a tuple (fmap, final_data):

    fmap       -- dict mapping each InfoEx field name to its zero-based
                  column position in the row
    final_data -- list of 29 initial column values: location_uuid in the
                  first column, imperial unit labels and the 'Pacific'
                  time zone where a default applies, None everywhere else

    The row is ultimately written as an ordered CSV record, so the order
    of the entries below is significant. The InfoEx Auto Wx
    documentation numbers these columns from 1; positions here start
    at 0.
    """
    # (field name, initial value), in CSV column order
    columns = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {}
    final_data = []
    for position, (field_name, initial_value) in enumerate(columns):
        fmap[field_name] = position
        final_data.append(initial_value)

    return (fmap, final_data)
353
def setup_infoex_counterparts_mapping(provider):
    """
    Return a dict translating the given provider's element names into the
    InfoEx field names this program supports.

    provider is one of 'nrcs', 'mesowest' or 'python'; any other value
    yields an empty dict.
    """
    if provider == 'nrcs':
        # windGustSpeedNum has no NRCS counterpart
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'TMAX': 'tempMaxHour',
            'TMIN': 'tempMinHour',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'air_temp_high_24_hour': 'tempMaxHour',
            'air_temp_low_24_hour': 'tempMinHour',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    if provider == 'python':
        # custom programs are expected to use the InfoEx names directly,
        # so every supported field maps onto itself
        return {
            'precipitationGauge': 'precipitationGauge',
            'tempPres': 'tempPres',
            'tempMaxHour': 'tempMaxHour',
            'tempMinHour': 'tempMinHour',
            'hS': 'hS',
            'baro': 'baro',
            'rH': 'rH',
            'windSpeedNum': 'windSpeedNum',
            'windDirectionNum': 'windDirectionNum',
            'windGustSpeedNum': 'windGustSpeedNum',
        }

    return {}
398
399 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """Fetch the most recent value of each desired element from the NRCS WSDL

    begin and end bound the requested period; station supplies
    'source' (the WSDL URL), 'station_id' and 'desired_data' (an
    iterable of element codes).

    Returns a dict keyed by element code. Each value is the 'value' of
    the reading with the latest 'dateTime', or None when the service
    returned no readings for that element.
    """
    sqlite_cache = zeep.cache.SqliteCache()
    soap_client = zeep.Client(
        wsdl=station['source'],
        transport=zeep.transports.Transport(cache=sqlite_cache))
    latest_by_element = {}

    # one request per element code
    for element_cd in station['desired_data']:
        request_started = time.time()

        response = client_response = soap_client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin,
            endDate=end)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - request_started)

        readings = response[0]['values']

        if not readings:
            latest_by_element[element_cd] = None
            continue

        # NOTE: hourly data from NRCS sometimes has gaps, so the newest
        #       reading in the window is used even if it is not from the
        #       latest hour; this can yield slightly stale values
        newest = max(readings, key=lambda reading: reading['dateTime'])
        latest_by_element[element_cd] = newest['value']

    return latest_by_element
434
def get_mesowest_data(begin, end, station):
    """get the data we're after from the MesoWest/Synoptic API

    begin and end are datetime objects bounding the request; station
    supplies 'source' (the API URL without start/end) and
    'desired_data' (a comma-separated string of variable names).

    Returns a dict keyed by variable name holding the value found at
    the position of the last 'date_time' entry in the response, or
    None when the variable is absent, too short, or null at that
    position.

    Exits the program with status 1 when the response is not valid
    JSON or lacks the expected STATION/OBSERVATIONS structure.
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # A well-formed JSON document can still lack the keys we need;
    # indexing then raises KeyError/IndexError/TypeError rather than
    # ValueError, so all of them are treated as a bad response.
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError, ValueError):
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # NOTE: Unlike NRCS, one MesoWest response carries every requested
    #       variable, so a single request serves all elements.
    for element_cd in station['desired_data'].split(','):
        element_cd = element_cd.strip()
        if not element_cd:
            continue

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1')
        value = None
        if series and -len(series) <= pos < len(series):
            # Compare against None instead of relying on truthiness so
            # that legitimate zero readings (calm wind, 0.0 degrees)
            # are kept rather than dropped.
            if series[pos] is not None:
                value = series[pos]

        remote_data[element_cd] = value

    return remote_data
486
def switch_units_to_metric(data_map, mapping):
    """replace units with metric counterparts

    data_map is the row of InfoEx column values and mapping is the
    field-name -> column-position dict, both as produced by
    setup_infoex_fields_mapping(). data_map is modified in place and
    also returned.
    """

    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping ()
    #
    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
    #       itself handles the unit conversion; in the future, we will also
    #       support NRCS unit conversion, but this must be done by this
    #       program.
    #
    # The hourly max/min temperatures come from the same provider as the
    # present temperature, so their unit labels must switch together;
    # leaving them at 'F' would mislabel Celsius values.
    #
    # NOTE(review): precipitationGaugeUnit and baroUnit are still left at
    #               their imperial labels -- confirm which unit strings
    #               InfoEx accepts before switching those as well.
    metric_units = (
        ('tempMaxHourUnit', 'C'),
        ('tempMinHourUnit', 'C'),
        ('tempPresUnit', 'C'),
        ('hsUnit', 'm'),
        ('windSpeedUnit', 'm/s'),
        ('windGustSpeedNumUnit', 'm/s'),
    )

    for field_name, unit in metric_units:
        data_map[mapping[field_name]] = unit

    return data_map
503
504 # CSV operations
def write_local_csv(path_to_file, data):
    """Write the specified CSV file to disk

    data is a single row (a sequence of column values) written to
    path_to_file, replacing any existing file.

    Returns True on success and False when the file could not be
    opened or written, so that the caller's failure branch is
    reachable.
    """
    LOG.debug("writing CSV file '%s'", path_to_file)

    try:
        # newline='' lets the csv module control line endings itself,
        # as its documentation requires for files handed to csv.writer
        with open(path_to_file, 'w', newline='') as file_object:
            # The requirement is that empty values are represented in the
            # CSV file as "", csv.QUOTE_NONNUMERIC achieves that
            writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as err:
        LOG.error("Unable to write CSV file '%s': %s", path_to_file, err)
        return False

    return True
515
def upload_csv(path_to_file, infoex_data):
    """Upload the specified CSV file to InfoEx FTP and remove the file

    infoex_data supplies 'host', 'uuid' (used as the FTP user) and
    'api_key' (used as the FTP password). The local file is removed
    only after the transfer finished without raising; on failure the
    exception propagates and the file is left in place.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
        # the context manager closes the connection even when the
        # transfer raises, so the socket is never leaked
        with FTP(infoex_data['host'], infoex_data['uuid'],
                 infoex_data['api_key']) as ftp:
            ftp.storlines('STOR ' + path_to_file, file_object)

    os.remove(path_to_file)
526
527 # other miscellaneous routines
def setup_time_values():
    """Return (begin_date, end_date) bounding the data request(s)

    end_date is the current local time floored to the start of the
    hour; begin_date lies three hours before it.
    """
    now = datetime.datetime.now()

    # everything past the top of the current hour
    past_the_hour = datetime.timedelta(minutes=now.minute,
                                       seconds=now.second,
                                       microseconds=now.microsecond)
    window = datetime.timedelta(hours=3)

    end_date = now - past_the_hour
    begin_date = end_date - window
    return (begin_date, end_date)
537
# Run main() only when executed as a script, and pass its return value to
# sys.exit() so it becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())