Merge branch 'release-2.0.2' into develop
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32
33 from ftplib import FTP
34 from argparse import ArgumentParser
35
36 import requests
37
38 import zeep
39 import zeep.cache
40 import zeep.transports
41
# Program version; this is the string printed by the --version switch.
__version__ = '2.0.2'

# Module-wide logger shared by every routine in this file. NOTSET on a
# non-root logger defers the effective level to its ancestors; the level is
# chosen for real in setup_logging().
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)
46
def get_parser():
    """
    Build the command-line parser for this program.

    Supported switches are --version, --config FILE, --log-level LEVEL and
    --dry-run. Returns the configured argparse.ArgumentParser.
    """
    cli = ArgumentParser()

    # (flag, add_argument keyword arguments), in the order they are
    # registered with the parser
    switches = (
        ("--version", dict(action="version",
                           version=__version__)),
        ("--config", dict(dest="config",
                          metavar="FILE",
                          help="location of config file")),
        ("--log-level", dict(dest="log_level",
                             default=None,
                             help="set the log level (debug, info, warning)")),
        ("--dry-run", dict(action="store_true",
                           dest="dry_run",
                           default=False,
                           help="fetch data but don't upload to InfoEx")),
    )

    for flag, settings in switches:
        cli.add_argument(flag, **settings)

    return cli
72
def setup_config(config):
    """
    Build the runtime settings from the parsed ini file.

    config is the configparser.ConfigParser holding the [infoex] and
    [station] sections.

    Returns a tuple (infoex, station) of plain dicts:

      infoex  -- host, uuid, api_key, csv_filename, location_uuid and an
                 empty 'wx_data' dict to be filled in once data is fetched
      station -- provider ('nrcs' or 'mesowest'), source, station_id, units
                 and desired_data. For NRCS desired_data is a list of element
                 codes; for MesoWest it stays the raw comma-separated string
                 because it is embedded in the request URL as-is.

    Terminates the program with exit status 1 if the station type is not
    supported, if a required key is missing, or if any configured value is
    empty.
    """
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest']:
            print("Please specify either nrcs or mesowest as the station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']

            # tolerate "TOBS, SNWD" and trailing commas: every entry is used
            # verbatim as an element code later on, so stray whitespace or
            # empty entries would turn into bad requests
            station['desired_data'] = [
                code.strip()
                for code in config['station']['desired_data'].split(',')
                if code.strip()
            ]

            # XXX: For NRCS, we're manually overriding units for now! Once
            #      unit conversion is supported for NRCS, REMOVE THIS!
            station['units'] = 'imperial'

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']
            station['desired_data'] = config['station']['desired_data']

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                config['station']['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        # sys.exit rather than the bare exit(): the latter is only injected
        # by the site module and is not guaranteed to exist
        sys.exit(1)

    # all sections/values present in config file, final sanity check
    for section in config.sections():
        for option in config[section]:
            if not config[section][option]:
                LOG.critical("Config value '%s.%s' is empty", section, option)
                sys.exit(1)

    return (infoex, station)
130
def setup_logging(log_level):
    """
    Attach a handler to the module logger and apply the requested level.

    The systemd journal is used when its Python bindings can be imported;
    otherwise log records go to stdout.

    log_level may be None (leave the level at NOTSET), 'debug', 'info' or
    'warning'. Returns True when the level was recognised and applied,
    False for anything else.
    """
    try:
        from systemd.journal import JournalHandler
        sink = JournalHandler()
    except ImportError:
        # no journal bindings available: fall back to stdout
        sink = logging.StreamHandler(sys.stdout)

    LOG.addHandler(sink)

    # accepted switch values and the logging level each one selects
    known_levels = (
        ('debug', logging.DEBUG),
        ('info', logging.INFO),
        ('warning', logging.WARNING),
        (None, logging.NOTSET),
    )

    for name, level in known_levels:
        if log_level == name:
            LOG.setLevel(level)
            return True

    return False
158
def main():
    """
    Main routine: sort through args, decide what to do, then do it.

    Reads the configuration named by --config, fetches the most recent
    readings from the configured provider, writes them to the local CSV file
    and, unless --dry-run was given, uploads that file to InfoEx.

    Returns 0 on success and 1 if the local CSV file could not be written.
    Usage and configuration errors terminate the program with exit status 1.
    """
    parser = get_parser()
    options = parser.parse_args()

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config = configparser.ConfigParser(allow_no_value=False)

    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed, so an empty result means the path
    # given on the command line was missing or unreadable
    if not config.read(options.config):
        print("\nCould not read configuration file: %s" % options.config)
        sys.exit(1)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    if station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values()

    LOG.debug("Getting %s data from %s to %s", str(station['desired_data']),
              str(begin_date), str(end_date))

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # The row was pre-filled with defaults by setup_infoex_fields_mapping();
    # only the fields that change from run to run are set from here on
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = end_date.strftime('%H:%M')

    for element_cd, value in infoex['wx_data'].items():
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        final_data[fmap[iemap[element_cd]]] = value

    LOG.debug("final_data: %s", str(final_data))

    if not write_local_csv(infoex['csv_filename'], final_data):
        LOG.warning('Could not write local CSV file: %s',
                    infoex['csv_filename'])
        return 1

    if not options.dry_run:
        upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
244
245 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Create a mapping of InfoEx fields to the local data's indexing scheme.

    The CSV file sent to InfoEx is a single ordered row. To let the rest of
    the program address columns by name, this returns a pair:

      fmap       -- dict mapping each InfoEx field name to its column index
      final_data -- list holding the default value of every column, with
                    location_uuid already placed in the first one

    Columns are indexed from 0 here, whereas the Auto Wx InfoEx
    documentation numbers them from 1.
    """
    # (field name, default value) in CSV column order
    columns = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {}
    final_data = []
    for index, (field, default) in enumerate(columns):
        fmap[field] = index
        final_data.append(default)

    return (fmap, final_data)
294
def setup_infoex_counterparts_mapping(provider):
    """
    Map the provider's element names to the InfoEx fields they feed.

    provider is 'nrcs' or 'mesowest'. The returned dict is keyed by the
    provider's element name and holds the InfoEx field name as its value;
    any other provider yields an empty dict.
    """
    if provider == 'nrcs':
        # NRCS has no counterpart for windGustSpeedNum
        return {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        }

    if provider == 'mesowest':
        return {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        }

    return {}
323
324 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """
    Fetch the latest reading of every desired element from the NRCS WSDL.

    One getHourlyData request is made per element code in
    station['desired_data'], covering the window from begin to end. Returns
    a dict keyed by element code holding the value of the reading with the
    greatest dateTime, or None when no values came back for that element.
    """
    soap_transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    soap_client = zeep.Client(wsdl=station['source'], transport=soap_transport)
    latest = {}

    for element_cd in station['desired_data']:
        started = time.time()

        # ask for every hourly reading of this element inside the window
        response = soap_client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin,
            endDate=end)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - started)

        readings = response[0]['values']

        if not readings:
            latest[element_cd] = None
            continue

        # NOTE: hourly NRCS data sometimes has gaps, so rather than insisting
        #       on the reading for the current hour we take whichever one is
        #       newest; this can yield slightly stale data
        newest = max(readings, key=lambda reading: reading['dateTime'])
        latest[element_cd] = newest['value']

    return latest
359
def get_mesowest_data(begin, end, station):
    """
    Fetch the latest reading of every desired variable from MesoWest/Synoptic.

    begin and end bound the request window and must offer strftime().
    station['source'] is the API URL without the start/end parameters and
    station['desired_data'] is the comma-separated list of variable names.

    Unlike NRCS, a single response carries every requested variable. For
    each one the value at the last reported timestamp is taken from its
    '<name>_set_1' series. Returns a dict keyed by variable name; the value
    is None when the series is absent, too short, or has no reading at that
    position.

    Terminates the program with exit status 1 if the response is not JSON or
    lacks the expected STATION/OBSERVATIONS structure.
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # a well-formed JSON document can still lack the structure we need;
    # that shows up as KeyError/IndexError/TypeError, not ValueError
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        timestamps = observations['date_time']
    except (KeyError, IndexError, TypeError):
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # index of the most recent observation (-1 when nothing was reported)
    pos = len(timestamps) - 1

    for element_cd in station['desired_data'].split(','):
        series = observations.get(element_cd + '_set_1')

        # we may not have the data at all, or not at that position
        if series is None or not 0 <= pos < len(series):
            remote_data[element_cd] = None
            continue

        # keep the reading as-is: a missing one is already None, while a
        # legitimate zero (0 degrees, calm wind) must not be discarded
        remote_data[element_cd] = series[pos]

    return remote_data
411
def switch_units_to_metric(data_map, mapping):
    """
    Replace the imperial unit labels in the row with metric ones.

    data_map is the row of column values and mapping the field-name to
    column-index dict, both as produced by setup_infoex_fields_mapping().
    The row is updated in place and also returned.
    """
    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
    #       itself handles the unit conversion; NRCS unit conversion would
    #       have to be done by this program.
    #
    # NOTE(review): only these four unit fields are switched; every other
    #       unit field keeps whatever label data_map already holds --
    #       confirm that is intended for metric stations.
    metric_labels = (
        ('tempPresUnit', 'C'),
        ('hsUnit', 'm'),
        ('windSpeedUnit', 'm/s'),
        ('windGustSpeedNumUnit', 'm/s'),
    )

    for field, label in metric_labels:
        data_map[mapping[field]] = label

    return data_map
428
429 # CSV operations
def write_local_csv(path_to_file, data):
    """
    Write the specified CSV file to disk.

    data is the single row of column values to write to path_to_file, which
    is created or overwritten.

    Returns True once the row has been written, or False if the file could
    not be opened or written (the reason is logged).
    """
    LOG.debug("writing CSV file '%s'", path_to_file)

    try:
        # newline='' is what the csv module asks for; without it the writer's
        # own line endings get translated a second time on some platforms
        with open(path_to_file, 'w', newline='') as file_object:
            # The requirement is that empty values are represented in the CSV
            # file as "", csv.QUOTE_NONNUMERIC achieves that
            writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as err:
        LOG.error("Unable to write CSV file '%s': %s", path_to_file, err)
        return False

    return True
440
def upload_csv(path_to_file, infoex_data):
    """
    Upload the specified CSV file to InfoEx FTP and remove the file.

    infoex_data supplies the FTP 'host' plus the 'uuid' and 'api_key' used
    as login name and password. The local file is deleted only after the
    transfer has completed; if the transfer raises, the file is left in
    place and the exception propagates to the caller.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])

        # the context manager ends the session and closes the socket even
        # when the transfer fails part-way
        with FTP(infoex_data['host'], infoex_data['uuid'],
                 infoex_data['api_key']) as ftp:
            # NOTE(review): the local path doubles as the remote file name --
            # confirm csv_filename is always a bare file name
            ftp.storlines('STOR ' + path_to_file, file_object)

    os.remove(path_to_file)
451
452 # other miscellaneous routines
def setup_time_values():
    """
    Establish the time bounds of the data request(s).

    Returns a tuple (begin_date, end_date) of naive local datetimes:
    end_date is the current time rounded down to the start of the hour and
    begin_date lies three hours before it.
    """
    now = datetime.datetime.now()

    # floor to the hour by rebuilding the timestamp without its minute,
    # second and microsecond parts
    end_date = datetime.datetime(now.year, now.month, now.day, now.hour)
    begin_date = end_date + datetime.timedelta(hours=-3)

    return (begin_date, end_date)
462
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)