#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
InfoEx <-> NRCS/MesoWest Auto Wx implementation
Alexander Vasarab
Wylark Mountaineering LLC

This program fetches data from either an NRCS SNOTEL site or a MesoWest
weather station and pushes it to InfoEx using the new automated weather
system implementation.

It is designed to be run hourly. It requests the last three hours of
data for each desired type and selects the most recent value. This
lends some resiliency to the process and helps ensure that we have a
value to send, but it can lead to somewhat inconsistent/untruthful
data if e.g. the HS is from the last hour but the tempPres is from two
hours ago because the instrumentation had a hiccup. It's worth
considering whether this is a bug or a feature.

For more information, see file: README
For licensing, see file: LICENSE
"""

import configparser
import csv
import datetime
import logging
import os
import sys
import time

from ftplib import FTP
from optparse import OptionParser

import requests

import zeep
import zeep.cache
import zeep.transports

__version__ = '2.0.0'

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)

def get_parser():
    """Return OptionParser for this program"""
    parser = OptionParser(version=__version__)

    parser.add_option("--config",
                      dest="config",
                      metavar="FILE",
                      help="location of config file")

    parser.add_option("--log-level",
                      dest="log_level",
                      default=None,
                      help="set the log level (debug, info, warning)")

    parser.add_option("--dry-run",
                      action="store_true",
                      dest="dry_run",
                      default=False,
                      help="fetch data but don't upload to InfoEx")

    return parser

def setup_config(config):
    """Set up config variables based on values specified in the ini file"""
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest']:
            print("Please specify either nrcs or mesowest as the station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']

            try:
                station['desired_data'] = config['station']['desired_data'].split(',')
            except KeyError:
                # desired_data missing, set the default
                station['desired_data'] = [
                    'TOBS', # AIR TEMPERATURE OBSERVED (degF)
                    'SNWD', # SNOW DEPTH (in)
                    'PREC'  # PRECIPITATION ACCUMULATION (in)
                ]

            # XXX: For NRCS, we're manually overriding units for now! Once
            #      unit conversion is supported for NRCS, REMOVE THIS!
            if 'units' not in station:
                station['units'] = 'imperial'

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']

            try:
                station['desired_data'] = config['station']['desired_data']
            except KeyError:
                # desired_data missing, set the default
                station['desired_data'] = 'air_temp,snow_depth'

            # construct full API URL (sans start/end time, added later)
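            # e.g. (token/station values illustrative only):
            #   https://api.synopticdata.com/v2/stations/timeseries?token=abc123&within=60&units=metric&stid=XYZ&vars=air_temp,snow_depth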
            station['source'] = station['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + station['units'] + '&stid=' + station['station_id'] + '&vars=' + station['desired_data']

    except KeyError as e:
        LOG.critical("%s not defined in the config file" % (e))
        sys.exit(1)
    except Exception as exc:
        LOG.critical("Exception occurred in config parsing: '%s'" % (exc))
        sys.exit(1)

    # all sections/values present in config file, final sanity check
    try:
        for key in config.sections():
            for subkey in config[key]:
                if not len(config[key][subkey]):
                    raise ValueError
    except ValueError:
        LOG.critical("Config value '%s.%s' is empty" % (key, subkey))
        sys.exit(1)

    return (infoex, station)

def setup_logging(log_level):
    """Set up our logging infrastructure"""
    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except ImportError:
        ## fallback to syslog
        #import logging.handlers
        #LOG.addHandler(logging.handlers.SysLogHandler())
        # fallback to stdout
        handler = logging.StreamHandler(sys.stdout)
        LOG.addHandler(handler)

    # ugly, but passable
    if log_level in [None, 'debug', 'info', 'warning']:
        if log_level == 'debug':
            LOG.setLevel(logging.DEBUG)
        elif log_level == 'info':
            LOG.setLevel(logging.INFO)
        elif log_level == 'warning':
            LOG.setLevel(logging.WARNING)
        else:
            LOG.setLevel(logging.NOTSET)
    else:
        return False

    return True

def main():
    """Main routine: sort through args, decide what to do, then do it"""
    parser = get_parser()
    (options, args) = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config.read(options.config)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    if station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values()

    LOG.debug("Getting %s data from %s to %s" %
              (str(station['desired_data']), str(begin_date), str(end_date)))

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date, station)

    LOG.info("Time taken to get all data: %.3f sec" %
             (time.time() - time_all_elements))

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # Now we only need to add in what we want to change thanks to that
    # abomination of a variable declaration earlier
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = end_date.strftime('%H:%M')

    for elementCd in infoex['wx_data']:
        if elementCd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']" % (elementCd))
            continue

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
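
        # The chained lookup below works like this (using air_temp as an
        # example): iemap maps the provider's element name to the InfoEx
        # field name ('air_temp' -> 'tempPres'), and fmap maps that field
        # name to its column index in final_data.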
        final_data[fmap[iemap[elementCd]]] = infoex['wx_data'][elementCd]

    LOG.debug("final_data: %s" % (str(final_data)))

    if not write_local_csv(infoex['csv_filename'], final_data):
        LOG.warning('Could not write local CSV file: %s',
                    infoex['csv_filename'])
        return 1

    if not options.dry_run:
        upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0

# data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Create a mapping of InfoEx fields to the local data's indexing scheme.

    INFOEX FIELDS

    This won't earn style points in Python, but here we establish a couple
    of helpful mapping variables. The reason this is helpful is that the
    end result is simply an ordered set, the CSV file. But we still may
    want to manipulate the values arbitrarily before writing that file.

    Also note that the current Auto Wx InfoEx documentation shows these
    keys in a graphical table with the "index" beginning at 1, but here we
    sanely index beginning at 0.
    """
    fmap = {} ; final_data = [None] * 29
    fmap['Location UUID'] = 0 ; final_data[0] = location_uuid
    fmap['obDate'] = 1 ; final_data[1] = None
    fmap['obTime'] = 2 ; final_data[2] = None
    fmap['timeZone'] = 3 ; final_data[3] = 'Pacific'
    fmap['tempMaxHour'] = 4 ; final_data[4] = None
    fmap['tempMaxHourUnit'] = 5 ; final_data[5] = 'F'
    fmap['tempMinHour'] = 6 ; final_data[6] = None
    fmap['tempMinHourUnit'] = 7 ; final_data[7] = 'F'
    fmap['tempPres'] = 8 ; final_data[8] = None
    fmap['tempPresUnit'] = 9 ; final_data[9] = 'F'
    fmap['precipitationGauge'] = 10 ; final_data[10] = None
    fmap['precipitationGaugeUnit'] = 11 ; final_data[11] = 'in'
    fmap['windSpeedNum'] = 12 ; final_data[12] = None
    fmap['windSpeedUnit'] = 13 ; final_data[13] = 'mph'
    fmap['windDirectionNum'] = 14 ; final_data[14] = None
    fmap['hS'] = 15 ; final_data[15] = None
    fmap['hsUnit'] = 16 ; final_data[16] = 'in'
    fmap['baro'] = 17 ; final_data[17] = None
    fmap['baroUnit'] = 18 ; final_data[18] = 'inHg'
    fmap['rH'] = 19 ; final_data[19] = None
    fmap['windGustSpeedNum'] = 20 ; final_data[20] = None
    fmap['windGustSpeedNumUnit'] = 21 ; final_data[21] = 'mph'
    fmap['windGustDirNum'] = 22 ; final_data[22] = None
    fmap['dewPoint'] = 23 ; final_data[23] = None
    fmap['dewPointUnit'] = 24 ; final_data[24] = 'F'
    fmap['hn24Auto'] = 25 ; final_data[25] = None
    fmap['hn24AutoUnit'] = 26 ; final_data[26] = 'in'
    fmap['hstAuto'] = 27 ; final_data[27] = None
    fmap['hstAutoUnit'] = 28 ; final_data[28] = 'in'

    return (fmap, final_data)

def setup_infoex_counterparts_mapping(provider):
    """
    Create a mapping of the NRCS/MesoWest fields that this program supports
    to their InfoEx counterparts
    """
    iemap = {}

    if provider == 'nrcs':
        iemap['PREC'] = 'precipitationGauge'
        iemap['TOBS'] = 'tempPres'
        iemap['SNWD'] = 'hS'
        iemap['PRES'] = 'baro'
        iemap['RHUM'] = 'rH'
        iemap['WSPD'] = 'windSpeedNum'
        iemap['WDIR'] = 'windDirectionNum'
        # unsupported by NRCS:
        #   windGustSpeedNum
    elif provider == 'mesowest':
        iemap['precip_accum'] = 'precipitationGauge'
        iemap['air_temp'] = 'tempPres'
        iemap['snow_depth'] = 'hS'
        iemap['pressure'] = 'baro'
        iemap['relative_humidity'] = 'rH'
        iemap['wind_speed'] = 'windSpeedNum'
        iemap['wind_direction'] = 'windDirectionNum'
        iemap['wind_gust'] = 'windGustSpeedNum'

    return iemap

# provider-specific operations
def get_nrcs_data(begin, end, station):
    """Get the data we're after from the NRCS WSDL"""
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=station['source'], transport=transport)
    remote_data = {}

    for elementCd in station['desired_data']:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=elementCd,
            ordinal=1,
            beginDate=begin,
            endDate=end)

        LOG.info("Time to get elementCd '%s': %.3f sec" %
                 (elementCd, time.time() - time_element))

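        # getHourlyData returns one result per requested station triplet; we
        # only ever request one station, so take the first. Its 'values'
        # list holds {dateTime, value} entries for the requested window.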
        values = tmp[0]['values']

        # sort and isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
            remote_data[elementCd] = ordered[0]['value']
        else:
            remote_data[elementCd] = None

    return remote_data

def get_mesowest_data(begin, end, station):
    """Get the data we're after from the MesoWest/Synoptic API"""
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        json = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = json['STATION'][0]['OBSERVATIONS']
    except (KeyError, IndexError):
        LOG.error("Unexpected JSON structure in MesoWest response")
        sys.exit(1)

    pos = len(observations['date_time']) - 1

    for elementCd in station['desired_data'].split(','):
        # sort and isolate the most recent, see note above in NRCS for how and
        # why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
        #       data (whereas with NRCS, we have to make a separate request for
        #       each element we want). This is nice for network efficiency but
        #       it means we have to handle this part differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data every
        #       10 minutes -- though this provides more opportunity for
        #       irregularities

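        # The OBSERVATIONS object is keyed per element, e.g. (illustrative):
        #   {'date_time': [...], 'air_temp_set_1': [...], 'snow_depth_set_1': [...]}
        # with each list aligned by index against 'date_time', so 'pos' above
        # points at the most recent sample in every list.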
        # we may not have the data at all
        key_name = elementCd + '_set_1'
        if key_name in observations:
            if observations[key_name][pos]:
                remote_data[elementCd] = observations[key_name][pos]
            else:
                remote_data[elementCd] = None
        else:
            remote_data[elementCd] = None

    return remote_data

def switch_units_to_metric(data_map, mapping):
    """Replace units with metric counterparts"""

    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping()
    #
    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
    #       itself handles the unit conversion; in the future, we will also
    #       support NRCS unit conversion, but this must be done by this
    #       program.
    data_map[mapping['tempPresUnit']] = 'C'
    data_map[mapping['hsUnit']] = 'm'
    data_map[mapping['windSpeedUnit']] = 'm/s'
    data_map[mapping['windGustSpeedNumUnit']] = 'm/s'

    return data_map

# CSV operations
def write_local_csv(path_to_file, data):
    """Write the specified CSV file to disk"""
    with open(path_to_file, 'w') as f:
        # The requirement is that empty values are represented in the CSV
        # file as "", csv.QUOTE_NONNUMERIC achieves that
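        # (with QUOTE_NONNUMERIC, every non-numeric field -- including the
        # None placeholders, which csv writes as empty strings -- is quoted,
        # while numeric values are written bare)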
        LOG.debug("writing CSV file '%s'" % (path_to_file))
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(data)

    return True

def upload_csv(path_to_file, infoex_data):
    """Upload the specified CSV file to InfoEx FTP and remove the file"""
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading '%s' to FTP host '%s'" % (path_to_file,
                                                       infoex_data['host']))
        ftp = FTP(infoex_data['host'], infoex_data['uuid'],
                  infoex_data['api_key'])
        ftp.storlines('STOR ' + path_to_file, file_object)
        ftp.close()

    os.remove(path_to_file)

# other miscellaneous routines
def setup_time_values():
    """Establish time bounds of data request(s)"""
    # floor time to nearest hour
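    # (e.g. a run at 13:42:57 yields end_date 13:00:00 and begin_date 10:00:00)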
    dt = datetime.datetime.now()
    end_date = dt - datetime.timedelta(minutes=dt.minute % 60,
                                       seconds=dt.second,
                                       microseconds=dt.microsecond)
    begin_date = end_date - datetime.timedelta(hours=3)
    return (begin_date, end_date)

if __name__ == "__main__":
    sys.exit(main())