Remove default override for desired_data
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 This program fetches data from either an NRCS SNOTEL site or MesoWest
10 weather station and pushes it to InfoEx using the new automated weather
11 system implementation.
12
13 It is designed to be run hourly, and it asks for the last three hours
14 of data of each desired type, and selects the most recent one. This
15 lends some resiliency to the process and helps ensure that we have a
16 value to send, but it can lead to somewhat inconsistent/untruthful
17 data if e.g. the HS is from the last hour but the tempPres is from two
18 hours ago because the instrumentation had a hiccup. It's worth
19 considering if this is a bug or a feature.
20
21 For more information, see file: README
22 For licensing, see file: LICENSE
23 """
24
25 import configparser
26 import csv
27 import datetime
28 import logging
29 import os
30 import sys
31 import time
32
33 from ftplib import FTP
34 from optparse import OptionParser
35
36 import requests
37
38 import zeep
39 import zeep.cache
40 import zeep.transports
41
# Program version; handed to OptionParser(version=...) in get_parser()
__version__ = '2.0.1'

# Module-level logger shared by every routine in this file; its handlers
# and effective level are configured later by setup_logging()
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)
46
def get_parser():
    """Build and return the OptionParser describing this program's switches"""
    # (flags, keyword settings) for each supported command-line option
    option_table = (
        (("--config",),
         {"dest": "config",
          "metavar": "FILE",
          "help": "location of config file"}),
        (("--log-level",),
         {"dest": "log_level",
          "default": None,
          "help": "set the log level (debug, info, warning)"}),
        (("--dry-run",),
         {"action": "store_true",
          "dest": "dry_run",
          "default": False,
          "help": "fetch data but don't upload to InfoEx"}),
    )

    parser = OptionParser(version=__version__)
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    return parser
68
def setup_config(config):
    """
    Setup config variables based on values specified in the ini file.

    Parameters:
        config: a configparser.ConfigParser that has already read the
                configuration file. It must provide an [infoex] and a
                [station] section.

    Returns:
        An (infoex, station) tuple of dicts. 'infoex' holds the InfoEx
        connection settings plus an empty 'wx_data' placeholder; 'station'
        holds the provider name, its data source URL, the station id, the
        units and the desired data elements (a list for NRCS, the raw
        comma-separated string for MesoWest).

    Exits the program with status 1 if the station type is unsupported, a
    required key is missing, or any config value is empty.
    """
    try:
        infoex_section = config['infoex']
        infoex = {
            'host': infoex_section['host'],
            'uuid': infoex_section['uuid'],
            'api_key': infoex_section['api_key'],
            'csv_filename': infoex_section['csv_filename'],
            'location_uuid': infoex_section['location_uuid'],
            'wx_data': {},  # placeholder key, values to come later
        }

        station_section = config['station']
        station = {'provider': station_section['type']}

        if station['provider'] not in ('nrcs', 'mesowest'):
            print("Please specify either nrcs or mesowest as the station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = station_section['station_id']
            # tolerate whitespace around the commas, e.g. "TOBS, SNWD"
            station['desired_data'] = [
                element.strip()
                for element in station_section['desired_data'].split(',')
            ]

            # XXX: For NRCS, we're manually overriding units for now! Once
            #      unit conversion is supported for NRCS, REMOVE THIS!
            station['units'] = 'imperial'

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = station_section['station_id']
            station['units'] = station_section['units']
            station['desired_data'] = station_section['desired_data']

            # construct full API URL (sans start/end time, added later)
            station['source'] = (station['source']
                                 + '?token=' + station_section['token']
                                 + '&within=60&units=' + station['units']
                                 + '&stid=' + station['station_id']
                                 + '&vars=' + station['desired_data'])

    except KeyError as exc:
        # NOTE: the config file name is not known in this scope, so the
        #       message only names the missing key
        LOG.critical("%s not defined in config file", exc)
        sys.exit(1)
    except Exception as exc:
        LOG.critical("Exception occurred in config parsing: '%s'", exc)
        sys.exit(1)

    # all sections/values present in config file, final sanity check
    for section in config.sections():
        for option, value in config[section].items():
            if not value:
                LOG.critical("Config value '%s.%s' is empty", section, option)
                sys.exit(1)

    return (infoex, station)
125
def setup_logging(log_level):
    """
    Setup our logging infrastructure.

    Parameters:
        log_level: one of None, 'debug', 'info' or 'warning'. None leaves
                   the logger at logging.NOTSET.

    Returns:
        True once a handler is attached and the level is set; False if
        log_level is not one of the accepted values, in which case the
        logger is left untouched.
    """
    levels = {
        None: logging.NOTSET,
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }

    # validate first so that a rejected level does not leave a stray
    # handler attached to the logger
    if log_level not in levels:
        return False

    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except Exception:
        # Any failure to set up the systemd journal handler falls back to
        # stdout. Unlike a bare except, this lets KeyboardInterrupt and
        # SystemExit propagate.
        LOG.addHandler(logging.StreamHandler(sys.stdout))

    LOG.setLevel(levels[log_level])
    return True
153
def main():
    """
    Main routine: sort through args, decide what to do, then do it.

    Parses the command line, loads the config file, fetches the most
    recent observations from the configured provider, writes them to the
    local CSV file and, unless --dry-run was given, uploads that file to
    InfoEx.

    Returns:
        0 on success, 1 if the local CSV file could not be written.

    Exits with status 1 if no config file was specified, the config file
    could not be read, or the log level is invalid.
    """
    parser = get_parser()
    (options, _) = parser.parse_args()

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config = configparser.ConfigParser(allow_no_value=False)

    # ConfigParser.read() silently skips files it cannot open and returns
    # the list of files it did read, so an empty list means nothing loaded
    if not config.read(options.config):
        print("\nCould not read configuration file: %s" % options.config)
        sys.exit(1)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    if station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values()

    LOG.debug("Getting %s data from %s to %s", station['desired_data'],
              begin_date, end_date)

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)

    LOG.info("Time taken to get all data : %.3f sec",
             time.time() - time_all_elements)

    LOG.debug("infoex[wx_data]: %s", infoex['wx_data'])

    # The field mapping already pre-filled every default, so only the
    # values that change per run need to be set here
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = end_date.strftime('%H:%M')

    for element_cd, value in infoex['wx_data'].items():
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        # CONSIDER: Casting every value to Float() -- need to investigate if
        #           any possible elementCds we may want are any other data
        #           type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or two
        #           exceptions to any otherwise uniformly Float value set.
        final_data[fmap[iemap[element_cd]]] = value

    LOG.debug("final_data: %s", final_data)

    if not write_local_csv(infoex['csv_filename'], final_data):
        LOG.warning('Could not write local CSV file: %s',
                    infoex['csv_filename'])
        return 1

    if not options.dry_run:
        upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0
239
240 # data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Create a mapping of InfoEx fields to the local data's indexing scheme.

    INFOEX FIELDS

    The end result written to the CSV file is simply an ordered row, but
    the values still need to be addressable by name before that row is
    written. This routine therefore returns two structures built from one
    ordered table:

      fmap       -- dict of InfoEx field name -> position in the row
      final_data -- list holding the default value for each position

    Note that the Auto Wx InfoEx documentation shows these fields in a
    table whose "index" begins at 1, whereas positions here begin at 0.
    """
    # (field name, default value), in the order the row is laid out
    field_defaults = (
        ('Location UUID', location_uuid),
        ('obDate', None),
        ('obTime', None),
        ('timeZone', 'Pacific'),
        ('tempMaxHour', None),
        ('tempMaxHourUnit', 'F'),
        ('tempMinHour', None),
        ('tempMinHourUnit', 'F'),
        ('tempPres', None),
        ('tempPresUnit', 'F'),
        ('precipitationGauge', None),
        ('precipitationGaugeUnit', 'in'),
        ('windSpeedNum', None),
        ('windSpeedUnit', 'mph'),
        ('windDirectionNum', None),
        ('hS', None),
        ('hsUnit', 'in'),
        ('baro', None),
        ('baroUnit', 'inHg'),
        ('rH', None),
        ('windGustSpeedNum', None),
        ('windGustSpeedNumUnit', 'mph'),
        ('windGustDirNum', None),
        ('dewPoint', None),
        ('dewPointUnit', 'F'),
        ('hn24Auto', None),
        ('hn24AutoUnit', 'in'),
        ('hstAuto', None),
        ('hstAutoUnit', 'in'),
    )

    fmap = {name: position
            for position, (name, _) in enumerate(field_defaults)}
    final_data = [default for (_, default) in field_defaults]

    return (fmap, final_data)
288
def setup_infoex_counterparts_mapping(provider):
    """
    Return a dict translating the NRCS/MesoWest element names that this
    program supports into their InfoEx field names.

    An unrecognized provider yields an empty dict.
    """
    counterparts = {
        # unsupported by NRCS: windGustSpeedNum
        'nrcs': {
            'PREC': 'precipitationGauge',
            'TOBS': 'tempPres',
            'SNWD': 'hS',
            'PRES': 'baro',
            'RHUM': 'rH',
            'WSPD': 'windSpeedNum',
            'WDIR': 'windDirectionNum',
        },
        'mesowest': {
            'precip_accum': 'precipitationGauge',
            'air_temp': 'tempPres',
            'snow_depth': 'hS',
            'pressure': 'baro',
            'relative_humidity': 'rH',
            'wind_speed': 'windSpeedNum',
            'wind_direction': 'windDirectionNum',
            'wind_gust': 'windGustSpeedNum',
        },
    }

    return dict(counterparts.get(provider, {}))
317
318 # provider-specific operations
def get_nrcs_data(begin, end, station):
    """
    Get the data we're after from the NRCS WSDL.

    Parameters:
        begin, end: bounds of the request, passed to the service as
                    beginDate/endDate.
        station:    dict providing 'source' (WSDL URL), 'station_id' and
                    'desired_data' (iterable of element codes).

    Returns:
        dict of element code -> value of the most recent reading in the
        requested window, or None if the service returned no readings
        for that element.
    """
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=station['source'], transport=transport)
    remote_data = {}

    # NRCS requires one request per element
    for element_cd in station['desired_data']:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        response = client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin,
            endDate=end)

        LOG.info("Time to get elementCd '%s': %.3f sec", element_cd,
                 time.time() - time_element)

        # an empty response is treated the same as "no readings"
        values = response[0]['values'] if response else None

        # isolate the most recent reading
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda reading: reading['dateTime'])
            remote_data[element_cd] = latest['value']
        else:
            remote_data[element_cd] = None

    return remote_data
353
def get_mesowest_data(begin, end, station):
    """
    Get the data we're after from the MesoWest/Synoptic API.

    Parameters:
        begin, end: datetimes bounding the request; formatted as
                    YYYYmmddHHMM and appended to the URL as start/end.
        station:    dict providing 'source' (API URL without start/end)
                    and 'desired_data' (comma-separated element names).

    Returns:
        dict of element name -> most recent value in the response, or
        None if the element is absent or has no value at that position.

    Exits the program with status 1 if the response is not valid JSON or
    lacks the expected STATION/OBSERVATIONS structure.
    """
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        payload = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # a structurally unexpected payload surfaces as KeyError, IndexError
    # or TypeError from the lookups below, not as ValueError
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    for element_cd in station['desired_data'].split(','):
        # isolate the most recent reading (the last position in the series)
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
        #       data (whereas with NRCS, we have to make a separate request for
        #       each element we want). This is nice for network efficiency but
        #       it means we have to handle this part differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data every
        #       10 minutes -- though this provides more opportunity for
        #       irregularities

        # we may not have the data at all
        series = observations.get(element_cd + '_set_1') or []

        # Only a missing position yields None. A reading of 0 is real data
        # (e.g. calm wind) and is kept as-is.
        if 0 <= pos < len(series):
            remote_data[element_cd] = series[pos]
        else:
            remote_data[element_cd] = None

    return remote_data
405
def switch_units_to_metric(data_map, mapping):
    """
    Replace unit labels with their metric counterparts.

    'data_map' is modified in place (and also returned); 'mapping' gives
    the position of each named field within it.
    """
    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping ()
    #
    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
    #       itself handles the unit conversion; in the future, we will also
    #       support NRCS unit conversion, but this must be done by this
    #       program.
    #
    # NOTE(review): the other unit fields (precipitation, baro, dew point,
    #       etc.) are left untouched here -- confirm that is intended.
    metric_labels = (
        ('tempPresUnit', 'C'),
        ('hsUnit', 'm'),
        ('windSpeedUnit', 'm/s'),
        ('windGustSpeedNumUnit', 'm/s'),
    )

    for unit_field, label in metric_labels:
        data_map[mapping[unit_field]] = label

    return data_map
422
423 # CSV operations
def write_local_csv(path_to_file, data):
    """
    Write the specified CSV file to disk.

    Parameters:
        path_to_file: where to write the file (overwritten if present).
        data:         sequence of values forming the single CSV row.

    Returns:
        True if the row was written, False if the file could not be
        opened or written (the reason is logged).
    """
    LOG.debug("writing CSV file '%s'", path_to_file)

    try:
        # newline='' lets the csv module control line endings itself, as
        # its documentation requires for files passed to csv.writer
        with open(path_to_file, 'w', newline='') as csv_file:
            # The requirement is that empty values are represented in the
            # CSV file as "", csv.QUOTE_NONNUMERIC achieves that
            writer = csv.writer(csv_file, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow(data)
    except OSError as exc:
        LOG.warning("Error writing CSV file '%s': %s", path_to_file, exc)
        return False

    return True
434
def upload_csv(path_to_file, infoex_data):
    """
    Upload the specified CSV file to InfoEx FTP and remove the file.

    Parameters:
        path_to_file: local file to upload; also used as the remote name
                      in the STOR command.
        infoex_data:  dict providing 'host', 'uuid' (FTP user) and
                      'api_key' (FTP password).

    The local file is removed only after the upload completes; if the
    upload raises, the exception propagates and the file is left in place.
    """
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
        # the context manager closes the connection even if the upload fails
        with FTP(infoex_data['host'], infoex_data['uuid'],
                 infoex_data['api_key']) as ftp:
            ftp.storlines('STOR ' + path_to_file, file_object)

    os.remove(path_to_file)
445
446 # other miscellaneous routines
def setup_time_values():
    """
    Establish time bounds of data request(s).

    Returns a (begin_date, end_date) tuple where end_date is the current
    local time floored to the hour and begin_date is three hours earlier.
    """
    now = datetime.datetime.now()

    # floor to the hour by rebuilding the timestamp without the
    # minute/second/microsecond components
    end_date = datetime.datetime(now.year, now.month, now.day, now.hour)
    window = datetime.timedelta(hours=3)
    begin_date = end_date - window

    return (begin_date, end_date)
456
if __name__ == "__main__":
    # hand main()'s return value to the shell as the process exit status
    exit_status = main()
    sys.exit(exit_status)