#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
InfoEx <-> NRCS/MesoWest Auto Wx implementation
Alexander Vasarab
Wylark Mountaineering LLC

This program fetches data from either an NRCS SNOTEL site or a MesoWest
weather station and pushes it to InfoEx using the new automated weather
system implementation.

It is designed to be run hourly: it requests the last three hours of
data for each desired element and selects the most recent value. This
lends some resiliency to the process and helps ensure that we have a
value to send, but it can lead to somewhat inconsistent/untruthful data
if, e.g., the HS is from the last hour but the tempPres is from two
hours ago because the instrumentation had a hiccup. It's worth
considering whether this is a bug or a feature.

For more information, see file: README
For licensing, see file: LICENSE
"""
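
# Example invocation (the config file path here is illustrative; see the
# README for details on the file's contents):
#
#   ./infoex-autowx.py --config /etc/infoex-autowx.ini --log-level info --dry-run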

import configparser
import csv
import datetime
import logging
import os
import sys
import time

from ftplib import FTP
from optparse import OptionParser

import requests

import zeep
import zeep.cache
import zeep.transports

__version__ = '2.0.1'

LOG = logging.getLogger(__name__)
LOG.setLevel(logging.NOTSET)

def get_parser():
    """Return OptionParser for this program"""
    parser = OptionParser(version=__version__)

    parser.add_option("--config",
                      dest="config",
                      metavar="FILE",
                      help="location of config file")

    parser.add_option("--log-level",
                      dest="log_level",
                      default=None,
                      help="set the log level (debug, info, warning)")

    parser.add_option("--dry-run",
                      action="store_true",
                      dest="dry_run",
                      default=False,
                      help="fetch data but don't upload to InfoEx")

    return parser

def setup_config(config):
    """Setup config variable based on values specified in the ini file"""
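    # Based on the keys read below, a config file is expected to look
    # roughly like the following (all values shown are illustrative;
    # 'token' and 'units' apply to MesoWest stations only):
    #
    #   [infoex]
    #   host = <infoex ftp host>
    #   uuid = <uuid>
    #   api_key = <api key>
    #   csv_filename = wx_data.csv
    #   location_uuid = <location uuid>
    #
    #   [station]
    #   type = mesowest
    #   token = <synoptic api token>
    #   station_id = <station id>
    #   desired_data = air_temp,snow_depth
    #   units = metric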
    try:
        infoex = {
            'host': config['infoex']['host'],
            'uuid': config['infoex']['uuid'],
            'api_key': config['infoex']['api_key'],
            'csv_filename': config['infoex']['csv_filename'],
            'location_uuid': config['infoex']['location_uuid'],
            'wx_data': {}, # placeholder key, values to come later
        }

        station = dict()
        station['provider'] = config['station']['type']

        if station['provider'] not in ['nrcs', 'mesowest']:
            print("Please specify either nrcs or mesowest as the station type.")
            sys.exit(1)

        if station['provider'] == 'nrcs':
            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
            station['station_id'] = config['station']['station_id']
            station['desired_data'] = config['station']['desired_data'].split(',')

            # XXX: For NRCS, we're manually overriding units for now! Once
            #      unit conversion is supported for NRCS, REMOVE THIS!
            if 'units' not in station:
                station['units'] = 'imperial'

        if station['provider'] == 'mesowest':
            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
            station['station_id'] = config['station']['station_id']
            station['units'] = config['station']['units']
            station['desired_data'] = config['station']['desired_data']

            # construct full API URL (sans start/end time, added later)
            station['source'] = station['source'] + '?token=' + \
                                config['station']['token'] + \
                                '&within=60&units=' + station['units'] + \
                                '&stid=' + station['station_id'] + \
                                '&vars=' + station['desired_data']
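
            # at this point station['source'] looks roughly like the
            # following (values illustrative):
            #
            #   https://api.synopticdata.com/v2/stations/timeseries?token=<token>&within=60&units=metric&stid=<stid>&vars=air_temp,snow_depth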

    except KeyError as err:
        LOG.critical("%s not defined in configuration file", err)
        sys.exit(1)

    # all sections/values present in config file, final sanity check
    try:
        for key in config.sections():
            for subkey in config[key]:
                if not config[key][subkey]:
                    raise ValueError
    except ValueError:
        LOG.critical("Config value '%s.%s' is empty", key, subkey)
        sys.exit(1)

    return (infoex, station)

def setup_logging(log_level):
    """Setup our logging infrastructure"""
    try:
        from systemd.journal import JournalHandler
        LOG.addHandler(JournalHandler())
    except ImportError:
        ## fallback to syslog
        #import logging.handlers
        #LOG.addHandler(logging.handlers.SysLogHandler())
        # fallback to stdout
        handler = logging.StreamHandler(sys.stdout)
        LOG.addHandler(handler)

    # ugly, but passable
    if log_level in [None, 'debug', 'info', 'warning']:
        if log_level == 'debug':
            LOG.setLevel(logging.DEBUG)
        elif log_level == 'info':
            LOG.setLevel(logging.INFO)
        elif log_level == 'warning':
            LOG.setLevel(logging.WARNING)
        else:
            LOG.setLevel(logging.NOTSET)
    else:
        return False

    return True

def main():
    """Main routine: sort through args, decide what to do, then do it"""
    parser = get_parser()
    (options, args) = parser.parse_args()

    config = configparser.ConfigParser(allow_no_value=False)

    if not options.config:
        parser.print_help()
        print("\nPlease specify a configuration file via --config.")
        sys.exit(1)

    config.read(options.config)

    if not setup_logging(options.log_level):
        parser.print_help()
        print("\nPlease select an appropriate log level or remove the switch (--log-level).")
        sys.exit(1)

    (infoex, station) = setup_config(config)

    LOG.debug('Config parsed, starting up')

    # create mappings
    (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
    iemap = setup_infoex_counterparts_mapping(station['provider'])

    # override units if user selected metric
    if station['units'] == 'metric':
        final_data = switch_units_to_metric(final_data, fmap)

    (begin_date, end_date) = setup_time_values()

    # get the data
    LOG.debug("Getting %s data from %s to %s", str(station['desired_data']),
              str(begin_date), str(end_date))

    time_all_elements = time.time()

    # get the data
    if station['provider'] == 'nrcs':
        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
    elif station['provider'] == 'mesowest':
        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
                                              station)

    LOG.info("Time taken to get all data : %.3f sec", time.time() -
             time_all_elements)

    LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))

    # Now we only need to add in what we want to change thanks to that
    # abomination of a variable declaration earlier
    final_data[fmap['Location UUID']] = infoex['location_uuid']
    final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
    final_data[fmap['obTime']] = end_date.strftime('%H:%M')

    for element_cd in infoex['wx_data']:
        if element_cd not in iemap:
            LOG.warning("BAD KEY wx_data['%s']", element_cd)
            continue

        # CONSIDER: casting every value to float() -- we need to
        #           investigate whether any elementCd we may want comes
        #           back as any data type other than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or
        #           two exceptions to an otherwise uniformly float value set.
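
        # To make the mapping chain concrete (illustrative example): a
        # MesoWest 'snow_depth' value maps via iemap to 'hS', and
        # fmap['hS'] is 15, so the value lands in final_data[15].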
        final_data[fmap[iemap[element_cd]]] = infoex['wx_data'][element_cd]

    LOG.debug("final_data: %s", str(final_data))

    if not write_local_csv(infoex['csv_filename'], final_data):
        LOG.warning('Could not write local CSV file: %s',
                    infoex['csv_filename'])
        return 1

    if not options.dry_run:
        upload_csv(infoex['csv_filename'], infoex)

    LOG.debug('DONE')
    return 0

# data structure operations
def setup_infoex_fields_mapping(location_uuid):
    """
    Create a mapping of InfoEx fields to the local data's indexing scheme.

    INFOEX FIELDS

    This won't earn style points in Python, but here we establish a couple
    of helpful mapping variables. The reason this is helpful is that the
    end result is simply an ordered set (the CSV file), but we still may
    want to manipulate the values arbitrarily before writing that file.

    Also note that the current Auto Wx InfoEx documentation shows these
    keys in a graphical table with the "index" beginning at 1, but here we
    sanely index beginning at 0.
    """
    # pylint: disable=too-many-statements,multiple-statements,bad-whitespace
    fmap = {}                                  ; final_data = [None] * 29
    fmap['Location UUID'] = 0                  ; final_data[0] = location_uuid
    fmap['obDate'] = 1                         ; final_data[1] = None
    fmap['obTime'] = 2                         ; final_data[2] = None
    fmap['timeZone'] = 3                       ; final_data[3] = 'Pacific'
    fmap['tempMaxHour'] = 4                    ; final_data[4] = None
    fmap['tempMaxHourUnit'] = 5                ; final_data[5] = 'F'
    fmap['tempMinHour'] = 6                    ; final_data[6] = None
    fmap['tempMinHourUnit'] = 7                ; final_data[7] = 'F'
    fmap['tempPres'] = 8                       ; final_data[8] = None
    fmap['tempPresUnit'] = 9                   ; final_data[9] = 'F'
    fmap['precipitationGauge'] = 10            ; final_data[10] = None
    fmap['precipitationGaugeUnit'] = 11        ; final_data[11] = 'in'
    fmap['windSpeedNum'] = 12                  ; final_data[12] = None
    fmap['windSpeedUnit'] = 13                 ; final_data[13] = 'mph'
    fmap['windDirectionNum'] = 14              ; final_data[14] = None
    fmap['hS'] = 15                            ; final_data[15] = None
    fmap['hsUnit'] = 16                        ; final_data[16] = 'in'
    fmap['baro'] = 17                          ; final_data[17] = None
    fmap['baroUnit'] = 18                      ; final_data[18] = 'inHg'
    fmap['rH'] = 19                            ; final_data[19] = None
    fmap['windGustSpeedNum'] = 20              ; final_data[20] = None
    fmap['windGustSpeedNumUnit'] = 21          ; final_data[21] = 'mph'
    fmap['windGustDirNum'] = 22                ; final_data[22] = None
    fmap['dewPoint'] = 23                      ; final_data[23] = None
    fmap['dewPointUnit'] = 24                  ; final_data[24] = 'F'
    fmap['hn24Auto'] = 25                      ; final_data[25] = None
    fmap['hn24AutoUnit'] = 26                  ; final_data[26] = 'in'
    fmap['hstAuto'] = 27                       ; final_data[27] = None
    fmap['hstAutoUnit'] = 28                   ; final_data[28] = 'in'

    return (fmap, final_data)

def setup_infoex_counterparts_mapping(provider):
    """
    Create a mapping of the NRCS/MesoWest fields that this program supports to
    their InfoEx counterparts
    """
    iemap = {}

    if provider == 'nrcs':
        iemap['PREC'] = 'precipitationGauge'
        iemap['TOBS'] = 'tempPres'
        iemap['SNWD'] = 'hS'
        iemap['PRES'] = 'baro'
        iemap['RHUM'] = 'rH'
        iemap['WSPD'] = 'windSpeedNum'
        iemap['WDIR'] = 'windDirectionNum'
        # unsupported by NRCS:
        # windGustSpeedNum
    elif provider == 'mesowest':
        iemap['precip_accum'] = 'precipitationGauge'
        iemap['air_temp'] = 'tempPres'
        iemap['snow_depth'] = 'hS'
        iemap['pressure'] = 'baro'
        iemap['relative_humidity'] = 'rH'
        iemap['wind_speed'] = 'windSpeedNum'
        iemap['wind_direction'] = 'windDirectionNum'
        iemap['wind_gust'] = 'windGustSpeedNum'

    return iemap

# provider-specific operations
def get_nrcs_data(begin, end, station):
    """get the data we're after from the NRCS WSDL"""
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=station['source'], transport=transport)
    remote_data = {}

    for element_cd in station['desired_data']:
        time_element = time.time()

        # get the last three hours of data for this elementCd/element_cd
        tmp = client.service.getHourlyData(
            stationTriplets=[station['station_id']],
            elementCd=element_cd,
            ordinal=1,
            beginDate=begin,
            endDate=end)

        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
                 time.time() - time_element)

        values = tmp[0]['values']
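
        # each entry in 'values' is expected to look roughly like the
        # following (values illustrative):
        #
        #   {'dateTime': '2020-02-03 05:00', 'value': 32.0}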

        # sort and isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
            remote_data[element_cd] = ordered[0]['value']
        else:
            remote_data[element_cd] = None

    return remote_data

def get_mesowest_data(begin, end, station):
    """get the data we're after from the MesoWest/Synoptic API"""
    remote_data = {}

    # massage begin/end date format
    begin_date_str = begin.strftime('%Y%m%d%H%M')
    end_date_str = end.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    req = requests.get(api_req_url)

    try:
        json = req.json()
    except ValueError:
        LOG.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = json['STATION'][0]['OBSERVATIONS']
    except (KeyError, IndexError):
        LOG.error("Unexpected JSON structure in MesoWest response")
        sys.exit(1)
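
    # the OBSERVATIONS block is expected to look roughly like the
    # following (keys depend on desired_data; values illustrative):
    #
    #   {'date_time': ['2020-02-03T05:40:00Z', ...],
    #    'air_temp_set_1': [30.2, ...],
    #    'snow_depth_set_1': [55.0, ...]}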

    pos = len(observations['date_time']) - 1

    for element_cd in station['desired_data'].split(','):
        # sort and isolate the most recent, see note above in NRCS for how and
        # why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
        #       data (whereas with NRCS, we have to make a separate request for
        #       each element we want). This is nice for network efficiency but
        #       it means we have to handle this part differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data every
        #       10 minutes -- though this provides more opportunity for
        #       irregularities

        # we may not have the data at all
        key_name = element_cd + '_set_1'
        if key_name in observations:
            if observations[key_name][pos]:
                remote_data[element_cd] = observations[key_name][pos]
            else:
                remote_data[element_cd] = None
        else:
            remote_data[element_cd] = None

    return remote_data

def switch_units_to_metric(data_map, mapping):
    """replace units with metric counterparts"""

    # NOTE: to update this, use the fmap<->final_data mapping laid out
    #       in setup_infoex_fields_mapping()
    #
    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
    #       itself handles the unit conversion; in the future, we will also
    #       support NRCS unit conversion, but this must be done by this
    #       program.
    data_map[mapping['tempPresUnit']] = 'C'
    data_map[mapping['hsUnit']] = 'm'
    data_map[mapping['windSpeedUnit']] = 'm/s'
    data_map[mapping['windGustSpeedNumUnit']] = 'm/s'

    return data_map

# CSV operations
def write_local_csv(path_to_file, data):
    """Write the specified CSV file to disk"""
    with open(path_to_file, 'w') as file_object:
        # The requirement is that empty values are represented in the CSV
        # file as ""; csv.QUOTE_NONNUMERIC achieves that
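        #
        # An (illustrative) resulting row might therefore begin:
        #
        #   "<location uuid>","02/03/2020","06:00","Pacific","",...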
        LOG.debug("writing CSV file '%s'", path_to_file)
        writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(data)
    return True

def upload_csv(path_to_file, infoex_data):
    """Upload the specified CSV file to InfoEx FTP and remove the file"""
    with open(path_to_file, 'rb') as file_object:
        LOG.debug("uploading '%s' to FTP host '%s'", path_to_file,
                  infoex_data['host'])
        ftp = FTP(infoex_data['host'], infoex_data['uuid'],
                  infoex_data['api_key'])
        ftp.storlines('STOR ' + path_to_file, file_object)
        ftp.close()
    os.remove(path_to_file)

# other miscellaneous routines
def setup_time_values():
    """establish time bounds of data request(s)"""
    # floor time to nearest hour
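    # e.g. (illustrative): running at 14:47:23 yields an end_date of
    # 14:00:00 today and a begin_date of 11:00:00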
    date_time = datetime.datetime.now()
    end_date = date_time - datetime.timedelta(minutes=date_time.minute % 60,
                                              seconds=date_time.second,
                                              microseconds=date_time.microsecond)
    begin_date = end_date - datetime.timedelta(hours=3)
    return (begin_date, end_date)

if __name__ == "__main__":
    sys.exit(main())