Some work on units
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS/MesoWest Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 Version 2.0.0
10
11 This program fetches data from either an NRCS SNOTEL site or MesoWest
12 weather station and pushes it to InfoEx using the new automated weather
13 system implementation.
14
15 It is designed to be run hourly, and it asks for the last three hours
16 of data of each desired type, and selects the most recent one. This
17 lends some resiliency to the process and helps ensure that we have a
18 value to send, but it can lead to somewhat inconsistent/untruthful
19 data if e.g. the HS is from the last hour but the tempPres is from two
20 hours ago because the instrumentation had a hiccup. It's worth
21 considering if this is a bug or a feature.
22
23 For more information, see file: README
24 For licensing, see file: LICENSE
25 """
26
27 import configparser
28 import csv
29 import datetime
30 import logging
31 import os
32 import sys
33 import time
34
35 from collections import OrderedDict
36 from ftplib import FTP
37 from optparse import OptionParser
38
39 import requests
40
41 import zeep
42 import zeep.cache
43 import zeep.transports
44
# Module-wide logger; records of level DEBUG and above are emitted.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

# Prefer the systemd journal when its Python bindings can be used, and
# fall back to plain stdout logging otherwise.
try:
    from systemd.journal import JournalHandler
    log.addHandler(JournalHandler())
except Exception:
    # `except Exception` instead of a bare `except` keeps the best-effort
    # fallback while letting KeyboardInterrupt/SystemExit propagate.
    ## fallback to syslog
    #import logging.handlers
    #log.addHandler(logging.handlers.SysLogHandler())
    # fallback to stdout
    handler = logging.StreamHandler(sys.stdout)
    log.addHandler(handler)
58
# Command-line interface: --config FILE (required) and --dry-run.
parser = OptionParser()

parser.add_option("--config",
                  dest="config",
                  metavar="FILE",
                  help="location of config file")

parser.add_option("--dry-run",
                  action="store_true",
                  dest="dry_run",
                  default=False,
                  help="fetch data but don't upload to InfoEx")

(options, args) = parser.parse_args()

config = configparser.ConfigParser(allow_no_value=False)

if not options.config:
    print("Please specify a configuration file via --config.")
    sys.exit(1)

# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed, so an empty result means the given
# path is missing or unreadable. Failing here gives a clearer message than
# a later "section not defined" error.
if not config.read(options.config):
    print("Could not read configuration file '%s'." % options.config)
    sys.exit(1)
81
log.debug('STARTING UP')

# Collect everything the rest of the program needs from the config file:
#   infoex       -- InfoEx host/credentials, CSV file name, location UUID
#   data         -- provider type, API endpoint, station id and units
#   desired_data -- the elements to fetch (a list for NRCS, a
#                   comma-separated string for MesoWest)
# A missing section or option raises KeyError, which is reported below
# before exiting with status 1.
try:
    infoex = {
        'host': config['infoex']['host'],
        'uuid': config['infoex']['uuid'],
        'api_key': config['infoex']['api_key'],
        'csv_filename': config['infoex']['csv_filename'],
        'location_uuid': config['infoex']['location_uuid'],
        'wx_data': {}, # placeholder key, values to come later
    }

    data = dict()
    data['provider'] = config['station']['type']

    if data['provider'] not in ['nrcs', 'mesowest']:
        print("Please specify either nrcs or mesowest as the station type.")
        sys.exit(1)

    if data['provider'] == 'nrcs':
        data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
        data['station_id'] = config['station']['station_id']

        try:
            raw_elements = config['station']['desired_data']
        except KeyError:
            # desired_data missing, setting default
            desired_data = [
                'TOBS', # AIR TEMPERATURE OBSERVED (degF)
                'SNWD', # SNOW DEPTH (in)
                'PREC' # PRECIPITATION ACCUMULATION (in)
            ]
        else:
            # tolerate whitespace around the commas and drop empty entries
            desired_data = [element.strip()
                            for element in raw_elements.split(',')
                            if element.strip()]

        # XXX: For NRCS, we're manually overriding units for now! Once
        #      unit conversion is supported for NRCS, REMOVE THIS!
        data['units'] = 'imperial'

    if data['provider'] == 'mesowest':
        data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
        data['station_id'] = config['station']['station_id']
        data['units'] = config['station']['units']

        try:
            raw_elements = config['station']['desired_data']
        except KeyError:
            # desired_data missing, setting default
            desired_data = 'air_temp,snow_depth'
        else:
            # same normalisation as for NRCS, but kept as one
            # comma-separated string because it goes into the URL below
            desired_data = ','.join(element.strip()
                                    for element in raw_elements.split(',')
                                    if element.strip())

        # construct full API URL (sans start/end time, added later)
        data['source'] = (data['source']
                          + '?token=' + config['station']['token']
                          + '&within=60&units=' + data['units']
                          + '&stid=' + data['station_id']
                          + '&vars=' + desired_data)

except KeyError as e:
    log.critical("%s not defined in %s" % (e, options.config))
    # sys.exit() rather than exit(): the latter is only injected by the
    # `site` module and is not guaranteed to exist
    sys.exit(1)
except Exception as exc:
    log.critical("Exception occurred in config parsing: '%s'" % (exc))
    sys.exit(1)
140
# all sections/values present in config file, final sanity check
#
# Every option of every section must carry a non-empty value; the first
# empty one found is logged and the program exits with status 1.
for key in config.sections():
    for subkey in config[key]:
        if not config[key][subkey]:
            log.critical("Config value '%s.%s' is empty" % (key, subkey))
            sys.exit(1)
150
# INFOEX FIELDS
#
# The CSV row is an ordered sequence of values. To be able to address
# individual columns by name before the row is written out, two parallel
# structures are derived from the single table below:
#
#   fmap       -- field name -> column index
#   final_data -- the row itself, pre-filled with each column's default
#
# NOTE: the Auto Wx InfoEx documentation shows these keys in a graphical
#       table whose "index" starts at 1; the indices here start at 0.
_infoex_columns = [
    ('Location UUID', infoex['location_uuid']),
    ('obDate', None),
    ('obTime', None),
    ('timeZone', 'Pacific'),
    ('tempMaxHour', None),
    ('tempMaxHourUnit', 'F'),
    ('tempMinHour', None),
    ('tempMinHourUnit', 'F'),
    ('tempPres', None),
    ('tempPresUnit', 'F'),
    ('precipitationGauge', None),
    ('precipitationGaugeUnit', 'in'),
    ('windSpeedNum', None),
    ('windSpeedUnit', 'mph'),
    ('windDirectionNum', None),
    ('hS', None),
    ('hsUnit', 'in'),
    ('baro', None),
    ('baroUnit', 'inHg'),
    ('rH', None),
    ('windGustSpeedNum', None),
    ('windGustSpeedNumUnit', 'mph'),
    ('windGustDirNum', None),
    ('dewPoint', None),
    ('dewPointUnit', 'F'),
    ('hn24Auto', None),
    ('hn24AutoUnit', 'in'),
    ('hstAuto', None),
    ('hstAutoUnit', 'in'),
]

fmap = {name: index for index, (name, _) in enumerate(_infoex_columns)}
final_data = [default for _, default in _infoex_columns]
191
# Translation table from the provider's element names (only those this
# program supports) to the InfoEx field each one feeds.
_provider_field_maps = {
    'nrcs': {
        'PREC': 'precipitationGauge',
        'TOBS': 'tempPres',
        'SNWD': 'hS',
        'PRES': 'baro',
        'RHUM': 'rH',
        'WSPD': 'windSpeedNum',
        'WDIR': 'windDirectionNum',
        # unsupported by NRCS:
        # windGustSpeedNum
    },
    'mesowest': {
        'precip_accum': 'precipitationGauge',
        'air_temp': 'tempPres',
        'snow_depth': 'hS',
        'pressure': 'baro',
        'relative_humidity': 'rH',
        'wind_speed': 'windSpeedNum',
        'wind_direction': 'windDirectionNum',
        'wind_gust': 'windGustSpeedNum',
    },
}

# any other provider value leaves the mapping empty
iemap = dict(_provider_field_maps.get(data['provider'], {}))
215
# Swap unit labels when the user asked for metric output.
#
# NOTE: columns are addressed through the fmap <-> final_data mapping;
#       extend the table below to relabel further columns.
#
# NOTE: this only 'works' with MesoWest for now, as the MesoWest API
#       itself handles the unit conversion; in the future, we will also
#       support NRCS unit conversion, but this must be done by this
#       program.
#
# NOTE(review): only these four labels change; all other unit columns keep
#       their defaults. Confirm that is intended for the remaining fields.
if data['units'] == 'metric':
    for unit_field, unit_label in (('tempPresUnit', 'C'),
                                   ('hsUnit', 'm'),
                                   ('windSpeedUnit', 'm/s'),
                                   ('windGustSpeedNumUnit', 'm/s')):
        final_data[fmap[unit_field]] = unit_label
229
# The request window ends at the top of the current hour and spans the
# three hours before it.
dt = datetime.datetime.now()
_past_the_hour = datetime.timedelta(minutes=dt.minute,
                                    seconds=dt.second,
                                    microseconds=dt.microsecond)
end_date = dt - _past_the_hour
begin_date = end_date - datetime.timedelta(hours=3)

# get the data
log.debug("Getting %s data from %s to %s",
          desired_data, begin_date, end_date)

# start of the overall fetch timing
time_all_elements = time.time()
242
# Fetch the most recent value of every desired element from the configured
# provider and store it in infoex['wx_data'], keyed by the provider's
# element name. Elements without data are stored as None.

# NRCS-specific code
if data['provider'] == 'nrcs':
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=data['source'], transport=transport)

    for elementCd in desired_data:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[data['station_id']],
            elementCd=elementCd,
            ordinal=1,
            beginDate=begin_date,
            endDate=end_date)

        log.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
                                                          time.time() - time_element))

        # an empty response (no station entry at all) is treated the same
        # as a station entry that carries no values
        values = tmp[0]['values'] if tmp else None

        # isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda t: t['dateTime'])
            infoex['wx_data'][elementCd] = latest['value']
        else:
            infoex['wx_data'][elementCd] = None

# MesoWest-specific code
elif data['provider'] == 'mesowest':
    # massage begin/end date format
    begin_date_str = begin_date.strftime('%Y%m%d%H%M')
    end_date_str = end_date.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
    # NOTE(review): no timeout is passed, so a stalled connection would
    #               block indefinitely -- consider adding one.
    req = requests.get(api_req_url)

    try:
        json = req.json()
    except ValueError:
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    try:
        observations = json['STATION'][0]['OBSERVATIONS']
        # index of the last observation; presumably the API returns the
        # series in chronological order, so this is the newest one
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        # navigating well-formed JSON of an unexpected shape raises these,
        # not ValueError
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    for elementCd in desired_data.split(','):
        # isolate the most recent, see note above in NRCS for how and
        # why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains
        #       all data (whereas with NRCS, we have to make a separate
        #       request for each element we want). This is nice for network
        #       efficiency but it means we have to handle this part
        #       differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data
        #       every 10 minutes -- though this provides more opportunity
        #       for irregularities

        # we may not have the data at all
        key_name = elementCd + '_set_1'
        series = observations.get(key_name) or []

        # Store the value as-is: a reading of 0 is valid data and must not
        # be turned into None. The bounds check covers an empty or short
        # series.
        if 0 <= pos < len(series):
            infoex['wx_data'][elementCd] = series[pos]
        else:
            infoex['wx_data'][elementCd] = None

log.info("Time to get all data : %.3f sec" % (time.time() -
                                              time_all_elements))

log.debug("infoex[wx_data]: %s", str(infoex['wx_data']))
327
# With the name -> index mapping in place, only the columns that change
# per run need to be written: the location, the observation date/time and
# whatever weather values were fetched.
for _column, _content in (('Location UUID', infoex['location_uuid']),
                          ('obDate', end_date.strftime('%m/%d/%Y')),
                          ('obTime', end_date.strftime('%H:%M'))):
    final_data[fmap[_column]] = _content

for elementCd, observed in infoex['wx_data'].items():
    if elementCd in iemap:
        # CONSIDER: Casting every value to Float() -- need to investigate
        #           if any possible elementCds we may want are any other
        #           data type than float.
        #
        #           Another possibility is to query the API with
        #           getStationElements and temporarily store the
        #           storedUnitCd. But that's pretty network-intensive and
        #           may not even be worth it if there's only e.g. one or
        #           two exceptions to any otherwise uniformly Float value
        #           set.
        final_data[fmap[iemap[elementCd]]] = observed
    else:
        # fetched element with no InfoEx counterpart: report and skip
        log.warning("BAD KEY wx_data['%s']", elementCd)

log.debug("final_data: %s", str(final_data))
351
# Write the single observation row to the CSV file.
#
# newline='' is how the csv module documents opening a file that is handed
# to csv.writer, so that the writer alone controls the line endings. The
# `with` block closes the file, so no explicit close() is needed.
with open(infoex['csv_filename'], 'w', newline='') as f:
    # The requirement is that empty values are represented in the CSV
    # file as "", csv.QUOTE_NONNUMERIC achieves that
    log.debug("writing CSV file '%s'" % (infoex['csv_filename']))
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(final_data)
359
# Upload the CSV file to the InfoEx FTP host and remove the local copy,
# unless --dry-run was given.
if not options.dry_run:
    # not a dry run
    with open(infoex['csv_filename'], 'rb') as f:
        log.debug("uploading FTP file '%s'" % (infoex['host']))
        # the context manager closes the connection even if the upload
        # raises
        with FTP(infoex['host'], infoex['uuid'], infoex['api_key']) as ftp:
            ftp.storlines('STOR ' + infoex['csv_filename'], f)
    # only reached after a successful upload, once the file is closed
    os.remove(infoex['csv_filename'])

log.debug('DONE')