More README updates
[infoex-autowx.git] / infoex-autowx.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 """
5 InfoEx <-> NRCS Auto Wx implementation
6 Alexander Vasarab
7 Wylark Mountaineering LLC
8
9 Version 1.0.0
10
This program fetches data from an NRCS SNOTEL site or a MesoWest
station and pushes it to InfoEx using the new automated weather system
implementation.
13
14 It is designed to be run hourly, and it asks for the last three hours
15 of data of each desired type, and selects the most recent one. This
16 lends some resiliency to the process and helps ensure that we have a
17 value to send, but it can lead to somewhat inconsistent/untruthful
18 data if e.g. the HS is from the last hour but the tempPres is from two
19 hours ago because the instrumentation had a hiccup. It's worth
20 considering if this is a bug or a feature.
21
22 For more information, see file: README
23 For licensing, see file: LICENSE
24 """
25
26 import configparser
27 import csv
28 import datetime
29 import logging
30 import os
31 import sys
32 import time
33
34 from collections import OrderedDict
35 from ftplib import FTP
36 from optparse import OptionParser
37
38 import requests
39
40 import zeep
41 import zeep.cache
42 import zeep.transports
43
# Module-level logger; records of every level (DEBUG and up) are emitted.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

# Prefer the systemd journal when its Python bindings can be imported,
# otherwise fall back to stdout. Only ImportError triggers the fallback so
# that unrelated failures (and KeyboardInterrupt/SystemExit) are not
# silently swallowed.
try:
    from systemd.journal import JournalHandler
    log.addHandler(JournalHandler())
except ImportError:
    # A syslog handler (logging.handlers.SysLogHandler) would be another
    # possible fallback; stdout is used for now.
    handler = logging.StreamHandler(sys.stdout)
    log.addHandler(handler)
57
# Command-line interface: --config FILE is mandatory, --dry-run is optional.
parser = OptionParser()
parser.add_option("--config",
    dest="config",
    metavar="FILE",
    help="location of config file")
parser.add_option("--dry-run",
    action="store_true",
    dest="dry_run",
    default=False,
    help="fetch data but don't upload to InfoEx")

(options, args) = parser.parse_args()

config = configparser.ConfigParser(allow_no_value=False)

if not options.config:
    print("Please specify a configuration file via --config.")
    sys.exit(1)

# ConfigParser.read() skips files it cannot open and returns the list of
# files it actually parsed, so an empty result means the given path is
# missing or unreadable. Syntax problems surface as configparser.Error.
try:
    files_read = config.read(options.config)
except configparser.Error as exc:
    print("Could not parse configuration file '%s': %s" % (options.config, exc))
    sys.exit(1)

if not files_read:
    print("Could not read configuration file '%s'." % (options.config))
    sys.exit(1)

log.debug('STARTING UP')
80
# Pull every required setting out of the parsed config file. A missing
# section or key raises KeyError, which is reported below and ends the
# program with exit status 1.
try:
    infoex = {
        'host': config['infoex']['host'],
        'uuid': config['infoex']['uuid'],
        'api_key': config['infoex']['api_key'],
        'csv_filename': config['infoex']['csv_filename'],
        'location_uuid': config['infoex']['location_uuid'],
        'wx_data': {}, # placeholder key, values to come later
    }

    data = dict()
    data['provider'] = config['station']['type']

    if data['provider'] not in ['nrcs', 'mesowest']:
        print("Please specify either nrcs or mesowest as the station type.")
        sys.exit(1)

    if data['provider'] == 'nrcs':
        data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
        data['station_id'] = config['station']['station_id']

        # NOTE: for NRCS, desired_data is a list of element codes
        try:
            desired_data = config['station']['desired_data'].split(',')
        except KeyError:
            # desired_data missing, setting default
            desired_data = [
                'TOBS', # AIR TEMPERATURE OBSERVED (degF)
                'SNWD', # SNOW DEPTH (in)
                'PREC'  # PRECIPITATION ACCUMULATION (in)
            ]

    if data['provider'] == 'mesowest':
        data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
        data['station_id'] = config['station']['station_id']
        data['units'] = config['station']['units']

        # NOTE: for MesoWest, desired_data stays a comma-separated string
        #       because it is pasted verbatim into the request URL
        try:
            desired_data = config['station']['desired_data']
        except KeyError:
            # desired_data missing, setting default
            desired_data = 'air_temp,snow_depth'

        # construct full API URL (sans start/end time, added later)
        data['source'] = data['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['station_id'] + '&vars=' + desired_data

except KeyError as e:
    log.critical("%s not defined in %s" % (e, options.config))
    # sys.exit() rather than the interactive-only exit() helper, which is
    # only present when the site module has been loaded
    sys.exit(1)
except Exception as exc:
    log.critical("Exception occurred in config parsing: '%s'" % (exc))
    sys.exit(1)
132
# all sections/values present in config file, final sanity check: refuse
# to continue if any option in any section has an empty value
for key in config.sections():
    for subkey in config[key]:
        if not config[key][subkey]:
            log.critical("Config value '%s.%s' is empty" % (key, subkey))
            # sys.exit() rather than the interactive-only exit() helper
            sys.exit(1)
142
# INFOEX FIELDS
#
# The end result of this program is simply an ordered set, the CSV file,
# but we still may want to manipulate individual values by name before
# writing that file. To allow that, a single ordered table of
# (field name, default value) pairs is turned into two structures:
#
#   fmap       -- field name -> position in the CSV row
#   final_data -- the CSV row itself, pre-filled with the defaults
#
# Also note that the current Auto Wx InfoEx documentation shows these
# keys in a graphical table with the "index" beginning at 1, but here we
# are sanely indexing beginning at 0.
_infoex_fields = [
    ('Location UUID',          infoex['location_uuid']),
    ('obDate',                 None),
    ('obTime',                 None),
    ('timeZone',               'Pacific'),
    ('tempMaxHour',            None),
    ('tempMaxHourUnit',        'F'),
    ('tempMinHour',            None),
    ('tempMinHourUnit',        'F'),
    ('tempPres',               None),
    ('tempPresUnit',           'F'),
    ('precipitationGauge',     None),
    ('precipitationGaugeUnit', 'in'),
    ('windSpeedNum',           None),
    ('windSpeedUnit',          'mph'),
    ('windDirectionNum',       None),
    ('hS',                     None),
    ('hsUnit',                 'in'),
    ('baro',                   None),
    ('baroUnit',               'inHg'),
    ('rH',                     None),
    ('windGustSpeedNum',       None),
    ('windGustSpeedNumUnit',   'mph'),
    ('windGustDirNum',         None),
    ('dewPoint',               None),
    ('dewPointUnit',           'F'),
    ('hn24Auto',               None),
    ('hn24AutoUnit',           'in'),
    ('hstAuto',                None),
    ('hstAutoUnit',            'in'),
]

fmap = {name: index for index, (name, _default) in enumerate(_infoex_fields)}
final_data = [default for _name, default in _infoex_fields]
183
# one final mapping, the NRCS/MesoWest fields that this program supports to
# their InfoEx counterpart; keyed first by provider, and left empty for a
# provider that has no table
iemap = {
    'nrcs': {
        'PREC': 'precipitationGauge',
        'TOBS': 'tempPres',
        'SNWD': 'hS',
    },
    'mesowest': {
        'precip_accum': 'precipitationGauge',
        'air_temp': 'tempPres',
        'snow_depth': 'hS',
    },
}.get(data['provider'], {})
196
# The query window ends at the top of the current hour (local, naive
# time) and starts three hours before that.
dt = datetime.datetime.now()
end_date = dt.replace(minute=0, second=0, microsecond=0)
begin_date = end_date - datetime.timedelta(hours=3)

# get the data
log.debug("Getting %s data from %s to %s" % (desired_data, begin_date, end_date))

# start of the overall fetch, used for the timing log once all data is in
time_all_elements = time.time()
209
# Fetch the most recent observation of every desired element from the
# configured provider and store it in infoex['wx_data'], keyed by the
# provider's element name. Elements without a usable value are stored as
# None.

# NRCS-specific code
if data['provider'] == 'nrcs':
    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
    client = zeep.Client(wsdl=data['source'], transport=transport)

    for elementCd in desired_data:
        time_element = time.time()

        # get the last three hours of data for this elementCd
        tmp = client.service.getHourlyData(
            stationTriplets=[data['station_id']],
            elementCd=elementCd,
            ordinal=1,
            beginDate=begin_date,
            endDate=end_date)

        log.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
            time.time() - time_element))

        # an empty response has no station entry to index into
        values = tmp[0]['values'] if tmp else None

        # isolate the most recent
        #
        # NOTE: we do this because sometimes there are gaps in hourly data
        #       in NRCS; yes, we may end up with slightly inaccurate data,
        #       so perhaps this decision will be re-evaluated in the future
        if values:
            latest = max(values, key=lambda t: t['dateTime'])
            infoex['wx_data'][elementCd] = latest['value']
        else:
            infoex['wx_data'][elementCd] = None

# MesoWest-specific code
elif data['provider'] == 'mesowest':
    # massage begin/end date format
    begin_date_str = begin_date.strftime('%Y%m%d%H%M')
    end_date_str = end_date.strftime('%Y%m%d%H%M')

    # construct final, completed API URL
    api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str

    # a timeout keeps an unattended run from hanging forever on a stalled
    # connection
    try:
        req = requests.get(api_req_url, timeout=60)
    except requests.RequestException as exc:
        log.error("MesoWest request failed: %s" % (exc))
        sys.exit(1)

    try:
        payload = req.json()
    except ValueError:
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    # a response that parses as JSON can still lack the expected structure;
    # indexing then raises KeyError/IndexError/TypeError (not ValueError)
    try:
        observations = payload['STATION'][0]['OBSERVATIONS']
        pos = len(observations['date_time']) - 1
    except (KeyError, IndexError, TypeError):
        log.error("Bad JSON in MesoWest response")
        sys.exit(1)

    for elementCd in desired_data.split(','):
        # isolate the most recent observation, see note above in NRCS for
        # how and why this is done
        #
        # NOTE: Unlike in the NRCS case, the MesoWest API response contains
        #       all data (whereas with NRCS, we have to make a separate
        #       request for each element we want). This is nice for network
        #       efficiency but it means we have to handle this part
        #       differently for each.
        #
        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
        #       provides hourly data, but MesoWest can often provide data
        #       every 10 minutes -- though this provides more opportunity
        #       for irregularities

        # we may not have the data at all
        key_name = elementCd + '_set_1'
        series = observations.get(key_name) or []

        # Take the value at the last timestamp. The value is stored as-is
        # (no truthiness test) so that a legitimate reading of 0 is not
        # mistaken for missing data.
        if 0 <= pos < len(series):
            infoex['wx_data'][elementCd] = series[pos]
        else:
            infoex['wx_data'][elementCd] = None

log.info("Time to get all data : %.3f sec" % (time.time() -
    time_all_elements))

log.debug("infoex[wx_data]: %s", str(infoex['wx_data']))
294
# With the defaults already in place in final_data, only the fields that
# vary from run to run need to be filled in here.
final_data[fmap['Location UUID']] = infoex['location_uuid']
final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
final_data[fmap['obTime']] = end_date.strftime('%H:%M')

# Copy each fetched value into the CSV slot of its InfoEx counterpart;
# elements without a known counterpart are reported and skipped.
#
# CONSIDER: Casting every value to Float() -- need to investigate if
#           any possible elementCds we may want are any other data
#           type than float.
#
#           Another possibility is to query the API with
#           getStationElements and temporarily store the
#           storedUnitCd. But that's pretty network-intensive and
#           may not even be worth it if there's only e.g. one or two
#           exceptions to any otherwise uniformly Float value set.
for elementCd, wx_value in infoex['wx_data'].items():
    if elementCd in iemap:
        final_data[fmap[iemap[elementCd]]] = wx_value
    else:
        log.warning("BAD KEY wx_data['%s']" % (elementCd))

log.debug("final_data: %s" % (final_data))
318
# Write the single-row CSV file. The csv module expects the file to be
# opened with newline='' so that it alone controls line endings.
with open(infoex['csv_filename'], 'w', newline='') as f:
    # The requirement is that empty values are represented in the CSV
    # file as "", csv.QUOTE_NONNUMERIC achieves that
    log.debug("writing CSV file '%s'" % (infoex['csv_filename']))
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(final_data)

if not options.dry_run:
    # not a dry run: upload the CSV file to InfoEx over FTP. Both context
    # managers guarantee the file and the connection are released even if
    # the transfer raises.
    with open(infoex['csv_filename'], 'rb') as f:
        log.debug("uploading FTP file '%s'" % (infoex['host']))
        with FTP(infoex['host'], infoex['uuid'], infoex['api_key']) as ftp:
            ftp.storlines('STOR ' + infoex['csv_filename'], f)

    # NOTE(review): the local CSV is assumed to be removed only after an
    # upload, leaving it in place for inspection on a dry run -- confirm
    os.remove(infoex['csv_filename'])

log.debug('DONE')