Rename data -> station for clarity

[infoex-autowx.git] / infoex-autowx.py
diff --git a/infoex-autowx.py b/infoex-autowx.py

index d66683cfa41775964bc8f2b432daa172d8b0fd24..bb93eef0d78f3cce14feba38ea7e2f6ed077e736 100755 (executable)
--- a/infoex-autowx.py
+++ b/infoex-autowx.py
@@ -78,22 +78,22 @@ def setup_config(config):
              'wx_data': {}, # placeholder key, values to come later
          }
  
-        data = dict()
-        data['provider'] = config['station']['type']
+        station = dict()
+        station['provider'] = config['station']['type']
  
-        if data['provider'] not in ['nrcs', 'mesowest']:
+        if station['provider'] not in ['nrcs', 'mesowest']:
              print("Please specify either nrcs or mesowest as the station type.")
              sys.exit(1)
  
-        if data['provider'] == 'nrcs':
-            data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
-            data['station_id'] = config['station']['station_id']
+        if station['provider'] == 'nrcs':
+            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
+            station['station_id'] = config['station']['station_id']
  
              try:
-                data['desired_data'] = config['station']['desired_data'].split(',')
+                station['desired_data'] = config['station']['desired_data'].split(',')
              except:
                  # desired_data malformed or missing, setting default
-                data['desired_data'] = [
+                station['desired_data'] = [
                                         'TOBS', # AIR TEMPERATURE OBSERVED (degF)
                                         'SNWD', # SNOW DEPTH (in)
                                         'PREC'  # PRECIPITATION ACCUMULATION (in)
@@ -101,22 +101,22 @@ def setup_config(config):
  
              # XXX: For NRCS, we're manually overriding units for now! Once
              #      unit conversion is supported for NRCS, REMOVE THIS!
-            if 'units' not in data:
-                data['units'] = 'imperial'
+            if 'units' not in station:
+                station['units'] = 'imperial'
  
-        if data['provider'] == 'mesowest':
-            data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
-            data['station_id'] = config['station']['station_id']
-            data['units'] = config['station']['units']
+        if station['provider'] == 'mesowest':
+            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
+            station['station_id'] = config['station']['station_id']
+            station['units'] = config['station']['units']
  
              try:
-                data['desired_data'] = config['station']['desired_data']
+                station['desired_data'] = config['station']['desired_data']
              except:
                  # desired_data malformed or missing, setting default
-                data['desired_data'] = 'air_temp,snow_depth'
+                station['desired_data'] = 'air_temp,snow_depth'
  
              # construct full API URL (sans start/end time, added later)
-            data['source'] = data['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['station_id'] + '&vars=' + data['desired_data']
+            station['source'] = station['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + station['units'] + '&stid=' + station['station_id'] + '&vars=' + station['desired_data']
  
      except KeyError as e:
          LOG.critical("%s not defined in %s" % (e, options.config))
@@ -135,7 +135,7 @@ def setup_config(config):
          LOG.critical("Config value '%s.%s' is empty" % (key, subkey))
          exit(1)
  
-    return (infoex, data)
+    return (infoex, station)
  
  def setup_logging(log_level):
      """Setup our logging infrastructure"""
@@ -184,13 +184,13 @@ def main():
          print("\nPlease select an appropriate log level or remove the switch (--log-level).")
          sys.exit(1)
  
-    (infoex, data) = setup_config(config)
+    (infoex, station) = setup_config(config)
  
      LOG.debug('Config parsed, starting up')
  
      # create mappings
      (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
-    iemap = setup_infoex_counterparts_mapping(data['provider'])
+    iemap = setup_infoex_counterparts_mapping(station['provider'])
  
      # override units if user selected metric
      #
@@ -200,7 +200,7 @@ def main():
      #       itself handles the unit conversion; in the future, we will also
      #       support NRCS unit conversion, but this must be done by this
      #       program.
-    if data['units'] == 'metric':
+    if station['units'] == 'metric':
          final_data[fmap['tempPresUnit']] = 'C'
          final_data[fmap['hsUnit']] = 'm'
          final_data[fmap['windSpeedUnit']] = 'm/s'
@@ -214,92 +214,19 @@ def main():
      begin_date = end_date - datetime.timedelta(hours=3)
  
      # get the data
-    LOG.debug("Getting %s data from %s to %s" % (str(data['desired_data']),
+    LOG.debug("Getting %s data from %s to %s" % (str(station['desired_data']),
          str(begin_date), str(end_date)))
  
      time_all_elements = time.time()
  
-    # NRCS-specific code
-    if data['provider'] == 'nrcs':
-        transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
-        client = zeep.Client(wsdl=data['source'], transport=transport)
-
-        for elementCd in data['desired_data']:
-            time_element = time.time()
-
-            # get the last three hours of data for this elementCd
-            tmp = client.service.getHourlyData(
-                    stationTriplets=[data['station_id']],
-                    elementCd=elementCd,
-                    ordinal=1,
-                    beginDate=begin_date,
-                    endDate=end_date)
-
-            LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
-                time.time() - time_element))
-
-            values = tmp[0]['values']
-
-            # sort and isolate the most recent
-            #
-            # NOTE: we do this because sometimes there are gaps in hourly data
-            #       in NRCS; yes, we may end up with slightly inaccurate data,
-            #       so perhaps this decision will be re-evaluated in the future
-            if values:
-                ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
-                infoex['wx_data'][elementCd] = ordered[0]['value']
-            else:
-                infoex['wx_data'][elementCd] = None
-
-    # MesoWest-specific code
-    elif data['provider'] == 'mesowest':
-        # massage begin/end date format
-        begin_date_str = begin_date.strftime('%Y%m%d%H%M')
-        end_date_str = end_date.strftime('%Y%m%d%H%M')
-
-        # construct final, completed API URL
-        api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
-        req = requests.get(api_req_url)
-
-        try:
-            json = req.json()
-        except ValueError:
-            LOG.error("Bad JSON in MesoWest response")
-            sys.exit(1)
-
-        try:
-            observations = json['STATION'][0]['OBSERVATIONS']
-        except ValueError:
-            LOG.error("Bad JSON in MesoWest response")
-            sys.exit(1)
-
-        pos = len(observations['date_time']) - 1
-
-        for elementCd in data['desired_data'].split(','):
-            # sort and isolate the most recent, see note above in NRCS for how and
-            # why this is done
-            #
-            # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
-            #       data (whereas with NRCS, we have to make a separate request for
-            #       each element we want). This is nice for network efficiency but
-            #       it means we have to handle this part differently for each.
-            #
-            # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
-            #       provides hourly data, but MesoWest can often provide data every
-            #       10 minutes -- though this provides more opportunity for
-            #       irregularities
-
-            # we may not have the data at all
-            key_name = elementCd + '_set_1'
-            if key_name in observations:
-                if observations[key_name][pos]:
-                    infoex['wx_data'][elementCd] = observations[key_name][pos]
-                else:
-                    infoex['wx_data'][elementCd] = None
-            else:
-                infoex['wx_data'][elementCd] = None
+    # get the data
+    if station['provider'] == 'nrcs':
+        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
+    elif station['provider'] == 'mesowest':
+        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
+                station)
  
-    LOG.info("Time to get all data : %.3f sec" % (time.time() -
+    LOG.info("Time taken to get all data : %.3f sec" % (time.time() -
          time_all_elements))
  
      LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))
@@ -339,7 +266,7 @@ def main():
      LOG.debug('DONE')
      return 0
  
-# Data structure operations
+# data structure operations
  def setup_infoex_fields_mapping(location_uuid):
      """
      Create a mapping of InfoEx fields to the local data's indexing scheme.
@@ -417,6 +344,94 @@ def setup_infoex_counterparts_mapping(provider):
  
      return iemap
  
+# provider-specific operations
+def get_nrcs_data(begin, end, station):
+    """
+    Get the newest value of each desired element from the NRCS WSDL.
+
+    Makes one getHourlyData call per code in station['desired_data'] and
+    returns {elementCd: newest value in begin..end, or None if no data}.
+    """
+    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
+    client = zeep.Client(wsdl=station['source'], transport=transport)
+    remote_data = {}
+
+    for elementCd in station['desired_data']:
+        time_element = time.time()
+        tmp = client.service.getHourlyData(
+                stationTriplets=[station['station_id']],
+                elementCd=elementCd,
+                ordinal=1,
+                beginDate=begin,
+                endDate=end)
+
+        LOG.info("Time to get elementCd '%s': %.3f sec", elementCd,
+                 time.time() - time_element)
+
+        # an empty response would otherwise raise IndexError on tmp[0]
+        values = tmp[0]['values'] if tmp else None
+        remote_data[elementCd] = None
+
+        # NOTE: hourly NRCS data sometimes has gaps, so we take whichever
+        #       reading is newest in the window; it may be slightly stale
+        if values:
+            newest = max(values, key=lambda t: t['dateTime'])
+            remote_data[elementCd] = newest['value']
+
+    return remote_data
+
+def get_mesowest_data(begin, end, station):
+    """
+    Get the newest value of each desired variable from the MesoWest API.
+
+    Makes a single request to station['source'] (with begin/end appended
+    as start/end) and reads each variable named in the comma-separated
+    station['desired_data'] out of the response.
+
+    Returns {variable: newest value, or None if that variable is absent
+    or has no reading}. Logs an error and exits with status 1 if the
+    response is not JSON or carries no station observations.
+    """
+    remote_data = {}
+
+    # massage begin/end date format
+    begin_date_str = begin.strftime('%Y%m%d%H%M')
+    end_date_str = end.strftime('%Y%m%d%H%M')
+
+    # construct final, completed API URL
+    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
+    req = requests.get(api_req_url)
+
+    try:
+        payload = req.json()
+    except ValueError:
+        LOG.error("Bad JSON in MesoWest response")
+        sys.exit(1)
+
+    # a missing key or an empty STATION list raises KeyError, IndexError
+    # or TypeError here -- never ValueError -- so catch those instead
+    try:
+        observations = payload['STATION'][0]['OBSERVATIONS']
+    except (KeyError, IndexError, TypeError):
+        LOG.error("No station observations in MesoWest response")
+        sys.exit(1)
+
+    for elementCd in station['desired_data'].split(','):
+        # NOTE: Unlike NRCS (one request per element), this one response
+        #       holds every variable, each as a list under '<var>_set_1'.
+        #       The last entry of a list is taken as the newest reading
+        #       (assumes chronological order -- confirm with API docs).
+        #
+        # we may not have the data at all, or the list may be empty
+        series = observations.get(elementCd + '_set_1')
+
+        # NOTE: the value is stored as-is rather than truth-tested, so a
+        #       genuine reading of 0 (e.g. 0 degrees, no snow) is kept
+        #       instead of being reported as missing
+        remote_data[elementCd] = series[-1] if series else None
+
+    return remote_data
+
  # CSV operations
  def write_local_csv(path_to_file, data):
      """Write the specified CSV file to disk"""