Merge branch 'release-3.0.1' into develop
[infoex-autowx.git] / infoex-autowx.py
index d66683cfa41775964bc8f2b432daa172d8b0fd24..d58baac635453d4fd06bd00f23a6e2ffc2d1620b 100755 (executable)
@@ -31,7 +31,7 @@ import sys
 import time
 
 from ftplib import FTP
-from optparse import OptionParser
+from argparse import ArgumentParser
 
 import requests
 
@@ -39,30 +39,34 @@ import zeep
 import zeep.cache
 import zeep.transports
 
-__version__ = '2.0.0'
+__version__ = '3.0.1'
 
 LOG = logging.getLogger(__name__)
 LOG.setLevel(logging.NOTSET)
 
 def get_parser():
-    """Return OptionParser for this program"""
-    parser = OptionParser(version=__version__)
+    """Return the ArgumentParser defining --version, --config, --log-level and --dry-run"""
+    parser = ArgumentParser()
 
-    parser.add_option("--config",
-        dest="config",
-        metavar="FILE",
-        help="location of config file")
+    parser.add_argument("--version",
+                        action="version",
+                        version=__version__)
 
-    parser.add_option("--log-level",
-        dest="log_level",
-        default=None,
-        help="set the log level (debug, info, warning)")
+    parser.add_argument("--config",
+                        dest="config",
+                        metavar="FILE",
+                        help="location of config file")
 
-    parser.add_option("--dry-run",
-        action="store_true",
-        dest="dry_run",
-        default=False,
-        help="fetch data but don't upload to InfoEx")
+    parser.add_argument("--log-level",
+                        dest="log_level",
+                        default=None,
+                        help="set the log level (debug, info, warning)")
+
+    parser.add_argument("--dry-run",
+                        action="store_true",
+                        dest="dry_run",
+                        default=False,
+                        help="fetch data but don't upload to InfoEx")
 
     return parser
 
@@ -78,71 +82,61 @@ def setup_config(config):
             'wx_data': {}, # placeholder key, values to come later
         }
 
-        data = dict()
-        data['provider'] = config['station']['type']
+        station = dict()
+        station['provider'] = config['station']['type']
 
-        if data['provider'] not in ['nrcs', 'mesowest']:
+        if station['provider'] not in ['nrcs', 'mesowest', 'python']:
             print("Please specify either nrcs or mesowest as the station type.")
             sys.exit(1)
 
-        if data['provider'] == 'nrcs':
-            data['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
-            data['station_id'] = config['station']['station_id']
-
-            try:
-                data['desired_data'] = config['station']['desired_data'].split(',')
-            except:
-                # desired_data malformed or missing, setting default
-                data['desired_data'] = [
-                                       'TOBS', # AIR TEMPERATURE OBSERVED (degF)
-                                       'SNWD', # SNOW DEPTH (in)
-                                       'PREC'  # PRECIPITATION ACCUMULATION (in)
-                                       ]
+        if station['provider'] == 'nrcs':
+            station['source'] = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'
+            station['station_id'] = config['station']['station_id']
+            station['desired_data'] = config['station']['desired_data'].split(',')
 
             # XXX: For NRCS, we're manually overriding units for now! Once
             #      unit conversion is supported for NRCS, REMOVE THIS!
-            if 'units' not in data:
-                data['units'] = 'imperial'
+            if 'units' not in station:
+                station['units'] = 'imperial'
 
-        if data['provider'] == 'mesowest':
-            data['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
-            data['station_id'] = config['station']['station_id']
-            data['units'] = config['station']['units']
-
-            try:
-                data['desired_data'] = config['station']['desired_data']
-            except:
-                # desired_data malformed or missing, setting default
-                data['desired_data'] = 'air_temp,snow_depth'
+        if station['provider'] == 'mesowest':
+            station['source'] = 'https://api.synopticdata.com/v2/stations/timeseries'
+            station['station_id'] = config['station']['station_id']
+            station['units'] = config['station']['units']
+            station['desired_data'] = config['station']['desired_data']
 
             # construct full API URL (sans start/end time, added later)
-            data['source'] = data['source'] + '?token=' + config['station']['token'] + '&within=60&units=' + data['units'] + '&stid=' + data['station_id'] + '&vars=' + data['desired_data']
+            station['source'] = station['source'] + '?token=' + \
+                                config['station']['token'] + \
+                                '&within=60&units=' + station['units'] + \
+                                '&stid=' + station['station_id'] + \
+                                '&vars=' + station['desired_data']
 
-    except KeyError as e:
-        LOG.critical("%s not defined in %s" % (e, options.config))
-        exit(1)
-    except Exception as exc:
-        LOG.critical("Exception occurred in config parsing: '%s'" % (exc))
-        exit(1)
+        if station['provider'] == 'python':
+            station['path'] = config['station']['path']
+
+    except KeyError as err:
+        LOG.critical("%s not defined in configuration file", err)
+        sys.exit(1)
 
     # all sections/values present in config file, final sanity check
     try:
         for key in config.sections():
             for subkey in config[key]:
-                if not len(config[key][subkey]):
-                    raise ValueError;
-    except ValueError as exc:
-        LOG.critical("Config value '%s.%s' is empty" % (key, subkey))
-        exit(1)
+                if not config[key][subkey]:
+                    raise ValueError
+    except ValueError:
+        LOG.critical("Config value '%s.%s' is empty", key, subkey)
+        sys.exit(1)
 
-    return (infoex, data)
+    return (infoex, station)
 
 def setup_logging(log_level):
     """Setup our logging infrastructure"""
     try:
         from systemd.journal import JournalHandler
         LOG.addHandler(JournalHandler())
-    except:
+    except ImportError:
         ## fallback to syslog
         #import logging.handlers
         #LOG.addHandler(logging.handlers.SysLogHandler())
@@ -168,7 +162,7 @@ def setup_logging(log_level):
 def main():
     """Main routine: sort through args, decide what to do, then do it"""
     parser = get_parser()
-    (options, args) = parser.parse_args()
+    options = parser.parse_args()
 
     config = configparser.ConfigParser(allow_no_value=False)
 
@@ -184,123 +178,76 @@ def main():
         print("\nPlease select an appropriate log level or remove the switch (--log-level).")
         sys.exit(1)
 
-    (infoex, data) = setup_config(config)
+    (infoex, station) = setup_config(config)
 
     LOG.debug('Config parsed, starting up')
 
     # create mappings
     (fmap, final_data) = setup_infoex_fields_mapping(infoex['location_uuid'])
-    iemap = setup_infoex_counterparts_mapping(data['provider'])
+    iemap = setup_infoex_counterparts_mapping(station['provider'])
 
     # override units if user selected metric
-    #
-    # NOTE: to update this, use the fmap<->final_data mapping laid out above
-    #
-    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
-    #       itself handles the unit conversion; in the future, we will also
-    #       support NRCS unit conversion, but this must be done by this
-    #       program.
-    if data['units'] == 'metric':
-        final_data[fmap['tempPresUnit']] = 'C'
-        final_data[fmap['hsUnit']] = 'm'
-        final_data[fmap['windSpeedUnit']] = 'm/s'
-        final_data[fmap['windGustSpeedNumUnit']] = 'm/s'
+    try:
+        if station['units'] == 'metric':
+            final_data = switch_units_to_metric(final_data, fmap)
+    except KeyError:
+        if station['provider'] != 'python':
+            LOG.error("Please specify the units in the configuration "
+                      "file")
+            sys.exit(1)
 
-    # floor time to nearest hour
-    dt = datetime.datetime.now()
-    end_date = dt - datetime.timedelta(minutes=dt.minute % 60,
-                                       seconds=dt.second,
-                                       microseconds=dt.microsecond)
-    begin_date = end_date - datetime.timedelta(hours=3)
+    (begin_date, end_date) = setup_time_values()
 
-    # get the data
-    LOG.debug("Getting %s data from %s to %s" % (str(data['desired_data']),
-        str(begin_date), str(end_date)))
+    if station['provider'] == 'python':
+        LOG.debug("Getting custom data from external Python program")
+    else:
+        LOG.debug("Getting %s data from %s to %s",
+                  str(station['desired_data']),
+                  str(begin_date), str(end_date))
 
     time_all_elements = time.time()
 
-    # NRCS-specific code
-    if data['provider'] == 'nrcs':
-        transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
-        client = zeep.Client(wsdl=data['source'], transport=transport)
-
-        for elementCd in data['desired_data']:
-            time_element = time.time()
-
-            # get the last three hours of data for this elementCd
-            tmp = client.service.getHourlyData(
-                    stationTriplets=[data['station_id']],
-                    elementCd=elementCd,
-                    ordinal=1,
-                    beginDate=begin_date,
-                    endDate=end_date)
-
-            LOG.info("Time to get elementCd '%s': %.3f sec" % (elementCd,
-                time.time() - time_element))
-
-            values = tmp[0]['values']
-
-            # sort and isolate the most recent
-            #
-            # NOTE: we do this because sometimes there are gaps in hourly data
-            #       in NRCS; yes, we may end up with slightly inaccurate data,
-            #       so perhaps this decision will be re-evaluated in the future
-            if values:
-                ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
-                infoex['wx_data'][elementCd] = ordered[0]['value']
-            else:
-                infoex['wx_data'][elementCd] = None
-
-    # MesoWest-specific code
-    elif data['provider'] == 'mesowest':
-        # massage begin/end date format
-        begin_date_str = begin_date.strftime('%Y%m%d%H%M')
-        end_date_str = end_date.strftime('%Y%m%d%H%M')
+    # get the data
+    if station['provider'] == 'nrcs':
+        infoex['wx_data'] = get_nrcs_data(begin_date, end_date, station)
+    elif station['provider'] == 'mesowest':
+        infoex['wx_data'] = get_mesowest_data(begin_date, end_date,
+                                              station)
+    elif station['provider'] == 'python':
+        try:
+            import importlib.util
 
-        # construct final, completed API URL
-        api_req_url = data['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
-        req = requests.get(api_req_url)
+            spec = importlib.util.spec_from_file_location('custom_wx',
+                                                          station['path'])
+            mod = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(mod)
+            mod.LOG = LOG
 
-        try:
-            json = req.json()
-        except ValueError:
-            LOG.error("Bad JSON in MesoWest response")
+            try:
+                infoex['wx_data'] = mod.get_custom_data()
+
+                if infoex['wx_data'] is None:
+                    infoex['wx_data'] = []
+            except Exception as exc:
+                LOG.error("Python program for custom Wx data failed in "
+                          "execution: %s", str(exc))
+                sys.exit(1)
+
+            LOG.info("Successfully executed external Python program")
+        except ImportError:
+            LOG.error("Please upgrade to Python 3.3 or later")
             sys.exit(1)
-
-        try:
-            observations = json['STATION'][0]['OBSERVATIONS']
-        except ValueError:
-            LOG.error("Bad JSON in MesoWest response")
+        except FileNotFoundError:
+            LOG.error("Specified Python program for custom Wx data "
+                      "was not found")
+            sys.exit(1)
+        except Exception as exc:
+            LOG.error("A problem was encountered when attempting to "
+                      "load your custom Wx program: %s", str(exc))
             sys.exit(1)
 
-        pos = len(observations['date_time']) - 1
-
-        for elementCd in data['desired_data'].split(','):
-            # sort and isolate the most recent, see note above in NRCS for how and
-            # why this is done
-            #
-            # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
-            #       data (whereas with NRCS, we have to make a separate request for
-            #       each element we want). This is nice for network efficiency but
-            #       it means we have to handle this part differently for each.
-            #
-            # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
-            #       provides hourly data, but MesoWest can often provide data every
-            #       10 minutes -- though this provides more opportunity for
-            #       irregularities
-
-            # we may not have the data at all
-            key_name = elementCd + '_set_1'
-            if key_name in observations:
-                if observations[key_name][pos]:
-                    infoex['wx_data'][elementCd] = observations[key_name][pos]
-                else:
-                    infoex['wx_data'][elementCd] = None
-            else:
-                infoex['wx_data'][elementCd] = None
-
-    LOG.info("Time to get all data : %.3f sec" % (time.time() -
-        time_all_elements))
+    LOG.info("Time taken to get all data : %.3f sec", time.time() -
+             time_all_elements)
 
     LOG.debug("infoex[wx_data]: %s", str(infoex['wx_data']))
 
@@ -310,11 +257,26 @@ def main():
     final_data[fmap['obDate']] = end_date.strftime('%m/%d/%Y')
     final_data[fmap['obTime']] = end_date.strftime('%H:%M')
 
-    for elementCd in infoex['wx_data']:
-        if elementCd not in iemap:
-            LOG.warning("BAD KEY wx_data['%s']" % (elementCd))
+    for element_cd in infoex['wx_data']:
+        if element_cd not in iemap:
+            LOG.warning("BAD KEY wx_data['%s']", element_cd)
             continue
 
+        # Massage precision of certain values to fit InfoEx's
+        # expectations
+        #
+        # 0 decimal places: wind speed, wind direction, wind gust, snow depth
+        # 1 decimal place:  air temp, relative humidity, baro
+        # Avoid transforming None values
+        if infoex['wx_data'][element_cd] is None:
+            continue
+        elif element_cd in ['wind_speed', 'WSPD', 'wind_direction',
+                            'WDIR', 'wind_gust', 'SNWD', 'snow_depth']:
+            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd])
+        elif element_cd in ['TOBS', 'air_temp', 'RHUM',
+                            'relative_humidity', 'PRES', 'pressure']:
+            infoex['wx_data'][element_cd] = round(infoex['wx_data'][element_cd], 1)
+
         # CONSIDER: Casting every value to Float() -- need to investigate if
         #           any possible elementCds we may want are any other data
         #           type than float.
@@ -324,37 +286,39 @@ def main():
         #           storedUnitCd. But that's pretty network-intensive and
         #           may not even be worth it if there's only e.g. one or two
         #           exceptions to any otherwise uniformly Float value set.
-        final_data[fmap[iemap[elementCd]]] = infoex['wx_data'][elementCd]
+        final_data[fmap[iemap[element_cd]]] = infoex['wx_data'][element_cd]
 
-    LOG.debug("final_data: %s" % (str(final_data)))
+    LOG.debug("final_data: %s", str(final_data))
 
-    if not write_local_csv(infoex['csv_filename'], final_data):
-        LOG.warning('Could not write local CSV file: %s',
-                    infoex['csv_filename'])
-        return 1;
+    if infoex['wx_data']:
+        if not write_local_csv(infoex['csv_filename'], final_data):
+            LOG.warning('Could not write local CSV file: %s',
+                        infoex['csv_filename'])
+            return 1
 
-    if not options.dry_run:
-        upload_csv(infoex['csv_filename'], infoex)
+        if not options.dry_run:
+            upload_csv(infoex['csv_filename'], infoex)
 
     LOG.debug('DONE')
     return 0
 
-# Data structure operations
+# data structure operations
 def setup_infoex_fields_mapping(location_uuid):
     """
     Create a mapping of InfoEx fields to the local data's indexing scheme.
 
     INFOEX FIELDS
-    
+
     This won't earn style points in Python, but here we establish a couple
     of helpful mappings variables. The reason this is helpful is that the
     end result is simply an ordered set, the CSV file. But we still may
     want to manipulate the values arbitrarily before writing that file.
-    
+
     Also note that the current Auto Wx InfoEx documentation shows these
     keys in a graphical table with the "index" beginning at 1, but here we
     sanely index beginning at 0.
     """
+    # pylint: disable=too-many-statements,multiple-statements,bad-whitespace
     fmap = {}                           ; final_data     = [None] * 29
     fmap['Location UUID'] = 0           ; final_data[0]  = location_uuid
     fmap['obDate'] = 1                  ; final_data[1]  = None
@@ -398,6 +362,8 @@ def setup_infoex_counterparts_mapping(provider):
     if provider == 'nrcs':
         iemap['PREC'] = 'precipitationGauge'
         iemap['TOBS'] = 'tempPres'
+        iemap['TMAX'] = 'tempMaxHour'
+        iemap['TMIN'] = 'tempMinHour'
         iemap['SNWD'] = 'hS'
         iemap['PRES'] = 'baro'
         iemap['RHUM'] = 'rH'
@@ -408,31 +374,155 @@ def setup_infoex_counterparts_mapping(provider):
     elif provider == 'mesowest':
         iemap['precip_accum'] = 'precipitationGauge'
         iemap['air_temp'] = 'tempPres'
+        iemap['air_temp_high_24_hour'] = 'tempMaxHour'
+        iemap['air_temp_low_24_hour'] = 'tempMinHour'
         iemap['snow_depth'] = 'hS'
         iemap['pressure'] = 'baro'
         iemap['relative_humidity'] = 'rH'
         iemap['wind_speed'] = 'windSpeedNum'
         iemap['wind_direction'] = 'windDirectionNum'
         iemap['wind_gust'] = 'windGustSpeedNum'
+    elif provider == 'python':
+        # we expect Python programs to use the InfoEx data type names
+        iemap['precipitationGauge'] = 'precipitationGauge'
+        iemap['tempPres'] = 'tempPres'
+        iemap['tempMaxHour'] = 'tempMaxHour'
+        iemap['tempMinHour'] = 'tempMinHour'
+        iemap['hS'] = 'hS'
+        iemap['baro'] = 'baro'
+        iemap['rH'] = 'rH'
+        iemap['windSpeedNum'] = 'windSpeedNum'
+        iemap['windDirectionNum'] = 'windDirectionNum'
+        iemap['windGustSpeedNum'] = 'windGustSpeedNum'
 
     return iemap
 
+# provider-specific operations
+def get_nrcs_data(begin, end, station):
+    """return {element_cd: most recent hourly value or None} fetched via the NRCS WSDL"""
+    transport = zeep.transports.Transport(cache=zeep.cache.SqliteCache())
+    client = zeep.Client(wsdl=station['source'], transport=transport)
+    remote_data = {}
+
+    for element_cd in station['desired_data']:
+        time_element = time.time()
+
+        # get the hourly data between begin and end for this elementCd/element_cd
+        tmp = client.service.getHourlyData(
+            stationTriplets=[station['station_id']],
+            elementCd=element_cd,
+            ordinal=1,
+            beginDate=begin,
+            endDate=end)
+
+        LOG.info("Time to get NRCS elementCd '%s': %.3f sec", element_cd,
+                 time.time() - time_element)
+        # one station triplet is requested; presumably tmp[0] is its result -- TODO confirm
+        values = tmp[0]['values']
+
+        # sort newest-first by dateTime and keep only the most recent value
+        #
+        # NOTE: we do this because sometimes there are gaps in hourly data
+        #       in NRCS; yes, we may end up with slightly inaccurate data,
+        #       so perhaps this decision will be re-evaluated in the future
+        if values:
+            ordered = sorted(values, key=lambda t: t['dateTime'], reverse=True)
+            remote_data[element_cd] = ordered[0]['value']
+        else:
+            remote_data[element_cd] = None
+
+    return remote_data
+
+def get_mesowest_data(begin, end, station):
+    """
+    get the most recent observations from the MesoWest/Synoptic API
+
+    Requests the time series between begin and end (datetime objects)
+    from the URL prepared in station['source'] and returns a dict mapping
+    each comma-separated name in station['desired_data'] to the newest
+    value in its '<name>_set_1' series, or None when the series is absent
+    or its newest value is null. Exits with status 1 on a bad response.
+    """
+    remote_data = {}
+
+    # massage begin/end date format
+    begin_date_str = begin.strftime('%Y%m%d%H%M')
+    end_date_str = end.strftime('%Y%m%d%H%M')
+
+    # construct final, completed API URL
+    api_req_url = station['source'] + '&start=' + begin_date_str + '&end=' + end_date_str
+    req = requests.get(api_req_url)
+
+    # a non-JSON body raises ValueError; a payload lacking the expected
+    # structure raises KeyError, IndexError or TypeError
+    try:
+        observations = req.json()['STATION'][0]['OBSERVATIONS']
+    except (ValueError, KeyError, IndexError, TypeError):
+        LOG.error("Bad JSON in MesoWest response")
+        sys.exit(1)
+
+    pos = len(observations['date_time']) - 1
+
+    for element_cd in station['desired_data'].split(','):
+        # sort and isolate the most recent, see note above in NRCS for how and
+        # why this is done
+        #
+        # NOTE: Unlike in the NRCS case, the MesoWest API response contains all
+        #       data (whereas with NRCS, we have to make a separate request for
+        #       each element we want). This is nice for network efficiency but
+        #       it means we have to handle this part differently for each.
+        #
+        # NOTE: Also unlike NRCS, MesoWest provides more granular data; NRCS
+        #       provides hourly data, but MesoWest can often provide data every
+        #       10 minutes -- though this provides more opportunity for
+        #       irregularities
+
+        # we may not have the data at all
+        key_name = element_cd + '_set_1'
+        value = observations[key_name][pos] if key_name in observations else None
+
+        # compare against None explicitly: a reading of 0 is valid data
+        # mesowest provides wind_speed in m/s, we want mph
+        if value is not None and element_cd in ('wind_speed', 'wind_gust'):
+            value = ms_to_mph(value)
+
+        remote_data[element_cd] = value
+
+    return remote_data
+
+def switch_units_to_metric(data_map, mapping):
+    """set the unit entries of data_map (indexed via mapping) to metric in place; returns data_map"""
+
+    # NOTE: to update this, use the fmap<->final_data mapping laid out
+    #       in setup_infoex_fields_mapping()
+    #
+    # NOTE: this only 'works' with MesoWest for now, as the MesoWest API
+    #       itself handles the unit conversion; in the future, we will also
+    #       support NRCS unit conversion, but this must be done by this
+    #       program.
+    data_map[mapping['tempPresUnit']] = 'C'
+    data_map[mapping['hsUnit']] = 'm'
+    data_map[mapping['windSpeedUnit']] = 'm/s'
+    data_map[mapping['windGustSpeedNumUnit']] = 'm/s'
+
+    return data_map
+
 # CSV operations
 def write_local_csv(path_to_file, data):
     """Write the specified CSV file to disk"""
-    with open(path_to_file, 'w') as f:
+    with open(path_to_file, 'w') as file_object:
         # The requirement is that empty values are represented in the CSV
         # file as "", csv.QUOTE_NONNUMERIC achieves that
-        LOG.debug("writing CSV file '%s'" % (path_to_file))
-        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
+        LOG.debug("writing CSV file '%s'", path_to_file)
+        writer = csv.writer(file_object, quoting=csv.QUOTE_NONNUMERIC)
         writer.writerow(data)
-        f.close()
+        file_object.close()
     return True
 
 def upload_csv(path_to_file, infoex_data):
     """Upload the specified CSV file to InfoEx FTP and remove the file"""
     with open(path_to_file, 'rb') as file_object:
-        LOG.debug("uploading FTP file '%s'" % (infoex_data['host']))
+        LOG.debug("uploading FTP file '%s'", infoex_data['host'])
         ftp = FTP(infoex_data['host'], infoex_data['uuid'],
                   infoex_data['api_key'])
         ftp.storlines('STOR ' + path_to_file, file_object)
@@ -440,5 +530,20 @@ def upload_csv(path_to_file, infoex_data):
         file_object.close()
     os.remove(path_to_file)
 
+# other miscellaneous routines
+def setup_time_values():
+    """return (begin_date, end_date): the start of the current hour and three hours before it"""
+    # floor the current time to the hour (minute is already 0-59, so % 60 is a no-op)
+    date_time = datetime.datetime.now()
+    end_date = date_time - datetime.timedelta(minutes=date_time.minute % 60,
+                                              seconds=date_time.second,
+                                              microseconds=date_time.microsecond)
+    begin_date = end_date - datetime.timedelta(hours=3)
+    return (begin_date, end_date)
+
+def ms_to_mph(ms):
+    """convert meters per second to miles per hour (1 m/s = 3600/1609.344 mph)"""
+    return ms * 2.236936
+
 if __name__ == "__main__":
     sys.exit(main())