diff --git a/docs/sphinx/source/whatsnew/v0.6.0.rst b/docs/sphinx/source/whatsnew/v0.6.0.rst index 9513424e1f..9613c4c88a 100644 --- a/docs/sphinx/source/whatsnew/v0.6.0.rst +++ b/docs/sphinx/source/whatsnew/v0.6.0.rst @@ -22,6 +22,8 @@ Bug fixes (:issue:`464`) * ModelChain.prepare_inputs failed to pass solar_position and airmass to Location.get_clearsky. Fixed. (:issue:`481`) +* Add User-Agent specification to TMY3 remote requests to avoid rejection. + (:issue:`493`) Documentation diff --git a/pvlib/tmy.py b/pvlib/tmy.py index 111f2bab91..371bd933f1 100644 --- a/pvlib/tmy.py +++ b/pvlib/tmy.py @@ -7,9 +7,9 @@ import dateutil import io try: - from urllib2 import urlopen + from urllib2 import urlopen, Request except ImportError: - from urllib.request import urlopen + from urllib.request import urlopen, Request import pandas as pd @@ -164,14 +164,23 @@ def readtmy3(filename=None, coerce_year=None, recolumn=True): head = ['USAF', 'Name', 'State', 'TZ', 'latitude', 'longitude', 'altitude'] - try: - csvdata = open(filename, 'r') - except IOError: - response = urlopen(filename) + if filename.startswith('http'): + request = Request(filename, headers={'User-Agent': + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) ' + 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 ' + 'Safari/537.36'}) + response = urlopen(request) csvdata = io.StringIO(response.read().decode(errors='ignore')) + else: + # assume it's accessible via the file system + csvdata = open(filename, 'r') + + # read in file metadata, advance buffer to second line + firstline = csvdata.readline() + if 'Request Rejected' in firstline: + raise IOError('Remote server rejected TMY file request') - # read in file metadata - meta = dict(zip(head, csvdata.readline().rstrip('\n').split(","))) + meta = dict(zip(head, firstline.rstrip('\n').split(","))) # convert metadata strings to numeric types meta['altitude'] = float(meta['altitude']) @@ -180,8 +189,12 @@ def readtmy3(filename=None, coerce_year=None, recolumn=True): meta['TZ'] = float(meta['TZ']) meta['USAF'] = int(meta['USAF']) + # use pandas to read the csv file/stringio buffer + # header is actually the second line in file, but tell pandas to look for + # header information on the 1st line (0 indexing) because we've already + # advanced past the true first line with the readline call above. data = pd.read_csv( - filename, header=1, + csvdata, header=0, parse_dates={'datetime': ['Date (MM/DD/YYYY)', 'Time (HH:MM)']}, date_parser=lambda *x: _parsedate(*x, year=coerce_year), index_col='datetime')