@@ -8,9 +8,9 @@
 import numpy as np
 import datetime as dt
 import urllib
-import urllib2
 import time
-import warnings
+from contextlib import closing
+from urllib2 import urlopen
 
 from zipfile import ZipFile
 from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str
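
Note: contextlib.closing is what makes the with-statement pattern used
throughout this commit work on Python 2, where the object returned by
urllib2.urlopen has a close() method but is not a context manager. A
minimal sketch of the pattern (the URL is illustrative):

    from contextlib import closing
    from urllib2 import urlopen

    with closing(urlopen('http://www.example.com')) as resp:
        body = resp.read()
    # resp.close() has run by this point, even if read() raised
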
@@ -109,10 +109,11 @@ def get_quote_yahoo(symbols):
 
     data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
 
-    urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
-        sym_list, request)
+    url_str = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (sym_list,
+                                                                   request)
 
-    lines = urllib2.urlopen(urlStr).readlines()
+    with closing(urlopen(url_str)) as url:
+        lines = url.readlines()
 
     for line in lines:
         fields = line.decode('utf-8').strip().split(',')
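
The quotes endpoint returns one CSV record per ticker, and readlines()
is called inside the with block, so the whole payload is buffered before
the connection closes. An illustrative request; the symbols and the
field string 'sl1' (symbol plus last trade price) are examples only:

    url_str = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % ('AAPL,MSFT',
                                                                   'sl1')
    with closing(urlopen(url_str)) as url:
        lines = url.readlines()
    # lines is now e.g. ['"AAPL",440.50\r\n', '"MSFT",33.10\r\n']
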
@@ -151,29 +152,29 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
 
     yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
 
-    url = yahoo_URL + 's=%s' % sym + \
-        '&a=%s' % (start.month - 1) + \
-        '&b=%s' % start.day + \
-        '&c=%s' % start.year + \
-        '&d=%s' % (end.month - 1) + \
-        '&e=%s' % end.day + \
-        '&f=%s' % end.year + \
-        '&g=d' + \
-        '&ignore=.csv'
-
-    for _ in range(retry_count):
-        resp = urllib2.urlopen(url)
-        if resp.code == 200:
-            lines = resp.read()
-            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
-                          parse_dates=True)[::-1]
-
-            # Yahoo! Finance sometimes does this awesome thing where they
-            # return 2 rows for the most recent business day
-            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
-                rs = rs[:-1]
-
-            return rs
+    url = (yahoo_URL + 's=%s' % sym +
+           '&a=%s' % (start.month - 1) +
+           '&b=%s' % start.day +
+           '&c=%s' % start.year +
+           '&d=%s' % (end.month - 1) +
+           '&e=%s' % end.day +
+           '&f=%s' % end.year +
+           '&g=d' +
+           '&ignore=.csv')
+
+    for _ in xrange(retry_count):
+        with closing(urlopen(url)) as resp:
+            if resp.code == 200:
+                lines = resp.read()
+                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+                              parse_dates=True)[::-1]
+
+                # Yahoo! Finance sometimes does this awesome thing where they
+                # return 2 rows for the most recent business day
+                if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
+                    rs = rs[:-1]
+
+                return rs
 
         time.sleep(pause)
 
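
The loop only returns on an HTTP 200, sleeping between attempts, and the
[::-1] reverses the CSV (served newest-first) into chronological order.
The generic shape of the retry, with parse_body standing in as a
hypothetical parser and the final raise being this sketch's choice (the
function's own fallback lies outside the hunk):

    for _ in xrange(retry_count):
        with closing(urlopen(url)) as resp:
            if resp.code == 200:
                return parse_body(resp.read())  # hypothetical parser
        time.sleep(pause)  # back off before retrying
    raise IOError('no 200 response after %d tries' % retry_count)
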
@@ -198,17 +199,19 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
     google_URL = 'http://www.google.com/finance/historical?'
 
     # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
-    url = google_URL + urllib.urlencode({"q": sym, \
-        "startdate": start.strftime('%b %d, %Y'), \
-        "enddate": end.strftime('%b %d, %Y'), "output": "csv"})
-    for _ in range(retry_count):
-        resp = urllib2.urlopen(url)
-        if resp.code == 200:
-            lines = resp.read()
-            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
-                          parse_dates=True)[::-1]
-
-            return rs
+    url = google_URL + urllib.urlencode({"q": sym,
+                                         "startdate": start.strftime('%b %d, '
+                                                                     '%Y'),
+                                         "enddate": end.strftime('%b %d, %Y'),
+                                         "output": "csv"})
+    for _ in xrange(retry_count):
+        with closing(urlopen(url)) as resp:
+            if resp.code == 200:
+                lines = resp.read()
+                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+                              parse_dates=True)[::-1]
+
+                return rs
 
         time.sleep(pause)
 
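
urllib.urlencode percent-escapes the human-readable dates onto the query
string, which is how the sample URL in the comment above gets its
Jun+9%2C+2011 form. For example (a list of pairs keeps the output order
deterministic):

    import urllib
    urllib.urlencode([('q', 'GOOG'), ('startdate', 'Jun 9, 2011')])
    # -> 'q=GOOG&startdate=Jun+9%2C+2011'
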
@@ -280,19 +283,19 @@ def get_components_yahoo(idx_sym):
           '&e=.csv&h={2}'
 
     idx_mod = idx_sym.replace('^', '@%5E')
-    urlStr = url.format(idx_mod, stats, 1)
+    url_str = url.format(idx_mod, stats, 1)
 
     idx_df = DataFrame()
     mask = [True]
     comp_idx = 1
 
-    #LOOP across component index structure,
-    #break when no new components are found
-    while (True in mask):
-        urlStr = url.format(idx_mod, stats, comp_idx)
-        lines = (urllib.urlopen(urlStr).read().decode('utf-8').strip().
-                 strip('"').split('"\r\n"'))
-
+    # LOOP across component index structure,
+    # break when no new components are found
+    while True in mask:
+        url_str = url.format(idx_mod, stats, comp_idx)
+        with closing(urlopen(url_str)) as resp:
+            raw = resp.read()
+        lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
        lines = [line.strip().split('","') for line in lines]
 
        temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange'])
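
Yahoo! quotes every field in this CSV, so stripping the outer quotes and
splitting on the quoted CRLF yields one record per element. Reading the
raw bytes inside the with block and decoding afterwards keeps the
connection open only for the download itself:

    with closing(urlopen(url_str)) as resp:
        raw = resp.read()  # socket released right after this line
    lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
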
@@ -468,11 +471,11 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
 
     fred_URL = "http://research.stlouisfed.org/fred2/series/"
 
-    url = fred_URL + '%s' % name + \
-        '/downloaddata/%s' % name + '.csv'
-    data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True,
-                    header=None, skiprows=1, names=["DATE", name],
-                    na_values='.')
+    url = fred_URL + '%s' % name + '/downloaddata/%s' % name + '.csv'
+    with closing(urlopen(url)) as resp:
+        data = read_csv(resp, index_col=0, parse_dates=True,
+                        header=None, skiprows=1, names=["DATE", name],
+                        na_values='.')
     try:
         return data.truncate(start, end)
     except KeyError:
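
read_csv accepts any file-like object, so the response streams straight
into the parser and closing() releases the connection once parsing is
done. For a hypothetical series name such as 'GDP' the constructed URL
would be:

    # http://research.stlouisfed.org/fred2/series/GDP/downloaddata/GDP.csv

na_values='.' is needed because FRED marks missing observations with a
bare dot rather than an empty field.
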
@@ -489,9 +492,9 @@ def get_data_famafrench(name, start=None, end=None):
     # path of zip files
     zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
 
-    url = urllib.urlopen(zipFileURL + name + ".zip")
-    zipfile = ZipFile(StringIO(url.read()))
-    data = zipfile.open(name + ".txt").readlines()
+    with closing(urlopen(zipFileURL + name + ".zip")) as url:
+        with closing(ZipFile(StringIO(url.read()))) as zf:
+            data = zf.read(name + ".txt").splitlines()
 
     file_edges = np.where(np.array([len(d) for d in data]) == 2)[0]
 
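
ZipFile only became a context manager in Python 2.7/3.2, so wrapping it
in closing() keeps the code running on older interpreters. Note also the
read/splitlines swap:

    zf.read(name + '.txt').splitlines()  # whole member -> list of lines
    # replaces: zipfile.open(name + '.txt').readlines()

splitlines() drops the trailing '\r\n' that readlines() kept, so the
len(d) == 2 test just below now sees shorter strings; worth confirming
that check still finds the intended boundary lines.
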
@@ -638,7 +641,7 @@ def get_options_data(self, month=None, year=None, expiry=None):
         url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
                   '+Options')
 
-        parsed = parse(urllib2.urlopen(url))
+        parsed = parse(url)
         doc = parsed.getroot()
         tables = doc.findall('.//table')
         calls = tables[9]
@@ -709,7 +712,7 @@ def get_call_data(self, month=None, year=None, expiry=None):
         url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
                   '+Options')
 
-        parsed = parse(urllib2.urlopen(url))
+        parsed = parse(url)
         doc = parsed.getroot()
         tables = doc.findall('.//table')
         calls = tables[9]
@@ -777,7 +780,7 @@ def get_put_data(self, month=None, year=None, expiry=None):
         url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
                   '+Options')
 
-        parsed = parse(urllib2.urlopen(url))
+        parsed = parse(url)
         doc = parsed.getroot()
         tables = doc.findall('.//table')
         puts = tables[13]
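
All three Options methods drop the explicit urlopen because parse here
(assumed to be lxml.html.parse) accepts a filename, file object, or URL
and manages the connection itself. An illustrative call (the symbol is
an example):

    from lxml.html import parse

    doc = parse('http://finance.yahoo.com/q/op?s=AAPL+Options').getroot()
    tables = doc.findall('.//table')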