1212
1313from pandas .core .dtypes .common import is_list_like
1414from pandas .errors import EmptyDataError
15- from pandas .io .common import _is_url , urlopen , _validate_header_arg
15+ from pandas .io .common import _is_url , _urlopen , _validate_header_arg
1616from pandas .io .parsers import TextParser
1717from pandas .compat import (lrange , lmap , u , string_types , iteritems ,
1818 raise_with_traceback , binary_type )
@@ -113,7 +113,7 @@ def _get_skiprows(skiprows):
113113 type (skiprows ).__name__ )
114114
115115
116- def _read (obj ):
116+ def _read (obj , session = None ):
117117 """Try to read from a url, file or string.
118118
119119 Parameters
@@ -125,8 +125,7 @@ def _read(obj):
125125 raw_text : str
126126 """
127127 if _is_url (obj ):
128- with urlopen (obj ) as url :
129- text = url .read ()
128+ text , _ = _urlopen (obj , session = session )
130129 elif hasattr (obj , 'read' ):
131130 text = obj .read ()
132131 elif isinstance (obj , char_types ):
@@ -201,12 +200,13 @@ class _HtmlFrameParser(object):
201200 functionality.
202201 """
203202
204- def __init__ (self , io , match , attrs , encoding , displayed_only ):
203+ def __init__ (self , io , match , attrs , encoding , displayed_only , session = None ):
205204 self .io = io
206205 self .match = match
207206 self .attrs = attrs
208207 self .encoding = encoding
209208 self .displayed_only = displayed_only
209+ self .session = session
210210
211211 def parse_tables (self ):
212212 """
@@ -590,7 +590,7 @@ def _parse_tfoot_tr(self, table):
590590 return table .select ('tfoot tr' )
591591
592592 def _setup_build_doc (self ):
593- raw_text = _read (self .io )
593+ raw_text = _read (self .io , self . session )
594594 if not raw_text :
595595 raise ValueError ('No text parsed from document: {doc}'
596596 .format (doc = self .io ))
@@ -713,7 +713,7 @@ def _build_doc(self):
713713
714714 try :
715715 if _is_url (self .io ):
716- with urlopen (self .io ) as f :
716+ with _urlopen (self .io ) as f :
717717 r = parse (f , parser = parser )
718718 else :
719719 # try to parse the input in the simplest way
@@ -887,9 +887,10 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):
887887
888888 # hack around python 3 deleting the exception variable
889889 retained = None
890+ session = kwargs .get ('session' , None )
890891 for flav in flavor :
891892 parser = _parser_dispatch (flav )
892- p = parser (io , compiled_match , attrs , encoding , displayed_only )
893+ p = parser (io , compiled_match , attrs , encoding , displayed_only , session )
893894
894895 try :
895896 tables = p .parse_tables ()
@@ -925,7 +926,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
925926 skiprows = None , attrs = None , parse_dates = False ,
926927 tupleize_cols = None , thousands = ',' , encoding = None ,
927928 decimal = '.' , converters = None , na_values = None ,
928- keep_default_na = True , displayed_only = True ):
929+ keep_default_na = True , displayed_only = True , session = None ):
929930 r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
930931
931932 Parameters
@@ -1088,4 +1089,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
10881089 thousands = thousands , attrs = attrs , encoding = encoding ,
10891090 decimal = decimal , converters = converters , na_values = na_values ,
10901091 keep_default_na = keep_default_na ,
1091- displayed_only = displayed_only )
1092+ displayed_only = displayed_only , session = session )
0 commit comments