Tests parsers ability to read and parse non-local files
and hence require a network connection to be read.
"""
-
import os
+
import pytest
+import moto

import pandas.util.testing as tm
from pandas import DataFrame
from pandas.io.parsers import read_csv, read_table
+from pandas.compat import BytesIO
+
+
+@pytest.fixture(scope='module')
+def tips_file():
+    return os.path.join(tm.get_data_path(), 'tips.csv')


@pytest.fixture(scope='module')
@@ -19,6 +26,40 @@ def salaries_table():
    return read_table(path)


+@pytest.fixture(scope='module')
+def s3_resource(tips_file):
+    pytest.importorskip('s3fs')
+    moto.mock_s3().start()
+
+    test_s3_files = [
+        ('tips.csv', tips_file),
+        ('tips.csv.gz', tips_file + '.gz'),
+        ('tips.csv.bz2', tips_file + '.bz2'),
+    ]
+
+    def add_tips_files(bucket_name):
+        for s3_key, file_name in test_s3_files:
+            with open(file_name, 'rb') as f:
+                conn.Bucket(bucket_name).put_object(
+                    Key=s3_key,
+                    Body=f)
+
+    boto3 = pytest.importorskip('boto3')
+    # see gh-16135
+    bucket = 'pandas-test'
+
+    conn = boto3.resource("s3", region_name="us-east-1")
+    conn.create_bucket(Bucket=bucket)
+    add_tips_files(bucket)
+
+    conn.create_bucket(Bucket='cant_get_it', ACL='private')
+    add_tips_files('cant_get_it')
+
+    yield conn
+
+    moto.mock_s3().stop()
+
+
@pytest.mark.network
@pytest.mark.parametrize(
    "compression,extension",
2465 "compression,extension" ,
@@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
5192
5293
5394class TestS3 (object ):
54-
55- def setup_method (self , method ):
56- try :
57- import s3fs # noqa
58- except ImportError :
59- pytest .skip ("s3fs not installed" )
60-
6195 @tm .network
6296 def test_parse_public_s3_bucket (self ):
97+ pytest .importorskip ('s3fs' )
98+ # more of an integration test due to the not-public contents portion
99+ # can probably mock this though.
63100 for ext , comp in [('' , None ), ('.gz' , 'gzip' ), ('.bz2' , 'bz2' )]:
64101 df = read_csv ('s3://pandas-test/tips.csv' +
65102 ext , compression = comp )
@@ -74,26 +111,24 @@ def test_parse_public_s3_bucket(self):
74111 assert not df .empty
75112 tm .assert_frame_equal (read_csv (tm .get_data_path ('tips.csv' )), df )
76113
77- @ tm . network
78- def test_parse_public_s3n_bucket ( self ):
114+ def test_parse_public_s3n_bucket ( self , s3_resource ):
115+
79116 # Read from AWS s3 as "s3n" URL
80117 df = read_csv ('s3n://pandas-test/tips.csv' , nrows = 10 )
81118 assert isinstance (df , DataFrame )
82119 assert not df .empty
83120 tm .assert_frame_equal (read_csv (
84121 tm .get_data_path ('tips.csv' )).iloc [:10 ], df )
85122
86- @tm .network
87- def test_parse_public_s3a_bucket (self ):
123+ def test_parse_public_s3a_bucket (self , s3_resource ):
88124 # Read from AWS s3 as "s3a" URL
89125 df = read_csv ('s3a://pandas-test/tips.csv' , nrows = 10 )
90126 assert isinstance (df , DataFrame )
91127 assert not df .empty
92128 tm .assert_frame_equal (read_csv (
93129 tm .get_data_path ('tips.csv' )).iloc [:10 ], df )
94130
95- @tm .network
96- def test_parse_public_s3_bucket_nrows (self ):
131+ def test_parse_public_s3_bucket_nrows (self , s3_resource ):
97132 for ext , comp in [('' , None ), ('.gz' , 'gzip' ), ('.bz2' , 'bz2' )]:
98133 df = read_csv ('s3://pandas-test/tips.csv' +
99134 ext , nrows = 10 , compression = comp )
@@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self):
            tm.assert_frame_equal(read_csv(
                tm.get_data_path('tips.csv')).iloc[:10], df)

-    @tm.network
-    def test_parse_public_s3_bucket_chunked(self):
+    def test_parse_public_s3_bucket_chunked(self, s3_resource):
        # Read with a chunksize
        chunksize = 5
        local_tips = read_csv(tm.get_data_path('tips.csv'))
@@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self):
                    chunksize * i_chunk: chunksize * (i_chunk + 1)]
                tm.assert_frame_equal(true_df, df)

-    @tm.network
-    def test_parse_public_s3_bucket_chunked_python(self):
+    def test_parse_public_s3_bucket_chunked_python(self, s3_resource):
        # Read with a chunksize using the Python parser
        chunksize = 5
        local_tips = read_csv(tm.get_data_path('tips.csv'))
@@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self):
                    chunksize * i_chunk: chunksize * (i_chunk + 1)]
                tm.assert_frame_equal(true_df, df)

-    @tm.network
-    def test_parse_public_s3_bucket_python(self):
+    def test_parse_public_s3_bucket_python(self, s3_resource):
        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
            df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                          compression=comp)
@@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self):
            tm.assert_frame_equal(read_csv(
                tm.get_data_path('tips.csv')), df)

-    @tm.network
-    def test_infer_s3_compression(self):
+    def test_infer_s3_compression(self, s3_resource):
        for ext in ['', '.gz', '.bz2']:
            df = read_csv('s3://pandas-test/tips.csv' + ext,
                          engine='python', compression='infer')
@@ -160,8 +191,7 @@ def test_infer_s3_compression(self):
            tm.assert_frame_equal(read_csv(
                tm.get_data_path('tips.csv')), df)

-    @tm.network
-    def test_parse_public_s3_bucket_nrows_python(self):
+    def test_parse_public_s3_bucket_nrows_python(self, s3_resource):
        for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
            df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                          nrows=10, compression=comp)
@@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self):
            tm.assert_frame_equal(read_csv(
                tm.get_data_path('tips.csv')).iloc[:10], df)

-    @tm.network
-    def test_s3_fails(self):
+    def test_s3_fails(self, s3_resource):
        with pytest.raises(IOError):
            read_csv('s3://nyqpug/asdf.csv')

@@ -180,21 +209,18 @@ def test_s3_fails(self):
        with pytest.raises(IOError):
            read_csv('s3://cant_get_it/')

-    @tm.network
-    def boto3_client_s3(self):
+    def test_read_csv_handles_boto_s3_object(self,
+                                             s3_resource,
+                                             tips_file):
        # see gh-16135

-        # boto3 is a dependency of s3fs
-        import boto3
-        client = boto3.client("s3")
-
-        key = "/tips.csv"
-        bucket = "pandas-test"
-        s3_object = client.get_object(Bucket=bucket, Key=key)
+        s3_object = s3_resource.meta.client.get_object(
+            Bucket='pandas-test',
+            Key='tips.csv')

-        result = read_csv(s3_object["Body"])
+        result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
        assert isinstance(result, DataFrame)
        assert not result.empty

-        expected = read_csv(tm.get_data_path('tips.csv'))
+        expected = read_csv(tips_file)
        tm.assert_frame_equal(result, expected)
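
Aside: the new s3_resource fixture relies on moto's in-process S3 mock, which patches boto3 so that bucket operations hit an in-memory backend instead of the network. A minimal standalone sketch of that pattern (the bucket name, key, and data below are made up for illustration, not part of this change):

import boto3
import moto

mock = moto.mock_s3()
mock.start()  # from here on, boto3 S3 calls go to moto's fake backend

conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket="example-bucket")  # hypothetical bucket
conn.Bucket("example-bucket").put_object(Key="data.csv", Body=b"a,b\n1,2\n")

# Reading the object back returns exactly what was stored, with no
# credentials or network access required.
body = conn.Object("example-bucket", "data.csv").get()["Body"].read()
assert body == b"a,b\n1,2\n"

mock.stop()  # restore real boto3 behaviour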