ARROW-15892: [C++] Dataset APIs require s3:ListBucket Permissions #12701
Changes from all commits
python/pyarrow/tests/test_dataset.py

@@ -35,7 +35,8 @@
 import pyarrow.feather
 import pyarrow.fs as fs
 from pyarrow.tests.util import (change_cwd, _filesystem_uri,
-                                FSProtocolClass, ProxyHandler)
+                                FSProtocolClass, ProxyHandler,
+                                _configure_s3_limited_user)
 
 try:
     import pandas as pd
@@ -4334,6 +4335,71 @@ def test_write_dataset_s3(s3_example_simple):
     assert result.equals(table)
 
 
+_minio_put_only_policy = """{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Action": [
+                "s3:PutObject",
+                "s3:ListBucket",
+                "s3:GetObjectVersion"
+            ],
+            "Resource": [
+                "arn:aws:s3:::*"
+            ]
+        }
+    ]
+}"""
+
+
+@pytest.mark.parquet
+@pytest.mark.s3
+def test_write_dataset_s3_put_only(s3_server):
+    # [ARROW-15892] Test the create_dir flag, which disables directory
+    # creation when writing a dataset. This is needed when writing a
+    # dataset to S3 with very limited permissions, where the dataset
+    # must be written directly without first creating a directory.
+    from pyarrow.fs import S3FileSystem
+
+    # write dataset with s3 filesystem
+    host, port, _, _ = s3_server['connection']
+    fs = S3FileSystem(
+        access_key='limited',
+        secret_key='limited123',
+        endpoint_override='{}:{}'.format(host, port),
+        scheme='http'
+    )
+    _configure_s3_limited_user(s3_server, _minio_put_only_policy)
+
+    table = pa.table([
+        pa.array(range(20)), pa.array(np.random.randn(20)),
+        pa.array(np.repeat(['a', 'b'], 10))],
+        names=["f1", "f2", "part"]
+    )
+    part = ds.partitioning(pa.schema([("part", pa.string())]), flavor="hive")
+
+    # write with the filesystem object, with create_dir set to False
+    ds.write_dataset(
+        table, "existing-bucket", filesystem=fs,
+        format="feather", create_dir=False, partitioning=part,
+        existing_data_behavior='overwrite_or_ignore'
+    )
+
+    # check roundtrip
+    result = ds.dataset(
+        "existing-bucket", filesystem=fs, format="ipc", partitioning="hive"
+    ).to_table()
+    assert result.equals(table)
+
+    # directory creation is denied for this user, so create_dir=True fails
+    with pytest.raises(OSError, match="Access Denied"):
+        ds.write_dataset(
+            table, "existing-bucket", filesystem=fs,
+            format="feather", create_dir=True,
+            existing_data_behavior='overwrite_or_ignore'
+        )
+
+
 @pytest.mark.parquet
 def test_dataset_null_to_dictionary_cast(tempdir, dataset_reader):
     # ARROW-12420
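Not shown in this diff is the body of _configure_s3_limited_user (imported above from pyarrow.tests.util). Below is a minimal sketch of what such a helper could look like, assuming the minio mc admin client is on PATH; the alias name, policy name, and exact mc subcommands are assumptions for illustration, not the helper's actual implementation.

import subprocess
import tempfile


def _configure_s3_limited_user_sketch(s3_server, policy):
    # Sketch only: provision a 'limited'/'limited123' user on the test
    # minio server and attach the put-only policy to it. Assumes the
    # minio `mc` client is installed; 'pyarrow-minio' and 'put-only'
    # are placeholder alias/policy names.
    host, port, access_key, secret_key = s3_server['connection']
    address = 'http://{}:{}'.format(host, port)

    with tempfile.NamedTemporaryFile('w', suffix='.json') as policy_file:
        policy_file.write(policy)
        policy_file.flush()
        # Point mc at the test server under a local alias.
        subprocess.run(['mc', 'alias', 'set', 'pyarrow-minio', address,
                        access_key, secret_key], check=True)
        # Create the restricted user the test signs in as.
        subprocess.run(['mc', 'admin', 'user', 'add', 'pyarrow-minio',
                        'limited', 'limited123'], check=True)
        # Register the policy JSON and attach it to that user.
        subprocess.run(['mc', 'admin', 'policy', 'add', 'pyarrow-minio',
                        'put-only', policy_file.name], check=True)
        subprocess.run(['mc', 'admin', 'policy', 'set', 'pyarrow-minio',
                        'put-only', 'user=limited'], check=True)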
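For context on the feature under test: with put-only credentials on a pre-existing bucket, create_dir=False tells write_dataset to skip the directory-creation calls that the user's policy would deny. A minimal end-user sketch against real S3 follows; the bucket name, credentials, and region are placeholders.

import pyarrow as pa
import pyarrow.dataset as ds
from pyarrow.fs import S3FileSystem

# Placeholder credentials/region; the bucket must already exist.
s3 = S3FileSystem(access_key='<access-key>', secret_key='<secret-key>',
                  region='us-east-1')

table = pa.table({'f1': list(range(4)), 'part': ['a', 'a', 'b', 'b']})
part = ds.partitioning(pa.schema([('part', pa.string())]), flavor='hive')

# create_dir=False skips CreateDir on the bucket, so only the narrow
# put-style permissions are needed for the data files themselves.
ds.write_dataset(table, 'my-existing-bucket', filesystem=s3,
                 format='feather', partitioning=part, create_dir=False,
                 existing_data_behavior='overwrite_or_ignore')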