-
Notifications
You must be signed in to change notification settings - Fork 4k
Description
Describe the bug, including details regarding any error messages, version, and platform.
As a result of #33858, arrow::FinalizeS3() calls neither RegionResolver::ResetDefaultInstance() nor Aws::ShutdownAPI().
This may cause a crash on exit in the "SubTreeFileSystem$create() with URI" R test:
arrow/r/tests/testthat/test-filesystem.R
Lines 154 to 164 in 0344a2c
| test_that("SubTreeFileSystem$create() with URI", { | |
| skip_on_cran() | |
| skip_if_not_available("s3") | |
| skip_if_offline() | |
| fs <- SubTreeFileSystem$create("s3://voltrondata-labs-datasets") | |
| expect_r6_class(fs, "SubTreeFileSystem") | |
| expect_identical( | |
| capture.output(print(fs)), | |
| "SubTreeFileSystem: s3://voltrondata-labs-datasets/" | |
| ) | |
| }) |
For example, it doesn't happen on the current main, but it does happen on #36230:
pure virtual method called
terminate called without an active exception
Aborted (core dumped)
I could reproduce this by running only this test (I commented out all other tests). Here is the backtrace for that case:
(gdb) bt
#0 0x00007fd53f20600b in raise () from /usr/lib/x86_64-linux-gnu/libc.so.6
#1 0x00007fd53f1e5859 in abort () from /usr/lib/x86_64-linux-gnu/libc.so.6
#2 0x00007fd53cd45911 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#3 0x00007fd53cd5138c in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#4 0x00007fd53cd513f7 in std::terminate() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#5 0x00007fd53cd52155 in __cxa_pure_virtual () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#6 0x00007fd536724dc4 in Aws::Http::CurlHandleContainer::~CurlHandleContainer (
this=0x559db1f2ac78, __in_chrg=<optimized out>)
at /build/cpp/awssdk_ep-prefix/src/awssdk_ep/aws-cpp-sdk-core/source/http/curl/CurlHandleContainer.cpp:27
#7 0x00007fd5366e6464 in Aws::Http::CurlHttpClient::~CurlHttpClient (this=0x559db1f2ac10,
__in_chrg=<optimized out>)
at /build/cpp/awssdk_ep-prefix/src/awssdk_ep/aws-cpp-sdk-core/include/aws/core/http/curl/CurlHttpClient.h:26
#8 0x00007fd5366dd5a7 in __gnu_cxx::new_allocator<Aws::Http::CurlHttpClient>::destroy<Aws::Http::CurlHttpClient> (this=0x559db1f2ac10, __p=0x559db1f2ac10)
at /usr/include/c++/9/ext/new_allocator.h:152
#9 0x00007fd5366dd4f3 in std::allocator_traits<std::allocator<Aws::Http::CurlHttpClient> >::destroy<Aws::Http::CurlHttpClient> (__a=..., __p=0x559db1f2ac10)
at /usr/include/c++/9/bits/alloc_traits.h:496
#10 0x00007fd5366dd31b in std::_Sp_counted_ptr_inplace<Aws::Http::CurlHttpClient, std::allocator<Aws::Http::CurlHttpClient>, (__gnu_cxx::_Lock_policy)2>::_M_dispose (this=0x559db1f2ac00)
at /usr/include/c++/9/bits/shared_ptr_base.h:557
#11 0x00007fd5346cb074 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (
this=0x559db1f2ac00) at /usr/include/c++/9/bits/shared_ptr_base.h:155
#12 0x00007fd5346c7881 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (
this=0x559dada631b0, __in_chrg=<optimized out>)
at /usr/include/c++/9/bits/shared_ptr_base.h:730
#13 0x00007fd5363cd5d2 in std::__shared_ptr<Aws::Http::HttpClient, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (this=0x559dada631a8, __in_chrg=<optimized out>)
at /usr/include/c++/9/bits/shared_ptr_base.h:1169
#14 0x00007fd5363cd5f2 in std::shared_ptr<Aws::Http::HttpClient>::~shared_ptr (
this=0x559dada631a8, __in_chrg=<optimized out>) at /usr/include/c++/9/bits/shared_ptr.h:103
#15 0x00007fd5363cd736 in Aws::Client::AWSClient::~AWSClient (this=0x559dada63180,
__in_chrg=<optimized out>)
at /build/cpp/awssdk_ep-prefix/src/awssdk_ep/aws-cpp-sdk-core/include/aws/core/client/AWSClient.h:97
#16 0x00007fd5363cd960 in Aws::Client::AWSXMLClient::~AWSXMLClient (this=0x559dada63180,
__in_chrg=<optimized out>)
at /build/cpp/awssdk_ep-prefix/src/awssdk_ep/aws-cpp-sdk-core/include/aws/core/client/AWSXmlClient.h:44
#17 0x00007fd5363e15c6 in Aws::S3::S3Client::~S3Client (this=0x559dada63180,
__in_chrg=<optimized out>)
at /build/cpp/awssdk_ep-prefix/src/awssdk_ep/aws-cpp-sdk-s3/source/S3Client.cpp:246
#18 0x00007fd535f9c7c2 in arrow::fs::(anonymous namespace)::S3Client::~S3Client (
this=0x559dada63180, __in_chrg=<optimized out>)
at /arrow/cpp/src/arrow/filesystem/s3fs.cc:547
#19 0x00007fd535f9de17 in __gnu_cxx::new_allocator<arrow::fs::(anonymous namespace)::S3Client>::destroy<arrow::fs::(anonymous namespace)::S3Client> (this=0x559dada63180, __p=0x559dada63180)
at /usr/include/c++/9/ext/new_allocator.h:152
#20 0x00007fd535f9da55 in std::allocator_traits<std::allocator<arrow::fs::(anonymous namespace)::S3Client> >::destroy<arrow::fs::(anonymous namespace)::S3Client> (__a=..., __p=0x559dada63180)
at /usr/include/c++/9/bits/alloc_traits.h:496
#21 0x00007fd535f9d37b in std::_Sp_counted_ptr_inplace<arrow::fs::(anonymous namespace)::S3Client, std::allocator<arrow::fs::(anonymous namespace)::S3Client>, (__gnu_cxx::_Lock_policy)2>::_M_dispose (this=0x559dada63170) at /usr/include/c++/9/bits/shared_ptr_base.h:557
#22 0x00007fd5346cb074 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (
this=0x559dada63170) at /usr/include/c++/9/bits/shared_ptr_base.h:155
#23 0x00007fd5346c7881 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (
this=0x559dabe990e8, __in_chrg=<optimized out>)
at /usr/include/c++/9/bits/shared_ptr_base.h:730
#24 0x00007fd535f81ed0 in std::__shared_ptr<arrow::fs::(anonymous namespace)::S3Client, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (this=0x559dabe990e0, __in_chrg=<optimized out>)
at /usr/include/c++/9/bits/shared_ptr_base.h:1169
#25 0x00007fd535f81eec in std::shared_ptr<arrow::fs::(anonymous namespace)::S3Client>::~shared_ptr (this=0x559dabe990e0, __in_chrg=<optimized out>) at /usr/include/c++/9/bits/shared_ptr.h:103
#26 0x00007fd535f96756 in arrow::fs::(anonymous namespace)::RegionResolver::~RegionResolver (
this=0x559dabe98c90, __in_chrg=<optimized out>)
at /arrow/cpp/src/arrow/filesystem/s3fs.cc:795
#27 0x00007fd535f9d650 in std::_Sp_counted_ptr<arrow::fs::(anonymous namespace)::RegionResolver*, (__gnu_cxx::_Lock_policy)2>::_M_dispose (this=0x559db17f9990)
at /usr/include/c++/9/bits/shared_ptr_base.h:377
#28 0x00007fd5346cb074 in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (
this=0x559db17f9990) at /usr/include/c++/9/bits/shared_ptr_base.h:155
#29 0x00007fd5346c7881 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (
this=0x7fd538084a38 <arrow::fs::(anonymous namespace)::RegionResolver::instance_+8>,
__in_chrg=<optimized out>) at /usr/include/c++/9/bits/shared_ptr_base.h:730
#30 0x00007fd535f8252e in std::__shared_ptr<arrow::fs::(anonymous namespace)::RegionResolver, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (
this=0x7fd538084a30 <arrow::fs::(anonymous namespace)::RegionResolver::instance_>,
__in_chrg=<optimized out>) at /usr/include/c++/9/bits/shared_ptr_base.h:1169
#31 0x00007fd535f8254e in std::shared_ptr<arrow::fs::(anonymous namespace)::RegionResolver>::~shared_ptr (this=0x7fd538084a30 <arrow::fs::(anonymous namespace)::RegionResolver::instance_>,
__in_chrg=<optimized out>) at /usr/include/c++/9/bits/shared_ptr.h:103
#32 0x00007fd53f2098a7 in ?? () from /usr/lib/x86_64-linux-gnu/libc.so.6
#33 0x00007fd53f209a60 in exit () from /usr/lib/x86_64-linux-gnu/libc.so.6
#34 0x00007fd53f63e781 in ?? () from /usr/lib/R/lib/libR.so
#35 0x00007fd53f641140 in R_CleanUp () from /usr/lib/R/lib/libR.so
#36 0x00007fd53f53d0ee in run_Rmainloop () from /usr/lib/R/lib/libR.so
#37 0x0000559da998e09f in main ()
#38 0x00007fd53f1e7083 in __libc_start_main () from /usr/lib/x86_64-linux-gnu/libc.so.6
#39 0x0000559da998e0de in _start ()
#6 0x00007fd536724dc4 in Aws::Http::CurlHandleContainer::~CurlHandleContainer (
this=0x559db1f2ac78, __in_chrg=<optimized out>)
at /build/cpp/awssdk_ep-prefix/src/awssdk_ep/aws-cpp-sdk-core/source/http/curl/CurlHandleContainer.cpp:27
And AWS_LOGSTREAM_INFO is here: https://github.com/aws/aws-sdk-cpp/blob/1fb97256a2ae7211a741fda0033ef1e18d29e2f0/aws-cpp-sdk-core/include/aws/core/utils/logging/LogMacros.h#L159-L168
It seems that Aws::Utils::Logging::GetLogSystem() returns an already-destroyed object in this context. Note that this is called from exit() (#33 0x00007fd53f209a60 in exit () from /usr/lib/x86_64-linux-gnu/libc.so.6), so the order in which objects are destroyed is undefined.
Can we call RegionResolver::ResetDefaultInstance() and Aws::ShutdownAPI() from arrow::FinalizeS3() again? For example:
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index c3a6eb0ea..886405b52 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -2608,9 +2608,12 @@ struct AwsInstance : public ::arrow::internal::Executor::Resource {
ARROW_LOG(WARNING)
<< " arrow::fs::FinalizeS3 was not called even though S3 was initialized. "
"This could lead to a segmentation fault at exit";
- RegionResolver::ResetDefaultInstance();
- Aws::ShutdownAPI(aws_options_);
}
+ // Don't let S3 be shutdown until all Arrow threads are done using it
+ ARROW_UNUSED(arrow::internal::GetCpuThreadPool()->Shutdown());
+ ARROW_UNUSED(io::internal::GetIOThreadPool()->Shutdown());
+ RegionResolver::ResetDefaultInstance();
+ Aws::ShutdownAPI(aws_options_);
}
}
@@ -2670,16 +2673,8 @@ struct AwsInstance : public ::arrow::internal::Executor::Resource {
std::atomic<bool> is_finalized_;
};
-std::shared_ptr<AwsInstance> CreateAwsInstance() {
- auto instance = std::make_shared<AwsInstance>();
- // Don't let S3 be shutdown until all Arrow threads are done using it
- arrow::internal::GetCpuThreadPool()->KeepAlive(instance);
- io::internal::GetIOThreadPool()->KeepAlive(instance);
- return instance;
-}
-
AwsInstance& GetAwsInstance() {
- static auto instance = CreateAwsInstance();
+ static auto instance = std::make_shared<AwsInstance>();
return *instance;
}
I can avoid the crash in my environment with this patch.
@westonpace What do you think about this problem?
Component(s)
C++