-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Description
Describe the bug BUG描述
fork without exec：父进程中成功执行过 RPC 调用后，再 fork 一个子进程，在子进程中执行 RPC 调用会一直卡死在 bthread::futex_wait_private。相关堆栈如下：
#0 0x00007fb792d3c52d in syscall () from /lib64/libc.so.6
#1 0x00007fb795761fe4 in bthread::futex_wait_private (timeout=0x0, expected=0, addr1=0x7ffc42c1e3a0) at /home/brpc/src/bthread/sys_futex.h:40
#2 bthread::wait_pthread (pw=..., abstime=<optimized out>, abstime@entry=0x0) at /home/brpc/src/bthread/butex.cpp:150
#3 0x00007fb79576288e in bthread::butex_wait_from_pthread (prepend=<optimized out>, abstime=0x0, expected_value=1, b=0x3453f60, g=<optimized out>) at /home/brpc/src/bthread/butex.cpp:634
#4 bthread::butex_wait (arg=arg@entry=0x3453f60, expected_value=expected_value@entry=1, abstime=abstime@entry=0x0, prepend=prepend@entry=false) at /home/brpc/src/bthread/butex.cpp:667
#5 0x00007fb79576c93b in bthread_id_join (id=...) at /home/brpc/src/bthread/id.cpp:530
#6 0x00007fb7955ab225 in brpc::Join (id=..., id@entry=...) at /home/brpc/src/brpc/controller.cpp:581
#7 0x00007fb7955a62db in brpc::Channel::CallMethod (this=0x7fb795d93360 <hsmdClient+128>, method=0x33fecb8, controller_base=0x7ffc42c1e610, request=0x7ffc42c1e5a0, response=0x7ffc42c1e5c0, done=0x0) at /home/brpc/src/brpc/channel.cpp:589
#8 0x00007fb795528dd8 in master::pb::MasterService_Stub::QueryLeader (this=0x7ffc42c1e580, controller=0x7ffc42c1e610, request=0x7ffc42c1e5a0, response=0x7ffc42c1e5c0, done=0x0) at /home/hsm/libhsmd/../common/pb/master.pb.cc:25807
#9 0x00007fb7954c1187 in common::RpcClientMaster::QueryLeader (this=0x7ffc42c1e9d0, failLeaderEndpoint="", leader_name="master2", leader_endpoint="10.16.16.203:8002") at /home/hsm/libhsmd/../common/rpcclient/rpcclient_master.cpp:169
To Reproduce 复现
// Reproduction helper: (re)initializes channel_ with the options below, issues one
// synchronous MasterService::QueryLeader call through it, and returns the Status
// produced by CheckResult.
//
// NOTE(review): use_rdma, channel_, server_addr, port, failLeaderEndpoint, leader_name
// and leader_endpoint are not declared in this snippet; presumably they are members or
// globals of the reporter's RPC client class -- confirm against the full source.
Status CallRpc()
{
brpc::ChannelOptions options;
options.use_rdma = use_rdma;
options.protocol = "baidu_std";
options.connection_type = "single"; // Possible values: "single", "pooled", "short".
options.timeout_ms = 10000; /*milliseconds*/
options.max_retry = 3; // Default: 3
// Init() is invoked on every call to CallRpc(), i.e. in the parent and again in each
// forked child.
if (channel_->Init(server_addr.c_str(), port, &options) != 0)
{
LOG(ERROR) << "Fail to initialize channel " << server_addr + ":" << port;
// NOTE(review): returns `false` from a function declared to return Status; this relies
// on a bool -> Status conversion that is not visible here -- confirm.
return false;
}
master::pb::QueryLeaderRequest request;
master::pb::QueryLeaderResponse response;
request.set_failleaderendpoint(failLeaderEndpoint);
master::pb::MasterService_Stub stub(channel_);
brpc::Controller cntl;
// `done` is NULL. In the backtrace attached to this report, the call made with done=0x0
// is the one blocked in brpc::Channel::CallMethod -> brpc::Join -> bthread_id_join.
stub.QueryLeader(&cntl, &request, &response, NULL);
// The response fields are copied out before the call outcome is checked below.
leader_name = response.leader_name();
leader_endpoint = response.leader_endpoint();
Status status = CheckResult("RpcClientMaster::QueryLeader", &cntl, response.status().code(), response.status().message());
return status;
}
// Reproduction driver for the fork-without-exec hang: performs one RPC in the parent,
// then forks two children that each attempt the same RPC.
//
// Returns 0 in the parent once both fork() calls have been made. In a child, returns 0
// when `!CallRpc()` is true and -1 otherwise; the child returns from this function
// instead of continuing the loop.
//
// NOTE(review): the fork() result is only compared with 0, so a failed fork (-1) is
// treated like the parent path, and the parent never waits for its children.
// NOTE(review): the child returns 0 on the "failed" branch and -1 on the "success"
// branch, which looks inverted relative to the printed messages -- confirm intent.
int TestBrpcForkWithoutExec()
{
printf("main pid=%d\r\n",getpid());
// If CallRpc() has been called in the parent process, the children hang. If the parent
// never called CallRpc(), the children run normally.
CallRpc();
for(int i=0;i<2;i++)
{
pid_t p = fork();
if (p == 0)
{ // Executed in the child process.
printf("%d child pid=%d\r\n",i, getpid());
// Waits forever in bthread::futex_wait_private.
if (!CallRpc())
{
printf("%d child client.Init failed\r\n",i);
return 0;
}
else
{
printf("%d child client.Init success\r\n",i);
return -1;
}
}
}
return 0;
}
Expected behavior
fork()子进程后, 能提供接口重新初始化brpc环境, 以便子进程中仍能正常工作。
既然有GlobalInitializeOrDie()这个函数,为什么没有GlobalClear() 清理brpc环境呢?
尝试过在子进程中调用 channel_->Init() 之前，将 register_extensions_once 重置为 PTHREAD_ONCE_INIT，但是会报错，估计此方法不可行。
官方有没有更好的办法?
Versions
OS: CentOS Linux 8
Compiler: gcc 8.5
brpc: 1.13
protobuf: 3.5
Additional context/screenshots