diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..542a9e133f --- /dev/null +++ b/.gitignore @@ -0,0 +1,451 @@ +# Ignore objects and archives. +*.[oa] +Makefile +Makefile.in +*.l[oa] +*.gcno +coverage_run.sh +*.gcov +*.gcda +*_suite.log +.deps +.libs +*.m4 +autom4te.cache/ +buildutils/config.guess +buildutils/config.sub +buildutils/depcomp +buildutils/install-sh +buildutils/ltmain.sh +buildutils/missing +buildutils/modulefiles +buildutils/modulefiles.vers +buildutils/pbs_mkdirs +buildutils/self-extract-head-sh +buildutils/ylwrap +config.log +config.status +configure +contrib/blcr/checkpoint_script +contrib/blcr/restart_script +contrib/init.d/debian.pbs_mom +contrib/init.d/debian.pbs_sched +contrib/init.d/debian.pbs_server +contrib/init.d/debian.trqauthd +contrib/init.d/pbs_mom +contrib/init.d/pbs_sched +contrib/init.d/pbs_server +contrib/init.d/trqauthd +contrib/init.d/suse.pbs_mom +contrib/init.d/suse.pbs_sched +contrib/init.d/suse.pbs_server +contrib/init.d/suse.trqauthd +*.1 +*.3 +*.7 +*.8 +doc/soelim +libtool +pbs-config +src/cmds/nqs2pbs +src/cmds/pbs_track +src/cmds/pbsdsh +src/cmds/pbsnodes +src/cmds/qalter +src/cmds/qchkpt +src/cmds/qdel +src/cmds/qdisable +src/cmds/qenable +src/cmds/qgpumode +src/cmds/qgpureset +src/cmds/qhold +src/cmds/qmgr +src/cmds/qmove +src/cmds/qmsg +src/cmds/qorder +src/cmds/qrerun +src/cmds/qrls +src/cmds/qrun +src/cmds/qselect +src/cmds/qsig +src/cmds/qstart +src/cmds/qstat +src/cmds/qstop +src/cmds/qsub +src/cmds/qterm +src/daemon_client/trqauthd +src/drmaa/Doxyfile +src/include/pbs_config.h* +src/include/site_job_attr_def.h +src/include/site_job_attr_enum.h +src/include/site_qmgr_node_print.h +src/include/site_qmgr_que_print.h +src/include/site_qmgr_svr_print.h +src/include/site_que_attr_def.h +src/include/site_que_attr_enum.h +src/include/site_svr_attr_def.h +src/include/site_svr_attr_enum.h +src/include/stamp-h1 +src/momctl/momctl +src/resmom/pbs_demux +src/resmom/pbs_mom +src/scheduler.cc/pbs_sched 
+*.swp +src/server/pbs_server +src/tools/chk_tree +src/tools/hostn +src/tools/pbs_tclsh +src/tools/printjob +src/tools/printserverdb +src/tools/printtracking +src/tools/tracejob +tags +torque.spec +src/cmds/test/MXML/test_MXML +src/cmds/test/common_cmds/test_common_cmds +src/cmds/test/pbs_track/test_pbs_track +src/cmds/test/pbsdsh/test_pbsdsh +src/cmds/test/pbsnodes/test_pbsnodes +src/cmds/test/pbspd/test_pbspd +src/cmds/test/pbspoe/test_pbspoe +src/cmds/test/qalter/test_qalter +src/cmds/test/qchkpt/test_qchkpt +src/cmds/test/qdel/test_qdel +src/cmds/test/qdisable/test_qdisable +src/cmds/test/qenable/test_qenable +src/cmds/test/qgpumode/test_qgpumode +src/cmds/test/qgpureset/test_qgpureset +src/cmds/test/qhold/test_qhold +src/cmds/test/qmgr/test_qmgr +src/cmds/test/qmove/test_qmove +src/cmds/test/qmsg/test_qmsg +src/cmds/test/qorder/test_qorder +src/cmds/test/qrerun/test_qrerun +src/cmds/test/qrls/test_qrls +src/cmds/test/qrun/test_qrun +src/cmds/test/qselect/test_qselect +src/cmds/test/qsig/test_qsig +src/cmds/test/qstart/test_qstart +src/cmds/test/qstat/test_qstat +src/cmds/test/qstop/test_qstop +src/cmds/test/qsub_functions/build_test_files.sh +src/cmds/test/qsub_functions/test_x11_get_proto +src/cmds/test/qterm/test_qterm +src/daemon_client/test/trq_auth_daemon/test_trq_auth_daemon +src/lib/Libattr/test/attr_atomic/test_attr_atomic +src/lib/Libattr/test/attr_fn_acl/test_attr_fn_acl +src/lib/Libattr/test/attr_fn_arst/test_attr_fn_arst +src/lib/Libattr/test/attr_fn_b/test_attr_fn_b +src/lib/Libattr/test/attr_fn_c/test_attr_fn_c +src/lib/Libattr/test/attr_fn_hold/test_attr_fn_hold +src/lib/Libattr/test/attr_fn_intr/test_attr_fn_intr +src/lib/Libattr/test/attr_fn_l/test_attr_fn_l +src/lib/Libattr/test/attr_fn_ll/test_attr_fn_ll +src/lib/Libattr/test/attr_fn_resc/test_attr_fn_resc +src/lib/Libattr/test/attr_fn_size/test_attr_fn_size +src/lib/Libattr/test/attr_fn_str/test_attr_fn_str +src/lib/Libattr/test/attr_fn_time/test_attr_fn_time 
+src/lib/Libattr/test/attr_fn_tokens/test_attr_fn_tokens +src/lib/Libattr/test/attr_fn_tv/test_attr_fn_tv +src/lib/Libattr/test/attr_fn_unkn/test_attr_fn_unkn +src/lib/Libattr/test/attr_func/test_attr_func +src/lib/Libattr/test/attr_node_func/test_attr_node_func +src/lib/Libcmds/test/add_verify_resources/test_add_verify_resources +src/lib/Libcmds/test/ck_job_name/test_ck_job_name +src/lib/Libcmds/test/cnt2server/test_cnt2server +src/lib/Libcmds/test/cvtdate/test_cvtdate +src/lib/Libcmds/test/get_server/test_get_server +src/lib/Libcmds/test/locate_job/test_locate_job +src/lib/Libcmds/test/parse_at/test_parse_at +src/lib/Libcmds/test/parse_depend/test_parse_depend +src/lib/Libcmds/test/parse_destid/test_parse_destid +src/lib/Libcmds/test/parse_equal/test_parse_equal +src/lib/Libcmds/test/parse_jobid/test_parse_jobid +src/lib/Libcmds/test/parse_stage/test_parse_stage +src/lib/Libcmds/test/prepare_path/test_prepare_path +src/lib/Libcmds/test/prt_job_err/test_prt_job_err +src/lib/Libcmds/test/set_attr/test_set_attr +src/lib/Libcmds/test/set_resource/test_set_resource +src/lib/Libcsv/test/csv/test_csv +src/lib/Libdis/test/discui_/test_discui_ +src/lib/Libdis/test/discul_/test_discul_ +src/lib/Libdis/test/disi10d_/test_disi10d_ +src/lib/Libdis/test/disi10l_/test_disi10l_ +src/lib/Libdis/test/disiui_/test_disiui_ +src/lib/Libdis/test/disp10d_/test_disp10d_ +src/lib/Libdis/test/disp10l_/test_disp10l_ +src/lib/Libdis/test/disrcs/test_disrcs +src/lib/Libdis/test/disrd/test_disrd +src/lib/Libdis/test/disrf/test_disrf +src/lib/Libdis/test/disrfcs/test_disrfcs +src/lib/Libdis/test/disrfst/test_disrfst +src/lib/Libdis/test/disrl/test_disrl +src/lib/Libdis/test/disrl_/test_disrl_ +src/lib/Libdis/test/disrsc/test_disrsc +src/lib/Libdis/test/disrsi/test_disrsi +src/lib/Libdis/test/disrsi_/test_disrsi_ +src/lib/Libdis/test/disrsl/test_disrsl +src/lib/Libdis/test/disrsl_/test_disrsl_ +src/lib/Libdis/test/disrss/test_disrss +src/lib/Libdis/test/disrst/test_disrst 
+src/lib/Libdis/test/disruc/test_disruc +src/lib/Libdis/test/disrui/test_disrui +src/lib/Libdis/test/disrul/test_disrul +src/lib/Libdis/test/disrus/test_disrus +src/lib/Libdis/test/diswcs/test_diswcs +src/lib/Libdis/test/diswf/test_diswf +src/lib/Libdis/test/diswl_/test_diswl_ +src/lib/Libdis/test/diswsi/test_diswsi +src/lib/Libdis/test/diswsl/test_diswsl +src/lib/Libdis/test/diswui/test_diswui +src/lib/Libdis/test/diswui_/test_diswui_ +src/lib/Libdis/test/diswul/test_diswul +src/lib/Libifl/test/PBSD_gpuctrl2/test_PBSD_gpuctrl2 +src/lib/Libifl/test/PBSD_manage2/test_PBSD_manage2 +src/lib/Libifl/test/PBSD_manager_caps/test_PBSD_manager_caps +src/lib/Libifl/test/PBSD_msg2/test_PBSD_msg2 +src/lib/Libifl/test/PBSD_rdrpy/test_PBSD_rdrpy +src/lib/Libifl/test/PBSD_sig2/test_PBSD_sig2 +src/lib/Libifl/test/PBSD_status/test_PBSD_status +src/lib/Libifl/test/PBSD_status2/test_PBSD_status2 +src/lib/Libifl/test/PBSD_submit_caps/test_PBSD_submit_caps +src/lib/Libifl/test/PBS_attr/test_PBS_attr +src/lib/Libifl/test/dec_Authen/test_dec_Authen +src/lib/Libifl/test/dec_CpyFil/test_dec_CpyFil +src/lib/Libifl/test/dec_Gpu/test_dec_Gpu +src/lib/Libifl/test/dec_JobCred/test_dec_JobCred +src/lib/Libifl/test/dec_JobFile/test_dec_JobFile +src/lib/Libifl/test/dec_JobId/test_dec_JobId +src/lib/Libifl/test/dec_JobObit/test_dec_JobObit +src/lib/Libifl/test/dec_Manage/test_dec_Manage +src/lib/Libifl/test/dec_MoveJob/test_dec_MoveJob +src/lib/Libifl/test/dec_MsgJob/test_dec_MsgJob +src/lib/Libifl/test/dec_QueueJob/test_dec_QueueJob +src/lib/Libifl/test/dec_Reg/test_dec_Reg +src/lib/Libifl/test/dec_ReqExt/test_dec_ReqExt +src/lib/Libifl/test/dec_ReqHdr/test_dec_ReqHdr +src/lib/Libifl/test/dec_Resc/test_dec_Resc +src/lib/Libifl/test/dec_ReturnFile/test_dec_ReturnFile +src/lib/Libifl/test/dec_RunJob/test_dec_RunJob +src/lib/Libifl/test/dec_Shut/test_dec_Shut +src/lib/Libifl/test/dec_Sig/test_dec_Sig +src/lib/Libifl/test/dec_Status/test_dec_Status +src/lib/Libifl/test/dec_Track/test_dec_Track 
+src/lib/Libifl/test/dec_attrl/test_dec_attrl +src/lib/Libifl/test/dec_attropl/test_dec_attropl +src/lib/Libifl/test/dec_rpyc/test_dec_rpyc +src/lib/Libifl/test/dec_rpys/test_dec_rpys +src/lib/Libifl/test/dec_svrattrl/test_dec_svrattrl +src/lib/Libifl/test/enc_CpyFil/test_enc_CpyFil +src/lib/Libifl/test/enc_Gpu/test_enc_Gpu +src/lib/Libifl/test/enc_JobCred/test_enc_JobCred +src/lib/Libifl/test/enc_JobFile/test_enc_JobFile +src/lib/Libifl/test/enc_JobId/test_enc_JobId +src/lib/Libifl/test/enc_JobObit/test_enc_JobObit +src/lib/Libifl/test/enc_Manage/test_enc_Manage +src/lib/Libifl/test/enc_MoveJob/test_enc_MoveJob +src/lib/Libifl/test/enc_MsgJob/test_enc_MsgJob +src/lib/Libifl/test/enc_QueueJob/test_enc_QueueJob +src/lib/Libifl/test/enc_QueueJob_hash/test_enc_QueueJob_hash +src/lib/Libifl/test/enc_Reg/test_enc_Reg +src/lib/Libifl/test/enc_ReqExt/test_enc_ReqExt +src/lib/Libifl/test/enc_ReqHdr/test_enc_ReqHdr +src/lib/Libifl/test/enc_ReturnFile/test_enc_ReturnFile +src/lib/Libifl/test/enc_RunJob/test_enc_RunJob +src/lib/Libifl/test/enc_Shut/test_enc_Shut +src/lib/Libifl/test/enc_Sig/test_enc_Sig +src/lib/Libifl/test/enc_Status/test_enc_Status +src/lib/Libifl/test/enc_Track/test_enc_Track +src/lib/Libifl/test/enc_attrl/test_enc_attrl +src/lib/Libifl/test/enc_attropl/test_enc_attropl +src/lib/Libifl/test/enc_attropl_hash/test_enc_attropl_hash +src/lib/Libifl/test/enc_reply/test_enc_reply +src/lib/Libifl/test/enc_svrattrl/test_enc_svrattrl +src/lib/Libifl/test/get_svrport/test_get_svrport +src/lib/Libifl/test/list_link/test_list_link +src/lib/Libifl/test/nonblock/test_nonblock +src/lib/Libifl/test/pbsD_alterjo/test_pbsD_alterjo +src/lib/Libifl/test/pbsD_asyrun/test_pbsD_asyrun +src/lib/Libifl/test/pbsD_chkptjob/test_pbsD_chkptjob +src/lib/Libifl/test/pbsD_connect/test_pbsD_connect +src/lib/Libifl/test/pbsD_deljob/test_pbsD_deljob +src/lib/Libifl/test/pbsD_gpuctrl/test_pbsD_gpuctrl +src/lib/Libifl/test/pbsD_holdjob/test_pbsD_holdjob 
+src/lib/Libifl/test/pbsD_locjob/test_pbsD_locjob +src/lib/Libifl/test/pbsD_manager/test_pbsD_manager +src/lib/Libifl/test/pbsD_movejob/test_pbsD_movejob +src/lib/Libifl/test/pbsD_msgjob/test_pbsD_msgjob +src/lib/Libifl/test/pbsD_orderjo/test_pbsD_orderjo +src/lib/Libifl/test/pbsD_rerunjo/test_pbsD_rerunjo +src/lib/Libifl/test/pbsD_resc/test_pbsD_resc +src/lib/Libifl/test/pbsD_rlsjob/test_pbsD_rlsjob +src/lib/Libifl/test/pbsD_runjob/test_pbsD_runjob +src/lib/Libifl/test/pbsD_selectj/test_pbsD_selectj +src/lib/Libifl/test/pbsD_sigjob/test_pbsD_sigjob +src/lib/Libifl/test/pbsD_stagein/test_pbsD_stagein +src/lib/Libifl/test/pbsD_statjob/test_pbsD_statjob +src/lib/Libifl/test/pbsD_statnode/test_pbsD_statnode +src/lib/Libifl/test/pbsD_statque/test_pbsD_statque +src/lib/Libifl/test/pbsD_statsrv/test_pbsD_statsrv +src/lib/Libifl/test/pbsD_submit/test_pbsD_submit +src/lib/Libifl/test/pbsD_submit_hash/test_pbsD_submit_hash +src/lib/Libifl/test/pbsD_termin/test_pbsD_termin +src/lib/Libifl/test/pbs_geterrmg/test_pbs_geterrmg +src/lib/Libifl/test/pbs_statfree/test_pbs_statfree +src/lib/Libifl/test/tcp_dis/test_tcp_dis +src/lib/Libifl/test/tm/test_tm +src/lib/Libifl/test/torquecfg/test_torquecfg +src/lib/Libifl/test/trq_auth/test_trq_auth +src/lib/Liblog/test/chk_file_sec/test_chk_file_sec +src/lib/Liblog/test/log_event/test_log_event +src/lib/Liblog/test/pbs_log/test_pbs_log +src/lib/Liblog/test/pbs_messages/test_pbs_messages +src/lib/Liblog/test/setup_env/test_setup_env +src/lib/Libnet/test/conn_table/test_conn_table +src/lib/Libnet/test/get_hostaddr/test_get_hostaddr +src/lib/Libnet/test/get_hostname/test_get_hostname +src/lib/Libnet/test/md5/test_md5 +src/lib/Libnet/test/net_client/test_net_client +src/lib/Libnet/test/net_common/test_net_common +src/lib/Libnet/test/net_server/test_net_server +src/lib/Libnet/test/net_set_clse/test_net_set_clse +src/lib/Libnet/test/port_forwarding/test_port_forwarding +src/lib/Libnet/test/rm/test_rm 
+src/lib/Libnet/test/server_core/test_server_core +src/lib/Libsite/test/site_allow_u/test_site_allow_u +src/lib/Libsite/test/site_alt_rte/test_site_alt_rte +src/lib/Libsite/test/site_check_u/test_site_check_u +src/lib/Libsite/test/site_map_usr/test_site_map_usr +src/lib/Libsite/test/site_mom_chu/test_site_mom_chu +src/lib/Libsite/test/site_mom_ckp/test_site_mom_ckp +src/lib/Libsite/test/site_mom_jst/test_site_mom_jst +src/lib/Libutils/test/u_MXML/test_u_MXML +src/lib/Libutils/test/u_dynamic_string/test_u_dynamic_string +src/lib/Libutils/test/u_groups/test_u_groups +src/lib/Libutils/test/u_hash_map_structs/ct_u_hash_map_structs +src/lib/Libutils/test/u_hash_table/test_u_hash_table +src/lib/Libutils/test/u_lock_ctl/test_u_lock_ctl +src/lib/Libutils/test/u_memmgr/test_u_memmgr +src/lib/Libutils/test/u_mom_hierarchy/test_u_mom_hierarchy +src/lib/Libutils/test/u_mu/test_u_mu +src/lib/Libutils/test/u_resizable_array/test_u_resizable_array +src/lib/Libutils/test/u_threadpool/test_u_threadpool +src/lib/Libutils/test/u_tree/test_u_tree +src/lib/Libutils/test/u_users/test_u_users +src/lib/Libutils/test/u_xml/test_u_xml +src/momctl/test/momctl/test_momctl +src/resmom/linux/test/mom_mach/test_mom_mach +src/resmom/linux/test/mom_start/test_mom_start +src/resmom/linux/test/pe_input/test_pe_input +src/resmom/test/alps_reservations/test_alps_reservations +src/resmom/test/catch_child/1.sysname.ac.JB +src/resmom/test/catch_child/2.sysname.ac.JB +src/resmom/test/catch_child/3.sysname.ac.JB +src/resmom/test/catch_child/build_test_files.sh +src/resmom/test/catch_child/test_catch_child +src/resmom/test/catch_child/test_exit_mom_job +src/resmom/test/catch_child/test_get_node +src/resmom/test/catch_child/test_init_abort_jobs +src/resmom/test/catch_child/test_mom_deljob +src/resmom/test/catch_child/test_obit_reply +src/resmom/test/catch_child/test_post_epilogue +src/resmom/test/catch_child/test_preobit_reply +src/resmom/test/catch_child/test_scan_for_exiting 
+src/resmom/test/checkpoint/test_checkpoint +src/resmom/test/generate_alps_status/test_generate_alps_status +src/resmom/test/mom_comm/test_mom_comm +src/resmom/test/mom_inter/test_mom_inter +src/resmom/test/mom_job_func/test_mom_job_func +src/resmom/test/mom_main/test_mom_main +src/resmom/test/mom_process_request/test_mom_process_request +src/resmom/test/mom_req_quejob/test_mom_req_quejob +src/resmom/test/mom_server/test_mom_server +src/resmom/test/pbs_demux/test_pbs_demux +src/resmom/test/prolog/test_prolog +src/resmom/test/release_reservation/test_release_reservation +src/resmom/test/requests/test_requests +src/resmom/test/start_exec/test_start_exec +src/resmom/test/tmsock_recov/test_tmsock_recov +src/server/test/accounting/test_accounting +src/server/test/array_func/test_array_func +src/server/test/array_upgrade/test_array_upgrade +src/server/test/attr_recov/test_attr_recov +src/server/test/batch_request/test_batch_request +src/server/test/dis_read/test_dis_read +src/server/test/display_alps_status/test_display_alps_status +src/server/test/exiting_jobs/test_exiting_jobs +src/server/test/geteusernam/test_geteusernam +src/server/test/issue_request/test_issue_request +src/server/test/job_container/test_job_container +src/server/test/job_func/test_job_func +src/server/test/job_func/test_record_jobinfo +src/server/test/job_qs_upgrade/test_job_qs_upgrade +src/server/test/job_recov/test_job_recov +src/server/test/job_recycler/test_job_recycler +src/server/test/job_route/test_job_route +src/server/test/login_nodes/test_login_nodes +src/server/test/node_func/test_node_func +src/server/test/node_manager/test_node_manager +src/server/test/pbsd_init/test_pbsd_init +src/server/test/pbsd_main/test_pbsd_main +src/server/test/process_alps_status/test_process_alps_status +src/server/test/process_mom_update/test_process_mom_update +src/server/test/process_request/test_process_request +src/server/test/queue_func/test_queue_func +src/server/test/queue_recov/test_queue_recov 
+src/server/test/receive_mom_communication/test_receive_mom_communication +src/server/test/reply_send/test_reply_send +src/server/test/req_delete/test_req_delete +src/server/test/req_deletearray/test_req_deletearray +src/server/test/req_getcred/test_req_getcred +src/server/test/req_gpuctrl/test_req_gpuctrl +src/server/test/req_holdarray/test_req_holdarray +src/server/test/req_holdjob/test_req_holdjob +src/server/test/req_jobobit/test_req_jobobit +src/server/test/req_locate/test_req_locate +src/server/test/req_manager/test_req_manager +src/server/test/req_message/test_req_message +src/server/test/req_modify/test_req_modify +src/server/test/req_movejob/test_req_movejob +src/server/test/req_quejob/test_req_quejob +src/server/test/req_register/test_req_register +src/server/test/req_rerun/test_req_rerun +src/server/test/req_rescq/test_req_rescq +src/server/test/req_runjob/test_req_runjob +src/server/test/req_select/test_req_select +src/server/test/req_shutdown/test_req_shutdown +src/server/test/req_signal/test_req_signal +src/server/test/req_stat/test_req_stat +src/server/test/req_tokens/test_req_tokens +src/server/test/req_track/test_req_track +src/server/test/resc_def_all/test_resc_def_all +src/server/test/run_sched/test_run_sched +src/server/test/stat_job/test_stat_job +src/server/test/svr_chk_owner/test_svr_chk_owner +src/server/test/svr_connect/test_svr_connect +src/server/test/svr_format_job/test_svr_format_job +src/server/test/svr_func/test_svr_func +src/server/test/svr_jobfunc/test_svr_jobfunc +src/server/test/svr_mail/test_svr_mail +src/server/test/svr_movejob/test_svr_movejob +src/server/test/svr_recov/test_svr_recov +src/server/test/svr_resccost/test_svr_resccost +src/server/test/svr_task/test_svr_task +src/server/test/track_alps_reservations/test_track_alps_reservations +src/server/test/user_info/test_user_info +src/tools/test/chk_tree/test_chk_tree +src/tools/test/hostn/test_hostn +src/tools/test/pbsTclInit/test_pbsTclInit 
+src/tools/test/printjob/test_printjob +src/tools/test/printserverdb/test_printserverdb +src/tools/test/printtracking/test_printtracking +src/tools/test/tracejob/test_tracejob +.project +.cproject +cscope.out +*.orig +*.rej +*.patch +tmpdiff +tmporig +tmpfile diff --git a/CHANGELOG b/CHANGELOG index 3ddc1f83e7..80541db169 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -73,6 +73,8 @@ c - crash b - bug fix e - enhancement f - new feature n - note set to the login nodes. TRQ-1482. b - Fix a bug that using -V got rid of -v. TRQ-1457. b - Make qsub -I -x work again. TRQ-1483. + c - Fix a potential crash when getting the status of a login node in cray mode. + TRQ-1491. 4.1.3 b - fix a security loophole that potentially allowed an interactive job to run diff --git a/Makefile.am b/Makefile.am index ca8fdb7f2e..c74eb44093 100644 --- a/Makefile.am +++ b/Makefile.am @@ -8,6 +8,7 @@ EXTRA_DIST = acinclude.m4 \ CHANGELOG \ configure.ac \ cov_file_results.pl \ + current_hash \ Doxyfile \ INSTALL \ INSTALL.GNU \ diff --git a/configure.ac b/configure.ac index 22a3878dc6..32f3df5e9f 100644 --- a/configure.ac +++ b/configure.ac @@ -69,13 +69,16 @@ dnl be created in the include directory. AM_CONFIG_HEADER([src/include/pbs_config.h]) dnl -dnl Find our svn revision number for program outputs +dnl Find our git revision hash for program outputs dnl -AC_PATH_PROG(svnversioncommand, svnversion) -if test "X$svnversioncommand" != "X" && test `$svnversioncommand -n '.'` != "exported"; then - AC_DEFINE_UNQUOTED(SVN_VERSION, ["`svnversion -n`"], [repository svn version]) +githash="`git rev-parse HEAD`" +success=$? 
+if test "X$success" = "X0" ; then + AC_DEFINE_UNQUOTED(GIT_HASH, ["$githash"], [repository git hash]) else - AC_DEFINE_UNQUOTED(SVN_VERSION, ["unknown"], [repository svn version]) + dnl current hash is a file checked in to the git repo in the base directory + dnl containing the most recent hash + AC_DEFINE_UNQUOTED(GIT_HASH, ["`cat current_hash`"], [repository git hash]) fi diff --git a/current_hash b/current_hash new file mode 100644 index 0000000000..710dba2cb8 --- /dev/null +++ b/current_hash @@ -0,0 +1 @@ +a2498b31a71e656959f38ce8a88024fbc869eaad diff --git a/src/cmds/pbsnodes.c b/src/cmds/pbsnodes.c index 81073acf62..170c26577c 100644 --- a/src/cmds/pbsnodes.c +++ b/src/cmds/pbsnodes.c @@ -712,7 +712,7 @@ int main( if ((optarg != NULL) && !strcmp(optarg, "version")) { fprintf(stderr, "Version: %s\nRevision: %s\n", - PACKAGE_VERSION, SVN_VERSION); + PACKAGE_VERSION, GIT_HASH); exit(0); } diff --git a/src/cmds/qstat.c b/src/cmds/qstat.c index 7be9cc09d1..4e1c6b6245 100644 --- a/src/cmds/qstat.c +++ b/src/cmds/qstat.c @@ -2207,7 +2207,7 @@ int main( if ((optarg != NULL) && !strcmp(optarg, "version")) { fprintf(stderr, "Version: %s\nRevision: %s\n", - PACKAGE_VERSION, SVN_VERSION); + PACKAGE_VERSION, GIT_HASH); exit(0); } diff --git a/src/include/pbs_job.h b/src/include/pbs_job.h index f306c20f2f..b922f8873d 100644 --- a/src/include/pbs_job.h +++ b/src/include/pbs_job.h @@ -1087,7 +1087,7 @@ extern char *prefix_std_file(job *, dynamic_string *, int); extern char *add_std_filename(job *, char *, int, dynamic_string *); extern int set_jobexid(job *, pbs_attribute *, char *); extern int site_check_user_map(job *, char *, char *, int); -int svr_dequejob(char *, int); +int svr_dequejob(job *, int); extern int svr_enquejob(job *, int, int); extern void svr_evaljobstate(job *, int *, int *, int); extern void svr_mailowner(job *, int, int, char *); diff --git a/src/include/pbs_nodes.h b/src/include/pbs_nodes.h index 251fb4ba05..4416e08b0c 100644 --- 
a/src/include/pbs_nodes.h +++ b/src/include/pbs_nodes.h @@ -332,6 +332,8 @@ struct pbsnode struct AvlNode *node_boards; /* private tree of numa nodes */ char *numa_str; /* comma-delimited string of processor values */ char *gpu_str; /* comma-delimited string of the number of gpus for each nodeboard */ + + unsigned char nd_mom_reported_down;/* notes that the mom reported its own shutdown */ unsigned char nd_is_alps_reporter; unsigned char nd_is_alps_login; diff --git a/src/lib/Libifl/tm.c b/src/lib/Libifl/tm.c index 6a7649adc8..3f2f6739df 100644 --- a/src/lib/Libifl/tm.c +++ b/src/lib/Libifl/tm.c @@ -170,8 +170,10 @@ static event_info *event_hash[EVENT_HASH]; /* ** Find an event number or return a NULL. */ -static event_info * -find_event(tm_event_t x) +event_info *find_event( + + tm_event_t x) + { event_info *ep; @@ -187,10 +189,11 @@ find_event(tm_event_t x) /* ** Delete an event. */ -static void -del_event(event_info *ep) - { +void del_event( + + event_info *ep) + { /* unlink event from hash list */ if (ep->e_prev) ep->e_prev->e_next = ep->e_next; @@ -245,8 +248,8 @@ del_event(event_info *ep) /* ** Create a new event number. */ -static tm_event_t -new_event(void) +tm_event_t new_event(void) + { static tm_event_t next_event = TM_NULL_EVENT + 1; event_info *ep; @@ -275,8 +278,12 @@ new_event(void) /* ** Link new event number into the above hash table. */ -static void -add_event(tm_event_t event, tm_node_id node, int type, void *info) +void add_event( + + tm_event_t event, + tm_node_id node, + int type, + void *info) { event_info *ep, **head; @@ -322,8 +329,10 @@ static task_info *task_hash[TASK_HASH]; /* ** Find a task table entry for a given task number or return a NULL. */ -static task_info * -find_task(tm_task_id x) +task_info *find_task( + + tm_task_id x) + { task_info *tp; @@ -340,8 +349,12 @@ find_task(tm_task_id x) ** Create a new task entry and link it into the above hash ** table. 
*/ -static tm_task_id -new_task(char *jobid, tm_node_id node, tm_task_id task) +tm_task_id new_task( + + char *jobid, + tm_node_id node, + tm_task_id task) + { task_info *tp, **head; @@ -559,8 +572,8 @@ static int localmom(void) static int startcom( - int com, - tm_event_t event, + int com, + tm_event_t event, struct tcp_chan **pchan) { @@ -943,6 +956,7 @@ int tm_obit( tm_task_id tid, /* in */ int *obitval, /* out */ tm_event_t *event) /* out */ + { int rc = TM_SUCCESS; task_info *tp; @@ -1002,14 +1016,14 @@ struct taskhold ** is a valid node number, it returns the event that the list of ** tasks on is available. */ -int -tm_taskinfo( - tm_node_id node, /* in */ +int tm_taskinfo( + + tm_node_id node, /* in */ tm_task_id *tid_list, /* out */ - int list_size, /* in */ - int *ntasks, /* out */ - tm_event_t *event /* out */ -) + int list_size, /* in */ + int *ntasks, /* out */ + tm_event_t *event) /* out */ + { struct taskhold *thold; struct tcp_chan *chan = NULL; @@ -1053,11 +1067,11 @@ tm_taskinfo( ** Returns the job-relative node number that holds or held . In ** case of an error, it returns TM_ERROR_NODE. */ -int -tm_atnode( - tm_task_id tid, /* in */ - tm_node_id *node /* out */ -) +int tm_atnode( + + tm_task_id tid, /* in */ + tm_node_id *node) /* out */ + { task_info *tp; @@ -1085,12 +1099,13 @@ struct reschold ** is available. It returns ERROR_EVENT otherwise. */ int tm_rescinfo( - tm_node_id node, /* in */ - char *resource, /* out */ - int len, /* in */ + + tm_node_id node, /* in */ + char *resource, /* out */ + int len, /* in */ tm_event_t *event) /* out */ - { + { struct reschold *rhold; struct tcp_chan *chan = NULL; @@ -1125,7 +1140,10 @@ int tm_rescinfo( add_event(*event, node, TM_RESOURCES, (void *)rhold); return TM_SUCCESS; - } + } /* END tm_rescinfo() */ + + + /* ** Posts the first of a copy of * within MOM on @@ -1133,13 +1151,13 @@ int tm_rescinfo( ** non-NULL, it returns the event that the effort to post * ** is complete. 
It returns ERROR_EVENT otherwise. */ -int -tm_publish( - char *name, /* in */ - void *info, /* in */ - int len, /* in */ - tm_event_t *event /* out */ -) +int tm_publish( + + char *name, /* in */ + void *info, /* in */ + int len, /* in */ + tm_event_t *event) /* out */ + { int rc = TM_SUCCESS; struct tcp_chan *chan = NULL; @@ -1173,7 +1191,7 @@ tm_publish( DIS_tcp_cleanup(chan); return rc; - } + } /* tm_publish() */ struct infohold { @@ -1187,18 +1205,18 @@ struct infohold ** is a valid task, it returns the event that the ** string specifying the info posted by is available. */ -int -tm_subscribe( - tm_task_id tid, /* in */ - char *name, /* in */ - void *info, /* out */ - int len, /* in */ - int *info_len, /* out */ - tm_event_t *event /* out */ -) +int tm_subscribe( + + tm_task_id tid, /* in */ + char *name, /* in */ + void *info, /* out */ + int len, /* in */ + int *info_len,/* out */ + tm_event_t *event) /* out */ + { - int rc = TM_SUCCESS; - task_info *tp; + int rc = TM_SUCCESS; + task_info *tp; struct tcp_chan *chan = NULL; struct infohold *ihold; diff --git a/src/resmom/linux/mom_mach.c b/src/resmom/linux/mom_mach.c index 8c0164c3ba..e0cac35640 100644 --- a/src/resmom/linux/mom_mach.c +++ b/src/resmom/linux/mom_mach.c @@ -2690,6 +2690,9 @@ int mom_close_poll(void) if (proc_array != NULL) { free(proc_array); + proc_array = NULL; + nproc = 0; + max_proc = TBL_INC; } return(PBSE_NONE); diff --git a/src/resmom/mom_main.c b/src/resmom/mom_main.c index c451c7de93..6645c1bfc7 100644 --- a/src/resmom/mom_main.c +++ b/src/resmom/mom_main.c @@ -1542,6 +1542,9 @@ void checkret( return; } /* END checkret() */ + + + char *skipwhite( char *str) @@ -2988,7 +2991,8 @@ static u_long setvarattr( /* step forward to end of TTL */ - while (!isspace(*ptr)) + while ((!isspace(*ptr)) && + (*ptr != '\0')) ptr++; if (*ptr == '\0') @@ -3504,7 +3508,6 @@ int read_config( char *server_list_ptr; char *tp; - if (LOGLEVEL >= 3) { sprintf(log_buffer, "updating configuration using file '%s'", 
@@ -3621,6 +3624,8 @@ int read_config( { linenum++; + memset(name, 0, sizeof(name)); + if (line[0] == '#') /* comment */ { memset(line, 0, sizeof(line)); @@ -3644,8 +3649,7 @@ int read_config( if (LOGLEVEL >= 6) { - sprintf(log_buffer, "processing config line '%.64s'", - str); + sprintf(log_buffer, "processing config line '%.64s'", str); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, log_buffer); } @@ -3690,7 +3694,7 @@ int read_config( log_err(-1, __func__, log_buffer); } - + memset(line, 0, sizeof(line)); continue; @@ -3699,7 +3703,7 @@ int read_config( add_static(str, file, linenum); nconfig++; - + memset(line, 0, sizeof(line)); } /* END while (fgets()) */ @@ -4486,6 +4490,7 @@ void cleanup_aux() } } } + closedir(auxdir); } } /* END cleanup_aux() */ @@ -5573,6 +5578,8 @@ int tcp_read_proto_version( tmpT = pbs_tcp_timeout; + pbs_tcp_timeout = 0; + *proto = disrsi(chan, &rc); if (tmpT > 0) @@ -6450,6 +6457,9 @@ void MOMCheckRestart(void) { time_t newmtime; + /* make sure we're not making a mess in the aux dir */ + cleanup_aux(); + if ((MOMConfigRestart <= 0) || (MOMExeTime <= 0)) { return; @@ -6477,9 +6487,6 @@ void MOMCheckRestart(void) DBPRT(("%s\n", log_buffer)); } - - /* make sure we're not making a mess in the aux dir */ - cleanup_aux(); } /* END MOMCheckRestart() */ @@ -6750,7 +6757,7 @@ void parse_command_line( else if (!strcmp(optarg, "version")) { printf("Version: %s\nRevision: %s\n", - PACKAGE_VERSION, SVN_VERSION); + PACKAGE_VERSION, GIT_HASH); exit(0); } diff --git a/src/server/job_func.c b/src/server/job_func.c index 60e55b5a01..413bbb6fb4 100644 --- a/src/server/job_func.c +++ b/src/server/job_func.c @@ -1680,6 +1680,9 @@ int svr_job_purge( int job_is_array_template; unsigned int job_has_checkpoint_file; int job_has_arraystruct; + int do_delete_array = FALSE; + job_array *pa = NULL; + char array_id[PBS_MAXSVRJOBID+1]; strcpy(job_id, pjob->ji_qs.ji_jobid); strcpy(job_fileprefix, pjob->ji_qs.ji_fileprefix); @@ -1724,7 +1727,7 @@ int 
svr_job_purge( { /* pa->ai_mutex will come out locked after the call to get_jobs_array */ - job_array *pa = get_jobs_array(&pjob); + pa = get_jobs_array(&pjob); if (pjob != NULL) { @@ -1743,7 +1746,9 @@ int svr_job_purge( if (pa->ai_qs.num_purged == pa->ai_qs.num_jobs) { /* array_delete will unlock pa->ai_mutex */ - array_delete(pa); + strcpy(array_id, pjob->ji_arraystructid); + do_delete_array = TRUE; + unlock_ai_mutex(pa, __func__, "1a", LOGLEVEL); } else { @@ -1765,12 +1770,10 @@ int svr_job_purge( { int need_deque = !pjob->ji_cold_restart; - unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); - /* jobs that are being deleted after a cold restart * haven't been queued */ if (need_deque == TRUE) - rc = svr_dequejob(job_id, FALSE); + rc = svr_dequejob(pjob, FALSE); if (rc != PBSE_JOBNOTFOUND) { @@ -1890,6 +1893,13 @@ int svr_job_purge( log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, job_id, log_buf); } + if (do_delete_array == TRUE) + { + pa = get_array(array_id); + if (pa != NULL) + array_delete(pa); + } + return(PBSE_NONE); } /* END svr_job_purge() */ diff --git a/src/server/job_recov.c b/src/server/job_recov.c index 15fdac7ae3..8fdb1fac83 100644 --- a/src/server/job_recov.c +++ b/src/server/job_recov.c @@ -176,7 +176,7 @@ int add_to_ms_list(char *node_name, job *pjob); int job_save( job *pjob, /* pointer to job structure */ - int updatetype, /* 0=quick, 1=full */ + int updatetype, /* 0=quick, 1=full, 2=new */ int mom_port) /* if 0 ignore otherwise append to end of job name. 
this is for multi-mom mode */ { diff --git a/src/server/job_route.c b/src/server/job_route.c index 0da21a00e3..1864f39323 100644 --- a/src/server/job_route.c +++ b/src/server/job_route.c @@ -119,7 +119,7 @@ #define ROUTE_RETRY_TIME 10 /* External functions called */ -int svr_movejob(job *, char *, int *, struct batch_request *, int); +int svr_movejob(job *, char *, int *, struct batch_request *); long count_proc(char *spec); /* Local Functions */ @@ -275,7 +275,7 @@ int default_router( if (is_bad_dest(jobp, destination)) continue; - switch (svr_movejob(jobp, destination, &local_errno, NULL, FALSE)) + switch (svr_movejob(jobp, destination, &local_errno, NULL)) { case ROUTE_PERM_FAILURE: /* permanent failure */ diff --git a/src/server/node_func.c b/src/server/node_func.c index df409c4571..b095b0cd06 100644 --- a/src/server/node_func.c +++ b/src/server/node_func.c @@ -556,18 +556,18 @@ int login_encode_jobs( int status_nodeattrib( - svrattrl *pal, /*an svrattrl from the request */ - attribute_def *padef, /*the defined node attributes */ - struct pbsnode *pnode, /*no longer an pbs_attribute ptr */ - int limit, /*number of array elts in padef */ - int priv, /*requester's privilege */ + svrattrl *pal, /*an svrattrl from the request */ + attribute_def *padef, /*the defined node attributes */ + struct pbsnode *pnode, /*no longer an pbs_attribute ptr */ + int limit, /*number of array elts in padef */ + int priv, /*requester's privilege */ tlist_head *phead, /*heads list of svrattrl structs that hang */ - /*off the brp_attr member of the status sub*/ - /*structure in the request's "reply area" */ + /*off the brp_attr member of the status sub*/ + /*structure in the request's "reply area" */ - int *bad) /*if node-pbs_attribute error, record it's*/ -/*list position here */ + int *bad) /*if node-pbs_attribute error, record it's*/ + /*list position here */ { int i; @@ -584,34 +584,34 @@ int status_nodeattrib( { /*set up attributes using data from node*/ - if (!strcmp((padef + 
i)->at_name, ATTR_NODE_state)) + if (i == ND_ATR_state) atemp[i].at_val.at_short = pnode->nd_state; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_properties)) + else if (i == ND_ATR_properties) atemp[i].at_val.at_arst = pnode->nd_prop; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_status)) + else if (i == ND_ATR_status) atemp[i].at_val.at_arst = pnode->nd_status; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_ntype)) + else if (i == ND_ATR_ntype) atemp[i].at_val.at_short = pnode->nd_ntype; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_jobs)) + else if (i == ND_ATR_jobs) atemp[i].at_val.at_jinfo = pnode; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_np)) + else if (i == ND_ATR_np) atemp[i].at_val.at_long = pnode->nd_nsn; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_note)) + else if (i == ND_ATR_note) atemp[i].at_val.at_str = pnode->nd_note; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_mom_port)) + else if (i == ND_ATR_mom_port) atemp[i].at_val.at_long = pnode->nd_mom_port; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_mom_rm_port)) + else if (i == ND_ATR_mom_rm_port) atemp[i].at_val.at_long = pnode->nd_mom_rm_port; /* skip NUMA attributes */ - else if (!strcmp((padef + i)->at_name, ATTR_NODE_num_node_boards)) + else if (i == ND_ATR_num_node_boards) continue; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_numa_str)) + else if (i == ND_ATR_numa_str) continue; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_gpus_str)) + else if (i == ND_ATR_gpus_str) continue; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_gpustatus)) + else if (i == ND_ATR_gpustatus) atemp[i].at_val.at_arst = pnode->nd_gpustatus; - else if (!strcmp((padef + i)->at_name, ATTR_NODE_gpus)) + else if (i == ND_ATR_gpus) { atemp[i].at_val.at_long = pnode->nd_ngpus; } @@ -654,6 +654,9 @@ int status_nodeattrib( rc = login_encode_jobs(pnode, phead); else { + if (index == ND_ATR_status) + atemp[index].at_val.at_arst = pnode->nd_status; + rc = ((padef + 
index)->at_encode( &atemp[index], phead, @@ -692,6 +695,9 @@ int status_nodeattrib( else if (((padef + index)->at_flags & priv) && !((padef + index)->at_flags & ATR_DFLAG_NOSTAT)) { + if (index == ND_ATR_status) + atemp[index].at_val.at_arst = pnode->nd_status; + rc = (padef + index)->at_encode( &atemp[index], phead, diff --git a/src/server/node_manager.c b/src/server/node_manager.c index 1f65c49fa2..e4078ca3c2 100644 --- a/src/server/node_manager.c +++ b/src/server/node_manager.c @@ -1594,7 +1594,22 @@ int process_status_info( if ((current = get_node_from_str(str, name, current)) == NULL) break; else + { + /* There is a race condition if using a mom hierarchy and manually + * shutting down a non-level 1 mom: if its message that the mom is + * shutting down gets there before its last status update, the node + * can incorrectly be set as free again. For that reason, only set + * a mom back up if its reporting for itself. */ + if ((strcmp(name, str + strlen("node=")) != 0) && + (current->nd_mom_reported_down == TRUE)) + { + dont_change_state = TRUE; + } + + current->nd_mom_reported_down = FALSE; + continue; + } } /* add the info to the "temp" pbs_attribute */ @@ -2529,6 +2544,11 @@ int svr_is_request( update_node_state(node, i); + if ((node->nd_state & INUSE_DOWN) != 0) + { + node->nd_mom_reported_down = TRUE; + } + break; case IS_STATUS: @@ -2546,7 +2566,6 @@ int svr_is_request( unlock_node(node, __func__, "before is_stat_get", LOGLEVEL); ret = is_stat_get(node_name, chan); -/* socket_read_flush(chan->sock); */ node = find_nodebyname(node_name); diff --git a/src/server/pbsd_main.c b/src/server/pbsd_main.c index 3cc9bdb969..03032e4f10 100644 --- a/src/server/pbsd_main.c +++ b/src/server/pbsd_main.c @@ -684,7 +684,7 @@ void parse_command_line( if (!strcmp(optarg, "version")) { fprintf(stderr, "Version: %s\nRevision: %s \n", - PACKAGE_VERSION, SVN_VERSION); + PACKAGE_VERSION, GIT_HASH); exit(0); } @@ -702,6 +702,7 @@ void parse_command_line( printf("installdir: %s\n", 
PBS_INSTALL_DIR); printf("serverhome: %s\n", PBS_SERVER_HOME); printf("version: %s\n", PACKAGE_VERSION); + printf("Revision: %s\n", GIT_HASH); exit(0); } diff --git a/src/server/queue_func.c b/src/server/queue_func.c index cae7ba8bd7..2832706605 100644 --- a/src/server/queue_func.c +++ b/src/server/queue_func.c @@ -137,7 +137,7 @@ int lock_queue( int logging) { - int rc = PBSE_NONE; + int rc = PBSE_NONE; char *err_msg = NULL; if (logging >= 10) @@ -148,15 +148,15 @@ int lock_queue( } if (pthread_mutex_lock(the_queue->qu_mutex) != 0) - { - if (logging >= 10) + { + if (logging >= 10) { snprintf(err_msg, MSG_LEN_LONG, "ALERT: cannot lock queue %s mutex in method %s", the_queue->qu_qs.qu_name, id); log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, err_msg); } rc = PBSE_MUTEX; - } + } if (err_msg != NULL) free(err_msg); @@ -642,109 +642,6 @@ pbs_queue *next_queue( -/* - * gets the locks on both queues without releasing the all_queues mutex lock. - * Doing this another way can cause deadlock. - * - * @return PBSE_NONE on success - */ - -int get_parent_dest_queues( - - char *queue_parent_name, - char *queue_dest_name, - pbs_queue **parent, - pbs_queue **dest, - job **pjob_ptr) - - { - pbs_queue *pque_parent; - pbs_queue *pque_dest; - char jobid[PBS_MAXSVRJOBID + 1]; - char log_buf[LOCAL_LOG_BUF_SIZE + 1]; - job *pjob = *pjob_ptr; - int index_parent; - int index_dest; - int rc = PBSE_NONE; - - if (LOGLEVEL >= 7) - { - sprintf(log_buf, "%s", pjob->ji_qs.ji_jobid); - LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); - } - - strcpy(jobid, pjob->ji_qs.ji_jobid); - - *parent = NULL; - *dest = NULL; - - if ((queue_parent_name != NULL) && (queue_dest_name != NULL)) - { - if (!strcmp(queue_parent_name, queue_dest_name)) - { - /* parent and destination are the same. - Job is already in destnation queue. return */ - snprintf(log_buf, sizeof(log_buf), "parent and destination queues are the same: parent %s - dest %s. 
jobid: %s", - queue_parent_name, - queue_dest_name, - jobid); - log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); - return(-1); - } - } - else - return(-1); - - unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); - - unlock_queue(*parent, __func__, NULL, LOGLEVEL); - - - lock_allques_mutex(&svr_queues, __func__, NULL, LOGLEVEL); - - index_parent = get_value_hash(svr_queues.ht, queue_parent_name); - index_dest = get_value_hash(svr_queues.ht, queue_dest_name); - - if ((index_parent < 0) || - (index_dest < 0)) - { - rc = -1; - } - else - { - /* good path */ - pque_parent = svr_queues.ra->slots[index_parent].item; - pque_dest = svr_queues.ra->slots[index_dest].item; - - if ((pque_parent == NULL) || - (pque_dest == NULL)) - { - rc = -1; - } - else - { - /* SUCCESS! */ - lock_queue(pque_parent, __func__, NULL, LOGLEVEL); - lock_queue(pque_dest, __func__, NULL, LOGLEVEL); - *parent = pque_parent; - *dest = pque_dest; - - rc = PBSE_NONE; - } - } - - unlock_allques_mutex(&svr_queues, __func__, NULL, LOGLEVEL); - - if ((*pjob_ptr = svr_find_job(jobid, TRUE)) == NULL) - rc = -1; - - return(rc); - } /* END get_parent_dest_queues() */ - - - - - pbs_queue *lock_queue_with_job_held( pbs_queue *pque, diff --git a/src/server/queue_func.h b/src/server/queue_func.h index 6b2bd55bb6..8e9c0a1dfa 100644 --- a/src/server/queue_func.h +++ b/src/server/queue_func.h @@ -29,6 +29,4 @@ int remove_queue(all_queues *aq, pbs_queue *pque); pbs_queue *next_queue(all_queues *aq, int *iter); -int get_parent_dest_queues(char *queue_parent_name, char *queue_dest_name, pbs_queue **parent, pbs_queue **dest, job **pjob_ptr); - #endif /* _QUEUE_FUNC_H */ diff --git a/src/server/req_movejob.c b/src/server/req_movejob.c index e77e7238a8..49091ff465 100644 --- a/src/server/req_movejob.c +++ b/src/server/req_movejob.c @@ -110,7 +110,7 @@ extern char *msg_manager; extern char *msg_movejob; extern char *pbs_o_host; -int svr_movejob(job *, char *, int *, struct batch_request *, int); +int 
svr_movejob(job *, char *, int *, struct batch_request *); int svr_chkque(job *, pbs_queue *, char *, int, char *); job *chk_job_request(char *, struct batch_request *); @@ -177,7 +177,7 @@ int req_movejob( return(PBSE_NONE); } - switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, &local_errno, req, FALSE)) + switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, &local_errno, req)) { case 0: @@ -242,8 +242,6 @@ int req_orderjob( char tmpqn[PBS_MAXQUEUENAME+1]; struct batch_request *req = (struct batch_request *)vp; char log_buf[LOCAL_LOG_BUF_SIZE]; - char job_id1[PBS_MAXSVRJOBID+1]; - char job_id2[PBS_MAXSVRJOBID+1]; pbs_queue *pque1; pbs_queue *pque2; @@ -342,13 +340,9 @@ int req_orderjob( strcpy(tmpqn, pjob1->ji_qs.ji_queue); strcpy(pjob1->ji_qs.ji_queue, pjob2->ji_qs.ji_queue); strcpy(pjob2->ji_qs.ji_queue, tmpqn); - strcpy(job_id1, pjob1->ji_qs.ji_jobid); - strcpy(job_id2, pjob2->ji_qs.ji_jobid); - unlock_ji_mutex(pjob1, __func__, "5", LOGLEVEL); - unlock_ji_mutex(pjob2, __func__, "6", LOGLEVEL); - svr_dequejob(job_id1, FALSE); - svr_dequejob(job_id2, FALSE); + svr_dequejob(pjob1, FALSE); + svr_dequejob(pjob2, FALSE); if (svr_enquejob(pjob1, FALSE, -1) == PBSE_JOB_RECYCLED) pjob1 = NULL; diff --git a/src/server/req_quejob.c b/src/server/req_quejob.c index 6acebac43c..90821422da 100644 --- a/src/server/req_quejob.c +++ b/src/server/req_quejob.c @@ -1429,6 +1429,8 @@ int req_quejob( svr_job_purge(pj); return rc; } + + job_save(pj, SAVEJOB_NEW, 0); /* link job into server's new jobs list request */ insert_job(&newjobs,pj); @@ -1849,7 +1851,7 @@ int req_rdytocommit( unlink(namebuf); } - if (job_save(pj, SAVEJOB_NEW, 0) == -1) + if (job_save(pj, SAVEJOB_FULL, 0) == -1) { rc = PBSE_CAN_NOT_WRITE_FILE; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, diff --git a/src/server/req_track.c b/src/server/req_track.c index cbe25bfe4e..73f6b65783 100644 --- a/src/server/req_track.c +++ b/src/server/req_track.c @@ -85,6 +85,7 @@ #include #include +#include #include #include 
"libpbs.h" #include @@ -325,13 +326,36 @@ void issue_track( job *pjob) { + struct batch_request *preq; + char *pc; + char *sname; + char log_buf[LOCAL_LOG_BUF_SIZE]; - struct batch_request *preq; - char *pc; + if ((pc = strchr(pjob->ji_qs.ji_jobid, '.')) == NULL) + { + snprintf(log_buf, sizeof(log_buf), + "Remote job routing is not compatible with display_job_server_suffix set to false. Cannot track %s", + pjob->ji_qs.ji_jobid); + log_err(-1, __func__, log_buf); + + return; + } + + sname = pc + 1; + + /* do not issue track requests to ourselves */ + if (!strcmp(sname, server_name)) + { + snprintf(log_buf, sizeof(log_buf), + "%s erroneously called for local job %s", + __func__, pjob->ji_qs.ji_jobid); + log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); + return; + } preq = alloc_br(PBS_BATCH_TrackJob); - if (preq == (struct batch_request *)0) + if (preq == NULL) return; preq->rq_ind.rq_track.rq_hopcount = pjob->ji_wattr[JOB_ATR_hopcount].at_val.at_long; diff --git a/src/server/svr_jobfunc.c b/src/server/svr_jobfunc.c index 6eb49c65c9..5ae236d334 100644 --- a/src/server/svr_jobfunc.c +++ b/src/server/svr_jobfunc.c @@ -624,30 +624,20 @@ int svr_enquejob( int svr_dequejob( - char *job_id, /* I, M */ - int parent_queue_mutex_held) /* I */ + job *pjob, /* I, M */ + int parent_queue_mutex_held) /* I */ { int bad_ct = 0; int rc = PBSE_NONE; - job *pjob = NULL; pbs_attribute *pattr; pbs_queue *pque; resource *presc; char log_buf[LOCAL_LOG_BUF_SIZE]; /* remove job from server's all job list and reduce server counts */ - if (LOGLEVEL >= 10) - { - snprintf(log_buf, sizeof(log_buf), "%s", job_id); - log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); - } - - if ((pjob = svr_find_job(job_id, FALSE)) == NULL) - return(PBSE_JOBNOTFOUND); - if (LOGLEVEL >= 6) - LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, pjob->ji_qs.ji_jobid); + log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, pjob->ji_qs.ji_jobid); if (parent_queue_mutex_held == FALSE) { @@ 
-713,11 +703,14 @@ int svr_dequejob( pque ? pque->qu_qs.qu_name : "unknown queue", PJobState[pjob->ji_qs.ji_state]); - log_event(PBSEVENT_DEBUG2,PBS_EVENTCLASS_JOB,job_id,log_buf); + log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); if (bad_ct) /* state counts are all messed up */ { char queue_name[PBS_MAXQUEUENAME]; + char job_id[PBS_MAXSVRJOBID+1]; + + strcpy(job_id, pjob->ji_qs.ji_jobid); /* this function will lock queues and jobs */ unlock_ji_mutex(pjob, __func__, NULL, 0); diff --git a/src/server/svr_jobfunc.h b/src/server/svr_jobfunc.h index 1ea43309d5..690eaabd57 100644 --- a/src/server/svr_jobfunc.h +++ b/src/server/svr_jobfunc.h @@ -10,8 +10,6 @@ int svr_enquejob(job *pjob, int has_sv_qs_mutex, int); -int svr_dequejob(char *job_id, int); - int svr_setjobstate(job *pjob, int newstate, int newsubstate, int); void svr_evaljobstate(job *pjob, int *newstate, int *newsub, int forceeval); diff --git a/src/server/svr_movejob.c b/src/server/svr_movejob.c index 93a885d6e9..664908dd12 100644 --- a/src/server/svr_movejob.c +++ b/src/server/svr_movejob.c @@ -140,7 +140,7 @@ extern struct pbsnode *PGetNodeFromAddr(pbs_net_t); /* Private Functions local to this file */ -int local_move(job *, int *, struct batch_request *, int); +int local_move(job *, int *, struct batch_request *); int should_retry_route(int err); /* Global Data */ @@ -186,8 +186,7 @@ int svr_movejob( job *jobp, char *destination, int *my_err, - struct batch_request *req, - int parent_queue_mutex_held) + struct batch_request *req) { pbs_net_t destaddr; @@ -208,7 +207,7 @@ int svr_movejob( destination, PBS_MAXROUTEDEST); - log_err(-1, "svr_movejob", log_buf); + log_err(-1, __func__, log_buf); *my_err = PBSE_QUENBIG; @@ -238,7 +237,7 @@ int svr_movejob( if (local != 0) { - return(local_move(jobp, my_err, req, parent_queue_mutex_held)); + return(local_move(jobp, my_err, req)); } return(net_move(jobp, req)); @@ -262,13 +261,10 @@ int local_move( job *pjob, int *my_err, - struct 
batch_request *req, - int parent_queue_mutex_held) + struct batch_request *req) { - pbs_queue *routing_que = NULL; pbs_queue *dest_que = NULL; - pbs_queue *tmp_que = NULL; char *destination = pjob->ji_qs.ji_destin; int mtype; char log_buf[LOCAL_LOG_BUF_SIZE]; @@ -279,56 +275,12 @@ int local_move( * by making sure that the destionation queue and the current queue are different. * If they are the same then consider it done correctly */ if (!strcmp(pjob->ji_qs.ji_queue, pjob->ji_qs.ji_destin)) - { return(PBSE_NONE); - } if (LOGLEVEL >= 7) { sprintf(log_buf, "%s", pjob->ji_qs.ji_jobid); - LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); - } - - /* search for destination queue */ - /* CAUTION!!! This code is very complex - be very careful editing */ - if (parent_queue_mutex_held == TRUE) - routing_que = pjob->ji_qhdr; - else - { - routing_que = get_jobs_queue(&pjob); - - if (pjob == NULL) - { - log_err(PBSE_JOBNOTFOUND, __func__, "Job lost while acquiring queue 14"); - return(PBSE_JOBNOTFOUND); - } - } - - if (routing_que == NULL) - { - sprintf(log_buf, "queue %s does not exist\n", pjob->ji_qs.ji_queue); - - log_err(-1, __func__, log_buf); - - *my_err = PBSE_UNKQUE; - - return(-1); - } - - - if (get_parent_dest_queues(pjob->ji_qs.ji_queue, destination, &routing_que, &dest_que, &pjob) != PBSE_NONE) - { - if (dest_que != NULL) - unlock_queue(dest_que, __func__, NULL, LOGLEVEL); - - if ((parent_queue_mutex_held == FALSE) && - (routing_que != NULL)) - unlock_queue(routing_que, __func__, NULL, LOGLEVEL); - - if (pjob == NULL) - return(-10); - else - return(-1); + log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); } /* @@ -336,7 +288,6 @@ int local_move( * checks on queue availability, etc. are skipped; * otherwise all checks are enforced. 
*/ - if (req == 0) { mtype = MOVE_TYPE_Route; /* route */ @@ -350,51 +301,54 @@ int local_move( mtype = MOVE_TYPE_Move; /* non-privileged move */ } + strcpy(job_id, pjob->ji_qs.ji_jobid); + unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL); + + dest_que = find_queuebyname(destination); + + if ((pjob = svr_find_job(job_id, TRUE)) == NULL) + { + /* job disappeared while locking queue */ + if (dest_que != NULL) + unlock_queue(dest_que, __func__, NULL, LOGLEVEL); + + return(PBSE_JOB_RECYCLED); + } + + if (dest_que == NULL) + { + /* this should never happen */ + sprintf(log_buf, "queue %s does not exist\n", pjob->ji_qs.ji_queue); + log_err(-1, __func__, log_buf); + + *my_err = PBSE_UNKQUE; + return(-1); + } + /* check the destination */ - if ((*my_err = svr_chkque( - pjob, - dest_que, - get_variable(pjob, pbs_o_host), mtype, NULL))) + if ((*my_err = svr_chkque(pjob, dest_que, get_variable(pjob, pbs_o_host), mtype, NULL))) { unlock_queue(dest_que, __func__, NULL, LOGLEVEL); /* should this queue be retried? 
*/ - if (parent_queue_mutex_held == FALSE) - unlock_queue(routing_que, __func__, "retry", LOGLEVEL); - return(should_retry_route(*my_err)); } + unlock_queue(dest_que, __func__, NULL, LOGLEVEL); + /* dequeue job from present queue, update destination and */ /* queue_rank for new queue and enqueue into destination */ - - strcpy(job_id, pjob->ji_qs.ji_jobid); - unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); - - /* if we come out of svr_dequejob successfully pjob->ji_mutex will be locked */ - rc = svr_dequejob(job_id, TRUE); + rc = svr_dequejob(pjob, FALSE); if (rc) return(rc); snprintf(pjob->ji_qs.ji_queue, sizeof(pjob->ji_qs.ji_queue), "%s", destination); pjob->ji_wattr[JOB_ATR_qrank].at_val.at_long = ++queue_rank; - - unlock_queue(dest_que, __func__, NULL, LOGLEVEL); - unlock_queue(routing_que, __func__, "success", LOGLEVEL); if ((*my_err = svr_enquejob(pjob, FALSE, -1)) == PBSE_JOB_RECYCLED) return(-1); - if (parent_queue_mutex_held == TRUE) - { - /* re-lock the routing queue */ - if ((tmp_que = lock_queue_with_job_held(routing_que, &pjob)) == NULL) - lock_queue(routing_que, __func__, NULL, LOGLEVEL); - else - routing_que = tmp_que; - } - if (*my_err != PBSE_NONE) { return(-1); /* should never ever get here */ @@ -749,8 +703,7 @@ int send_job_work( encode_type = ATR_ENCODE_SVR; /* clear default resource settings */ - unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); - ret = svr_dequejob(job_id, FALSE); + ret = svr_dequejob(pjob, FALSE); if (ret) return(ret); } diff --git a/src/server/svr_movejob.h b/src/server/svr_movejob.h index 490f7dff5c..d5669a0a37 100644 --- a/src/server/svr_movejob.h +++ b/src/server/svr_movejob.h @@ -5,9 +5,7 @@ #include "pbs_job.h" /* job */ #include "batch_request.h" /* batch_request */ -int svr_movejob(job *jobp, char *destination, int *, struct batch_request *req, int); - -/* static int local_move(job *jobp, struct batch_request *req, int); */ +int svr_movejob(job *jobp, char *destination, int *, struct batch_request *req); void 
finish_routing_processing(job *pjob, int status); diff --git a/src/server/test/req_track/test_req_track.c b/src/server/test/req_track/test_req_track.c index 278dcdcfa8..b9fb6dc2df 100644 --- a/src/server/test/req_track/test_req_track.c +++ b/src/server/test/req_track/test_req_track.c @@ -4,6 +4,9 @@ #include #include #include "pbs_error.h" + + + START_TEST(test_one) {