@@ -117,8 +117,14 @@ static void pipe_closed(int fd, short flags, void *arg);
117
117
static void rollup (int status , orte_process_name_t * sender ,
118
118
opal_buffer_t * buffer ,
119
119
orte_rml_tag_t tag , void * cbdata );
120
+ static void node_regex_report (int status , orte_process_name_t * sender ,
121
+ opal_buffer_t * buffer ,
122
+ orte_rml_tag_t tag , void * cbdata );
123
+ static void report_orted (void );
124
+
120
125
static opal_buffer_t * bucket , * mybucket = NULL ;
121
126
static int ncollected = 0 ;
127
+ static bool node_regex_waiting = false;
122
128
123
129
static char * orte_parent_uri = NULL ;
124
130
@@ -734,6 +740,11 @@ int orte_daemon(int argc, char *argv[])
734
740
* a little time in the launch phase by "warming up" the
735
741
* connection to our parent while we wait for our children */
736
742
buffer = OBJ_NEW (opal_buffer_t ); // zero-byte message
743
+ if (NULL == orte_node_regex ) {
744
+ orte_rml .recv_buffer_nb (ORTE_PROC_MY_PARENT , ORTE_RML_TAG_NODE_REGEX_REPORT ,
745
+ ORTE_RML_PERSISTENT , node_regex_report , & node_regex_waiting );
746
+ node_regex_waiting = true;
747
+ }
737
748
if (0 > (ret = orte_rml .send_buffer_nb (orte_mgmt_conduit ,
738
749
ORTE_PROC_MY_PARENT , buffer ,
739
750
ORTE_RML_TAG_WARMUP_CONNECTION ,
@@ -969,8 +980,10 @@ int orte_daemon(int argc, char *argv[])
969
980
i += 2 ;
970
981
}
971
982
}
972
- /* now launch any child daemons of ours */
973
- orte_plm .remote_spawn (orte_tree_launch_cmd );
983
+ if (NULL != orte_node_regex ) {
984
+ /* now launch any child daemons of ours */
985
+ orte_plm .remote_spawn (orte_tree_launch_cmd );
986
+ }
974
987
}
975
988
976
989
if (orte_debug_daemons_flag ) {
@@ -1052,8 +1065,6 @@ static void rollup(int status, orte_process_name_t* sender,
1052
1065
opal_buffer_t * buffer ,
1053
1066
orte_rml_tag_t tag , void * cbdata )
1054
1067
{
1055
- int nreqd ;
1056
- char * rtmod ;
1057
1068
int ret ;
1058
1069
orte_process_name_t child ;
1059
1070
int32_t i , flag , cnt ;
@@ -1095,10 +1106,17 @@ static void rollup(int status, orte_process_name_t* sender,
1095
1106
}
1096
1107
1097
1108
report :
1109
+ report_orted ();
1110
+ }
1111
+
1112
+ static void report_orted () {
1113
+ char * rtmod ;
1114
+ int nreqd , ret ;
1115
+
1098
1116
/* get the number of children */
1099
1117
rtmod = orte_rml .get_routed (orte_mgmt_conduit );
1100
1118
nreqd = orte_routed .num_routes (rtmod ) + 1 ;
1101
- if (nreqd == ncollected && NULL != mybucket ) {
1119
+ if (nreqd == ncollected && NULL != mybucket && ! node_regex_waiting ) {
1102
1120
/* add the collection of our children's buckets to ours */
1103
1121
opal_dss .copy_payload (mybucket , bucket );
1104
1122
OBJ_RELEASE (bucket );
@@ -1112,3 +1130,36 @@ static void rollup(int status, orte_process_name_t* sender,
1112
1130
}
1113
1131
}
1114
1132
}
1133
+
1134
+ static void node_regex_report (int status , orte_process_name_t * sender ,
1135
+ opal_buffer_t * buffer ,
1136
+ orte_rml_tag_t tag , void * cbdata ) {
1137
+ int rc , n = 1 ;
1138
+ char * regex ;
1139
+ assert (NULL == orte_node_regex );
1140
+ bool * active = (bool * )cbdata ;
1141
+
1142
+ /* extract the node regex if needed, and update the routing tree */
1143
+ n = 1 ;
1144
+ if (ORTE_SUCCESS != (rc = opal_dss .unpack (buffer , & regex , & n , OPAL_STRING ))) {
1145
+ ORTE_ERROR_LOG (rc );
1146
+ return ;
1147
+ }
1148
+ orte_node_regex = regex ;
1149
+
1150
+ if (ORTE_SUCCESS != (rc = orte_util_nidmap_parse (orte_node_regex ))) {
1151
+ ORTE_ERROR_LOG (rc );
1152
+ return ;
1153
+ }
1154
+
1155
+ /* update the routing tree so any tree spawn operation
1156
+ * properly gets the number of children underneath us */
1157
+ orte_routed .update_routing_plan (NULL );
1158
+
1159
+ * active = false;
1160
+
1161
+ /* now launch any child daemons of ours */
1162
+ orte_plm .remote_spawn (orte_tree_launch_cmd );
1163
+
1164
+ report_orted ();
1165
+ }
0 commit comments