diff --git a/scheduler_plugins/slurm/utils/upload_artifacts.sh b/scheduler_plugins/slurm/utils/upload_artifacts.sh
index fa89db0c88..44928bff1d 100755
--- a/scheduler_plugins/slurm/utils/upload_artifacts.sh
+++ b/scheduler_plugins/slurm/utils/upload_artifacts.sh
@@ -19,6 +19,15 @@ function usage {
     exit 1
 }
 
+# sed's in-place flag differs by platform: GNU sed takes a bare -i, while
+# BSD/macOS sed requires a backup suffix as a separate argument (empty = none).
+# An array keeps that empty argument intact; a string holding "''" would not.
+BUILD_OS=$(uname)
+SED_INPLACE=(-i)
+if [ "${BUILD_OS}" != "Linux" ]; then
+  SED_INPLACE=(-i '')
+fi
+
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 
 cd "${SCRIPT_DIR}/.."
@@ -74,18 +83,18 @@ PLUGIN_ARTIFACTS_CHECKSUM=$(shasum --algorithm 256 "${PLUGIN_ARTIFACTS_ARCHIVE}"
 PLUGIN_DEFINITION_S3_URL="s3://${S3_BUCKET}${S3_BUCKET_PREFIX}/plugin_definition.yaml"
 GENERATED_PLUGIN_DEFINITION_PATH="${TMP}/plugin_template_plugin_definition.yaml"
 cp plugin_definition.yaml ${GENERATED_PLUGIN_DEFINITION_PATH}
-sed -i "s||${ADDITIONAL_CLUSTER_INFRASTRUCTURE_CHECKSUM}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
-sed -i "s||${PLUGIN_ARTIFACTS_CHECKSUM}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
-sed -i "s||${S3_BUCKET}${S3_BUCKET_PREFIX}|g" ${GENERATED_PLUGIN_DEFINITION_PATH}
+sed "${SED_INPLACE[@]}" "s||${ADDITIONAL_CLUSTER_INFRASTRUCTURE_CHECKSUM}|g" "${GENERATED_PLUGIN_DEFINITION_PATH}"
+sed "${SED_INPLACE[@]}" "s||${PLUGIN_ARTIFACTS_CHECKSUM}|g" "${GENERATED_PLUGIN_DEFINITION_PATH}"
+sed "${SED_INPLACE[@]}" "s||${S3_BUCKET}${S3_BUCKET_PREFIX}|g" "${GENERATED_PLUGIN_DEFINITION_PATH}"
 echo "Generated plugin definition:" && cat ${GENERATED_PLUGIN_DEFINITION_PATH}
 echo "Uploading plugin_definition to ${PLUGIN_DEFINITION_S3_URL}"
 aws s3 cp --region "${AWS_REGION}" "${GENERATED_PLUGIN_DEFINITION_PATH}" "${PLUGIN_DEFINITION_S3_URL}"
 
 GENERATED_CONFIG_PATH="${TMP}/slurm_plugin_cluster_config.yaml"
 cp examples/cluster_configuration.yaml ${GENERATED_CONFIG_PATH}
-sed -i "s||${PLUGIN_DEFINITION_S3_URL}|g" ${GENERATED_CONFIG_PATH}
+sed "${SED_INPLACE[@]}" "s||${PLUGIN_DEFINITION_S3_URL}|g" "${GENERATED_CONFIG_PATH}"
 PLUGIN_DEFINITION_CHECKSUM=$(shasum --algorithm 256 "${GENERATED_PLUGIN_DEFINITION_PATH}" | cut -d' ' -f1)
-sed -i "s||${PLUGIN_DEFINITION_CHECKSUM}|g" ${GENERATED_CONFIG_PATH}
+sed "${SED_INPLACE[@]}" "s||${PLUGIN_DEFINITION_CHECKSUM}|g" "${GENERATED_CONFIG_PATH}"
 
 echo "Generated test cluster configuration in ${GENERATED_CONFIG_PATH}:"
 cat ${GENERATED_CONFIG_PATH}
diff --git a/tests/integration-tests/tests/efa/test_efa/test_efa/nccl_benchmarks/init_nccl_benchmarks.sh b/tests/integration-tests/tests/efa/test_efa/test_efa/nccl_benchmarks/init_nccl_benchmarks.sh
index f77d343e77..43c705149e 100644
--- a/tests/integration-tests/tests/efa/test_efa/test_efa/nccl_benchmarks/init_nccl_benchmarks.sh
+++ b/tests/integration-tests/tests/efa/test_efa/test_efa/nccl_benchmarks/init_nccl_benchmarks.sh
@@ -7,7 +7,6 @@ module load ${1}
 NCCL_BENCHMARKS_VERSION='2.10.0'
 NCCL_VERSION='2.7.8-1'
 ML_REPO_PKG='nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb'
-CUDA_VERSION='11.4'
 OFI_NCCL_VERSION='1.1.1'
 MPI_HOME=$(which mpirun | awk -F '/bin' '{print $1}')
 NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80" # Arch for NVIDIA A100
@@ -32,6 +31,6 @@ wget https://github.com/aws/aws-ofi-nccl/archive/v${OFI_NCCL_VERSION}.tar.gz
 tar xvfz v${OFI_NCCL_VERSION}.tar.gz
 cd aws-ofi-nccl-${OFI_NCCL_VERSION}
 ./autogen.sh
-./configure --with-libfabric=/opt/amazon/efa --with-cuda=/usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/ --with-nccl=/shared/openmpi/nccl-${NCCL_VERSION}/build/ --with-mpi=${MPI_HOME} --prefix /shared/openmpi/ofi-plugin
+./configure --with-libfabric=/opt/amazon/efa --with-cuda=/usr/local/cuda/targets/x86_64-linux/ --with-nccl=/shared/openmpi/nccl-${NCCL_VERSION}/build/ --with-mpi=${MPI_HOME} --prefix /shared/openmpi/ofi-plugin
 make
-make install
\ No newline at end of file
+make install