diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile index bfd7fc1c1740f..1e87737abfb71 100644 --- a/.devops/musa.Dockerfile +++ b/.devops/musa.Dockerfile @@ -1,6 +1,6 @@ ARG UBUNTU_VERSION=22.04 # This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 +ARG MUSA_VERSION=rc3.1.1 # Target the MUSA build image ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6841ba5897809..02f6a13634b3f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -443,7 +443,7 @@ jobs: ubuntu-22-cmake-musa: runs-on: ubuntu-22.04 - container: mthreads/musa:rc3.1.0-devel-ubuntu22.04 + container: mthreads/musa:rc3.1.1-devel-ubuntu22.04 steps: - name: Clone diff --git a/docs/docker.md b/docs/docker.md index dac9a9ec164ff..cab5ae9572349 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -104,7 +104,7 @@ You may want to pass in some different `ARGS`, depending on the MUSA environment The defaults are: -- `MUSA_VERSION` set to `rc3.1.0` +- `MUSA_VERSION` set to `rc3.1.1` The resulting images, are essentially the same as the non-MUSA images: diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index c95728b08bfe8..93272d3e29aed 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -1480,12 +1480,7 @@ static void ggml_cuda_op_mul_mat( const size_t nbytes_data = ggml_nbytes(src0); const size_t nbytes_padding = ggml_row_size(src0->type, MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING); dev[id].src0_dd = dev[id].src0_dd_alloc.alloc(ctx.pool(id), nbytes_data + nbytes_padding); - // TODO: remove this for MUSA once the Guilty Lockup issue is resolved -#ifndef GGML_USE_MUSA CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd, 0, nbytes_data + nbytes_padding, stream)); -#else // GGML_USE_MUSA - CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd + nbytes_data, 0, nbytes_padding, stream)); -#endif // !GGML_USE_MUSA } // If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared: