diff --git a/.github/actions/linux-testenv/action.yml b/.github/actions/linux-testenv/action.yml
index ba5eb8a25..c19d12c99 100644
--- a/.github/actions/linux-testenv/action.yml
+++ b/.github/actions/linux-testenv/action.yml
@@ -3,11 +3,11 @@ name: Setup Test Environment
 inputs:
   pytorch:
     type: string
-    default: 'main'
+    default: 'https://github.com/daisyden/pytorch.git@distributed_2.10'
     description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch'
   torch_xpu_ops:
     type: string
-    default: 'main'
+    default: 'daisyden/distributed_2.10'
     description: Torch-xpu-ops version, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin
   python:
     type: string
@@ -69,9 +69,9 @@ runs:
         fi
         TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
         if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then
-          PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')"
+          PYTORCH_REPO="https://github.com/daisyden/pytorch.git"
         else
-          PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
+          PYTORCH_REPO="https://github.com/daisyden/pytorch.git"
         fi
         git clone ${PYTORCH_REPO} pytorch
         cd pytorch
@@ -97,14 +97,9 @@ runs:
             TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}"
           fi
         fi
-        if [ "${{ github.event_name }}" == "pull_request" ] && [[ "${{ inputs.pytorch }}" != *"_wheel" ]];then
-          cp -r ${{ github.workspace }}/torch-xpu-ops third_party/torch-xpu-ops
-          cd third_party/torch-xpu-ops
-        else
-          git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops
-          cd third_party/torch-xpu-ops
-          git checkout ${TORCH_XPU_OPS_COMMIT}
-        fi
+        git clone ${TORCH_XPU_OPS_REPO} third_party/torch-xpu-ops
+        cd third_party/torch-xpu-ops
+        git checkout ${TORCH_XPU_OPS_COMMIT}
         git status && git diff && git show -s
     - name: Install E2E Requirements
       shell: bash -xe {0}
diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml
index 3bc1729ba..9657c3667 100644
--- a/.github/actions/linux-uttest/action.yml
+++ b/.github/actions/linux-uttest/action.yml
@@ -77,7 +77,7 @@ runs:
           tee ${{ github.workspace }}/ut_log/op_ut/op_ut_with_skip_test.log
         ls -al
         cp *.xml ${{ github.workspace }}/ut_log
-        find op_ut_with_skip_nn op_ut_with_skip_quantization/core -type f -exec sh -c '
+        find op_ut_with_skip_nn op_ut_with_skip_quantization/core op_ut_with_all_functorch -type f -exec sh -c '
          dir_path=$(dirname "$1");
          case "$dir_path" in
            *"op_ut_with_skip_quantization/core"*)
@@ -90,6 +90,7 @@ runs:
         ls -al op_ut_with_skip_nn op_ut_with_skip_quantization/core
         cp op_ut_with_skip_nn/*.xml ${{ github.workspace }}/ut_log
         cp op_ut_with_skip_quantization/core/*.xml ${{ github.workspace }}/ut_log
+        cp op_ut_with_all_functorch/*.xml ${{ github.workspace }}/ut_log
         # Cases run with a on-demand white list, since some suites are too
         # slow to go through all operators on CPU. So add cases on-demand
         # when XPU implementatoin is done.
@@ -180,10 +181,7 @@ runs:
           echo -e "[ERROR] XCCL is not enabled"
           exit 1
         fi
-        export CCL_ROOT=$(dirname $(which python))/../
-        export PATH="${CCL_ROOT}/bin/libfabric:${PATH}"
-        export LD_LIBRARY_PATH="${CCL_ROOT}/lib:${LD_LIBRARY_PATH}"
-        python run_distributed.py \
+        python run_distributed_local.py \
           2> ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \
           tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log
         find ../ -type f -name "*.xml" -exec cp {} ${{ github.workspace }}/ut_log/ \;
diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh
index b4f526297..27f324ed1 100755
--- a/.github/scripts/build.sh
+++ b/.github/scripts/build.sh
@@ -19,7 +19,7 @@ done
 
 # Set pytorch
 rm -rf ${WORKSPACE}/pytorch
-git clone ${PYTORCH_REPO} ${WORKSPACE}/pytorch
+git clone https://github.com/daisyden/pytorch.git ${WORKSPACE}/pytorch
 cd ${WORKSPACE}/pytorch
 git checkout ${PYTORCH_COMMIT}
 git remote -v && git branch && git show -s
@@ -44,33 +44,32 @@ git remote -v && git branch && git show -s
 # Pre Build
 cd ${WORKSPACE}/pytorch
 python -m pip install requests
-python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py
 git submodule sync && git submodule update --init --recursive
 python -m pip install -r requirements.txt
-python -m pip install mkl-static==2025.2.0 mkl-include==2025.2.0
+python -m pip install mkl-static mkl-include
 export USE_STATIC_MKL=1
 if [ "${XPU_ONEAPI_PATH}" == "" ];then
     export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \
-        intel-cmplr-lib-rt==2025.2.1 | \
-        intel-cmplr-lib-ur==2025.2.1 | \
-        intel-cmplr-lic-rt==2025.2.1 | \
-        intel-sycl-rt==2025.2.1 | \
-        oneccl-devel==2021.16.1 | \
-        oneccl==2021.16.1 | \
-        impi-rt==2021.16.1 | \
-        onemkl-sycl-blas==2025.2.0 | \
-        onemkl-sycl-dft==2025.2.0 | \
-        onemkl-sycl-lapack==2025.2.0 | \
-        onemkl-sycl-rng==2025.2.0 | \
-        onemkl-sycl-sparse==2025.2.0 | \
-        dpcpp-cpp-rt==2025.2.1 | \
-        intel-opencl-rt==2025.2.1 | \
-        mkl==2025.2.0 | \
-        intel-openmp==2025.2.1 | \
-        tbb==2022.2.0 | \
-        tcmlib==1.4.0 | \
-        umf==0.11.0 | \
-        intel-pti==0.13.1
+        intel-cmplr-lib-rt | \
+        intel-cmplr-lib-ur | \
+        intel-cmplr-lic-rt | \
+        intel-sycl-rt | \
+        oneccl-devel | \
+        oneccl | \
+        impi-rt | \
+        onemkl-sycl-blas | \
+        onemkl-sycl-dft | \
+        onemkl-sycl-lapack | \
+        onemkl-sycl-rng | \
+        onemkl-sycl-sparse | \
+        dpcpp-cpp-rt | \
+        intel-opencl-rt | \
+        mkl | \
+        intel-openmp | \
+        tbb | \
+        tcmlib | \
+        umf | \
+        intel-pti
     "
 fi
 
diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml
index d0f5e983d..d867851a2 100644
--- a/.github/workflows/_linux_build.yml
+++ b/.github/workflows/_linux_build.yml
@@ -10,11 +10,11 @@ on:
         description: Runner label
       pytorch:
         type: string
-        default: 'main'
+        default: 'https://github.com/daisyden/pytorch.git@distributed_2.10'
         description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch'
       torch_xpu_ops:
         type: string
-        default: 'main'
+        default: 'daisyden/distributed_2.10'
         description: Torch-xpu-ops main by default, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin
       triton:
         required: false
@@ -59,7 +59,7 @@ jobs:
     if: ${{ ! endsWith(inputs.pytorch, '_wheel') }}
     runs-on: ${{ needs.runner.outputs.runner_id }}
     container:
-      image: 'pytorch/manylinux2_28-builder:xpu-2.9'
+      image: 'intelgpu/ubuntu-22.04-lts2:2523.31'
       volumes:
         - ${{ github.workspace }}:${{ github.workspace }}
       env:
@@ -72,21 +72,30 @@
     steps:
       - name: Install gh-cli
         run: |
+          rm -rf ./*.whl ./*.log
           cat /etc/os-release
           hostname && id
           # install gh
-          dnf install -y 'dnf-command(config-manager)'
-          dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo
-          dnf install -y gh --repo gh-cli
-          gh --version
+          sudo apt update
+          sudo apt install -y gpg-agent wget curl cmake git unzip zip libgl1 zlib1g-dev numactl \
+                  libglib2.0-dev rsync jq gcc-11 g++-11 python3-dev python3-venv gh
       - name: Setup python-${{ inputs.python }}
         run: |
           rm -rf /tmp/xpu-tool/myvenv
-          local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}')
-          /opt/python/${local_python}/bin/python -m venv /tmp/xpu-tool/myvenv
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          source $HOME/.local/bin/env
+          uv venv /tmp/xpu-tool/myvenv --python 3.10 --clear
+          source /tmp/xpu-tool/myvenv/bin/activate
           which python && python -V
           which pip && pip list
-          pip install -U pip wheel setuptools
+          uv pip install -U pip wheel setuptools
+      - name: Install oneapi
+        run: |
+          rm -rf /opt/intel/oneapi
+          wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/aa5447b5-3644-43c8-8ec4-72d53f6ecc19/intel-deep-learning-essentials-2025.3.0.338_offline.sh
+          sudo bash intel-deep-learning-essentials-2025.3.0.338_offline.sh -a -s --eula accept
+          source /opt/intel/oneapi/setvars.sh
+          icpx --version
       - name: Checkout torch-xpu-ops
         uses: actions/checkout@v4
         with:
@@ -113,8 +122,6 @@ jobs:
             TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git"
             TORCH_XPU_OPS_COMMIT="${{ inputs.torch_xpu_ops }}"
           fi
-          # gcc 11
-          source /opt/rh/gcc-toolset-11/enable
           source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh
           ${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \
             --WORKSPACE="${{ github.workspace }}" \
@@ -129,8 +136,6 @@ jobs:
           fi
       - name: Build Torchvision and Torchaudio
         run: |
-          # gcc 11
-          source /opt/rh/gcc-toolset-11/enable
           cd ./pytorch
           TORCHVISION_COMMIT_ID="$(cat .github/ci_commit_pins/vision.txt)"
           TORCHAUDIO_COMMIT_ID="$(cat .github/ci_commit_pins/audio.txt)"
@@ -177,9 +182,6 @@ jobs:
             curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\
               grep '__version__' |head -n 1 |awk -F "'" '{print $2}'
           )"
-          # gcc 13
-          dnf install -y gcc-toolset-13-gcc-c++ zlib-devel
-          source /opt/rh/gcc-toolset-13/enable
           pip install cmake ninja pybind11
           python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME} \
             2>&1 |tee ${{ github.workspace }}/build_triton_${TRITON_COMMIT_ID}.log
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 6abbd2470..c88cf34ff 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -9,11 +9,11 @@ on:
         description: Runner label
       pytorch:
         type: string
-        default: 'main'
+        default: 'https://github.com/daisyden/pytorch.git@distributed_2.10'
         description: Pytorch main by default, or 'commit/branch', or 'repo@commit/repo@branch'
       torch_xpu_ops:
         type: string
-        default: 'main'
+        default: 'daisyden/distributed_2.10'
         description: Torch-xpu-ops version, 'commit/branch', or 'repo@commit/repo@branch', or 'pinned' for pytorch pin
       python:
         type: string
@@ -97,12 +97,12 @@ jobs:
 
   test-in-baremetal:
     needs: runner
-    timeout-minutes: 600
+    timeout-minutes: 1200
     if: ${{ contains(inputs.ut, 'distributed') }}
     runs-on: ${{ needs.runner.outputs.runner_id }}
     env:
       AGENT_TOOLSDIRECTORY: /tmp/xpu-tool
-      PYTEST_ADDOPTS: -v --timeout 3600 --timeout_method=thread -n 1
+      PYTEST_ADDOPTS: -v
     steps:
       - name: Checkout torch-xpu-ops
         uses: actions/checkout@v4
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index bad665086..9e44d4738 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -100,8 +100,8 @@ jobs:
         build: [build]
     uses: ./.github/workflows/_linux_build.yml
     with:
-      runner: pvc_rolling
-      pytorch: ${{ needs.conditions-filter.outputs.pytorch }}
+      pytorch: distributed_2.10
+      runner: PVC-7358
 
   linux-ut:
     needs: [conditions-filter, linux-build]
@@ -128,9 +128,8 @@ jobs:
         ut_name: [xpu_distributed]
     uses: ./.github/workflows/_linux_ut.yml
     with:
-      runner: pvc_rolling
-      pytorch: ${{ needs.conditions-filter.outputs.pytorch }}
-      torch_xpu_ops: ${{ needs.conditions-filter.outputs.pytorch == 'nightly_wheel' && 'pinned' || 'main' }}
+      runner: PVC-7358
+      pytorch: distributed_2.10
       ut: ${{ matrix.ut_name }}
 
   linux-e2e: