diff --git a/.github/workflows/apple_m.yml b/.github/workflows/apple_m.yml deleted file mode 100644 index e34eada86b..0000000000 --- a/.github/workflows/apple_m.yml +++ /dev/null @@ -1,149 +0,0 @@ -name: apple m - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - build: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: macos-14 - - strategy: - fail-fast: false - matrix: - build: [cmake, make] - fortran: [gfortran] - openmp: [0, 1] - ilp64: [0, 1] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Print system information - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - cat /proc/cpuinfo - elif [ "$RUNNER_OS" == "macOS" ]; then - sysctl -a | grep machdep.cpu - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - - - name: Install Dependencies - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - sudo apt-get install -y gfortran cmake ccache libtinfo5 - elif [ "$RUNNER_OS" == "macOS" ]; then - # It looks like "gfortran" isn't working correctly unless "gcc" is re-installed. - brew reinstall gcc - brew install coreutils cmake ccache - brew install llvm - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - # We include the commit sha in the cache key, as new cache entries are - # only created if there is no existing entry for the key yet. - # GNU make and cmake call the compilers differently. It looks like - # that causes the cache to mismatch. Keep the ccache for both build - # tools separate to avoid polluting each other. - key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} - # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. 
- restore-keys: | - ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }} - ccache-${{ runner.os }}-${{ matrix.build }} - - - name: Configure ccache - run: | - if [ "${{ matrix.build }}" = "make" ]; then - # Add ccache to path - if [ "$RUNNER_OS" = "Linux" ]; then - echo "/usr/lib/ccache" >> $GITHUB_PATH - elif [ "$RUNNER_OS" = "macOS" ]; then - echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH - echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH - echo "" >>$GITHUB_PATH - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - fi - # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: Build OpenBLAS - run: | - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" - export CC="/opt/homebrew/opt/llvm/bin/clang" - case "${{ matrix.build }}" in - "make") - make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}" - ;; - "cmake") - export LDFLAGS="$LDFLAGS -Wl,-ld_classic" - mkdir build && cd build - cmake -DDYNAMIC_ARCH=1 \ - -DUSE_OPENMP=${{matrix.openmp}} \ - -DINTERFACE64=${{matrix.ilp64}} \ - -DNOFORTRAN=0 \ - -DBUILD_WITHOUT_LAPACK=0 \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ - .. - cmake --build . 
- ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac - - - name: Show ccache status - continue-on-error: true - run: ccache -s - - - name: Run tests - timeout-minutes: 60 - run: | - case "${{ matrix.build }}" in - "make") - MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' - echo "::group::Tests in 'test' directory" - make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'ctest' directory" - make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'utest' directory" - make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - ;; - "cmake") - cd build && ctest - ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml deleted file mode 100644 index 6928312b56..0000000000 --- a/.github/workflows/arm64_graviton.yml +++ /dev/null @@ -1,139 +0,0 @@ -name: arm64 graviton cirun - -on: - push: - branches: - - develop - - release-** - pull_request: - branches: - - develop - - release-** - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - build: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}" - - strategy: - fail-fast: false - matrix: - fortran: [gfortran] - build: [cmake, make] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Print system information - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - cat /proc/cpuinfo - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - - - name: Install Dependencies - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - sudo apt update - sudo apt-get install -y gfortran cmake ccache libtinfo5 - else - echo "::error::$RUNNER_OS not supported" - exit 1 - 
fi - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - # We include the commit sha in the cache key, as new cache entries are - # only created if there is no existing entry for the key yet. - # GNU make and cmake call the compilers differently. It looks like - # that causes the cache to mismatch. Keep the ccache for both build - # tools separate to avoid polluting each other. - key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} - # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} - ccache-${{ runner.os }}-${{ matrix.build }} - - - name: Configure ccache - run: | - if [ "${{ matrix.build }}" = "make" ]; then - # Add ccache to path - if [ "$RUNNER_OS" = "Linux" ]; then - echo "/usr/lib/ccache" >> $GITHUB_PATH - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - fi - # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: Build OpenBLAS - run: | - case "${{ matrix.build }}" in - "make") - make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" - ;; - "cmake") - mkdir build && cd build - cmake -DDYNAMIC_ARCH=1 \ - -DNOFORTRAN=0 \ - -DBUILD_WITHOUT_LAPACK=0 \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ - .. - cmake --build . 
- ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac - - - name: Show ccache status - continue-on-error: true - run: ccache -s - - - name: Run tests - timeout-minutes: 60 - run: | - case "${{ matrix.build }}" in - "make") - MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' - echo "::group::Tests in 'test' directory" - make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'ctest' directory" - make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'utest' directory" - make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - ;; - "cmake") - cd build && ctest - ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac diff --git a/.github/workflows/c910v.yml b/.github/workflows/c910v.yml deleted file mode 100644 index a47ca1dce6..0000000000 --- a/.github/workflows/c910v.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: c910v qemu test - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - TEST: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: ubuntu-latest - env: - xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618 - toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz - strategy: - fail-fast: false - matrix: - include: - - target: RISCV64_GENERIC - triple: riscv64-linux-gnu - apt_triple: riscv64-linux-gnu - opts: NO_SHARED=1 TARGET=RISCV64_GENERIC - - target: C910V - triple: riscv64-unknown-linux-gnu - apt_triple: riscv64-linux-gnu - opts: NO_SHARED=1 TARGET=C910V - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: install build deps - run: | - sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make 
ccache \ - gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross - - - name: checkout qemu - uses: actions/checkout@v3 - with: - repository: T-head-Semi/qemu - path: qemu - ref: 1e692ebb43d396c52352406323fc782c1ac99a42 - - - name: build qemu - run: | - # Force use c910v qemu-user - wget https://github.com/revyos/qemu/commit/5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch - cd qemu - patch -p1 < ../5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch - ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system - make -j$(nproc) - make install - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.target }} - - - name: Configure ccache - run: | - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: build OpenBLAS - run: | - wget ${xuetie_toolchain}/${toolchain_file_name} - tar -xvf ${toolchain_file_name} -C /opt - export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH" - - make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) - - - name: test - run: | - export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH - qemu-riscv64 ./utest/openblas_utest - qemu-riscv64 ./utest/openblas_utest_ext - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2 - 
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1 - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1 - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat - rm 
-f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat diff --git a/.github/workflows/cirun-asv.yml b/.github/workflows/cirun-asv.yml new file mode 100644 index 0000000000..94f8adea83 --- /dev/null +++ b/.github/workflows/cirun-asv.yml @@ -0,0 +1,91 @@ +name: ASV Benchmark on arm64 graviton cirun + +on: + push: + branches: + - develop + - release-** + pull_request: + branches: + - develop + - release-** + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read # to fetch code (actions/checkout) + +env: + # GITHUB_TOKEN: ${{ secrets.OB_BENCH_TOKEN }} + # BENCHMARKS_REPO: ev-br/ob-bench-asv + ASV_CONFIG: asv.conf.json + MACHINE_NAME: github-actions-cirun-graviton # to identify github actions machine as hostname changes everytime + +jobs: + build: + if: "github.repository == 'OpenMathLib/OpenBLAS'" + runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}" + + strategy: + fail-fast: false + matrix: + fortran: [gfortran] + build: [make] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # To fetch all commits to be able to generate benchmarks html + + - name: Print system information + run: | + if [ "$RUNNER_OS" == "Linux" ]; then + cat /proc/cpuinfo + else + echo "::error::$RUNNER_OS not supported" + exit 1 + fi + + - name: Install system dependencies + run: | + if [ "$RUNNER_OS" == "Linux" ]; then + sudo apt update + sudo apt-get install -y gfortran cmake ccache libtinfo5 python3-pip pkg-config + else + echo "::error::$RUNNER_OS not supported" + exit 1 + fi + + - name: Install python dependencies + run: | + pip3 install numpy meson meson-python ninja build asv virtualenv + # pip3 install scipy_openblas32 + # 
install the nightly OpenBLAS wheel + pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scipy-openblas32 + python3 -c'import scipy_openblas32 as so; print(so.get_pkg_config())' > scipy_openblas.pc + export PKG_CONFIG_PATH=$PWD + echo ">>>> PKG_CONFIG" $PKG_CONFIG_PATH + cat scipy_openblas.pc + + - name: Set and log asv machine configuration + run: | + cd benchmark/pybench/asv + python3 -m asv machine --yes --config asv.conf.json + echo "Machine Configuration:" + cat ~/.asv-machine.json + rm ~/.asv-machine.json + + echo "Setting machine name to $MACHINE_NAME" + python3 -m asv machine --machine $MACHINE_NAME --yes --config $ASV_CONFIG -v + cat ~/.asv-machine.json + + - name: Run benchmarks + run: | + echo ${{ github.workspace}} + cd benchmark/pybench/asv + python3 -m asv run --config $ASV_CONFIG -v + env: + PKG_CONFIG_PATH: ${{ github.workspace }} + diff --git a/.github/workflows/codspeed-bench.yml b/.github/workflows/codspeed-bench.yml deleted file mode 100644 index 04befefa9e..0000000000 --- a/.github/workflows/codspeed-bench.yml +++ /dev/null @@ -1,150 +0,0 @@ -name: Run codspeed benchmarks - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - benchmarks: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - fortran: [gfortran] - build: [make] - pyver: ["3.12"] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.pyver }} - - - name: Print system information - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - cat /proc/cpuinfo - fi - - - name: Install Dependencies - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - sudo apt-get update - sudo apt-get install -y gfortran cmake ccache libtinfo5 - else - echo "::error::$RUNNER_OS 
not supported" - exit 1 - fi - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - # We include the commit sha in the cache key, as new cache entries are - # only created if there is no existing entry for the key yet. - # GNU make and cmake call the compilers differently. It looks like - # that causes the cache to mismatch. Keep the ccache for both build - # tools separate to avoid polluting each other. - key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} - # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} - ccache-${{ runner.os }}-${{ matrix.build }} - - - name: Write out the .pc - run: | - cd benchmark/pybench - cat > openblas.pc << EOF - libdir=${{ github.workspace }} - includedir= ${{ github.workspace }} - openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64 - version=0.0.99 - extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas - Name: openblas - Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version - Version: ${version} - URL: https://github.com/xianyi/OpenBLAS - Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }} - Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas - Cflags: -I${{ github.workspace}} - EOF - cat openblas.pc - - - name: Configure ccache - run: | - if [ "${{ matrix.build }}" = "make" ]; then - # Add ccache to path - if [ "$RUNNER_OS" = "Linux" ]; then - echo "/usr/lib/ccache" >> $GITHUB_PATH - elif [ "$RUNNER_OS" = "macOS" ]; then - echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - fi - # Limit the maximum size and switch on 
compression to avoid exceeding the total disk or cache quota (5 GB). - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: Build OpenBLAS - run: | - case "${{ matrix.build }}" in - "make") - make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" - ;; - "cmake") - mkdir build && cd build - cmake -DDYNAMIC_ARCH=1 \ - -DNOFORTRAN=0 \ - -DBUILD_WITHOUT_LAPACK=0 \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ - .. - cmake --build . - ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac - - - name: Show ccache status - continue-on-error: true - run: ccache -s - - - name: Install benchmark dependencies - run: pip install meson ninja numpy pytest pytest-codspeed --user - - - name: Build the wrapper - run: | - cd benchmark/pybench - export PKG_CONFIG_PATH=$PWD - meson setup build --prefix=$PWD/build-install - meson install -C build - # - # sanity check - cd build/openblas_wrap - python -c'import _flapack; print(dir(_flapack))' - - - name: Run benchmarks - uses: CodSpeedHQ/action@v2 - with: - token: ${{ secrets.CODSPEED_TOKEN }} - run: | - cd benchmark/pybench - export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/ - OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed - diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index 203097812f..0000000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Publish docs via GitHub Pages -on: - push: - branches: - - develop -jobs: - build: - name: Deploy docs - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - 
python-version: "3.10" - - run: pip install mkdocs mkdocs-material - # mkdocs gh-deploy command only builds to the top-level, hence building then deploying ourselves - - run: mkdocs build - - name: Deploy docs - uses: peaceiris/actions-gh-pages@v3 - if: ${{ github.ref == 'refs/heads/develop' }} - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./site - destination_dir: docs/ diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml deleted file mode 100644 index 669aa81168..0000000000 --- a/.github/workflows/dynamic_arch.yml +++ /dev/null @@ -1,371 +0,0 @@ -name: continuous build - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - build: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: ${{ matrix.os }} - - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest] - fortran: [gfortran, flang] - build: [cmake, make] - exclude: - - os: macos-latest - fortran: flang - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Print system information - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - cat /proc/cpuinfo - elif [ "$RUNNER_OS" == "macOS" ]; then - sysctl -a | grep machdep.cpu - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - - - name: Install Dependencies - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - sudo apt-get update - sudo apt-get install -y gfortran cmake ccache libtinfo5 - elif [ "$RUNNER_OS" == "macOS" ]; then - # It looks like "gfortran" isn't working correctly unless "gcc" is re-installed. 
- brew reinstall gcc - brew install coreutils cmake ccache - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - # We include the commit sha in the cache key, as new cache entries are - # only created if there is no existing entry for the key yet. - # GNU make and cmake call the compilers differently. It looks like - # that causes the cache to mismatch. Keep the ccache for both build - # tools separate to avoid polluting each other. - key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} - # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} - ccache-${{ runner.os }}-${{ matrix.build }} - - - name: Configure ccache - run: | - if [ "${{ matrix.build }}" = "make" ]; then - # Add ccache to path - if [ "$RUNNER_OS" = "Linux" ]; then - echo "/usr/lib/ccache" >> $GITHUB_PATH - elif [ "$RUNNER_OS" = "macOS" ]; then - echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH - else - echo "::error::$RUNNER_OS not supported" - exit 1 - fi - fi - # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). 
- test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: Build OpenBLAS - run: | - if [ "${{ matrix.fortran }}" = "flang" ]; then - # download and install classic flang - cd /usr/ - sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz - sudo tar xf flang-20190329-x86-70.tgz - sudo rm flang-20190329-x86-70.tgz - cd - - fi - case "${{ matrix.build }}" in - "make") - make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" - ;; - "cmake") - mkdir build && cd build - cmake -DDYNAMIC_ARCH=1 \ - -DNOFORTRAN=0 \ - -DBUILD_WITHOUT_LAPACK=0 \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ - .. - cmake --build . - ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac - - - name: Show ccache status - continue-on-error: true - run: ccache -s - - - name: Run tests - timeout-minutes: 60 - run: | - case "${{ matrix.build }}" in - "make") - MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' - echo "::group::Tests in 'test' directory" - make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'ctest' directory" - make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - echo "::group::Tests in 'utest' directory" - make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" - echo "::endgroup::" - ;; - "cmake") - cd build && ctest - ;; - *) - echo "::error::Configuration not supported" - exit 1 - ;; - esac - - - msys2: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: windows-latest - - strategy: - fail-fast: false - matrix: - msystem: [UCRT64, MINGW32, CLANG64, CLANG32] - idx: [int32, int64] - build-type: [Release] - include: - - msystem: 
UCRT64 - idx: int32 - target-prefix: mingw-w64-ucrt-x86_64 - fc-pkg: fc - - msystem: MINGW32 - idx: int32 - target-prefix: mingw-w64-i686 - fc-pkg: fc - - msystem: CLANG64 - idx: int32 - target-prefix: mingw-w64-clang-x86_64 - fc-pkg: fc - # Compiling with Flang 16 seems to cause test errors on machines - # with AVX512 instructions. Revisit after MSYS2 distributes Flang 17. - no-avx512-flags: -DNO_AVX512=1 - - msystem: CLANG32 - idx: int32 - target-prefix: mingw-w64-clang-i686 - fc-pkg: cc - c-lapack-flags: -DC_LAPACK=ON - - msystem: UCRT64 - idx: int64 - idx64-flags: -DBINARY=64 -DINTERFACE64=1 - target-prefix: mingw-w64-ucrt-x86_64 - fc-pkg: fc - - msystem: CLANG64 - idx: int64 - idx64-flags: -DBINARY=64 -DINTERFACE64=1 - target-prefix: mingw-w64-clang-x86_64 - fc-pkg: fc - # Compiling with Flang 16 seems to cause test errors on machines - # with AVX512 instructions. Revisit after MSYS2 distributes Flang 17. - no-avx512-flags: -DNO_AVX512=1 - - msystem: UCRT64 - idx: int32 - target-prefix: mingw-w64-ucrt-x86_64 - fc-pkg: fc - build-type: None - exclude: - - msystem: MINGW32 - idx: int64 - - msystem: CLANG32 - idx: int64 - - defaults: - run: - # Use MSYS2 bash as default shell - shell: msys2 {0} - - env: - CHERE_INVOKING: 1 - - steps: - - name: Get CPU name - shell: pwsh - run : | - Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name - - - name: Install build dependencies - uses: msys2/setup-msys2@v2 - with: - msystem: ${{ matrix.msystem }} - update: true - release: false # Use pre-installed version - install: >- - base-devel - ${{ matrix.target-prefix }}-cc - ${{ matrix.target-prefix }}-${{ matrix.fc-pkg }} - ${{ matrix.target-prefix }}-cmake - ${{ matrix.target-prefix }}-ninja - ${{ matrix.target-prefix }}-ccache - - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Prepare ccache - # Get cache location of ccache - # Create key that is used in action/cache/restore and action/cache/save steps - id: ccache-prepare - run: | - 
echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT - # We include the commit sha in the cache key, as new cache entries are - # only created if there is no existing entry for the key yet. - echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT - - - name: Restore ccache - uses: actions/cache/restore@v3 - with: - path: ${{ steps.ccache-prepare.outputs.ccachedir }} - key: ${{ steps.ccache-prepare.outputs.key }} - # Restore a matching ccache cache entry. Prefer same branch. - restore-keys: | - ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }} - ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }} - - - name: Configure ccache - # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota. - run: | - which ccache - test -d ${{ steps.ccache-prepare.outputs.ccachedir }} || mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }} - echo "max_size = 250M" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf - echo "compression = true" >> ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf - ccache -p - ccache -s - echo $HOME - cygpath -w $HOME - - - name: Configure OpenBLAS - run: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \ - -DBUILD_SHARED_LIBS=ON \ - -DBUILD_STATIC_LIBS=ON \ - -DDYNAMIC_ARCH=ON \ - -DUSE_THREAD=ON \ - -DNUM_THREADS=64 \ - -DTARGET=CORE2 \ - ${{ matrix.idx64-flags }} \ - ${{ matrix.c-lapack-flags }} \ - ${{ matrix.no-avx512-flags }} \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ - .. - - - name: Build OpenBLAS - run: cd build && cmake --build . 
- - - name: Show ccache status - continue-on-error: true - run: ccache -s - - - name: Save ccache - # Save the cache after we are done (successfully) building - uses: actions/cache/save@v3 - with: - path: ${{ steps.ccache-prepare.outputs.ccachedir }} - key: ${{ steps.ccache-prepare.outputs.key }} - - - name: Run tests - id: run-ctest - timeout-minutes: 60 - run: cd build && ctest - - - name: Re-run tests - if: always() && (steps.run-ctest.outcome == 'failure') - timeout-minutes: 60 - run: | - cd build - echo "::group::Re-run ctest" - ctest --rerun-failed --output-on-failure || true - echo "::endgroup::" - echo "::group::Log from these tests" - [ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log - echo "::endgroup::" - - - cross_build: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - include: - - target: mips64el - triple: mips64el-linux-gnuabi64 - opts: DYNAMIC_ARCH=1 TARGET=GENERIC - - target: riscv64 - triple: riscv64-linux-gnu - opts: TARGET=RISCV64_GENERIC - - target: mipsel - triple: mipsel-linux-gnu - opts: TARGET=MIPS1004K - - target: alpha - triple: alpha-linux-gnu - opts: TARGET=EV4 - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Install Dependencies - run: | - sudo apt-get update - sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.target }} - - - name: Configure ccache - run: | - # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). 
- test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - - name: Build OpenBLAS - run: | - make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }} diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml deleted file mode 100644 index da7f6c9a0c..0000000000 --- a/.github/workflows/loongarch64.yml +++ /dev/null @@ -1,133 +0,0 @@ -name: loongarch64 qemu test - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - TEST: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - - target: LOONGSONGENERIC - triple: loongarch64-unknown-linux-gnu - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC - - target: LOONGSON3R5 - triple: loongarch64-unknown-linux-gnu - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5 - - target: LOONGSON2K1000 - triple: loongarch64-unknown-linux-gnu - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000 - - target: DYNAMIC_ARCH - triple: loongarch64-unknown-linux-gnu - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Install APT deps - run: | - sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make ccache - - - name: Download and install loongarch64-toolchain - run: | - wget https://github.com/sunhaiyong1978/CLFS-for-LoongArch/releases/download/8.1/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz - #wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz - tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt - - 
- name: Checkout qemu - uses: actions/checkout@v3 - with: - repository: qemu/qemu - path: qemu - ref: master - - - name: Install qemu - run: | - cd qemu - ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static - make -j$(nproc) - make install - - - name: Set env - run: | - echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV - echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> $GITHUB_ENV - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.target }} - - - name: Configure ccache - run: | - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: Disable utest dsdot:dsdot_n_1 - run: | - echo -n > utest/test_dsdot.c - echo "Due to the qemu versions 7.2 causing utest cases to fail," - echo "the utest dsdot:dsdot_n_1 have been temporarily disabled." 
- - - name: Build OpenBLAS - run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) - - - name: Test - run: | - export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH - qemu-loongarch64 ./utest/openblas_utest - qemu-loongarch64 ./utest/openblas_utest_ext - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1 - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < 
./test/cblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat diff --git a/.github/workflows/loongarch64_clang.yml b/.github/workflows/loongarch64_clang.yml deleted file mode 100644 index d08e56f627..0000000000 --- a/.github/workflows/loongarch64_clang.yml +++ /dev/null @@ -1,135 +0,0 @@ -name: loongarch64 clang qemu test - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - TEST: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - - target: LOONGSONGENERIC - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC - - target: LOONGSON3R5 - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5 - - target: LOONGSON2K1000 - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000 - - target: 
DYNAMIC_ARCH - opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Install libffi6 - run: | - wget http://ftp.ca.debian.org/debian/pool/main/libf/libffi/libffi6_3.2.1-9_amd64.deb - sudo dpkg -i libffi6_3.2.1-9_amd64.deb - - - name: Install APT deps - run: | - sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make ccache - - - name: Download and install loongarch64-toolchain - run: | - wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz - wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz - tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt - tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt - - - name: Checkout qemu - uses: actions/checkout@v3 - with: - repository: qemu/qemu - path: qemu - ref: master - - - name: Install qemu - run: | - cd qemu - ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static - make -j$(nproc) - make install - - - name: Set env - run: | - echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.target }} - - - name: Configure ccache - run: | - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: Disable utest dsdot:dsdot_n_1 - 
run: | - echo -n > utest/test_dsdot.c - echo "Due to the qemu versions 7.2 causing utest cases to fail," - echo "the utest dsdot:dsdot_n_1 have been temporarily disabled." - - - name: Build OpenBLAS - run: make CC='ccache clang --target=loongarch64-linux-gnu --sysroot=/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/loongarch64-linux-gnu/sysroot/ -static' FC='ccache loongarch64-linux-gnu-gfortran -static' HOSTCC='ccache clang' CROSS_SUFFIX=llvm- NO_SHARED=1 ${{ matrix.opts }} -j$(nproc) - - - name: Test - run: | - export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH - qemu-loongarch64 ./utest/openblas_utest - qemu-loongarch64 ./utest/openblas_utest_ext - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1 - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1 - OPENBLAS_NUM_THREADS=2 
qemu-loongarch64 ./test/zblat1 - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat - diff --git a/.github/workflows/mips64.yml b/.github/workflows/mips64.yml deleted file mode 100644 index 1491aff78b..0000000000 --- a/.github/workflows/mips64.yml +++ /dev/null @@ -1,123 +0,0 @@ -name: mips64 qemu test - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - TEST: - if: "github.repository == 'OpenMathLib/OpenBLAS'" 
- runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - - target: MIPS64_GENERIC - triple: mips64el-linux-gnuabi64 - opts: NO_SHARED=1 TARGET=MIPS64_GENERIC - - target: SICORTEX - triple: mips64el-linux-gnuabi64 - opts: NO_SHARED=1 TARGET=SICORTEX - - target: I6400 - triple: mipsisa64r6el-linux-gnuabi64 - opts: NO_SHARED=1 TARGET=I6400 - - target: P6600 - triple: mipsisa64r6el-linux-gnuabi64 - opts: NO_SHARED=1 TARGET=P6600 - - target: I6500 - triple: mipsisa64r6el-linux-gnuabi64 - opts: NO_SHARED=1 TARGET=I6500 - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: install build deps - run: | - sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ - gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross - - - name: checkout qemu - uses: actions/checkout@v3 - with: - repository: qemu/qemu - path: qemu - ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2 - - - name: build qemu - run: | - cd qemu - ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system - make -j$(nproc) - make install - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.target }} - - - name: Configure ccache - run: | - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: build OpenBLAS - run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) - - - name: test - run: | - export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH - qemu-mips64el ./utest/openblas_utest - qemu-mips64el 
./utest/openblas_utest_ext - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1 - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1 - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1 - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT2.SUMM - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat - OPENBLAS_NUM_THREADS=2 qemu-mips64el 
./test/zblat2 < ./test/zblat2.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat - rm -f ./test/?BLAT3.SUMM - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat - OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat diff --git a/.github/workflows/nightly-Homebrew-build.yml b/.github/workflows/nightly-Homebrew-build.yml deleted file mode 100644 index ca57fba709..0000000000 --- a/.github/workflows/nightly-Homebrew-build.yml +++ /dev/null @@ -1,90 +0,0 @@ -# Only the "head" branch of the OpenBLAS package is tested - -on: - push: - paths: - - '**/nightly-Homebrew-build.yml' - pull_request: - branches: - - develop - paths: - - '**/nightly-Homebrew-build.yml' - schedule: - - cron: 45 7 * * * -# This is 7:45 AM UTC daily, late at night in the USA - -# Since push and pull_request will still always be building and testing the `develop` branch, -# it only makes sense to test if this file has been changed - -name: Nightly-Homebrew-Build - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - build-OpenBLAS-with-Homebrew: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: macos-latest - env: - DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer - HOMEBREW_DEVELOPER: "ON" - HOMEBREW_DISPLAY_INSTALL_TIMES: "ON" - HOMEBREW_NO_ANALYTICS: "ON" - HOMEBREW_NO_AUTO_UPDATE: "ON" - 
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON" - HOMEBREW_NO_INSTALL_CLEANUP: "ON" - HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON" - HOMEBREW_NO_INSTALL_FROM_API: "ON" - - steps: - - name: Random delay for cron job - run: | - delay=$(( RANDOM % 600 )) - printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}" - sleep ${delay} - if: github.event_name == 'schedule' - - - uses: actions/checkout@v2 - # This isn't even needed, technically. Homebrew will get `develop` via git - - - name: Update Homebrew - if: github.event_name != 'pull_request' - run: brew update || true - - - name: Install prerequisites - run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas - - - name: Install and bottle OpenBLAS - run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas - # the HEAD flags tell Homebrew to build the develop branch fetch via git - - - name: Create bottle - run: | - brew bottle -v openblas - mkdir bottles - mv *.bottle.tar.gz bottles - - - name: Upload bottle - uses: actions/upload-artifact@v1 - with: - name: openblas--HEAD.catalina.bottle.tar.gz - path: bottles - - - name: Show linkage - run: brew linkage -v openblas - - - name: Test openblas - run: brew test --HEAD --verbose openblas - - - name: Audit openblas formula - run: | - brew audit --strict openblas - brew cat openblas - - - name: Post logs on failure - if: failure() - run: brew gist-logs --with-hostname -v openblas diff --git a/.github/workflows/riscv64_vector.yml b/.github/workflows/riscv64_vector.yml deleted file mode 100644 index dd6fe9ca80..0000000000 --- a/.github/workflows/riscv64_vector.yml +++ /dev/null @@ -1,253 +0,0 @@ -name: riscv64 zvl256b qemu test - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -permissions: - contents: read # to fetch code (actions/checkout) - -jobs: - TEST: - if: "github.repository == 'OpenMathLib/OpenBLAS'" - runs-on: 
ubuntu-latest - env: - triple: riscv64-unknown-linux-gnu - riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain - riscv_gnu_toolchain_version: 13.2.0 - riscv_gnu_toolchain_nightly_download_path: /releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz - strategy: - fail-fast: false - matrix: - include: - - target: RISCV64_ZVL128B - opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64 - qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64 - - target: RISCV64_ZVL256B - opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64 - qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64 - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: install build deps - run: | - sudo apt-get update - sudo apt-get install autoconf automake autotools-dev ninja-build make \ - libgomp1-riscv64-cross ccache - wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path} - tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt - - - name: Compilation cache - uses: actions/cache@v3 - with: - path: ~/.ccache - key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} - restore-keys: | - ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} - ccache-${{ runner.os }}-${{ matrix.target }} - - - name: Configure ccache - run: | - test -d ~/.ccache || mkdir -p ~/.ccache - echo "max_size = 300M" > ~/.ccache/ccache.conf - echo "compression = true" >> ~/.ccache/ccache.conf - ccache -s - - - name: build OpenBLAS libs - run: | - export PATH="/opt/riscv/bin:$PATH" - make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \ - CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \ - AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \ - RANLIB='ccache 
${triple}-ranlib' \ - FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \ - HOSTCC=gcc HOSTFC=gfortran -j$(nproc) - - - name: build OpenBLAS tests - run: | - export PATH="/opt/riscv/bin:$PATH" - make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \ - CC='${triple}-gcc' \ - AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \ - RANLIB='ccache ${triple}-ranlib' \ - FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \ - HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests - - - name: build lapack-netlib tests - working-directory: ./lapack-netlib/TESTING - run: | - export PATH="/opt/riscv/bin:$PATH" - make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \ - CC='${triple}-gcc' \ - AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \ - RANLIB='ccache ${triple}-ranlib' \ - FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \ - HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \ - LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \ - LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \ - LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz \ - - - name: OpenBLAS tests - shell: bash - run: | - export PATH="/opt/riscv/bin:$PATH" - export QEMU_CPU=${{ matrix.qemu_cpu }} - rm -rf ./test_out - mkdir -p ./test_out - run_test() { local DIR=$1; local CMD=$2; local DATA=$3; local OUTPUT="./test_out/$DIR.$CMD"; \ - echo "`pwd`/$DIR/$CMD $DIR/$DATA" >> $OUTPUT; \ - if [[ -z $DATA ]]; then qemu-riscv64 ./$DIR/$CMD |& tee $OUTPUT ; \ - else qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee $OUTPUT ; fi ; \ - RV=$? 
; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi \ - } - run_test test cblat1 & - run_test test cblat2 cblat2.dat & - run_test test cblat3 cblat3.dat & - run_test test dblat1 & - run_test test dblat2 dblat2.dat & - run_test test dblat3 dblat3.dat & - run_test test sblat1 & - run_test test sblat2 sblat2.dat & - run_test test sblat3 sblat3.dat & - run_test test zblat1 & - run_test test zblat2 zblat2.dat & - run_test test zblat3 zblat3.dat & - run_test ctest xccblat1 & - run_test ctest xccblat2 cin2 & - run_test ctest xccblat3 cin3 & - run_test ctest xdcblat1 & - run_test ctest xdcblat2 din2 & - run_test ctest xdcblat3 din3 & - run_test ctest xscblat1 & - run_test ctest xscblat2 sin2 & - run_test ctest xscblat3 sin3 & - run_test ctest xzcblat1 & - run_test ctest xzcblat2 zin2 & - run_test ctest xzcblat3 zin3 & - wait - while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*) - if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi - - - name: netlib tests - shell: bash - run: | - : # these take a very long time - echo "Skipping netlib tests in CI" - exit 0 - : # comment out exit above to enable the tests - : # probably we want to identify a subset to run in CI - export PATH="/opt/riscv/bin:$PATH" - export QEMU_CPU=${{ matrix.qemu_cpu }} - rm -rf ./test_out - mkdir -p ./test_out - run_test() { local OUTPUT="./test_out/$1"; local DATA="./lapack-netlib/TESTING/$2"; local CMD="./lapack-netlib/TESTING/$3"; \ - echo "$4" >> $OUTPUT; \ - echo "$CMD" >> $OUTPUT; \ - qemu-riscv64 $CMD < $DATA |& tee $OUTPUT; \ - RV=$? 
; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi; \ - if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi ; \ - if grep -q rror $OUTPUT | grep -v -q "passed" | grep -v "largest error" ; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi \ - } - run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" & - run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" & - run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" & - run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" & - run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" & - run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" & - run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" & - run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" & - run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" & - run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" & - run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & - run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" & - run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" & - run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" & - 
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" & - run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" & - run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" & - run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" & - run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" & - run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" & - run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" & - run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" & - run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" & - run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" & - run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" & - run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" & - run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" & - run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" & - run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & - run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" & - run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" & - run_test ced.out ced.in 
EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" & - run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" & - run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" & - run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" & - run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" & - run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" & - run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" & - run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" & - run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" & - run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" & - run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" & - run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" & - run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" & - run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" & - run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" & - run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & - run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" & - run_test dec.out dec.in 
EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" & - run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" & - run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" & - run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" & - run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" & - run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" & - run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" & - run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" & - run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" & - run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" & - run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" & - run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" & - run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" & - run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" & - run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" & - run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" & - run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & - run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test zse2.out se2.in 
EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" & - run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" & - run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" & - run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" & - run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" & - run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" & - run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" & - run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" & - run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" & - run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" & - run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" & - run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" & - run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" & - run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" & - run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" & - run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" & - run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" & - run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" & - wait - while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*) - python 
./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary - TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)" - NUMERICAL_ERRORS=-1 - OTHER_ERRORS=-1 - . <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary - if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi - if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi diff --git a/benchmark/pybench/asv/README.md b/benchmark/pybench/asv/README.md new file mode 100644 index 0000000000..1436e08d30 --- /dev/null +++ b/benchmark/pybench/asv/README.md @@ -0,0 +1 @@ +Benchmark graphs are at https://ev-br.github.io/ob-bench-asv/ diff --git a/benchmark/pybench/asv/asv.conf.json b/benchmark/pybench/asv/asv.conf.json new file mode 100644 index 0000000000..8323173318 --- /dev/null +++ b/benchmark/pybench/asv/asv.conf.json @@ -0,0 +1,195 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "OpenBLAS", + + // The project's homepage + "project_url": "https://github.com/OpenMathLib/OpenBLAS/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "../../..", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "benchmarks/pybench/asv", + + // Customizable commands for building the project. + // See asv.conf.json documentation. + // To build the package using pyproject.toml (PEP518), uncomment the following lines + "build_command": [ + "python -m pip install build", + "python -m build", + // https://github.com/scipy/scipy/issues/20574 + "PIP_NO_BUILD_ISOLATION=false python -m pip wheel . 
--no-deps --no-index -w {build_cache_dir} {build_dir}" + ], + // To build the package using setuptools and a setup.py file, uncomment the following lines + // "build_command": [ + // "python setup.py build", + // "python -mpip wheel -w {build_cache_dir} {build_dir}" + // ], + + // Customizable commands for installing and uninstalling the project. + // See asv.conf.json documentation. + "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"], + "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + + // List of branches to benchmark. If not provided, defaults to "main" + // (for git) or "default" (for mercurial). + "branches": ["develop"], // for git + // "branches": ["default"], // for mercurial + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv", "mamba" (above 3.8) + // or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "virtualenv", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + // "show_commit_url": "http://github.com/owner/project/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["3.8", "3.12"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + // "conda_channels": ["conda-forge", "defaults"], + + // A conda environment file that is used for environment creation. 
+ // "conda_environment_file": "environment.yml", + + // The matrix of dependencies to test. Each key of the "req" + // requirements dictionary is the name of a package (in PyPI) and + // the values are version numbers. An empty list or empty string + // indicates to just test against the default (latest) + // version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed + // via pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + // The ``@env`` and ``@env_nobuild`` keys contain the matrix of + // environment variables to pass to build and benchmark commands. + // An environment will be created for every combination of the + // cartesian product of the "@env" variables in this matrix. + // Variables in "@env_nobuild" will be passed to every environment + // during the benchmark phase, but will not trigger creation of + // new environments. A value of ``null`` means that the variable + // will not be set for the current combination. + // + // "matrix": { + // "req": { + // "numpy": ["1.6", "1.7"], + // "six": ["", null], // test with and without six installed + // "pip+emcee": [""] // emcee is only available for install with pip. + // }, + // "env": {"ENV_VAR_1": ["val1", "val2"]}, + // "env_nobuild": {"ENV_VAR_2": ["val3", null]}, + // }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. 
The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // - req + // Required packages + // - env + // Environment variables + // - env_nobuild + // Non-build environment variables + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda + // {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1 + // ], + // + // "include": [ + // // additional env for python3.12 + // {"python": "3.12", "req": {"numpy": "1.26"}, "env_nobuild": {"FOO": "123"}}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "3.12", "req": {"libpython": ""}}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": ".asv/results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": ".asv/html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache results of the recent builds in each + // environment, making them faster to install next time. 
This is + // the number of builds to keep, per environment. + // "build_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // }, + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} diff --git a/benchmark/pybench/asv/benchmarks/__init__.py b/benchmark/pybench/asv/benchmarks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmark/pybench/asv/benchmarks/benchmarks.py b/benchmark/pybench/asv/benchmarks/benchmarks.py new file mode 100644 index 0000000000..120615a57c --- /dev/null +++ b/benchmark/pybench/asv/benchmarks/benchmarks.py @@ -0,0 +1,258 @@ +# Write the benchmarking functions here. +# See "Writing benchmarks" in the asv docs for more information. + +''' +class TimeSuite: + """ + An example benchmark that times the performance of various kinds + of iterating over dictionaries in Python. 
+ """ + def setup(self): + self.d = {} + for x in range(500): + self.d[x] = None + + def time_keys(self): + for key in self.d.keys(): + pass + + def time_values(self): + for value in self.d.values(): + pass + + def time_range(self): + d = self.d + for key in range(500): + d[key] + + +class MemSuite: + def mem_list(self): + return [0] * 256 +''' + + +import numpy as np +from openblas_wrap import ( + # level 1 + dnrm2, ddot, daxpy, + # level 3 + dgemm, dsyrk, + # lapack + dgesv, # linalg.solve + dgesdd, dgesdd_lwork, # linalg.svd + dsyev, dsyev_lwork, # linalg.eigh +) + +# ### BLAS level 1 ### + +# dnrm2 + +dnrm2_sizes = [100, 1000] + +def run_dnrm2(n, x, incx): + res = dnrm2(x, n, incx=incx) + return res + + + +class Nrm2: + + params = [100, 1000] + param_names = ["size"] + + def setup(self, n): + rndm = np.random.RandomState(1234) + self.x = rndm.uniform(size=(n,)).astype(float) + + def time_dnrm2(self, n): + run_dnrm2(n, self.x, 1) + + +# ddot + +ddot_sizes = [100, 1000] + +def run_ddot(x, y,): + res = ddot(x, y) + return res + + +class DDot: + params = ddot_sizes + param_names = ["size"] + + def setup(self, n): + rndm = np.random.RandomState(1234) + self.x = np.array(rndm.uniform(size=(n,)), dtype=float) + self.y = np.array(rndm.uniform(size=(n,)), dtype=float) + + def time_ddot(self, n): + run_ddot(self.x, self.y) + + + +# daxpy + +daxpy_sizes = [100, 1000] + +def run_daxpy(x, y,): + res = daxpy(x, y, a=2.0) + return res + + +class Daxpy: + params = daxpy_sizes + param_names = ["size"] + + def setup(self, n): + rndm = np.random.RandomState(1234) + self.x = np.array(rndm.uniform(size=(n,)), dtype=float) + self.y = np.array(rndm.uniform(size=(n,)), dtype=float) + + def time_daxpy(self, n): + run_daxpy(self.x, self.y) + + + +# ### BLAS level 3 ### + +# dgemm + +gemm_sizes = [100, 1000] + +def run_dgemm(a, b, c): + alpha = 1.0 + res = dgemm(alpha, a, b, c=c, overwrite_c=True) + return res + + +class Dgemm: + params = gemm_sizes + param_names = ["size"] + + def 
setup(self, n): + rndm = np.random.RandomState(1234) + self.a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') + self.b = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') + self.c = np.empty((n, n), dtype=float, order='F') + + def time_dgemm(self, n): + run_dgemm(self.a, self.b, self.c) + + +# dsyrk + +syrk_sizes = [100, 1000] + + +def run_dsyrk(a, c): + res = dsyrk(1.0, a, c=c, overwrite_c=True) + return res + + +class DSyrk: + params = syrk_sizes + param_names = ["size"] + + def setup(self, n): + rndm = np.random.RandomState(1234) + self.a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') + self.c = np.empty((n, n), dtype=float, order='F') + + def time_dsyrk(self, n): + run_dsyrk(self.a, self.c) + + +# ### LAPACK ### + +# linalg.solve + +dgesv_sizes = [100, 1000] + + +def run_dgesv(a, b): + res = dgesv(a, b, overwrite_a=True, overwrite_b=True) + return res + + +class Dgesv: + params = dgesv_sizes + param_names = ["size"] + + def setup(self, n): + rndm = np.random.RandomState(1234) + self.a = (np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') + + np.eye(n, order='F')) + self.b = np.array(rndm.uniform(size=(n, 1)), order='F') + + def time_dgesv(self, n): + run_dgesv(self.a, self.b) + + # XXX: how to run asserts? 
+ # lu, piv, x, info = benchmark(run_gesv, a, b) + # assert lu is a + # assert x is b + # assert info == 0 + + +# linalg.svd + +dgesdd_sizes = ["100, 5", "1000, 222"] + + +def run_dgesdd(a, lwork): + res = dgesdd(a, lwork=lwork, full_matrices=False, overwrite_a=False) + return res + + +class Dgesdd: + params = dgesdd_sizes + param_names = ["(m, n)"] + + def setup(self, mn): + m, n = (int(x) for x in mn.split(",")) + + rndm = np.random.RandomState(1234) + a = np.array(rndm.uniform(size=(m, n)), dtype=float, order='F') + + lwork, info = dgesdd_lwork(m, n) + lwork = int(lwork) + assert info == 0 + + self.a, self.lwork = a, lwork + + def time_dgesdd(self, mn): + run_dgesdd(self.a, self.lwork) + + +# linalg.eigh + +dsyev_sizes = [50, 200] + + +def run_dsyev(a, lwork): + res = dsyev(a, lwork=lwork, overwrite_a=True) + return res + + +class Dsyev: + params = dsyev_sizes + param_names = ["size"] + + def setup(self, n): + rndm = np.random.RandomState(1234) + a = rndm.uniform(size=(n, n)) + a = np.asarray(a + a.T, dtype=float, order='F') + a_ = a.copy() + + lwork, info = dsyev_lwork(n) + lwork = int(lwork) + assert info == 0 + + self.a = a_ + self.lwork = lwork + + def time_dsyev(self, n): + run_dsyev(self.a, self.lwork) + diff --git a/benchmark/pybench/asv/meson.build b/benchmark/pybench/asv/meson.build new file mode 100644 index 0000000000..88d05be7d7 --- /dev/null +++ b/benchmark/pybench/asv/meson.build @@ -0,0 +1,48 @@ +# +# Taken from SciPy (of course) +# +project( + 'openblas-wrap', + 'c', 'fortran', + version: '0.1', + license: 'BSD-3', + meson_version: '>= 1.1.0', + default_options: [ + 'buildtype=debugoptimized', + 'b_ndebug=if-release', + 'c_std=c17', + 'fortran_std=legacy', + ], +) + +py3 = import('python').find_installation(pure: false) +py3_dep = py3.dependency() + +cc = meson.get_compiler('c') + +_global_c_args = cc.get_supported_arguments( + '-Wno-unused-but-set-variable', + '-Wno-unused-function', + '-Wno-conversion', + '-Wno-misleading-indentation', +) 
+add_project_arguments(_global_c_args, language : 'c') + +# We need -lm for all C code (assuming it uses math functions, which is safe to +# assume for SciPy). For C++ it isn't needed, because libstdc++/libc++ is +# guaranteed to depend on it. For Fortran code, Meson already adds `-lm`. +m_dep = cc.find_library('m', required : false) +if m_dep.found() + add_project_link_arguments('-lm', language : 'c') +endif + +generate_f2pymod = find_program('openblas_wrap/generate_f2pymod.py') + +openblas = dependency('scipy_openblas', method: 'pkg-config', required: true) +openblas_dep = declare_dependency( + dependencies: openblas, + compile_args: [] +) + + +subdir('openblas_wrap') diff --git a/benchmark/pybench/asv/openblas_wrap/__init__.py b/benchmark/pybench/asv/openblas_wrap/__init__.py new file mode 100644 index 0000000000..9958423ac3 --- /dev/null +++ b/benchmark/pybench/asv/openblas_wrap/__init__.py @@ -0,0 +1,29 @@ +""" +Trampoline to hide the LAPACK details (scipy.lapack.linalg or scipy_openblas32 or...) +from benchmarking. +""" + +__version__ = "0.1" + +import scipy_openblas32 # preload symbols. 
typically done in _distributor_init.py + +#from scipy.linalg.blas import ( +from ._flapack import ( + # level 1 + scipy_dnrm2 as dnrm2, + scipy_ddot as ddot, + scipy_daxpy as daxpy, + # level 3 + scipy_dgemm as dgemm, + scipy_dsyrk as dsyrk, +) + +#from scipy.linalg.lapack import ( +from openblas_wrap._flapack import ( + # linalg.solve + scipy_dgesv as dgesv, + # linalg.svd + scipy_dgesdd as dgesdd, scipy_dgesdd_lwork as dgesdd_lwork, + # linalg.eigh + scipy_dsyev as dsyev, scipy_dsyev_lwork as dsyev_lwork +) diff --git a/benchmark/pybench/asv/openblas_wrap/_distributor_init.py b/benchmark/pybench/asv/openblas_wrap/_distributor_init.py new file mode 100644 index 0000000000..1779095c75 --- /dev/null +++ b/benchmark/pybench/asv/openblas_wrap/_distributor_init.py @@ -0,0 +1,4 @@ +''' +Helper to preload OpenBLAS from scipy_openblas32 +''' +import scipy_openblas32 diff --git a/benchmark/pybench/asv/openblas_wrap/blas_lapack.pyf.src b/benchmark/pybench/asv/openblas_wrap/blas_lapack.pyf.src new file mode 100644 index 0000000000..76278e0a30 --- /dev/null +++ b/benchmark/pybench/asv/openblas_wrap/blas_lapack.pyf.src @@ -0,0 +1,327 @@ +! +! Taken from scipy/linalg +! +! Shorthand notations +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! +! Level 1 BLAS +! + + +python module _flapack + usercode ''' +#define F_INT int +''' + +interface + + +subroutine axpy(n,a,x,offx,incx,y,offy,incy) + ! Calculate z = a*x+y, where a is scalar. 
+ + callstatement (*f2py_func)(&n,&a,x+offx,&incx,y+offy,&incy) + callprotoargument F_INT*,*,*,F_INT*,*,F_INT* + + dimension(*), intent(in) :: x + dimension(*), intent(in,out,out=z) :: y + optional, intent(in):: a=<1.0,\0,(1.0\,0.0),\2> + integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 + integer optional, intent(in),check(incy>0||incy<0) :: incy = 1 + integer optional, intent(in),depend(x) :: offx=0 + integer optional, intent(in),depend(y) :: offy=0 + check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n + check(len(y)-offy>(n-1)*abs(incy)) :: n + +end subroutine axpy + +function scipy_ddot(n,x,offx,incx,y,offy,incy) result (xy) + ! Computes a vector-vector dot product. + + callstatement scipy_ddot_return_value = (*f2py_func)(&n,x+offx,&incx,y+offy,&incy) + callprotoargument F_INT*,double*,F_INT*,double*,F_INT* + intent(c) scipy_ddot + fortranname F_FUNC(scipy_ddot,DDOT) + + double precision dimension(*), intent(in) :: x + double precision dimension(*), intent(in) :: y + double precision ddot,xy + integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 + integer optional, intent(in),check(incy>0||incy<0) :: incy = 1 + integer optional, intent(in),depend(x) :: offx=0 + integer optional, intent(in),depend(y) :: offy=0 + check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n + check(len(y)-offy>(n-1)*abs(incy)) :: n + +end function scipy_ddot + + +function nrm2(n,x,offx,incx) result(n2) + + nrm2, n2 + + callstatement nrm2_return_value = (*f2py_func)(&n,x+offx,&incx) + callprotoargument F_INT*,*,F_INT* + intent(c) nrm2 + fortranname F_FUNC(nrm2,NRM2) + + dimension(*),intent(in) :: x + + integer optional, intent(in),check(incx>0) :: incx = 1 + + integer optional,intent(in),depend(x) :: offx=0 + check(offx>=0 && offx(n-1)*abs(incx)) :: n + +end function nrm2 + +! +! Level 3 BLAS +! + + +subroutine gemm(m,n,k,alpha,a,b,beta,c,trans_a,trans_b,lda,ka,ldb,kb) + ! Computes a scalar-matrix-matrix product and adds the result to a + ! scalar-matrix product. 
+ ! + ! c = gemm(alpha,a,b,beta=0,c=0,trans_a=0,trans_b=0,overwrite_c=0) + ! Calculate C <- alpha * op(A) * op(B) + beta * C + + callstatement (*f2py_func)((trans_a?(trans_a==2?"C":"T"):"N"), & + (trans_b?(trans_b==2?"C":"T"):"N"),&m,&n,&k,&alpha,a,&lda,b,&ldb,&beta,c,&m) + callprotoargument char*,char*,F_INT*,F_INT*,F_INT*,*,*,F_INT*,*, & + F_INT*,*,*,F_INT* + + integer optional,intent(in),check(trans_a>=0 && trans_a <=2) :: trans_a = 0 + integer optional,intent(in),check(trans_b>=0 && trans_b <=2) :: trans_b = 0 + intent(in) :: alpha + intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2> + + dimension(lda,ka),intent(in) :: a + dimension(ldb,kb),intent(in) :: b + dimension(m,n),intent(in,out,copy),depend(m,n),optional :: c + check(shape(c,0)==m && shape(c,1)==n) :: c + + integer depend(a),intent(hide) :: lda = shape(a,0) + integer depend(a),intent(hide) :: ka = shape(a,1) + integer depend(b),intent(hide) :: ldb = shape(b,0) + integer depend(b),intent(hide) :: kb = shape(b,1) + + integer depend(a,trans_a,ka,lda),intent(hide):: m = (trans_a?ka:lda) + integer depend(a,trans_a,ka,lda),intent(hide):: k = (trans_a?lda:ka) + integer depend(b,trans_b,kb,ldb,k),intent(hide),check(trans_b?kb==k:ldb==k) :: & + n = (trans_b?ldb:kb) + +end subroutine gemm + + +subroutine rk(n,k,alpha,a,beta,c,trans,lower,lda,ka) + ! performs one of the symmetric rank k operations + ! C := alpha*A*A**T + beta*C, or C := alpha*A**T*A + beta*C, + ! + ! c = syrk(alpha,a,beta=0,c=0,trans=0,lower=0,overwrite_c=0) + ! 
+ callstatement (*f2py_func)((lower?"L":"U"), & + (trans?(trans==2?"C":"T"):"N"), &n,&k,&alpha,a,&lda,&beta,c,&n) + callprotoargument char*,char*,F_INT*,F_INT*,*,*,F_INT*,*, & + *,F_INT* + + integer optional, intent(in),check(lower==0||lower==1) :: lower = 0 + integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0 + + intent(in) :: alpha + intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2,\2,\2> + + dimension(lda,ka),intent(in) :: a + dimension(n,n),intent(in,out,copy),depend(n),optional :: c + check(shape(c,0)==n && shape(c,1)==n) :: c + + integer depend(a),intent(hide) :: lda = shape(a,0) + integer depend(a),intent(hide) :: ka = shape(a,1) + + integer depend(a, trans, ka, lda), intent(hide) :: n = (trans ? ka : lda) + integer depend(a, trans, ka, lda), intent(hide) :: k = (trans ? lda : ka) + +end subroutine rk + + +! +! LAPACK +! + +subroutine gesv(n,nrhs,a,piv,b,info) + ! lu,piv,x,info = gesv(a,b,overwrite_a=0,overwrite_b=0) + ! Solve A * X = B. + ! A = P * L * U + ! U is upper diagonal triangular, L is unit lower triangular, + ! piv pivots columns. + + callstatement {F_INT i;(*f2py_func)(&n,&nrhs,a,&n,piv,b,&n,&info);for(i=0;i\*,F_INT*,F_INT*,*,F_INT*,F_INT* + + integer depend(a),intent(hide):: n = shape(a,0) + integer depend(b),intent(hide):: nrhs = shape(b,1) + dimension(n,n),check(shape(a,0)==shape(a,1)) :: a + integer dimension(n),depend(n),intent(out) :: piv + dimension(n,nrhs),check(shape(a,0)==shape(b,0)),depend(n) :: b + integer intent(out)::info + intent(in,out,copy,out=x) b + intent(in,out,copy,out=lu) a +end subroutine gesv + + +subroutine gesdd(m,n,minmn,u0,u1,vt0,vt1,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info) + ! u,s,vt,info = gesdd(a,compute_uv=1,lwork=..,overwrite_a=0) + ! Compute the singular value decomposition (SVD) using divide and conquer: + ! A = U * SIGMA * transpose(V) + ! A - M x N matrix + ! U - M x M matrix or min(M,N) x N if full_matrices=False + ! 
SIGMA - M x N zero matrix with a main diagonal filled with min(M,N) + ! singular values + ! transpose(V) - N x N matrix or N x min(M,N) if full_matrices=False + + callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,a,&m,s,u,&u0,vt,&vt0,work,&lwork,iwork,&info) + callprotoargument char*,F_INT*,F_INT*,*,F_INT*,*,*,F_INT*,*,F_INT*,*,F_INT*,F_INT*,F_INT* + + integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1 + integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1 + integer intent(hide),depend(a):: m = shape(a,0) + integer intent(hide),depend(a):: n = shape(a,1) + integer intent(hide),depend(m,n):: minmn = MIN(m,n) + integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1) + integer intent(hide),depend(compute_uv,minmn, full_matrices) :: u1 = (compute_uv?(full_matrices?m:minmn):1) + integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1) + integer intent(hide),depend(compute_uv,minmn) :: vt1 = (compute_uv?n:1) + dimension(m,n),intent(in,copy,aligned8) :: a + dimension(minmn),intent(out),depend(minmn) :: s + dimension(u0,u1),intent(out),depend(u0, u1) :: u + dimension(vt0,vt1),intent(out),depend(vt0, vt1) :: vt + dimension(lwork),intent(hide,cache),depend(lwork) :: work + integer optional,intent(in),depend(minmn,compute_uv) & + :: lwork = max((compute_uv?4*minmn*minmn+MAX(m,n)+9*minmn:MAX(14*minmn+4,10*minmn+2+25*(25+8))+MAX(m,n)),1) + integer intent(hide,cache),dimension(8*minmn),depend(minmn) :: iwork + integer intent(out)::info + +end subroutine gesdd + +subroutine gesdd_lwork(m,n,minmn,u0,vt0,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info) + ! 
LWORK computation for (S/D)GESDD + + fortranname gesdd + callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,&a,&m,&s,&u,&u0,&vt,&vt0,&work,&lwork,&iwork,&info) + callprotoargument char*,F_INT*,F_INT*,*,F_INT*,*,*,F_INT*,*,F_INT*,*,F_INT*,F_INT*,F_INT* + + integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1 + integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1 + integer intent(in) :: m + integer intent(in) :: n + integer intent(hide),depend(m,n):: minmn = MIN(m,n) + integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1) + integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1) + intent(hide) :: a + intent(hide) :: s + intent(hide) :: u + intent(hide) :: vt + intent(out) :: work + integer intent(hide) :: lwork = -1 + integer intent(hide) :: iwork + integer intent(out) :: info + +end subroutine gesdd_lwork + + +subroutine syev(compute_v,lower,n,w,a,lda,work,lwork,info) + ! w,v,info = syev(a,compute_v=1,lower=0,lwork=3*n-1,overwrite_a=0) + ! Compute all eigenvalues and, optionally, eigenvectors of a + ! real symmetric matrix A. + ! + ! Performance tip: + ! If compute_v=0 then set also overwrite_a=1. 
+ + callstatement (*f2py_func)((compute_v?"V":"N"),(lower?"L":"U"),&n,a,&lda,w,work,&lwork,&info) + callprotoargument char*,char*,F_INT*,*,F_INT*,*,*,F_INT*,F_INT* + + integer optional,intent(in):: compute_v = 1 + check(compute_v==1||compute_v==0) compute_v + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + integer intent(hide),depend(a):: n = shape(a,0) + integer intent(hide),depend(a):: lda = MAX(1,shape(a,0)) + dimension(n,n),check(shape(a,0)==shape(a,1)) :: a + intent(in,copy,out,out=v) :: a + + dimension(n),intent(out),depend(n) :: w + + integer optional,intent(in),depend(n) :: lwork=max(3*n-1,1) + check(lwork>=3*n-1) :: lwork + dimension(lwork),intent(hide),depend(lwork) :: work + + integer intent(out) :: info + +end subroutine syev + + +subroutine syev_lwork(lower,n,w,a,lda,work,lwork,info) + ! LWORK routines for syev + + fortranname syev + + callstatement (*f2py_func)("N",(lower?"L":"U"),&n,&a,&lda,&w,&work,&lwork,&info) + callprotoargument char*,char*,F_INT*,*,F_INT*,*,*,F_INT*,F_INT* + + integer intent(in):: n + integer optional,intent(in),check(lower==0||lower==1) :: lower = 0 + + integer intent(hide),depend(n):: lda = MAX(1, n) + intent(hide):: a + intent(hide):: w + integer intent(hide):: lwork = -1 + + intent(out):: work + integer intent(out):: info + +end subroutine syev_lwork + +end interface + +end python module _flapack + + + diff --git a/benchmark/pybench/asv/openblas_wrap/generate_f2pymod.py b/benchmark/pybench/asv/openblas_wrap/generate_f2pymod.py new file mode 100644 index 0000000000..5a8ba13895 --- /dev/null +++ b/benchmark/pybench/asv/openblas_wrap/generate_f2pymod.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 +""" +Process f2py template files (`filename.pyf.src` -> `filename.pyf`) + +Usage: python generate_pyf.py filename.pyf.src -o filename.pyf +""" + +import os +import sys +import re +import subprocess +import argparse + + +# START OF CODE VENDORED FROM `numpy.distutils.from_template` 
+#############################################################
+"""
+process_file(filename)
+
+  takes templated file .xxx.src and produces .xxx file where .xxx
+  is .pyf .f90 or .f using the following template rules:
+
+  '<..>' denotes a template.
+
+  All function and subroutine blocks in a source file with names that
+  contain '<..>' will be replicated according to the rules in '<..>'.
+
+  The number of comma-separated words in '<..>' will determine the number of
+  replicates.
+
+  '<..>' may have two different forms, named and short. For example,
+
+  named:
+   <p=d,s,z,c> where anywhere inside a block '<p>' will be replaced with
+   'd', 's', 'z', and 'c' for each replicate of the block.
+
+   <_c>  is already defined: <_c=s,d,c,z>
+   <_t>  is already defined: <_t=real,double precision,complex,double complex>
+
+  short:
+   <s,d,c,z>, a short form of the named, useful when no <p> appears inside
+   a block.
+
+  In general, '<..>' contains a comma separated list of arbitrary
+  expressions. If these expression must contain a comma|leftarrow|rightarrow,
+  then prepend the comma|leftarrow|rightarrow with a backslash.
+
+  If an expression matches '\\<index>' then it will be replaced
+  by <index>-th expression.
+
+  Note that all '<..>' forms in a block must have the same number of
+  comma-separated entries.
+
+  Predefined named template rules:
+  <prefix=s,d,c,z>
+  <ftype=real,double precision,complex,double complex>
+  <ftypereal=real,double precision,\\0,\\1>
+  <ctype=float,double,complex_float,complex_double>
+  <ctypereal=float,double,\\0,\\1>
+"""
+
+routine_start_re = re.compile(
+    r'(\n|\A)((     (\$|\*))|)\s*(subroutine|function)\b',
+    re.I
+)
+routine_end_re = re.compile(r'\n\s*end\s*(subroutine|function)\b.*(\n|\Z)', re.I)
+function_start_re = re.compile(r'\n     (\$|\*)\s*function\b', re.I)
+
+def parse_structure(astr):
+    """ Return a list of tuples for each function or subroutine each
+    tuple is the start and end of a subroutine or function to be
+    expanded.
+    """
+
+    spanlist = []
+    ind = 0
+    while True:
+        m = routine_start_re.search(astr, ind)
+        if m is None:
+            break
+        start = m.start()
+        if function_start_re.match(astr, start, m.end()):
+            while True:
+                i = astr.rfind('\n', ind, start)
+                if i==-1:
+                    break
+                start = i
+                if astr[i:i+7]!='\n     $':
+                    break
+            start += 1
+        m = routine_end_re.search(astr, m.end())
+        ind = end = m and m.end()-1 or len(astr)
+        spanlist.append((start, end))
+    return spanlist
+
+template_re = re.compile(r"<\s*(\w[\w\d]*)\s*>")
+named_re = re.compile(r"<\s*(\w[\w\d]*)\s*=\s*(.*?)\s*>")
+list_re = re.compile(r"<\s*((.*?))\s*>")
+
+def find_repl_patterns(astr):
+    reps = named_re.findall(astr)
+    names = {}
+    for rep in reps:
+        name = rep[0].strip() or unique_key(names)
+        repl = rep[1].replace(r'\,', '@comma@')
+        thelist = conv(repl)
+        names[name] = thelist
+    return names
+
+def find_and_remove_repl_patterns(astr):
+    names = find_repl_patterns(astr)
+    astr = re.subn(named_re, '', astr)[0]
+    return astr, names
+
+item_re = re.compile(r"\A\\(?P<index>\d+)\Z")
+def conv(astr):
+    b = astr.split(',')
+    l = [x.strip() for x in b]
+    for i in range(len(l)):
+        m = item_re.match(l[i])
+        if m:
+            j = int(m.group('index'))
+            l[i] = l[j]
+    return ','.join(l)
+
+def unique_key(adict):
+    """ Obtain a unique key given a dictionary."""
+    allkeys = list(adict.keys())
+    done = False
+    n = 1
+    while not done:
+        newkey = '__l%s' % (n)
+        if newkey in allkeys:
+            n += 1
+        else:
+            done = True
+    return newkey
+
+
+template_name_re = re.compile(r'\A\s*(\w[\w\d]*)\s*\Z')
+def expand_sub(substr, names):
+    substr = substr.replace(r'\>', '@rightarrow@')
+    substr = substr.replace(r'\<', '@leftarrow@')
+    lnames = find_repl_patterns(substr)
+    substr = named_re.sub(r"<\1>", substr)  # get rid of definition templates
+
+    def listrepl(mobj):
+        thelist = conv(mobj.group(1).replace(r'\,', '@comma@'))
+        if template_name_re.match(thelist):
+            return "<%s>" % (thelist)
+        name = None
+        for key in lnames.keys():    # see if list is already in dictionary
+            if lnames[key] == thelist:
+                name = key
+        if name is None:      # this list is not in the dictionary yet
+            name = unique_key(lnames)
+            lnames[name] = thelist
+        return "<%s>" % name
+
+    substr = list_re.sub(listrepl, substr)  # convert all lists to named templates
+                                            # newnames are constructed as needed
+
+    numsubs = None
+    base_rule = None
+    rules = {}
+    for r in template_re.findall(substr):
+        if r not in rules:
+            thelist = lnames.get(r, names.get(r, None))
+            if thelist is None:
+                raise ValueError('No replicates found for <%s>' % (r))
+            if r not in names and not thelist.startswith('_'):
+                names[r] = thelist
+            rule = [i.replace('@comma@', ',') for i in thelist.split(',')]
+            num = len(rule)
+
+            if numsubs is None:
+                numsubs = num
+                rules[r] = rule
+                base_rule = r
+            elif num == numsubs:
+                rules[r] = rule
+            else:
+                print("Mismatch in number of replacements (base <{}={}>) "
+                      "for <{}={}>. Ignoring."
+                      .format(base_rule, ','.join(rules[base_rule]), r, thelist))
+    if not rules:
+        return substr
+
+    def namerepl(mobj):
+        name = mobj.group(1)
+        return rules.get(name, (k+1)*[name])[k]
+
+    newstr = ''
+    for k in range(numsubs):
+        newstr += template_re.sub(namerepl, substr) + '\n\n'
+
+    newstr = newstr.replace('@rightarrow@', '>')
+    newstr = newstr.replace('@leftarrow@', '<')
+    return newstr
+
+def process_str(allstr):
+    newstr = allstr
+    writestr = ''
+
+    struct = parse_structure(newstr)
+
+    oldend = 0
+    names = {}
+    names.update(_special_names)
+    for sub in struct:
+        cleanedstr, defs = find_and_remove_repl_patterns(newstr[oldend:sub[0]])
+        writestr += cleanedstr
+        names.update(defs)
+        writestr += expand_sub(newstr[sub[0]:sub[1]], names)
+        oldend = sub[1]
+    writestr += newstr[oldend:]
+
+    return writestr
+
+include_src_re = re.compile(
+    r"(\n|\A)\s*include\s*['\"](?P<name>[\w\d./\\]+\.src)['\"]",
+    re.I
+)
+
+def resolve_includes(source):
+    d = os.path.dirname(source)
+    with open(source) as fid:
+        lines = []
+        for line in fid:
+            m = include_src_re.match(line)
+            if m:
+                fn = m.group('name')
+                if not os.path.isabs(fn):
+                    fn = os.path.join(d, fn)
+                if os.path.isfile(fn):
+                    lines.extend(resolve_includes(fn))
+                else:
+                    lines.append(line)
+            else:
+                lines.append(line)
+    return lines
+
+def process_file(source):
+    lines = resolve_includes(source)
+    return process_str(''.join(lines))
+
+_special_names = find_repl_patterns('''
+<_c=s,d,c,z>
+<_t=real,double precision,complex,double complex>
+<prefix=s,d,c,z>
+<ftype=real,double precision,complex,double complex>
+<ctype=float,double,complex_float,complex_double>
+<ftypereal=real,double precision,\\0,\\1>
+<ctypereal=float,double,\\0,\\1>
+''')
+
+# END OF CODE VENDORED FROM `numpy.distutils.from_template`
+###########################################################
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("infile", type=str,
+                        help="Path to the input file")
+    parser.add_argument("-o", "--outdir", type=str,
+                        help="Path to the output directory")
+    args = parser.parse_args()
+
+    if not args.infile.endswith(('.pyf', '.pyf.src', '.f.src')):
+        raise ValueError(f"Input file has unknown extension: {args.infile}")
+
+    outdir_abs = os.path.join(os.getcwd(), args.outdir)
+
+    # Write out the .pyf/.f file
+    if args.infile.endswith(('.pyf.src', '.f.src')):
+        code = process_file(args.infile)
+        fname_pyf = os.path.join(args.outdir,
+                                 os.path.splitext(os.path.split(args.infile)[1])[0])
+
+        with open(fname_pyf, 'w') as f:
+            f.write(code)
+    else:
+        fname_pyf = args.infile
+
+    # Now invoke f2py to generate the C API module file
+    if args.infile.endswith(('.pyf.src', '.pyf')):
+        p = subprocess.Popen([sys.executable, '-m', 'numpy.f2py', fname_pyf,
+                              '--build-dir', outdir_abs], #'--quiet'],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             cwd=os.getcwd())
+        out, err = p.communicate()
+        if not (p.returncode == 0):
+            # BUGFIX(review): was `args.outfile` (no such argparse attribute ->
+            # AttributeError while raising) and `r"{err}"` (raw literal, stderr
+            # never interpolated). Report the actual file and captured stderr.
+            raise RuntimeError(f"Writing {fname_pyf} with f2py failed!\n"
+                               f"{out}\n"
+                               f"{err}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmark/pybench/asv/openblas_wrap/meson.build b/benchmark/pybench/asv/openblas_wrap/meson.build
new file mode 100644
index 0000000000..9f1b717876
--- /dev/null
+++ b/benchmark/pybench/asv/openblas_wrap/meson.build
@@ -0,0 +1,50 @@
+# find numpy & f2py includes
+inc_numpy = run_command(py3,
+  ['-c', 'import os; os.chdir(".."); import numpy; print(numpy.get_include())'],
+  check : true
+).stdout().strip()
+
+inc_f2py = run_command(py3,
+  ['-c', 'import os; os.chdir(".."); import numpy.f2py; print(numpy.f2py.get_include())'],
+  check : true
+).stdout().strip()
+
+
+inc_np = include_directories(inc_numpy, inc_f2py)
+fortranobject_c = inc_f2py / 'fortranobject.c'
+
+
+fortranobject_lib = static_library('_fortranobject',
+  fortranobject_c,
+#  c_args: numpy_nodepr_api,
+  dependencies: py3_dep,
+  include_directories: [inc_np, inc_f2py],
+  gnu_symbol_visibility: 'hidden',
+)
+fortranobject_dep = declare_dependency(
+  link_with: fortranobject_lib,
+  include_directories: [inc_np, inc_f2py],
+)
+
+
+# f2py generated wrappers
+
+flapack_module = custom_target('flapack_module',
+  output: ['_flapackmodule.c'],
+  input: 'blas_lapack.pyf.src',
+  command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'],
+)
+
+py3.extension_module('_flapack',
+  flapack_module,
+  link_args: [],  # version_link_args,
+  dependencies: [openblas_dep, fortranobject_dep],
+  install: true,
+  subdir: 'openblas_wrap'
+)
+
+
+py3.install_sources(
+  ['__init__.py'],
+  subdir: 'openblas_wrap'
+)
diff --git a/benchmark/pybench/asv/pyproject.toml b/benchmark/pybench/asv/pyproject.toml
new file mode 100644
index 0000000000..535f96425e
--- /dev/null
+++ b/benchmark/pybench/asv/pyproject.toml
@@ -0,0 +1,22 @@
+[build-system]
+build-backend = "mesonpy"
+requires = [
+    "meson-python>=0.16.0",
+    "numpy",
+    "scipy_openblas32"
+]
+
+
+
+[project]
+name = "openblas_wrap"
+version = "0.1"
+maintainers = [
+    {name = ".", email = ".@gmail.com"}
+]
+description = "a wrapper"
+requires-python = ">=3.10"
+dependencies = ["numpy>=1.23,<3",
+    "scipy_openblas32"
+]