diff --git a/godel-script/CMakeLists.txt b/godel-script/CMakeLists.txt index 84109426..35f2a787 100644 --- a/godel-script/CMakeLists.txt +++ b/godel-script/CMakeLists.txt @@ -20,6 +20,7 @@ set(GODEL_FRONTEND_HDR_FILES godel-frontend/src/semantic.h godel-frontend/src/symbol.h godel-frontend/src/ir/aggregator_inline_remark.h + godel-frontend/src/ir/call_graph.h godel-frontend/src/ir/flatten_block.h godel-frontend/src/ir/ir_gen.h godel-frontend/src/ir/ir_context.h @@ -29,6 +30,7 @@ set(GODEL_FRONTEND_HDR_FILES godel-frontend/src/ir/pass.h godel-frontend/src/ir/pass_manager.h godel-frontend/src/ir/remove_unused.h + godel-frontend/src/ir/reorder.h godel-frontend/src/error/error.h godel-frontend/src/ast/ast_node.h godel-frontend/src/ast/ast_root.h @@ -60,6 +62,7 @@ set(GODEL_FRONTEND_SRC_FILES godel-frontend/src/semantic.cpp godel-frontend/src/symbol.cpp godel-frontend/src/ir/aggregator_inline_remark.cpp + godel-frontend/src/ir/call_graph.cpp godel-frontend/src/ir/flatten_block.cpp godel-frontend/src/ir/ir_gen.cpp godel-frontend/src/ir/ir_context.cpp @@ -69,6 +72,7 @@ set(GODEL_FRONTEND_SRC_FILES godel-frontend/src/ir/pass.cpp godel-frontend/src/ir/pass_manager.cpp godel-frontend/src/ir/remove_unused.cpp + godel-frontend/src/ir/reorder.cpp godel-frontend/src/error/error.cpp godel-frontend/src/ast/ast_visitor.cpp godel-frontend/src/ast/ast_root.cpp diff --git a/godel-script/README.md b/godel-script/README.md index 1087a50a..63bdb49d 100644 --- a/godel-script/README.md +++ b/godel-script/README.md @@ -63,7 +63,7 @@ Use this command to apply patch: ```bash cd souffle -git am ../../0001-init-self-used-souffle-from-public-souffle.patch +git am ../0001-init-self-used-souffle-from-public-souffle.patch ``` Use these commands to revert: diff --git a/godel-script/godel-backend/0001-init-self-used-souffle-from-public-souffle.patch b/godel-script/godel-backend/0001-init-self-used-souffle-from-public-souffle.patch index 0ca2b88a..6cc998a2 100644 --- a/godel-script/godel-backend/0001-init-self-used-souffle-from-public-souffle.patch +++ b/godel-script/godel-backend/0001-init-self-used-souffle-from-public-souffle.patch @@ -1,16 +1,9 @@ -From 9cd9cafbc050f6a2ce04a2aaf7ed3267f32cc2db Mon Sep 17 00:00:00 2001 +From 5ef4e439424421214071cf8b9bb5b413534fe582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=81=95=E5=B1=B1?= -Date: Mon, 24 Jun 2024 16:55:26 +0800 -Subject: [PATCH] init self-used souffle from public souffle +Date: Fri, 13 Dec 2024 18:02:01 +0800 +Subject: [PATCH] init self used souffle from public souffle --- - .github/actions/cmake-test/action.yml | 2 +- - .github/actions/set-test-ids/action.yml | 2 +- - .github/images/arch-linux/entrypoint.sh | 2 +- - .github/scripts/updatePPA.sh | 8 +- - .github/workflows/CI-Tests.yml | 67 +- - .github/workflows/VS-CI-Tests.yml | 20 +- - .github/workflows/create-packages.yml | 28 +- CMakeLists.txt | 29 +- README.md | 6 +- choco-packages.config | 5 +- @@ -22,15 +15,15 @@ Subject: [PATCH] init self-used souffle from public souffle sh/checkStyle.sh | 4 +- sh/check_os.sh | 2 +- src/CMakeLists.txt | 104 +- - src/DynamicLibrary.cpp | 72 + - src/ExtraConfig.h | 24 + + src/DynamicLibrary.cpp | 84 + + src/ExtraConfig.h | 36 + src/FunctorOps.cpp | 4 - src/FunctorOps.h | 5 - src/Global.cpp | 6 +- src/Global.h | 11 +- src/GraphUtils.h | 76 +- src/LogStatement.h | 31 +- - src/MainEntry.cpp | 5 + + src/MainEntry.cpp | 17 + src/RelationTag.h | 2 + src/TranslationUnitBase.h | 22 +- src/ast/Aggregator.cpp | 29 +- @@ -233,7 +226,7 @@ Subject: [PATCH] init self-used souffle from public souffle src/include/souffle/io/ReadStreamSQLite.h | 26 +- src/include/souffle/io/SerialisationStream.h | 2 +- src/include/souffle/io/WriteStream.h | 2 +- - src/include/souffle/io/WriteStreamSQLite.h | 30 +- + src/include/souffle/io/WriteStreamSQLite.h | 50 +- src/include/souffle/profile/Cell.h | 16 +- src/include/souffle/profile/CellInterface.h | 2 +- src/include/souffle/profile/Cli.h | 17 +- @@ -261,7 +254,7 @@ Subject: [PATCH] init self-used souffle from public souffle src/interpreter/BTreeDeleteIndex.cpp | 18 +- src/interpreter/BTreeIndex.cpp | 19 +- src/interpreter/Context.h | 9 - - src/interpreter/Engine.cpp | 564 +++--- + src/interpreter/Engine.cpp | 620 +++--- src/interpreter/Engine.h | 45 +- src/interpreter/EqrelIndex.cpp | 3 +- src/interpreter/Generator.cpp | 144 +- @@ -423,7 +416,7 @@ Subject: [PATCH] init self-used souffle from public souffle tests/syntactic/syntax5/syntax5.err | 2 +- tests/syntactic/syntax6/syntax6.err | 2 +- tests/syntactic/syntax8/syntax8.err | 2 +- - 419 files changed, 7762 insertions(+), 9621 deletions(-) + 412 files changed, 7824 insertions(+), 9542 deletions(-) create mode 100644 src/DynamicLibrary.cpp create mode 100644 src/ExtraConfig.h create mode 100644 src/MainEntry.cpp @@ -435,372 +428,6 @@ Subject: [PATCH] init self-used souffle from public souffle create mode 100644 src/parser/Helper.h create mode 100644 src/ram/CountUniqueKeys.h -diff --git a/.github/actions/cmake-test/action.yml b/.github/actions/cmake-test/action.yml -index 90ddfab..460a1ab 100644 ---- a/.github/actions/cmake-test/action.yml -+++ b/.github/actions/cmake-test/action.yml -@@ -14,7 +14,7 @@ runs: - using: "composite" - steps: - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - - - name: setup-env - run: | -diff --git a/.github/actions/set-test-ids/action.yml b/.github/actions/set-test-ids/action.yml -index 2cc54e6..d87f49f 100644 ---- a/.github/actions/set-test-ids/action.yml -+++ b/.github/actions/set-test-ids/action.yml -@@ -13,5 +13,5 @@ runs: - - id: set-test-ids - run: | - CHUNKS=$(python3 -c "print(list(range(${{ inputs.n-chunks }})))") -- echo "chunks=${CHUNKS}" >> $GITHUB_OUTPUT -+ echo "::set-output name=chunks::${CHUNKS}" - shell: bash -diff --git a/.github/images/arch-linux/entrypoint.sh b/.github/images/arch-linux/entrypoint.sh -index 7aa8d7a..751c90b 100755 ---- a/.github/images/arch-linux/entrypoint.sh -+++ b/.github/images/arch-linux/entrypoint.sh -@@ -3,6 +3,6 @@ - set -e - set -x - --envsubst '${RELEASE_TAG},${REPO_OWNER}' < PKGBUILD.in > PKGBUILD -+envsubst '${RELEASE_TAG}' < PKGBUILD.in > PKGBUILD - makepkg - makepkg --printsrcinfo > .SRCINFO -diff --git a/.github/scripts/updatePPA.sh b/.github/scripts/updatePPA.sh -index 5eeb9f8..d4588f3 100755 ---- a/.github/scripts/updatePPA.sh -+++ b/.github/scripts/updatePPA.sh -@@ -46,17 +46,17 @@ sudo apt-get install createrepo rpm - echo "%_gpg_name Bot\n%__gpg_sign_cmd %{__gpg} gpg --force-v3-sigs --batch --verbose --no-armor --no-secmem-warning -u \"%{_gpg_name}\" -sbo %{__signature_filename} --digest-algo sha256 %{__plaintext_filename}'" > ~/.rpmmacros - - ## Fedora --mkdir -p $TMPDIR/ppa/fedora/39/x86_64 -+mkdir -p $TMPDIR/ppa/fedora/34/x86_64 - cd $TMPDIR/ppa/fedora - --for i in $DEBPATH/*fedora-39*/*rpm -+for i in $DEBPATH/*fedora-34*/*rpm - do - rpm --addsign $i - done - --cp $DEBPATH/*fedora-39*/*rpm 39/x86_64/ -+cp $DEBPATH/*fedora-34*/*rpm 34/x86_64/ - --createrepo 39/x86_64 -+createrepo 34/x86_64 - - git add . - git commit -m "Added fedora rpm files for $SOUFFLE_TAG" -diff --git a/.github/workflows/CI-Tests.yml b/.github/workflows/CI-Tests.yml -index f8b865f..0eda992 100644 ---- a/.github/workflows/CI-Tests.yml -+++ b/.github/workflows/CI-Tests.yml -@@ -3,9 +3,6 @@ on: - pull_request: - types: [opened, synchronize] - workflow_dispatch: -- push: -- branches: -- - 'master' - - jobs: - Code-Style: -@@ -13,7 +10,7 @@ jobs: - - steps: - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - with: - fetch-depth: 2 - -@@ -31,7 +28,7 @@ jobs: - chunks: ${{ steps.set-test-ids.outputs.chunks }} - steps: - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - - id: set-test-ids - uses: ./.github/actions/set-test-ids - with: -@@ -54,7 +51,7 @@ jobs: - - steps: - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - - - name: install-lcov - if: ${{ matrix.domain == '32bit' }} -@@ -97,7 +94,7 @@ jobs: - - - name: upload-coverage-artifact - if: ${{ matrix.domain == '32bit' }} -- uses: actions/upload-artifact@v4 -+ uses: actions/upload-artifact@v2 - with: - name: coverage-${{ matrix.domain }}-${{ matrix.chunk }} - path: coverage.info -@@ -113,17 +110,11 @@ jobs: - matrix: - chunk: ${{ fromJSON(needs.Test-Setup.outputs.chunks) }} - -- runs-on: macos-12 -+ runs-on: macos-latest - - steps: -- - name: Select XCode version -- uses: maxim-lobanov/setup-xcode@v1 -- with: -- # Pending https://github.com/actions/runner-images/issues/6350 -- xcode-version: '13.4' -- - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - - - name: install-deps - run: sh/setup/install_macos_deps.sh -@@ -135,39 +126,6 @@ jobs: - n-chunks: ${{ needs.Test-Setup.outputs.n-chunks }} - chunk: ${{ matrix.chunk }} - -- AppleM-CMake: -- needs: Test-Setup -- timeout-minutes: 150 -- -- name: AppleM-CMake (chunk ${{ matrix.chunk }}) -- -- strategy: -- fail-fast: false -- matrix: -- chunk: ${{ fromJSON(needs.Test-Setup.outputs.chunks) }} -- -- runs-on: macos-14 -- -- steps: -- - name: Select XCode version -- uses: maxim-lobanov/setup-xcode@v1 -- with: -- xcode-version: '15.2' -- -- - name: checkout -- uses: actions/checkout@v4 -- -- - name: install-deps -- run: sh/setup/install_macos_arm_deps.sh -- -- - name: cmake-test-64bit -- uses: ./.github/actions/cmake-test -- with: -- # disable openmp on ARM architecture, see souffle-lang/souffle#2476 -- cmake-flags: -DSOUFFLE_DOMAIN_64BIT=ON -DSOUFFLE_USE_OPENMP=OFF -- n-chunks: ${{ needs.Test-Setup.outputs.n-chunks }} -- chunk: ${{ matrix.chunk }} -- - Memory-Check: - needs: Test-Setup - timeout-minutes: 150 -@@ -183,16 +141,11 @@ jobs: - - steps: - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - - - name: install-deps - run: sudo sh/setup/install_ubuntu_deps.sh - -- - name: fix mmap_rnd_bits -- # issue with ubuntu:latest runner and ASAN -- # https://github.com/actions/runner-images/issues/9491 -- run: sudo sysctl vm.mmap_rnd_bits=28 -- - - name: cmake-test-32bit - uses: ./.github/actions/cmake-test - with: -@@ -208,7 +161,7 @@ jobs: - - steps: - - name: checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - with: - fetch-depth: 0 - -@@ -216,13 +169,13 @@ jobs: - run: sudo apt-get update && sudo apt-get install lcov - - - name: download-coverage-artifacts -- uses: actions/download-artifact@v4 -+ uses: actions/download-artifact@v2 - - - name: merge-coverage-report - run: lcov $(for i in coverage-*-*/coverage.info; do echo -a $i; done) --output-file coverage.info - - - name: upload-coverage-report -- uses: codecov/codecov-action@v4 -+ uses: codecov/codecov-action@v2 - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: coverage.info -diff --git a/.github/workflows/VS-CI-Tests.yml b/.github/workflows/VS-CI-Tests.yml -index 3955a6f..3ba0030 100644 ---- a/.github/workflows/VS-CI-Tests.yml -+++ b/.github/workflows/VS-CI-Tests.yml -@@ -4,9 +4,6 @@ on: - pull_request: - types: [opened, synchronize] - workflow_dispatch: -- push: -- branches: -- - 'master' - - env: - CHOCO_CACHE_DIR: "${{ github.workspace }}/choco-cache" -@@ -16,10 +13,10 @@ jobs: - Windows-CMake-MSVC: - runs-on: windows-2019 - steps: -- - uses: actions/checkout@v4 -+ - uses: actions/checkout@v2 - - - name: Dependencies Cache -- uses: actions/cache@v3 -+ uses: actions/cache@v2 - env: - cache-name: cache-chocolatey - with: -@@ -32,18 +29,19 @@ jobs: - - name: Binary Dependencies (Chocolatey) - run: | - choco config set cacheLocation ${{ env.CHOCO_CACHE_DIR }} -- choco install choco-packages.config --no-progress --installargs 'ADD_CMAKE_TO_PATH=System' -+ choco install choco-packages.config --no-progress --installargs 'ADD_CMAKE_TO_PATH=""System""' - - # Use vcpkg to install devel library dependencies. - - name: Library Dependencies (vcpkg) -- uses: lukka/run-vcpkg@v11 -+ uses: lukka/run-vcpkg@v7 - with: -- vcpkgGitCommitId: '56954f1db97f38635782d5ad7cdfd45d2731c854' -+ vcpkgGitCommitId: 'af2287382b1991dbdcb7e5112d236f3323b9dd7a' -+ vcpkgTriplet: x64-windows -+ vcpkgArguments: 'sqlite3 zlib libffi' - - - name: Create Build Directory - working-directory: ${{github.workspace}} -- run: | -- mkdir build -+ run: mkdir build - - - name: Configure Build - working-directory: ${{github.workspace}} -@@ -51,7 +49,7 @@ jobs: - $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." - Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - refreshenv -- cmake -S . -B build -G "Visual Studio 16 2019" -A x64 "-DCMAKE_TOOLCHAIN_FILE=${{env.VCPKG_ROOT}}/scripts/buildsystems/vcpkg.cmake" -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=/bigobj -DSOUFFLE_DOMAIN_64BIT=ON -DCMAKE_FIND_LIBRARY_PREFIXES=";lib" -DCMAKE_FIND_LIBRARY_SUFFIXES=".lib;.dll" -DSOUFFLE_USE_CURSES=OFF -DSOUFFLE_USE_ZLIB=ON -DSOUFFLE_USE_SQLITE=ON -DCMAKE_FIND_DEBUG_MODE=FALSE -DSOUFFLE_BASH_COMPLETION=OFF -+ cmake -S . -B build -G "Visual Studio 16 2019" -A x64 "-DCMAKE_TOOLCHAIN_FILE=${{env.VCPKG_ROOT}}/scripts/buildsystems/vcpkg.cmake" -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=/bigobj -DSOUFFLE_DOMAIN_64BIT=ON -DCMAKE_FIND_LIBRARY_PREFIXES=";lib" -DCMAKE_FIND_LIBRARY_SUFFIXES=".lib;.dll" -DSOUFFLE_USE_CURSES=OFF -DSOUFFLE_USE_ZLIB=ON -DCMAKE_FIND_DEBUG_MODE=FALSE -DSOUFFLE_BASH_COMPLETION=OFF - - - name: Build - working-directory: ${{github.workspace}} -diff --git a/.github/workflows/create-packages.yml b/.github/workflows/create-packages.yml -index 35531db..46f76c4 100644 ---- a/.github/workflows/create-packages.yml -+++ b/.github/workflows/create-packages.yml -@@ -15,14 +15,18 @@ jobs: - - release: oraclelinux-8 - extension: ".rpm" - OS-name: "el/8" -- - release: fedora-39 -+ - release: fedora-34 - extension: ".rpm" -- OS-name: "fedora/39" -+ OS-name: "fedora/34" -+ # build issue on fedora 35 -+ # - release: fedora-35 -+ # extension: ".rpm" -+ # OS-name: "fedora/35" - - runs-on: ubuntu-latest - steps: - - name: Checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - with: - fetch-depth: 0 - -@@ -37,14 +41,14 @@ jobs: - run: |- - docker cp container:/souffle/build/ . && - cd build && -- echo "pkg_name=$(ls *${{ matrix.extension }} | head -n1)" >> $GITHUB_OUTPUT -- echo "artifact_name=x86_64-${{ matrix.release }}-$(ls *${{ matrix.extension }} | head -n1)" >> $GITHUB_OUTPUT -+ echo "::set-output name=pkg_name::$(ls *${{ matrix.extension }} | head -n1)" -+ echo "::set-output name=artifact_name::x86_64-${{ matrix.release }}-$(ls *${{ matrix.extension }} | head -n1)" - - - name: Naming Artifact - run: cp build/${{ steps.extract_pkg.outputs.pkg_name }} build/${{ steps.extract_pkg.outputs.artifact_name }} - - - name: Upload Artifact -- uses: actions/upload-artifact@v3 -+ uses: actions/upload-artifact@v2 - with: - name: ${{ steps.extract_pkg.outputs.artifact_name }} - path: build/${{ steps.extract_pkg.outputs.artifact_name }} -@@ -53,14 +57,14 @@ jobs: - runs-on: ubuntu-latest - steps: - - name: Checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Prepare - id: prepare - run: |- -- echo "release_tag=$(git describe --tags --always | sed "s/-.*$//")" >> $GITHUB_OUTPUT -+ echo "::set-output name=release_tag::$(git describe --tags --always | sed "s/-.*$//")" - - - name: Build Container - run: docker build ./.github/images/arch-linux/ -t package_builder -@@ -105,7 +109,7 @@ jobs: - runs-on: ubuntu-latest - steps: - - name: Download All Artifacts -- uses: actions/download-artifact@v3 -+ uses: actions/download-artifact@v2 - with: - path: ./downloads - -@@ -126,16 +130,16 @@ jobs: - needs: CPack-Package-Build - if: ${{ always() }} - -- runs-on: ubuntu-latest -+ runs-on: ubuntu-18.04 - steps: - - name: Checkout -- uses: actions/checkout@v4 -+ uses: actions/checkout@v2 - with: - fetch-depth: 0 - clean: false - - - name: Download All Artifacts -- uses: actions/download-artifact@v3 -+ uses: actions/download-artifact@v2 - with: - path: ./downloads - diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ab514d..1204593 100644 --- a/CMakeLists.txt @@ -1447,10 +1074,22 @@ index c2ce37b..5c07576 100644 set(SOUFFLE_COMPILED_LINK_OPTIONS "/link ${SOUFFLE_COMPILED_LINK_OPTIONS}") diff --git a/src/DynamicLibrary.cpp b/src/DynamicLibrary.cpp new file mode 100644 -index 0000000..aeea973 +index 0000000..28be988 --- /dev/null +++ b/src/DynamicLibrary.cpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,84 @@ ++// Copyright (c), 2016-present, Sourcebrella, Inc Ltd - All rights reserved. ++// Unauthorized copying, using, modifying of this file, via any medium is strictly prohibited. ++// Proprietary and confidential. ++// ++// Alipay.com Inc. ++// Copyright (c) 2004 All Rights Reserved. ++// ++// Author: 冰莲 (lyn249877@antfin.com) ++// File Description: ++// Creation Date: 2022-01-28 ++// Modification History: ++ +#include +#include + @@ -1525,10 +1164,22 @@ index 0000000..aeea973 +} diff --git a/src/ExtraConfig.h b/src/ExtraConfig.h new file mode 100644 -index 0000000..24bdd29 +index 0000000..961a3aa --- /dev/null +++ b/src/ExtraConfig.h -@@ -0,0 +1,24 @@ +@@ -0,0 +1,36 @@ ++// Copyright (c), 2016-present, Sourcebrella, Inc Ltd - All rights reserved. ++// Unauthorized copying, using, modifying of this file, via any medium is strictly prohibited. ++// Proprietary and confidential. ++// ++// Alipay.com Inc. ++// Copyright (c) 2004 All Rights Reserved. ++// ++// Author: 冰莲 (lyn249877@antfin.com) ++// File Description: ++// Creation Date: 2022-01-28 ++// Modification History: ++ +#pragma once + +#ifndef GODEL_EXTRACONFIG_H @@ -1870,10 +1521,22 @@ index 1a84c08..498d9d8 100644 line << ";"; diff --git a/src/MainEntry.cpp b/src/MainEntry.cpp new file mode 100644 -index 0000000..948655e +index 0000000..a5a6416 --- /dev/null +++ b/src/MainEntry.cpp -@@ -0,0 +1,5 @@ +@@ -0,0 +1,17 @@ ++// Copyright (c), 2016-present, Sourcebrella, Inc Ltd - All rights reserved. ++// Unauthorized copying, using, modifying of this file, via any medium is strictly prohibited. ++// Proprietary and confidential. ++// ++// Alipay.com Inc. ++// Copyright (c) 2004 All Rights Reserved. ++// ++// Author: 冰莲 (lyn249877@antfin.com) ++// File Description: ++// Creation Date: 2021-09-15 ++// Modification History: ++ +namespace souffle { int main(int argc, char **argv); } + +int main(int argc, char** argv) { @@ -15436,10 +15099,10 @@ index 555ce8a..06449cd 100644 } } diff --git a/src/include/souffle/io/WriteStreamSQLite.h b/src/include/souffle/io/WriteStreamSQLite.h -index 240b6da..09f9218 100644 +index 240b6da..c51a180 100644 --- a/src/include/souffle/io/WriteStreamSQLite.h +++ b/src/include/souffle/io/WriteStreamSQLite.h -@@ -58,20 +58,30 @@ protected: +@@ -58,20 +58,27 @@ protected: void writeNextTuple(const RamDomain* tuple) override { for (std::size_t i = 0; i < arity; i++) { RamDomain value = 0; // Silence warning @@ -15447,10 +15110,7 @@ index 240b6da..09f9218 100644 switch (typeAttributes.at(i)[0]) { - case 's': value = getSymbolTableID(tuple[i]); break; -+ case 's': -+ value = getSymbolTableID(tuple[i]); -+ symvalue = symbolTable.decode(tuple[i]).c_str(); -+ break; ++ case 's': symvalue = symbolTable.decode(tuple[i]).c_str(); break; default: value = tuple[i]; break; } - @@ -15477,28 +15137,65 @@ index 240b6da..09f9218 100644 } } if (sqlite3_step(insertStatement) != SQLITE_DONE) { -@@ -192,18 +202,18 @@ private: +@@ -153,8 +160,8 @@ private: + + void prepareStatements() { + prepareInsertStatement(); +- prepareSymbolInsertStatement(); +- prepareSymbolSelectStatement(); ++ // prepareSymbolInsertStatement(); ++ // prepareSymbolSelectStatement(); + } + void prepareSymbolInsertStatement() { + std::stringstream insertSQL; +@@ -178,7 +185,7 @@ private: + + void prepareInsertStatement() { + std::stringstream insertSQL; +- insertSQL << "INSERT INTO '_" << relationName << "' VALUES "; ++ insertSQL << "INSERT INTO '" << relationName << "' VALUES "; + insertSQL << "(@V0"; + for (unsigned int i = 1; i < arity; i++) { + insertSQL << ",@V" << i; +@@ -192,23 +199,30 @@ private: void createTables() { createRelationTable(); - createRelationView(); - createSymbolTable(); -+ createRelationView(); +- createSymbolTable(); ++ // createSymbolTable(); ++ // createRelationView(); } void createRelationTable() { ++ const auto columnNames = params["relation"]["params"].array_items(); ++ std::stringstream createTableText; - createTableText << "CREATE TABLE IF NOT EXISTS '_" << relationName << "' ("; +- createTableText << "CREATE TABLE IF NOT EXISTS '_" << relationName << "' ("; ++ createTableText << "CREATE TABLE IF NOT EXISTS '" << relationName << "' ("; if (arity > 0) { - createTableText << "'0' INTEGER"; -+ createTableText << "'0' " << (typeAttributes.at(0)[0] == 's'? "TEXT":"INTEGER"); - for (unsigned int i = 1; i < arity; i++) { - createTableText << ",'" << std::to_string(i) << "' "; +- for (unsigned int i = 1; i < arity; i++) { +- createTableText << ",'" << std::to_string(i) << "' "; - createTableText << "INTEGER"; ++ for (unsigned int i = 0; i < arity; i++) { ++ const std::string tableColumnName = std::to_string(i); ++ const auto& realColumnName = ++ (columnNames[i].is_string() ? columnNames[i].string_value() : tableColumnName); ++ if (i) { ++ createTableText << ","; ++ } ++ createTableText << "'" << realColumnName << "' "; + createTableText << (typeAttributes.at(i)[0] == 's'? "TEXT":"INTEGER"); } } createTableText << ");"; + executeSQL(createTableText.str(), db); +- executeSQL("DELETE FROM '_" + relationName + "';", db); ++ executeSQL("DELETE FROM '" + relationName + "';", db); + } + + void createRelationView() { diff --git a/src/include/souffle/profile/Cell.h b/src/include/souffle/profile/Cell.h index 0ebd83e..a759495 100644 --- a/src/include/souffle/profile/Cell.h @@ -17006,7 +16703,7 @@ index 6b9a0fd..efba119 100644 } // namespace souffle::interpreter diff --git a/src/interpreter/Engine.cpp b/src/interpreter/Engine.cpp -index 41683d3..cb38b9f 100644 +index 41683d3..f46ba08 100644 --- a/src/interpreter/Engine.cpp +++ b/src/interpreter/Engine.cpp @@ -23,18 +23,16 @@ @@ -17117,7 +16814,7 @@ index 41683d3..cb38b9f 100644 RecordTable& Engine::getRecordTable() { return recordTable; } -@@ -339,6 +314,37 @@ ram::TranslationUnit& Engine::getTranslationUnit() { +@@ -339,6 +314,86 @@ ram::TranslationUnit& Engine::getTranslationUnit() { return tUnit; } @@ -17151,24 +16848,80 @@ index 41683d3..cb38b9f 100644 + } + return ""; +} ++ ++static const char* godel_lang_builtin_string_to_upper(const char *self) { ++ auto buffer = new char[strlen(self) + 1](); ++ std::transform(self, self + strlen(self), buffer, toupper); ++ return buffer; ++} ++ ++static const char* godel_lang_builtin_string_to_lower(const char *self) { ++ auto buffer = new char[strlen(self) + 1](); ++ std::transform(self, self + strlen(self), buffer, tolower); ++ return buffer; ++} ++ ++static const char* godel_lang_builtin_string_replace_all(const char *self, const char *pattern, const char* replacement) { ++ std::regex re(pattern); ++ std::stringstream ss; ++ std::regex_replace(std::ostreambuf_iterator(ss), self, self + strlen(self), re, replacement); ++ ++ const auto& res = ss.str(); ++ auto buffer = new char[res.length() + 1](); ++ std::strncpy(buffer, res.c_str(), res.length()); ++ buffer[res.length()] = 0; ++ return buffer; ++} ++ ++static const char* godel_lang_builtin_string_replace_once(const char *self, const char *pattern, const char* replacement, int index) { ++ std::regex re(pattern); ++ std::string result = self; ++ ++ std::string temp = result; ++ std::smatch match; ++ ++ size_t offset = 0; ++ size_t matched_index = 0; ++ while (std::regex_search(temp, match, re)) { ++ if (matched_index == static_cast(index)) { ++ result.replace(offset + match.position(), match.length(), replacement); ++ break; ++ } ++ ++matched_index; ++ offset += match.position() + match.length(); ++ temp = match.suffix().str(); ++ } ++ ++ auto buffer = new char[result.length() + 1](); ++ std::strncpy(buffer, result.c_str(), result.length()); ++ buffer[result.length()] = 0; ++ return buffer; ++} + void* Engine::getMethodHandle(const std::string& method) { for (void* libHandle : dll) { auto* methodHandle = dlsym(libHandle, method.c_str()); -@@ -346,6 +352,12 @@ void* Engine::getMethodHandle(const std::string& method) { +@@ -346,6 +401,19 @@ void* Engine::getMethodHandle(const std::string& method) { return methodHandle; } } ++ + // TODO: Given from Gödel -+ if (method == "get_field_by_index") { -+ return reinterpret_cast(get_field_by_index); -+ } else if (method == "godel_lang_builtin_string_getMatchResult") { -+ return reinterpret_cast(godel_lang_builtin_string_getMatchResult); ++ static std::unordered_map mapper = { ++ {"get_field_by_index", reinterpret_cast(get_field_by_index)}, ++ {"godel_lang_builtin_string_getMatchResult", reinterpret_cast(godel_lang_builtin_string_getMatchResult)}, ++ {"godel_lang_builtin_string_to_upper", reinterpret_cast(godel_lang_builtin_string_to_upper)}, ++ {"godel_lang_builtin_string_to_lower", reinterpret_cast(godel_lang_builtin_string_to_lower)}, ++ {"godel_lang_builtin_string_replace_all", reinterpret_cast(godel_lang_builtin_string_replace_all)}, ++ {"godel_lang_builtin_string_replace_once", reinterpret_cast(godel_lang_builtin_string_replace_once)} ++ }; ++ if (mapper.count(method)) { ++ return mapper.at(method); + } return nullptr; } -@@ -359,13 +371,13 @@ void Engine::createRelation(const ram::Relation& id, const std::size_t idx) { +@@ -359,13 +427,13 @@ void Engine::createRelation(const ram::Relation& id, const std::size_t idx) { } RelationHandle res; @@ -17186,7 +16939,7 @@ index 41683d3..cb38b9f 100644 } else { res = createBTreeRelation(id, isa.getIndexSelection(id.getName())); } -@@ -377,19 +389,19 @@ const std::vector& Engine::loadDLL() { +@@ -377,19 +445,19 @@ const std::vector& Engine::loadDLL() { return dll; } @@ -17212,7 +16965,7 @@ index 41683d3..cb38b9f 100644 // Set up our paths to have a library appended for (std::string& path : paths) { if (path.back() != pathSeparator) { -@@ -406,11 +418,7 @@ const std::vector& Engine::loadDLL() { +@@ -406,11 +474,7 @@ const std::vector& Engine::loadDLL() { void* tmp = nullptr; for (const std::string& path : paths) { std::string fullpath = path + "lib" + library + dynamicLibSuffix; @@ -17224,7 +16977,7 @@ index 41683d3..cb38b9f 100644 if (tmp != nullptr) { dll.push_back(tmp); break; -@@ -424,18 +432,16 @@ const std::vector& Engine::loadDLL() { +@@ -424,18 +488,16 @@ const std::vector& Engine::loadDLL() { std::size_t Engine::getIterationNumber() const { return iteration; } @@ -17244,7 +16997,7 @@ index 41683d3..cb38b9f 100644 SignalHandler::instance()->enableLogging(); } -@@ -450,7 +456,7 @@ void Engine::executeMain() { +@@ -450,7 +512,7 @@ void Engine::executeMain() { Context ctxt; execute(main.get(), ctxt); } else { @@ -17253,7 +17006,7 @@ index 41683d3..cb38b9f 100644 // Prepare the frequency table for threaded use const ram::Program& program = tUnit.getProgram(); visit(program, [&](const ram::TupleOperation& node) { -@@ -463,7 +469,7 @@ void Engine::executeMain() { +@@ -463,7 +525,7 @@ void Engine::executeMain() { ProfileEventSingleton::instance().startTimer(); ProfileEventSingleton::instance().makeTimeEvent("@time;starttime"); // Store configuration @@ -17262,7 +17015,7 @@ index 41683d3..cb38b9f 100644 for (auto&& v : vs) ProfileEventSingleton::instance().makeConfigRecord(k, v); -@@ -482,8 +488,6 @@ void Engine::executeMain() { +@@ -482,8 +544,6 @@ void Engine::executeMain() { visit(program, [&](const ram::Query&) { ++ruleCount; }); ProfileEventSingleton::instance().makeConfigRecord("ruleCount", std::to_string(ruleCount)); @@ -17271,7 +17024,7 @@ index 41683d3..cb38b9f 100644 Context ctxt; execute(main.get(), ctxt); ProfileEventSingleton::instance().stopTimer(); -@@ -506,7 +510,7 @@ void Engine::generateIR() { +@@ -506,7 +566,7 @@ void Engine::generateIR() { NodeGenerator generator(*this); if (subroutine.empty()) { for (const auto& sub : program.getSubroutines()) { @@ -17280,7 +17033,7 @@ index 41683d3..cb38b9f 100644 } } if (main == nullptr) { -@@ -520,7 +524,10 @@ void Engine::executeSubroutine( +@@ -520,7 +580,10 @@ void Engine::executeSubroutine( ctxt.setReturnValues(ret); ctxt.setArguments(args); generateIR(); @@ -17292,7 +17045,7 @@ index 41683d3..cb38b9f 100644 } RamDomain Engine::execute(const Node* node, Context& ctxt) { -@@ -531,9 +538,9 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -531,9 +594,9 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { // Overload CASE based on number of arguments. // CASE(Kind) -> BASE_CASE(Kind) @@ -17305,7 +17058,7 @@ index 41683d3..cb38b9f 100644 #define BASE_CASE(Kind) \ case (I_##Kind): { \ -@@ -541,12 +548,12 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -541,12 +604,12 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { [[maybe_unused]] const auto& shadow = *static_cast(node); \ [[maybe_unused]] const auto& cur = *static_cast(node->getShadow()); // EXTEND_CASE also defer the relation type @@ -17321,7 +17074,7 @@ index 41683d3..cb38b9f 100644 #define ESAC(Kind) \ } \ (); \ -@@ -580,10 +587,6 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -580,10 +643,6 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { return cur.getConstant(); ESAC(NumericConstant) @@ -17332,7 +17085,7 @@ index 41683d3..cb38b9f 100644 CASE(StringConstant) return shadow.getConstant(); ESAC(StringConstant) -@@ -597,7 +600,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -597,7 +656,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { ESAC(AutoIncrement) CASE(IntrinsicOperator) @@ -17341,7 +17094,7 @@ index 41683d3..cb38b9f 100644 #define BINARY_OP_TYPED(ty, op) return ramBitCast(static_cast(EVAL_CHILD(ty, 0) op EVAL_CHILD(ty, 1))) #define BINARY_OP_LOGICAL(opcode, op) BINARY_OP_INTEGRAL(opcode, op) -@@ -618,7 +621,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -618,7 +677,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { { \ auto result = EVAL_CHILD(RamDomain, 0); \ auto* result_val = &getSymbolTable().decode(result); \ @@ -17350,7 +17103,7 @@ index 41683d3..cb38b9f 100644 auto alt = EVAL_CHILD(RamDomain, i); \ if (alt == result) continue; \ \ -@@ -633,7 +636,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -633,7 +692,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { #define MINMAX_OP(ty, op) \ { \ auto result = EVAL_CHILD(ty, 0); \ @@ -17359,7 +17112,7 @@ index 41683d3..cb38b9f 100644 result = op(result, EVAL_CHILD(ty, i)); \ } \ return ramBitCast(result); \ -@@ -655,7 +658,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -655,7 +714,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { getSymbolTable().decode(EVAL_CHILD(RamDomain, 0)))); // clang-format on @@ -17368,7 +17121,7 @@ index 41683d3..cb38b9f 100644 switch (cur.getOperator()) { /** Unary Functor Operators */ case FunctorOp::ORD: return execute(shadow.getChild(0), ctxt); -@@ -763,7 +766,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -763,7 +822,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { case FunctorOp::CAT: { std::stringstream ss; @@ -17377,7 +17130,7 @@ index 41683d3..cb38b9f 100644 ss << getSymbolTable().decode(execute(shadow.getChild(i), ctxt)); } return getSymbolTable().encode(ss.str()); -@@ -777,7 +780,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -777,7 +836,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { std::string sub_str; try { sub_str = str.substr(idx, len); @@ -17386,7 +17139,7 @@ index 41683d3..cb38b9f 100644 std::cerr << "warning: wrong index position provided by substr(\""; std::cerr << str << "\"," << (int32_t)idx << "," << (int32_t)len << ") functor.\n"; } -@@ -788,17 +791,9 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -788,17 +847,9 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { case FunctorOp::URANGE: case FunctorOp::FRANGE: fatal("ICE: functor `%s` must map onto `NestedIntrinsicOperator`", cur.getOperator()); @@ -17405,7 +17158,7 @@ index 41683d3..cb38b9f 100644 #undef BINARY_OP_LOGICAL #undef BINARY_OP_INTEGRAL -@@ -814,8 +809,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -814,8 +865,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { ESAC(IntrinsicOperator) CASE(NestedIntrinsicOperator) @@ -17416,7 +17169,7 @@ index 41683d3..cb38b9f 100644 ctxt[cur.getTupleId()] = tuple.data(); execute(shadow.getChild(numArgs), ctxt); }; -@@ -832,7 +827,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -832,7 +883,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { case ram::NestedIntrinsicOp::FRANGE: return RUN_RANGE(RamFloat); } @@ -17425,7 +17178,7 @@ index 41683d3..cb38b9f 100644 #undef RUN_RANGE ESAC(NestedIntrinsicOperator) -@@ -841,7 +836,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -841,7 +892,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { auto userFunctor = reinterpret_cast(shadow.getFunctionPointer()); if (userFunctor == nullptr) fatal("cannot find user-defined operator `%s`", name); @@ -17434,7 +17187,7 @@ index 41683d3..cb38b9f 100644 if (cur.isStateful()) { auto exec = std::bind(&Engine::execute, this, std::placeholders::_1, std::placeholders::_2); -@@ -961,7 +956,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -961,7 +1012,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { ESAC(UserDefinedOperator) CASE(PackRecord) @@ -17444,7 +17197,7 @@ index 41683d3..cb38b9f 100644 std::unique_ptr data = std::make_unique(arity); for (std::size_t i = 0; i < arity; ++i) { data[i] = execute(shadow.getChild(i), ctxt); -@@ -989,8 +985,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -989,8 +1041,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { return !execute(shadow.getChild(), ctxt); ESAC(Negation) @@ -17455,7 +17208,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return rel.empty(); \ ESAC(EmptinessCheck) -@@ -998,8 +994,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -998,8 +1050,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(EMPTINESS_CHECK) #undef EMPTINESS_CHECK @@ -17466,7 +17219,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return rel.size(); \ ESAC(RelationSize) -@@ -1007,17 +1003,17 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1007,17 +1059,17 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(RELATION_SIZE) #undef RELATION_SIZE @@ -17490,7 +17243,7 @@ index 41683d3..cb38b9f 100644 ESAC(ProvenanceExistenceCheck) FOR_EACH_PROVENANCE(PROVENANCE_EXISTENCE_CHECK) -@@ -1049,53 +1045,30 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1049,53 +1101,30 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { COMPARE(GE, >=) case BinaryConstraintOp::MATCH: { @@ -17560,7 +17313,7 @@ index 41683d3..cb38b9f 100644 } return result; } -@@ -1115,7 +1088,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1115,7 +1144,7 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { } } @@ -17569,7 +17322,7 @@ index 41683d3..cb38b9f 100644 #undef COMPARE_NUMERIC #undef COMPARE_STRING -@@ -1138,8 +1111,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1138,8 +1167,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { return result; ESAC(TupleOperation) @@ -17580,7 +17333,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return evalScan(rel, cur, shadow, ctxt); \ ESAC(Scan) -@@ -1147,24 +1120,24 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1147,24 +1176,24 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(SCAN) #undef SCAN @@ -17611,7 +17364,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return evalParallelIndexScan(rel, cur, shadow, ctxt); \ ESAC(ParallelIndexScan) -@@ -1172,8 +1145,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1172,8 +1201,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(PARALLEL_INDEX_SCAN) #undef PARALLEL_INDEX_SCAN @@ -17622,7 +17375,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return evalIfExists(rel, cur, shadow, ctxt); \ ESAC(IfExists) -@@ -1181,8 +1154,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1181,8 +1210,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(IFEXISTS) #undef IFEXISTS @@ -17633,7 +17386,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return evalParallelIfExists(rel, cur, shadow, ctxt); \ ESAC(ParallelIfExists) -@@ -1190,16 +1163,16 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1190,16 +1219,16 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(PARALLEL_IFEXISTS) #undef PARALLEL_IFEXISTS @@ -17654,7 +17407,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return evalParallelIndexIfExists(rel, cur, shadow, ctxt); \ ESAC(ParallelIndexIfExists) -@@ -1226,8 +1199,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1226,8 +1255,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { return execute(shadow.getNestedOperation(), ctxt); ESAC(UnpackRecord) @@ -17665,7 +17418,7 @@ index 41683d3..cb38b9f 100644 const auto& rel = *static_cast(shadow.getRelation()); \ return evalParallelAggregate(rel, cur, shadow, ctxt); \ ESAC(ParallelAggregate) -@@ -1235,25 +1208,26 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1235,25 +1264,26 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(PARALLEL_AGGREGATE) #undef PARALLEL_AGGREGATE @@ -17701,7 +17454,7 @@ index 41683d3..cb38b9f 100644 return evalIndexAggregate(cur, shadow, ctxt); \ ESAC(IndexAggregate) -@@ -1286,8 +1260,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1286,8 +1316,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { return result; ESAC(Filter) @@ -17712,7 +17465,7 @@ index 41683d3..cb38b9f 100644 auto& rel = *static_cast(shadow.getRelation()); \ return evalGuardedInsert(rel, shadow, ctxt); \ ESAC(GuardedInsert) -@@ -1295,8 +1269,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1295,8 +1325,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(GUARDED_INSERT) #undef GUARDED_INSERT @@ -17723,7 +17476,7 @@ index 41683d3..cb38b9f 100644 auto& rel = *static_cast(shadow.getRelation()); \ return evalInsert(rel, shadow, ctxt); \ ESAC(Insert) -@@ -1304,18 +1278,18 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1304,18 +1334,18 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { FOR_EACH(INSERT) #undef INSERT @@ -17748,7 +17501,7 @@ index 41683d3..cb38b9f 100644 if (shadow.getChild(i) == nullptr) { ctxt.addReturnValue(0); } else { -@@ -1345,11 +1319,9 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1345,11 +1375,9 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { CASE(Loop) resetIterationNumber(); @@ -17760,7 +17513,7 @@ index 41683d3..cb38b9f 100644 resetIterationNumber(); return true; ESAC(Loop) -@@ -1374,23 +1346,27 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1374,23 +1402,27 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { return execute(shadow.getChild(), ctxt); ESAC(DebugInfo) @@ -17775,12 +17528,12 @@ index 41683d3..cb38b9f 100644 + rel.__purge(); \ + return true; \ + ESAC(Clear) ++ ++ FOR_EACH(CLEAR) ++#undef CLEAR -#define ESTIMATEJOINSIZE(Structure, Arity, AuxiliaryArity, ...) \ - CASE(EstimateJoinSize, Structure, Arity, AuxiliaryArity) \ -+ FOR_EACH(CLEAR) -+#undef CLEAR -+ +#define COUNTUNIQUEKEYS(Structure, Arity, ...) \ + CASE(CountUniqueKeys, Structure, Arity) \ const auto& rel = *static_cast(shadow.getRelation()); \ @@ -17800,7 +17553,7 @@ index 41683d3..cb38b9f 100644 return true; ESAC(Call) -@@ -1413,7 +1389,6 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1413,7 +1445,6 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { ->readAll(rel); } catch (std::exception& e) { std::cerr << "Error loading " << rel.getName() << " data: " << e.what() << "\n"; @@ -17808,7 +17561,7 @@ index 41683d3..cb38b9f 100644 } return true; } else if (op == "output" || op == "printsize") { -@@ -1481,13 +1456,6 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { +@@ -1481,13 +1512,6 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { swapRelation(shadow.getSourceId(), shadow.getTargetId()); return true; ESAC(Swap) @@ -17822,7 +17575,7 @@ index 41683d3..cb38b9f 100644 } UNREACHABLE_BAD_CASE_ANALYSIS -@@ -1602,7 +1570,7 @@ RamDomain Engine::evalParallelScan( +@@ -1602,7 +1626,7 @@ RamDomain Engine::evalParallelScan( const Rel& rel, const ram::ParallelScan& cur, const ParallelScan& shadow, Context& ctxt) { auto viewContext = shadow.getViewContext(); @@ -17831,7 +17584,7 @@ index 41683d3..cb38b9f 100644 PARALLEL_START Context newCtxt(ctxt); -@@ -1630,8 +1598,8 @@ RamDomain Engine::evalParallelScan( +@@ -1630,8 +1654,8 @@ RamDomain Engine::evalParallelScan( } template @@ -17842,7 +17595,7 @@ index 41683d3..cb38b9f 100644 (void)ctxt; constexpr std::size_t Arity = Rel::Arity; bool onlyConstants = true; -@@ -1681,8 +1649,8 @@ RamDomain Engine::evalEstimateJoinSize( +@@ -1681,8 +1705,8 @@ RamDomain Engine::evalEstimateJoinSize( // ensure range is non-empty auto* index = rel.getIndex(indexPos); // initial values @@ -17853,7 +17606,7 @@ index 41683d3..cb38b9f 100644 if (!index->scan().empty()) { // assign first tuple as prev as a dummy -@@ -1710,14 +1678,14 @@ RamDomain Engine::evalEstimateJoinSize( +@@ -1710,14 +1734,14 @@ RamDomain Engine::evalEstimateJoinSize( ++total; } } @@ -17870,7 +17623,7 @@ index 41683d3..cb38b9f 100644 bool first = true; for (auto& [k, constant] : cur.getConstantsMap()) { if (first) { -@@ -1727,18 +1695,18 @@ RamDomain Engine::evalEstimateJoinSize( +@@ -1727,18 +1751,18 @@ RamDomain Engine::evalEstimateJoinSize( } constantsStream << k << "->" << *constant; } @@ -17894,7 +17647,7 @@ index 41683d3..cb38b9f 100644 } return true; } -@@ -1777,7 +1745,7 @@ RamDomain Engine::evalParallelIndexScan( +@@ -1777,7 +1801,7 @@ RamDomain Engine::evalParallelIndexScan( CAL_SEARCH_BOUND(superInfo, low, high); std::size_t indexPos = shadow.getViewId(); @@ -17903,7 +17656,7 @@ index 41683d3..cb38b9f 100644 PARALLEL_START Context newCtxt(ctxt); auto viewInfo = viewContext->getViewInfoForNested(); -@@ -1822,7 +1790,7 @@ RamDomain Engine::evalParallelIfExists( +@@ -1822,7 +1846,7 @@ RamDomain Engine::evalParallelIfExists( const Rel& rel, const ram::ParallelIfExists& cur, const ParallelIfExists& shadow, Context& ctxt) { auto viewContext = shadow.getViewContext(); @@ -17912,7 +17665,7 @@ index 41683d3..cb38b9f 100644 auto viewInfo = viewContext->getViewInfoForNested(); PARALLEL_START Context newCtxt(ctxt); -@@ -1886,7 +1854,7 @@ RamDomain Engine::evalParallelIndexIfExists(const Rel& rel, const ram::ParallelI +@@ -1886,7 +1910,7 @@ RamDomain Engine::evalParallelIndexIfExists(const Rel& rel, const ram::ParallelI CAL_SEARCH_BOUND(superInfo, low, high); std::size_t indexPos = shadow.getViewId(); @@ -17921,7 +17674,7 @@ index 41683d3..cb38b9f 100644 PARALLEL_START Context newCtxt(ctxt); -@@ -1914,68 +1882,49 @@ RamDomain Engine::evalParallelIndexIfExists(const Rel& rel, const ram::ParallelI +@@ -1914,68 +1938,49 @@ RamDomain Engine::evalParallelIndexIfExists(const Rel& rel, const ram::ParallelI return true; } @@ -18026,7 +17779,7 @@ index 41683d3..cb38b9f 100644 for (const auto& tuple : ranges) { ctxt[aggregate.getTupleId()] = tuple.data(); -@@ -1986,11 +1935,8 @@ RamDomain Engine::evalAggregate( +@@ -1986,11 +1991,8 @@ RamDomain Engine::evalAggregate( shouldRunNested = true; @@ -18039,7 +17792,7 @@ index 41683d3..cb38b9f 100644 ++res; continue; } -@@ -1999,56 +1945,43 @@ RamDomain Engine::evalAggregate( +@@ -1999,56 +2001,43 @@ RamDomain Engine::evalAggregate( assert(expression); // only case where this is null is `COUNT` RamDomain val = execute(expression, ctxt); @@ -18126,7 +17879,7 @@ index 41683d3..cb38b9f 100644 // write result to environment souffle::Tuple tuple; -@@ -2072,7 +2005,8 @@ RamDomain Engine::evalParallelAggregate( +@@ -2072,7 +2061,8 @@ RamDomain Engine::evalParallelAggregate( for (const auto& info : viewInfo) { newCtxt.createView(*getRelationHandle(info[0]), info[1], info[2]); } @@ -18136,7 +17889,7 @@ index 41683d3..cb38b9f 100644 } template -@@ -2097,7 +2031,8 @@ RamDomain Engine::evalParallelIndexAggregate( +@@ -2097,7 +2087,8 @@ RamDomain Engine::evalParallelIndexAggregate( std::size_t viewId = shadow.getViewId(); auto view = Rel::castView(newCtxt.getView(viewId)); @@ -18146,7 +17899,7 @@ index 41683d3..cb38b9f 100644 } template -@@ -2113,7 +2048,8 @@ RamDomain Engine::evalIndexAggregate( +@@ -2113,7 +2104,8 @@ RamDomain Engine::evalIndexAggregate( std::size_t viewId = shadow.getViewId(); auto view = Rel::castView(ctxt.getView(viewId)); diff --git a/godel-script/godel-frontend/src/ast/expr.h b/godel-script/godel-frontend/src/ast/expr.h index 3f61de6f..4bd5a3b2 100644 --- a/godel-script/godel-frontend/src/ast/expr.h +++ b/godel-script/godel-frontend/src/ast/expr.h @@ -306,8 +306,9 @@ class call_head: public expr { func_call* call; initializer* ini; +private: // mark schema(xxx) is schema::__all__(xxx) - bool schema_loader; + bool flag_is_schema_loader; private: void check_call_and_init() { @@ -321,20 +322,22 @@ class call_head: public expr { call_head(const span& location): expr(ast_class::ac_call_head, location), first(nullptr), call(nullptr), ini(nullptr), - schema_loader(false) {} + flag_is_schema_loader(false) {} ~call_head() override; void set_first_expression(expr* node) { first = node; } void set_func_call(func_call* node) { call = node; check_call_and_init(); } void set_initializer(initializer* node) { ini = node; check_call_and_init(); } - void set_is_schema_loader() { schema_loader = true; } + void set_is_schema_loader() { flag_is_schema_loader = true; } +public: expr* get_first_expression() { return first; } bool has_func_call() const { return call!=nullptr; } func_call* get_func_call() { return call; } bool is_initializer() const { return ini!=nullptr; } initializer* get_initializer() { return ini; } - bool is_schema_loader() const { return schema_loader; } + bool is_schema_loader() const { return flag_is_schema_loader; } +public: void accept(ast_visitor* visitor) override; }; diff --git a/godel-script/godel-frontend/src/cli.cpp b/godel-script/godel-frontend/src/cli.cpp index 2e7709f8..078ec3dc 100644 --- a/godel-script/godel-frontend/src/cli.cpp +++ b/godel-script/godel-frontend/src/cli.cpp @@ -42,10 +42,12 @@ std::ostream& help(std::ostream& out) { << reset << "\nUsage: ./godel " << green << "[options] \n\n" << reset << "Compile options:\n" + << green << " -### " + << reset << "Print detailed compilation commands (not run).\n" << green << " -s, --souffle " << reset << "Output generated souffle to file.\n" << green << " -r, --run-souffle " - << reset << "Run compiled godel script program directly.\n" + << reset << "Run compiled godel script program.\n" << green << " -p, --package-path " << reset << "Give godelscript package root path.\n" << green << " -f, --fact " @@ -53,7 +55,7 @@ std::ostream& help(std::ostream& out) { << green << " -e, --extract-template " << reset << "Extract probable script template.\n" << green << " -l, --location-extract " - << reset << "Extract all functions and methods location into json.\n"; + << reset << "Extract function and method location into json.\n"; out << reset << "\nInformation dump options:\n" << green << " -h, --help " @@ -81,7 +83,9 @@ std::ostream& help(std::ostream& out) { << green << " --dump-lsp " << reset << "Show semantic result in json format.\n" << green << " --lsp-dump-use-indexed-file " - << reset << "Use file index instead of string.\n"; + << reset << "Use file index instead of string.\n" + << green << " --lsp-dump-only-schema " + << reset << "Only dump schema without location.\n"; out << reset << "\nLexical analysis dump options:\n" << green << " --lexer-dump-token " @@ -93,25 +97,33 @@ std::ostream& help(std::ostream& out) { << green << " --semantic-only " << reset << "Only do semantic analysis and exit.\n" << green << " --semantic-pub-check " - << reset << "Enable semantic public access authority checker.\n" - << green << " --semantic-no-else " - << reset << "Enable semantic no else branch checker.\n"; + << reset << "Enable semantic public access authority checker.\n"; out << reset << "\nSouffle code generation options:\n" + << green << " -O1 " + << reset << "Enable souffle code generator optimizer, level 1.\n" + << green << " -O2 " + << reset << "Enable souffle code generator optimizer, level 2.\n" + << green << " -O3 " + << reset << "Enable souffle code generator optimizer, level 3.\n" << green << " -Of, --opt-for " << reset << "Enable souffle code generator for statement optimizer.\n" << green << " -Ol, --opt-let " << reset << "Enable souffle code generator let statement optimizer(not suggested).\n" << green << " -Oim, --opt-ir-merge " - << reset << "Enable souffle inst combine pass (Experimental).\n" + << reset << "Enable souffle inst combine pass.\n" << green << " -Osc, --opt-self-constraint " << reset << "Enable self data constraint optimizer in souffle code generator.\n" + << green << " -Ojr, --opt-join-reorder " + << reset << "Enable join reorder optimizer(experimental).\n" << green << " --disable-remove-unused " << reset << "Disable unused method deletion pass.\n" << green << " --disable-do-schema-opt " << reset << "Disable DO Schema data constraint __all__ method optimization.\n" << green << " --souffle-debug " - << reset << "Dump generated souffle code by stdout.\n" + << reset << "Dump generated souffle code by stdout.\n"; + out + << reset << "\nSouffle execution options:\n" << green << " --souffle-slow-transformers " << reset << "Enable Souffle slow transformers.\n" << green << " --enable-souffle-profiling " @@ -125,7 +137,9 @@ std::ostream& help(std::ostream& out) { << green << " --output-csv " << reset << "Redirect stdout souffle execution result into csv.\n" << green << " --output-sqlite " - << reset << "Redirect stdout souffle execution result into sqlite.\n"; + << reset << "Redirect stdout souffle execution result into sqlite.\n" + << green << " -Drs, --directly-run-souffle " + << reset << "Directly run input souffle source.\n"; out << "\n"; return out; @@ -168,9 +182,53 @@ void report_invalid_argument(const std::string& arg) { report::error().fatal(info); } +void dump_configure(const configure& conf) { + if (conf.empty()) { + return; + } + + std::unordered_map mapper = { + {option::cli_executable_path, "executable"}, + {option::cli_input_path, "input-script"} + }; + + for(const auto& i : settings) { + if (mapper.count(i.second.command_type) && + mapper.at(i.second.command_type).length()>i.first.length()) { + continue; + } + mapper[i.second.command_type] = i.first; + } + for(const auto& i : options) { + if (mapper.count(i.second) && + mapper.at(i.second).length()>i.first.length()) { + continue; + } + mapper[i.second] = i.first; + } + + std::clog << conf.at(option::cli_executable_path) << " "; + std::clog << conf.at(option::cli_input_path); + for(const auto& i : mapper) { + if (i.first == option::cli_executable_path || + i.first == option::cli_input_path) { + continue; + } + if (!conf.count(i.first)) { + continue; + } + std::clog << " " << i.second; + if (conf.at(i.first).length()) { + std::clog << " " << conf.at(i.first); + } + } + std::clog << "\n\n"; +} + configure process_args(const std::vector& vec) { configure config = { - {option::cli_executable_path, vec[0]} // load executable path here + // load executable path here + { option::cli_executable_path, vec[0] } }; report::error err; @@ -178,6 +236,10 @@ configure process_args(const std::vector& vec) { const auto& arg = vec[i]; if (options.count(arg)) { config[options.at(arg)] = ""; + } else if (multi_options.count(arg)) { + for(auto o : multi_options.at(arg)) { + config[o] = ""; + } } else if (settings.count(arg)) { ++i; if (i>=vec.size() || vec[i][0]=='-') { @@ -213,6 +275,11 @@ configure process_args(const std::vector& vec) { err.fatal("input file is required."); } + if (config.count(option::cli_show_real_cmd_args)) { + dump_configure(config); + std::exit(0); + } + return config; } diff --git a/godel-script/godel-frontend/src/cli.h b/godel-script/godel-frontend/src/cli.h index 041be21d..8c071e0f 100644 --- a/godel-script/godel-frontend/src/cli.h +++ b/godel-script/godel-frontend/src/cli.h @@ -18,6 +18,7 @@ enum class option { cli_run_souffle, // generate souffle and run cli_dump_souffle_file, // generate souffle and dump + /* information dump */ cli_help, // get help cli_verbose, // verbose output information cli_version, // get version @@ -29,29 +30,45 @@ enum class option { cli_dump_global, // get global symbol information cli_dump_local, // get local variables' information + /* language server */ cli_dump_lsp, // get godel frontend json dump cli_dump_lsp_file_indexed, // use indexed file name in json dump + cli_dump_lsp_only_schema, // only dump schema - cli_lexer_dump_token, // dump tokens - cli_lexer_dump_comment, // dump comments - cli_semantic_only, // only do semantic analysis and exit - cli_semantic_pub_check, // switch pub-access check on - cli_semantic_no_else, // switch no-else check on + /* lexer */ + cli_lexer_dump_token, // dump tokens + cli_lexer_dump_comment, // dump comments + /* semantic analysis */ + cli_semantic_only, // only do semantic analysis and exit + cli_semantic_pub_check, // switch pub-access check on + + /* optimization */ cli_enable_for_opt, // switch for optimization on cli_enable_let_opt, // switch let optimization on cli_enable_ir_merge, // switch ir merge on cli_enable_self_constraint_opt, // switch self constraint optimization on + cli_enable_join_reorder, // switch join reorder optimization on cli_disable_remove_unused, // switch unused method deletion off cli_disable_do_schema_opt, // switch do schema optimization off cli_souffle_debug_dump, // switch souffle debug mode on cli_souffle_slow_transformers, // switch souffle slow transformers on - cli_enable_souffle_cache, // switch souffle cache on - cli_clean_souffle_cache, // switch clean souffle cache on cli_enable_souffle_profiling, // switch souffle profiling on + + /* souffle cache */ + cli_enable_souffle_cache, // switch souffle cache on + cli_clean_souffle_cache, // switch clean souffle cache on + + /* souffle output redirection */ cli_souffle_json_output, // switch souffle json output on cli_souffle_csv_output, // switch souffle csv output on - cli_souffle_sqlite_output // switch souffle sqlite output on + cli_souffle_sqlite_output, // switch souffle sqlite output on + + /* directly run souffle */ + cli_directly_run_souffle, // run souffle directly + + /* special debug info */ + cli_show_real_cmd_args }; struct info_setting { @@ -94,12 +111,12 @@ const std::unordered_map options = { {"--dump-local", option::cli_dump_local}, {"--dump-lsp", option::cli_dump_lsp}, {"--lsp-dump-use-indexed-file", option::cli_dump_lsp_file_indexed}, + {"--lsp-dump-only-schema", option::cli_dump_lsp_only_schema}, {"--color-off", option::cli_color_off}, {"--lexer-dump-token", option::cli_lexer_dump_token}, {"--lexer-dump-comment", option::cli_lexer_dump_comment}, {"--semantic-only", option::cli_semantic_only}, {"--semantic-pub-check", option::cli_semantic_pub_check}, - {"--semantic-no-else", option::cli_semantic_no_else}, {"--opt-for", option::cli_enable_for_opt}, {"-Of", option::cli_enable_for_opt}, {"--opt-let", option::cli_enable_let_opt}, @@ -108,13 +125,29 @@ const std::unordered_map options = { {"-Oim", option::cli_enable_ir_merge}, {"--opt-self-constraint", option::cli_enable_self_constraint_opt}, {"-Osc", option::cli_enable_self_constraint_opt}, + {"--opt-join-reorder", option::cli_enable_join_reorder}, + {"-Ojr", option::cli_enable_join_reorder}, {"--disable-remove-unused", option::cli_disable_remove_unused}, {"--disable-do-schema-opt", option::cli_disable_do_schema_opt}, {"--souffle-debug", option::cli_souffle_debug_dump}, {"--souffle-slow-transformers", option::cli_souffle_slow_transformers}, {"--enable-souffle-profiling", option::cli_enable_souffle_profiling}, {"--enable-souffle-cache", option::cli_enable_souffle_cache}, - {"--clean-souffle-cache", option::cli_clean_souffle_cache} + {"--clean-souffle-cache", option::cli_clean_souffle_cache}, + {"-Drs", option::cli_directly_run_souffle}, + {"--directly-run-souffle", option::cli_directly_run_souffle}, + {"-###", option::cli_show_real_cmd_args} +}; + +const std::unordered_map> multi_options = { + {"-O1", {option::cli_enable_for_opt}}, + {"-O2", {option::cli_enable_for_opt, + option::cli_enable_self_constraint_opt, + option::cli_enable_ir_merge}}, + {"-O3", {option::cli_enable_for_opt, + option::cli_enable_self_constraint_opt, + option::cli_enable_ir_merge, + option::cli_enable_join_reorder}} }; typedef std::unordered_map configure; @@ -123,6 +156,7 @@ std::ostream& welcome(std::ostream&); std::ostream& version(std::ostream&); std::ostream& help(std::ostream&); void report_invalid_argument(const std::string&); +void dump_configure(const configure&); configure process_args(const std::vector&); } diff --git a/godel-script/godel-frontend/src/engine.cpp b/godel-script/godel-frontend/src/engine.cpp index ad995112..84fc25c4 100644 --- a/godel-script/godel-frontend/src/engine.cpp +++ b/godel-script/godel-frontend/src/engine.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -214,6 +215,23 @@ std::string engine::dump_json_used_files() const { return res + "]"; } +void engine::dump_json_only_schema_without_loc(std::ostream& out) const { + out << "{\"semantic\":{\"schema\":["; + std::string res = ""; + for(const auto& i : name_space()) { + if (i.second!=symbol_kind::schema) { + continue; + } + res += global().get_schema(mapper().at(i.first)).to_json(false); + res += ","; + } + if (res.back()==',') { + res.pop_back(); + } + out << res; + out << "]}}"; +} + void engine::dump_json(std::ostream& out) const { out << "{"; @@ -263,7 +281,11 @@ bool engine::language_server_dump(const configure& config) { if (config.count(option::cli_dump_lsp_file_indexed)) { span::set_flag_lsp_dump_use_file_index(true); } - dump_json(std::cout); + if (config.count(option::cli_dump_lsp_only_schema)) { + dump_json_only_schema_without_loc(std::cout); + } else { + dump_json(std::cout); + } error::json_output_stderr(); return true; } @@ -333,9 +355,27 @@ void engine::template_extract() { return; } -void engine::run_souffle(const configure& config) { +void engine::run_souffle_from_file(const configure& config) { + const auto& path = config.at(option::cli_input_path); + if (!std::filesystem::exists(path)) { + err.fatal("file <" + path + "> does not exist."); + } else if (!std::filesystem::is_regular_file(path)) { + err.fatal("file <" + path + "> is not regular file."); + } + std::ifstream in(path, std::ios::binary); + std::stringstream ss; + ss << in.rdbuf(); + const auto souffle_content = ss.str(); + run_souffle(souffle_content, config); +} + +void engine::run_souffle_from_generated(const configure& config) { const auto souffle_content = ir_gen::get_mutable_context().str_output(config); + run_souffle(souffle_content, config); +} +void engine::run_souffle(const std::string& souffle_content, + const configure& config) { // extra arguments to be passed to souffle std::vector argv = {}; @@ -429,6 +469,11 @@ const error& engine::run(const configure& config) { return err; } + if (config.count(option::cli_directly_run_souffle)) { + run_souffle_from_file(config); + return err; + } + if (config.count(option::cli_dump_lsp)) { report::error::set_json_out(); } @@ -538,7 +583,7 @@ const error& engine::run(const configure& config) { // directly run souffle program if (config.count(option::cli_run_souffle)) { - run_souffle(config); + run_souffle_from_generated(config); } return err; } diff --git a/godel-script/godel-frontend/src/engine.h b/godel-script/godel-frontend/src/engine.h index 4ab06fd0..02600788 100644 --- a/godel-script/godel-frontend/src/engine.h +++ b/godel-script/godel-frontend/src/engine.h @@ -99,6 +99,7 @@ class engine { std::string dump_json_local() const; std::string dump_json_infer() const; std::string dump_json_used_files() const; + void dump_json_only_schema_without_loc(std::ostream&) const; void dump_json(std::ostream&) const; void dump_used_modules() const; bool language_server_dump(const configure&); @@ -110,7 +111,13 @@ class engine { void do_semantic_analysis(const configure&); void ast_structure_dump(); void template_extract(); - void run_souffle(const configure&); + +private: + // run souffle code from source file + void run_souffle_from_file(const configure&); + // run generated souffle code + void run_souffle_from_generated(const configure&); + void run_souffle(const std::string&, const configure&); public: const auto& name_space() const { return semantic_analyser.get_context().this_name_space; } diff --git a/godel-script/godel-frontend/src/error/error.cpp b/godel-script/godel-frontend/src/error/error.cpp index e3a69bd9..b2c48c3c 100644 --- a/godel-script/godel-frontend/src/error/error.cpp +++ b/godel-script/godel-frontend/src/error/error.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace report { @@ -217,23 +218,26 @@ void error::report_context(const span& loc, } } -void error::warn_report_ignored_DO_schema(const std::vector>& vec) { +void error::warn_ignored_DO_schema(const std::unordered_set& vec) { if (json_output) { return; } + // report head auto info = std::to_string(vec.size()); info += " \"__all__\" methods of DO schemas are ignored:"; report_head_info(info, false); + + // report ignored schema size_t ignored_count = 0; for(const auto& i : vec) { ++ignored_count; if (ignored_count > 4) { break; } - report_context(i.second, false, ""); + std::clog << reset << " " << i << "\n"; } if (vec.size() > 4) { - std::clog << cyan << " --> " << reset << "...(" << vec.size()-4 << ")\n"; + std::clog << reset << " ...(" << vec.size()-4 << ")\n"; } std::clog << std::endl; } diff --git a/godel-script/godel-frontend/src/error/error.h b/godel-script/godel-frontend/src/error/error.h index 1199cc7c..9c0f47b2 100644 --- a/godel-script/godel-frontend/src/error/error.h +++ b/godel-script/godel-frontend/src/error/error.h @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace report { @@ -124,7 +125,7 @@ class error { void report_context(const span&, bool, const std::string&); public: - void warn_report_ignored_DO_schema(const std::vector>&); + void warn_ignored_DO_schema(const std::unordered_set&); public: void load(const std::string&); diff --git a/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp index 1733c276..bb4eacd2 100644 --- a/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp +++ b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.cpp @@ -3,27 +3,43 @@ namespace godel { void aggregator_inline_remark::visit_call(lir::call* node) { + // if this call is not used in aggregator, skip if (!in_aggregator) { return; } + + // no need to remark intrinsics + switch(node->get_func_kind()) { + case lir::call::kind::database_load: + case lir::call::kind::find: + case lir::call::kind::key_cmp: + case lir::call::kind::to_set: + case lir::call::kind::basic_method: + case lir::call::kind::basic_static: return; + default: break; + } + + const auto mangled_rule_name = node->get_mangled_name(); switch(node->get_func_kind()) { case lir::call::kind::function: case lir::call::kind::method: - if (inline_rules.count(replace_colon(node->get_function_name()))) { + if (inline_rules.count(mangled_rule_name) && + !inline_rules.at(mangled_rule_name)->is_inherited()) { err.warn(node->get_location(), "inline function \"" + node->get_function_name() + "\" used in aggregator.", - "will generate as a normal function." + "will generate as non-inline to avoid ungrounded error." ); } - need_remark.insert(replace_colon(node->get_function_name())); + need_remark.insert(mangled_rule_name); break; default: break; } } void aggregator_inline_remark::visit_aggregator(lir::aggregator* node) { + // this should be unreachable, used to check if codegen works correctly if (in_aggregator) { err.err(node->get_location(), "detect nested aggregator, please check generated code." @@ -37,12 +53,14 @@ void aggregator_inline_remark::visit_aggregator(lir::aggregator* node) { } bool aggregator_inline_remark::run() { + // load inline rules from declarations for(const auto& decl : ctx->rule_decls) { if (decl->is_inline()) { - inline_rules.insert(replace_colon(decl->get_rule_raw_name())); + inline_rules.insert({ decl->get_mangled_name(), decl }); } } + // visit all rule impls' block to find matched cases for(auto impl : ctx->rule_impls) { impl->get_block()->accept(this); } @@ -56,13 +74,14 @@ bool aggregator_inline_remark::run() { impl->get_block()->accept(this); } + // if error is reported during this pass, return false if (err.get_error()) { return false; } // remark inline to false for(auto& decl : ctx->rule_decls) { - if (need_remark.count(replace_colon(decl->get_rule_raw_name()))) { + if (need_remark.count(decl->get_mangled_name())) { decl->set_is_inline_rule(false); } } diff --git a/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h index fd349797..abe33fc7 100644 --- a/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h +++ b/godel-script/godel-frontend/src/ir/aggregator_inline_remark.h @@ -4,13 +4,19 @@ #include #include +#include #include namespace godel { +// Soufflé does not allow inline predicates used in aggregators. +// This pass will add a remark to the predicates to indicate that +// they should not be inlined. +// And after this pass these predicates' inline attribute will be +// set to false. class aggregator_inline_remark: public pass { private: - std::unordered_set inline_rules; + std::unordered_map inline_rules; std::unordered_set need_remark; bool in_aggregator = false; diff --git a/godel-script/godel-frontend/src/ir/call_graph.cpp b/godel-script/godel-frontend/src/ir/call_graph.cpp new file mode 100644 index 00000000..67b407dd --- /dev/null +++ b/godel-script/godel-frontend/src/ir/call_graph.cpp @@ -0,0 +1,100 @@ +#include "godel-frontend/src/ir/call_graph.h" + +namespace godel { + +void call_graph_generator::scan_ir_call(lir::call* node, callee_dict& dict) const { + switch(node->get_func_kind()) { + case lir::call::kind::database_load: + case lir::call::kind::find: + case lir::call::kind::key_cmp: + case lir::call::kind::to_set: + case lir::call::kind::basic_method: + case lir::call::kind::basic_static: return; + default: break; + } + + // only need user-defined rules + dict.insert(node->get_mangled_name()); + return; +} + +void call_graph_generator::scan_call(souffle_rule_impl* impl, + callee_dict& dict) const { + // recursively search used rules + // but we use bfs queue to avoid stack overflow + // so visitor(dfs) is not needed here + std::queue bfs; + bfs.push(impl->get_block()); + + while(!bfs.empty()) { + auto block = bfs.front(); + bfs.pop(); + for(auto stmt : block->get_content()) { + switch(stmt->get_kind()) { + case lir::inst_kind::inst_call: + scan_ir_call((lir::call*)stmt, dict); + break; + case lir::inst_kind::inst_ctor: + dict.insert(((lir::constructor*)stmt)->get_mangled_name()); + break; + case lir::inst_kind::inst_block: + bfs.push((lir::block*)stmt); + break; + case lir::inst_kind::inst_not: + bfs.push(((lir::not_operand*)stmt)->get_body()); + break; + case lir::inst_kind::inst_and: + bfs.push(((lir::and_operand*)stmt)->get_left_block()); + bfs.push(((lir::and_operand*)stmt)->get_right_block()); + break; + case lir::inst_kind::inst_or: + bfs.push(((lir::or_operand*)stmt)->get_left_block()); + bfs.push(((lir::or_operand*)stmt)->get_right_block()); + break; + case lir::inst_kind::inst_aggr: + bfs.push(((lir::aggregator*)stmt)->get_body()); + default: break; + } + } + } +} + +void call_graph_generator::initialize_call_graph( + const std::vector& impls, call_graph& cg) const { + for(auto i : impls) { + const auto name = i->get_mangled_name(); + if (!cg.count(name)) { + cg.insert({name, {}}); + } + // construct the call graph and mark all used rules + scan_call(i, cg.at(name)); + } +} + +const call_graph& call_graph_generator::apply(const ir_context& ctx) { + cg.clear(); + + // initialize call graph root + for(const auto& i : ctx.souffle_output) { + const auto name = rule_mangle(i); + if (!cg.count(name)) { + cg.insert({name, {}}); + } + } + for(const auto& i : ctx.annotated_output) { + const auto name = i.get_mangled_name(); + if (!cg.count(name)) { + cg.insert({name, {}}); + } + } + + // construct call graph by scanning the IR + initialize_call_graph(ctx.rule_impls, cg); + initialize_call_graph(ctx.database_get_table, cg); + initialize_call_graph(ctx.schema_get_field, cg); + initialize_call_graph(ctx.schema_data_constraint_impls, cg); + + return cg; +} + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/call_graph.h b/godel-script/godel-frontend/src/ir/call_graph.h new file mode 100644 index 00000000..3c76a716 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/call_graph.h @@ -0,0 +1,31 @@ +#pragma once + +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/lir.h" + +#include +#include +#include +#include +#include + +namespace godel { + +typedef std::unordered_set callee_dict; +typedef std::unordered_map call_graph; + +class call_graph_generator { +private: + call_graph cg; + +private: + void scan_ir_call(lir::call*, callee_dict&) const; + void scan_call(souffle_rule_impl*, callee_dict&) const; + void initialize_call_graph(const std::vector&, + call_graph&) const; + +public: + const call_graph& apply(const ir_context&); +}; + +} \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/inst_combine.cpp b/godel-script/godel-frontend/src/ir/inst_combine.cpp index 426d7bc7..693cc140 100644 --- a/godel-script/godel-frontend/src/ir/inst_combine.cpp +++ b/godel-script/godel-frontend/src/ir/inst_combine.cpp @@ -6,7 +6,7 @@ namespace godel { void inst_combine_pass::visit_store(lir::store* s) { const auto& src = s->get_source(); - const auto& dst = s->get_destination(); + const auto& tgt = s->get_target(); // record this case: // @@ -20,10 +20,10 @@ void inst_combine_pass::visit_store(lir::store* s) { // // (call(ssa_temp_2, a, b)) // - if (dst.kind==lir::inst_value_kind::variable && + if (tgt.kind==lir::inst_value_kind::variable && src.kind==lir::inst_value_kind::variable) { - variable_reference_graph[dst.content].insert({src.content, s}); - variable_reference_graph[src.content].insert({dst.content, s}); + variable_reference_graph[tgt.content].insert({src.content, s}); + variable_reference_graph[src.content].insert({tgt.content, s}); } // record this case: @@ -38,9 +38,9 @@ void inst_combine_pass::visit_store(lir::store* s) { // // (call(ssa_temp_2, 1, 2)) // - if (dst.kind==lir::inst_value_kind::variable && + if (tgt.kind==lir::inst_value_kind::variable && src.kind==lir::inst_value_kind::literal) { - variable_reference_graph[dst.content].insert({src.content, s}); + variable_reference_graph[tgt.content].insert({src.content, s}); } } @@ -216,19 +216,19 @@ void combine_worker::visit_record(lir::record* node) { } void combine_worker::visit_unary(lir::unary* node) { - const auto& dst = node->get_destination(); - if (is_single_ref_ssa_temp(dst.content)) { - const auto& ref = get_single_ref(dst.content); - node->get_mutable_destination().content = ref.first; + const auto& tgt = node->get_target(); + if (is_single_ref_ssa_temp(tgt.content)) { + const auto& ref = get_single_ref(tgt.content); + node->get_mutable_target().content = ref.first; ref.second->set_flag_eliminated(true); } } void combine_worker::visit_binary(lir::binary* node) { - const auto& dst = node->get_destination(); - if (is_single_ref_ssa_temp(dst.content)) { - const auto& ref = get_single_ref(dst.content); - node->get_mutable_destination().content = ref.first; + const auto& tgt = node->get_target(); + if (is_single_ref_ssa_temp(tgt.content)) { + const auto& ref = get_single_ref(tgt.content); + node->get_mutable_target().content = ref.first; ref.second->set_flag_eliminated(true); } } diff --git a/godel-script/godel-frontend/src/ir/ir_context.cpp b/godel-script/godel-frontend/src/ir/ir_context.cpp index 2a7618d4..ca712d24 100644 --- a/godel-script/godel-frontend/src/ir/ir_context.cpp +++ b/godel-script/godel-frontend/src/ir/ir_context.cpp @@ -2,6 +2,7 @@ #include "godel-frontend/src/ir/pass_manager.h" #include "godel-frontend/src/ir/remove_unused.h" #include "godel-frontend/src/ir/inst_combine.h" +#include "godel-frontend/src/ir/name_mangling.h" #include #include @@ -22,13 +23,13 @@ void souffle_type_alias::dump(std::ostream& out) const { } void souffle_schema::dump(std::ostream& out) const { - out << ".decl schema_" << replace_colon(name) << "("; - out << "result: " << replace_colon(name) << ", db_id: DBIndex"; + out << ".decl " << get_mangled_name() << "("; + out << "result: " << type_mangle(name) << ", db_id: DBIndex"; if (fields.size()) { out << ", "; } for(const auto& i : fields) { - out << i.first << ": " << replace_colon(i.second); + out << i.first << ": " << type_mangle(i.second); if (i!=fields.back()) { out << ", "; } @@ -39,7 +40,7 @@ void souffle_schema::dump(std::ostream& out) const { void souffle_input_decl::dump(std::ostream& os) const { os << ".decl " << decl_name << "("; for(const auto& i : fields) { - os << i.first << ": " << replace_colon(i.second); + os << i.first << ": " << type_mangle(i.second); if (i!=fields.back()) { os << ", "; } @@ -54,15 +55,15 @@ void souffle_input_impl::dump(std::ostream& os) const { } void souffle_rule_decl::dump(std::ostream& out) const { - out << ".decl " << replace_colon(name) << "("; + out << ".decl " << get_mangled_name() << "("; if (return_type.length() && return_type!="bool") { - out << "result: " << replace_colon(return_type); + out << "result: " << type_mangle(return_type); if (params.size()) { out << ", "; } } for(const auto& i : params) { - out << i.first << ": " << replace_colon(i.second); + out << i.first << ": " << type_mangle(i.second); if (i!=params.back()) { out << ", "; } @@ -71,11 +72,14 @@ void souffle_rule_decl::dump(std::ostream& out) const { if (flag_is_inline_rule) { out << " inline"; } + if (flag_is_inherited_rule) { + out << " // inherited"; + } out << "\n"; } void souffle_rule_impl::dump(std::ostream& out) const { - out << replace_colon(func_name) << "("; + out << get_mangled_name() << "("; if (!params.empty()) { auto it = params.begin(); out << *it++; @@ -170,10 +174,12 @@ bool ir_context::cache_input_impl(std::ostream& out, const std::string& fn) cons if (!enable_souffle_cache || !std::filesystem::exists(tempfile)) { return false; } + + // sqlite-cache reader will not convert empty string to "n/a" if (check_cache_table_exists(fn)) { out << ".input " << fn; out << "(IO=\"sqlite-cache\", dbname=\"" << tempfile.string() << "\", "; - out << "name=\"_" << fn << "\")\n"; + out << "name=\"" << fn << "\")\n"; return true; } return false; @@ -182,7 +188,7 @@ bool ir_context::cache_input_impl(std::ostream& out, const std::string& fn) cons void ir_context::dump_rule_impls(std::ostream& out, const std::unordered_set& cache_decl) const { for(auto i : rule_impls) { - const auto name = replace_colon(i->get_func_name()); + const auto name = i->get_mangled_name(); // cache input if (cache_decl.count(name) && cache_input_impl(out, name)) { continue; @@ -244,7 +250,7 @@ void ir_context::dump_souffle_annotated_input(std::ostream& out) const { return; } for(const auto& i : annotated_input) { - out << ".input " << replace_colon(i.rule_name); + out << ".input " << i.get_mangled_name(); if (i.format=="\"json\"") { out << "(IO=\"jsonfile\", filename="; out << i.file_path << ", format=\"object\")"; @@ -268,7 +274,7 @@ void ir_context::dump_souffle_multi_json_output(std::ostream& out) const { // dump output rules for(const auto& i : souffle_output) { const auto temp_file = temp / ("godel_script_" + i + ".json"); - out << ".output " << replace_colon(i); + out << ".output " << rule_mangle(i); out << "(IO=\"jsonfile\", filename=\""; out << temp_file.string() << "\", format=\"object\")" << "\n"; } @@ -277,7 +283,7 @@ void ir_context::dump_souffle_multi_json_output(std::ostream& out) const { } } -void ir_context::dump_souffle_output(std::ostream& out) const { +void ir_context::dump_souffle_redirect_output(std::ostream& out) const { if (souffle_output.empty()) { return; } @@ -305,7 +311,7 @@ void ir_context::dump_souffle_output(std::ostream& out) const { } // dump output rules for(const auto& i : souffle_output) { - out << ".output " << replace_colon(i) << io_format << "\n"; + out << ".output " << rule_mangle(i) << io_format << "\n"; } if (souffle_output.size()) { out << "\n"; @@ -317,7 +323,7 @@ void ir_context::dump_souffle_annotated_output(std::ostream& out) const { return; } for(const auto& i : annotated_output) { - out << ".output " << replace_colon(i.rule_name); + out << ".output " << i.get_mangled_name(); if (i.format=="\"json\"") { out << "(IO=\"jsonfile\", filename="; out << i.file_path << ", format=\"object\")"; @@ -345,10 +351,10 @@ void ir_context::dump(std::ostream& out, const cli::configure& conf) { std::unordered_set cache_decl; for(auto i : rule_decls) { if (i->is_inline()) { - inline_decl.insert(replace_colon(i->get_rule_raw_name())); + inline_decl.insert(i->get_mangled_name()); } if (i->need_cache()) { - cache_decl.insert(replace_colon(i->get_rule_raw_name())); + cache_decl.insert(i->get_mangled_name()); } } @@ -385,7 +391,7 @@ void ir_context::dump(std::ostream& out, const cli::configure& conf) { dump_input_impls(out); dump_souffle_annotated_input(out); // dump souffle output - dump_souffle_output(out); + dump_souffle_redirect_output(out); dump_souffle_annotated_output(out); if (enable_souffle_cache) { @@ -406,7 +412,7 @@ bool ir_context::check_cache_table_exists(const std::string& rule) const { } auto cmd = std::string("SELECT name FROM sqlite_master WHERE type='table'"); - cmd += " AND name='_" + rule + "';"; + cmd += " AND name='" + rule + "';"; sqlite3_stmt* stmt = nullptr; const char* tail = nullptr; diff --git a/godel-script/godel-frontend/src/ir/ir_context.h b/godel-script/godel-frontend/src/ir/ir_context.h index ce98629a..c2bfea07 100644 --- a/godel-script/godel-frontend/src/ir/ir_context.h +++ b/godel-script/godel-frontend/src/ir/ir_context.h @@ -3,6 +3,7 @@ #include "godel-frontend/src/ir/lir.h" #include "godel-frontend/src/sema/context.h" #include "godel-frontend/src/cli.h" +#include "godel-frontend/src/ir/name_mangling.h" #include #include @@ -32,14 +33,19 @@ struct souffle_type_alias { void dump(std::ostream&) const; }; +// generate rule begin with `schema_` struct souffle_schema { std::string name; std::vector> fields; + auto get_mangled_name() const { + return rule_mangle("schema_" + name); + } void dump(std::ostream&) const; }; // declaration of database input +// generate rule begin with `input_` struct souffle_input_decl { std::string database_name; std::string table_type; @@ -54,15 +60,17 @@ struct souffle_input_decl { const std::string& tt, uint64_t lt): database_name(dbn), table_type(tt), load_times(lt) { - decl_name = "input_" + replace_colon(database_name) + "_" + - replace_colon(table_type) + "_" + - std::to_string(load_times); + decl_name = "input_" + database_name + + "_" + table_type + + "_" + std::to_string(load_times); + decl_name = rule_mangle(decl_name); } void dump(std::ostream&) const; - const auto& get_decl_name() const { return decl_name; } + const auto& get_mangled_name() const { return decl_name; } }; // implementation of database input +// generate rule begin with `input_` struct souffle_input_impl { std::string name; std::string table_name; @@ -81,14 +89,16 @@ struct souffle_input_impl { const std::string& idb): name(n), table_name(tn), table_type(tt), path_id(pid), input_db_path(idb) { - decl_name = "input_" + replace_colon(name) + "_" + - replace_colon(table_type) + "_" + - std::to_string(path_id); + decl_name = "input_" + name + + "_" + table_type + + "_" + std::to_string(path_id); + decl_name = rule_mangle(decl_name); } void dump(std::ostream&) const; - const auto& get_decl_name() const { return decl_name; } + const auto& get_mangled_name() const { return decl_name; } }; +// generate rule begin with `rule_` class souffle_rule_decl { private: std::string name; @@ -98,34 +108,40 @@ class souffle_rule_decl { private: bool flag_is_inline_rule; bool flag_need_cache; + bool flag_is_inherited_rule; public: souffle_rule_decl(const std::string& n): name(n), return_type(""), flag_is_inline_rule(false), - flag_need_cache(false) {} + flag_need_cache(false), + flag_is_inherited_rule(false) {} void dump(std::ostream&) const; public: void set_return_type(const std::string& t) { return_type = t; } void set_is_inline_rule(bool flag) { flag_is_inline_rule = flag; } void set_need_cache(bool flag) { flag_need_cache = flag; } + void set_is_inherited_rule(bool flag) { flag_is_inherited_rule = flag; } void add_param(const std::string& pn, const std::string& pt) { params.push_back({pn, pt}); } public: const auto& get_rule_raw_name() const { return name; } + auto get_mangled_name() const { return rule_mangle(name); } const auto& get_params() const { return params; } const auto& get_return_type() const { return return_type; } auto is_inline() const { return flag_is_inline_rule; } auto need_cache() const { return flag_need_cache;} + auto is_inherited() const { return flag_is_inherited_rule; } }; +// generate rule begin with `rule_` class souffle_rule_impl { private: std::string func_name; - std::vector params; + std::vector params; lir::block block; public: @@ -133,12 +149,20 @@ class souffle_rule_impl { func_name(c), block(loc) { block.set_use_semicolon(); } - void add_param(const std::string& p) { - params.push_back(p); + +public: + void add_param_variable(const std::string& var) { + params.push_back(lir::inst_value_t::variable(var)); + } + void add_param_literal(const std::string& lit) { + params.push_back(lir::inst_value_t::literal(lit)); } + +public: auto get_block() { return █ } const auto& get_params() const { return params; } const auto& get_func_name() const { return func_name; } + auto get_mangled_name() const { return rule_mangle(func_name); } void dump(std::ostream&) const; }; @@ -147,6 +171,8 @@ struct souffle_annotated_file_output { std::string format; std::string file_path; std::string rule_name; + + auto get_mangled_name() const { return rule_mangle(rule_name); } }; // alias to annotated_file_output @@ -156,8 +182,9 @@ struct ir_context { std::vector functors; std::vector type_alias; - // souffle stdout output, can be redirected to file output + // rules' name for souffle stdout output, can be redirected to file output std::vector souffle_output; + // mapper stores real name of mangled output rule, // used for merging output files into one file std::unordered_map souffle_output_real_name; @@ -214,7 +241,7 @@ struct ir_context { void dump_input_impls(std::ostream&) const; void dump_souffle_annotated_input(std::ostream&) const; void dump_souffle_multi_json_output(std::ostream&) const; - void dump_souffle_output(std::ostream&) const; + void dump_souffle_redirect_output(std::ostream&) const; void dump_souffle_annotated_output(std::ostream&) const; void dump(std::ostream&, const cli::configure&); diff --git a/godel-script/godel-frontend/src/ir/ir_gen.cpp b/godel-script/godel-frontend/src/ir/ir_gen.cpp index b813ab43..09ae8fc7 100644 --- a/godel-script/godel-frontend/src/ir/ir_gen.cpp +++ b/godel-script/godel-frontend/src/ir/ir_gen.cpp @@ -28,7 +28,7 @@ void ir_gen::emit_type_alias_for_database() { .type_name = sc.second.name, .type_loc = sc.second.location }; - const auto name = replace_colon(sym.full_path_name()); + const auto name = type_mangle(sym.full_path_name()); // insert type alias into the context // for example: @@ -63,8 +63,8 @@ void ir_gen::emit_type_alias_for_schema_with_primary_key(const schema& sc) { // .type Example = int // irc.type_alias.push_back(souffle_type_alias { - .alias = replace_colon(sym.full_path_name()), - .real = replace_colon(real_type.full_path_name_without_set()), + .alias = type_mangle(sym.full_path_name()), + .real = type_mangle(real_type.full_path_name_without_set()), .structure_type_list = {} }); } @@ -82,7 +82,7 @@ void ir_gen::emit_type_alias_for_schema_without_primary_key(const schema& sc) { for(const auto& i : sc.ordered_fields) { real += i + ": "; const auto& type = sc.fields.at(i); - const auto name = replace_colon(type.full_path_name_without_set()); + const auto name = type_mangle(type.full_path_name_without_set()); real += name + ", "; structure_type_list.push_back(name); } @@ -99,7 +99,7 @@ void ir_gen::emit_type_alias_for_schema_without_primary_key(const schema& sc) { // .type Example = [id: int, name: string] // irc.type_alias.push_back(souffle_type_alias { - .alias = replace_colon(sym.full_path_name()), + .alias = type_mangle(sym.full_path_name()), .real = real, .structure_type_list = structure_type_list }); @@ -126,7 +126,7 @@ void ir_gen::emit_type_alias_for_enum() { .type_loc = e.second.location }; irc.type_alias.push_back(souffle_type_alias { - .alias = replace_colon(sym.full_path_name()), + .alias = type_mangle(sym.full_path_name()), .real = "int", .structure_type_list = {} }); @@ -160,8 +160,8 @@ void ir_gen::emit_used_database_get_table_impl(const std::string& db_type_name, "get_table_" + db_type_name + "_" + table_name, report::span::null() ); - get_table_impl->add_param("result"); - get_table_impl->add_param(db_id); + get_table_impl->add_param_variable("ret?result"); + get_table_impl->add_param_literal(db_id); get_table_impl->get_block()->set_use_comma(); // generate input call @@ -175,7 +175,7 @@ void ir_gen::emit_used_database_get_table_impl(const std::string& db_type_name, // only need primary key for(const auto& f : sc.ordered_fields) { input_call->add_arg(sc.fields.at(f).primary? - lir::inst_value_t::variable("result"): + lir::inst_value_t::variable("ret?result"): lir::inst_value_t::default_value() ); } @@ -200,7 +200,7 @@ void ir_gen::emit_used_database_get_table_impl(const std::string& db_type_name, // generate result = [ f1, f2, ...] get_table_impl->get_block()->add_new_content(new lir::store( lir::inst_value_t::literal(literal), - lir::inst_value_t::variable("result"), + lir::inst_value_t::variable("ret?result"), report::span::null() )); } @@ -220,19 +220,19 @@ void ir_gen::emit_schema_data_constraint_impl(const std::string& db_type_name, ); // load parameter + // schema with primary key, just set the result = primary key + // schema without primary key, need to generate result = [...] if (sc.has_primary_key()) { - // schema with primary key, just set the result = primary key - input_to_schema->add_param(sc.get_primary_key()); + input_to_schema->add_param_variable(sc.get_primary_key()); } else { - // schema without primary key, need to generate result = [...] - input_to_schema->add_param("result"); + input_to_schema->add_param_variable("ret?result"); } - // database index - input_to_schema->add_param(db_id); + // database index, literal + input_to_schema->add_param_literal(db_id); // schema field for(const auto& field : sc.ordered_fields) { - input_to_schema->add_param(field); + input_to_schema->add_param_variable(field); } // load result, generate result = [...] @@ -247,7 +247,7 @@ void ir_gen::emit_schema_data_constraint_impl(const std::string& db_type_name, literal += "]"; input_to_schema->get_block()->add_new_content(new lir::store( lir::inst_value_t::literal(literal), - lir::inst_value_t::variable("result"), + lir::inst_value_t::variable("ret?result"), report::span::null() )); } @@ -398,6 +398,7 @@ void ir_gen::emit_schema_method_decl(const function& method, method.has_annotation("@inline") || method.inherit ); + method_decl->set_is_inherited_rule(method.inherit); // load parameters of the method for(const auto& arg_name : method.ordered_parameter_list) { @@ -428,11 +429,11 @@ void ir_gen::emit_schema_inherit_method(const schema& sc, if (method.return_type!=symbol::null() && method.return_type!=symbol::boolean()) { - impl->add_param("result"); - call->add_arg(lir::inst_value_t::variable("result")); + impl->add_param_variable("ret?result"); + call->add_arg(lir::inst_value_t::variable("ret?result")); } for(auto& arg : method.ordered_parameter_list) { - impl->add_param(arg); + impl->add_param_variable(arg); call->add_arg(lir::inst_value_t::variable(arg)); } irc.rule_impls.push_back(impl); @@ -468,7 +469,7 @@ void ir_gen::emit_schema_type_check_impl(const symbol& sym, const schema& sc) { "typecheck_" + sym.full_path_name(), report::span::null() ); - typecheck_impl->add_param("self"); + typecheck_impl->add_param_variable("self"); // generate inner data constraint call auto data_constraint_call = new lir::call( @@ -499,6 +500,7 @@ void ir_gen::emit_schema_get_field() { }; for(const auto& field : sc.second.ordered_fields) { + // generate get field method name auto name = "get_field_" + sym.full_path_name() + "_" + field; auto rule = new souffle_rule_decl(name); rule->set_return_type( @@ -512,15 +514,15 @@ void ir_gen::emit_schema_get_field() { // implementation of get field method auto rule_impl = new souffle_rule_impl(name, report::span::null()); - rule_impl->add_param("result"); - rule_impl->add_param("self"); + rule_impl->add_param_variable("ret?result"); + rule_impl->add_param_variable("self"); // necessary optimization, if the field is primary key, // we can directly store self in result to avoid extra join if (sc.second.fields.at(field).primary) { auto assign = new lir::store( lir::inst_value_t::variable("self"), - lir::inst_value_t::variable("result"), + lir::inst_value_t::variable("ret?result"), report::span::null() ); rule_impl->get_block()->add_new_content(assign); @@ -538,7 +540,7 @@ void ir_gen::emit_schema_get_field() { call->add_arg(lir::inst_value_t::default_value()); for(const auto& f : sc.second.ordered_fields) { call->add_arg(f==field? - lir::inst_value_t::variable("result"): + lir::inst_value_t::variable("ret?result"): lir::inst_value_t::default_value() ); } @@ -569,21 +571,20 @@ void ir_gen::emit_DO_schema_default_constructor() { // generate this method as a rule implementation: // rule_name(result, ...) :- schema_name(result, db, ...). - const auto function_name = replace_colon(sym.full_path_name() + "::__all__"); auto func_impl = new souffle_rule_impl( - "rule_" + function_name, + "rule_" + sym.full_path_name() + "::__all__", report::span::null() ); - func_impl->add_param("result"); - func_impl->add_param("db"); + func_impl->add_param_variable("ret?result"); + func_impl->add_param_variable("db"); irc.rule_impls.push_back(func_impl); auto call = new lir::call( - "schema_" + replace_colon(sym.full_path_name()), + "schema_" + sym.full_path_name(), report::span::null() ); func_impl->get_block()->add_new_content(call); - call->add_arg(lir::inst_value_t::variable("result")); + call->add_arg(lir::inst_value_t::variable("ret?result")); call->add_arg(lir::inst_value_t::variable("db")); for(size_t i = 0; iadd_arg(lir::inst_value_t::default_value()); @@ -813,6 +814,7 @@ void ir_gen::get_field_from_schema(call_expr* node) { const auto index = ctx->global.get_index(name); const auto& sch = ctx->global.get_schema(index); if (sch.fields.count(node->get_field_name()->get_name())) { + // generate get field method call auto lir_call = new lir::call( "get_field_" + name + "_" + node->get_field_name()->get_name(), node->get_location() @@ -1097,7 +1099,7 @@ void ir_gen::report_ignored_DO_schema_data_constraint() { if (ignored_DO_schema.empty()) { return; } - err.warn_report_ignored_DO_schema(ignored_DO_schema); + err.warn_ignored_DO_schema(ignored_DO_schema); } bool ir_gen::visit_number_literal(number_literal* node) { @@ -1426,10 +1428,10 @@ void ir_gen::not_data_constraint_func_decl(const std::string& function_name, ); if (node->has_return_value() && node->get_return_type()->get_full_name()!="bool") { - current_rule->add_param("result"); + current_rule->add_param_variable("ret?result"); } for(auto i : node->get_parameter_list()) { - current_rule->add_param(i->get_var_name()->get_name()); + current_rule->add_param_variable(i->get_var_name()->get_name()); } irc.rule_impls.push_back(current_rule); @@ -1502,27 +1504,23 @@ void ir_gen::data_constraint_func_decl(const std::string& function_name, if (sc.referenced_by_database_table && flag_ignore_do_schema_data_constraint) { // DO schema's __all__ does not need to be generated to data constraint - ignored_DO_schema.push_back({ - impl_schema_name, - node->get_name()->get_location() - }); + ignored_DO_schema.insert(impl_schema_name); } else { current_rule = new souffle_rule_impl( "schema_" + impl_schema_name, node->get_location() ); - current_rule->add_param("result"); + current_rule->add_param_variable("ret?result"); // add database name into parameter - current_rule->add_param( - database_param_name.empty()? - "[-1, -1]": - database_param_name - ); + if (database_param_name.empty()) { + current_rule->add_param_literal("[-1, -1]"); + } else { + current_rule->add_param_variable(database_param_name); + } // add field name into parameter, doing mangling for(const auto& f : sc.ordered_fields) { - const auto type = sc.fields.at(f); - const auto name_mangled_field = field_name_mangling(f, type); - current_rule->add_param(name_mangled_field); + const auto name_mangled_field = field_mangle(f); + current_rule->add_param_variable(name_mangled_field); } irc.rule_impls.push_back(current_rule); @@ -1558,13 +1556,13 @@ void ir_gen::data_constraint_func_decl(const std::string& function_name, "rule_" + function_name, node->get_location() ); - fn_impl->add_param("result"); + fn_impl->add_param_variable("ret?result"); for(auto i : node->get_parameter_list()) { - fn_impl->add_param(i->get_var_name()->get_name()); + fn_impl->add_param_variable(i->get_var_name()->get_name()); } auto call = new lir::call("schema_" + impl_schema_name, node->get_location()); fn_impl->get_block()->add_new_content(call); - call->add_arg(lir::inst_value_t::variable("result")); + call->add_arg(lir::inst_value_t::variable("ret?result")); call->add_arg(database_param_name.empty()? lir::inst_value_t::default_value(): lir::inst_value_t::variable(database_param_name) @@ -1645,7 +1643,7 @@ bool ir_gen::visit_query_decl(query_decl* node) { node->get_location() ); for(const auto& i : query_self.ordered_output_list) { - query_impl->add_param(i); + query_impl->add_param_variable(i); } query_impl->get_block()->set_use_comma(); blocks.push_back(query_impl->get_block()); @@ -1891,7 +1889,7 @@ bool ir_gen::visit_ret_stmt(ret_stmt* node) { if (value_stack.size()) { blocks.back()->add_new_content(new lir::store( value_stack.back().to_inst_value(), - lir::inst_value_t::variable("result"), + lir::inst_value_t::variable("ret?result"), node->get_return_value()->get_location() )); value_stack.pop_back(); @@ -1924,7 +1922,12 @@ bool ir_gen::visit_fact_data(fact_data* node) { } // inst_value_t here should be inst_value_t::variable - new_fact->add_pair(params[c], value_stack.back().to_inst_value()); + new_fact->add_pair( + params[c].content, + value_stack.back().to_inst_value() + ); + + // pop value stack value_stack.pop_back(); c++; } @@ -2233,7 +2236,7 @@ bool ir_gen::visit_func_call(func_call* node) { if (func_stack.back().kind==func_kind::schema_to) { auto schema_to_block = new lir::block(node->get_location()); auto typecheck_call = new lir::call( - "typecheck_" + replace_colon(func_stack.back().generic_type), + "typecheck_" + func_stack.back().generic_type, node->get_location() ); const auto source = value_stack.back(); @@ -2264,7 +2267,7 @@ bool ir_gen::visit_func_call(func_call* node) { // if (func_stack.back().kind==func_kind::schema_is) { auto typecheck_call = new lir::call( - "typecheck_" + replace_colon(func_stack.back().generic_type), + "typecheck_" + func_stack.back().generic_type, node->get_location() ); const auto source = value_stack.back(); @@ -2332,6 +2335,7 @@ void ir_gen::generate_spread_expr( const auto& infer_schema = ctx->global.get_schema(index); for(const auto& field : infer_schema.ordered_fields) { + // generate get field method name const auto name = "get_field_" + full_name + "_" + field; // generate call @@ -2397,7 +2401,7 @@ bool ir_gen::visit_initializer(initializer* node) { } else { // generate construct code in data constraint block for(const auto& f : sc.ordered_fields) { - const auto name_mangled_field = field_name_mangling(f, sc.fields.at(f)); + const auto name_mangled_field = field_mangle(f); blocks.back()->add_new_content(new lir::store( fields.at(f).to_inst_value(), lir::inst_value_t::variable(name_mangled_field), @@ -2407,15 +2411,15 @@ bool ir_gen::visit_initializer(initializer* node) { // generate result variable if (sc.has_primary_key()) { const auto& key = sc.get_primary_key(); - const auto name_mangled_field = field_name_mangling(key, sc.fields.at(key)); + const auto name_mangled_field = field_mangle(key); blocks.back()->add_new_content(new lir::store( lir::inst_value_t::variable(name_mangled_field), - lir::inst_value_t::variable("result"), + lir::inst_value_t::variable("ret?result"), node->get_location() )); } else { auto record = new lir::record( - lir::inst_value_t::variable("result"), + lir::inst_value_t::variable("ret?result"), node->get_location() ); blocks.back()->add_new_content(record); diff --git a/godel-script/godel-frontend/src/ir/ir_gen.h b/godel-script/godel-frontend/src/ir/ir_gen.h index a647ccda..66f548db 100644 --- a/godel-script/godel-frontend/src/ir/ir_gen.h +++ b/godel-script/godel-frontend/src/ir/ir_gen.h @@ -200,7 +200,7 @@ class ir_gen: public ast_visitor { void generate_method_call(func_call*, lir::call*); private: - std::vector> ignored_DO_schema; + std::unordered_set ignored_DO_schema; void report_ignored_DO_schema_data_constraint(); private: diff --git a/godel-script/godel-frontend/src/ir/lir.cpp b/godel-script/godel-frontend/src/ir/lir.cpp index e9e36245..6c76ea7f 100644 --- a/godel-script/godel-frontend/src/ir/lir.cpp +++ b/godel-script/godel-frontend/src/ir/lir.cpp @@ -1,21 +1,11 @@ #include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/ir/name_mangling.h" #include #include #include namespace godel { - -std::string replace_colon(const std::string& input) { - auto result = input; - auto colon_pos = result.find(':'); - while(colon_pos!=std::string::npos) { - result.replace(colon_pos, 1, "_"); - colon_pos = result.find(':', colon_pos+1); - } - return result; -} - namespace lir { std::ostream& operator<<(std::ostream& os, const inst_value_t& ivt) { @@ -52,7 +42,7 @@ void boolean::dump(std::ostream& os, const std::string& indent) const { } void store::dump(std::ostream& os, const std::string& indent) const { - os << indent << destination << " = " << source; + os << indent << target << " = " << source; } void call::generate_key_cmp(std::ostream& os) const { @@ -222,7 +212,7 @@ void call::generate_find(std::ostream& os) const { // class_instance_set = class_or_interface_instance // ) // - if (function_name=="find") { + if (function_name == "find") { os << "(" << destination << " = " << arguments[0] << ", "; os << arguments[0] << " = " << arguments[1] << ")"; return; @@ -246,7 +236,7 @@ void call::dump(std::ostream& os, const std::string& indent) const { } // normal function and method call - os << replace_colon(function_name) << "("; + os << get_mangled_name() << "("; if (destination.content.size()) { os << destination << (arguments.size()? ", ":""); } @@ -263,7 +253,7 @@ void call::dump(std::ostream& os, const std::string& indent) const { } void constructor::dump(std::ostream& os, const std::string& indent) const { - os << indent << "schema_" << replace_colon(schema_name); + os << indent << get_mangled_name(); os << "(" << destination << ", _" << (fields_value.size()? ", ":""); size_t s = fields_value.size(); @@ -294,7 +284,7 @@ void record::dump(std::ostream& os, const std::string& indent) const { } void unary::dump(std::ostream& os, const std::string& indent) const { - os << indent << destination << " = "; + os << indent << target << " = "; switch(operand) { case kind::op_neg: os << "-"; break; default: assert(false && "unreachable"); break; @@ -303,7 +293,7 @@ void unary::dump(std::ostream& os, const std::string& indent) const { } void binary::dump(std::ostream& os, const std::string& indent) const { - os << indent<< destination << " = "; + os << indent << target << " = "; os << left << " "; switch(operator_kind) { case kind::op_add: os << "+"; break; diff --git a/godel-script/godel-frontend/src/ir/lir.h b/godel-script/godel-frontend/src/ir/lir.h index 02dc66dd..e69ec945 100644 --- a/godel-script/godel-frontend/src/ir/lir.h +++ b/godel-script/godel-frontend/src/ir/lir.h @@ -1,6 +1,7 @@ #pragma once #include "godel-frontend/src/error/error.h" +#include "godel-frontend/src/ir/name_mangling.h" #include #include @@ -8,9 +9,6 @@ #include namespace godel { - -std::string replace_colon(const std::string&); - namespace lir { enum class inst_value_kind { @@ -150,16 +148,16 @@ class boolean: public inst { class store: public inst { private: inst_value_t source; - inst_value_t destination; + inst_value_t target; public: store(const inst_value_t& src, - const inst_value_t& dst, + const inst_value_t& tgt, const report::span& loc): - inst(inst_kind::inst_store, loc), source(src), destination(dst) {} + inst(inst_kind::inst_store, loc), source(src), target(tgt) {} store(const store& s): inst(inst_kind::inst_store, s.get_location()), - source(s.source), destination(s.destination) {} + source(s.source), target(s.target) {} ~store() override = default; void dump(std::ostream&, const std::string&) const override; void accept(inst_visitor* v) override { @@ -167,7 +165,7 @@ class store: public inst { } const auto& get_source() const { return source; } - const auto& get_destination() const { return destination; } + const auto& get_target() const { return target; } }; class call: public inst { @@ -185,6 +183,7 @@ class call: public inst { basic_static, // basic static method (static call) }; +public: // native method for int type enum class int_method_kind { int_add, @@ -208,7 +207,7 @@ class call: public inst { int_to_set }; // mapper for method name -> int method kind - std::unordered_map int_basic_methods = { + const std::unordered_map int_basic_methods = { {"int::add", int_method_kind::int_add}, {"int::sub", int_method_kind::int_sub}, {"int::div", int_method_kind::int_div}, @@ -248,7 +247,7 @@ class call: public inst { string_replace_once }; // mapper for method name -> string method kind - std::unordered_map string_basic_methods = { + const std::unordered_map string_basic_methods = { {"string::substr", string_method_kind::string_substr}, {"string::get_regex_match_result", string_method_kind::string_get_regex_match_result}, {"string::matches", string_method_kind::string_matches}, @@ -300,6 +299,11 @@ class call: public inst { generic_type(c.generic_type) {} ~call() override = default; void dump(std::ostream&, const std::string&) const override; + void accept(inst_visitor* v) override { + v->visit_call(this); + } + +public: void add_arg(const inst_value_t& arg) { arguments.push_back(arg); } void set_return(const inst_value_t& dst) { destination = dst; } void set_call_type(kind t) { type = t; } @@ -307,14 +311,26 @@ class call: public inst { const auto& get_generic_type() const { return generic_type; } const auto& get_function_name() const { return function_name; } auto get_func_kind() const { return type; } - void accept(inst_visitor* v) override { - v->visit_call(this); - } - const auto& get_arguments() const { return arguments; } const auto& get_return() const { return destination; } auto& get_mutable_arguments() { return arguments; } auto& get_mutable_result() { return destination; } + +public: + // get mangled function name, may fail in some cases + // if failed to mangle, return raw function name directly + auto get_mangled_name() const { + switch(get_func_kind()) { + case lir::call::kind::database_load: + case lir::call::kind::find: + case lir::call::kind::key_cmp: + case lir::call::kind::to_set: + case lir::call::kind::basic_method: + case lir::call::kind::basic_static: return get_function_name(); + default: break; + } + return rule_mangle(get_function_name()); + } }; class constructor: public inst { @@ -340,6 +356,7 @@ class constructor: public inst { fields_value.push_back(source); } const auto& get_fields() const { return fields_value; } + const auto& get_result() const { return destination; } const auto& get_schema_name() const { return schema_name; } void accept(inst_visitor* v) override { v->visit_constructor(this); @@ -347,8 +364,18 @@ class constructor: public inst { auto& get_mutable_fields() { return fields_value; } auto& get_mutable_result() { return destination; } + +public: + // get mangled constructor name + auto get_mangled_name() const { + return rule_mangle("schema_" + schema_name); + } }; +// souffle record type, for example: +// res = [1, "str", -1.0] +// res type is [number, string, float] +// class record: public inst { private: std::vector fields_value; @@ -363,11 +390,14 @@ class record: public inst { fields_value(c.fields_value), destination(c.destination) {} ~record() override = default; + +public: void dump(std::ostream&, const std::string&) const override; void add_field(const inst_value_t& source) { fields_value.push_back(source); } const auto& get_fields() const { return fields_value; } + const auto& get_result() const { return destination; } void accept(inst_visitor* v) override { v->visit_record(this); } @@ -385,23 +415,24 @@ class unary: public inst { private: kind operand; inst_value_t source; - inst_value_t destination; + inst_value_t target; public: unary(const kind op, const inst_value_t& src, - const inst_value_t& dst, + const inst_value_t& tgt, const report::span& loc): inst(inst_kind::inst_unary, loc), operand(op), - source(src), destination(dst) {} + source(src), target(tgt) {} unary(const unary& u): inst(inst_kind::inst_unary, u.get_location()), operand(u.operand), - source(u.source), destination(u.destination) {} + source(u.source), target(u.target) {} ~unary() override = default; public: - const auto& get_destination() const { return destination; } - auto& get_mutable_destination() { return destination; } + const auto& get_source() const { return source; } + const auto& get_target() const { return target; } + auto& get_mutable_target() { return target; } public: void dump(std::ostream&, const std::string&) const override; @@ -422,21 +453,21 @@ class binary: public inst { private: inst_value_t left; inst_value_t right; - inst_value_t destination; + inst_value_t target; kind operator_kind; public: binary(const inst_value_t& l, const inst_value_t& r, - const inst_value_t& dst, + const inst_value_t& tgt, const kind op, const report::span& loc): inst(inst_kind::inst_binary, loc), left(l), right(r), - destination(dst), operator_kind(op) {} + target(tgt), operator_kind(op) {} binary(const binary& b): inst(inst_kind::inst_binary, b.get_location()), left(b.left), right(b.right), - destination(b.destination), operator_kind(b.operator_kind) {} + target(b.target), operator_kind(b.operator_kind) {} ~binary() override = default; public: @@ -449,8 +480,8 @@ class binary: public inst { auto get_operator() const { return operator_kind; } const auto& get_left() const { return left; } const auto& get_right() const { return right; } - const auto& get_destination() const { return destination; } - auto& get_mutable_destination() { return destination; } + const auto& get_target() const { return target; } + auto& get_mutable_target() { return target; } }; class compare: public inst { @@ -536,6 +567,9 @@ class fact: public inst { void add_pair(const std::string& name, const inst_value_t& literal) { literals.push_back({name, literal}); } + +public: + const auto& get_pairs() const { return literals; } void dump(std::ostream&, const std::string&) const override; void accept(inst_visitor* v) override { v->visit_fact(this); diff --git a/godel-script/godel-frontend/src/ir/name_mangling.cpp b/godel-script/godel-frontend/src/ir/name_mangling.cpp index 377aae75..bb8f166b 100644 --- a/godel-script/godel-frontend/src/ir/name_mangling.cpp +++ b/godel-script/godel-frontend/src/ir/name_mangling.cpp @@ -1,8 +1,18 @@ #include "godel-frontend/src/ir/name_mangling.h" +#include +#include +#include +#include +#include + namespace godel { -std::string field_name_mangling(const std::string& name, const symbol& type) { +bool starts_with(const std::string& str, const std::string& prefix) { + return str.compare(0, prefix.size(), prefix) == 0; +} + +std::string field_mangle(const std::string& name) { // by field name mangling, we try to avoid variable name conflictions // for example: here's a schema `schema test {a: int}` // @@ -25,17 +35,108 @@ std::string field_name_mangling(const std::string& name, const symbol& type) { // and this will cause `ungrounded error` or `empty result`. // but if we do the name mangling, then it will be like this: // - // schema_test(result, [-1, -1], field_0x6669656c64_a_int) :- ( - // field_0x6669656c64_a_int = a, - // ^^^^^^^^^^^^^^^^^^^^^^^^ this is the field of test: `test.a` - // ^ this is the variable `a` - // result = [field_0x6669656c64_a_int], + // schema_test(result, [-1, -1], fld?a) :- ( + // fld?a = a, + // ^^^^^^^^^^ this is the field of test: `test.a` + // ^ this is the variable `a` + // result = [fld?a], // a = range(0, 10) // ). // - return "field_0x6669656c64_" + name + "_" + replace_colon( - type.full_path_name_without_set() - ); + // souffle identifier can use `?`, so we use it here. + // souffle identifier ll: [\?a-zA-Z]|[_\?a-zA-Z][_\?a-zA-Z0-9]+ + return "fld?" + name; +} + +std::string mangle(const std::string& name) { + static std::unordered_map cache = {}; + if (cache.count(name)) { + return cache.at(name); + } + + std::vector vec = {}; + size_t last = 0; + size_t pos = name.find("::", 0); + while(pos!=std::string::npos) { + if (pos>last) { + vec.push_back(name.substr(last, pos-last)); + } + last = pos + 2; + pos = name.find("::", last); + } + if (last!=name.length()) { + vec.push_back(name.substr(last)); + } + + auto tmp = std::string(""); + for(const auto& i : vec) { + tmp += std::to_string(i.length()) + i; + } + cache.insert({name, tmp}); + return tmp; +} + +std::string type_mangle(const std::string& name) { + static std::unordered_map cache = { + {"number", "number"}, + {"int", "int"}, + {"string", "string"}, + {"symbol", "symbol"}, + {"float", "float"}, + {"bool", "bool"}, + {"DBIndex", "DBIndex"} + }; + if (cache.count(name)) { + return cache.at(name); + } + + const auto res = "T_" + mangle(name); + cache.insert({name, res}); + return res; +} + +std::string rule_mangle(const std::string& name) { + static std::unordered_map cache = { + {"all_data_DBIndex", "all_data_DBIndex"} + }; + if (cache.count(name)) { + return cache.at(name); + } + + auto temp = std::string(""); + auto prefix = std::string(""); + if (starts_with(name, "rule_")) { + // rule_xxx -> R_xxx + temp = name.substr(5); + prefix = "R_"; + } else if (starts_with(name, "schema_")) { + // schema_xxx -> S_xxx + temp = name.substr(7); + prefix = "S_"; + } else if (starts_with(name, "input_")) { + // input_xxx -> I_xxx + temp = name.substr(6); + prefix = "I_"; + } else if (starts_with(name, "get_field_")) { + // get_field_xxx -> GF_xxx + temp = name.substr(10); + prefix = "GF_"; + } else if (starts_with(name, "get_table_")) { + // get_table_xxx -> GT_xxx + temp = name.substr(10); + prefix = "GT_"; + } else if (starts_with(name, "typecheck_")) { + // typecheck_xxx -> TC_xxx + temp = name.substr(11); + prefix = "TC_"; + } else { + // std::cerr << "unknown rule name: " << name << std::endl; + assert(false && "unknown rule name"); + } + + const auto res = prefix + mangle(temp); + cache.insert({name, res}); + return res; } } \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/name_mangling.h b/godel-script/godel-frontend/src/ir/name_mangling.h index 466c03d9..a162d25a 100644 --- a/godel-script/godel-frontend/src/ir/name_mangling.h +++ b/godel-script/godel-frontend/src/ir/name_mangling.h @@ -1,11 +1,15 @@ #pragma once -#include "godel-frontend/src/ir/ir_context.h" -#include "godel-frontend/src/ir/lir.h" -#include "godel-frontend/src/symbol.h" +#include +#include namespace godel { -std::string field_name_mangling(const std::string&, const symbol&); +bool starts_with(const std::string&, const std::string&); + +std::string mangle(const std::string&); +std::string field_mangle(const std::string&); +std::string type_mangle(const std::string&); +std::string rule_mangle(const std::string&); } \ No newline at end of file diff --git a/godel-script/godel-frontend/src/ir/pass.h b/godel-script/godel-frontend/src/ir/pass.h index 4ff43e3a..1fadf6f2 100644 --- a/godel-script/godel-frontend/src/ir/pass.h +++ b/godel-script/godel-frontend/src/ir/pass.h @@ -14,7 +14,9 @@ enum class pass_kind { ps_remove_unused_type, ps_inst_combine, ps_flatten_nested_block, - ps_aggregator_inline_remark + ps_aggregator_inline_remark, + ps_ungrounded_check, + ps_join_reorder }; // there are three types of passes: diff --git a/godel-script/godel-frontend/src/ir/pass_manager.cpp b/godel-script/godel-frontend/src/ir/pass_manager.cpp index 0c4bcf16..08caec9d 100644 --- a/godel-script/godel-frontend/src/ir/pass_manager.cpp +++ b/godel-script/godel-frontend/src/ir/pass_manager.cpp @@ -3,6 +3,9 @@ #include "godel-frontend/src/ir/remove_unused.h" #include "godel-frontend/src/ir/flatten_block.h" #include "godel-frontend/src/ir/aggregator_inline_remark.h" +#include "godel-frontend/src/ir/reorder.h" + +#include "godel-frontend/src/util/util.h" namespace godel { @@ -30,15 +33,18 @@ void pass_manager::run(ir_context& ctx, const cli::configure& conf) { } ordered_pass_list.push_back(new flatten_nested_block(ctx)); ordered_pass_list.push_back(new aggregator_inline_remark(ctx)); + if (conf.count(cli::option::cli_enable_join_reorder)) { + ordered_pass_list.push_back(new join_reorder(ctx)); + } bool verbose_info = conf.count(cli::option::cli_verbose); + util::time_stamp tsp; // must run in order, stop on first failure for(auto p : ordered_pass_list) { // print info if (verbose_info) { - std::clog << "IR Pass Running: "; - std::clog << p->get_name() << "\n"; + tsp.stamp(); } // run pass @@ -46,6 +52,11 @@ void pass_manager::run(ir_context& ctx, const cli::configure& conf) { err.err("failed to run pass: " + std::string(p->get_name())); break; } + + if (verbose_info) { + std::clog << util::format_time(tsp.duration()) << " "; + std::clog << p->get_name() << "\n"; + } } if (verbose_info) { diff --git a/godel-script/godel-frontend/src/ir/remove_unused.cpp b/godel-script/godel-frontend/src/ir/remove_unused.cpp index 00d5584e..9b91bde1 100644 --- a/godel-script/godel-frontend/src/ir/remove_unused.cpp +++ b/godel-script/godel-frontend/src/ir/remove_unused.cpp @@ -1,102 +1,12 @@ #include "godel-frontend/src/ir/remove_unused.h" +#include "godel-frontend/src/ir/name_mangling.h" namespace godel { -void call_graph_generator::check_inst(lir::inst* stmt, - std::queue& bfs, - used_dict& dict) const { - switch(stmt->get_kind()) { - case lir::inst_kind::inst_call: - dict.insert(replace_colon( - reinterpret_cast(stmt)->get_function_name() - )); - break; - case lir::inst_kind::inst_ctor: - dict.insert(replace_colon( - "schema_" + - reinterpret_cast(stmt)->get_schema_name() - )); - break; - case lir::inst_kind::inst_block: - bfs.push(reinterpret_cast(stmt)); - break; - case lir::inst_kind::inst_not: - bfs.push(reinterpret_cast(stmt)->get_body()); - break; - case lir::inst_kind::inst_and: - bfs.push(reinterpret_cast(stmt)->get_left_block()); - bfs.push(reinterpret_cast(stmt)->get_right_block()); - break; - case lir::inst_kind::inst_or: - bfs.push(reinterpret_cast(stmt)->get_left_block()); - bfs.push(reinterpret_cast(stmt)->get_right_block()); - break; - case lir::inst_kind::inst_aggr: - bfs.push(reinterpret_cast(stmt)->get_body()); - default: break; - } -} - -void call_graph_generator::scan_call(souffle_rule_impl* impl, - used_dict& dict) const { - // recursively search used rules - // but we use bfs queue to avoid stack overflow - // so visitor(dfs) is not needed here - std::queue bfs; - bfs.push(impl->get_block()); - while(!bfs.empty()) { - auto block = bfs.front(); - bfs.pop(); - for(auto stmt : block->get_content()) { - check_inst(stmt, bfs, dict); - } - } -} - -void call_graph_generator::initialize_call_graph_root(const std::vector& output, - call_graph& cg) const { - for(const auto& i : output) { - const auto name = replace_colon(i); - if (!cg.count(name)) { - cg.insert({name, {}}); - } - } -} - -void call_graph_generator::initialize_call_graph_root( - const std::vector& output, - call_graph& cg) const { - for(const auto& i : output) { - const auto name = replace_colon(i.rule_name); - if (!cg.count(name)) { - cg.insert({name, {}}); - } - } -} - -void call_graph_generator::initialize_call_graph(const std::vector& impls, - call_graph& cg) const { - for(auto i : impls) { - const auto name = replace_colon(i->get_func_name()); - if (!cg.count(name)) { - cg.insert({name, {}}); - } - // construct the call graph and mark all used rules - scan_call(i, cg.at(name)); - } -} - -const used_dict& call_graph_generator::apply(const ir_context& ctx) { +const callee_dict& unused_remove_pass::used_finder::apply(const ir_context& ctx) { // create call graph data structure - call_graph cg; - - // construct call graph by scanning the IR - initialize_call_graph_root(ctx.souffle_output, cg); - initialize_call_graph_root(ctx.annotated_output, cg); - initialize_call_graph(ctx.rule_impls, cg); - initialize_call_graph(ctx.database_get_table, cg); - initialize_call_graph(ctx.schema_get_field, cg); - initialize_call_graph(ctx.schema_data_constraint_impls, cg); + call_graph_generator cgg; + const auto& cg = cgg.apply(ctx); // use bfs to find all used rules std::queue bfs; @@ -106,12 +16,14 @@ const used_dict& call_graph_generator::apply(const ir_context& ctx) { used.insert("all_data_DBIndex"); // start from souffle output, the root of call graph for(const auto& i : ctx.souffle_output) { - bfs.push(replace_colon(i)); - used.insert(replace_colon(i)); + const auto mangled_name = rule_mangle(i); + bfs.push(mangled_name); + used.insert(mangled_name); } for(const auto& i : ctx.annotated_output) { - bfs.push(replace_colon(i.rule_name)); - used.insert(replace_colon(i.rule_name)); + const auto mangled_name = i.get_mangled_name(); + bfs.push(mangled_name); + used.insert(mangled_name); } // use bfs to find all used rules @@ -137,20 +49,22 @@ const used_dict& call_graph_generator::apply(const ir_context& ctx) { return used; } -void unused_remove_pass::remove_unused_schema_data_constraint_decl(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_schema_data_constraint_decl( + const callee_dict& used_rule) { std::vector used; for(const auto& i : ctx->schema_data_constraint_decls) { - if (used_rule.count("schema_" + replace_colon(i.name))) { + if (used_rule.count(i.get_mangled_name())) { used.push_back(i); } } ctx->schema_data_constraint_decls = used; } -void unused_remove_pass::remove_unused_schema_data_constraint_impl(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_schema_data_constraint_impl( + const callee_dict& used_rule) { std::vector used; for(auto i : ctx->schema_data_constraint_impls) { - if (used_rule.count(replace_colon(i->get_func_name()))) { + if (used_rule.count(i->get_mangled_name())) { used.push_back(i); } else { delete i; @@ -159,10 +73,11 @@ void unused_remove_pass::remove_unused_schema_data_constraint_impl(const used_di ctx->schema_data_constraint_impls = used; } -void unused_remove_pass::remove_unused_schema_get_field(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_schema_get_field( + const callee_dict& used_rule) { std::vector used; for(auto i : ctx->schema_get_field) { - if (used_rule.count(replace_colon(i->get_func_name()))) { + if (used_rule.count(i->get_mangled_name())) { used.push_back(i); } else { delete i; @@ -171,10 +86,11 @@ void unused_remove_pass::remove_unused_schema_get_field(const used_dict& used_ru ctx->schema_get_field = used; } -void unused_remove_pass::remove_unused_rule_decl(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_rule_decl( + const callee_dict& used_rule) { std::vector used; for(auto i : ctx->rule_decls) { - if (used_rule.count(replace_colon(i->get_rule_raw_name()))) { + if (used_rule.count(i->get_mangled_name())) { used.push_back(i); } else { delete i; @@ -183,10 +99,10 @@ void unused_remove_pass::remove_unused_rule_decl(const used_dict& used_rule) { ctx->rule_decls = used; } -void unused_remove_pass::remove_unused_rule_impl(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_rule_impl(const callee_dict& used_rule) { std::vector used; for(auto i : ctx->rule_impls) { - if (used_rule.count(replace_colon(i->get_func_name()))) { + if (used_rule.count(i->get_mangled_name())) { used.push_back(i); } else { delete i; @@ -195,40 +111,42 @@ void unused_remove_pass::remove_unused_rule_impl(const used_dict& used_rule) { ctx->rule_impls = used; } -void unused_remove_pass::remove_unused_input_decl(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_input_decl(const callee_dict& used_rule) { std::vector used; for(const auto& i : ctx->input_decls) { - if (used_rule.count(i.get_decl_name())) { + if (used_rule.count(i.get_mangled_name())) { used.push_back(i); } } ctx->input_decls = used; } -void unused_remove_pass::remove_unused_input_impl(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_input_impl(const callee_dict& used_rule) { std::vector used; for(const auto& i : ctx->input_impls) { - if (used_rule.count(i.get_decl_name())) { + if (used_rule.count(i.get_mangled_name())) { used.push_back(i); } } ctx->input_impls = used; } -void unused_remove_pass::remove_unused_annotated_input(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_annotated_input( + const callee_dict& used_rule) { std::vector used; for(const auto& i : ctx->annotated_input) { - if (used_rule.count(i.rule_name)) { + if (used_rule.count(i.get_mangled_name())) { used.push_back(i); } } ctx->annotated_input = used; } -void unused_remove_pass::remove_unused_database_get_table(const used_dict& used_rule) { +void unused_remove_pass::remove_unused_database_get_table( + const callee_dict& used_rule) { std::vector used; for(auto i : ctx->database_get_table) { - if (used_rule.count(replace_colon(i->get_func_name()))) { + if (used_rule.count(i->get_mangled_name())) { used.push_back(i); } else { delete i; @@ -238,8 +156,8 @@ void unused_remove_pass::remove_unused_database_get_table(const used_dict& used_ } bool unused_remove_pass::run() { - call_graph_generator cgg; - const auto& used_rule = cgg.apply(*ctx); + used_finder uf; + const auto& used_rule = uf.apply(*ctx); remove_unused_schema_data_constraint_decl(used_rule); remove_unused_schema_data_constraint_impl(used_rule); remove_unused_schema_get_field(used_rule); @@ -253,22 +171,22 @@ bool unused_remove_pass::run() { } bool unused_type_alias_remove_pass::run() { - std::unordered_set used_type = {"int", "string"}; + std::unordered_set used_type = {"int", "string", "float"}; for(const auto& i : ctx->schema_data_constraint_decls) { for(const auto& field : i.fields) { - used_type.insert(replace_colon(field.second)); + used_type.insert(type_mangle(field.second)); } } for(const auto& i : ctx->input_decls) { for(const auto& field : i.fields) { - used_type.insert(replace_colon(field.second)); + used_type.insert(type_mangle(field.second)); } } for(auto i : ctx->rule_decls) { for(const auto& param : i->get_params()) { - used_type.insert(replace_colon(param.second)); + used_type.insert(type_mangle(param.second)); } - used_type.insert(replace_colon(i->get_return_type())); + used_type.insert(type_mangle(i->get_return_type())); } // add their real type into the used type too for(const auto& i : ctx->type_alias) { diff --git a/godel-script/godel-frontend/src/ir/remove_unused.h b/godel-script/godel-frontend/src/ir/remove_unused.h index 220b6d86..80d68065 100644 --- a/godel-script/godel-frontend/src/ir/remove_unused.h +++ b/godel-script/godel-frontend/src/ir/remove_unused.h @@ -3,47 +3,34 @@ #include "godel-frontend/src/ir/lir.h" #include "godel-frontend/src/ir/ir_context.h" #include "godel-frontend/src/ir/pass.h" +#include "godel-frontend/src/ir/call_graph.h" #include #include #include -#include -#include namespace godel { -typedef std::unordered_set used_dict; -typedef std::unordered_map call_graph; - -class call_graph_generator { +class unused_remove_pass: public pass { private: - used_dict used; + class used_finder { + private: + callee_dict used; -private: - void check_inst(lir::inst*, std::queue&, used_dict&) const; - void scan_call(souffle_rule_impl*, used_dict&) const; - void initialize_call_graph_root(const std::vector&, - call_graph&) const; - void initialize_call_graph_root(const std::vector&, - call_graph&) const; - void initialize_call_graph(const std::vector&, - call_graph&) const; + public: + const callee_dict& apply(const ir_context&); + }; -public: - const used_dict& apply(const ir_context&); -}; - -class unused_remove_pass: public pass { private: - void remove_unused_schema_data_constraint_decl(const used_dict&); - void remove_unused_schema_data_constraint_impl(const used_dict&); - void remove_unused_schema_get_field(const used_dict&); - void remove_unused_rule_decl(const used_dict&); - void remove_unused_rule_impl(const used_dict&); - void remove_unused_input_decl(const used_dict&); - void remove_unused_input_impl(const used_dict&); - void remove_unused_annotated_input(const used_dict&); - void remove_unused_database_get_table(const used_dict&); + void remove_unused_schema_data_constraint_decl(const callee_dict&); + void remove_unused_schema_data_constraint_impl(const callee_dict&); + void remove_unused_schema_get_field(const callee_dict&); + void remove_unused_rule_decl(const callee_dict&); + void remove_unused_rule_impl(const callee_dict&); + void remove_unused_input_decl(const callee_dict&); + void remove_unused_input_impl(const callee_dict&); + void remove_unused_annotated_input(const callee_dict&); + void remove_unused_database_get_table(const callee_dict&); public: unused_remove_pass(ir_context& c): pass(pass_kind::ps_remove_unused, c) {} diff --git a/godel-script/godel-frontend/src/ir/reorder.cpp b/godel-script/godel-frontend/src/ir/reorder.cpp new file mode 100644 index 00000000..fad7f1c3 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/reorder.cpp @@ -0,0 +1,545 @@ +#include "godel-frontend/src/ir/reorder.h" +#include "godel-frontend/src/ir/call_graph.h" + +#include + +namespace godel { +namespace reorder { + +void cost_analysis::tarjan(const std::string& node, const call_graph& cg) { + // init visit time stamp + DFN[node] = LOW[node] = ++index; + + // push stack + in_stack[node] = true; + Stap.push_back(node); + + // search next node + for(const auto& succ : cg.at(node)) { + if (!cg.count(succ)) { + continue; + } + if (!DFN.at(succ)) { + tarjan(succ, cg); + LOW[node] = std::min(LOW[node], LOW[succ]); + } else if (in_stack[succ]) { + LOW[node] = std::min(LOW[node], DFN[succ]); + } + } + if (DFN[node]==LOW[node]) { + std::vector res = {}; + + // get result from stack + auto top = std::string(""); + while(top!=node) { + top = Stap.back(); + Stap.pop_back(); + in_stack[top] = false; + res.push_back(top); + } + + // get self-circle + if (res.size()==1 && cg.at(node).count(node)) { + in_circle_rule_set.insert(node); + } + // get strongly connected components + if (res.size()>1) { + for(const auto& i : res) { + in_circle_rule_set.insert(i); + } + } + } +} + +void cost_analysis::solve_circle(const call_graph& cg) { + Stap = {}; + index = 0; + for(const auto& i : cg) { + DFN[i.first] = 0; + LOW[i.first] = 0; + in_stack[i.first] = false; + } + for(const auto& i : cg) { + if (!DFN.at(i.first)) { + tarjan(i.first, cg); + } + } +} + +void cost_analysis::topo_delete_leaf( + const std::unordered_set& set, call_graph& cg) { + // delete scanned nodes + for(const auto& i : set) { + cg.erase(i); + } + // and delete references of these nodes + for(auto& i : cg) { + for(const auto& j : set) { + if (i.second.count(j)) { + i.second.erase(j); + } + } + } +} + +void cost_analysis::topo_scan(call_graph& copy) { + topo_sort_result.push_back({}); + for(const auto& i : copy) { + // reference list is empty + if (i.second.empty()) { + topo_sort_result.back().insert(i.first); + } + // in circle, self-circle also included + if (i.second.size()) { + bool flag_all_ref_in_circle = true; + for(const auto& j : i.second) { + if (!in_circle_rule_set.count(j)) { + flag_all_ref_in_circle = false; + break; + } + } + if (flag_all_ref_in_circle) { + topo_sort_result.back().insert(i.first); + } + } + } + // delete reference + topo_delete_leaf(topo_sort_result.back(), copy); +} + +void cost_analysis::topo_sort(const call_graph& cg) { + // detect self-circle and circle using tarjan algorithm. + solve_circle(cg); + + // init leaf node + topo_sort_result = {{}}; + // leaf node include all kinds of input rules + for(const auto& i : ctx->annotated_input) { + topo_sort_result.back().insert(i.get_mangled_name()); + } + for(const auto& i : ctx->input_impls) { + topo_sort_result.back().insert(i.get_mangled_name()); + } + // all_data_DBIndex is generated automatically, so we add it here + topo_sort_result.back().insert("all_data_DBIndex"); + + // do a copy of call graph + auto copy = cg; + + // delete native functions like: key_eq, key_neq + // otherwise topological sort will loop forever + // because these native functions will never be found in the call graph + for(auto& i : copy) { + std::unordered_set need_delete; + for(auto& j : i.second) { + if (!mapper.count(j) && j!="all_data_DBIndex") { + need_delete.insert(j); + } + } + for(const auto& j : need_delete) { + i.second.erase(j); + } + } + + // delete initialized leaves + topo_delete_leaf(topo_sort_result.back(), copy); + + while(copy.size()) { + topo_scan(copy); + if (topo_sort_result.back().empty()) { + // should be unreachable, but it may encounter an error + // if logic is changed in future changes + // so we add this branch for defence + break; + } + } +} + +void cost_analysis::visit_store(lir::store* node) { + auto res = 0; + if (node->get_source().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_source().content)? 0:1; + // source of store is not always grounded + } + if (node->get_target().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_target().content)? 0:1; + // target is grounded if source is grounded + if (var_grounded.count(node->get_source().content)) { + add_var_grounded(node->get_target().content); + } + } + block_cost_frame.back() *= res; +} + +void cost_analysis::visit_call(lir::call* node) { + // do not analyse intrinsic functions + switch(node->get_func_kind()) { + case lir::call::kind::database_load: + case lir::call::kind::find: + case lir::call::kind::key_cmp: + case lir::call::kind::to_set: + case lir::call::kind::basic_method: + case lir::call::kind::basic_static: return; + default: break; + } + + const auto name = node->get_mangled_name(); + if (rule_cost.count(name)) { + block_cost_frame.back() *= rule_cost.at(name); + } + + // check if argument is grounded + auto res = 0; + for(const auto& i : node->get_arguments()) { + if (i.kind!=lir::inst_value_kind::variable) { + continue; + } + res += var_grounded.count(i.content)? 0:1; + add_var_grounded(i.content); + } + block_cost_frame.back() *= res; + + // rules in circle have more cost + if (in_circle_rule_set.count(name)) { + block_cost_frame.back() *= 2; + } +} + +void cost_analysis::visit_constructor(lir::constructor* node) { + const auto name = node->get_mangled_name(); + if (rule_cost.count(name)) { + block_cost_frame.back() *= rule_cost.at(name); + } + + // check if argument is grounded + auto res = 0; + for(const auto& i : node->get_fields()) { + if (i.kind!=lir::inst_value_kind::variable) { + continue; + } + res += var_grounded.count(i.content)? 0:1; + add_var_grounded(i.content); + } + block_cost_frame.back() *= res; + + // rules in circle have more cost + if (in_circle_rule_set.count(name)) { + block_cost_frame.back() *= 2; + } +} + +void cost_analysis::visit_record(lir::record* node) { + // check if argument is grounded + // if fact variables used in record must be grounded + auto res = 0; + for(const auto& i : node->get_fields()) { + if (i.kind!=lir::inst_value_kind::variable) { + continue; + } + res += var_grounded.count(i.content)? 0:1; + // variable used in record is a variable-call + // so we do not add it to var_grounded + } + + add_var_grounded(node->get_result().content); + block_cost_frame.back() *= res; +} + +void cost_analysis::visit_unary(lir::unary* node) { + auto res = 0; + if (node->get_source().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_source().content)? 0:1; + } + if (node->get_target().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_target().content)? 0:1; + add_var_grounded(node->get_target().content); + } + block_cost_frame.back() *= res; +} + +void cost_analysis::visit_binary(lir::binary* node) { + auto res = 0; + if (node->get_left().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_left().content)? 0:1; + } + if (node->get_right().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_right().content)? 0:1; + } + if (node->get_target().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_target().content)? 0:1; + add_var_grounded(node->get_target().content); + } + block_cost_frame.back() *= res; +} + +void cost_analysis::visit_compare(lir::compare* node) { + auto res = 0; + if (node->get_left().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_left().content)? 0:1; + add_var_grounded(node->get_left().content); + } + if (node->get_right().kind==lir::inst_value_kind::variable) { + res += var_grounded.count(node->get_right().content)? 0:1; + } + block_cost_frame.back() *= res; +} + +void cost_analysis::visit_block(lir::block* node) { + push_var_frame(); + + for(auto i : node->get_content()) { + block_cost_frame.push_back(1); + i->accept(this); + + // use `,` means `and`, otherwise means `or` + // use cost::operator* if in `and` block + // use cost::operator+ if in `or` block + auto res = block_cost_frame.back(); + block_cost_frame.pop_back(); + if (node->get_use_comma()) { + block_cost_frame.back() *= res; + } else { + block_cost_frame.back() += res; + } + } + + pop_var_frame(); +} + +void cost_analysis::visit_fact(lir::fact* node) { + block_cost_frame.back() += node->get_pairs().size(); +} + +void cost_analysis::visit_not_operand(lir::not_operand* node) { + // push new cost frame + block_cost_frame.push_back(1); + // push new var frame + push_var_frame(); + + node->get_body()->accept(this); + + // get cost frame result + const auto res = block_cost_frame.back(); + block_cost_frame.pop_back(); + // pop var frame + pop_var_frame(); + + block_cost_frame.back() *= res; +} + +void cost_analysis::visit_and_operand(lir::and_operand* node) { + // push new cost frame + block_cost_frame.push_back(1); + // push new var frame + push_var_frame(); + + node->get_left_block()->accept(this); + + // get cost frame result + const auto res_left = block_cost_frame.back(); + block_cost_frame.pop_back(); + // pop var frame + pop_var_frame(); + + // push new cost frame + block_cost_frame.push_back(1); + // push new var frame + push_var_frame(); + + node->get_right_block()->accept(this); + + // get cost frame result + const auto res_right = block_cost_frame.back(); + block_cost_frame.pop_back(); + // pop var frame + pop_var_frame(); + + block_cost_frame.back() *= res_left * res_right; +} + +void cost_analysis::visit_or_operand(lir::or_operand* node) { + // push new cost frame + block_cost_frame.push_back(1); + // push new var frame + push_var_frame(); + + node->get_left_block()->accept(this); + + // get cost frame result + const auto res_left = block_cost_frame.back(); + block_cost_frame.pop_back(); + // pop var frame + pop_var_frame(); + + // push new cost frame + block_cost_frame.push_back(1); + // push new var frame + push_var_frame(); + + node->get_right_block()->accept(this); + + // get cost frame result + const auto res_right = block_cost_frame.back(); + block_cost_frame.pop_back(); + // pop var frame + pop_var_frame(); + + block_cost_frame.back() *= res_left + res_right; +} + +void cost_analysis::visit_aggregator(lir::aggregator* node) { + // push new var frame + push_var_frame(); + + node->get_body()->accept(this); + + // pop var frame + pop_var_frame(); + + block_cost_frame.back() *= 1; +} + +std::unordered_map cost_analysis::run() { + // init rule name -> impl* mapper + mapper.clear(); + for(auto i : ctx->rule_impls) { + mapper[i->get_mangled_name()].push_back(i); + } + for(auto i : ctx->database_get_table) { + mapper[i->get_mangled_name()].push_back(i); + } + for(auto i : ctx->schema_get_field) { + mapper[i->get_mangled_name()].push_back(i); + } + for(auto i : ctx->schema_data_constraint_impls) { + mapper[i->get_mangled_name()].push_back(i); + } + + // toposort + call_graph_generator cgg; + const auto& cg = cgg.apply(*ctx); + topo_sort(cg); + + // calculate cost + rule_cost = {}; + for(const auto& level : topo_sort_result) { + for(const auto& i : level) { + // input rules & database constraints' cost should be initialized + if (!mapper.count(i) && !rule_cost.count(i)) { + rule_cost[i] = (i=="all_data_DBIndex"? 1:100); + } + if (mapper.count(i) && !rule_cost.count(i)) { + rule_cost[i] = 0; + for(const auto& j : mapper.at(i)) { + // push new cost frame + block_cost_frame.push_back(1); + + // do calculation + j->get_block()->accept(this); + + // get cost result + rule_cost[i] += block_cost_frame.back(); + block_cost_frame.pop_back(); + } + } + } + } + + std::unordered_map cost_map; + for(const auto& i : rule_cost) { + cost_map.insert({i.first, i.second.num}); + } + + return cost_map; +} + +} + +void join_reorder::visit_block(lir::block* node) { + // use semicolon means this is `or statement`, so we don't need to reorder + if (node->get_use_semicolon()) { + for(auto i : node->get_content()) { + i->accept(this); + } + return; + } + + // temp is used to store the insts which don't need to be reordered + std::vector temp; + // store the rules or constructors which need to be reordered + std::vector> need_reorder_inst; + + // find rules or constructors which need to be reordered + // and push them to need_reorder_inst + for(auto i : node->get_content()) { + if (i->get_kind()==lir::inst_kind::inst_call) { + auto call = reinterpret_cast(i); + const auto name = call->get_mangled_name(); + if (cost_map.count(name) && cost_map.at(name)>3000) { + need_reorder_inst.push_back({name, i}); + } else if (name.find("__all__")!=std::string::npos) { + need_reorder_inst.push_back({name, i}); + } else { + temp.push_back(i); + } + } else if (i->get_kind()==lir::inst_kind::inst_ctor) { + auto ctor = reinterpret_cast(i); + const auto name = ctor->get_mangled_name(); + if (cost_map.count(name) && cost_map.at(name)>3000) { + need_reorder_inst.push_back({name, i}); + } else if (name.find("__all__")!=std::string::npos) { + need_reorder_inst.push_back({name, i}); + } else { + temp.push_back(i); + } + } else { + temp.push_back(i); + } + i->accept(this); + } + + // TODO: algorithem should be rewritten later + // sort with costs, but this is not a good way of reordering + // because rules's cost is not only based on the cost of the rule + // but also based on the natural joins + // if the natural joins are not reordered, the cost of the rules + // will be higher than before. + std::sort(need_reorder_inst.begin(), need_reorder_inst.end(), + [&](const auto& a, const auto& b) { + return cost_map.at(a.first) < cost_map.at(b.first); + } + ); + + node->get_mutable_content().clear(); + for(auto i : temp) { + node->add_new_content(i); + } + for(auto& i : need_reorder_inst) { + node->add_new_content(i.second); + } +} + +bool join_reorder::run() { + cost_map = reorder::cost_analysis(ctx).run(); + // for(const auto& i : cost_map) { + // if (i.second < 3000) { + // continue; + // } + // std::cout << i.first << " " << i.second << "\n"; + // } + for(auto i : ctx->rule_impls) { + i->get_block()->accept(this); + } + for(auto i : ctx->database_get_table) { + i->get_block()->accept(this); + } + for(auto i : ctx->schema_get_field) { + i->get_block()->accept(this); + } + for(auto i : ctx->schema_data_constraint_impls) { + i->get_block()->accept(this); + } + return true; +} + +} diff --git a/godel-script/godel-frontend/src/ir/reorder.h b/godel-script/godel-frontend/src/ir/reorder.h new file mode 100644 index 00000000..c3ba53e9 --- /dev/null +++ b/godel-script/godel-frontend/src/ir/reorder.h @@ -0,0 +1,153 @@ +#pragma once + +#include "godel-frontend/src/ir/lir.h" +#include "godel-frontend/src/ir/ir_context.h" +#include "godel-frontend/src/ir/pass.h" +#include "godel-frontend/src/ir/call_graph.h" + +#include + +namespace godel { +namespace reorder { + +// cost type, for rule cost calculation +struct cost { +private: + static size_t quick_multiply(size_t a, size_t b) { + size_t res = 0; + while(b) { + if (b & 1) { + size_t tmp = res + a; + if (tmp < std::max(res, a)) { + return SIZE_MAX; + } + res = tmp; + } + a <<= 1; + b >>= 1; + } + return res; + } + +public: + size_t num; + + cost(size_t n = 0): num(n) {} + + // * is in fact get sum value + // if overflow, use the max value to mark as INF + cost operator*(const cost& other) const { + return cost { quick_multiply(num, other.num) }; + } + + // + is in fact get max value + cost operator+(const cost& other) const { + const size_t res = num + other.num; + if (res < std::max(num, other.num)) { + return cost { SIZE_MAX }; + } + return cost { res }; + } + + cost& operator*=(const cost& other) { + *this = *this * other; + return *this; + } + + cost& operator+=(const cost& other) { + *this = *this + other; + return *this; + } +}; + +class cost_analysis: public lir::inst_visitor { +// check rule call circle before doing topological sort +private: + std::unordered_set in_circle_rule_set; + + std::unordered_map DFN; + std::unordered_map LOW; + std::unordered_map in_stack; + size_t index; + std::vector Stap; + void tarjan(const std::string&, const call_graph&); + void solve_circle(const call_graph&); + +// do topological sort +private: + std::vector> topo_sort_result; + + void topo_delete_leaf(const std::unordered_set&, call_graph&); + void topo_scan(call_graph&); + void topo_sort(const call_graph&); + +// rule cost calculation +private: + std::unordered_map> mapper; + std::unordered_map rule_cost; + std::vector block_cost_frame; + + // need to check variable grounding + // if rules call with some already grounded variables + // these variables will do natural join instead of join + // this will affect the cost calculation + // so we need to record the variable that has been grounded + std::unordered_set var_grounded; + std::vector> var_grounded_frame; + void push_var_frame() { var_grounded_frame.push_back({}); } + void add_var_grounded(const std::string& var) { + if (var_grounded.count(var)) { + return; + } + var_grounded.insert(var); + var_grounded_frame.back().insert(var); + } + void pop_var_frame() { + for(const auto& i : var_grounded_frame.back()) { + var_grounded.erase(i); + } + var_grounded_frame.pop_back(); + } + +private: + void visit_store(lir::store*) override; + void visit_call(lir::call*) override; + void visit_constructor(lir::constructor*) override; + void visit_record(lir::record*) override; + void visit_unary(lir::unary*) override; + void visit_binary(lir::binary*) override; + void visit_compare(lir::compare*) override; + void visit_block(lir::block*) override; + void visit_fact(lir::fact*) override; + void visit_not_operand(lir::not_operand*) override; + void visit_and_operand(lir::and_operand*) override; + void visit_or_operand(lir::or_operand*) override; + void visit_aggregator(lir::aggregator*) override; + +private: + ir_context* ctx; + +public: + cost_analysis(ir_context* c): ctx(c) {} + std::unordered_map run(); +}; + +} + +// reorder join order pass, used to optimize execution time +class join_reorder: public pass { +private: + std::unordered_map cost_map; + +private: + void visit_block(lir::block*) override; + +public: + join_reorder(ir_context& c): pass(pass_kind::ps_join_reorder, c) {} + const char* get_name() const override { + return "[Transform] Join Reorder"; + } + bool run() override; +}; + +} diff --git a/godel-script/godel-frontend/src/sema/context.h b/godel-script/godel-frontend/src/sema/context.h index 1161e45b..67ac8a4e 100644 --- a/godel-script/godel-frontend/src/sema/context.h +++ b/godel-script/godel-frontend/src/sema/context.h @@ -39,7 +39,7 @@ struct context { // store variable names that should not be used const std::unordered_set invalid_variable_name = { "Self", "self", "count", "sum", "output", - "input", "max", "min", "result" + "input", "max", "min" }; public: diff --git a/godel-script/godel-frontend/src/sema/function_declaration.h b/godel-script/godel-frontend/src/sema/function_declaration.h index 756afbec..06411da0 100644 --- a/godel-script/godel-frontend/src/sema/function_declaration.h +++ b/godel-script/godel-frontend/src/sema/function_declaration.h @@ -20,7 +20,7 @@ class function_generator { // not the same as ctx.invalid_variable_name, this allows `self` const std::unordered_set invalid_parameter_name = { "Self", "count", "sum", "output", - "input", "max", "min", "result" + "input", "max", "min" }; bool flag_in_impl = false; std::string self_type_name = ""; diff --git a/godel-script/godel-frontend/src/sema/symbol_import.cpp b/godel-script/godel-frontend/src/sema/symbol_import.cpp index 7050b66f..407035fb 100644 --- a/godel-script/godel-frontend/src/sema/symbol_import.cpp +++ b/godel-script/godel-frontend/src/sema/symbol_import.cpp @@ -53,7 +53,6 @@ configure symbol_import::inherit_config(const std::string& filename) { // executable path, for souffle arguments option::cli_executable_path, // semantic check options - option::cli_semantic_no_else, option::cli_semantic_pub_check, // verbose info dump option::cli_verbose, diff --git a/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp b/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp index e2f5410e..07c31b33 100644 --- a/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp +++ b/godel-script/godel-frontend/src/sema/ungrounded_checker.cpp @@ -4,7 +4,7 @@ namespace godel { bool return_ungrounded_checker::visit_in_block_expr(in_block_expr* node) { err->err(node->get_location(), - "require a return statement, but get an expression.", + "require return statement, but get expression.", "return value is ungrounded." ); return true; @@ -15,7 +15,7 @@ bool return_ungrounded_checker::visit_let_stmt(let_stmt* node) { node->get_code_block()->accept(this); } else { err->err(node->get_location(), - "require a return statement in this statement.", + "require return statement in this statement.", "return value is ungrounded." ); } @@ -27,7 +27,7 @@ bool return_ungrounded_checker::visit_if_stmt(if_stmt* node) { node->get_code_block()->accept(this); } else { err->err(node->get_location(), - "require a return statement in this statement.", + "require return statement in this statement.", "return value is ungrounded." ); } @@ -39,7 +39,7 @@ bool return_ungrounded_checker::visit_for_stmt(for_stmt* node) { node->get_code_block()->accept(this); } else { err->err(node->get_location(), - "require a return statement in this statement.", + "require return statement in this statement.", "return value is ungrounded." ); } @@ -62,8 +62,8 @@ bool return_ungrounded_checker::visit_match_stmt(match_stmt* node) { bool return_ungrounded_checker::visit_block_stmt(block_stmt* node) { if (node->get_statement().empty()) { err->err(node->get_location(), - "require a return statement in this block.", - "otherwise the return value is ungrounded." + "require return statement in this block.", + "return value is ungrounded." ); } for(auto i : node->get_statement()) { @@ -77,7 +77,7 @@ bool return_ungrounded_checker::visit_block_stmt(block_stmt* node) { case ast_class::ac_in_block_expr: i->accept(this); break; default: err->err(i->get_location(), - "require a return statement in this statement.", + "require return statement in this statement.", "return value is ungrounded." ); } @@ -90,6 +90,7 @@ bool return_ungrounded_checker::visit_function_decl(function_decl* func) { if (!func->has_return_value() || !func->implemented()) { return true; } + // do not check predicate if (func->get_return_type()->get_resolve().type==symbol::boolean()) { return true; @@ -100,7 +101,88 @@ bool return_ungrounded_checker::visit_function_decl(function_decl* func) { return true; } -bool negative_expression_ungrounded_checker::visit_call_expr(call_expr* node) { +bool undetermined_checker::match_undetermined_call(call_root* node) { + // undetermined call in souffle is in fact an ungrounded error + // if variables assigned by undetermined call is not bound, + // it will cause ungrounded error. + // undetermined call in godel script is like: + // + // int::__undetermined_all__() + // string::__undetermined_all__() + // + + // call head should be identifier `int` or `string`, without function call. + auto head = node->get_call_head(); + if (head->has_func_call() || head->is_initializer()) { + return false; + } + auto first = head->get_first_expression(); + // check call head is in fact an identifier + if (first->get_ast_class()!=ast_class::ac_identifier) { + return false; + } + // identifier must be `int` or `string` + const auto& name = reinterpret_cast(first)->get_name(); + if (name!="int" && name!="string") { + return false; + } + + if (node->get_call_chain().empty()) { + return false; + } + + // first call expression should be get path + auto first_call = node->get_call_chain()[0]; + if (first_call->get_call_type()!=call_expr::type::get_path) { + return false; + } + auto field = first_call->get_field_name(); + // `__undetermined_all__` is a special function. + if (field->get_name()!="__undetermined_all__") { + return false; + } + if (!first_call->has_func_call()) { + return false; + } + + return true; +} + +bool undetermined_checker::visit_call_root(call_root* node) { + if (match_undetermined_call(node)) { + if (node->get_call_chain().size()>1) { + err->err(node->get_location(), + "ungrounded value cannot be used in call chain." + ); + } else if (!in_for_initialization_level) { + err->warn(node->get_location(), + "undetermined value used outside for-initialization.", + "will cause ungrounded error." + ); + } + } + + node->get_call_head()->accept(this); + for(auto i : node->get_call_chain()) { + i->accept(this); + } + return true; +} + +bool undetermined_checker::visit_for_stmt(for_stmt* node) { + ++in_for_initialization_level; + for(auto i : node->get_symbols()) { + i->accept(this); + } + --in_for_initialization_level; + + if (node->has_statement()) { + node->get_code_block()->accept(this); + } + return true; +} + +bool neg_expr_ungrounded_checker::visit_call_expr(call_expr* node) { if (in_logical_negative_expression_level) { if (node->is_aggregator_find()) { err->err(node->get_field_name()->get_location(), @@ -132,6 +214,13 @@ bool negative_expression_ungrounded_checker::visit_call_expr(call_expr* node) { "will cause ungrounded error." ); } + if (node->has_func_call() && + node->get_func_call()->get_resolve().type!=symbol::boolean()) { + err->warn(node->get_location(), + "non-boolean return value in logical negative expression.", + "will produce temporary variables, causing ungrounded error." + ); + } } node->get_field_name()->accept(this); if (node->is_generic()) { @@ -146,7 +235,7 @@ bool negative_expression_ungrounded_checker::visit_call_expr(call_expr* node) { return true; } -bool negative_expression_ungrounded_checker::visit_initializer(initializer* node) { +bool neg_expr_ungrounded_checker::visit_initializer(initializer* node) { if (in_logical_negative_expression_level) { err->err(node->get_location(), "object construction is not allowed in logical negative expression.", @@ -159,31 +248,40 @@ bool negative_expression_ungrounded_checker::visit_initializer(initializer* node return true; } -bool negative_expression_ungrounded_checker::visit_unary_operator(unary_operator* node) { - bool is_logical_negative = ( - node->get_operator_type()==unary_operator::type::logical_negation - ); - if (is_logical_negative) { +bool neg_expr_ungrounded_checker::visit_unary_operator(unary_operator* node) { + if (node->get_operator_type()==unary_operator::type::logical_negation) { in_logical_negative_expression_level++; } node->get_child()->accept(this); - if (is_logical_negative) { + if (node->get_operator_type()==unary_operator::type::logical_negation) { in_logical_negative_expression_level--; } return true; } -void ungrounded_parameter_checker::unused_parameter_check(const report::span& stmt_loc) { +bool neg_expr_ungrounded_checker::visit_binary_operator(binary_operator* node) { + if (in_logical_negative_expression_level && !in_binary_operator_level) { + err->warn(node->get_location(), + "binary expression used in logical negative expression.", + "will produce temporary variables, causing ungrounded error." + ); + } + ++in_binary_operator_level; + node->get_left()->accept(this); + node->get_right()->accept(this); + --in_binary_operator_level; + return true; +} + +void ungrounded_parameter_checker::report_unused_parameter(const report::span& stmt_loc) { // do not check inline function & check if need to check constraint for self bool flag_is_self_typecheck_free = false; - if (func_node->get_annotations().size()) { - for(auto i : func_node->get_annotations()) { - if (i->get_annotation()=="@inline") { - return; - } - if (i->get_annotation()=="@self_typecheck_free") { - flag_is_self_typecheck_free = true; - } + for(auto i : func_node->get_annotations()) { + if (i->get_annotation()=="@inline") { + return; + } + if (i->get_annotation()=="@self_typecheck_free") { + flag_is_self_typecheck_free = true; } } @@ -214,8 +312,7 @@ void ungrounded_parameter_checker::unused_parameter_check(const report::span& st // unused int/float/string and "self" parameter is marked as ungrounded // set of int/float/string are not considered as ungrounded - if ((type == symbol::i64() || type == symbol::f64() || type == symbol::str()) && - !record_is_set_flag.at(i)) { + if (is_native_type(type) && !record_is_set_flag.at(i)) { ungrounded_params += ungrounded_params.length()? ", ":""; ungrounded_params += i; } else if (i!="self") { @@ -229,17 +326,14 @@ void ungrounded_parameter_checker::unused_parameter_check(const report::span& st // unused warning report if (unused_params.length()) { err->warn(stmt_loc, - "unused parameter \"" + unused_params + - "\" in this statement branch.", - "may cause empty result." + "unused parameter \"" + unused_params + "\" in this branch." ); } // ungrounded error report if (ungrounded_params.length()) { err->err(stmt_loc, - "ungrounded parameter \"" + ungrounded_params + - "\" in this statement branch." + "ungrounded parameter \"" + ungrounded_params + "\" in this branch." ); } @@ -249,8 +343,8 @@ void ungrounded_parameter_checker::unused_parameter_check(const report::span& st } if (flag_self_ungrounded && flag_is_self_typecheck_free) { err->err(stmt_loc, - "ungrounded parameter \"self\" in this statement branch.", - "need to constraint this parameter, otherwise it causes ungrounded error." + "ungrounded \"self\" in this branch.", + "constraint \"self\" to avoid ungrounded error." ); } } @@ -262,7 +356,7 @@ bool ungrounded_parameter_checker::visit_identifier(identifier* node) { const auto& name = node->get_name(); if (record.count(name) && !used_variable.back().count(name)) { // if this table's size is not zero - // this means in progress of analysing logical or expression + // this means in progress of analysing logical `or` expression if (logical_or_variable_used.size()) { logical_or_variable_used.back().insert(name); return true; @@ -347,7 +441,7 @@ bool ungrounded_parameter_checker::visit_for_stmt(for_stmt* node) { if (node->has_statement()) { node->get_code_block()->accept(this); } else { - unused_parameter_check(node->get_location()); + report_unused_parameter(node->get_location()); } pop_used_variable_mark_scope(); return true; @@ -361,7 +455,7 @@ bool ungrounded_parameter_checker::visit_let_stmt(let_stmt* node) { if (node->has_statement()) { node->get_code_block()->accept(this); } else { - unused_parameter_check(node->get_location()); + report_unused_parameter(node->get_location()); } pop_used_variable_mark_scope(); return true; @@ -390,7 +484,7 @@ bool ungrounded_parameter_checker::visit_if_stmt(if_stmt* node) { if (node->has_statement()) { node->get_code_block()->accept(this); } else { - unused_parameter_check(node->get_location()); + report_unused_parameter(node->get_location()); } pop_used_variable_mark_scope(); return true; @@ -447,7 +541,7 @@ bool ungrounded_parameter_checker::visit_ret_stmt(ret_stmt* node) { // pop table logical_or_variable_used.pop_back(); } - unused_parameter_check(node->get_location()); + report_unused_parameter(node->get_location()); pop_used_variable_mark_scope(); return true; } @@ -455,7 +549,7 @@ bool ungrounded_parameter_checker::visit_ret_stmt(ret_stmt* node) { bool ungrounded_parameter_checker::visit_in_block_expr(in_block_expr* node) { new_used_variable_mark_scope(); node->get_expr()->accept(this); - unused_parameter_check(node->get_location()); + report_unused_parameter(node->get_location()); pop_used_variable_mark_scope(); return true; } @@ -463,7 +557,7 @@ bool ungrounded_parameter_checker::visit_in_block_expr(in_block_expr* node) { bool ungrounded_parameter_checker::visit_block_stmt(block_stmt* node) { // if having no statement in it, check unused parameter directly if (!node->get_statement().size()) { - unused_parameter_check(node->get_location()); + report_unused_parameter(node->get_location()); return true; } for(auto i : node->get_statement()) { diff --git a/godel-script/godel-frontend/src/sema/ungrounded_checker.h b/godel-script/godel-frontend/src/sema/ungrounded_checker.h index 96864713..851c29e0 100644 --- a/godel-script/godel-frontend/src/sema/ungrounded_checker.h +++ b/godel-script/godel-frontend/src/sema/ungrounded_checker.h @@ -33,19 +33,42 @@ class return_ungrounded_checker: public ast_visitor { } }; -class negative_expression_ungrounded_checker: public ast_visitor { +class neg_expr_ungrounded_checker: public ast_visitor { private: report::error* err; size_t in_logical_negative_expression_level; + size_t in_binary_operator_level; private: bool visit_call_expr(call_expr*) override; bool visit_initializer(initializer*) override; bool visit_unary_operator(unary_operator*) override; + bool visit_binary_operator(binary_operator*) override; public: - negative_expression_ungrounded_checker(report::error* err_ptr): - err(err_ptr), in_logical_negative_expression_level(0) {} + neg_expr_ungrounded_checker(report::error* e): + err(e), in_logical_negative_expression_level(0), + in_binary_operator_level(0) {} + void check(ast_root* root) { + root->accept(this); + } +}; + +class undetermined_checker: public ast_visitor { +private: + report::error* err; + size_t in_for_initialization_level; + +private: + bool match_undetermined_call(call_root*); + +private: + bool visit_call_root(call_root*) override; + bool visit_for_stmt(for_stmt*) override; + +public: + undetermined_checker(report::error* e): + err(e), in_for_initialization_level(0) {} void check(ast_root* root) { root->accept(this); } @@ -102,7 +125,15 @@ class ungrounded_parameter_checker: public ast_visitor { } private: - void unused_parameter_check(const report::span&); + bool is_native_type(const godel::symbol& sym) const { + return sym == symbol::i64() || + sym == symbol::f64() || + sym == symbol::str(); + } + void report_unused_parameter(const report::span&); + bool check_directly_return_self(ret_stmt*); + +private: bool visit_identifier(identifier*) override; bool visit_call_expr(call_expr*) override; bool visit_unary_operator(unary_operator*) override; @@ -111,7 +142,6 @@ class ungrounded_parameter_checker: public ast_visitor { bool visit_let_stmt(let_stmt*) override; bool visit_if_stmt(if_stmt*) override; bool visit_match_stmt(match_stmt*) override; - bool check_directly_return_self(ret_stmt*); bool visit_ret_stmt(ret_stmt*) override; bool visit_in_block_expr(in_block_expr*) override; bool visit_block_stmt(block_stmt*) override; diff --git a/godel-script/godel-frontend/src/semantic.cpp b/godel-script/godel-frontend/src/semantic.cpp index ac97286d..4e2e8abd 100644 --- a/godel-script/godel-frontend/src/semantic.cpp +++ b/godel-script/godel-frontend/src/semantic.cpp @@ -575,9 +575,7 @@ void semantic::cond_stmt_check(cond_stmt* node, const function& func) { if_stmt_check(node->get_if_stmt(), func); // check if having else branches when switch is on - if (flag_check_no_else_branch && ( - node->get_elsif_stmt().size() || - node->has_else_stmt())) { + if (node->get_elsif_stmt().size() || node->has_else_stmt()) { err.err(node->get_location(), "else branches are not allowed.", "may cause fatal ungrounded error." @@ -877,17 +875,31 @@ infer semantic::in_expr(binary_operator* node) { ); return infer::boolean(); } + + // left hand side value should not be value set if (left_type.is_set) { err.err(node->get_left()->get_location(), - "should be single value but get value set \"" + + "expect single value but get set \"" + left_type.full_path_name() + "\"." ); return infer::boolean(); } - if (left_type!=right_type || !right_type.is_set) { + + // right hand side value must be value set + if (!right_type.is_set) { + err.err(node->get_right()->get_location(), + "expect \"*" + left_type.full_path_name() + + "\", but get single value \"" + right_type.full_path_name() + + "\"." + ); + return infer::boolean(); + } + + // type should be the same + if (left_type!=right_type) { err.err(node->get_right()->get_location(), - "should be a value set, expect \"*" + left_type.full_path_name() + - "\", but get a single value \"" + right_type.full_path_name() + + "expect \"*" + left_type.full_path_name() + + "\", but get \"" + right_type.full_path_name() + "\"." ); return infer::boolean(); @@ -3008,7 +3020,6 @@ const error& semantic::analyse(const configure& config, ast_root* root) { // stage 1: initialize impl_schema_name = ""; flag_check_access_authority = config.count(option::cli_semantic_pub_check); - flag_check_no_else_branch = config.count(option::cli_semantic_no_else); ctx.this_file_name = root->get_file(); // stage 2: @@ -3066,7 +3077,8 @@ const error& semantic::analyse(const configure& config, ast_root* root) { // stage 7: all function implemention block check all_function_block_check(root); return_ungrounded_checker(&err).check(root); - negative_expression_ungrounded_checker(&err).check(root); + undetermined_checker(&err).check(root); + neg_expr_ungrounded_checker(&err).check(root); if (err.get_error()) { return err; } diff --git a/godel-script/godel-frontend/src/semantic.h b/godel-script/godel-frontend/src/semantic.h index 93d51c01..6ab940af 100644 --- a/godel-script/godel-frontend/src/semantic.h +++ b/godel-script/godel-frontend/src/semantic.h @@ -60,9 +60,6 @@ class semantic { // switch of public access authority checker bool flag_check_access_authority = false; - // switch of condition statements having else branch checker - bool flag_check_no_else_branch = false; - private: // generate output used functions and methods void generate_output_used_function_name(); diff --git a/godel-script/godel-frontend/src/symbol.cpp b/godel-script/godel-frontend/src/symbol.cpp index f17063ee..ff577442 100644 --- a/godel-script/godel-frontend/src/symbol.cpp +++ b/godel-script/godel-frontend/src/symbol.cpp @@ -29,11 +29,13 @@ function function::build_native(const std::string& name, return native_function; } -std::string function::to_json() const { +std::string function::to_json(bool with_location) const { std::string res = "{\"name\":\"" + name; res += has_generic? "\",":"\","; - res += "\"location\":" + location.to_json() + ","; - res += "\"return\":" + return_type.to_json() + ","; + if (with_location) { + res += "\"location\":" + location.to_json() + ","; + } + res += "\"return\":" + return_type.to_json(with_location) + ","; res += "\"is_public\":"; res += public_access_authority? "\"true\",":"\"false\","; res += "\"is_inherited\":"; @@ -41,7 +43,7 @@ std::string function::to_json() const { res += "\"parameter\":["; for(const auto& i : ordered_parameter_list) { res += "{\"name\":\"" + i + "\",\"type\":"; - res += parameter_list.at(i).to_json() + "},"; + res += parameter_list.at(i).to_json(with_location) + "},"; } if (res.back()==',') { res.pop_back(); @@ -108,14 +110,16 @@ std::string enumerate::fuzzy_search(const std::string& id) const { return fuzzy_search_name; } -std::string schema::to_json() const { +std::string schema::to_json(bool with_location) const { std::string res = "{\"name\":\"" + name + "\","; - res += "\"location\":" + location.to_json() + ","; + if (with_location) { + res += "\"location\":" + location.to_json() + ","; + } res += "\"parent\": \"" + std::string(parent? parent->name:"") + "\","; res += "\"fields\":["; for(const auto& i : ordered_fields) { const auto& type = fields.at(i); - res += "{\"name\":\"" + i + "\",\"type\":" + type.to_json(); + res += "{\"name\":\"" + i + "\",\"type\":" + type.to_json(with_location); res += ",\"primary\":"; res += type.primary? "\"true\"":"\"false\""; res += ",\"is_inherited\":"; @@ -131,10 +135,10 @@ std::string schema::to_json() const { } res += "],\"methods\":["; for(const auto& i : methods) { - res += i.second.to_json() + ","; + res += i.second.to_json(with_location) + ","; } for(const auto& i : natives) { - res += i.second.to_json() + ","; + res += i.second.to_json(with_location) + ","; } if (res.back()==',') { res.pop_back(); diff --git a/godel-script/godel-frontend/src/symbol.h b/godel-script/godel-frontend/src/symbol.h index f0d2f393..ee28c0b7 100644 --- a/godel-script/godel-frontend/src/symbol.h +++ b/godel-script/godel-frontend/src/symbol.h @@ -85,10 +85,14 @@ struct symbol { return out; } - std::string to_json() const { - return "{\"name\":\"" + type_name + - "\",\"is_set\":\"" + (is_set? "true":"false") + - "\",\"type_loc\":" + type_loc.to_json() + "}"; + std::string to_json(bool with_location = true) const { + auto res = "{\"name\":\"" + type_name + + "\",\"is_set\":\"" + (is_set? "true":"false") + "\""; + if (with_location) { + res += ",\"type_loc\":" + type_loc.to_json(); + } + res += "}"; + return res; } // get full path name of the symbol @@ -249,9 +253,12 @@ struct function { // for aggregator to check input set type std::vector aggregator_set_type; - + + // mark it is implemented bool implemented = false; + // mark it is inherited method bool inherit = false; + // mark it is native method bool builtin = false; // default private bool public_access_authority = false; @@ -323,7 +330,7 @@ struct function { return out; } - std::string to_json() const; + std::string to_json(bool with_location = true) const; }; struct basic { @@ -445,7 +452,7 @@ struct schema { return out; } - std::string to_json() const; + std::string to_json(bool with_location = true) const; std::string fuzzy_search(const std::string&) const; bool has_primary_key() const {