diff --git a/.github/actions/build_linux/action.yml b/.github/actions/build_linux/action.yml
index fc8eacc23039c..66f4ed7950e7f 100644
--- a/.github/actions/build_linux/action.yml
+++ b/.github/actions/build_linux/action.yml
@@ -68,7 +68,7 @@ runs:
       if: inputs.artifacts == 'all'
       shell: bash
       run: |
-        artifacts="meta,metactl,metabench,query,sqllogictests"
+        artifacts="meta,metactl,metabench,query,sqllogictests,bendsave"
         binaries=""
         for artifact in ${artifacts//,/ }; do
           binaries="${binaries} --bin databend-$artifact"
diff --git a/.github/actions/test_sqllogic_standalone_linux_minio_with_bendsave/action.yml b/.github/actions/test_sqllogic_standalone_linux_minio_with_bendsave/action.yml
new file mode 100644
index 0000000000000..14bd04b330ce0
--- /dev/null
+++ b/.github/actions/test_sqllogic_standalone_linux_minio_with_bendsave/action.yml
@@ -0,0 +1,93 @@
+name: "Test sqllogic Standalone(minio) with bendsave"
+description: "Running sqllogic tests in standalone mode with bendsave backup and restore"
+
+runs:
+  using: "composite"
+  steps:
+    - uses: ./.github/actions/setup_test
+      with:
+        artifacts: sqllogictests,meta,query,bendsave
+
+    - name: Minio Setup (ubuntu-latest only)
+      shell: bash
+      run: |
+        docker run -d --network host --name minio \
+          -e "MINIO_ACCESS_KEY=minioadmin" \
+          -e "MINIO_SECRET_KEY=minioadmin" \
+          -e "MINIO_ADDRESS=:9900" \
+          -v /tmp/data:/data \
+          -v /tmp/config:/root/.minio \
+          minio/minio server /data
+
+        export AWS_ACCESS_KEY_ID=minioadmin
+        export AWS_SECRET_ACCESS_KEY=minioadmin
+        export AWS_EC2_METADATA_DISABLED=true
+
+        aws --endpoint-url http://127.0.0.1:9900/ s3 mb s3://testbucket
+        aws --endpoint-url http://127.0.0.1:9900/ s3 mb s3://backupbucket
+
+    - name: Run sqllogic Tests with Standalone mode
+      shell: bash
+      env:
+        TEST_HANDLERS: "http"
+      run: bash ./scripts/ci/ci-run-sqllogic-tests-minio-with-bendsave.sh tpch
+
+    - name: Run bendsave backup
+      shell: bash
+      env:
+        AWS_ACCESS_KEY_ID: minioadmin
+        AWS_SECRET_ACCESS_KEY: minioadmin
+        AWS_EC2_METADATA_DISABLED: true
+        AWS_REGION: us-west-2
+      run: |
+        export STORAGE_TYPE=s3
+        export STORAGE_S3_BUCKET=testbucket
+        export STORAGE_S3_ROOT=admin
+        export STORAGE_S3_ENDPOINT_URL=http://127.0.0.1:9900
+        export STORAGE_S3_ACCESS_KEY_ID=minioadmin
+        export STORAGE_S3_SECRET_ACCESS_KEY=minioadmin
+        export STORAGE_ALLOW_INSECURE=true
+
+        ./target/${{ env.BUILD_PROFILE }}/databend-bendsave backup --from ./scripts/ci/deploy/config/databend-query-node-1.toml --to s3://backupbucket?endpoint=http://127.0.0.1:9900/
+
+        aws --endpoint-url http://127.0.0.1:9900/ s3 ls s3://backupbucket --recursive
+
+    - name: Destroy the existing services
+      shell: bash
+      env:
+        AWS_ACCESS_KEY_ID: minioadmin
+        AWS_SECRET_ACCESS_KEY: minioadmin
+        AWS_EC2_METADATA_DISABLED: true
+        AWS_REGION: us-west-2
+      run: |
+        # kill all services
+        pkill databend-query
+        pkill databend-meta
+        # destroy databend query
+        aws --endpoint-url http://127.0.0.1:9900/ s3 rm s3://testbucket --recursive
+        # destroy databend meta
+        rm -rf /tmp/databend/meta1
+
+    - name: Run bendsave restore
+      shell: bash
+      env:
+        AWS_ACCESS_KEY_ID: minioadmin
+        AWS_SECRET_ACCESS_KEY: minioadmin
+        AWS_EC2_METADATA_DISABLED: true
+        AWS_REGION: us-west-2
+      run: |
+        export STORAGE_TYPE=s3
+        export STORAGE_S3_BUCKET=testbucket
+        export STORAGE_S3_ROOT=admin
+        export STORAGE_S3_ENDPOINT_URL=http://127.0.0.1:9900
+        export STORAGE_S3_ACCESS_KEY_ID=minioadmin
+        export STORAGE_S3_SECRET_ACCESS_KEY=minioadmin
+        export STORAGE_ALLOW_INSECURE=true
+
+        ./target/${{ env.BUILD_PROFILE }}/databend-bendsave restore --from s3://backupbucket?endpoint=http://127.0.0.1:9900/ --to-query ./scripts/ci/deploy/config/databend-query-node-1.toml --to-meta ./scripts/ci/deploy/config/databend-meta-node-for-bendsave.toml --confirm
+
+    - name: Run sqllogic Tests with Standalone mode again after restore
+      shell: bash
+      env:
+        TEST_HANDLERS: "http"
+      run: bash ./scripts/ci/ci-run-sqllogic-tests-minio-with-bendsave.sh tpch
diff --git a/.github/workflows/reuse.sqllogic.yml b/.github/workflows/reuse.sqllogic.yml
index d5aac10eb1049..d482c02fe2ca6 100644
--- a/.github/workflows/reuse.sqllogic.yml
+++ b/.github/workflows/reuse.sqllogic.yml
@@ -25,7 +25,7 @@ env:
 jobs:
   management_mode:
-    runs-on: [self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}"]
+    runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ]
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/test_sqllogic_management_mode_linux
@@ -72,7 +72,7 @@ jobs:
           name: test-sqllogic-standalone-${{ matrix.tests.dirs }}-${{ matrix.handler }}
 
   standalone_udf_server:
-    runs-on: [self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}"]
+    runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ]
     steps:
       - uses: actions/checkout@v4
      - name: Start UDF Server
@@ -94,7 +94,7 @@ jobs:
           name: test-sqllogic-standalone-udf-server
 
   standalone_cloud:
-    runs-on: [self-hosted, X64, Linux, 4c16g, "${{ inputs.runner_provider }}"]
+    runs-on: [ self-hosted, X64, Linux, 4c16g, "${{ inputs.runner_provider }}" ]
     steps:
       - uses: actions/checkout@v4
       - name: Start Cloud Control Server
@@ -115,7 +115,7 @@ jobs:
           name: test-sqllogic-standalone-cloud
 
   standalone_minio:
-    runs-on: [self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}"]
+    runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ]
     strategy:
       fail-fast: false
       matrix:
@@ -141,8 +141,24 @@ jobs:
         with:
           name: test-sqllogic-standalone-minio-${{ matrix.dirs }}-${{ matrix.handler }}-${{ matrix.format }}
 
+  standalone_minio_with_bendsave:
+    runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup_license
+        with:
+          runner_provider: ${{ inputs.runner_provider }}
+          type: ${{ inputs.license_type }}
+      - uses: ./.github/actions/test_sqllogic_standalone_linux_minio_with_bendsave
+        timeout-minutes: 30
+      - name: Upload failure
+        if: failure()
+        uses: ./.github/actions/artifact_failure
+        with:
+          name: test-sqllogic-standalone-minio-tpch-http-with-bendsave
+
   standalone_iceberg_tpch:
-    runs-on: [self-hosted, X64, Linux, 4c16g, "${{ inputs.runner_provider }}"]
+    runs-on: [ self-hosted, X64,
Linux, 4c16g, "${{ inputs.runner_provider }}" ] steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 @@ -201,7 +217,7 @@ jobs: name: test-sqllogic-cluster-${{ matrix.tests.dirs }}-${{ matrix.handler }} stage: - runs-on: [self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}"] + runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ] strategy: fail-fast: false matrix: @@ -230,7 +246,7 @@ jobs: name: test-sqllogic-stage-${{ matrix.storage }}-${{ matrix.handler }}-${{ matrix.dedup }} standalone_no_table_meta_cache: - runs-on: [self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}"] + runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ] strategy: fail-fast: false matrix: @@ -254,7 +270,7 @@ jobs: name: test-sqllogic-standalone-no-table-meta-cache-${{ matrix.dirs }}-${{ matrix.handler }} ee: - runs-on: [self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}"] + runs-on: [ self-hosted, X64, Linux, 2c8g, "${{ inputs.runner_provider }}" ] strategy: fail-fast: false matrix: diff --git a/Cargo.lock b/Cargo.lock index b700e93b3403d..525f6dc15512c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -228,9 +228,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.91" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c042108f3ed77fd83760a5fd79b53be043192bb3b9dba91d8c574c0ada7850c8" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" dependencies = [ "backtrace", ] @@ -1422,9 +1422,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "675f87afced0413c9bb02843499dbbd3882a237645883f71a2b59644a6d2f753" dependencies = [ "arrayref", "arrayvec", @@ -1891,9 +1891,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.11" +version = "1.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fb8dd288a69fc53a1996d7ecfbf4a20d59065bff137ce7e56bbd620de191189" +checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" dependencies = [ "jobserver", "libc", @@ -2365,9 +2365,9 @@ checksum = "6051f239ecec86fde3410901ab7860d458d160371533842974fc61f96d15879b" [[package]] name = "constant_time_eq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "convert_case" @@ -2903,6 +2903,34 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" +[[package]] +name = "databend-bendsave" +version = "0.1.0" +dependencies = [ + "anyhow", + "bytes", + "clap", + "databend-common-base", + "databend-common-config", + "databend-common-license", + "databend-common-meta-client", + "databend-common-meta-control", + "databend-common-meta-types", + "databend-common-storage", + "databend-common-users", + "databend-enterprise-query", + "databend-meta", + "databend-query", + "form_urlencoded", + "futures", + "http 1.1.0", + "log", + "logforth", + "opendal", + "tokio", + "toml 0.8.19", +] + [[package]] name = "databend-binaries" version = "0.1.0" @@ -6567,9 +6595,9 @@ checksum = 
"e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" dependencies = [ "futures-channel", "futures-core", @@ -6604,9 +6632,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -6614,15 +6642,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" dependencies = [ "futures-core", "futures-task", @@ -6642,9 +6670,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-lite" @@ -6658,9 +6686,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -6669,21 +6697,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-channel", "futures-core", @@ -7884,6 +7912,8 @@ version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -7898,11 +7928,11 @@ dependencies 
= [ [[package]] name = "hashlink" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" dependencies = [ - "hashbrown 0.14.5", + "hashbrown 0.15.2", ] [[package]] @@ -9455,7 +9485,6 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" dependencies = [ - "cc", "pkg-config", "vcpkg", ] @@ -9539,9 +9568,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" dependencies = [ "serde", "value-bag", @@ -11657,9 +11686,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.1" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", "prost-derive", @@ -11667,13 +11696,12 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.1" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ - "bytes", "heck 0.5.0", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", @@ -11688,12 +11716,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.1" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.95", @@ -11701,9 +11729,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.1" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] @@ -13686,9 +13714,9 @@ dependencies = [ [[package]] name = "sqlx" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93334716a037193fac19df402f8571269c84a00852f6a7066b5d2616dcd64d3e" +checksum = "4410e73b3c0d8442c5f99b425d7a435b5ee0ae4167b3196771dd3f7a01be745f" dependencies = [ "sqlx-core", "sqlx-macros", @@ -13699,37 +13727,31 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d8060b456358185f7d50c55d9b5066ad956956fddec42ee2e8567134a8936e" +checksum = "6a007b6936676aa9ab40207cde35daab0a04b823be8ae004368c0793b96a61e0" dependencies = [ - "atoi", - "byteorder", "bytes", "crc", "crossbeam-queue", "either", "event-listener 5.3.1", - "futures-channel", "futures-core", "futures-intrusive", "futures-io", 
"futures-util", - "hashbrown 0.14.5", - "hashlink 0.9.1", - "hex", + "hashbrown 0.15.2", + "hashlink 0.10.0", "indexmap 2.6.0", "log", "memchr", "once_cell", - "paste", "percent-encoding", "serde", "serde_json", "sha2", "smallvec", - "sqlformat", - "thiserror 1.0.65", + "thiserror 2.0.12", "tokio", "tokio-stream", "tracing", @@ -13738,9 +13760,9 @@ dependencies = [ [[package]] name = "sqlx-macros" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cac0692bcc9de3b073e8d747391827297e075c7710ff6276d9f7a1f3d58c6657" +checksum = "3112e2ad78643fef903618d78cf0aec1cb3134b019730edb039b69eaf531f310" dependencies = [ "proc-macro2", "quote", @@ -13751,9 +13773,9 @@ dependencies = [ [[package]] name = "sqlx-macros-core" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1804e8a7c7865599c9c79be146dc8a9fd8cc86935fa641d3ea58e5f0688abaa5" +checksum = "4e9f90acc5ab146a99bf5061a7eb4976b573f560bc898ef3bf8435448dd5e7ad" dependencies = [ "dotenvy", "either", @@ -13777,9 +13799,9 @@ dependencies = [ [[package]] name = "sqlx-mysql" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64bb4714269afa44aef2755150a0fc19d756fb580a67db8885608cf02f47d06a" +checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233" dependencies = [ "atoi", "base64 0.22.1", @@ -13812,16 +13834,16 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 1.0.65", + "thiserror 2.0.12", "tracing", "whoami", ] [[package]] name = "sqlx-postgres" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa91a732d854c5d7726349bb4bb879bb9478993ceb764247660aee25f67c2f8" +checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613" dependencies = [ "atoi", "base64 0.22.1", @@ -13832,7 +13854,6 @@ dependencies = [ "etcetera", "futures-channel", "futures-core", - "futures-io", "futures-util", "hex", "hkdf", @@ -13850,16 +13871,16 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 1.0.65", + "thiserror 2.0.12", "tracing", "whoami", ] [[package]] name = "sqlx-sqlite" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5b2cf34a45953bfd3daaf3db0f7a7878ab9b7a6b91b422d24a7a9e4c857b680" +checksum = "f85ca71d3a5b24e64e1d08dd8fe36c6c95c339a896cc33068148906784620540" dependencies = [ "atoi", "flume", diff --git a/Cargo.toml b/Cargo.toml index 3ef2c1b367790..ad16e8cb0f1cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -109,6 +109,7 @@ members = [ "src/meta/service", "tests/sqllogictests", "src/tests/sqlsmith", + "src/bendsave", ] # Workspace dependencies @@ -304,6 +305,7 @@ faststr = "0.2" feature-set = { version = "0.1.1" } flatbuffers = "24" # Must use the same version with arrow-ipc foreign_vec = "0.1.0" +form_urlencoded = { version = "1" } fs_extra = "1.3.0" futures = "0.3.24" futures-async-stream = { version = "0.2.7" } diff --git a/LICENSE b/LICENSE index fd5f6f158afa2..18d7d4a46f6ef 100644 --- a/LICENSE +++ b/LICENSE @@ -5,6 +5,7 @@ Portions of this software are licensed as follows: * "src/binaries/meta/ee_main.rs" * "src/meta/ee" * "src/query/ee" + * "src/bendsave" * All third party components incorporated into this software are licensed under the original license provided by the owner of the applicable component. 
 * Content outside of the above mentioned directories or restrictions above is available under the "Apache-2.0" license as defined in "licenses/Apache-2.0.txt".
diff --git a/licenserc-ee.toml b/licenserc-ee.toml
index 6f0c0c974ba04..eb642ce725acb 100644
--- a/licenserc-ee.toml
+++ b/licenserc-ee.toml
@@ -21,6 +21,7 @@ includes = [
     "src/binaries/meta/ee_main.rs",
     "src/meta/ee",
     "src/query/ee",
+    "src/bendsave",
 ]
 
 excludes = [
@@ -33,10 +34,8 @@ excludes = [
     ".gitignore",
     ".gitattributes",
     ".editorconfig",
-    "LICENSE",
     "Makefile",
-
     # docs and generated files
     "**/*.md",
     "**/*.hbs",
diff --git a/licenserc.toml b/licenserc.toml
index 113e2df818c4c..f6a113b4fec77 100644
--- a/licenserc.toml
+++ b/licenserc.toml
@@ -9,13 +9,12 @@ excludes = [
     "tools",
     "benchmark",
     "src/common/compress/tests",
-
     # licensed under Elastic License 2.0
     "src/binaries/query/ee_main.rs",
     "src/meta/binaries/meta/ee_main.rs",
     "src/meta/ee",
     "src/query/ee",
-
+    "src/bendsave",
     # hidden files
     ".cargo",
     ".databend",
@@ -25,10 +24,8 @@ excludes = [
     ".gitignore",
     ".gitattributes",
     ".editorconfig",
-    "LICENSE",
     "Makefile",
-
     # docs and generated files
     "**/*.md",
     "**/*.hbs",
diff --git a/scripts/ci/ci-run-sqllogic-tests-minio-with-bendsave.sh b/scripts/ci/ci-run-sqllogic-tests-minio-with-bendsave.sh
new file mode 100755
index 0000000000000..ab6469094f8fe
--- /dev/null
+++ b/scripts/ci/ci-run-sqllogic-tests-minio-with-bendsave.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Copyright 2020-2021 The Databend Authors.
+# SPDX-License-Identifier: Apache-2.0.
+
+set -e
+
+echo "*************************************"
+echo "* Setting STORAGE_TYPE to S3.       *"
+echo "*                                   *"
+echo "* Please make sure that S3 backend  *"
+echo "* is ready, and configured properly.*"
+echo "*************************************"
+export STORAGE_TYPE=s3
+export STORAGE_S3_BUCKET=testbucket
+export STORAGE_S3_ROOT=admin
+export STORAGE_S3_ENDPOINT_URL=http://127.0.0.1:9900
+export STORAGE_S3_ACCESS_KEY_ID=minioadmin
+export STORAGE_S3_SECRET_ACCESS_KEY=minioadmin
+export STORAGE_ALLOW_INSECURE=true
+
+echo "Starting standalone DatabendQuery and DatabendMeta"
+./scripts/ci/deploy/databend-query-standalone-for-bendsave.sh
+
+TEST_HANDLERS=${TEST_HANDLERS:-"mysql,http"}
+TEST_PARALLEL=${TEST_PARALLEL:-8}
+BUILD_PROFILE=${BUILD_PROFILE:-debug}
+
+RUN_DIR=""
+if [ $# -gt 0 ]; then
+    RUN_DIR="--run_dir $*"
+fi
+echo "Run suites using argument: $RUN_DIR"
+
+echo "Starting databend-sqllogic tests"
+target/${BUILD_PROFILE}/databend-sqllogictests --handlers ${TEST_HANDLERS} ${RUN_DIR} --skip_dir management,explain_native,ee --enable_sandbox --parallel ${TEST_PARALLEL}
diff --git a/scripts/ci/deploy/config/databend-meta-node-for-bendsave.toml b/scripts/ci/deploy/config/databend-meta-node-for-bendsave.toml
new file mode 100644
index 0000000000000..e66104124c405
--- /dev/null
+++ b/scripts/ci/deploy/config/databend-meta-node-for-bendsave.toml
@@ -0,0 +1,22 @@
+# Usage:
+# databend-meta -c databend-meta-node-for-bendsave.toml
+
+log_dir = "./.databend/logs1"
+admin_api_address = "0.0.0.0:28101"
+grpc_api_address = "0.0.0.0:9191"
+# databend-query fetches this address to update its databend-meta endpoints list,
+# in case the databend-meta cluster changes.
+grpc_api_advertise_host = "127.0.0.1"
+
+[raft_config]
+id = 1
+raft_dir = "/tmp/databend/meta1"
+raft_api_port = 28103
+
+# Assign raft_{listen|advertise}_host in test config.
+# This allows you to catch a bug in unit tests when something goes wrong in raft meta node communication.
+raft_listen_host = "127.0.0.1"
+raft_advertise_host = "localhost"
+
+# Start up mode: single node cluster
+single = true
diff --git a/scripts/ci/deploy/databend-query-standalone-for-bendsave.sh b/scripts/ci/deploy/databend-query-standalone-for-bendsave.sh
new file mode 100755
index 0000000000000..20ee9bca77b52
--- /dev/null
+++ b/scripts/ci/deploy/databend-query-standalone-for-bendsave.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Copyright 2022 The Databend Authors.
+# SPDX-License-Identifier: Apache-2.0.
+
+set -e
+
+SCRIPT_PATH="$(cd "$(dirname "$0")" >/dev/null 2>&1 && pwd)"
+cd "$SCRIPT_PATH/../../.." || exit
+BUILD_PROFILE=${BUILD_PROFILE:-debug}
+
+killall databend-query || true
+killall databend-meta || true
+sleep 1
+
+for bin in databend-query databend-meta; do
+    if test -n "$(pgrep $bin)"; then
+        echo "The $bin is not killed. Force killing."
+        killall -9 $bin || true
+    fi
+done
+
+# Wait for the killed processes to clean up resources
+sleep 1
+
+echo 'Start databend-meta...'
+nohup target/${BUILD_PROFILE}/databend-meta -c scripts/ci/deploy/config/databend-meta-node-for-bendsave.toml --single --log-level=INFO &
+echo "Waiting on databend-meta 10 seconds..."
+python3 scripts/ci/wait_tcp.py --timeout 30 --port 9191
+
+echo 'Start databend-query...'
+nohup target/${BUILD_PROFILE}/databend-query -c scripts/ci/deploy/config/databend-query-node-1.toml --internal-enable-sandbox-tenant &
+
+echo "Waiting on databend-query 10 seconds..."
+python3 scripts/ci/wait_tcp.py --timeout 30 --port 8000
diff --git a/src/bendsave/Cargo.toml b/src/bendsave/Cargo.toml
new file mode 100644
index 0000000000000..10afa356a06d7
--- /dev/null
+++ b/src/bendsave/Cargo.toml
@@ -0,0 +1,35 @@
+[package]
+name = "databend-bendsave"
+description = "databend backup and restore tool"
+version = "0.1.0"
+authors = { workspace = true }
+license = "Elastic-2.0"
+edition = "2021"
+
+[dependencies]
+databend-common-base = { workspace = true }
+databend-common-config = { workspace = true }
+databend-common-license = { workspace = true }
+databend-common-meta-client = { workspace = true }
+databend-common-meta-control = { workspace = true }
+databend-common-meta-types = { workspace = true }
+databend-common-storage = { workspace = true }
+databend-common-users = { workspace = true }
+databend-enterprise-query = { workspace = true }
+databend-meta = { workspace = true }
+databend-query = { workspace = true }
+
+anyhow = { workspace = true }
+bytes = { workspace = true }
+clap = { workspace = true }
+form_urlencoded = { workspace = true }
+futures = { workspace = true }
+http = { workspace = true }
+log = { workspace = true }
+logforth = { workspace = true }
+opendal = { workspace = true, features = ["services-s3", "services-fs"] }
+tokio = { workspace = true }
+toml = { workspace = true, features = ["parse"] }
+
+[lints]
+workspace = true
diff --git a/src/bendsave/README.md b/src/bendsave/README.md
new file mode 100644
index 0000000000000..eb1c13a4619ea
--- /dev/null
+++ b/src/bendsave/README.md
@@ -0,0 +1,68 @@
+# Databend Bendsave
+
+bendsave is a tool built by Databend Labs to back up and restore data from a Databend cluster.
+
+## Architecture
+
+```txt
+        +-------------------------+
+        |    Databend Cluster     |
+        +-----------+-------------+
+                    |
+                    | Read/Write Data
+                    v
+        +-----------+-------------+
+        |    databend-bendsave    |
+        +-----------+-------------+
+                    |
+                    | Storage Operations
+                    v
++-------------------+-------------------+
+|                                       |
+|       Storage Backend (S3 etc.)       |
+|                                       |
++---------------------------------------+
+```
+
+## Usage
+
+```shell
+# Backup
+bendsave backup --from /path/to/query-node-1.toml --to s3://backup/
+# Restore
+bendsave restore --from s3://backup/ --to-query /path/to/query-node-1.toml --to-meta /path/to/meta-node-1.toml --confirm
+```
+
+### Backup
+
+```shell
+databend-bendsave backup \
+    --from /path/to/databend-query-config.toml \
+    --to s3://backup?endpoint=http://127.0.0.1:9900/
+```
+
+The `backup` command creates a backup of data from a Databend cluster to a specified location.
+
+The `--from` flag specifies the path to the Databend query configuration file, while the `--to` flag defines the destination for storing the backup. The `--to` flag should be a URL indicating where the backup will be stored, with all relevant backup configurations embedded in the URL.
+
+For example, the URL `s3://backup?endpoint=http://127.0.0.1:9900/` specifies that the backup will be stored in an S3 bucket named `backup`, with the S3 service endpoint set to `http://127.0.0.1:9900/`.
+
+Users can provide the `access_key_id` directly in the URL, such as `s3://backup?access_key_id=xxx&secret_access_key=xxx`. However, it is recommended to use environment variables like `AWS_ACCESS_KEY_ID` instead.
+
+### Restore
+
+```shell
+databend-bendsave restore \
+    --from s3://backup?endpoint=http://127.0.0.1:9900/ \
+    --to-query /path/to/databend-query-config.toml \
+    --to-meta /path/to/databend-meta-config.toml \
+    --confirm
+```
+
+The `restore` command recovers data from a backup and applies it to a Databend cluster.
+
+The `--from` flag defines the URL of the backup location, incorporating all necessary configurations within the URL. This should match the URL used during the backup process.
+
+The `--to-query` flag specifies the path to the Databend query configuration file, while the `--to-meta` flag designates the path to the Databend meta configuration file.
+
+The `--confirm` flag confirms the restore operation; without it, the command is a dry run.
diff --git a/src/bendsave/src/backup.rs b/src/bendsave/src/backup.rs
new file mode 100644
index 0000000000000..3bcc17dd38f98
--- /dev/null
+++ b/src/bendsave/src/backup.rs
@@ -0,0 +1,60 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use anyhow::Result;
+use futures::SinkExt;
+use futures::TryStreamExt;
+use log::info;
+use opendal::Operator;
+
+use crate::storage::init_query;
+use crate::storage::load_bendsave_storage;
+use crate::storage::load_databend_meta;
+use crate::storage::load_query_config;
+use crate::storage::load_query_storage;
+use crate::storage::verify_query_license;
+use crate::utils::storage_copy;
+use crate::utils::DATABEND_META_BACKUP_PATH;
+
+pub async fn backup(from: &str, to: &str) -> Result<()> {
+    let query_cfg = load_query_config(from)?;
+    init_query(&query_cfg)?;
+    verify_query_license(&query_cfg).await?;
+    let databend_storage = load_query_storage(&query_cfg)?;
+
+    let bendsave_storage = load_bendsave_storage(to).await?;
+
+    // backup metadata first.
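+    // The meta snapshot is streamed from metasrv's export API into the
+    // backup target before the bulk data copy starts (see `backup_meta`
+    // below).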
+    backup_meta(bendsave_storage.clone()).await?;
+    storage_copy(databend_storage, bendsave_storage).await?;
+
+    info!("databend backup finished");
+    Ok(())
+}
+
+/// Back up the entire databend meta to epochfs.
+pub async fn backup_meta(op: Operator) -> Result<()> {
+    let (_client_handle, stream) = load_databend_meta().await?;
+    let mut stream = stream.map_err(std::io::Error::other);
+
+    let mut file = op
+        .writer_with(DATABEND_META_BACKUP_PATH)
+        .chunk(8 * 1024 * 1024)
+        .await?
+        .into_bytes_sink();
+    file.send_all(&mut stream).await?;
+    file.close().await?;
+    info!("databend meta has been backed up");
+    Ok(())
+}
diff --git a/src/bendsave/src/lib.rs b/src/bendsave/src/lib.rs
new file mode 100644
index 0000000000000..b50235bbe6d9b
--- /dev/null
+++ b/src/bendsave/src/lib.rs
@@ -0,0 +1,21 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod storage;
+pub mod utils;
+
+mod backup;
+pub use backup::backup;
+mod restore;
+pub use restore::restore;
diff --git a/src/bendsave/src/main.rs b/src/bendsave/src/main.rs
new file mode 100644
index 0000000000000..a5aeb6dedb59b
--- /dev/null
+++ b/src/bendsave/src/main.rs
@@ -0,0 +1,101 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
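+
+// Example invocations (the paths and endpoint below are illustrative only;
+// the flags are the ones defined on `Commands` in this file):
+//
+//   databend-bendsave backup \
+//       --from ./databend-query.toml \
+//       --to "s3://backupbucket?endpoint=http://127.0.0.1:9900/"
+//
+//   databend-bendsave restore \
+//       --from "s3://backupbucket?endpoint=http://127.0.0.1:9900/" \
+//       --to-query ./databend-query.toml \
+//       --to-meta ./databend-meta.toml \
+//       --confirm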
+
+use anyhow::Result;
+use clap::Parser;
+use clap::Subcommand;
+use databend_bendsave::backup;
+use databend_bendsave::restore;
+use logforth::append;
+use logforth::filter::EnvFilter;
+use logforth::Dispatch;
+use logforth::Logger;
+
+#[derive(Parser)]
+#[command(name = "bendsave")]
+#[command(about = "Databend backup and restore tool", long_about = None)]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Subcommand)]
+enum Commands {
+    /// Create backups of cluster data and metadata
+    Backup {
+        /// Configuration file path
+        #[arg(long)]
+        from: String,
+        /// Backup destination
+        #[arg(long)]
+        to: String,
+    },
+
+    /// Restore a Databend cluster from a backup
+    Restore {
+        /// Backup location URL
+        #[arg(long)]
+        from: String,
+        /// Target configuration file path of databend query
+        #[arg(long)]
+        to_query: String,
+        /// Target configuration file path of databend meta
+        #[arg(long)]
+        to_meta: String,
+        /// Confirm restoration and perform it immediately
+        #[arg(long, default_value_t = false)]
+        confirm: bool,
+    },
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    Logger::new()
+        .dispatch(
+            Dispatch::new()
+                .filter(EnvFilter::from_default_env())
+                .append(append::Stderr::default()),
+        )
+        .apply()?;
+
+    let cli = Cli::parse();
+
+    match &cli.command {
+        Commands::Backup { from, to } => {
+            println!("Backing up from {} to {}", from, to);
+            backup(from, to).await?;
+        }
+        Commands::Restore {
+            from,
+            to_query,
+            to_meta,
+            confirm,
+        } => {
+            if *confirm {
+                println!(
+                    "Restoring from {} to query {} and meta {} with confirmation",
+                    from, to_query, to_meta
+                );
+                restore(from, to_query, to_meta).await?;
+            } else {
+                println!(
+                    "Dry-run restore from {} at query {} and meta {}",
+                    from, to_query, to_meta
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/bendsave/src/restore.rs b/src/bendsave/src/restore.rs
new file mode 100644
index 0000000000000..646ede89bdd2c
--- /dev/null
+++ b/src/bendsave/src/restore.rs
@@ -0,0 +1,71 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
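+
+// Restore mirrors `backup` in reverse: the data files are copied back from
+// the backup storage first, then the meta snapshot is imported into the
+// local raft dir (see `restore_meta` below).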
+
+use anyhow::Result;
+use databend_common_meta_control::args::ImportArgs;
+use databend_common_meta_control::import::import_data;
+use futures::StreamExt;
+use log::info;
+use opendal::Operator;
+use tokio::io::AsyncWriteExt;
+
+use crate::storage::init_query;
+use crate::storage::load_bendsave_storage;
+use crate::storage::load_meta_config;
+use crate::storage::load_query_config;
+use crate::storage::load_query_storage;
+use crate::utils::storage_copy;
+use crate::utils::DATABEND_META_BACKUP_PATH;
+
+pub async fn restore(from: &str, to_query: &str, to_meta: &str) -> Result<()> {
+    let bendsave_storage = load_bendsave_storage(from).await?;
+
+    let query_cfg = load_query_config(to_query)?;
+    init_query(&query_cfg)?;
+    let databend_storage = load_query_storage(&query_cfg)?;
+
+    let meta_config = load_meta_config(to_meta)?;
+
+    storage_copy(bendsave_storage.clone(), databend_storage).await?;
+    restore_meta(bendsave_storage, &meta_config).await?;
+    Ok(())
+}
+
+pub async fn restore_meta(efs: Operator, meta_cfg: &databend_meta::configs::Config) -> Result<()> {
+    let mut stream = efs
+        .reader(DATABEND_META_BACKUP_PATH)
+        .await?
+        .into_bytes_stream(..)
+        .await?;
+
+    // Write databend meta backup to local fs.
+    let mut local_file = tokio::fs::File::create_new(DATABEND_META_BACKUP_PATH).await?;
+    while let Some(buf) = stream.next().await {
+        let mut buf = buf?;
+        local_file.write_all_buf(&mut buf).await?;
+    }
+    local_file.sync_all().await?;
+    local_file.shutdown().await?;
+
+    let import_args = ImportArgs {
+        raft_dir: Some(meta_cfg.raft_config.raft_dir.to_string()),
+        db: DATABEND_META_BACKUP_PATH.to_string(),
+        initial_cluster: vec![],
+        id: meta_cfg.raft_config.id,
+    };
+    import_data(&import_args).await?;
+
+    info!("databend meta has been restored");
+    Ok(())
+}
diff --git a/src/bendsave/src/storage.rs b/src/bendsave/src/storage.rs
new file mode 100644
index 0000000000000..d5aa30a4a4a6c
--- /dev/null
+++ b/src/bendsave/src/storage.rs
@@ -0,0 +1,244 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
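+
+// Config loading and operator construction for both ends of a transfer:
+// the databend query/meta side is loaded from TOML config files, while the
+// bendsave side is described by a URI such as `s3://bucket/path?endpoint=...`
+// (see `load_bendsave_storage`).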
+
+use std::collections::HashMap;
+use std::str::FromStr;
+use std::sync::Arc;
+
+use anyhow::anyhow;
+use anyhow::Ok;
+use anyhow::Result;
+use bytes::BufMut;
+use bytes::Bytes;
+use bytes::BytesMut;
+use databend_common_base::base::GlobalInstance;
+use databend_common_base::runtime::GlobalIORuntime;
+use databend_common_config::Config;
+use databend_common_config::GlobalConfig;
+use databend_common_config::InnerConfig;
+use databend_common_license::license::Feature;
+use databend_common_license::license_manager::LicenseManager;
+use databend_common_license::license_manager::LicenseManagerSwitch;
+use databend_common_meta_client::ClientHandle;
+use databend_common_meta_client::MetaGrpcClient;
+use databend_common_meta_types::protobuf::ExportRequest;
+use databend_common_storage::init_operator;
+use databend_common_users::builtin::BuiltIn;
+use databend_common_users::UserApiProvider;
+use databend_enterprise_query::license::RealLicenseManager;
+use databend_query::sessions::SessionManager;
+use databend_query::sessions::SessionType;
+use futures::TryStream;
+use futures::TryStreamExt;
+use log::debug;
+use opendal::layers::LoggingLayer;
+use opendal::layers::RetryLayer;
+use opendal::services;
+use opendal::Operator;
+
+/// Load the configuration file of databend query.
+///
+/// The given input is the path to databend query's configuration file.
+pub fn load_query_config(path: &str) -> Result<InnerConfig> {
+    let outer_config: Config = Config::load_with_config_file(path)?;
+    let inner_config: InnerConfig = outer_config.try_into()?;
+    Ok(inner_config)
+}
+
+/// Load the configuration file and return the operator for databend.
+///
+/// The given input is the path to databend's configuration file.
+pub fn load_query_storage(cfg: &InnerConfig) -> Result<Operator> {
+    let op = init_operator(&cfg.storage.params)?;
+    debug!("databend storage loaded: {:?}", op.info());
+    Ok(op)
+}
+
+/// Load the configuration file of databend meta.
+///
+/// The given input is the path to databend meta's configuration file.
+pub fn load_meta_config(path: &str) -> Result<databend_meta::configs::Config> {
+    let content = std::fs::read_to_string(path)?;
+    let outer_config: databend_meta::configs::outer_v0::Config = toml::from_str(&content)?;
+    let inner_config: databend_meta::configs::Config = outer_config.into();
+
+    if !inner_config.raft_config.raft_dir.starts_with("/") {
+        return Err(anyhow!(
+            "raft_dir of meta service must be an absolute path, but got: {:?}",
+            inner_config.raft_config.raft_dir
+        ));
+    }
+
+    debug!("databend meta storage loaded: {:?}", inner_config);
+    Ok(inner_config)
+}
+
+/// Init databend query instance so that we can read meta and check license
+/// for it.
+///
+/// FIXME: I really don't like this pattern, but it's how databend works.
+pub fn init_query(cfg: &InnerConfig) -> Result<()> {
+    GlobalInstance::init_production();
+
+    GlobalConfig::init(cfg)?;
+    GlobalIORuntime::init(cfg.storage.num_cpus as usize)?;
+
+    Ok(())
+}
+
+/// Verify the databend query instance's license.
+///
+/// We only need to call this during backup, since we can't access metasrv
+/// while restoring.
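+///
+/// A minimal sketch of the expected call order during backup (assuming a
+/// valid query config file path):
+///
+/// ```ignore
+/// let cfg = load_query_config("databend-query.toml")?;
+/// init_query(&cfg)?;
+/// verify_query_license(&cfg).await?;
+/// ```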
+pub async fn verify_query_license(cfg: &InnerConfig) -> Result<()> {
+    RealLicenseManager::init(cfg.query.tenant_id.tenant_name().to_string())?;
+    SessionManager::init(cfg)?;
+    UserApiProvider::init(
+        cfg.meta.to_meta_grpc_client_conf(),
+        BuiltIn::default(),
+        &cfg.query.tenant_id,
+        cfg.query.tenant_quota.clone(),
+    )
+    .await?;
+
+    let session_manager = SessionManager::create(cfg);
+    let session = session_manager.create_session(SessionType::Dummy).await?;
+    let session = session_manager.register_session(session)?;
+    let settings = session.get_settings();
+
+    LicenseManagerSwitch::instance().check_enterprise_enabled(
+        unsafe { settings.get_enterprise_license().unwrap_or_default() },
+        Feature::SystemManagement,
+    )?;
+
+    debug!("databend license check passed");
+    Ok(())
+}
+
+/// Load the databend meta service client.
+///
+/// This will load databend meta as a stream of bytes.
+///
+/// Its internal format looks like
+///
+/// ```text
+/// {"xx": "yy"}\n
+/// {"xx": "bb"}\n
+/// ```
+pub async fn load_databend_meta() -> Result<(
+    Arc<ClientHandle>,
+    impl TryStream<Ok = Bytes, Error = anyhow::Error>,
+)> {
+    let cfg = GlobalConfig::instance();
+    let grpc_client_conf = cfg.meta.to_meta_grpc_client_conf();
+    debug!("connect meta services on {:?}", grpc_client_conf.endpoints);
+
+    let meta_client = MetaGrpcClient::try_new(&grpc_client_conf)?;
+    let mut established_client = meta_client.make_established_client().await?;
+
+    // Convert stream from meta chunks to bytes.
+    let stream = established_client
+        .export_v1(ExportRequest::default())
+        .await?
+        .into_inner()
+        .map_ok(|v| {
+            debug!("load databend meta data with {} entries", v.data.len());
+            let mut bs = BytesMut::with_capacity(
+                v.data.len() + v.data.iter().map(|v| v.len()).sum::<usize>(),
+            );
+            v.data.into_iter().for_each(|b| {
+                bs.extend_from_slice(b.as_bytes());
+                bs.put_u8(b'\n');
+            });
+            bs.freeze()
+        })
+        .map_err(|err| anyhow!("bendsave load databend meta data failed: {err:?}"));
+    Ok((meta_client, stream))
+}
+
+/// Load epochfs storage from uri.
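+///
+/// The query string is decoded with `form_urlencoded` and handed to the
+/// opendal builder as key/value options, so any builder option can be
+/// embedded in the URI. For example: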
+///
+/// S3: `s3://bucket/path/to/root/?region=us-east-1&access_key_id=xxx&secret_access_key=xxx`
+/// Fs: `fs://path/to/data`
+pub async fn load_bendsave_storage(uri: &str) -> Result<Operator> {
+    let uri = http::Uri::from_str(uri)?;
+    let scheme = uri.scheme_str().unwrap_or_default();
+    let name = uri.host().unwrap_or_default();
+    let path = uri.path();
+    let mut map: HashMap<String, String> =
+        form_urlencoded::parse(uri.query().unwrap_or_default().as_bytes())
+            .map(|(k, v)| (k.to_string(), v.to_lowercase()))
+            .collect();
+
+    let op = match scheme {
+        "s3" => {
+            if name.is_empty() {
+                return Err(anyhow!(
+                    "bendsave requires a bucket but it's empty in uri: {}",
+                    uri.to_string()
+                ));
+            }
+            map.insert("bucket".to_string(), name.to_string());
+            map.insert("root".to_string(), path.to_string());
+            let op = Operator::from_iter::<services::S3>(map)?.finish();
+            Ok(op)
+        }
+        "fs" => {
+            map.insert("root".to_string(), format!("/{name}/{path}"));
+            let op = Operator::from_iter::<services::Fs>(map)?.finish();
+            Ok(op)
+        }
+        _ => Err(anyhow::anyhow!("Unsupported scheme: {}", scheme)),
+    }?;
+
+    let op = op
+        .layer(RetryLayer::default().with_jitter())
+        .layer(LoggingLayer::default());
+    debug!("epochfs storage loaded: {:?}", op.info());
+    Ok(op)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::Path;
+
+    use databend_common_base::base::tokio;
+    use opendal::Scheme;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_load_meta_config() -> Result<()> {
+        let manifest_dir = env!("CARGO_MANIFEST_DIR");
+        let file_path = Path::new(manifest_dir).join("tests/fixtures/databend_meta_config.toml");
+        let cfg = load_meta_config(&file_path.to_string_lossy())?;
+
+        assert_eq!(cfg.raft_config.raft_dir, "/tmp/.databend/meta1");
+        assert_eq!(cfg.raft_config.id, 1);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_load_epochfs_storage() -> Result<()> {
+        let op = load_bendsave_storage("s3://bendsave/tmp?region=us-east-1").await?;
+        assert_eq!(op.info().scheme(), Scheme::S3);
+        assert_eq!(op.info().name(), "bendsave");
+        assert_eq!(op.info().root(), "/tmp/");
+
+        let op = load_bendsave_storage("fs://tmp").await?;
+        assert_eq!(op.info().scheme(), Scheme::Fs);
+        assert_eq!(op.info().root(), "/tmp");
+        Ok(())
+    }
+}
diff --git a/src/bendsave/src/utils.rs b/src/bendsave/src/utils.rs
new file mode 100644
index 0000000000000..fa68d063b9ee5
--- /dev/null
+++ b/src/bendsave/src/utils.rs
@@ -0,0 +1,61 @@
+// Copyright 2023 Databend Cloud
+//
+// Licensed under the Elastic License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.elastic.co/licensing/elastic-license
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use anyhow::Result;
+use futures::SinkExt;
+use futures::StreamExt;
+use log::info;
+use opendal::Operator;
+
+/// The backup path for databend meta.
+pub static DATABEND_META_BACKUP_PATH: &str = "databend_meta.db";
+
+/// Copy the entire storage from one operator to another.
+pub async fn storage_copy(src: Operator, dst: Operator) -> Result<()> {
+    let mut list = src.lister_with("/").recursive(true).await?;
+    while let Some(entry) = list.next().await.transpose()? {
+        if entry.metadata().is_dir() {
+            continue;
+        }
+
+        let src_meta = src.stat(entry.path()).await?;
+
+        // Skip if the file already exists.
+        if let Ok(dst_meta) = dst.stat(entry.path()).await {
+            if src_meta.content_length() == dst_meta.content_length()
+                && src_meta.etag() == dst_meta.etag()
+            {
+                continue;
+            }
+        }
+
+        let mut stream = src
+            .reader_with(entry.path())
+            .chunk(8 * 1024 * 1024)
+            .await?
+            .into_bytes_stream(..)
+            .await?;
+
+        let mut file = dst
+            .writer_with(entry.path())
+            .chunk(8 * 1024 * 1024)
+            .await?
+            .into_bytes_sink();
+        file.send_all(&mut stream).await?;
+        file.close().await?;
+        info!("file {} has been copied", entry.path());
+    }
+    info!("storage copy finished");
+    Ok(())
+}
diff --git a/src/bendsave/tests/fixtures/databend_meta_config.toml b/src/bendsave/tests/fixtures/databend_meta_config.toml
new file mode 100644
index 0000000000000..815de7ec89726
--- /dev/null
+++ b/src/bendsave/tests/fixtures/databend_meta_config.toml
@@ -0,0 +1,22 @@
+# Usage:
+# databend-meta -c databend_meta_config.toml
+
+log_dir = "./.databend/logs1"
+admin_api_address = "0.0.0.0:28101"
+grpc_api_address = "0.0.0.0:9191"
+# databend-query fetches this address to update its databend-meta endpoints list,
+# in case the databend-meta cluster changes.
+grpc_api_advertise_host = "127.0.0.1"
+
+[raft_config]
+id = 1
+raft_dir = "/tmp/.databend/meta1"
+raft_api_port = 28103
+
+# Assign raft_{listen|advertise}_host in test config.
+# This allows you to catch a bug in unit tests when something goes wrong in raft meta node communication.
+raft_listen_host = "127.0.0.1"
+raft_advertise_host = "localhost"
+
+# Start up mode: single node cluster
+single = true
diff --git a/src/bendsave/tests/fixtures/databend_query_config.toml b/src/bendsave/tests/fixtures/databend_query_config.toml
new file mode 100644
index 0000000000000..e0d8a6cd35fa4
--- /dev/null
+++ b/src/bendsave/tests/fixtures/databend_query_config.toml
@@ -0,0 +1,112 @@
+# Usage:
+# databend-query -c databend_query_config.toml
+
+[query]
+max_active_sessions = 256
+shutdown_wait_timeout_ms = 5000
+
+# For flight rpc.
+flight_api_address = "0.0.0.0:9091"
+
+# Databend Query http address.
+# For admin REST API.
+admin_api_address = "0.0.0.0:8080"
+
+# Databend Query metrics REST API.
+metric_api_address = "0.0.0.0:7070"
+
+# Databend Query MySQL Handler.
+mysql_handler_host = "0.0.0.0"
+mysql_handler_port = 3307
+
+# Databend Query ClickHouse Handler.
+clickhouse_http_handler_host = "0.0.0.0"
+clickhouse_http_handler_port = 8124
+
+# Databend Query HTTP Handler.
+http_handler_host = "0.0.0.0"
+http_handler_port = 8000
+
+# Databend Query FlightSQL Handler.
+flight_sql_handler_host = "0.0.0.0"
+flight_sql_handler_port = 8900
+
+tenant_id = "test_tenant"
+cluster_id = "test_cluster"
+
+table_engine_memory_enabled = true
+default_storage_format = 'parquet'
+default_compression = 'zstd'
+
+enable_udf_server = true
+udf_server_allow_list = ['http://0.0.0.0:8815']
+udf_server_allow_insecure = true
+
+cloud_control_grpc_server_address = "http://0.0.0.0:50051"
+
+# network_policy_whitelist = ['127.0.0.0/8']
+
+[[query.users]]
+name = "root"
+auth_type = "no_password"
+
+[[query.users]]
+name = "default"
+auth_type = "no_password"
+
+# [[query.users]]
+# name = "admin"
+# auth_type = "no_password"
+
+# [[query.users]]
+# name = "databend"
+# auth_type = "double_sha1_password"
+# # echo -n "databend" | sha1sum | cut -d' ' -f1 | xxd -r -p | sha1sum
+# auth_string = "3081f32caef285c232d066033c89a78d88a6d8a5"
+
+# [[query.users]]
+# name = "datafuselabs"
+# auth_type = "sha256_password"
+# # echo -n "datafuselabs" | sha256sum
+# auth_string = "6db1a2f5da402b43c066fcadcbf78f04260b3236d9035e44dd463f21e29e6f3b"
+
+# This is for test
+[[query.udfs]]
+name = "ping"
+definition = "CREATE FUNCTION ping(STRING) RETURNS STRING LANGUAGE python HANDLER = 'ping' ADDRESS = 'http://0.0.0.0:8815'"
+
+[query.settings]
+aggregate_spilling_memory_ratio = 60
+join_spilling_memory_ratio = 60
+
+[log]
+
+[log.file]
+level = "DEBUG"
+format = "text"
+dir = "./.databend/logs_1"
+
+[log.query]
+on = true
+
+[log.structlog]
+on = true
+dir = "./.databend/structlog_1"
+
+[meta]
+# It is a list of `grpc_api_advertise_host:<grpc_api_port>` of databend-meta config
+endpoints = ["0.0.0.0:9191"]
+username = "root"
+password = "root"
+client_timeout_in_second = 60
+auto_sync_interval = 60
+
+# Storage config.
+[storage]
+# fs | s3 | azblob | obs | oss
+type = "fs"
+
+# Set a local folder to store your data.
+# Comment out this block if you're NOT using local file system as storage.
+[storage.fs]
+data_path = "/tmp/bendsave"
diff --git a/src/meta/service/src/configs/mod.rs b/src/meta/service/src/configs/mod.rs
index 300f89a40c408..853ac1a18d406 100644
--- a/src/meta/service/src/configs/mod.rs
+++ b/src/meta/service/src/configs/mod.rs
@@ -13,6 +13,6 @@
 // limitations under the License.
 
 mod inner;
-mod outer_v0;
+pub mod outer_v0;
 
 pub use inner::Config;
diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs
index 6835ac3955814..586605ca9305d 100644
--- a/src/query/config/src/config.rs
+++ b/src/query/config/src/config.rs
@@ -235,6 +235,34 @@ impl Config {
         Ok(conf)
     }
+
+    pub fn load_with_config_file(config_file: &str) -> Result<Self> {
+        let mut builder: serfig::Builder<Self> = serfig::Builder::default();
+
+        // Load from config file first.
+        {
+            let config_file = if !config_file.is_empty() {
+                config_file.to_string()
+            } else if let Ok(path) = env::var("CONFIG_FILE") {
+                path
+            } else {
+                "".to_string()
+            };
+
+            if !config_file.is_empty() {
+                builder = builder.collect(from_file(Toml, &config_file));
+            }
+        }
+
+        // Then, load from env.
+        builder = builder.collect(from_env());
+
+        // Check obsoleted.
+        let conf = builder.build()?;
+        conf.check_obsoleted()?;
+
+        Ok(conf)
+    }
 }
 
 /// Storage config group.
diff --git a/src/query/service/src/lib.rs b/src/query/service/src/lib.rs
index c561457f57628..c39d865309fbb 100644
--- a/src/query/service/src/lib.rs
+++ b/src/query/service/src/lib.rs
@@ -44,6 +44,7 @@ extern crate core;
 
 pub mod auth;
+pub mod builtin;
 pub mod catalogs;
 pub mod clusters;
 pub mod databases;
@@ -59,7 +60,6 @@ pub mod stream;
 pub mod table_functions;
 pub mod test_kits;
 
-mod builtin;
 mod global_services;
 
 pub use databend_common_sql as sql;