Skip to content

Commit 2d3d8d4

Browse files
committed
feat: infrastructure for "evil" test payloads
Add the overall infrastructure needed to implement a large zoo of evil test payloads, and include a few initial payloads to demonstrate the architecture. This also streamlines and partially redesigns the `guests/bundle` build script. For #16.
1 parent cb4233f commit 2d3d8d4

File tree

23 files changed

+646
-57
lines changed

23 files changed

+646
-57
lines changed

Cargo.lock

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ resolver = "3"
33
members = [
44
"arrow2bytes",
55
"guests/bundle",
6+
"guests/evil",
67
"guests/python",
78
"guests/rust",
89
"host",
@@ -24,6 +25,7 @@ datafusion-expr = { version = "49.0.1", default-features = false }
2425
datafusion-sql = { version = "49.0.1", default-features = false }
2526
datafusion-udf-wasm-arrow2bytes = { path = "arrow2bytes", version = "0.1.0" }
2627
datafusion-udf-wasm-bundle = { path = "guests/bundle", version = "0.1.0" }
28+
datafusion-udf-wasm-evil = { path = "guests/evil", version = "0.1.0" }
2729
datafusion-udf-wasm-guest = { path = "guests/rust", version = "0.1.0" }
2830
datafusion-udf-wasm-host = { path = "host", version = "0.1.0" }
2931
datafusion-udf-wasm-python = { path = "guests/python", version = "0.1.0" }
@@ -32,6 +34,7 @@ http = { version = "1.3.1", default-features = false }
3234
hyper = { version = "1.7", default-features = false }
3335
insta = { version = "1.43.2", "default-features" = false }
3436
pyo3 = { version = "0.27.1", default-features = false, features = ["macros"] }
37+
regex = { version = "1", default-features = false }
3538
sqlparser = { version = "0.55.0", default-features = false, features = [
3639
"std",
3740
"visitor"

guests/Justfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
mod evil
12
mod python
23
mod rust

guests/bundle/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ license.workspace = true
66

77
[build-dependencies]
88
# these need to be marked as build dependencies so the build script reruns whenever they change
9+
datafusion-udf-wasm-evil = { workspace = true, optional = true }
910
datafusion-udf-wasm-guest = { workspace = true, optional = true }
1011
datafusion-udf-wasm-python = { workspace = true, optional = true }
1112
# the actual build-time dependencies
1213
serde_json = "1.0.145"
1314

1415
[features]
16+
evil = ["dep:datafusion-udf-wasm-evil"]
1517
example = ["dep:datafusion-udf-wasm-guest"]
1618
python = ["dep:datafusion-udf-wasm-python"]
1719

guests/bundle/build.rs

Lines changed: 108 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33

44
use std::{
55
collections::HashMap,
6+
fs::File,
7+
io::Write,
68
path::{Path, PathBuf},
79
process::{Command, Stdio},
810
str::FromStr,
911
};
1012

1113
fn main() {
14+
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
1215
let profile: Profile = std::env::var("PROFILE").unwrap().parse().unwrap();
1316
let package_locations = package_locations();
1417

@@ -28,10 +31,16 @@ fn main() {
2831
|| std::env::var("DOCS_RS").is_ok()
2932
|| std::env::var("JUSTCHECK").is_ok();
3033

34+
let mut gen_file = File::create(out_dir.join("gen.rs")).unwrap();
35+
3136
for feature in FEATURES {
3237
println!("processing {}", feature.name);
33-
feature.build_or_stub(stub, profile, &package_locations);
38+
feature.build_or_stub(stub, profile, &package_locations, &out_dir, &mut gen_file);
3439
}
40+
41+
gen_file.flush().unwrap();
42+
43+
println!("cargo::rerun-if-changed=build.rs");
3544
}
3645

3746
/// Get locations for all packages in the dependency tree.
@@ -151,6 +160,27 @@ impl std::fmt::Display for Profile {
151160
}
152161
}
153162

163+
/// Artifact type.
164+
enum ArtifactType {
165+
/// Library.
166+
Lib,
167+
168+
/// Example.
169+
Example(&'static str),
170+
}
171+
172+
/// Just(file) command.
173+
struct JustCmd {
174+
/// Artifact type.
175+
artifact_type: ArtifactType,
176+
177+
/// Name of the resulting constant.
178+
const_name: &'static str,
179+
180+
/// Documentation for the created constant.
181+
doc: &'static str,
182+
}
183+
154184
/// Feature description.
155185
struct Feature {
156186
/// Lowercase feature name.
@@ -159,15 +189,8 @@ struct Feature {
159189
/// Package that contains the feature code.
160190
package: &'static str,
161191

162-
/// `just` command prefix that compiles the feature.
163-
///
164-
/// This will call `just prefix{profile}` within the package directory.
165-
just_cmd_prefix: &'static str,
166-
167-
/// Path components to file in target directory.
168-
///
169-
/// So `["foo", "bar.bin"]` will resolve to `CARGO_TARGET_DIR/wasm32-wasip2/foo/bar.bin`.
170-
just_out_file: &'static [&'static str],
192+
/// Just commands.
193+
just_cmds: &'static [JustCmd],
171194
}
172195

173196
impl Feature {
@@ -177,12 +200,13 @@ impl Feature {
177200
stub: bool,
178201
profile: Profile,
179202
package_locations: &HashMap<String, PathBuf>,
203+
out_dir: &Path,
204+
gen_file: &mut File,
180205
) {
181206
let Self {
182207
name,
183208
package,
184-
just_cmd_prefix,
185-
just_out_file,
209+
just_cmds,
186210
} = self;
187211

188212
let name_upper = name.to_uppercase();
@@ -191,32 +215,61 @@ impl Feature {
191215
return;
192216
}
193217

194-
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
195-
196-
let out_file = if stub {
197-
let out_file = out_dir.join(format!("{name}.wasm"));
198-
// write empty stub file
199-
std::fs::write(&out_file, b"").unwrap();
200-
out_file
201-
} else {
202-
let target_dir = out_dir.join(name);
218+
let cwd = package_locations.get(*package).unwrap();
219+
let target_dir = out_dir.join(name);
220+
221+
for just_cmd in *just_cmds {
222+
let JustCmd {
223+
artifact_type,
224+
const_name,
225+
doc,
226+
} = just_cmd;
227+
let out_file = if stub {
228+
let out_file = out_dir.join(format!("{name}.wasm"));
229+
// write empty stub file
230+
std::fs::write(&out_file, b"").unwrap();
231+
out_file
232+
} else {
233+
let mut just_cmd = "build-".to_owned();
234+
match artifact_type {
235+
ArtifactType::Lib => {}
236+
ArtifactType::Example(example) => {
237+
just_cmd.push_str(example);
238+
just_cmd.push('-');
239+
}
240+
}
241+
just_cmd.push_str(profile.as_str());
242+
243+
just_build(cwd, &just_cmd, &target_dir);
244+
245+
let out = target_dir.join("wasm32-wasip2").join(profile.as_str());
246+
match artifact_type {
247+
ArtifactType::Lib => out.join(format!("{}.wasm", package.replace("-", "_"))),
248+
ArtifactType::Example(example) => out
249+
.join("examples")
250+
.join(format!("{}.wasm", example.replace("-", "_"))),
251+
}
252+
};
203253

204-
just_build(
205-
package_locations.get(*package).unwrap(),
206-
&format!("{just_cmd_prefix}{profile}"),
207-
&target_dir,
254+
println!(
255+
"cargo::rustc-env=BIN_PATH_{const_name}={}",
256+
out_file.display(),
208257
);
209258

210-
just_out_file.iter().fold(
211-
target_dir.join("wasm32-wasip2").join(profile.as_str()),
212-
|path, part| path.join(part),
213-
)
214-
};
215-
216-
println!(
217-
"cargo::rustc-env=BIN_PATH_{name_upper}={}",
218-
out_file.display(),
219-
);
259+
writeln!(gen_file, "/// {doc}").unwrap();
260+
writeln!(gen_file, r#"#[cfg(feature = "{name}")]"#).unwrap();
261+
writeln!(gen_file, r#"pub static BIN_{const_name}: &[u8] = include_bytes!(env!("BIN_PATH_{const_name}"));"#).unwrap();
262+
263+
// we cannot depend directly on examples, so we explicitly tell Cargo to rerun this build script when the example source changes
264+
if let ArtifactType::Example(example) = artifact_type {
265+
println!(
266+
"cargo::rerun-if-changed={}",
267+
cwd.join("examples")
268+
.join(format!("{example}.rs",))
269+
.display(),
270+
);
271+
}
272+
}
220273
}
221274
}
222275

@@ -232,18 +285,33 @@ fn just_build(cwd: &Path, just_cmd: &str, cargo_target_dir: &Path) {
232285

233286
/// All supported features.
234287
///
235-
/// This must be in-sync with the feature list in `Cargo.toml` and the imports in `src/lib.rs`.
288+
/// This must be in-sync with the feature list in `Cargo.toml`.
236289
const FEATURES: &[Feature] = &[
290+
Feature {
291+
name: "evil",
292+
package: "datafusion-udf-wasm-evil",
293+
just_cmds: &[JustCmd {
294+
artifact_type: ArtifactType::Lib,
295+
const_name: "EVIL",
296+
doc: "Evil payloads.",
297+
}],
298+
},
237299
Feature {
238300
name: "example",
239301
package: "datafusion-udf-wasm-guest",
240-
just_cmd_prefix: "build-add-one-",
241-
just_out_file: &["examples", "add_one.wasm"],
302+
just_cmds: &[JustCmd {
303+
artifact_type: ArtifactType::Example("add-one"),
304+
const_name: "EXAMPLE",
305+
doc: r#""add-one" example."#,
306+
}],
242307
},
243308
Feature {
244309
name: "python",
245310
package: "datafusion-udf-wasm-python",
246-
just_cmd_prefix: "",
247-
just_out_file: &["datafusion_udf_wasm_python.wasm"],
311+
just_cmds: &[JustCmd {
312+
artifact_type: ArtifactType::Lib,
313+
const_name: "PYTHON",
314+
doc: "Python UDF.",
315+
}],
248316
},
249317
];

guests/bundle/src/lib.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
11
//! Bundles guests as pre-compiled WASM bytecode.
22
3-
/// "add-one" example.
4-
#[cfg(feature = "example")]
5-
pub static BIN_EXAMPLE: &[u8] = include_bytes!(env!("BIN_PATH_EXAMPLE"));
6-
7-
/// Python UDF.
8-
#[cfg(feature = "python")]
9-
pub static BIN_PYTHON: &[u8] = include_bytes!(env!("BIN_PATH_PYTHON"));
3+
include!(concat!(env!("OUT_DIR"), "/gen.rs"));

guests/evil/Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "datafusion-udf-wasm-evil"
3+
version.workspace = true
4+
edition.workspace = true
5+
license.workspace = true
6+
7+
[lib]
8+
crate-type = ["cdylib"]
9+
10+
[dependencies]
11+
arrow.workspace = true
12+
datafusion-common.workspace = true
13+
datafusion-expr.workspace = true
14+
datafusion-udf-wasm-guest.workspace = true
15+
tar.workspace = true
16+
17+
[lints]
18+
workspace = true

guests/evil/Justfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[private]
2+
build profile:
3+
@echo ::group::guests::evil::build-{{profile}}
4+
cargo build --target=wasm32-wasip2 --profile={{replace(profile, "debug", "dev")}}
5+
@echo ::endgroup::
6+
7+
# build library in debug mode
8+
build-debug: (build "debug")
9+
10+
# build library in release mode
11+
build-release: (build "release")
12+
13+
# checks build
14+
check-build: build-debug

guests/evil/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Evil Test Payloads
2+
3+
These are payloads that try to stress the WASM sandbox. They are NOT meant to be used in production.
4+
5+
For efficiency, we generate a single WASM binary that contains all payloads and is multiplexed using the `EVIL` environment variable.

0 commit comments

Comments
 (0)