Skip to content

Commit 3cb216e

Browse files
committed
Remove kstring in favor of BString (#1460)
Let's not trade safety for some convenience. This reduces performance by 5% at least (in the WebKit repository at 886077e077a496a6e398df52a4b7915d8cd68f76): ``` ❯ hyperfine "/Users/byron/dev/github.com/Byron/gitoxide/target/release/gix index entries ':(attr:export-ignore)' -i" "gix index entries ':(attr:export-ignore)' -i" Benchmark 1: /Users/byron/dev/github.com/Byron/gitoxide/target/release/gix index entries ':(attr:export-ignore)' -i Time (mean ± σ): 820.1 ms ± 71.7 ms [User: 759.9 ms, System: 85.9 ms] Range (min … max): 778.8 ms … 984.3 ms 10 runs Warning: Statistical outliers were detected. Consider re-running this benchmark on a quiet system without any interferences from other programs. It might help to use the '--warmup' or '--prepare' options. Benchmark 2: gix index entries ':(attr:export-ignore)' -i Time (mean ± σ): 782.1 ms ± 1.8 ms [User: 760.1 ms, System: 43.2 ms] Range (min … max): 780.0 ms … 786.0 ms 10 runs Summary gix index entries ':(attr:export-ignore)' -i ran 1.05 ± 0.09 times faster than /Users/byron/dev/github.com/Byron/gitoxide/target/release/gix index entries ':(attr:export-ignore)' -i ```
1 parent a807dd1 commit 3cb216e

File tree

7 files changed

+44
-22
lines changed

7 files changed

+44
-22
lines changed

gix-attributes/src/lib.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
doc = ::document_features::document_features!()
77
)]
88
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
9-
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
9+
#![deny(missing_docs, rust_2018_idioms)]
10+
#![forbid(unsafe_code)]
1011

1112
pub use gix_glob as glob;
1213
use kstring::{KString, KStringRef};

gix-attributes/src/name.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1+
use crate::{Name, NameRef};
12
use bstr::{BStr, BString, ByteSlice};
23
use kstring::KStringRef;
34

4-
use crate::{Name, NameRef};
5-
65
impl<'a> NameRef<'a> {
76
/// Turn this ref into its owned counterpart.
87
pub fn to_owned(self) -> Name {

gix-attributes/src/parse.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
use std::borrow::Cow;
22

3+
use crate::{name, AssignmentRef, Name, NameRef, StateRef};
34
use bstr::{BStr, ByteSlice};
45
use kstring::KStringRef;
56

6-
use crate::{name, AssignmentRef, Name, NameRef, StateRef};
7-
87
/// The kind of attribute that was parsed.
98
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
109
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]

gix-attributes/src/search/mod.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
use std::collections::HashMap;
2-
31
use kstring::KString;
42
use smallvec::SmallVec;
3+
use std::collections::HashMap;
54

65
use crate::{Assignment, AssignmentRef};
76

gix-attributes/src/state.rs

+9-14
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,24 @@
1-
use bstr::{BStr, ByteSlice};
2-
use kstring::{KString, KStringRef};
3-
41
use crate::{State, StateRef};
2+
use bstr::{BStr, BString, ByteSlice};
53

64
/// A container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded.
75
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
86
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
9-
pub struct Value(KString);
7+
// TODO: This should be some sort of 'smallbstring' - but can't use `kstring` here due to UTF8 requirement. 5% performance boost possible.
8+
// What's really needed here is a representation that displays as string when serialized which helps with JSON.
9+
// Maybe `smallvec` with display and serialization wrapper would do the trick?
10+
pub struct Value(BString);
1011

1112
/// A reference container to encapsulate a tightly packed and typically unallocated byte value that isn't necessarily UTF8 encoded.
1213
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
1314
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
14-
pub struct ValueRef<'a>(#[cfg_attr(feature = "serde", serde(borrow))] KStringRef<'a>);
15+
pub struct ValueRef<'a>(#[cfg_attr(feature = "serde", serde(borrow))] &'a [u8]);
1516

1617
/// Lifecycle
1718
impl<'a> ValueRef<'a> {
1819
/// Keep `input` as our value.
1920
pub fn from_bytes(input: &'a [u8]) -> Self {
20-
Self(KStringRef::from_ref(
21-
// SAFETY: our API makes accessing that value as `str` impossible, so illformed UTF8 is never exposed as such.
22-
#[allow(unsafe_code)]
23-
unsafe {
24-
std::str::from_utf8_unchecked(input)
25-
},
26-
))
21+
Self(input)
2722
}
2823
}
2924

@@ -42,7 +37,7 @@ impl ValueRef<'_> {
4237

4338
impl<'a> From<&'a str> for ValueRef<'a> {
4439
fn from(v: &'a str) -> Self {
45-
ValueRef(v.into())
40+
ValueRef(v.as_bytes().into())
4641
}
4742
}
4843

@@ -54,7 +49,7 @@ impl<'a> From<ValueRef<'a>> for Value {
5449

5550
impl From<&str> for Value {
5651
fn from(v: &str) -> Self {
57-
Value(KString::from_ref(v))
52+
Value(v.as_bytes().into())
5853
}
5954
}
6055

gix-attributes/tests/parse/mod.rs

+29
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use bstr::BString;
2+
use gix_attributes::state::ValueRef;
23
use gix_attributes::{parse, StateRef};
34
use gix_glob::pattern::Mode;
45
use gix_testtools::fixture_bytes;
@@ -275,6 +276,19 @@ fn attributes_can_have_values() {
275276
);
276277
}
277278

279+
#[test]
280+
fn attributes_can_have_illformed_utf8() {
281+
assert_eq!(
282+
byte_line(b"p a=one b=\xC3\x28\x41 c=d "),
283+
(
284+
pattern("p", Mode::NO_SUB_DIR, None),
285+
vec![value("a", "one"), byte_value("b", b"\xC3\x28\x41"), value("c", "d")],
286+
1
287+
),
288+
"illformed UTF8 is fully supported"
289+
);
290+
}
291+
278292
#[test]
279293
fn attributes_see_state_adjustments_over_value_assignments() {
280294
assert_eq!(
@@ -325,6 +339,10 @@ fn value<'b>(attr: &str, value: &'b str) -> (BString, StateRef<'b>) {
325339
(attr.into(), StateRef::Value(value.into()))
326340
}
327341

342+
fn byte_value<'b>(attr: &str, value: &'b [u8]) -> (BString, StateRef<'b>) {
343+
(attr.into(), StateRef::Value(ValueRef::from_bytes(value)))
344+
}
345+
328346
fn pattern(name: &str, flags: gix_glob::pattern::Mode, first_wildcard_pos: Option<usize>) -> parse::Kind {
329347
parse::Kind::Pattern(gix_glob::Pattern {
330348
text: name.into(),
@@ -344,6 +362,17 @@ fn line(input: &str) -> ExpandedAttribute {
344362
try_line(input).unwrap()
345363
}
346364

365+
fn byte_line(input: &[u8]) -> ExpandedAttribute {
366+
try_byte_line(input).unwrap()
367+
}
368+
369+
fn try_byte_line(input: &[u8]) -> Result<ExpandedAttribute, parse::Error> {
370+
let mut lines = gix_attributes::parse(input);
371+
let res = expand(lines.next().unwrap())?;
372+
assert!(lines.next().is_none(), "expected only one line");
373+
Ok(res)
374+
}
375+
347376
fn lenient_lines(input: &str) -> Vec<ExpandedAttribute> {
348377
gix_attributes::parse(input.as_bytes())
349378
.map(expand)

gix-attributes/tests/search/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ fn given_attributes_are_made_available_in_given_order() -> crate::Result {
270270
fn size_of_outcome() {
271271
assert_eq!(
272272
std::mem::size_of::<Outcome>(),
273-
904,
273+
840,
274274
"it's quite big, shouldn't change without us noticing"
275275
)
276276
}

0 commit comments

Comments
 (0)