Skip to content

avoid UTF8 decoding in git-transport/git-protocol #634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions git-packetline/src/line/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ use crate::{decode, BandRef, Channel, ErrorRef, PacketLineRef, TextRef, ERR_PREF

impl<'a> PacketLineRef<'a> {
/// Return this instance as slice if it's [`Data`][PacketLineRef::Data].
pub fn as_slice(&self) -> Option<&[u8]> {
pub fn as_slice(&self) -> Option<&'a [u8]> {
match self {
PacketLineRef::Data(d) => Some(d),
PacketLineRef::Flush | PacketLineRef::Delimiter | PacketLineRef::ResponseEnd => None,
}
}
/// Return this instance's [`as_slice()`][PacketLineRef::as_slice()] as [`BStr`].
pub fn as_bstr(&self) -> Option<&BStr> {
pub fn as_bstr(&self) -> Option<&'a BStr> {
self.as_slice().map(Into::into)
}
/// Interpret this instance's [`as_slice()`][PacketLineRef::as_slice()] as [`ErrorRef`].
Expand All @@ -20,13 +20,13 @@ impl<'a> PacketLineRef<'a> {
///
/// Note that this creates an unchecked error using the slice verbatim, which is useful to [serialize it][ErrorRef::write_to()].
/// See [`check_error()`][PacketLineRef::check_error()] for a version that assures the error information is in the expected format.
pub fn as_error(&self) -> Option<ErrorRef<'_>> {
pub fn as_error(&self) -> Option<ErrorRef<'a>> {
self.as_slice().map(ErrorRef)
}
/// Check this instance's [`as_slice()`][PacketLineRef::as_slice()] is a valid [`ErrorRef`] and return it.
///
/// This works for any data received in an error [channel][crate::Channel].
pub fn check_error(&self) -> Option<ErrorRef<'_>> {
pub fn check_error(&self) -> Option<ErrorRef<'a>> {
self.as_slice().and_then(|data| {
if data.len() >= ERR_PREFIX.len() && &data[..ERR_PREFIX.len()] == ERR_PREFIX {
Some(ErrorRef(&data[ERR_PREFIX.len()..]))
Expand All @@ -36,15 +36,15 @@ impl<'a> PacketLineRef<'a> {
})
}
/// Return this instance as text, with the trailing newline truncated if present.
pub fn as_text(&self) -> Option<TextRef<'_>> {
pub fn as_text(&self) -> Option<TextRef<'a>> {
self.as_slice().map(Into::into)
}

/// Interpret the data in this [`slice`][PacketLineRef::as_slice()] as [`BandRef`] according to the given `kind` of channel.
///
/// Note that this is only relevant in a side-band channel.
/// See [`decode_band()`][PacketLineRef::decode_band()] in case `kind` is unknown.
pub fn as_band(&self, kind: Channel) -> Option<BandRef<'_>> {
pub fn as_band(&self, kind: Channel) -> Option<BandRef<'a>> {
self.as_slice().map(|d| match kind {
Channel::Data => BandRef::Data(d),
Channel::Progress => BandRef::Progress(d),
Expand All @@ -53,7 +53,7 @@ impl<'a> PacketLineRef<'a> {
}

/// Decode the band of this [`slice`][PacketLineRef::as_slice()]
pub fn decode_band(&self) -> Result<BandRef<'_>, decode::band::Error> {
pub fn decode_band(&self) -> Result<BandRef<'a>, decode::band::Error> {
let d = self.as_slice().ok_or(decode::band::Error::NonDataLine)?;
Ok(match d[0] {
1 => BandRef::Data(&d[1..]),
Expand All @@ -73,11 +73,11 @@ impl<'a> From<&'a [u8]> for TextRef<'a> {

impl<'a> TextRef<'a> {
/// Return this instance's data.
pub fn as_slice(&self) -> &[u8] {
pub fn as_slice(&self) -> &'a [u8] {
self.0
}
/// Return this instance's data as [`BStr`].
pub fn as_bstr(&self) -> &BStr {
pub fn as_bstr(&self) -> &'a BStr {
self.0.into()
}
}
Expand Down
82 changes: 73 additions & 9 deletions git-packetline/src/read/sidebands/async_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ where
{
fn drop(&mut self) {
if let State::Idle { ref mut parent } = self.state {
parent.as_mut().unwrap().reset();
parent
.as_mut()
.expect("parent is always available if we are idle")
.reset();
}
}
}
Expand Down Expand Up @@ -118,14 +121,22 @@ where
/// Forwards to the parent [StreamingPeekableIter::reset_with()]
pub fn reset_with(&mut self, delimiters: &'static [PacketLineRef<'static>]) {
if let State::Idle { ref mut parent } = self.state {
parent.as_mut().unwrap().reset_with(delimiters)
parent
.as_mut()
.expect("parent is always available if we are idle")
.reset_with(delimiters)
}
}

/// Forwards to the parent [StreamingPeekableIter::stopped_at()]
pub fn stopped_at(&self) -> Option<PacketLineRef<'static>> {
match self.state {
State::Idle { ref parent } => parent.as_ref().unwrap().stopped_at,
State::Idle { ref parent } => {
parent
.as_ref()
.expect("parent is always available if we are idle")
.stopped_at
}
_ => None,
}
}
Expand All @@ -137,10 +148,19 @@ where

/// Effectively forwards to the parent [StreamingPeekableIter::peek_line()], allowing to see what would be returned
/// next on a call to [`read_line()`][io::BufRead::read_line()].
pub async fn peek_data_line(&mut self) -> Option<std::io::Result<Result<&[u8], crate::decode::Error>>> {
///
/// # Warning
///
/// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it.
pub async fn peek_data_line(&mut self) -> Option<std::io::Result<Result<&[u8], decode::Error>>> {
match self.state {
State::Idle { ref mut parent } => match parent.as_mut().unwrap().peek_line().await {
Some(Ok(Ok(crate::PacketLineRef::Data(line)))) => Some(Ok(Ok(line))),
State::Idle { ref mut parent } => match parent
.as_mut()
.expect("parent is always available if we are idle")
.peek_line()
.await
{
Some(Ok(Ok(PacketLineRef::Data(line)))) => Some(Ok(Ok(line))),
Some(Ok(Err(err))) => Some(Ok(Err(err))),
Some(Err(err)) => Some(Err(err)),
_ => None,
Expand All @@ -149,10 +169,55 @@ where
}
}

/// Read a packet line as line.
/// Read a packet line as string line.
pub fn read_line<'b>(&'b mut self, buf: &'b mut String) -> ReadLineFuture<'a, 'b, T, F> {
ReadLineFuture { parent: self, buf }
}

/// Read a packet line from the underlying packet reader, returning empty lines if a stop-packetline was reached.
///
/// # Warning
///
/// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it.
pub async fn read_data_line(&mut self) -> Option<std::io::Result<Result<PacketLineRef<'_>, decode::Error>>> {
match &mut self.state {
State::Idle { parent: Some(parent) } => {
assert_eq!(
self.cap, 0,
"we don't support partial buffers right now - read-line must be used consistently"
);
parent.read_line().await
}
_ => None,
}
}
}

pub struct ReadDataLineFuture<'a, 'b, T: AsyncRead, F> {
parent: &'b mut WithSidebands<'a, T, F>,
buf: &'b mut Vec<u8>,
}

impl<'a, 'b, T, F> Future for ReadDataLineFuture<'a, 'b, T, F>
where
T: AsyncRead + Unpin,
F: FnMut(bool, &[u8]) + Unpin,
{
type Output = std::io::Result<usize>;

fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
assert_eq!(
self.parent.cap, 0,
"we don't support partial buffers right now - read-line must be used consistently"
);
let Self { buf, parent } = &mut *self;
let line = ready!(Pin::new(parent).poll_fill_buf(cx))?;
buf.clear();
buf.extend_from_slice(line);
let bytes = line.len();
self.parent.cap = 0;
Poll::Ready(Ok(bytes))
}
}

pub struct ReadLineFuture<'a, 'b, T: AsyncRead, F> {
Expand All @@ -174,8 +239,7 @@ where
);
let Self { buf, parent } = &mut *self;
let line = std::str::from_utf8(ready!(Pin::new(parent).poll_fill_buf(cx))?)
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
.unwrap();
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
buf.clear();
buf.push_str(line);
let bytes = line.len();
Expand Down
23 changes: 19 additions & 4 deletions git-packetline/src/read/sidebands/blocking_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,31 @@ where

/// Effectively forwards to the parent [StreamingPeekableIter::peek_line()], allowing to see what would be returned
/// next on a call to [`read_line()`][io::BufRead::read_line()].
///
/// # Warning
///
/// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it.
pub fn peek_data_line(&mut self) -> Option<io::Result<Result<&[u8], crate::decode::Error>>> {
match self.parent.peek_line() {
Some(Ok(Ok(crate::PacketLineRef::Data(line)))) => Some(Ok(Ok(line))),
Some(Ok(Ok(PacketLineRef::Data(line)))) => Some(Ok(Ok(line))),
Some(Ok(Err(err))) => Some(Ok(Err(err))),
Some(Err(err)) => Some(Err(err)),
_ => None,
}
}

/// Read a whole packetline from the underlying reader, with empty lines indicating a stop packetline.
///
/// # Warning
///
/// This skips all sideband handling and may return an unprocessed line with sidebands still contained in it.
pub fn read_data_line(&mut self) -> Option<io::Result<Result<PacketLineRef<'_>, crate::decode::Error>>> {
assert_eq!(
self.cap, 0,
"we don't support partial buffers right now - read-line must be used consistently"
);
self.parent.read_line()
}
}

impl<'a, T, F> BufRead for WithSidebands<'a, T, F>
Expand Down Expand Up @@ -157,9 +174,7 @@ where
self.cap, 0,
"we don't support partial buffers right now - read-line must be used consistently"
);
let line = std::str::from_utf8(self.fill_buf()?)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))
.unwrap();
let line = std::str::from_utf8(self.fill_buf()?).map_err(|err| io::Error::new(io::ErrorKind::Other, err))?;
buf.push_str(line);
let bytes = line.len();
self.cap = 0;
Expand Down
46 changes: 46 additions & 0 deletions git-packetline/tests/read/sideband.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,52 @@ async fn read_line_trait_method_reads_one_packet_line_at_a_time() -> crate::Resu
Ok(())
}

#[maybe_async::test(feature = "blocking-io", async(feature = "async-io", async_std::test))]
async fn readline_reads_one_packet_line_at_a_time() -> crate::Result {
let buf = fixture_bytes("v1/01-clone.combined-output-no-binary");

let mut rd = git_packetline::StreamingPeekableIter::new(&buf[..], &[PacketLineRef::Flush]);

let mut r = rd.as_read();
let line = r.read_data_line().await.unwrap()??.as_bstr().unwrap();
assert_eq!(line, "808e50d724f604f69ab93c6da2919c014667bedb HEAD\0multi_ack thin-pack side-band side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative no-progress include-tag multi_ack_detailed symref=HEAD:refs/heads/master object-format=sha1 agent=git/2.28.0\n");
let line = r.read_data_line().await.unwrap()??.as_bstr().unwrap();
assert_eq!(line, "808e50d724f604f69ab93c6da2919c014667bedb refs/heads/master\n");
let line = r.read_data_line().await;
assert!(line.is_none(), "flush means `None`");
let line = r.read_data_line().await;
assert!(line.is_none(), "…which can't be overcome unless the reader is reset");
assert_eq!(
r.stopped_at(),
Some(PacketLineRef::Flush),
"it knows what stopped the reader"
);

drop(r);
rd.reset();

let mut r = rd.as_read();
let line = r.read_data_line().await.unwrap()??.as_bstr().unwrap();
assert_eq!(line.as_bstr(), "NAK\n");

drop(r);

let mut r = rd.as_read_with_sidebands(|_, _| ());
let line = r.read_data_line().await.unwrap()??.as_bstr().unwrap();
assert_eq!(
line.as_bstr(),
"\x02Enumerating objects: 3, done.\n",
"sidebands are ignored entirely here"
);
for _ in 0..6 {
let _discard_more_progress = r.read_data_line().await.unwrap()??.as_bstr().unwrap();
}
let line = r.read_data_line().await;
assert!(line.is_none(), "and we have reached the end");

Ok(())
}

#[maybe_async::test(feature = "blocking-io", async(feature = "async-io", async_std::test))]
async fn peek_past_an_actual_eof_is_an_error() -> crate::Result {
let input = b"0009ERR e";
Expand Down
2 changes: 1 addition & 1 deletion git-protocol/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ git-credentials = { version = "^0.7.0", path = "../git-credentials" }

thiserror = "1.0.32"
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]}
bstr = { version = "1.0.1", default-features = false, features = ["std"] }
bstr = { version = "1.0.1", default-features = false, features = ["std", "unicode"] }
nom = { version = "7", default-features = false, features = ["std"]}
btoi = "0.4.2"

Expand Down
2 changes: 1 addition & 1 deletion git-protocol/src/command/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ mod v2 {
use git_transport::client::Capabilities;

fn capabilities(command: &str, input: &str) -> Capabilities {
Capabilities::from_lines(Some(Ok("version 2".into())), format!("{}={}", command, input))
Capabilities::from_lines(format!("version 2\n{}={}", command, input).into())
.expect("valid input for V2 capabilities")
}

Expand Down
Loading