Skip to content

Commit fc28701

Browse files
committed
Add new gix-ignore crate with the contents moved and adapted from gix-attributes.
1 parent b645d28 commit fc28701

File tree

16 files changed

+710
-5
lines changed

16 files changed

+710
-5
lines changed

Cargo.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ members = [
173173
"gix-tempfile",
174174
"gix-lock",
175175
"gix-attributes",
176+
"gix-ignore",
176177
"gix-pathspec",
177178
"gix-refspec",
178179
"gix-path",

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ is usable to some extent.
7676
* [gix-discover](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-discover)
7777
* [gix-path](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-path)
7878
* [gix-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-attributes)
79+
* [gix-ignore](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-ignore)
7980
* [gix-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-pathspec)
8081
* [gix-index](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-index)
8182
* [gix-revision](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-revision)

crate-status.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -328,11 +328,13 @@ Check out the [performance discussion][gix-traverse-performance] as well.
328328
* [ ] Some examples
329329

330330
### gix-attributes
331-
* [x] parse git-ignore files (aka gix-attributes without the attributes or negation)
332-
* [x] parse gix-attributes files
333-
* [ ] create an attributes stack, ideally one that includes 'ignored' status from .gitignore files.
334-
* [ ] support for built-in `binary` macro for `-text -diff -merge`
335-
331+
* [x] parse `.gitattributes` files
332+
* [ ] an attributes stack for matching paths to their attributes, with support for built-in `binary` macro for `-text -diff -merge`
333+
334+
### gix-ignore
335+
* [x] parse `.gitignore` files
336+
* [x] an attributes stack for checking if paths are excluded
337+
336338
### gix-quote
337339
* **ansi-c**
338340
* [x] quote

gix-ignore/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Changelog
2+
3+
All notable changes to this project will be documented in this file.
4+
5+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7+

gix-ignore/Cargo.toml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
[package]
2+
name = "gix-ignore"
3+
version = "0.1.0"
4+
repository = "https://github.com/Byron/gitoxide"
5+
license = "MIT/Apache-2.0"
6+
description = "A WIP crate of the gitoxide project dealing with .gitignore files"
7+
authors = ["Sebastian Thiel <[email protected]>"]
8+
edition = "2021"
9+
include = ["src/**/*", "CHANGELOG.md"]
10+
rust-version = "1.64"
11+
12+
[lib]
13+
doctest = false
14+
15+
[features]
16+
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
17+
serde1 = ["serde", "bstr/serde", "gix-glob/serde1"]
18+
19+
[dependencies]
20+
gix-glob = { version = "^0.5.5", path = "../gix-glob" }
21+
gix-path = { version = "^0.7.3", path = "../gix-path" }
22+
23+
bstr = { version = "1.3.0", default-features = false, features = ["std", "unicode"]}
24+
unicode-bom = "2.0.2"
25+
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]}
26+
27+
document-features = { version = "0.2.1", optional = true }
28+
29+
[dev-dependencies]
30+
gix-testtools = { path = "../tests/tools"}
31+
gix-utils = { path = "../gix-utils" }
32+
33+
[package.metadata.docs.rs]
34+
all-features = true
35+
rustdoc-args = ["--cfg", "docsrs"]
36+
features = ["document-features"]

gix-ignore/src/lib.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//! Parse `.gitignore` files and provide utilities to match against them.
2+
//!
3+
//! ## Feature Flags
4+
#![cfg_attr(
5+
feature = "document-features",
6+
cfg_attr(doc, doc = ::document_features::document_features!())
7+
)]
8+
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
9+
#![deny(missing_docs, rust_2018_idioms)]
10+
#![forbid(unsafe_code)]
11+
12+
pub use gix_glob as glob;
13+
14+
///
15+
pub mod search;
16+
/// A group of pattern lists, each optionally associated with a base path, that is searched to find matching patterns.
17+
///
18+
/// Pattern lists with a base path are queried relative to that base, otherwise they are relative to the repository root.
19+
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)]
20+
pub struct Search {
21+
/// A list of pattern lists, each representing the patterns from a file or specified by hand, in the order they were
22+
/// specified in.
23+
///
24+
/// When matching, this order is reversed, so lists added later are consulted first.
25+
pub patterns: Vec<gix_glob::search::pattern::List<search::Ignore>>,
26+
}
27+
28+
///
29+
pub mod parse;
30+
31+
/// Parse git ignore patterns, line by line, from `bytes`.
///
/// This is a convenience wrapper around [`parse::Lines::new()`]; the returned iterator
/// yields each parsed pattern together with the number of the line it was found on.
32+
pub fn parse(bytes: &[u8]) -> parse::Lines<'_> {
33+
parse::Lines::new(bytes)
34+
}

gix-ignore/src/parse.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
use bstr::ByteSlice;
2+
3+
/// An iterator over line-wise ignore patterns parsed from a buffer.
///
/// Each item is a pattern paired with the 1-based number of the line it was parsed from.
4+
pub struct Lines<'a> {
5+
// The lines of the input buffer that were not consumed yet.
lines: bstr::Lines<'a>,
6+
// The amount of lines consumed so far, which is also the 1-based number of the line read most recently.
line_no: usize,
7+
}
8+
9+
impl<'a> Lines<'a> {
10+
/// Create a new instance from `buf` to parse ignore patterns from.
///
/// The bytes of a leading byte-order mark, as detected by `unicode_bom`, are skipped
/// so they do not end up as part of the first pattern.
11+
pub fn new(buf: &'a [u8]) -> Self {
12+
let bom = unicode_bom::Bom::from(buf);
13+
Lines {
14+
lines: buf[bom.len()..].lines(),
15+
// No line was read yet; the counter is incremented before each line is looked at.
line_no: 0,
16+
}
17+
}
18+
}
19+
20+
impl<'a> Iterator for Lines<'a> {
21+
/// The parsed pattern along with the 1-based number of the line it was found on.
type Item = (gix_glob::Pattern, usize);
22+
23+
/// Return the next pattern, skipping every line that does not produce one.
///
/// Skipped lines still advance the line counter, so the returned line number always
/// refers to the position in the original buffer.
fn next(&mut self) -> Option<Self::Item> {
24+
for line in self.lines.by_ref() {
25+
self.line_no += 1;
26+
// Lines starting with `#` are comments. An escaped `\#` starts with a backslash and thus isn't affected.
if line.first() == Some(&b'#') {
27+
continue;
28+
}
29+
// Unescaped trailing spaces are not part of the pattern and are removed before parsing.
match gix_glob::Pattern::from_bytes(truncate_non_escaped_trailing_spaces(line)) {
30+
// Not a pattern (presumably an empty line - see `gix_glob::Pattern::from_bytes`), try the next line.
None => continue,
31+
Some(pattern) => return Some((pattern, self.line_no)),
32+
}
33+
}
34+
None
35+
}
36+
}
37+
38+
/// Return `buf` with its run of trailing spaces removed, keeping spaces that are escaped with a backslash.
///
/// The result is always a sub-slice borrowed from `buf`, nothing is copied or allocated.
/// If `buf` ends with a lone backslash that escapes nothing, `buf` is returned unchanged.
39+
fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> &[u8] {
40+
// Start of the run of consecutive spaces we are currently in, if any.
let mut last_space_pos = None;
41+
let mut bytes = buf.iter().enumerate();
42+
// `while let` instead of `for` as the backslash arm pulls an extra byte from the iterator.
while let Some((pos, b)) = bytes.next() {
43+
match *b {
44+
b' ' => {
45+
// Only the first space of a run is remembered, as this is where we would truncate.
last_space_pos.get_or_insert(pos);
46+
continue;
47+
}
48+
b'\\' => {
49+
// A backslash escapes the next byte, which is consumed here so that an escaped space
// doesn't start a run of trailing spaces.
if bytes.next().is_none() {
50+
return buf;
51+
}
52+
}
53+
_ => {}
54+
}
55+
// Reached for anything but an unescaped space: the spaces seen so far weren't trailing.
last_space_pos = None;
56+
}
57+
58+
if let Some(pos) = last_space_pos {
59+
&buf[..pos]
60+
} else {
61+
buf
62+
}
63+
}

gix-ignore/src/search.rs

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
use crate::Search;
2+
use bstr::{BStr, ByteSlice};
3+
use gix_glob::search::{pattern, Pattern};
4+
use std::ffi::OsString;
5+
use std::path::{Path, PathBuf};
6+
7+
/// Describes a matching pattern within a search for ignored paths.
///
/// All fields borrow from the pattern list that produced the match.
8+
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
9+
pub struct Match<'a, T> {
10+
/// The glob pattern itself, like `/target/*`.
11+
pub pattern: &'a gix_glob::Pattern,
12+
/// The value associated with the pattern, which is `()` for the ignore patterns handled in this module.
13+
pub value: &'a T,
14+
/// The path to the source from which the pattern was loaded, or `None` if it was specified by other means.
15+
pub source: Option<&'a Path>,
16+
/// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided.
17+
pub sequence_number: usize,
18+
}
19+
20+
/// An implementation of the [`Pattern`] trait for ignore patterns.
///
/// It is a unit type without any data, used as type parameter to select ignore-specific parsing of pattern lists.
21+
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)]
22+
pub struct Ignore;
23+
24+
impl Pattern for Ignore {
25+
/// Ignore patterns carry no additional information, a path either matches or it doesn't.
type Value = ();
26+
27+
/// Parse `bytes` as `.gitignore` content and turn each pattern into a mapping whose
/// `sequence_number` is the number of the line the pattern was found on.
///
/// `_source` is not used.
fn bytes_to_patterns(bytes: &[u8], _source: &std::path::Path) -> Vec<pattern::Mapping<Self::Value>> {
28+
crate::parse(bytes)
29+
.map(|(pattern, line_number)| pattern::Mapping {
30+
pattern,
31+
value: (),
32+
sequence_number: line_number,
33+
})
34+
.collect()
35+
}
36+
37+
/// Always returns `true`, which means every parsed ignore pattern takes part in matching.
fn may_use_glob_pattern(_pattern: &gix_glob::Pattern) -> bool {
38+
true
39+
}
40+
}
41+
42+
/// Instantiation of a search for ignore patterns.
43+
impl Search {
44+
/// Given `git_dir`, a `.git` repository, load static ignore patterns from `info/exclude`
45+
/// and from `excludes_file` if it is provided.
46+
/// Note that it's not considered an error if the provided `excludes_file` does not exist.
///
/// `buf` is handed to the file loader for each file (presumably as scratch space for the file contents).
///
/// # Errors
///
/// I/O errors reported while loading either file are returned as is.
47+
pub fn from_git_dir(
48+
git_dir: impl AsRef<Path>,
49+
excludes_file: Option<PathBuf>,
50+
buf: &mut Vec<u8>,
51+
) -> std::io::Result<Self> {
52+
let mut group = Self::default();
53+
54+
let follow_symlinks = true;
55+
// order matters! Lists are searched in reverse when matching, so the list pushed last (`info/exclude`)
// takes precedence over the one pushed first (`excludes_file`).
56+
group.patterns.extend(
57+
excludes_file
58+
// `from_file` yields a `Result<Option<_>>`; transpose twice so that `?` can propagate the error
// while a missing list simply adds nothing.
.and_then(|file| pattern::List::<Ignore>::from_file(file, None, follow_symlinks, buf).transpose())
59+
.transpose()?,
60+
);
61+
group.patterns.extend(pattern::List::<Ignore>::from_file(
62+
git_dir.as_ref().join("info").join("exclude"),
63+
None,
64+
follow_symlinks,
65+
buf,
66+
)?);
67+
Ok(group)
68+
}
69+
70+
/// Parse a list of patterns, using slashes as path separators
///
/// All patterns end up in a single list without source or base. Entries that cannot be converted
/// into bytes or that do not parse as glob pattern are dropped silently.
/// The `sequence_number` of each pattern is its position in `patterns`, with dropped entries counted as well.
71+
pub fn from_overrides(patterns: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
72+
Search {
73+
patterns: vec![pattern::List {
74+
patterns: patterns
75+
.into_iter()
76+
.map(Into::into)
77+
// Enumerate before filtering to have sequence numbers refer to the position in the input.
.enumerate()
78+
.filter_map(|(seq_id, pattern)| {
79+
let pattern = gix_path::try_into_bstr(PathBuf::from(pattern)).ok()?;
80+
gix_glob::parse(pattern.as_ref()).map(|p| pattern::Mapping {
81+
pattern: p,
82+
value: (),
83+
sequence_number: seq_id,
84+
})
85+
})
86+
.collect(),
87+
source: None,
88+
base: None,
89+
}],
90+
}
91+
}
92+
}
93+
94+
/// Mutation
95+
impl Search {
96+
/// Add patterns as parsed from `bytes`, providing their `source` path and possibly their `root` path, the path they
97+
/// are relative to. This also means that `source` is contained within `root` if `root` is provided.
///
/// The patterns are appended as a new list, which is consulted before all previously added lists
/// as lists are searched in reverse when matching.
98+
pub fn add_patterns_buffer(&mut self, bytes: &[u8], source: impl Into<PathBuf>, root: Option<&Path>) {
99+
self.patterns
100+
.push(pattern::List::from_bytes(bytes, source.into(), root));
101+
}
102+
}
103+
104+
/// Return a match if a pattern matches `relative_path`, providing a pre-computed `basename_pos` which is the
105+
/// starting position of the basename of `relative_path`. `is_dir` is true if `relative_path` is a directory.
106+
/// `case` specifies whether cases should be folded during matching or not.
///
/// The patterns of `list` are tried from last to first, and the first one that matches is returned.
/// `None` is returned if no pattern matches, or if `relative_path` cannot be made relative to the base
/// of `list` (as decided by `strip_base_handle_recompute_basename_pos()`).
107+
pub fn pattern_matching_relative_path<'a>(
108+
list: &'a gix_glob::search::pattern::List<Ignore>,
109+
relative_path: &BStr,
110+
basename_pos: Option<usize>,
111+
is_dir: Option<bool>,
112+
case: gix_glob::pattern::Case,
113+
) -> Option<Match<'a, ()>> {
114+
// From here on, path and basename position are the ones returned by the list, relative to its base.
let (relative_path, basename_start_pos) =
115+
list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case)?;
116+
list.patterns
117+
.iter()
118+
// Later patterns take precedence over earlier ones.
.rev()
119+
.filter(|pm| Ignore::may_use_glob_pattern(&pm.pattern))
120+
.find_map(
121+
|pattern::Mapping {
122+
pattern,
123+
value,
124+
sequence_number,
125+
}| {
126+
pattern
127+
.matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case)
128+
.then_some(Match {
129+
pattern,
130+
value,
131+
source: list.source.as_deref(),
132+
sequence_number: *sequence_number,
133+
})
134+
},
135+
)
136+
}
137+
138+
/// Like [`pattern_matching_relative_path()`], but returns an index to the pattern
139+
/// that matched `relative_path`, instead of the match itself.
///
/// The index refers to the position of the pattern in `list.patterns`, even though patterns are tried
/// from last to first.
140+
pub fn pattern_idx_matching_relative_path(
141+
list: &gix_glob::search::pattern::List<Ignore>,
142+
relative_path: &BStr,
143+
basename_pos: Option<usize>,
144+
is_dir: Option<bool>,
145+
case: gix_glob::pattern::Case,
146+
) -> Option<usize> {
147+
let (relative_path, basename_start_pos) =
148+
list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case)?;
149+
list.patterns
150+
.iter()
151+
// Enumerate before reversing to keep indices valid for `list.patterns`.
.enumerate()
152+
.rev()
153+
.filter(|(_, pm)| Ignore::may_use_glob_pattern(&pm.pattern))
154+
.find_map(|(idx, pm)| {
155+
pm.pattern
156+
.matches_repo_relative_path(relative_path, basename_start_pos, is_dir, case)
157+
.then_some(idx)
158+
})
159+
}
160+
161+
/// Matching of ignore patterns.
162+
impl Search {
163+
/// Match `relative_path` and return the first match if found.
164+
/// `is_dir` is true if `relative_path` is a directory.
165+
/// `case` specifies whether cases should be folded during matching or not.
///
/// Pattern lists are searched in reverse, so the list added last is consulted first.
166+
pub fn pattern_matching_relative_path<'a>(
167+
&self,
168+
relative_path: impl Into<&'a BStr>,
169+
is_dir: Option<bool>,
170+
case: gix_glob::pattern::Case,
171+
) -> Option<Match<'_, ()>> {
172+
let relative_path = relative_path.into();
173+
// The basename starts right after the last slash, and there is no position if the path has no slash.
let basename_pos = relative_path.rfind(b"/").map(|p| p + 1);
174+
self.patterns
175+
.iter()
176+
.rev()
177+
.find_map(|pl| pattern_matching_relative_path(pl, relative_path, basename_pos, is_dir, case))
178+
}
179+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# no attribute for now
2+
*.[oa] c
3+
4+
# comment
5+
"*.html" a b=c
6+
7+
# other comment
8+
\!foo.html x
9+
10+
\#a/path -a
11+
/* !b
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
make_global_and_external_and_dir_ignores.tar.xz

0 commit comments

Comments
 (0)