,
cache: &ProgramCache,
matches: &'m mut [bool],
slots: &'s mut [Slot],
@@ -93,14 +95,14 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
let mut cache = cache.borrow_mut();
let cache = &mut cache.backtrack;
let start = input.at(start);
- let mut b = Bounded {
+ Bounded {
prog: prog,
input: input,
matches: matches,
slots: slots,
m: cache,
- };
- b.exec_(start, end)
+ }
+ .exec_(start, end)
}
/// Clears the cache such that the backtracking engine can be executed
@@ -196,7 +198,6 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
}
fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
- use prog::Inst::*;
loop {
// This loop is an optimization to avoid constantly pushing/popping
// from the stack. Namely, if we're pushing a job only to run it
@@ -205,64 +206,12 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
if self.has_visited(ip, at) {
return false;
}
- match self.prog[ip] {
- Match(slot) => {
- if slot < self.matches.len() {
- self.matches[slot] = true;
- }
- return true;
- }
- Save(ref inst) => {
- if let Some(&old_pos) = self.slots.get(inst.slot) {
- // If this path doesn't work out, then we save the old
- // capture index (if one exists) in an alternate
- // job. If the next path fails, then the alternate
- // job is popped and the old capture index is restored.
- self.m.jobs.push(Job::SaveRestore {
- slot: inst.slot,
- old_pos: old_pos,
- });
- self.slots[inst.slot] = Some(at.pos());
- }
- ip = inst.goto;
- }
- Split(ref inst) => {
- self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
- ip = inst.goto1;
- }
- EmptyLook(ref inst) => {
- if self.input.is_empty_match(at, inst) {
- ip = inst.goto;
- } else {
- return false;
- }
- }
- Char(ref inst) => {
- if inst.c == at.char() {
- ip = inst.goto;
- at = self.input.at(at.next_pos());
- } else {
- return false;
- }
- }
- Ranges(ref inst) => {
- if inst.matches(at.char()) {
- ip = inst.goto;
- at = self.input.at(at.next_pos());
- } else {
- return false;
- }
- }
- Bytes(ref inst) => {
- if let Some(b) = at.byte() {
- if inst.matches(b) {
- ip = inst.goto;
- at = self.input.at(at.next_pos());
- continue;
- }
- }
- return false;
+ match self.prog[ip].step(self, at) {
+ Ok((next_ip, next_at)) => {
+ ip = next_ip;
+ at = next_at;
}
+ Err(res) => return res,
}
}
}
@@ -280,6 +229,125 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
}
}
+pub trait Step: InstTrait + Sized {
+ fn step(
+ &self,
+ bounded: &mut Bounded<'_, '_, '_, '_, I, Self>,
+ at: InputAt,
+ ) -> Result<(InstPtr, InputAt), bool>;
+}
+
+impl Step for UnicodeInst {
+ fn step(
+ &self,
+ bounded: &mut Bounded<'_, '_, '_, '_, I, Self>,
+ mut at: InputAt,
+ ) -> Result<(InstPtr, InputAt), bool> {
+ use prog::UnicodeInst::*;
+ match *self {
+ Match(slot) => {
+ if slot < bounded.matches.len() {
+ bounded.matches[slot] = true;
+ }
+ Err(true)
+ }
+ Save(ref inst) => {
+ if let Some(&old_pos) = bounded.slots.get(inst.slot) {
+ // If this path doesn't work out, then we save the old
+ // capture index (if one exists) in an alternate
+ // job. If the next path fails, then the alternate
+ // job is popped and the old capture index is restored.
+ bounded.m.jobs.push(Job::SaveRestore {
+ slot: inst.slot,
+ old_pos: old_pos,
+ });
+ bounded.slots[inst.slot] = Some(at.pos());
+ }
+ Ok((inst.goto, at))
+ }
+ Split(ref inst) => {
+ bounded.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
+ Ok((inst.goto1, at))
+ }
+ EmptyLook(ref inst) => {
+ if bounded.input.is_empty_match(at, inst) {
+ Ok((inst.goto, at))
+ } else {
+ Err(false)
+ }
+ }
+ Char(ref inst) => {
+ if inst.c == at.char() {
+ at = bounded.input.at(at.next_pos());
+ Ok((inst.goto, at))
+ } else {
+ Err(false)
+ }
+ }
+ Ranges(ref inst) => {
+ if inst.matches(at.char()) {
+ at = bounded.input.at(at.next_pos());
+ Ok((inst.goto, at))
+ } else {
+ Err(false)
+ }
+ }
+ }
+ }
+}
+
+impl Step for BytesInst {
+ fn step(
+ &self,
+ bounded: &mut Bounded<'_, '_, '_, '_, I, Self>,
+ mut at: InputAt,
+ ) -> Result<(InstPtr, InputAt), bool> {
+ use prog::BytesInst::*;
+ match *self {
+ Match(slot) => {
+ if slot < bounded.matches.len() {
+ bounded.matches[slot] = true;
+ }
+ Err(true)
+ }
+ Save(ref inst) => {
+ if let Some(&old_pos) = bounded.slots.get(inst.slot) {
+ // If this path doesn't work out, then we save the old
+ // capture index (if one exists) in an alternate
+ // job. If the next path fails, then the alternate
+ // job is popped and the old capture index is restored.
+ bounded.m.jobs.push(Job::SaveRestore {
+ slot: inst.slot,
+ old_pos: old_pos,
+ });
+ bounded.slots[inst.slot] = Some(at.pos());
+ }
+ Ok((inst.goto, at))
+ }
+ Split(ref inst) => {
+ bounded.m.jobs.push(Job::Inst { ip: inst.goto2, at: at });
+ Ok((inst.goto1, at))
+ }
+ EmptyLook(ref inst) => {
+ if bounded.input.is_empty_match(at, inst) {
+ Ok((inst.goto, at))
+ } else {
+ Err(false)
+ }
+ }
+ Bytes(ref inst) => {
+ if let Some(b) = at.byte() {
+ if inst.matches(b) {
+ at = bounded.input.at(at.next_pos());
+ return Ok((inst.goto, at));
+ }
+ }
+ Err(false)
+ }
+ }
+ }
+}
+
fn usize_to_u32(n: usize) -> u32 {
if (n as u64) > (::std::u32::MAX as u64) {
panic!("BUG: {} is too big to fit into u32", n)
diff --git a/src/compile.rs b/src/compile.rs
index 9ffd34704..08d9b6849 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::fmt;
use std::iter;
+use std::mem;
use std::result;
use std::sync::Arc;
@@ -9,8 +10,8 @@ use syntax::is_word_byte;
use syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
use prog::{
- EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
- InstSave, InstSplit, Program,
+ BytesInst, EmptyLook, InstBytes, InstChar, InstEmptyLook, InstPtr,
+ InstRanges, InstSave, InstSplit, InstTrait, Program, UnicodeInst,
};
use Error;
@@ -29,9 +30,9 @@ struct Patch {
// `Compiler` is only public via the `internal` module, so avoid deriving
// `Debug`.
#[allow(missing_debug_implementations)]
-pub struct Compiler {
- insts: Vec,
- compiled: Program,
+pub struct Compiler {
+ insts: Vec>,
+ compiled: Program,
capture_name_idx: HashMap,
num_exprs: usize,
size_limit: usize,
@@ -40,7 +41,7 @@ pub struct Compiler {
byte_classes: ByteClassSet,
}
-impl Compiler {
+impl Compiler {
/// Create a new regular expression compiler.
///
/// Various options can be set before calling `compile` on an expression.
@@ -65,22 +66,6 @@ impl Compiler {
self
}
- /// If bytes is true, then the program is compiled as a byte based
- /// automaton, which incorporates UTF-8 decoding into the machine. If it's
- /// false, then the automaton is Unicode scalar value based, e.g., an
- /// engine utilizing such an automaton is responsible for UTF-8 decoding.
- ///
- /// The specific invariant is that when returning a byte based machine,
- /// the neither the `Char` nor `Ranges` instructions are produced.
- /// Conversely, when producing a Unicode scalar value machine, the `Bytes`
- /// instruction is never produced.
- ///
- /// Note that `dfa(true)` implies `bytes(true)`.
- pub fn bytes(mut self, yes: bool) -> Self {
- self.compiled.is_bytes = yes;
- self
- }
-
/// When disabled, the program compiled may match arbitrary bytes.
///
/// When enabled (the default), all compiled programs exclusively match
@@ -108,13 +93,18 @@ impl Compiler {
self.compiled.is_reverse = yes;
self
}
+}
+impl> Compiler {
/// Compile a regular expression given its AST.
///
/// The compiler is guaranteed to succeed unless the program exceeds the
/// specified size limit. If the size limit is exceeded, then compilation
/// stops and returns an error.
- pub fn compile(mut self, exprs: &[Hir]) -> result::Result {
+ pub fn compile(
+ mut self,
+ exprs: &[Hir],
+ ) -> result::Result, Error> {
debug_assert!(!exprs.is_empty());
self.num_exprs = exprs.len();
if exprs.len() == 1 {
@@ -124,7 +114,7 @@ impl Compiler {
}
}
- fn compile_one(mut self, expr: &Hir) -> result::Result {
+ fn compile_one(mut self, expr: &Hir) -> result::Result, Error> {
// If we're compiling a forward DFA and we aren't anchored, then
// add a `.*?` before the first capture group.
// Other matching engines handle this by baking the logic into the
@@ -145,14 +135,14 @@ impl Compiler {
}
self.fill_to_next(patch.hole);
self.compiled.matches = vec![self.insts.len()];
- self.push_compiled(Inst::Match(0));
+ self.push_compiled(I::new_match(0));
self.compile_finish()
}
fn compile_many(
mut self,
exprs: &[Hir],
- ) -> result::Result {
+ ) -> result::Result, Error> {
debug_assert!(exprs.len() > 1);
self.compiled.is_anchored_start =
@@ -176,7 +166,7 @@ impl Compiler {
self.c_capture(0, expr)?.unwrap_or(self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
- self.push_compiled(Inst::Match(i));
+ self.push_compiled(I::new_match(i));
prev_hole = self.fill_split(split, Some(entry), None);
}
let i = exprs.len() - 1;
@@ -185,11 +175,11 @@ impl Compiler {
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
- self.push_compiled(Inst::Match(i));
+ self.push_compiled(I::new_match(i));
self.compile_finish()
}
- fn compile_finish(mut self) -> result::Result {
+ fn compile_finish(mut self) -> result::Result, Error> {
self.compiled.insts =
self.insts.into_iter().map(|inst| inst.unwrap()).collect();
self.compiled.byte_classes = self.byte_classes.byte_classes();
@@ -474,9 +464,9 @@ impl Compiler {
Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
}
- fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
+ fn c_concat<'a, E>(&mut self, exprs: E) -> ResultOrEmpty
where
- I: IntoIterator,
+ E: IntoIterator,
{
let mut exprs = exprs.into_iter();
let Patch { mut hole, entry } = loop {
@@ -771,7 +761,7 @@ impl Compiler {
}
}
- fn push_compiled(&mut self, inst: Inst) {
+ fn push_compiled(&mut self, inst: I) {
self.insts.push(MaybeInst::Compiled(inst));
}
@@ -795,7 +785,7 @@ impl Compiler {
fn check_size(&self) -> result::Result<(), Error> {
use std::mem::size_of;
- if self.insts.len() * size_of::() > self.size_limit {
+ if self.insts.len() * size_of::() > self.size_limit {
Err(Error::CompiledTooBig(self.size_limit))
} else {
Ok(())
@@ -822,29 +812,31 @@ impl Hole {
}
#[derive(Clone, Debug)]
-enum MaybeInst {
- Compiled(Inst),
+enum MaybeInst {
+ Compiled(I),
Uncompiled(InstHole),
Split,
Split1(InstPtr),
Split2(InstPtr),
}
-impl MaybeInst {
+impl> MaybeInst {
fn fill(&mut self, goto: InstPtr) {
let maybeinst = match *self {
MaybeInst::Split => MaybeInst::Split1(goto),
- MaybeInst::Uncompiled(ref inst) => {
- MaybeInst::Compiled(inst.fill(goto))
+ MaybeInst::Uncompiled(ref mut inst) => {
+ // Take the hole by value (a dummy `InstHole` is left behind) so `into()` can consume it
+ let inst = mem::replace(inst, InstHole::Save { slot: 0 });
+ MaybeInst::Compiled((inst, goto).into())
}
MaybeInst::Split1(goto1) => {
- MaybeInst::Compiled(Inst::Split(InstSplit {
+ MaybeInst::Compiled(I::new_split(InstSplit {
goto1: goto1,
goto2: goto,
}))
}
MaybeInst::Split2(goto2) => {
- MaybeInst::Compiled(Inst::Split(InstSplit {
+ MaybeInst::Compiled(I::new_split(InstSplit {
goto1: goto,
goto2: goto2,
}))
@@ -861,7 +853,7 @@ impl MaybeInst {
fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) {
let filled = match *self {
MaybeInst::Split => {
- Inst::Split(InstSplit { goto1: goto1, goto2: goto2 })
+ I::new_split(InstSplit { goto1: goto1, goto2: goto2 })
}
_ => unreachable!(
"must be called on Split instruction, \
@@ -896,7 +888,7 @@ impl MaybeInst {
*self = MaybeInst::Split2(half_filled);
}
- fn unwrap(self) -> Inst {
+ fn unwrap(self) -> I {
match self {
MaybeInst::Compiled(inst) => inst,
_ => unreachable!(
@@ -908,8 +900,10 @@ impl MaybeInst {
}
}
+// TODO: Specialize `compile` into `compile_bytes` and `compile_unicode`
+// to avoid making `InstHole` public?
#[derive(Clone, Debug)]
-enum InstHole {
+pub enum InstHole {
Save { slot: usize },
EmptyLook { look: EmptyLook },
Char { c: char },
@@ -917,32 +911,60 @@ enum InstHole {
Bytes { start: u8, end: u8 },
}
-impl InstHole {
- fn fill(&self, goto: InstPtr) -> Inst {
- match *self {
+impl From<(InstHole, InstPtr)> for UnicodeInst {
+ fn from(val: (InstHole, InstPtr)) -> UnicodeInst {
+ let (hole, goto) = val;
+ match hole {
InstHole::Save { slot } => {
- Inst::Save(InstSave { goto: goto, slot: slot })
+ UnicodeInst::Save(InstSave { goto: goto, slot: slot })
}
InstHole::EmptyLook { look } => {
- Inst::EmptyLook(InstEmptyLook { goto: goto, look: look })
+ UnicodeInst::EmptyLook(InstEmptyLook {
+ goto: goto,
+ look: look,
+ })
+ }
+ InstHole::Char { c } => {
+ UnicodeInst::Char(InstChar { goto: goto, c: c })
}
- InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }),
- InstHole::Ranges { ref ranges } => {
- Inst::Ranges(InstRanges { goto: goto, ranges: ranges.clone() })
+ InstHole::Ranges { ranges } => {
+ UnicodeInst::Ranges(InstRanges {
+ goto: goto,
+ ranges: ranges,
+ })
}
- InstHole::Bytes { start, end } => {
- Inst::Bytes(InstBytes { goto: goto, start: start, end: end })
+ InstHole::Bytes { .. } => unreachable!(),
+ }
+ }
+}
+
+impl From<(InstHole, InstPtr)> for BytesInst {
+ fn from(val: (InstHole, InstPtr)) -> BytesInst {
+ let (hole, goto) = val;
+ match hole {
+ InstHole::Save { slot } => {
+ BytesInst::Save(InstSave { goto: goto, slot: slot })
}
+ InstHole::EmptyLook { look } => {
+ BytesInst::EmptyLook(InstEmptyLook { goto: goto, look: look })
+ }
+ InstHole::Char { .. } => unreachable!(),
+ InstHole::Ranges { .. } => unreachable!(),
+ InstHole::Bytes { start, end } => BytesInst::Bytes(InstBytes {
+ goto: goto,
+ start: start,
+ end: end,
+ }),
}
}
}
-struct CompileClass<'a, 'b> {
- c: &'a mut Compiler,
+struct CompileClass<'a, 'b, I: InstTrait> {
+ c: &'a mut Compiler,
ranges: &'b [hir::ClassUnicodeRange],
}
-impl<'a, 'b> CompileClass<'a, 'b> {
+impl<'a, 'b, I: InstTrait + From<(InstHole, usize)>> CompileClass<'a, 'b, I> {
fn compile(mut self) -> Result {
let mut holes = vec![];
let mut initial_entry = None;
@@ -992,9 +1014,9 @@ impl<'a, 'b> CompileClass<'a, 'b> {
}
}
- fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result
+ fn c_utf8_seq_<'r, S>(&mut self, seq: S) -> Result
where
- I: IntoIterator,
+ S: IntoIterator,
{
// The initial instruction for each UTF-8 sequence should be the same.
let mut from_inst = ::std::usize::MAX;
@@ -1013,17 +1035,14 @@ impl<'a, 'b> CompileClass<'a, 'b> {
}
}
self.c.byte_classes.set_range(byte_range.start, byte_range.end);
+ let inst_hole = InstHole::Bytes {
+ start: byte_range.start,
+ end: byte_range.end,
+ };
if from_inst == ::std::usize::MAX {
- last_hole = self.c.push_hole(InstHole::Bytes {
- start: byte_range.start,
- end: byte_range.end,
- });
+ last_hole = self.c.push_hole(inst_hole);
} else {
- self.c.push_compiled(Inst::Bytes(InstBytes {
- goto: from_inst,
- start: byte_range.start,
- end: byte_range.end,
- }));
+ self.c.push_compiled((inst_hole, from_inst).into());
}
from_inst = self.c.insts.len().checked_sub(1).unwrap();
debug_assert!(from_inst < ::std::usize::MAX);
diff --git a/src/dfa.rs b/src/dfa.rs
index 9ac0c2c39..523c2f615 100644
--- a/src/dfa.rs
+++ b/src/dfa.rs
@@ -43,7 +43,7 @@ use std::mem;
use std::sync::Arc;
use exec::ProgramCache;
-use prog::{Inst, Program};
+use prog::{BytesInst, InstTrait, Program};
use sparse::SparseSet;
/// Return true if and only if the given program can be executed by a DFA.
@@ -54,8 +54,7 @@ use sparse::SparseSet;
///
/// This function will also return false if the given program has any Unicode
/// instructions (Char or Ranges) since the DFA operates on bytes only.
-pub fn can_exec(insts: &Program) -> bool {
- use prog::Inst::*;
+pub fn can_exec(insts: &Program) -> bool {
// If for some reason we manage to allocate a regex program with more
// than i32::MAX instructions, then we can't execute the DFA because we
// use 32 bit instruction pointer deltas for memory savings.
@@ -65,12 +64,6 @@ pub fn can_exec(insts: &Program) -> bool {
if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize {
return false;
}
- for inst in insts {
- match *inst {
- Char(_) | Ranges(_) => return false,
- EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {}
- }
- }
true
}
@@ -172,7 +165,7 @@ pub struct Fsm<'a> {
/// the `dfa` instructions or the `dfa_reverse` instructions from
/// `exec::ExecReadOnly`. (It never uses `ExecReadOnly.nfa`, which may have
/// Unicode opcodes that cannot be executed by the DFA.)
- prog: &'a Program,
+ prog: &'a Program,
/// The start state. We record it here because the pointer may change
/// when the cache is wiped.
start: StatePtr,
@@ -411,7 +404,7 @@ struct StateFlags(u8);
impl Cache {
/// Create new empty cache for the DFA engine.
- pub fn new(prog: &Program) -> Self {
+ pub fn new(prog: &Program) -> Self {
// We add 1 to account for the special EOF byte.
let num_byte_classes = (prog.byte_classes[255] as usize + 1) + 1;
let starts = vec![STATE_UNKNOWN; 256];
@@ -445,7 +438,7 @@ impl CacheInner {
impl<'a> Fsm<'a> {
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn forward(
- prog: &'a Program,
+ prog: &'a Program,
cache: &ProgramCache,
quit_after_match: bool,
text: &[u8],
@@ -475,7 +468,7 @@ impl<'a> Fsm<'a> {
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn reverse(
- prog: &'a Program,
+ prog: &'a Program,
cache: &ProgramCache,
quit_after_match: bool,
text: &[u8],
@@ -505,7 +498,7 @@ impl<'a> Fsm<'a> {
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn forward_many(
- prog: &'a Program,
+ prog: &'a Program,
cache: &ProgramCache,
matches: &mut [bool],
text: &[u8],
@@ -539,7 +532,7 @@ impl<'a> Fsm<'a> {
debug_assert!(dfa.last_match_si != STATE_UNKNOWN);
debug_assert!(dfa.last_match_si != STATE_DEAD);
for ip in dfa.state(dfa.last_match_si).inst_ptrs() {
- if let Inst::Match(slot) = dfa.prog[ip] {
+ if let BytesInst::Match(slot) = dfa.prog[ip] {
matches[slot] = true;
}
}
@@ -894,7 +887,7 @@ impl<'a> Fsm<'a> {
mut si: StatePtr,
b: Byte,
) -> Option {
- use prog::Inst::*;
+ use prog::BytesInst::*;
// Initialize a queue with the current DFA state's NFA states.
qcur.clear();
@@ -957,8 +950,6 @@ impl<'a> Fsm<'a> {
qnext.clear();
for &ip in &*qcur {
match self.prog[ip as usize] {
- // These states never happen in a byte-based program.
- Char(_) | Ranges(_) => unreachable!(),
// These states are handled when following epsilon transitions.
Save(_) | Split(_) | EmptyLook(_) => {}
Match(_) => {
@@ -1056,8 +1047,8 @@ impl<'a> Fsm<'a> {
q: &mut SparseSet,
flags: EmptyFlags,
) {
+ use prog::BytesInst::*;
use prog::EmptyLook::*;
- use prog::Inst::*;
// We need to traverse the NFA to follow epsilon transitions, so avoid
// recursion with an explicit stack.
@@ -1072,7 +1063,6 @@ impl<'a> Fsm<'a> {
}
q.insert(ip as usize);
match self.prog[ip as usize] {
- Char(_) | Ranges(_) => unreachable!(),
Match(_) | Bytes(_) => {
break;
}
@@ -1190,7 +1180,7 @@ impl<'a> Fsm<'a> {
q: &SparseSet,
state_flags: &mut StateFlags,
) -> Option {
- use prog::Inst::*;
+ use prog::BytesInst::*;
// We need to build up enough information to recognize pre-built states
// in the DFA. Generally speaking, this includes every instruction
@@ -1211,7 +1201,6 @@ impl<'a> Fsm<'a> {
for &ip in q {
let ip = usize_to_u32(ip);
match self.prog[ip as usize] {
- Char(_) | Ranges(_) => unreachable!(),
Save(_) | Split(_) => {}
Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip),
EmptyLook(_) => {
diff --git a/src/exec.rs b/src/exec.rs
index 3d5a52bea..2348e357f 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -18,7 +18,7 @@ use input::{ByteInput, CharInput};
use literal::LiteralSearcher;
use pikevm;
use pool::{Pool, PoolGuard};
-use prog::Program;
+use prog::{BytesInst, Program, UnicodeInst};
use re_builder::RegexOptions;
use re_bytes;
use re_set;
@@ -61,6 +61,76 @@ pub struct ExecNoSync<'c> {
#[derive(Debug)]
pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
+#[derive(Debug)]
+enum NfaProgram {
+ Bytes(Program),
+ Unicode(Program),
+}
+
+impl NfaProgram {
+ #[inline]
+ fn len(&self) -> usize {
+ match self {
+ NfaProgram::Bytes(program) => program.len(),
+ NfaProgram::Unicode(program) => program.len(),
+ }
+ }
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ match self {
+ NfaProgram::Bytes(program) => program.is_empty(),
+ NfaProgram::Unicode(program) => program.is_empty(),
+ }
+ }
+
+ #[inline]
+ fn is_anchored_start(&self) -> bool {
+ match self {
+ NfaProgram::Bytes(program) => program.is_anchored_start,
+ NfaProgram::Unicode(program) => program.is_anchored_start,
+ }
+ }
+
+ #[inline]
+ fn is_anchored_end(&self) -> bool {
+ match self {
+ NfaProgram::Bytes(program) => program.is_anchored_end,
+ NfaProgram::Unicode(program) => program.is_anchored_end,
+ }
+ }
+
+ #[inline]
+ fn only_utf8(&self) -> bool {
+ match self {
+ NfaProgram::Bytes(program) => program.only_utf8,
+ NfaProgram::Unicode(program) => program.only_utf8,
+ }
+ }
+
+ #[inline]
+ fn prefixes(&self) -> &LiteralSearcher {
+ match self {
+ NfaProgram::Bytes(program) => &program.prefixes,
+ NfaProgram::Unicode(program) => &program.prefixes,
+ }
+ }
+
+ pub fn capture_name_idx(&self) -> &Arc> {
+ match self {
+ NfaProgram::Bytes(program) => &program.capture_name_idx,
+ NfaProgram::Unicode(program) => &program.capture_name_idx,
+ }
+ }
+
+ pub fn captures(&self) -> &[Option] {
+ match self {
+ NfaProgram::Bytes(program) => &program.captures,
+ NfaProgram::Unicode(program) => &program.captures,
+ }
+ }
+}
+
/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
/// state is determined at compile time and never changes during search.
#[derive(Debug)]
@@ -72,17 +142,17 @@ struct ExecReadOnly {
///
/// N.B. It is not possibly to make this byte-based from the public API.
/// It is only used for testing byte based programs in the NFA simulations.
- nfa: Program,
+ nfa: NfaProgram,
/// A compiled byte based program for DFA execution. This is only used
/// if a DFA can be executed. (Currently, only word boundary assertions are
/// not supported.) Note that this program contains an embedded `.*?`
/// preceding the first capture group, unless the regex is anchored at the
/// beginning.
- dfa: Program,
+ dfa: Program,
/// The same as above, except the program is reversed (and there is no
/// preceding `.*?`). This is used by the DFA to find the starting location
/// of matches.
- dfa_reverse: Program,
+ dfa_reverse: Program,
/// A set of suffix literals extracted from the regex.
///
/// Prefix literals are stored on the `Program`, since they are used inside
@@ -302,7 +372,7 @@ impl ExecBuilder {
if self.options.pats.is_empty() {
let ro = Arc::new(ExecReadOnly {
res: vec![],
- nfa: Program::new(),
+ nfa: NfaProgram::Unicode(Program::new()),
dfa: Program::new(),
dfa_reverse: Program::new(),
suffixes: LiteralSearcher::empty(),
@@ -314,11 +384,6 @@ impl ExecBuilder {
return Ok(Exec { ro: ro, pool });
}
let parsed = self.parse()?;
- let mut nfa = Compiler::new()
- .size_limit(self.options.size_limit)
- .bytes(self.bytes || parsed.bytes)
- .only_utf8(self.only_utf8)
- .compile(&parsed.exprs)?;
let mut dfa = Compiler::new()
.size_limit(self.options.size_limit)
.dfa(true)
@@ -333,8 +398,25 @@ impl ExecBuilder {
#[cfg(feature = "perf-literal")]
let ac = self.build_aho_corasick(&parsed);
- nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
- dfa.prefixes = nfa.prefixes.clone();
+
+ let nfa = if self.bytes || parsed.bytes {
+ let mut program = Compiler::new()
+ .size_limit(self.options.size_limit)
+ .only_utf8(self.only_utf8)
+ .compile(&parsed.exprs)?;
+ program.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
+ dfa.prefixes = program.prefixes.clone();
+ NfaProgram::Bytes(program)
+ } else {
+ let mut program = Compiler::new()
+ .size_limit(self.options.size_limit)
+ .only_utf8(self.only_utf8)
+ .compile(&parsed.exprs)?;
+ program.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
+ dfa.prefixes = program.prefixes.clone();
+ NfaProgram::Unicode(program)
+ };
+
dfa.dfa_size_limit = self.options.dfa_size_limit;
dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
@@ -428,7 +510,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
/// are two slots for every capture group, corresponding to possibly empty
/// start and end locations of the capture.)
fn slots_len(&self) -> usize {
- self.ro.nfa.captures.len() * 2
+ self.ro.nfa.captures().len() * 2
}
fn next_after_empty(&self, _text: &[u8], i: usize) -> usize {
@@ -630,7 +712,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
}
#[cfg(feature = "perf-dfa")]
MatchType::Dfa => {
- if self.ro.nfa.is_anchored_start {
+ if self.ro.nfa.is_anchored_start() {
self.captures_nfa(slots, text, start)
} else {
match self.find_dfa_forward(text, start) {
@@ -701,12 +783,12 @@ impl<'c> ExecNoSync<'c> {
use self::MatchLiteralType::*;
match ty {
Unanchored => {
- let lits = &self.ro.nfa.prefixes;
+ let lits = self.ro.nfa.prefixes();
lits.find(&text[start..]).map(|(s, e)| (start + s, start + e))
}
AnchoredStart => {
- let lits = &self.ro.nfa.prefixes;
- if start == 0 || !self.ro.nfa.is_anchored_start {
+ let lits = self.ro.nfa.prefixes();
+ if start == 0 || !self.ro.nfa.is_anchored_start() {
lits.find_start(&text[start..])
.map(|(s, e)| (start + s, start + e))
} else {
@@ -1086,20 +1168,19 @@ impl<'c> ExecNoSync<'c> {
start: usize,
end: usize,
) -> bool {
- if self.ro.nfa.uses_bytes() {
- pikevm::Fsm::exec(
- &self.ro.nfa,
+ match self.ro.nfa {
+ NfaProgram::Bytes(ref program) => pikevm::Fsm::exec(
+ program,
self.cache.value(),
matches,
slots,
quit_after_match,
- ByteInput::new(text, self.ro.nfa.only_utf8),
+ ByteInput::new(text, self.ro.nfa.only_utf8()),
start,
end,
- )
- } else {
- pikevm::Fsm::exec(
- &self.ro.nfa,
+ ),
+ NfaProgram::Unicode(ref program) => pikevm::Fsm::exec(
+ program,
self.cache.value(),
matches,
slots,
@@ -1107,7 +1188,7 @@ impl<'c> ExecNoSync<'c> {
CharInput::new(text),
start,
end,
- )
+ ),
}
}
@@ -1120,26 +1201,25 @@ impl<'c> ExecNoSync<'c> {
start: usize,
end: usize,
) -> bool {
- if self.ro.nfa.uses_bytes() {
- backtrack::Bounded::exec(
- &self.ro.nfa,
+ match self.ro.nfa {
+ NfaProgram::Bytes(ref program) => backtrack::Bounded::exec(
+ program,
self.cache.value(),
matches,
slots,
- ByteInput::new(text, self.ro.nfa.only_utf8),
+ ByteInput::new(text, self.ro.nfa.only_utf8()),
start,
end,
- )
- } else {
- backtrack::Bounded::exec(
- &self.ro.nfa,
+ ),
+ NfaProgram::Unicode(ref program) => backtrack::Bounded::exec(
+ program,
self.cache.value(),
matches,
slots,
CharInput::new(text),
start,
end,
- )
+ ),
}
}
@@ -1237,7 +1317,7 @@ impl<'c> ExecNoSync<'c> {
#[cfg(feature = "perf-literal")]
fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool {
// Only do this check if the haystack is big (>1MB).
- if text.len() > (1 << 20) && ro.nfa.is_anchored_end {
+ if text.len() > (1 << 20) && ro.nfa.is_anchored_end() {
let lcs = ro.suffixes.lcs();
if lcs.len() >= 1 && !lcs.is_suffix(text) {
return false;
@@ -1250,7 +1330,7 @@ impl<'c> ExecNoSync<'c> {
}
pub fn capture_name_idx(&self) -> &Arc> {
- &self.ro.nfa.capture_name_idx
+ self.ro.nfa.capture_name_idx()
}
}
@@ -1306,13 +1386,13 @@ impl Exec {
///
/// Any capture that isn't named is None.
pub fn capture_names(&self) -> &[Option] {
- &self.ro.nfa.captures
+ self.ro.nfa.captures()
}
/// Return a reference to named groups mapping (from group name to
/// group position).
pub fn capture_name_idx(&self) -> &Arc> {
- &self.ro.nfa.capture_name_idx
+ self.ro.nfa.capture_name_idx()
}
}
@@ -1329,7 +1409,7 @@ impl ExecReadOnly {
return hint.unwrap();
}
// If the NFA is empty, then we'll never match anything.
- if self.nfa.insts.is_empty() {
+ if self.nfa.is_empty() {
return MatchType::Nothing;
}
if let Some(literalty) = self.choose_literal_match_type() {
@@ -1371,15 +1451,15 @@ impl ExecReadOnly {
MatchLiteralType::AhoCorasick,
));
}
- if ro.nfa.prefixes.complete() {
- return if ro.nfa.is_anchored_start {
+ if ro.nfa.prefixes().complete() {
+ return if ro.nfa.is_anchored_start() {
Some(MatchType::Literal(MatchLiteralType::AnchoredStart))
} else {
Some(MatchType::Literal(MatchLiteralType::Unanchored))
};
}
if ro.suffixes.complete() {
- return if ro.nfa.is_anchored_end {
+ return if ro.nfa.is_anchored_end() {
Some(MatchType::Literal(MatchLiteralType::AnchoredEnd))
} else {
// This case shouldn't happen. When the regex isn't
@@ -1412,7 +1492,7 @@ impl ExecReadOnly {
}
// If the regex is anchored at the end but not the start, then
// just match in reverse from the end of the haystack.
- if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end {
+ if !ro.nfa.is_anchored_start() && ro.nfa.is_anchored_end() {
return Some(MatchType::DfaAnchoredReverse);
}
#[cfg(feature = "perf-literal")]
@@ -1536,8 +1616,20 @@ pub struct ProgramCacheInner {
impl ProgramCacheInner {
fn new(ro: &ExecReadOnly) -> Self {
ProgramCacheInner {
- pikevm: pikevm::Cache::new(&ro.nfa),
- backtrack: backtrack::Cache::new(&ro.nfa),
+ pikevm: match ro.nfa {
+ NfaProgram::Bytes(ref program) => pikevm::Cache::new(program),
+ NfaProgram::Unicode(ref program) => {
+ pikevm::Cache::new(program)
+ }
+ },
+ backtrack: match ro.nfa {
+ NfaProgram::Bytes(ref program) => {
+ backtrack::Cache::new(program)
+ }
+ NfaProgram::Unicode(ref program) => {
+ backtrack::Cache::new(program)
+ }
+ },
#[cfg(feature = "perf-dfa")]
dfa: dfa::Cache::new(&ro.dfa),
#[cfg(feature = "perf-dfa")]
diff --git a/src/lib.rs b/src/lib.rs
index 357ac0dd0..15a63f204 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -781,5 +781,5 @@ pub mod internal {
pub use exec::{Exec, ExecBuilder};
pub use input::{Char, CharInput, Input, InputAt};
pub use literal::LiteralSearcher;
- pub use prog::{EmptyLook, Inst, InstRanges, Program};
+ pub use prog::{BytesInst, EmptyLook, InstRanges, Program, UnicodeInst};
}
diff --git a/src/pikevm.rs b/src/pikevm.rs
index 299087da8..f6cd88be7 100644
--- a/src/pikevm.rs
+++ b/src/pikevm.rs
@@ -19,17 +19,17 @@ use std::mem;
use exec::ProgramCache;
use input::{Input, InputAt};
-use prog::{InstPtr, Program};
+use prog::{BytesInst, InstPtr, InstTrait, Program, UnicodeInst};
use re_trait::Slot;
use sparse::SparseSet;
/// An NFA simulation matching engine.
#[derive(Debug)]
-pub struct Fsm<'r, I> {
+pub struct Fsm<'r, I, P: InstTrait> {
/// The sequence of opcodes (among other things) that is actually executed.
///
/// The program may be byte oriented or Unicode codepoint oriented.
- prog: &'r Program,
+ prog: &'r Program
,
/// An explicit stack used for following epsilon transitions. (This is
/// borrowed from the cache.)
stack: &'r mut Vec,
@@ -49,7 +49,7 @@ pub struct Cache {
/// An ordered set of NFA states and their captures.
#[derive(Clone, Debug)]
-struct Threads {
+pub struct Threads {
/// An ordered set of opcodes (each opcode is an NFA state).
set: SparseSet,
/// Captures for every NFA state.
@@ -75,18 +75,18 @@ enum FollowEpsilon {
impl Cache {
/// Create a new allocation used by the NFA machine to record execution
/// and captures.
- pub fn new(_prog: &Program) -> Self {
+ pub fn new(_prog: &Program) -> Self {
Cache { clist: Threads::new(), nlist: Threads::new(), stack: vec![] }
}
}
-impl<'r, I: Input> Fsm<'r, I> {
+impl<'r, I: Input, P: InstTrait + Step> Fsm<'r, I, P> {
/// Execute the NFA matching engine.
///
/// If there's a match, `exec` returns `true` and populates the given
/// captures accordingly.
pub fn exec(
- prog: &'r Program,
+ prog: &'r Program
,
cache: &ProgramCache,
matches: &mut [bool],
slots: &mut [Slot],
@@ -231,39 +231,15 @@ impl<'r, I: Input> Fsm<'r, I> {
at: InputAt,
at_next: InputAt,
) -> bool {
- use prog::Inst::*;
- match self.prog[ip] {
- Match(match_slot) => {
- if match_slot < matches.len() {
- matches[match_slot] = true;
- }
- for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) {
- *slot = *val;
- }
- true
- }
- Char(ref inst) => {
- if inst.c == at.char() {
- self.add(nlist, thread_caps, inst.goto, at_next);
- }
- false
- }
- Ranges(ref inst) => {
- if inst.matches(at.char()) {
- self.add(nlist, thread_caps, inst.goto, at_next);
- }
- false
- }
- Bytes(ref inst) => {
- if let Some(b) = at.byte() {
- if inst.matches(b) {
- self.add(nlist, thread_caps, inst.goto, at_next);
- }
- }
- false
- }
- EmptyLook(_) | Save(_) | Split(_) => false,
- }
+ self.prog[ip].step(
+ self,
+ nlist,
+ matches,
+ slots,
+ thread_caps,
+ at,
+ at_next,
+ )
}
/// Follows epsilon transitions and adds them for processing to nlist,
@@ -300,40 +276,196 @@ impl<'r, I: Input> Fsm<'r, I> {
// traverse the set of states. We only push to the stack when we
// absolutely need recursion (restoring captures or following a
// branch).
- use prog::Inst::*;
loop {
// Don't visit states we've already added.
if nlist.set.contains(ip) {
return;
}
nlist.set.insert(ip);
- match self.prog[ip] {
- EmptyLook(ref inst) => {
- if self.input.is_empty_match(at, inst) {
- ip = inst.goto;
- }
+ if let Some(next_ip) =
+ self.prog[ip].add_step(self, nlist, thread_caps, ip, at)
+ {
+ ip = next_ip;
+ } else {
+ return;
+ }
+ }
+ }
+}
+
+/// Per-instruction behaviour of the NFA simulation.
+///
+/// Implemented once per instruction set (see the `UnicodeInst` and
+/// `BytesInst` impls in this file) so that `Fsm` can dispatch on
+/// `self.prog[ip]` without matching on a single combined instruction enum.
+pub trait Step: InstTrait + Sized {
+ /// Runs this instruction for one thread at input position `at`.
+ ///
+ /// In the implementations in this file, a `Match` instruction records
+ /// itself in `matches`, copies `thread_caps` into `slots` and returns
+ /// `true`; an instruction that accepts the current input queues its
+ /// successor on `nlist` at `at_next` through `fsm.add`; every non-`Match`
+ /// instruction returns `false`.
+ fn step<'r, I: Input>(
+ &self,
+ fsm: &mut Fsm<'r, I, Self>,
+ nlist: &mut Threads,
+ matches: &mut [bool],
+ slots: &mut [Slot],
+ thread_caps: &mut [Option<usize>],
+ at: InputAt,
+ at_next: InputAt,
+ ) -> bool;
+
+ /// Handles this instruction (located at `ip`) while epsilon transitions
+ /// are being followed.
+ ///
+ /// Returns `Some(next)` with the instruction pointer the caller's loop
+ /// should continue from, or `None` when traversal of this path stops. In
+ /// the implementations in this file `None` is returned after the
+ /// thread's captures have been copied into `nlist.caps(ip)`.
+ fn add_step<'r, I: Input>(
+ &self,
+ fsm: &mut Fsm<'r, I, Self>,
+ nlist: &mut Threads,
+ thread_caps: &mut [Option<usize>],
+ ip: usize,
+ at: InputAt,
+ ) -> Option<usize>;
+}
+
+/// `Step` for Unicode programs: the consuming instructions are `Char` and
+/// `Ranges`, both tested against `at.char()`.
+impl Step for UnicodeInst {
+ /// Runs one instruction for the current thread.
+ ///
+ /// `Match` marks `matches[match_slot]` (when in range), copies
+ /// `thread_caps` into `slots` and returns `true`. `Char` and `Ranges`
+ /// queue `inst.goto` on `nlist` at `at_next` when the current character
+ /// is accepted. Every non-`Match` instruction returns `false`.
+ fn step<'r, I: Input>(
+ &self,
+ fsm: &mut Fsm<'r, I, UnicodeInst>,
+ nlist: &mut Threads,
+ matches: &mut [bool],
+ slots: &mut [Slot],
+ thread_caps: &mut [Option<usize>],
+ at: InputAt,
+ at_next: InputAt,
+ ) -> bool {
+ use prog::UnicodeInst::*;
+ match *self {
+ Match(match_slot) => {
+ // A match slot outside `matches` is silently skipped.
+ if match_slot < matches.len() {
+ matches[match_slot] = true;
}
- Save(ref inst) => {
- if inst.slot < thread_caps.len() {
- self.stack.push(FollowEpsilon::Capture {
- slot: inst.slot,
- pos: thread_caps[inst.slot],
- });
- thread_caps[inst.slot] = Some(at.pos());
- }
- ip = inst.goto;
+ // `zip` stops at the shorter slice, so surplus entries on either
+ // side are left untouched.
+ for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) {
+ *slot = *val;
}
- Split(ref inst) => {
- self.stack.push(FollowEpsilon::IP(inst.goto2));
- ip = inst.goto1;
+ true
+ }
+ Char(ref inst) => {
+ if inst.c == at.char() {
+ fsm.add(nlist, thread_caps, inst.goto, at_next);
+ }
+ false
+ }
+ Ranges(ref inst) => {
+ if inst.matches(at.char()) {
+ fsm.add(nlist, thread_caps, inst.goto, at_next);
+ }
+ false
+ }
+ EmptyLook(_) | Save(_) | Split(_) => false,
+ }
+ }
+
+ /// Handles the instruction at `ip` while epsilon transitions are followed.
+ ///
+ /// Returns `Some(next)` for `EmptyLook`, `Save` and `Split`, and `None`
+ /// for `Match`, `Char` and `Ranges` after copying `thread_caps` into
+ /// `nlist.caps(ip)`.
+ fn add_step<'r, I: Input>(
+ &self,
+ fsm: &mut Fsm<'r, I, UnicodeInst>,
+ nlist: &mut Threads,
+ thread_caps: &mut [Option<usize>],
+ ip: usize,
+ at: InputAt,
+ ) -> Option<usize> {
+ use prog::UnicodeInst::*;
+ match *self {
+ EmptyLook(ref inst) => {
+ if fsm.input.is_empty_match(at, inst) {
+ Some(inst.goto)
+ } else {
+ // Handing back the same `ip` makes the caller's
+ // already-visited check stop this pass of its loop.
+ Some(ip)
+ }
+ }
+ Save(ref inst) => {
+ if inst.slot < thread_caps.len() {
+ // Push the slot's previous value before overwriting it.
+ fsm.stack.push(FollowEpsilon::Capture {
+ slot: inst.slot,
+ pos: thread_caps[inst.slot],
+ });
+ thread_caps[inst.slot] = Some(at.pos());
+ }
+ Some(inst.goto)
+ }
+ Split(ref inst) => {
+ // Queue the second branch and continue with the first.
+ fsm.stack.push(FollowEpsilon::IP(inst.goto2));
+ Some(inst.goto1)
+ }
+ Match(_) | Char(_) | Ranges(_) => {
+ let t = &mut nlist.caps(ip);
+ for (slot, val) in t.iter_mut().zip(thread_caps.iter()) {
+ *slot = *val;
+ }
+ None
+ }
+ }
+ }
+}
+
+impl Step for BytesInst {
+ /// Runs one instruction of a byte-oriented program for the current thread.
+ ///
+ /// `Match` marks `matches[match_slot]` (when in range), copies
+ /// `thread_caps` into `slots` and returns `true`. `Bytes` queues
+ /// `inst.goto` on `nlist` at `at_next` when `at.byte()` yields a byte the
+ /// instruction accepts. Every non-`Match` instruction returns `false`.
+ fn step<'r, I: Input>(
+ &self,
+ fsm: &mut Fsm<'r, I, BytesInst>,
+ nlist: &mut Threads,
+ matches: &mut [bool],
+ slots: &mut [Slot],
+ thread_caps: &mut [Option<usize>],
+ at: InputAt,
+ at_next: InputAt,
+ ) -> bool {
+ use prog::BytesInst::*;
+ match *self {
+ Match(match_slot) => {
+ // A match slot outside `matches` is silently skipped.
+ if match_slot < matches.len() {
+ matches[match_slot] = true;
}
- Match(_) | Char(_) | Ranges(_) | Bytes(_) => {
- let t = &mut nlist.caps(ip);
- for (slot, val) in t.iter_mut().zip(thread_caps.iter()) {
- *slot = *val;
+ // `zip` stops at the shorter slice, so surplus entries on either
+ // side are left untouched.
+ for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) {
+ *slot = *val;
+ }
+ true
+ }
+ Bytes(ref inst) => {
+ // `at.byte()` returning `None` means there is nothing to test.
+ if let Some(b) = at.byte() {
+ if inst.matches(b) {
+ fsm.add(nlist, thread_caps, inst.goto, at_next);
}
- return;
}
+ false
+ }
+ EmptyLook(_) | Save(_) | Split(_) => false,
+ }
+ }
+
+ /// Handles the instruction at `ip` while epsilon transitions are followed.
+ ///
+ /// Returns `Some(next)` for `EmptyLook`, `Save` and `Split`, and `None`
+ /// for `Match` and `Bytes` after copying `thread_caps` into
+ /// `nlist.caps(ip)`.
+ fn add_step<'r, I: Input>(
+ &self,
+ fsm: &mut Fsm<'r, I, BytesInst>,
+ nlist: &mut Threads,
+ thread_caps: &mut [Option<usize>],
+ ip: usize,
+ at: InputAt,
+ ) -> Option<usize> {
+ use prog::BytesInst::*;
+ match *self {
+ EmptyLook(ref inst) => {
+ if fsm.input.is_empty_match(at, inst) {
+ Some(inst.goto)
+ } else {
+ // Handing back the same `ip` makes the caller's
+ // already-visited check stop this pass of its loop.
+ Some(ip)
+ }
+ }
+ Save(ref inst) => {
+ if inst.slot < thread_caps.len() {
+ // Push the slot's previous value before overwriting it.
+ fsm.stack.push(FollowEpsilon::Capture {
+ slot: inst.slot,
+ pos: thread_caps[inst.slot],
+ });
+ thread_caps[inst.slot] = Some(at.pos());
+ }
+ Some(inst.goto)
+ }
+ Split(ref inst) => {
+ // Queue the second branch and continue with the first.
+ fsm.stack.push(FollowEpsilon::IP(inst.goto2));
+ Some(inst.goto1)
+ }
+ Match(_) | Bytes(_) => {
+ let t = &mut nlist.caps(ip);
+ for (slot, val) in t.iter_mut().zip(thread_caps.iter()) {
+ *slot = *val;
+ }
+ None
}
}
}
diff --git a/src/prog.rs b/src/prog.rs
index 74e5f2f6f..640b71b38 100644
--- a/src/prog.rs
+++ b/src/prog.rs
@@ -15,9 +15,9 @@ pub type InstPtr = usize;
/// Program is a sequence of instructions and various facts about those
/// instructions.
#[derive(Clone)]
-pub struct Program {
+pub struct Program {
/// A sequence of instructions that represents an NFA.
- pub insts: Vec,
+ pub insts: Vec,
/// Pointers to each Match instruction in the sequence.
///
/// This is always length 1 unless this program represents a regex set.
@@ -38,9 +38,6 @@ pub struct Program {
pub byte_classes: Vec,
/// When true, this program can only match valid UTF-8.
pub only_utf8: bool,
- /// When true, this program uses byte range instructions instead of Unicode
- /// range instructions.
- pub is_bytes: bool,
/// When true, the program is compiled for DFA matching. For example, this
/// implies `is_bytes` and also inserts a preceding `.*?` for unanchored
/// regexes.
@@ -74,7 +71,7 @@ pub struct Program {
pub dfa_size_limit: usize,
}
-impl Program {
+impl Program {
/// Creates an empty instruction sequence. Fields are given default
/// values.
pub fn new() -> Self {
@@ -86,7 +83,6 @@ impl Program {
start: 0,
byte_classes: vec![0; 256],
only_utf8: true,
- is_bytes: false,
is_dfa: false,
is_reverse: false,
is_anchored_start: false,
@@ -101,9 +97,9 @@ impl Program {
/// next pc that is not a no-op instruction.
pub fn skip(&self, mut pc: usize) -> usize {
loop {
- match self[pc] {
- Inst::Save(ref i) => pc = i.goto,
- _ => return pc,
+ match self[pc].save_goto() {
+ Some(goto) => pc = goto,
+ None => return pc,
}
}
}
@@ -117,10 +113,7 @@ impl Program {
// meaningless.
return false;
}
- match self[self.skip(pc)] {
- Inst::Match(_) => true,
- _ => false,
- }
+ self[self.skip(pc)].is_match()
}
/// Returns true if the current configuration demands that an implicit
@@ -132,7 +125,7 @@ impl Program {
/// Returns true if this program uses Byte instructions instead of
/// Char/Range instructions.
pub fn uses_bytes(&self) -> bool {
- self.is_bytes || self.is_dfa
+ I::IS_BYTES || self.is_dfa
}
/// Returns true if this program exclusively matches valid UTF-8 bytes.
@@ -148,7 +141,7 @@ impl Program {
// The only instruction that uses heap space is Ranges (for
// Unicode codepoint programs) to store non-overlapping codepoint
// ranges. To keep this operation constant time, we ignore them.
- (self.len() * mem::size_of::())
+ (self.len() * mem::size_of::())
+ (self.matches.len() * mem::size_of::())
+ (self.captures.len() * mem::size_of::