Skip to content

Commit 3c67d2b

Browse files
committed
Add support for entity-matching in domain= filter option
Related issue: - uBlockOrigin/uBlock-issues#1008 This commit adds support for entity-matching in the filter option `domain=`. Example: pattern$domain=google.* The `*` above is meant to match any suffix from the Public Suffix List. The semantics are exactly the same as those of the already existing entity-matching support in static extended filtering: - https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#entity Additionally, in this commit: Fix cases where "just-origin" filters of the form `|http*://` were erroneously normalized to `|http://`. The proper normalization of `|http*://` is `*`. Add support for storing hostname strings in the character buffer of a hntrie container. As of commit time, there are 5,544 instances of FilterOriginHit and 732 instances of FilterOriginMiss, filters which require storing/matching a single hostname string. Those strings are now stored in the character buffer of the already existing origin-related hntrie container. (The same approach is used for plain patterns which are not part of a bidi-trie.)
1 parent 56a3aff commit 3c67d2b

File tree

3 files changed

+376
-207
lines changed

3 files changed

+376
-207
lines changed

src/js/background.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,8 @@ const µBlock = (( ) => { // jshint ignore:line
138138

139139
// Read-only
140140
systemSettings: {
141-
compiledMagic: 27, // Increase when compiled format changes
142-
selfieMagic: 26, // Increase when selfie format changes
141+
compiledMagic: 28, // Increase when compiled format changes
142+
selfieMagic: 28, // Increase when selfie format changes
143143
},
144144

145145
// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501

src/js/hntrie.js

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,49 @@ const HNTrieContainer = class {
407407
return true;
408408
}
409409

410+
// The following *Hostname() methods can be used to store hostname strings
411+
// outside the trie. This is useful to store/match hostnames which are
412+
// not part of a collection, and yet still benefit from storing the strings
413+
// into a trie container's character buffer.
414+
// TODO: WASM version of matchesHostname()
415+
416+
storeHostname(hn) { // Copy the char codes of `hn` into this.buf and return where they were written.
417+
let n = hn.length;
418+
if ( n > 255 ) { // Over-long input: only the last 255 characters are kept.
419+
hn = hn.slice(-255);
420+
n = 255;
421+
}
422+
if ( (this.buf.length - this.buf32[CHAR1_SLOT]) < n ) { // Fewer than n elements left between CHAR1_SLOT and the end of buf.
423+
this.growBuf(0, n); // growBuf() is not shown here; presumably it enlarges the buffer to fit n more chars -- TODO confirm
424+
}
425+
const offset = this.buf32[CHAR1_SLOT]; // Write position, read only after the possible growBuf() call above.
426+
this.buf32[CHAR1_SLOT] = offset + n; // Advance the slot past the n elements about to be written.
427+
const buf8 = this.buf;
428+
for ( let i = 0; i < n; i++ ) {
429+
buf8[offset+i] = hn.charCodeAt(i); // NOTE(review): assumes buf holds bytes and hn is ASCII/punycode -- confirm
430+
}
431+
return offset - this.buf32[CHAR0_SLOT]; // Relative to CHAR0_SLOT; extractHostname()/matchesHostname() add CHAR0_SLOT back.
432+
}
433+
434+
extractHostname(i, n) { // Decode the n elements of this.buf starting at relative position i into a string.
435+
const textDecoder = new TextDecoder(); // No label is passed, so the decoder uses its default encoding (UTF-8).
436+
const offset = this.buf32[CHAR0_SLOT] + i; // i is relative to CHAR0_SLOT, the same form storeHostname() returns.
437+
return textDecoder.decode(this.buf.subarray(offset, offset + n));
438+
}
439+
440+
matchesHostname(hn, i, n) { // Compare the n elements stored at relative position i with the tail of the needle set from `hn`.
441+
this.setNeedle(hn); // setNeedle() is not shown here; presumably it copies hn to the start of this.buf and its length to buf[255] -- TODO confirm
442+
const buf8 = this.buf;
443+
const hr = buf8[255]; // presumably the needle length written by setNeedle() -- TODO confirm
444+
if ( n > hr ) { return false; } // The stored string is longer than hr, so it cannot match.
445+
const hl = hr - n; // Start index of the last n of those hr elements.
446+
const nl = this.buf32[CHAR0_SLOT] + i; // Absolute position of the stored string in buf.
447+
for ( let j = 0; j < n; j++ ) {
448+
if ( buf8[nl+j] !== buf8[hl+j] ) { return false; }
449+
}
450+
return n === hr || hn.charCodeAt(hl-1) === 0x2E /* '.' */; // Match if lengths are equal, or if the character of hn just before index hl is a dot.
451+
}
452+
410453
async enableWASM() {
411454
if ( typeof WebAssembly !== 'object' ) { return false; }
412455
if ( this.wasmMemory instanceof WebAssembly.Memory ) { return true; }

0 commit comments

Comments
 (0)