diff --git a/javascript/ql/integration-tests/query-suite/javascript-code-quality.qls.expected b/javascript/ql/integration-tests/query-suite/javascript-code-quality.qls.expected index 876b5f25fa28..451a8b4bf273 100644 --- a/javascript/ql/integration-tests/query-suite/javascript-code-quality.qls.expected +++ b/javascript/ql/integration-tests/query-suite/javascript-code-quality.qls.expected @@ -3,4 +3,5 @@ ql/javascript/ql/src/Expressions/ExprHasNoEffect.ql ql/javascript/ql/src/Expressions/MissingAwait.ql ql/javascript/ql/src/LanguageFeatures/SpuriousArguments.ql ql/javascript/ql/src/Quality/UnhandledErrorInStreamPipeline.ql +ql/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.ql ql/javascript/ql/src/RegExp/RegExpAlwaysMatches.ql diff --git a/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.qhelp b/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.qhelp index 30ef990ec0cd..f94cced3d09a 100644 --- a/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.qhelp +++ b/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.qhelp @@ -5,26 +5,42 @@

-Character classes in regular expressions represent sets of characters, so there is no need to specify -the same character twice in one character class. Duplicate characters in character classes are at best -useless, and may even indicate a latent bug. +Character classes in regular expressions (denoted by square brackets []) represent sets of characters where the pattern matches any single character from that set. Since character classes are sets, specifying the same character multiple times is redundant and often indicates a programming error.

+

+Common mistakes include: +

+ +
-

If the character was accidentally duplicated, remove it. If the character class was meant to be a -group, replace the brackets with parentheses.

+

+Examine each duplicate character to determine the intended behavior: +

+ +

+Note that simply removing | characters from character classes is rarely the correct fix. Instead, analyze the pattern to understand what the author intended to match. +

-In the following example, the character class [password|pwd] contains two instances each -of the characters d, p, s, and w. The programmer -most likely meant to write (password|pwd) (a pattern that matches either the string -"password" or the string "pwd"), and accidentally mistyped the enclosing -brackets. +Example 1: Confusing character classes with groups +

+

+The pattern [password|pwd] does not match "password" or "pwd" as intended. Instead, it matches any single character from the set {p, a, s, w, o, r, d, |}. Note that | has no special meaning inside character classes.

@@ -33,10 +49,23 @@ brackets. To fix this problem, the regular expression should be rewritten to /(password|pwd) =/.

+

+Example 2: CSS unit matching +

+

+The pattern r?e[m|x] appears to be trying to match "rem" or "rex", but actually matches "re" followed by any of the characters {m, |, x}. The correct pattern should be r?e(m|x) or r?e[mx]. +

+ +

+Similarly, v[h|w|min|max] should be v(h|w|min|max) to properly match "vh", "vw", "vmin", or "vmax". +

+
  • Mozilla Developer Network: JavaScript Regular Expressions.
  • +
  • MDN: Character Classes - Details on how character classes work.
  • +
  • MDN: Groups and Ranges - Proper use of grouping with parentheses.
  • diff --git a/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.ql b/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.ql index 06b6d218acab..00366590fcb5 100644 --- a/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.ql +++ b/javascript/ql/src/RegExp/DuplicateCharacterInCharacterClass.ql @@ -5,7 +5,8 @@ * @kind problem * @problem.severity warning * @id js/regex/duplicate-in-character-class - * @tags reliability + * @tags quality + * reliability * correctness * regular-expressions * @precision very-high diff --git a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/DuplicateCharacterInCharacterClass.expected b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/DuplicateCharacterInCharacterClass.expected index 1857b9cc4df4..96f2cf20fd96 100644 --- a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/DuplicateCharacterInCharacterClass.expected +++ b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/DuplicateCharacterInCharacterClass.expected @@ -7,3 +7,39 @@ | tst.js:8:3:8:3 | a | Character 'a' is $@. | tst.js:8:5:8:5 | a | repeated in the same character class | | tst.js:9:3:9:6 | \\x0a | Character '\\x0a' is $@. | tst.js:9:7:9:10 | \\x0a | repeated in the same character class | | tst.js:10:3:10:8 | \\u000a | Character '\\u000a' is $@. | tst.js:10:9:10:10 | \\n | repeated in the same character class | +| tst.js:15:4:15:4 | \| | Character '\|' is $@. | tst.js:15:6:15:6 | \| | repeated in the same character class | +| tst.js:16:3:16:3 | : | Character ':' is $@. | tst.js:16:9:16:9 | : | repeated in the same character class | +| tst.js:17:4:17:4 | ^ | Character '^' is $@. | tst.js:17:11:17:11 | ^ | repeated in the same character class | +| tst.js:17:5:17:5 | s | Character 's' is $@. | tst.js:17:12:17:12 | s | repeated in the same character class | +| tst.js:17:6:17:6 | t | Character 't' is $@. | tst.js:17:13:17:13 | t | repeated in the same character class | +| tst.js:17:6:17:6 | t | Character 't' is $@. | tst.js:17:15:17:15 | t | repeated in the same character class | +| tst.js:17:6:17:6 | t | Character 't' is $@. | tst.js:17:19:17:19 | t | repeated in the same character class | +| tst.js:17:7:17:7 | y | Character 'y' is $@. | tst.js:17:20:17:20 | y | repeated in the same character class | +| tst.js:17:8:17:8 | l | Character 'l' is $@. | tst.js:17:21:17:21 | l | repeated in the same character class | +| tst.js:17:9:17:9 | e | Character 'e' is $@. | tst.js:17:22:17:22 | e | repeated in the same character class | +| tst.js:18:3:18:3 | . | Character '.' is $@. | tst.js:18:5:18:5 | . | repeated in the same character class | +| tst.js:19:6:19:6 | \u0645 | Character '\u0645' is $@. | tst.js:19:8:19:8 | \u0645 | repeated in the same character class | +| tst.js:22:3:22:4 | \\p | Character '\\p' is $@. | tst.js:22:15:22:16 | \\p | repeated in the same character class | +| tst.js:22:5:22:5 | { | Character '{' is $@. | tst.js:22:17:22:17 | { | repeated in the same character class | +| tst.js:22:7:22:7 | e | Character 'e' is $@. | tst.js:22:10:22:10 | e | repeated in the same character class | +| tst.js:22:8:22:8 | t | Character 't' is $@. | tst.js:22:9:22:9 | t | repeated in the same character class | +| tst.js:22:12:22:12 | } | Character '}' is $@. | tst.js:22:23:22:23 | } | repeated in the same character class | +| tst.js:22:13:22:13 | & | Character '&' is $@. | tst.js:22:14:22:14 | & | repeated in the same character class | +| tst.js:22:21:22:21 | I | Character 'I' is $@. | tst.js:22:22:22:22 | I | repeated in the same character class | +| tst.js:26:3:26:4 | \\p | Character '\\p' is $@. | tst.js:26:13:26:14 | \\p | repeated in the same character class | +| tst.js:26:5:26:5 | { | Character '{' is $@. | tst.js:26:15:26:15 | { | repeated in the same character class | +| tst.js:26:7:26:7 | e | Character 'e' is $@. | tst.js:26:10:26:10 | e | repeated in the same character class | +| tst.js:26:7:26:7 | e | Character 'e' is $@. | tst.js:26:17:26:17 | e | repeated in the same character class | +| tst.js:26:7:26:7 | e | Character 'e' is $@. | tst.js:26:28:26:28 | e | repeated in the same character class | +| tst.js:26:8:26:8 | t | Character 't' is $@. | tst.js:26:9:26:9 | t | repeated in the same character class | +| tst.js:26:11:26:11 | r | Character 'r' is $@. | tst.js:26:29:26:29 | r | repeated in the same character class | +| tst.js:26:12:26:12 | } | Character '}' is $@. | tst.js:26:30:26:30 | } | repeated in the same character class | +| tst.js:26:20:26:20 | m | Character 'm' is $@. | tst.js:26:26:26:26 | m | repeated in the same character class | +| tst.js:28:3:28:3 | / | Character '/' is $@. | tst.js:28:5:28:5 | / | repeated in the same character class | +| tst.js:30:4:30:4 | ^ | Character '^' is $@. | tst.js:30:5:30:5 | ^ | repeated in the same character class | +| tst.js:31:4:31:4 | * | Character '*' is $@. | tst.js:31:5:31:5 | * | repeated in the same character class | +| tst.js:33:5:33:5 | \| | Character '\|' is $@. | tst.js:33:6:33:7 | \\\| | repeated in the same character class | +| tst_replace.js:3:26:3:26 | n | Character 'n' is $@. | tst_replace.js:3:28:3:28 | n | repeated in the same character class | +| tst_replace.js:11:18:11:18 | n | Character 'n' is $@. | tst_replace.js:11:20:11:20 | n | repeated in the same character class | +| tst_replace.js:25:18:25:18 | n | Character 'n' is $@. | tst_replace.js:25:20:25:20 | n | repeated in the same character class | +| tst_replace.js:42:18:42:18 | n | Character 'n' is $@. | tst_replace.js:42:20:42:20 | n | repeated in the same character class | diff --git a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js index c87c7140a16c..fe291137c8a6 100644 --- a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js +++ b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst.js @@ -3,12 +3,31 @@ /[\uDC3A\uDC3C]/; /[??]/; // $ Alert /[\u003F\u003f]/; // $ Alert -/[\u003F?]/; // $ Alert +/[\u003F?]/; // $ Alert -- \u003F evaluates to ?, which is the same as ? in the character class /[\x3f\u003f]/; // $ Alert /[aaa]/; // $ Alert /[\x0a\x0a]/; // $ Alert -/[\u000a\n]/; // $ Alert +/[\u000a\n]/; // $ Alert -- \u000a evaluates to \n, which is the same as \n in the character class /[\u{ff}]/; /[\u{12340}-\u{12345}]/u; new RegExp("[\u{12340}-\u{12345}]", "u"); const regex = /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv; +/[a|b|c]/; // $ Alert -- Repeated | character in character class, which has no special meaning in this context +/[:alnum:]/; // $ Alert -- JavaScript does not support POSIX character classes like `[:alnum:]` in regular expressions, thus characters in the class are treated as literals +/[(^style|^staticStyle)]/; // $ Alert +/[.x.]/i; // $ Alert -- Repeated . character in character class +/^[يفمأمسند]/i; // $ Alert -- م duplicate +/[\u{1F600}-\u{1F64F}]/u; +/[\p{Letter}&&\p{ASCII}]/v; // && is an intersection operator while /v flag is present +/[\p{Letter}&&\p{ASCII}]/; // $ Alert -- without /v flag, && is not a valid operator and treated as member of character class thus duplicate +/[\p{Decimal_Number}&&[0-9A-F]]/v; +/[\p{Letter}--[aeiouAEIOU]]/v; +/[\p{Letter}\p{Decimal_Number}]/v; // Union operation between two character classes only with /v flag +/[\p{Letter}\p{Decimal_Number}]/; // $ Alert -- without /v flag, this is not a valid operation and treated as member of character class thus duplicate +/[\[\]]/; +/[/[/]]/; // $ Alert +/[^^abc]/; // First `^` is a negation operator, second treated as literal `^` is a member of character class +/[^^^abc]/; // $ Alert -- Second and third `^` are treated as literals thus duplicates +/[^**]/; // $ Alert +/[-a-z]/; // Matches `-` and range `a-z` no duplicate +/^[:|\|]/ // $ Alert -- `|` is treated as a literal character in the character class, thus duplicate even with escape character diff --git a/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst_replace.js b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst_replace.js new file mode 100644 index 000000000000..afd526007b02 --- /dev/null +++ b/javascript/ql/test/query-tests/RegExp/DuplicateCharacterInCharacterClass/tst_replace.js @@ -0,0 +1,44 @@ +function reg(){ + const nonIdPattern = 'a-z'; + const basePattern = /[]/.source; // $ SPURIOUS:Alert + const finalPattern = basePattern.replace(//g, nonIdPattern); + console.log(finalPattern); + const regex2 = new RegExp(finalPattern); +} + +function reg1(){ + const nonIdPattern = 'a-z'; + const reg = /[]/; // $ SPURIOUS:Alert + const basePattern = reg.source; + const finalPattern = basePattern.replace(//g, nonIdPattern); + console.log(finalPattern); + const regex2 = new RegExp(finalPattern); +} + +function replacer(reg1, reg2){ + const basePattern = reg1.source; + const finalPattern = basePattern.replace(//g, reg2); + return new RegExp(finalPattern); +} +function reg2(){ + const nonIdPattern = 'a-z'; + const reg = /[]/; // $ SPURIOUS:Alert + replacer(reg, nonIdPattern); +} + + +function replacer3(str, reg2){ + const finalPattern = str.replace(//g, reg2); + return new RegExp(finalPattern); +} + +function replacer2(reg1, reg2){ + const basePattern = reg1.source; + return replacer3(basePattern, reg2); +} + +function reg3(){ + const nonIdPattern = 'a-z'; + const reg = /[]/; // $ SPURIOUS:Alert + replacer2(reg, nonIdPattern); +}