Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 1 addition & 136 deletions internal/re2go/grab-article.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 0 additions & 22 deletions internal/re2go/grab-article.re
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,3 @@ func MaybeItsACandidate(input string) bool {
*/
}
}

// CountCommas returns how many comma characters occur in input.
//
// Commas as used in Latin, Sindhi, Chinese and various other scripts.
// see: https://en.wikipedia.org/wiki/Comma#Comma_variants
// Original pattern: \u002C|\u060C|\uFE50|\uFE10|\uFE11|\u2E41|\u2E34|\u2E32|\uFF0C
//
// The scanning loop below is a re2c template block, not hand-written Go; the
// locals declared here (input, cursor, marker, limit) are presumably the
// names the expanded base_template reads and writes — verify against the
// template definition before renaming any of them.
func CountCommas(input string) int {
// count accumulates the number of comma matches seen so far.
var count int
var cursor, marker int
input += string(rune(0)) // add terminating null
limit := len(input) - 1 // limit points at the terminating null
// Reference marker explicitly so the Go compiler does not report it as
// unused (presumably the generated code does not always touch it).
_ = marker

// Rules in the re2c block: a character from the commas class increments
// count, any other input is skipped, and the `$` rule returns the total.
for { /*!use:re2c:base_template
re2c:case-insensitive = 1;

commas = [\u002C\u060C\uFE50\uFE10\uFE11\u2E41\u2E34\u2E32\uFF0C];

{commas} { count++; continue }
* { continue }
$ { return count }
*/
}
}
5 changes: 0 additions & 5 deletions internal/re2go/re2go_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,6 @@ func Test_MaybeItsACandidate(t *testing.T) {
assert.False(t, MaybeItsACandidate(`<p class="text">Paragraph text</p>`))
}

// Test_CountCommas checks CountCommas against an ASCII-only input and an
// input that mixes several comma variants.
func Test_CountCommas(t *testing.T) {
	cases := []struct {
		text string
		want int
	}{
		{"my,name,is,john", 3},
		{"now,its،a mixed﹐commas︐from︑various⹁place⸴and⸲country,", 9},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.want, CountCommas(tc.text))
	}
}

func Test_NormalizeSpaces(t *testing.T) {
assert.Equal(t, "some sentence", NormalizeSpaces("some sentence"))
assert.Equal(t, "with tabs", NormalizeSpaces("with \t \ttabs"))
Expand Down
Loading