From a85393b6552d5bef79cc9b93a3d3db65ca4d904b Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Tue, 6 Aug 2019 01:01:28 -0300 Subject: [PATCH 01/15] Restrict repository indexing by file extension --- .../doc/advanced/config-cheat-sheet.en-us.md | 2 ++ models/repo_indexer.go | 11 ++++++++++ modules/setting/indexer.go | 22 +++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 6f3bc465a659a..affb90eef720a 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -177,6 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. +- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to include/exclude from the index; a \`.' matches files with no extension. An empty list means include all files, disregarding `REPO_EXTENSIONS_LIST_EXCLUDE`. +- `REPO_EXTENSIONS_LIST_EXCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to exclude rather than include in the index. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. diff --git a/models/repo_indexer.go b/models/repo_indexer.go index c991b1aac9706..d54b9e397e058 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -232,6 +232,17 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e } func isIndexable(entry *git.TreeEntry) bool { + if setting.Indexer.FileExtensions != nil { + var ext string + parts := strings.Split(entry.Name(), ".") + cnt := len(parts) + if cnt > 1 { + ext = strings.ToLower(parts[cnt-1]) + } + if setting.Indexer.FileExtensions[ext] == setting.Indexer.ExcludeExtensions { + return false + } + } return entry.IsRegular() || entry.IsExecutable() } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 36fd4a020b2ce..ef3a45eabb01b 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -7,6 +7,7 @@ package setting import ( "path" "path/filepath" + "strings" ) // enumerates all the indexer queue types @@ -29,6 +30,8 @@ var ( IssueQueueDir string IssueQueueConnStr string IssueQueueBatchNumber int + FileExtensions map[string]bool + ExcludeExtensions bool }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -51,6 +54,9 @@ func newIndexerService() { if !filepath.IsAbs(Indexer.RepoPath) { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } + Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) + Indexer.ExcludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_EXCLUDE").MustBool(false) + Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType) @@ -58,3 +64,19 @@ func newIndexerService() { Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, "")) Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) } + +func extensionsFromString(from string) map[string]bool { + extmap := make(map[string]bool) + for _, ext := range strings.Split(strings.ToLower(from), ",") { + ext = strings.TrimSpace(ext) + if ext == "." { + extmap[""] = true + } else if ext != "" && !strings.Contains(ext, ".") { + extmap[ext] = true + } + } + if len(extmap) == 0 { + return nil + } + return extmap +} From c48c37ad09fb8ead9573677310868c7f21cdb35d Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Tue, 6 Aug 2019 02:51:42 -0300 Subject: [PATCH 02/15] Use REPO_EXTENSIONS_LIST_INCLUDE instead of REPO_EXTENSIONS_LIST_EXCLUDE and have a more flexible extension pattern --- .../doc/advanced/config-cheat-sheet.en-us.md | 4 ++-- models/repo_indexer.go | 2 +- modules/setting/indexer.go | 17 +++++++++++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index affb90eef720a..c12c05c43e867 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to include/exclude from the index; a \`.' matches files with no extension. An empty list means include all files, disregarding `REPO_EXTENSIONS_LIST_EXCLUDE`. -- `REPO_EXTENSIONS_LIST_EXCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to exclude rather than include in the index. +- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to exclude from the index; a \`.' matches files with no extension. An empty list means do not exclude any files. +- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. diff --git a/models/repo_indexer.go b/models/repo_indexer.go index d54b9e397e058..dd36cf931246c 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -239,7 +239,7 @@ func isIndexable(entry *git.TreeEntry) bool { if cnt > 1 { ext = strings.ToLower(parts[cnt-1]) } - if setting.Indexer.FileExtensions[ext] == setting.Indexer.ExcludeExtensions { + if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions { return false } } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index ef3a45eabb01b..304eada60770f 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -31,7 +31,7 @@ var ( IssueQueueConnStr string IssueQueueBatchNumber int FileExtensions map[string]bool - ExcludeExtensions bool + IncludeExtensions bool }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -55,7 +55,7 @@ func newIndexerService() { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) - Indexer.ExcludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_EXCLUDE").MustBool(false) + Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) @@ -69,10 +69,19 @@ func extensionsFromString(from string) map[string]bool { extmap := make(map[string]bool) for _, ext := range strings.Split(strings.ToLower(from), ",") { ext = strings.TrimSpace(ext) + // Accept *.txt, .txt and txt. Also use . to mean no ext + if strings.HasPrefix(ext, "*.") { + ext = ext[1:] + } if ext == "." { extmap[""] = true - } else if ext != "" && !strings.Contains(ext, ".") { - extmap[ext] = true + } else { + if strings.HasPrefix(ext, ".") { + ext = ext[1:] + } + if ext != "" { + extmap[ext] = true + } } } if len(extmap) == 0 { From c6d5a7984d471b2cf628e37b2fd61cb11f46cd9c Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Tue, 6 Aug 2019 02:56:21 -0300 Subject: [PATCH 03/15] Corrected to pass lint gosimple --- modules/setting/indexer.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 304eada60770f..51b822194a1d4 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -76,9 +76,7 @@ func extensionsFromString(from string) map[string]bool { if ext == "." { extmap[""] = true } else { - if strings.HasPrefix(ext, ".") { - ext = ext[1:] - } + ext = strings.TrimPrefix(ext, ".") if ext != "" { extmap[ext] = true } From 72a650c8e42f4abf59d5df7cd5dc27b451494cc6 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 7 Aug 2019 01:26:12 -0300 Subject: [PATCH 04/15] Add wildcard support to REPO_INDEXER_EXTENSIONS --- .../doc/advanced/config-cheat-sheet.en-us.md | 4 +- go.mod | 1 + go.sum | 2 + models/repo_indexer.go | 16 ++++---- modules/setting/indexer.go | 39 +++++++++---------- vendor/modules.txt | 9 +++++ 6 files changed, 42 insertions(+), 29 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index c12c05c43e867..b8d7bb056b24e 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to exclude from the index; a \`.' matches files with no extension. An empty list means do not exclude any files. -- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index. +- `REPO_INDEXER_PATTERNS`: **empty**: A comma separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any files with .txt extension. +- `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file extensions to **include** rather than exclude from the index. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. diff --git a/go.mod b/go.mod index 804573e0d7bfd..ebe44f80e027f 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,7 @@ require ( github.com/go-redis/redis v6.15.2+incompatible github.com/go-sql-driver/mysql v1.4.1 github.com/go-xorm/xorm v0.7.4 + github.com/gobwas/glob v0.2.3 github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 github.com/google/go-github/v24 v24.0.1 diff --git a/go.sum b/go.sum index 89a2362e96573..9183c3d74f38a 100644 --- a/go.sum +++ b/go.sum @@ -145,6 +145,8 @@ github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:9wScpmSP5A3Bk github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM= github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw= github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= diff --git a/models/repo_indexer.go b/models/repo_indexer.go index dd36cf931246c..dabcce7aee407 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -232,14 +232,16 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e } func isIndexable(entry *git.TreeEntry) bool { - if setting.Indexer.FileExtensions != nil { - var ext string - parts := strings.Split(entry.Name(), ".") - cnt := len(parts) - if cnt > 1 { - ext = strings.ToLower(parts[cnt-1]) + if setting.Indexer.FilePatterns != nil { + var found bool + name := strings.ToLower(entry.Name()) + for _, g := range setting.Indexer.FilePatterns { + if g.Match(name) { + found = true + break + } } - if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions { + if found != setting.Indexer.IncludePatterns { return false } } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 51b822194a1d4..3ee3b2e680582 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -8,6 +8,10 @@ import ( "path" "path/filepath" "strings" + + "code.gitea.io/gitea/modules/log" + + "github.com/gobwas/glob" ) // enumerates all the indexer queue types @@ -30,8 +34,8 @@ var ( IssueQueueDir string IssueQueueConnStr string IssueQueueBatchNumber int - FileExtensions map[string]bool - IncludeExtensions bool + FilePatterns []glob.Glob + IncludePatterns bool }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -54,8 +58,8 @@ func newIndexerService() { if !filepath.IsAbs(Indexer.RepoPath) { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } - Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) - Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false) + Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString("")) + Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) @@ -65,25 +69,20 @@ func newIndexerService() { Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) } -func extensionsFromString(from string) map[string]bool { - extmap := make(map[string]bool) - for _, ext := range strings.Split(strings.ToLower(from), ",") { - ext = strings.TrimSpace(ext) - // Accept *.txt, .txt and txt. Also use . to mean no ext - if strings.HasPrefix(ext, "*.") { - ext = ext[1:] - } - if ext == "." { - extmap[""] = true - } else { - ext = strings.TrimPrefix(ext, ".") - if ext != "" { - extmap[ext] = true +func extensionsFromString(from string) []glob.Glob { + extarr := make([]glob.Glob, 0, 10) + for _, expr := range strings.Split(strings.ToLower(from), ",") { + expr = strings.TrimSpace(expr) + if expr != "" { + if g, err := glob.Compile(expr, '.', '/'); err != nil { + log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err) + } else { + extarr = append(extarr, g) } } } - if len(extmap) == 0 { + if len(extarr) == 0 { return nil } - return extmap + return extarr } diff --git a/vendor/modules.txt b/vendor/modules.txt index 703f161a8aabc..6db4d2664e79a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -158,6 +158,15 @@ github.com/go-redis/redis/internal/util github.com/go-sql-driver/mysql # github.com/go-xorm/xorm v0.7.4 github.com/go-xorm/xorm +# github.com/gobwas/glob v0.2.3 +github.com/gobwas/glob +github.com/gobwas/glob/compiler +github.com/gobwas/glob/syntax +github.com/gobwas/glob/match +github.com/gobwas/glob/syntax/ast +github.com/gobwas/glob/util/runes +github.com/gobwas/glob/syntax/lexer +github.com/gobwas/glob/util/strings # github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogits/chardet # github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 From 1d7edb41e124eb6dfd0cf23d8a746e0deb58ec55 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 7 Aug 2019 02:22:43 -0300 Subject: [PATCH 05/15] This reverts commit 72a650c8e42f4abf59d5df7cd5dc27b451494cc6. --- .../doc/advanced/config-cheat-sheet.en-us.md | 4 +- go.mod | 1 - go.sum | 2 - models/repo_indexer.go | 16 ++++---- modules/setting/indexer.go | 39 ++++++++++--------- vendor/modules.txt | 9 ----- 6 files changed, 29 insertions(+), 42 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index b8d7bb056b24e..c12c05c43e867 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_PATTERNS`: **empty**: A comma separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any files with .txt extension. -- `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file extensions to **include** rather than exclude from the index. +- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to exclude from the index; a \`.' matches files with no extension. An empty list means do not exclude any files. +- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. diff --git a/go.mod b/go.mod index ebe44f80e027f..804573e0d7bfd 100644 --- a/go.mod +++ b/go.mod @@ -54,7 +54,6 @@ require ( github.com/go-redis/redis v6.15.2+incompatible github.com/go-sql-driver/mysql v1.4.1 github.com/go-xorm/xorm v0.7.4 - github.com/gobwas/glob v0.2.3 github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 github.com/google/go-github/v24 v24.0.1 diff --git a/go.sum b/go.sum index 9183c3d74f38a..89a2362e96573 100644 --- a/go.sum +++ b/go.sum @@ -145,8 +145,6 @@ github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:9wScpmSP5A3Bk github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM= github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw= github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA= -github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= -github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= diff --git a/models/repo_indexer.go b/models/repo_indexer.go index dabcce7aee407..dd36cf931246c 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -232,16 +232,14 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e } func isIndexable(entry *git.TreeEntry) bool { - if setting.Indexer.FilePatterns != nil { - var found bool - name := strings.ToLower(entry.Name()) - for _, g := range setting.Indexer.FilePatterns { - if g.Match(name) { - found = true - break - } + if setting.Indexer.FileExtensions != nil { + var ext string + parts := strings.Split(entry.Name(), ".") + cnt := len(parts) + if cnt > 1 { + ext = strings.ToLower(parts[cnt-1]) } - if found != setting.Indexer.IncludePatterns { + if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions { return false } } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 3ee3b2e680582..51b822194a1d4 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -8,10 +8,6 @@ import ( "path" "path/filepath" "strings" - - "code.gitea.io/gitea/modules/log" - - "github.com/gobwas/glob" ) // enumerates all the indexer queue types @@ -34,8 +30,8 @@ var ( IssueQueueDir string IssueQueueConnStr string IssueQueueBatchNumber int - FilePatterns []glob.Glob - IncludePatterns bool + FileExtensions map[string]bool + IncludeExtensions bool }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -58,8 +54,8 @@ func newIndexerService() { if !filepath.IsAbs(Indexer.RepoPath) { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } - Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString("")) - Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false) + Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) + Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) @@ -69,20 +65,25 @@ func newIndexerService() { Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) } -func extensionsFromString(from string) []glob.Glob { - extarr := make([]glob.Glob, 0, 10) - for _, expr := range strings.Split(strings.ToLower(from), ",") { - expr = strings.TrimSpace(expr) - if expr != "" { - if g, err := glob.Compile(expr, '.', '/'); err != nil { - log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err) - } else { - extarr = append(extarr, g) +func extensionsFromString(from string) map[string]bool { + extmap := make(map[string]bool) + for _, ext := range strings.Split(strings.ToLower(from), ",") { + ext = strings.TrimSpace(ext) + // Accept *.txt, .txt and txt. Also use . to mean no ext + if strings.HasPrefix(ext, "*.") { + ext = ext[1:] + } + if ext == "." { + extmap[""] = true + } else { + ext = strings.TrimPrefix(ext, ".") + if ext != "" { + extmap[ext] = true } } } - if len(extarr) == 0 { + if len(extmap) == 0 { return nil } - return extarr + return extmap } diff --git a/vendor/modules.txt b/vendor/modules.txt index 6db4d2664e79a..703f161a8aabc 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -158,15 +158,6 @@ github.com/go-redis/redis/internal/util github.com/go-sql-driver/mysql # github.com/go-xorm/xorm v0.7.4 github.com/go-xorm/xorm -# github.com/gobwas/glob v0.2.3 -github.com/gobwas/glob -github.com/gobwas/glob/compiler -github.com/gobwas/glob/syntax -github.com/gobwas/glob/match -github.com/gobwas/glob/syntax/ast -github.com/gobwas/glob/util/runes -github.com/gobwas/glob/syntax/lexer -github.com/gobwas/glob/util/strings # github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogits/chardet # github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 From 7450aeeb81d04743f9f35a3e647d476655ad8e79 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 7 Aug 2019 02:24:32 -0300 Subject: [PATCH 06/15] Add wildcard support to REPO_INDEXER_EXTENSIONS (no make vendor) --- .../doc/advanced/config-cheat-sheet.en-us.md | 4 +- models/repo_indexer.go | 16 ++++---- modules/setting/indexer.go | 39 +++++++++---------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index c12c05c43e867..b8d7bb056b24e 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -177,8 +177,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_EXTENSIONS`: **empty**: A comma separated list of file extensions to exclude from the index; a \`.' matches files with no extension. An empty list means do not exclude any files. -- `REPO_EXTENSIONS_LIST_INCLUDE`: **false**: If true, `REPO_INDEXER_EXTENSIONS` are the file extensions to include rather than exclude from the index. +- `REPO_INDEXER_PATTERNS`: **empty**: A comma separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any files with .txt extension. +- `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file extensions to **include** rather than exclude from the index. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. diff --git a/models/repo_indexer.go b/models/repo_indexer.go index dd36cf931246c..dabcce7aee407 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -232,14 +232,16 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e } func isIndexable(entry *git.TreeEntry) bool { - if setting.Indexer.FileExtensions != nil { - var ext string - parts := strings.Split(entry.Name(), ".") - cnt := len(parts) - if cnt > 1 { - ext = strings.ToLower(parts[cnt-1]) + if setting.Indexer.FilePatterns != nil { + var found bool + name := strings.ToLower(entry.Name()) + for _, g := range setting.Indexer.FilePatterns { + if g.Match(name) { + found = true + break + } } - if setting.Indexer.FileExtensions[ext] != setting.Indexer.IncludeExtensions { + if found != setting.Indexer.IncludePatterns { return false } } diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 51b822194a1d4..3ee3b2e680582 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -8,6 +8,10 @@ import ( "path" "path/filepath" "strings" + + "code.gitea.io/gitea/modules/log" + + "github.com/gobwas/glob" ) // enumerates all the indexer queue types @@ -30,8 +34,8 @@ var ( IssueQueueDir string IssueQueueConnStr string IssueQueueBatchNumber int - FileExtensions map[string]bool - IncludeExtensions bool + FilePatterns []glob.Glob + IncludePatterns bool }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -54,8 +58,8 @@ func newIndexerService() { if !filepath.IsAbs(Indexer.RepoPath) { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } - Indexer.FileExtensions = extensionsFromString(sec.Key("REPO_INDEXER_EXTENSIONS").MustString("")) - Indexer.IncludeExtensions = sec.Key("REPO_EXTENSIONS_LIST_INCLUDE").MustBool(false) + Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString("")) + Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) @@ -65,25 +69,20 @@ func newIndexerService() { Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) } -func extensionsFromString(from string) map[string]bool { - extmap := make(map[string]bool) - for _, ext := range strings.Split(strings.ToLower(from), ",") { - ext = strings.TrimSpace(ext) - // Accept *.txt, .txt and txt. Also use . to mean no ext - if strings.HasPrefix(ext, "*.") { - ext = ext[1:] - } - if ext == "." { - extmap[""] = true - } else { - ext = strings.TrimPrefix(ext, ".") - if ext != "" { - extmap[ext] = true +func extensionsFromString(from string) []glob.Glob { + extarr := make([]glob.Glob, 0, 10) + for _, expr := range strings.Split(strings.ToLower(from), ",") { + expr = strings.TrimSpace(expr) + if expr != "" { + if g, err := glob.Compile(expr, '.', '/'); err != nil { + log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err) + } else { + extarr = append(extarr, g) } } } - if len(extmap) == 0 { + if len(extarr) == 0 { return nil } - return extmap + return extarr } From 106faf3371ca7e90fa5587c8a194dbf81c8e6a6d Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 7 Aug 2019 11:01:03 -0300 Subject: [PATCH 07/15] Simplify isIndexable() for better clarity --- models/repo_indexer.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/models/repo_indexer.go b/models/repo_indexer.go index dabcce7aee407..5bbd2c334c006 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -232,20 +232,19 @@ func addDelete(filename string, repo *Repository, batch rupture.FlushingBatch) e } func isIndexable(entry *git.TreeEntry) bool { - if setting.Indexer.FilePatterns != nil { - var found bool - name := strings.ToLower(entry.Name()) - for _, g := range setting.Indexer.FilePatterns { - if g.Match(name) { - found = true - break - } - } - if found != setting.Indexer.IncludePatterns { - return false + if !entry.IsRegular() && !entry.IsExecutable() { + return false + } + if setting.Indexer.FilePatterns == nil { + return true + } + name := strings.ToLower(entry.Name()) + for _, g := range setting.Indexer.FilePatterns { + if g.Match(name) { + return setting.Indexer.IncludePatterns } } - return entry.IsRegular() || entry.IsExecutable() + return !setting.Indexer.IncludePatterns } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command From e48f041d59f1db255e3ba72d94de529461122b5e Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 7 Aug 2019 11:01:43 -0300 Subject: [PATCH 08/15] Add gobwas/glob to vendors --- go.mod | 1 + go.sum | 2 + vendor/github.com/gobwas/glob/.gitignore | 8 + vendor/github.com/gobwas/glob/.travis.yml | 9 + vendor/github.com/gobwas/glob/LICENSE | 21 + vendor/github.com/gobwas/glob/bench.sh | 26 + .../gobwas/glob/compiler/compiler.go | 525 ++++++++++++++++++ vendor/github.com/gobwas/glob/glob.go | 80 +++ vendor/github.com/gobwas/glob/match/any.go | 45 ++ vendor/github.com/gobwas/glob/match/any_of.go | 82 +++ vendor/github.com/gobwas/glob/match/btree.go | 146 +++++ .../github.com/gobwas/glob/match/contains.go | 58 ++ .../github.com/gobwas/glob/match/every_of.go | 99 ++++ vendor/github.com/gobwas/glob/match/list.go | 49 ++ vendor/github.com/gobwas/glob/match/match.go | 81 +++ vendor/github.com/gobwas/glob/match/max.go | 49 ++ vendor/github.com/gobwas/glob/match/min.go | 57 ++ .../github.com/gobwas/glob/match/nothing.go | 27 + vendor/github.com/gobwas/glob/match/prefix.go | 50 ++ .../gobwas/glob/match/prefix_any.go | 55 ++ .../gobwas/glob/match/prefix_suffix.go | 62 +++ vendor/github.com/gobwas/glob/match/range.go | 48 ++ vendor/github.com/gobwas/glob/match/row.go | 77 +++ .../github.com/gobwas/glob/match/segments.go | 91 +++ vendor/github.com/gobwas/glob/match/single.go | 43 ++ vendor/github.com/gobwas/glob/match/suffix.go | 35 ++ .../gobwas/glob/match/suffix_any.go | 43 ++ vendor/github.com/gobwas/glob/match/super.go | 33 ++ vendor/github.com/gobwas/glob/match/text.go | 45 ++ vendor/github.com/gobwas/glob/readme.md | 148 +++++ .../github.com/gobwas/glob/syntax/ast/ast.go | 122 ++++ .../gobwas/glob/syntax/ast/parser.go | 157 ++++++ .../gobwas/glob/syntax/lexer/lexer.go | 273 +++++++++ .../gobwas/glob/syntax/lexer/token.go | 88 +++ .../github.com/gobwas/glob/syntax/syntax.go | 14 + .../gobwas/glob/util/runes/runes.go | 154 +++++ .../gobwas/glob/util/strings/strings.go | 39 ++ vendor/modules.txt | 9 + 38 files changed, 2951 insertions(+) create mode 100644 vendor/github.com/gobwas/glob/.gitignore create mode 100644 vendor/github.com/gobwas/glob/.travis.yml create mode 100644 vendor/github.com/gobwas/glob/LICENSE create mode 100644 vendor/github.com/gobwas/glob/bench.sh create mode 100644 vendor/github.com/gobwas/glob/compiler/compiler.go create mode 100644 vendor/github.com/gobwas/glob/glob.go create mode 100644 vendor/github.com/gobwas/glob/match/any.go create mode 100644 vendor/github.com/gobwas/glob/match/any_of.go create mode 100644 vendor/github.com/gobwas/glob/match/btree.go create mode 100644 vendor/github.com/gobwas/glob/match/contains.go create mode 100644 vendor/github.com/gobwas/glob/match/every_of.go create mode 100644 vendor/github.com/gobwas/glob/match/list.go create mode 100644 vendor/github.com/gobwas/glob/match/match.go create mode 100644 vendor/github.com/gobwas/glob/match/max.go create mode 100644 vendor/github.com/gobwas/glob/match/min.go create mode 100644 vendor/github.com/gobwas/glob/match/nothing.go create mode 100644 vendor/github.com/gobwas/glob/match/prefix.go create mode 100644 vendor/github.com/gobwas/glob/match/prefix_any.go create mode 100644 vendor/github.com/gobwas/glob/match/prefix_suffix.go create mode 100644 vendor/github.com/gobwas/glob/match/range.go create mode 100644 vendor/github.com/gobwas/glob/match/row.go create mode 100644 vendor/github.com/gobwas/glob/match/segments.go create mode 100644 vendor/github.com/gobwas/glob/match/single.go create mode 100644 vendor/github.com/gobwas/glob/match/suffix.go create mode 100644 vendor/github.com/gobwas/glob/match/suffix_any.go create mode 100644 vendor/github.com/gobwas/glob/match/super.go create mode 100644 vendor/github.com/gobwas/glob/match/text.go create mode 100644 vendor/github.com/gobwas/glob/readme.md create mode 100644 vendor/github.com/gobwas/glob/syntax/ast/ast.go create mode 100644 vendor/github.com/gobwas/glob/syntax/ast/parser.go create mode 100644 vendor/github.com/gobwas/glob/syntax/lexer/lexer.go create mode 100644 vendor/github.com/gobwas/glob/syntax/lexer/token.go create mode 100644 vendor/github.com/gobwas/glob/syntax/syntax.go create mode 100644 vendor/github.com/gobwas/glob/util/runes/runes.go create mode 100644 vendor/github.com/gobwas/glob/util/strings/strings.go diff --git a/go.mod b/go.mod index 804573e0d7bfd..ebe44f80e027f 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,7 @@ require ( github.com/go-redis/redis v6.15.2+incompatible github.com/go-sql-driver/mysql v1.4.1 github.com/go-xorm/xorm v0.7.4 + github.com/gobwas/glob v0.2.3 github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 github.com/google/go-github/v24 v24.0.1 diff --git a/go.sum b/go.sum index 89a2362e96573..9183c3d74f38a 100644 --- a/go.sum +++ b/go.sum @@ -145,6 +145,8 @@ github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:9wScpmSP5A3Bk github.com/go-xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:56xuuqnHyryaerycW3BfssRdxQstACi0Epw/yC5E2xM= github.com/go-xorm/xorm v0.7.4 h1:g/NgC590SzqV5VKmdRDNe/K3Holw3YJUCXX28r+rFGw= github.com/go-xorm/xorm v0.7.4/go.mod h1:vpza5fydeRgt+stvo9qgMhSNohYqmNt0I1/D6hkCekA= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 h1:deE7ritpK04PgtpyVOS2TYcQEld9qLCD5b5EbVNOuLA= github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561/go.mod h1:YgYOrVn3Nj9Tq0EvjmFbphRytDj7JNRoWSStJZWDJTQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= diff --git a/vendor/github.com/gobwas/glob/.gitignore b/vendor/github.com/gobwas/glob/.gitignore new file mode 100644 index 0000000000000..b4ae623be5522 --- /dev/null +++ b/vendor/github.com/gobwas/glob/.gitignore @@ -0,0 +1,8 @@ +glob.iml +.idea +*.cpu +*.mem +*.test +*.dot +*.png +*.svg diff --git a/vendor/github.com/gobwas/glob/.travis.yml b/vendor/github.com/gobwas/glob/.travis.yml new file mode 100644 index 0000000000000..e8a276826cf2e --- /dev/null +++ b/vendor/github.com/gobwas/glob/.travis.yml @@ -0,0 +1,9 @@ +sudo: false + +language: go + +go: + - 1.5.3 + +script: + - go test -v ./... diff --git a/vendor/github.com/gobwas/glob/LICENSE b/vendor/github.com/gobwas/glob/LICENSE new file mode 100644 index 0000000000000..9d4735cad9f43 --- /dev/null +++ b/vendor/github.com/gobwas/glob/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Sergey Kamardin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/vendor/github.com/gobwas/glob/bench.sh b/vendor/github.com/gobwas/glob/bench.sh new file mode 100644 index 0000000000000..804cf22e6460b --- /dev/null +++ b/vendor/github.com/gobwas/glob/bench.sh @@ -0,0 +1,26 @@ +#! /bin/bash + +bench() { + filename="/tmp/$1-$2.bench" + if test -e "${filename}"; + then + echo "Already exists ${filename}" + else + backup=`git rev-parse --abbrev-ref HEAD` + git checkout $1 + echo -n "Creating ${filename}... " + go test ./... -run=NONE -bench=$2 > "${filename}" -benchmem + echo "OK" + git checkout ${backup} + sleep 5 + fi +} + + +to=$1 +current=`git rev-parse --abbrev-ref HEAD` + +bench ${to} $2 +bench ${current} $2 + +benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench" diff --git a/vendor/github.com/gobwas/glob/compiler/compiler.go b/vendor/github.com/gobwas/glob/compiler/compiler.go new file mode 100644 index 0000000000000..02e7de80a0b07 --- /dev/null +++ b/vendor/github.com/gobwas/glob/compiler/compiler.go @@ -0,0 +1,525 @@ +package compiler + +// TODO use constructor with all matchers, and to their structs private +// TODO glue multiple Text nodes (like after QuoteMeta) + +import ( + "fmt" + "reflect" + + "github.com/gobwas/glob/match" + "github.com/gobwas/glob/syntax/ast" + "github.com/gobwas/glob/util/runes" +) + +func optimizeMatcher(matcher match.Matcher) match.Matcher { + switch m := matcher.(type) { + + case match.Any: + if len(m.Separators) == 0 { + return match.NewSuper() + } + + case match.AnyOf: + if len(m.Matchers) == 1 { + return m.Matchers[0] + } + + return m + + case match.List: + if m.Not == false && len(m.List) == 1 { + return match.NewText(string(m.List)) + } + + return m + + case match.BTree: + m.Left = optimizeMatcher(m.Left) + m.Right = optimizeMatcher(m.Right) + + r, ok := m.Value.(match.Text) + if !ok { + return m + } + + var ( + leftNil = m.Left == nil + rightNil = m.Right == nil + ) + if leftNil && rightNil { + return match.NewText(r.Str) + } + + _, leftSuper := m.Left.(match.Super) + lp, leftPrefix := m.Left.(match.Prefix) + la, leftAny := m.Left.(match.Any) + + _, rightSuper := m.Right.(match.Super) + rs, rightSuffix := m.Right.(match.Suffix) + ra, rightAny := m.Right.(match.Any) + + switch { + case leftSuper && rightSuper: + return match.NewContains(r.Str, false) + + case leftSuper && rightNil: + return match.NewSuffix(r.Str) + + case rightSuper && leftNil: + return match.NewPrefix(r.Str) + + case leftNil && rightSuffix: + return match.NewPrefixSuffix(r.Str, rs.Suffix) + + case rightNil && leftPrefix: + return match.NewPrefixSuffix(lp.Prefix, r.Str) + + case rightNil && leftAny: + return match.NewSuffixAny(r.Str, la.Separators) + + case leftNil && rightAny: + return match.NewPrefixAny(r.Str, ra.Separators) + } + + return m + } + + return matcher +} + +func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { + if len(matchers) == 0 { + return nil, fmt.Errorf("compile error: need at least one matcher") + } + if len(matchers) == 1 { + return matchers[0], nil + } + if m := glueMatchers(matchers); m != nil { + return m, nil + } + + idx := -1 + maxLen := -1 + var val match.Matcher + for i, matcher := range matchers { + if l := matcher.Len(); l != -1 && l >= maxLen { + maxLen = l + idx = i + val = matcher + } + } + + if val == nil { // not found matcher with static length + r, err := compileMatchers(matchers[1:]) + if err != nil { + return nil, err + } + return match.NewBTree(matchers[0], nil, r), nil + } + + left := matchers[:idx] + var right []match.Matcher + if len(matchers) > idx+1 { + right = matchers[idx+1:] + } + + var l, r match.Matcher + var err error + if len(left) > 0 { + l, err = compileMatchers(left) + if err != nil { + return nil, err + } + } + + if len(right) > 0 { + r, err = compileMatchers(right) + if err != nil { + return nil, err + } + } + + return match.NewBTree(val, l, r), nil +} + +func glueMatchers(matchers []match.Matcher) match.Matcher { + if m := glueMatchersAsEvery(matchers); m != nil { + return m + } + if m := glueMatchersAsRow(matchers); m != nil { + return m + } + return nil +} + +func glueMatchersAsRow(matchers []match.Matcher) match.Matcher { + if len(matchers) <= 1 { + return nil + } + + var ( + c []match.Matcher + l int + ) + for _, matcher := range matchers { + if ml := matcher.Len(); ml == -1 { + return nil + } else { + c = append(c, matcher) + l += ml + } + } + return match.NewRow(l, c...) +} + +func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher { + if len(matchers) <= 1 { + return nil + } + + var ( + hasAny bool + hasSuper bool + hasSingle bool + min int + separator []rune + ) + + for i, matcher := range matchers { + var sep []rune + + switch m := matcher.(type) { + case match.Super: + sep = []rune{} + hasSuper = true + + case match.Any: + sep = m.Separators + hasAny = true + + case match.Single: + sep = m.Separators + hasSingle = true + min++ + + case match.List: + if !m.Not { + return nil + } + sep = m.List + hasSingle = true + min++ + + default: + return nil + } + + // initialize + if i == 0 { + separator = sep + } + + if runes.Equal(sep, separator) { + continue + } + + return nil + } + + if hasSuper && !hasAny && !hasSingle { + return match.NewSuper() + } + + if hasAny && !hasSuper && !hasSingle { + return match.NewAny(separator) + } + + if (hasAny || hasSuper) && min > 0 && len(separator) == 0 { + return match.NewMin(min) + } + + every := match.NewEveryOf() + + if min > 0 { + every.Add(match.NewMin(min)) + + if !hasAny && !hasSuper { + every.Add(match.NewMax(min)) + } + } + + if len(separator) > 0 { + every.Add(match.NewContains(string(separator), true)) + } + + return every +} + +func minimizeMatchers(matchers []match.Matcher) []match.Matcher { + var done match.Matcher + var left, right, count int + + for l := 0; l < len(matchers); l++ { + for r := len(matchers); r > l; r-- { + if glued := glueMatchers(matchers[l:r]); glued != nil { + var swap bool + + if done == nil { + swap = true + } else { + cl, gl := done.Len(), glued.Len() + swap = cl > -1 && gl > -1 && gl > cl + swap = swap || count < r-l + } + + if swap { + done = glued + left = l + right = r + count = r - l + } + } + } + } + + if done == nil { + return matchers + } + + next := append(append([]match.Matcher{}, matchers[:left]...), done) + if right < len(matchers) { + next = append(next, matchers[right:]...) + } + + if len(next) == len(matchers) { + return next + } + + return minimizeMatchers(next) +} + +// minimizeAnyOf tries to apply some heuristics to minimize number of nodes in given tree +func minimizeTree(tree *ast.Node) *ast.Node { + switch tree.Kind { + case ast.KindAnyOf: + return minimizeTreeAnyOf(tree) + default: + return nil + } +} + +// minimizeAnyOf tries to find common children of given node of AnyOf pattern +// it searches for common children from left and from right +// if any common children are found – then it returns new optimized ast tree +// else it returns nil +func minimizeTreeAnyOf(tree *ast.Node) *ast.Node { + if !areOfSameKind(tree.Children, ast.KindPattern) { + return nil + } + + commonLeft, commonRight := commonChildren(tree.Children) + commonLeftCount, commonRightCount := len(commonLeft), len(commonRight) + if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts + return nil + } + + var result []*ast.Node + if commonLeftCount > 0 { + result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...)) + } + + var anyOf []*ast.Node + for _, child := range tree.Children { + reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount] + var node *ast.Node + if len(reuse) == 0 { + // this pattern is completely reduced by commonLeft and commonRight patterns + // so it become nothing + node = ast.NewNode(ast.KindNothing, nil) + } else { + node = ast.NewNode(ast.KindPattern, nil, reuse...) + } + anyOf = appendIfUnique(anyOf, node) + } + switch { + case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing: + result = append(result, anyOf[0]) + case len(anyOf) > 1: + result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...)) + } + + if commonRightCount > 0 { + result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...)) + } + + return ast.NewNode(ast.KindPattern, nil, result...) +} + +func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) { + if len(nodes) <= 1 { + return + } + + // find node that has least number of children + idx := leastChildren(nodes) + if idx == -1 { + return + } + tree := nodes[idx] + treeLength := len(tree.Children) + + // allocate max able size for rightCommon slice + // to get ability insert elements in reverse order (from end to start) + // without sorting + commonRight = make([]*ast.Node, treeLength) + lastRight := treeLength // will use this to get results as commonRight[lastRight:] + + var ( + breakLeft bool + breakRight bool + commonTotal int + ) + for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 { + treeLeft := tree.Children[i] + treeRight := tree.Children[j] + + for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ { + // skip least children node + if k == idx { + continue + } + + restLeft := nodes[k].Children[i] + restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength] + + breakLeft = breakLeft || !treeLeft.Equal(restLeft) + + // disable searching for right common parts, if left part is already overlapping + breakRight = breakRight || (!breakLeft && j <= i) + breakRight = breakRight || !treeRight.Equal(restRight) + } + + if !breakLeft { + commonTotal++ + commonLeft = append(commonLeft, treeLeft) + } + if !breakRight { + commonTotal++ + lastRight = j + commonRight[j] = treeRight + } + } + + commonRight = commonRight[lastRight:] + + return +} + +func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node { + for _, n := range target { + if reflect.DeepEqual(n, val) { + return target + } + } + return append(target, val) +} + +func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool { + for _, n := range nodes { + if n.Kind != kind { + return false + } + } + return true +} + +func leastChildren(nodes []*ast.Node) int { + min := -1 + idx := -1 + for i, n := range nodes { + if idx == -1 || (len(n.Children) < min) { + min = len(n.Children) + idx = i + } + } + return idx +} + +func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) { + var matchers []match.Matcher + for _, desc := range tree.Children { + m, err := compile(desc, sep) + if err != nil { + return nil, err + } + matchers = append(matchers, optimizeMatcher(m)) + } + return matchers, nil +} + +func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) { + switch tree.Kind { + case ast.KindAnyOf: + // todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go) + if n := minimizeTree(tree); n != nil { + return compile(n, sep) + } + matchers, err := compileTreeChildren(tree, sep) + if err != nil { + return nil, err + } + return match.NewAnyOf(matchers...), nil + + case ast.KindPattern: + if len(tree.Children) == 0 { + return match.NewNothing(), nil + } + matchers, err := compileTreeChildren(tree, sep) + if err != nil { + return nil, err + } + m, err = compileMatchers(minimizeMatchers(matchers)) + if err != nil { + return nil, err + } + + case ast.KindAny: + m = match.NewAny(sep) + + case ast.KindSuper: + m = match.NewSuper() + + case ast.KindSingle: + m = match.NewSingle(sep) + + case ast.KindNothing: + m = match.NewNothing() + + case ast.KindList: + l := tree.Value.(ast.List) + m = match.NewList([]rune(l.Chars), l.Not) + + case ast.KindRange: + r := tree.Value.(ast.Range) + m = match.NewRange(r.Lo, r.Hi, r.Not) + + case ast.KindText: + t := tree.Value.(ast.Text) + m = match.NewText(t.Text) + + default: + return nil, fmt.Errorf("could not compile tree: unknown node type") + } + + return optimizeMatcher(m), nil +} + +func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) { + m, err := compile(tree, sep) + if err != nil { + return nil, err + } + + return m, nil +} diff --git a/vendor/github.com/gobwas/glob/glob.go b/vendor/github.com/gobwas/glob/glob.go new file mode 100644 index 0000000000000..2afde343af83e --- /dev/null +++ b/vendor/github.com/gobwas/glob/glob.go @@ -0,0 +1,80 @@ +package glob + +import ( + "github.com/gobwas/glob/compiler" + "github.com/gobwas/glob/syntax" +) + +// Glob represents compiled glob pattern. +type Glob interface { + Match(string) bool +} + +// Compile creates Glob for given pattern and strings (if any present after pattern) as separators. +// The pattern syntax is: +// +// pattern: +// { term } +// +// term: +// `*` matches any sequence of non-separator characters +// `**` matches any sequence of characters +// `?` matches any single non-separator character +// `[` [ `!` ] { character-range } `]` +// character class (must be non-empty) +// `{` pattern-list `}` +// pattern alternatives +// c matches character c (c != `*`, `**`, `?`, `\`, `[`, `{`, `}`) +// `\` c matches character c +// +// character-range: +// c matches character c (c != `\\`, `-`, `]`) +// `\` c matches character c +// lo `-` hi matches character c for lo <= c <= hi +// +// pattern-list: +// pattern { `,` pattern } +// comma-separated (without spaces) patterns +// +func Compile(pattern string, separators ...rune) (Glob, error) { + ast, err := syntax.Parse(pattern) + if err != nil { + return nil, err + } + + matcher, err := compiler.Compile(ast, separators) + if err != nil { + return nil, err + } + + return matcher, nil +} + +// MustCompile is the same as Compile, except that if Compile returns error, this will panic +func MustCompile(pattern string, separators ...rune) Glob { + g, err := Compile(pattern, separators...) + if err != nil { + panic(err) + } + + return g +} + +// QuoteMeta returns a string that quotes all glob pattern meta characters +// inside the argument text; For example, QuoteMeta(`{foo*}`) returns `\[foo\*\]`. +func QuoteMeta(s string) string { + b := make([]byte, 2*len(s)) + + // a byte loop is correct because all meta characters are ASCII + j := 0 + for i := 0; i < len(s); i++ { + if syntax.Special(s[i]) { + b[j] = '\\' + j++ + } + b[j] = s[i] + j++ + } + + return string(b[0:j]) +} diff --git a/vendor/github.com/gobwas/glob/match/any.go b/vendor/github.com/gobwas/glob/match/any.go new file mode 100644 index 0000000000000..514a9a5c450a2 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/any.go @@ -0,0 +1,45 @@ +package match + +import ( + "fmt" + "github.com/gobwas/glob/util/strings" +) + +type Any struct { + Separators []rune +} + +func NewAny(s []rune) Any { + return Any{s} +} + +func (self Any) Match(s string) bool { + return strings.IndexAnyRunes(s, self.Separators) == -1 +} + +func (self Any) Index(s string) (int, []int) { + found := strings.IndexAnyRunes(s, self.Separators) + switch found { + case -1: + case 0: + return 0, segments0 + default: + s = s[:found] + } + + segments := acquireSegments(len(s)) + for i := range s { + segments = append(segments, i) + } + segments = append(segments, len(s)) + + return 0, segments +} + +func (self Any) Len() int { + return lenNo +} + +func (self Any) String() string { + return fmt.Sprintf("", string(self.Separators)) +} diff --git a/vendor/github.com/gobwas/glob/match/any_of.go b/vendor/github.com/gobwas/glob/match/any_of.go new file mode 100644 index 0000000000000..8e65356cdc9f0 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/any_of.go @@ -0,0 +1,82 @@ +package match + +import "fmt" + +type AnyOf struct { + Matchers Matchers +} + +func NewAnyOf(m ...Matcher) AnyOf { + return AnyOf{Matchers(m)} +} + +func (self *AnyOf) Add(m Matcher) error { + self.Matchers = append(self.Matchers, m) + return nil +} + +func (self AnyOf) Match(s string) bool { + for _, m := range self.Matchers { + if m.Match(s) { + return true + } + } + + return false +} + +func (self AnyOf) Index(s string) (int, []int) { + index := -1 + + segments := acquireSegments(len(s)) + for _, m := range self.Matchers { + idx, seg := m.Index(s) + if idx == -1 { + continue + } + + if index == -1 || idx < index { + index = idx + segments = append(segments[:0], seg...) + continue + } + + if idx > index { + continue + } + + // here idx == index + segments = appendMerge(segments, seg) + } + + if index == -1 { + releaseSegments(segments) + return -1, nil + } + + return index, segments +} + +func (self AnyOf) Len() (l int) { + l = -1 + for _, m := range self.Matchers { + ml := m.Len() + switch { + case l == -1: + l = ml + continue + + case ml == -1: + return -1 + + case l != ml: + return -1 + } + } + + return +} + +func (self AnyOf) String() string { + return fmt.Sprintf("", self.Matchers) +} diff --git a/vendor/github.com/gobwas/glob/match/btree.go b/vendor/github.com/gobwas/glob/match/btree.go new file mode 100644 index 0000000000000..a8130e93eae54 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/btree.go @@ -0,0 +1,146 @@ +package match + +import ( + "fmt" + "unicode/utf8" +) + +type BTree struct { + Value Matcher + Left Matcher + Right Matcher + ValueLengthRunes int + LeftLengthRunes int + RightLengthRunes int + LengthRunes int +} + +func NewBTree(Value, Left, Right Matcher) (tree BTree) { + tree.Value = Value + tree.Left = Left + tree.Right = Right + + lenOk := true + if tree.ValueLengthRunes = Value.Len(); tree.ValueLengthRunes == -1 { + lenOk = false + } + + if Left != nil { + if tree.LeftLengthRunes = Left.Len(); tree.LeftLengthRunes == -1 { + lenOk = false + } + } + + if Right != nil { + if tree.RightLengthRunes = Right.Len(); tree.RightLengthRunes == -1 { + lenOk = false + } + } + + if lenOk { + tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes + } else { + tree.LengthRunes = -1 + } + + return tree +} + +func (self BTree) Len() int { + return self.LengthRunes +} + +// todo? +func (self BTree) Index(s string) (int, []int) { + return -1, nil +} + +func (self BTree) Match(s string) bool { + inputLen := len(s) + + // self.Length, self.RLen and self.LLen are values meaning the length of runes for each part + // here we manipulating byte length for better optimizations + // but these checks still works, cause minLen of 1-rune string is 1 byte. + if self.LengthRunes != -1 && self.LengthRunes > inputLen { + return false + } + + // try to cut unnecessary parts + // by knowledge of length of right and left part + var offset, limit int + if self.LeftLengthRunes >= 0 { + offset = self.LeftLengthRunes + } + if self.RightLengthRunes >= 0 { + limit = inputLen - self.RightLengthRunes + } else { + limit = inputLen + } + + for offset < limit { + // search for matching part in substring + index, segments := self.Value.Index(s[offset:limit]) + if index == -1 { + releaseSegments(segments) + return false + } + + l := s[:offset+index] + var left bool + if self.Left != nil { + left = self.Left.Match(l) + } else { + left = l == "" + } + + if left { + for i := len(segments) - 1; i >= 0; i-- { + length := segments[i] + + var right bool + var r string + // if there is no string for the right branch + if inputLen <= offset+index+length { + r = "" + } else { + r = s[offset+index+length:] + } + + if self.Right != nil { + right = self.Right.Match(r) + } else { + right = r == "" + } + + if right { + releaseSegments(segments) + return true + } + } + } + + _, step := utf8.DecodeRuneInString(s[offset+index:]) + offset += index + step + + releaseSegments(segments) + } + + return false +} + +func (self BTree) String() string { + const n string = "" + var l, r string + if self.Left == nil { + l = n + } else { + l = self.Left.String() + } + if self.Right == nil { + r = n + } else { + r = self.Right.String() + } + + return fmt.Sprintf("%s]>", l, self.Value, r) +} diff --git a/vendor/github.com/gobwas/glob/match/contains.go b/vendor/github.com/gobwas/glob/match/contains.go new file mode 100644 index 0000000000000..0998e95b0eaf5 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/contains.go @@ -0,0 +1,58 @@ +package match + +import ( + "fmt" + "strings" +) + +type Contains struct { + Needle string + Not bool +} + +func NewContains(needle string, not bool) Contains { + return Contains{needle, not} +} + +func (self Contains) Match(s string) bool { + return strings.Contains(s, self.Needle) != self.Not +} + +func (self Contains) Index(s string) (int, []int) { + var offset int + + idx := strings.Index(s, self.Needle) + + if !self.Not { + if idx == -1 { + return -1, nil + } + + offset = idx + len(self.Needle) + if len(s) <= offset { + return 0, []int{offset} + } + s = s[offset:] + } else if idx != -1 { + s = s[:idx] + } + + segments := acquireSegments(len(s) + 1) + for i := range s { + segments = append(segments, offset+i) + } + + return 0, append(segments, offset+len(s)) +} + +func (self Contains) Len() int { + return lenNo +} + +func (self Contains) String() string { + var not string + if self.Not { + not = "!" + } + return fmt.Sprintf("", not, self.Needle) +} diff --git a/vendor/github.com/gobwas/glob/match/every_of.go b/vendor/github.com/gobwas/glob/match/every_of.go new file mode 100644 index 0000000000000..7c968ee368b0b --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/every_of.go @@ -0,0 +1,99 @@ +package match + +import ( + "fmt" +) + +type EveryOf struct { + Matchers Matchers +} + +func NewEveryOf(m ...Matcher) EveryOf { + return EveryOf{Matchers(m)} +} + +func (self *EveryOf) Add(m Matcher) error { + self.Matchers = append(self.Matchers, m) + return nil +} + +func (self EveryOf) Len() (l int) { + for _, m := range self.Matchers { + if ml := m.Len(); l > 0 { + l += ml + } else { + return -1 + } + } + + return +} + +func (self EveryOf) Index(s string) (int, []int) { + var index int + var offset int + + // make `in` with cap as len(s), + // cause it is the maximum size of output segments values + next := acquireSegments(len(s)) + current := acquireSegments(len(s)) + + sub := s + for i, m := range self.Matchers { + idx, seg := m.Index(sub) + if idx == -1 { + releaseSegments(next) + releaseSegments(current) + return -1, nil + } + + if i == 0 { + // we use copy here instead of `current = seg` + // cause seg is a slice from reusable buffer `in` + // and it could be overwritten in next iteration + current = append(current, seg...) + } else { + // clear the next + next = next[:0] + + delta := index - (idx + offset) + for _, ex := range current { + for _, n := range seg { + if ex+delta == n { + next = append(next, n) + } + } + } + + if len(next) == 0 { + releaseSegments(next) + releaseSegments(current) + return -1, nil + } + + current = append(current[:0], next...) + } + + index = idx + offset + sub = s[index:] + offset += idx + } + + releaseSegments(next) + + return index, current +} + +func (self EveryOf) Match(s string) bool { + for _, m := range self.Matchers { + if !m.Match(s) { + return false + } + } + + return true +} + +func (self EveryOf) String() string { + return fmt.Sprintf("", self.Matchers) +} diff --git a/vendor/github.com/gobwas/glob/match/list.go b/vendor/github.com/gobwas/glob/match/list.go new file mode 100644 index 0000000000000..7fd763ecd8ea6 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/list.go @@ -0,0 +1,49 @@ +package match + +import ( + "fmt" + "github.com/gobwas/glob/util/runes" + "unicode/utf8" +) + +type List struct { + List []rune + Not bool +} + +func NewList(list []rune, not bool) List { + return List{list, not} +} + +func (self List) Match(s string) bool { + r, w := utf8.DecodeRuneInString(s) + if len(s) > w { + return false + } + + inList := runes.IndexRune(self.List, r) != -1 + return inList == !self.Not +} + +func (self List) Len() int { + return lenOne +} + +func (self List) Index(s string) (int, []int) { + for i, r := range s { + if self.Not == (runes.IndexRune(self.List, r) == -1) { + return i, segmentsByRuneLength[utf8.RuneLen(r)] + } + } + + return -1, nil +} + +func (self List) String() string { + var not string + if self.Not { + not = "!" + } + + return fmt.Sprintf("", not, string(self.List)) +} diff --git a/vendor/github.com/gobwas/glob/match/match.go b/vendor/github.com/gobwas/glob/match/match.go new file mode 100644 index 0000000000000..f80e007fb838c --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/match.go @@ -0,0 +1,81 @@ +package match + +// todo common table of rune's length + +import ( + "fmt" + "strings" +) + +const lenOne = 1 +const lenZero = 0 +const lenNo = -1 + +type Matcher interface { + Match(string) bool + Index(string) (int, []int) + Len() int + String() string +} + +type Matchers []Matcher + +func (m Matchers) String() string { + var s []string + for _, matcher := range m { + s = append(s, fmt.Sprint(matcher)) + } + + return fmt.Sprintf("%s", strings.Join(s, ",")) +} + +// appendMerge merges and sorts given already SORTED and UNIQUE segments. +func appendMerge(target, sub []int) []int { + lt, ls := len(target), len(sub) + out := make([]int, 0, lt+ls) + + for x, y := 0, 0; x < lt || y < ls; { + if x >= lt { + out = append(out, sub[y:]...) + break + } + + if y >= ls { + out = append(out, target[x:]...) + break + } + + xValue := target[x] + yValue := sub[y] + + switch { + + case xValue == yValue: + out = append(out, xValue) + x++ + y++ + + case xValue < yValue: + out = append(out, xValue) + x++ + + case yValue < xValue: + out = append(out, yValue) + y++ + + } + } + + target = append(target[:0], out...) + + return target +} + +func reverseSegments(input []int) { + l := len(input) + m := l / 2 + + for i := 0; i < m; i++ { + input[i], input[l-i-1] = input[l-i-1], input[i] + } +} diff --git a/vendor/github.com/gobwas/glob/match/max.go b/vendor/github.com/gobwas/glob/match/max.go new file mode 100644 index 0000000000000..d72f69efff75b --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/max.go @@ -0,0 +1,49 @@ +package match + +import ( + "fmt" + "unicode/utf8" +) + +type Max struct { + Limit int +} + +func NewMax(l int) Max { + return Max{l} +} + +func (self Max) Match(s string) bool { + var l int + for range s { + l += 1 + if l > self.Limit { + return false + } + } + + return true +} + +func (self Max) Index(s string) (int, []int) { + segments := acquireSegments(self.Limit + 1) + segments = append(segments, 0) + var count int + for i, r := range s { + count++ + if count > self.Limit { + break + } + segments = append(segments, i+utf8.RuneLen(r)) + } + + return 0, segments +} + +func (self Max) Len() int { + return lenNo +} + +func (self Max) String() string { + return fmt.Sprintf("", self.Limit) +} diff --git a/vendor/github.com/gobwas/glob/match/min.go b/vendor/github.com/gobwas/glob/match/min.go new file mode 100644 index 0000000000000..db57ac8eb49d6 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/min.go @@ -0,0 +1,57 @@ +package match + +import ( + "fmt" + "unicode/utf8" +) + +type Min struct { + Limit int +} + +func NewMin(l int) Min { + return Min{l} +} + +func (self Min) Match(s string) bool { + var l int + for range s { + l += 1 + if l >= self.Limit { + return true + } + } + + return false +} + +func (self Min) Index(s string) (int, []int) { + var count int + + c := len(s) - self.Limit + 1 + if c <= 0 { + return -1, nil + } + + segments := acquireSegments(c) + for i, r := range s { + count++ + if count >= self.Limit { + segments = append(segments, i+utf8.RuneLen(r)) + } + } + + if len(segments) == 0 { + return -1, nil + } + + return 0, segments +} + +func (self Min) Len() int { + return lenNo +} + +func (self Min) String() string { + return fmt.Sprintf("", self.Limit) +} diff --git a/vendor/github.com/gobwas/glob/match/nothing.go b/vendor/github.com/gobwas/glob/match/nothing.go new file mode 100644 index 0000000000000..0d4ecd36b80b2 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/nothing.go @@ -0,0 +1,27 @@ +package match + +import ( + "fmt" +) + +type Nothing struct{} + +func NewNothing() Nothing { + return Nothing{} +} + +func (self Nothing) Match(s string) bool { + return len(s) == 0 +} + +func (self Nothing) Index(s string) (int, []int) { + return 0, segments0 +} + +func (self Nothing) Len() int { + return lenZero +} + +func (self Nothing) String() string { + return fmt.Sprintf("") +} diff --git a/vendor/github.com/gobwas/glob/match/prefix.go b/vendor/github.com/gobwas/glob/match/prefix.go new file mode 100644 index 0000000000000..a7347250e8d5a --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/prefix.go @@ -0,0 +1,50 @@ +package match + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +type Prefix struct { + Prefix string +} + +func NewPrefix(p string) Prefix { + return Prefix{p} +} + +func (self Prefix) Index(s string) (int, []int) { + idx := strings.Index(s, self.Prefix) + if idx == -1 { + return -1, nil + } + + length := len(self.Prefix) + var sub string + if len(s) > idx+length { + sub = s[idx+length:] + } else { + sub = "" + } + + segments := acquireSegments(len(sub) + 1) + segments = append(segments, length) + for i, r := range sub { + segments = append(segments, length+i+utf8.RuneLen(r)) + } + + return idx, segments +} + +func (self Prefix) Len() int { + return lenNo +} + +func (self Prefix) Match(s string) bool { + return strings.HasPrefix(s, self.Prefix) +} + +func (self Prefix) String() string { + return fmt.Sprintf("", self.Prefix) +} diff --git a/vendor/github.com/gobwas/glob/match/prefix_any.go b/vendor/github.com/gobwas/glob/match/prefix_any.go new file mode 100644 index 0000000000000..8ee58fe1b3c7f --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/prefix_any.go @@ -0,0 +1,55 @@ +package match + +import ( + "fmt" + "strings" + "unicode/utf8" + + sutil "github.com/gobwas/glob/util/strings" +) + +type PrefixAny struct { + Prefix string + Separators []rune +} + +func NewPrefixAny(s string, sep []rune) PrefixAny { + return PrefixAny{s, sep} +} + +func (self PrefixAny) Index(s string) (int, []int) { + idx := strings.Index(s, self.Prefix) + if idx == -1 { + return -1, nil + } + + n := len(self.Prefix) + sub := s[idx+n:] + i := sutil.IndexAnyRunes(sub, self.Separators) + if i > -1 { + sub = sub[:i] + } + + seg := acquireSegments(len(sub) + 1) + seg = append(seg, n) + for i, r := range sub { + seg = append(seg, n+i+utf8.RuneLen(r)) + } + + return idx, seg +} + +func (self PrefixAny) Len() int { + return lenNo +} + +func (self PrefixAny) Match(s string) bool { + if !strings.HasPrefix(s, self.Prefix) { + return false + } + return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1 +} + +func (self PrefixAny) String() string { + return fmt.Sprintf("", self.Prefix, string(self.Separators)) +} diff --git a/vendor/github.com/gobwas/glob/match/prefix_suffix.go b/vendor/github.com/gobwas/glob/match/prefix_suffix.go new file mode 100644 index 0000000000000..8208085a19963 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/prefix_suffix.go @@ -0,0 +1,62 @@ +package match + +import ( + "fmt" + "strings" +) + +type PrefixSuffix struct { + Prefix, Suffix string +} + +func NewPrefixSuffix(p, s string) PrefixSuffix { + return PrefixSuffix{p, s} +} + +func (self PrefixSuffix) Index(s string) (int, []int) { + prefixIdx := strings.Index(s, self.Prefix) + if prefixIdx == -1 { + return -1, nil + } + + suffixLen := len(self.Suffix) + if suffixLen <= 0 { + return prefixIdx, []int{len(s) - prefixIdx} + } + + if (len(s) - prefixIdx) <= 0 { + return -1, nil + } + + segments := acquireSegments(len(s) - prefixIdx) + for sub := s[prefixIdx:]; ; { + suffixIdx := strings.LastIndex(sub, self.Suffix) + if suffixIdx == -1 { + break + } + + segments = append(segments, suffixIdx+suffixLen) + sub = sub[:suffixIdx] + } + + if len(segments) == 0 { + releaseSegments(segments) + return -1, nil + } + + reverseSegments(segments) + + return prefixIdx, segments +} + +func (self PrefixSuffix) Len() int { + return lenNo +} + +func (self PrefixSuffix) Match(s string) bool { + return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix) +} + +func (self PrefixSuffix) String() string { + return fmt.Sprintf("", self.Prefix, self.Suffix) +} diff --git a/vendor/github.com/gobwas/glob/match/range.go b/vendor/github.com/gobwas/glob/match/range.go new file mode 100644 index 0000000000000..ce30245a40bcc --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/range.go @@ -0,0 +1,48 @@ +package match + +import ( + "fmt" + "unicode/utf8" +) + +type Range struct { + Lo, Hi rune + Not bool +} + +func NewRange(lo, hi rune, not bool) Range { + return Range{lo, hi, not} +} + +func (self Range) Len() int { + return lenOne +} + +func (self Range) Match(s string) bool { + r, w := utf8.DecodeRuneInString(s) + if len(s) > w { + return false + } + + inRange := r >= self.Lo && r <= self.Hi + + return inRange == !self.Not +} + +func (self Range) Index(s string) (int, []int) { + for i, r := range s { + if self.Not != (r >= self.Lo && r <= self.Hi) { + return i, segmentsByRuneLength[utf8.RuneLen(r)] + } + } + + return -1, nil +} + +func (self Range) String() string { + var not string + if self.Not { + not = "!" + } + return fmt.Sprintf("", not, string(self.Lo), string(self.Hi)) +} diff --git a/vendor/github.com/gobwas/glob/match/row.go b/vendor/github.com/gobwas/glob/match/row.go new file mode 100644 index 0000000000000..4379042e42f37 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/row.go @@ -0,0 +1,77 @@ +package match + +import ( + "fmt" +) + +type Row struct { + Matchers Matchers + RunesLength int + Segments []int +} + +func NewRow(len int, m ...Matcher) Row { + return Row{ + Matchers: Matchers(m), + RunesLength: len, + Segments: []int{len}, + } +} + +func (self Row) matchAll(s string) bool { + var idx int + for _, m := range self.Matchers { + length := m.Len() + + var next, i int + for next = range s[idx:] { + i++ + if i == length { + break + } + } + + if i < length || !m.Match(s[idx:idx+next+1]) { + return false + } + + idx += next + 1 + } + + return true +} + +func (self Row) lenOk(s string) bool { + var i int + for range s { + i++ + if i > self.RunesLength { + return false + } + } + return self.RunesLength == i +} + +func (self Row) Match(s string) bool { + return self.lenOk(s) && self.matchAll(s) +} + +func (self Row) Len() (l int) { + return self.RunesLength +} + +func (self Row) Index(s string) (int, []int) { + for i := range s { + if len(s[i:]) < self.RunesLength { + break + } + if self.matchAll(s[i:]) { + return i, self.Segments + } + } + return -1, nil +} + +func (self Row) String() string { + return fmt.Sprintf("", self.RunesLength, self.Matchers) +} diff --git a/vendor/github.com/gobwas/glob/match/segments.go b/vendor/github.com/gobwas/glob/match/segments.go new file mode 100644 index 0000000000000..9ea6f30943981 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/segments.go @@ -0,0 +1,91 @@ +package match + +import ( + "sync" +) + +type SomePool interface { + Get() []int + Put([]int) +} + +var segmentsPools [1024]sync.Pool + +func toPowerOfTwo(v int) int { + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ + + return v +} + +const ( + cacheFrom = 16 + cacheToAndHigher = 1024 + cacheFromIndex = 15 + cacheToAndHigherIndex = 1023 +) + +var ( + segments0 = []int{0} + segments1 = []int{1} + segments2 = []int{2} + segments3 = []int{3} + segments4 = []int{4} +) + +var segmentsByRuneLength [5][]int = [5][]int{ + 0: segments0, + 1: segments1, + 2: segments2, + 3: segments3, + 4: segments4, +} + +func init() { + for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { + func(i int) { + segmentsPools[i-1] = sync.Pool{New: func() interface{} { + return make([]int, 0, i) + }} + }(i) + } +} + +func getTableIndex(c int) int { + p := toPowerOfTwo(c) + switch { + case p >= cacheToAndHigher: + return cacheToAndHigherIndex + case p <= cacheFrom: + return cacheFromIndex + default: + return p - 1 + } +} + +func acquireSegments(c int) []int { + // make []int with less capacity than cacheFrom + // is faster than acquiring it from pool + if c < cacheFrom { + return make([]int, 0, c) + } + + return segmentsPools[getTableIndex(c)].Get().([]int)[:0] +} + +func releaseSegments(s []int) { + c := cap(s) + + // make []int with less capacity than cacheFrom + // is faster than acquiring it from pool + if c < cacheFrom { + return + } + + segmentsPools[getTableIndex(c)].Put(s) +} diff --git a/vendor/github.com/gobwas/glob/match/single.go b/vendor/github.com/gobwas/glob/match/single.go new file mode 100644 index 0000000000000..ee6e3954c1f30 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/single.go @@ -0,0 +1,43 @@ +package match + +import ( + "fmt" + "github.com/gobwas/glob/util/runes" + "unicode/utf8" +) + +// single represents ? +type Single struct { + Separators []rune +} + +func NewSingle(s []rune) Single { + return Single{s} +} + +func (self Single) Match(s string) bool { + r, w := utf8.DecodeRuneInString(s) + if len(s) > w { + return false + } + + return runes.IndexRune(self.Separators, r) == -1 +} + +func (self Single) Len() int { + return lenOne +} + +func (self Single) Index(s string) (int, []int) { + for i, r := range s { + if runes.IndexRune(self.Separators, r) == -1 { + return i, segmentsByRuneLength[utf8.RuneLen(r)] + } + } + + return -1, nil +} + +func (self Single) String() string { + return fmt.Sprintf("", string(self.Separators)) +} diff --git a/vendor/github.com/gobwas/glob/match/suffix.go b/vendor/github.com/gobwas/glob/match/suffix.go new file mode 100644 index 0000000000000..85bea8c68ec49 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/suffix.go @@ -0,0 +1,35 @@ +package match + +import ( + "fmt" + "strings" +) + +type Suffix struct { + Suffix string +} + +func NewSuffix(s string) Suffix { + return Suffix{s} +} + +func (self Suffix) Len() int { + return lenNo +} + +func (self Suffix) Match(s string) bool { + return strings.HasSuffix(s, self.Suffix) +} + +func (self Suffix) Index(s string) (int, []int) { + idx := strings.Index(s, self.Suffix) + if idx == -1 { + return -1, nil + } + + return 0, []int{idx + len(self.Suffix)} +} + +func (self Suffix) String() string { + return fmt.Sprintf("", self.Suffix) +} diff --git a/vendor/github.com/gobwas/glob/match/suffix_any.go b/vendor/github.com/gobwas/glob/match/suffix_any.go new file mode 100644 index 0000000000000..c5106f8196caa --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/suffix_any.go @@ -0,0 +1,43 @@ +package match + +import ( + "fmt" + "strings" + + sutil "github.com/gobwas/glob/util/strings" +) + +type SuffixAny struct { + Suffix string + Separators []rune +} + +func NewSuffixAny(s string, sep []rune) SuffixAny { + return SuffixAny{s, sep} +} + +func (self SuffixAny) Index(s string) (int, []int) { + idx := strings.Index(s, self.Suffix) + if idx == -1 { + return -1, nil + } + + i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1 + + return i, []int{idx + len(self.Suffix) - i} +} + +func (self SuffixAny) Len() int { + return lenNo +} + +func (self SuffixAny) Match(s string) bool { + if !strings.HasSuffix(s, self.Suffix) { + return false + } + return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1 +} + +func (self SuffixAny) String() string { + return fmt.Sprintf("", string(self.Separators), self.Suffix) +} diff --git a/vendor/github.com/gobwas/glob/match/super.go b/vendor/github.com/gobwas/glob/match/super.go new file mode 100644 index 0000000000000..3875950bb8c70 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/super.go @@ -0,0 +1,33 @@ +package match + +import ( + "fmt" +) + +type Super struct{} + +func NewSuper() Super { + return Super{} +} + +func (self Super) Match(s string) bool { + return true +} + +func (self Super) Len() int { + return lenNo +} + +func (self Super) Index(s string) (int, []int) { + segments := acquireSegments(len(s) + 1) + for i := range s { + segments = append(segments, i) + } + segments = append(segments, len(s)) + + return 0, segments +} + +func (self Super) String() string { + return fmt.Sprintf("") +} diff --git a/vendor/github.com/gobwas/glob/match/text.go b/vendor/github.com/gobwas/glob/match/text.go new file mode 100644 index 0000000000000..0a17616d3cb87 --- /dev/null +++ b/vendor/github.com/gobwas/glob/match/text.go @@ -0,0 +1,45 @@ +package match + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +// raw represents raw string to match +type Text struct { + Str string + RunesLength int + BytesLength int + Segments []int +} + +func NewText(s string) Text { + return Text{ + Str: s, + RunesLength: utf8.RuneCountInString(s), + BytesLength: len(s), + Segments: []int{len(s)}, + } +} + +func (self Text) Match(s string) bool { + return self.Str == s +} + +func (self Text) Len() int { + return self.RunesLength +} + +func (self Text) Index(s string) (int, []int) { + index := strings.Index(s, self.Str) + if index == -1 { + return -1, nil + } + + return index, self.Segments +} + +func (self Text) String() string { + return fmt.Sprintf("", self.Str) +} diff --git a/vendor/github.com/gobwas/glob/readme.md b/vendor/github.com/gobwas/glob/readme.md new file mode 100644 index 0000000000000..f58144e733e2c --- /dev/null +++ b/vendor/github.com/gobwas/glob/readme.md @@ -0,0 +1,148 @@ +# glob.[go](https://golang.org) + +[![GoDoc][godoc-image]][godoc-url] [![Build Status][travis-image]][travis-url] + +> Go Globbing Library. + +## Install + +```shell + go get github.com/gobwas/glob +``` + +## Example + +```go + +package main + +import "github.com/gobwas/glob" + +func main() { + var g glob.Glob + + // create simple glob + g = glob.MustCompile("*.github.com") + g.Match("api.github.com") // true + + // quote meta characters and then create simple glob + g = glob.MustCompile(glob.QuoteMeta("*.github.com")) + g.Match("*.github.com") // true + + // create new glob with set of delimiters as ["."] + g = glob.MustCompile("api.*.com", '.') + g.Match("api.github.com") // true + g.Match("api.gi.hub.com") // false + + // create new glob with set of delimiters as ["."] + // but now with super wildcard + g = glob.MustCompile("api.**.com", '.') + g.Match("api.github.com") // true + g.Match("api.gi.hub.com") // true + + // create glob with single symbol wildcard + g = glob.MustCompile("?at") + g.Match("cat") // true + g.Match("fat") // true + g.Match("at") // false + + // create glob with single symbol wildcard and delimiters ['f'] + g = glob.MustCompile("?at", 'f') + g.Match("cat") // true + g.Match("fat") // false + g.Match("at") // false + + // create glob with character-list matchers + g = glob.MustCompile("[abc]at") + g.Match("cat") // true + g.Match("bat") // true + g.Match("fat") // false + g.Match("at") // false + + // create glob with character-list matchers + g = glob.MustCompile("[!abc]at") + g.Match("cat") // false + g.Match("bat") // false + g.Match("fat") // true + g.Match("at") // false + + // create glob with character-range matchers + g = glob.MustCompile("[a-c]at") + g.Match("cat") // true + g.Match("bat") // true + g.Match("fat") // false + g.Match("at") // false + + // create glob with character-range matchers + g = glob.MustCompile("[!a-c]at") + g.Match("cat") // false + g.Match("bat") // false + g.Match("fat") // true + g.Match("at") // false + + // create glob with pattern-alternatives list + g = glob.MustCompile("{cat,bat,[fr]at}") + g.Match("cat") // true + g.Match("bat") // true + g.Match("fat") // true + g.Match("rat") // true + g.Match("at") // false + g.Match("zat") // false +} + +``` + +## Performance + +This library is created for compile-once patterns. This means, that compilation could take time, but +strings matching is done faster, than in case when always parsing template. + +If you will not use compiled `glob.Glob` object, and do `g := glob.MustCompile(pattern); g.Match(...)` every time, then your code will be much more slower. + +Run `go test -bench=.` from source root to see the benchmarks: + +Pattern | Fixture | Match | Speed (ns/op) +--------|---------|-------|-------------- +`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my cat has very bright eyes` | `true` | 432 +`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my dog has very bright eyes` | `false` | 199 +`https://*.google.*` | `https://account.google.com` | `true` | 96 +`https://*.google.*` | `https://google.com` | `false` | 66 +`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://yahoo.com` | `true` | 163 +`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://google.com` | `false` | 197 +`{https://*gobwas.com,http://exclude.gobwas.com}` | `https://safe.gobwas.com` | `true` | 22 +`{https://*gobwas.com,http://exclude.gobwas.com}` | `http://safe.gobwas.com` | `false` | 24 +`abc*` | `abcdef` | `true` | 8.15 +`abc*` | `af` | `false` | 5.68 +`*def` | `abcdef` | `true` | 8.84 +`*def` | `af` | `false` | 5.74 +`ab*ef` | `abcdef` | `true` | 15.2 +`ab*ef` | `af` | `false` | 10.4 + +The same things with `regexp` package: + +Pattern | Fixture | Match | Speed (ns/op) +--------|---------|-------|-------------- +`^[a-z][^a-x].*cat.*[h][^b].*eyes.*$` | `my cat has very bright eyes` | `true` | 2553 +`^[a-z][^a-x].*cat.*[h][^b].*eyes.*$` | `my dog has very bright eyes` | `false` | 1383 +`^https:\/\/.*\.google\..*$` | `https://account.google.com` | `true` | 1205 +`^https:\/\/.*\.google\..*$` | `https://google.com` | `false` | 767 +`^(https:\/\/.*\.google\..*|.*yandex\..*|.*yahoo\..*|.*mail\.ru)$` | `http://yahoo.com` | `true` | 1435 +`^(https:\/\/.*\.google\..*|.*yandex\..*|.*yahoo\..*|.*mail\.ru)$` | `http://google.com` | `false` | 1674 +`^(https:\/\/.*gobwas\.com|http://exclude.gobwas.com)$` | `https://safe.gobwas.com` | `true` | 1039 +`^(https:\/\/.*gobwas\.com|http://exclude.gobwas.com)$` | `http://safe.gobwas.com` | `false` | 272 +`^abc.*$` | `abcdef` | `true` | 237 +`^abc.*$` | `af` | `false` | 100 +`^.*def$` | `abcdef` | `true` | 464 +`^.*def$` | `af` | `false` | 265 +`^ab.*ef$` | `abcdef` | `true` | 375 +`^ab.*ef$` | `af` | `false` | 145 + +[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg +[godoc-url]: https://godoc.org/github.com/gobwas/glob +[travis-image]: https://travis-ci.org/gobwas/glob.svg?branch=master +[travis-url]: https://travis-ci.org/gobwas/glob + +## Syntax + +Syntax is inspired by [standard wildcards](http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm), +except that `**` is aka super-asterisk, that do not sensitive for separators. \ No newline at end of file diff --git a/vendor/github.com/gobwas/glob/syntax/ast/ast.go b/vendor/github.com/gobwas/glob/syntax/ast/ast.go new file mode 100644 index 0000000000000..3220a694a9cbf --- /dev/null +++ b/vendor/github.com/gobwas/glob/syntax/ast/ast.go @@ -0,0 +1,122 @@ +package ast + +import ( + "bytes" + "fmt" +) + +type Node struct { + Parent *Node + Children []*Node + Value interface{} + Kind Kind +} + +func NewNode(k Kind, v interface{}, ch ...*Node) *Node { + n := &Node{ + Kind: k, + Value: v, + } + for _, c := range ch { + Insert(n, c) + } + return n +} + +func (a *Node) Equal(b *Node) bool { + if a.Kind != b.Kind { + return false + } + if a.Value != b.Value { + return false + } + if len(a.Children) != len(b.Children) { + return false + } + for i, c := range a.Children { + if !c.Equal(b.Children[i]) { + return false + } + } + return true +} + +func (a *Node) String() string { + var buf bytes.Buffer + buf.WriteString(a.Kind.String()) + if a.Value != nil { + buf.WriteString(" =") + buf.WriteString(fmt.Sprintf("%v", a.Value)) + } + if len(a.Children) > 0 { + buf.WriteString(" [") + for i, c := range a.Children { + if i > 0 { + buf.WriteString(", ") + } + buf.WriteString(c.String()) + } + buf.WriteString("]") + } + return buf.String() +} + +func Insert(parent *Node, children ...*Node) { + parent.Children = append(parent.Children, children...) + for _, ch := range children { + ch.Parent = parent + } +} + +type List struct { + Not bool + Chars string +} + +type Range struct { + Not bool + Lo, Hi rune +} + +type Text struct { + Text string +} + +type Kind int + +const ( + KindNothing Kind = iota + KindPattern + KindList + KindRange + KindText + KindAny + KindSuper + KindSingle + KindAnyOf +) + +func (k Kind) String() string { + switch k { + case KindNothing: + return "Nothing" + case KindPattern: + return "Pattern" + case KindList: + return "List" + case KindRange: + return "Range" + case KindText: + return "Text" + case KindAny: + return "Any" + case KindSuper: + return "Super" + case KindSingle: + return "Single" + case KindAnyOf: + return "AnyOf" + default: + return "" + } +} diff --git a/vendor/github.com/gobwas/glob/syntax/ast/parser.go b/vendor/github.com/gobwas/glob/syntax/ast/parser.go new file mode 100644 index 0000000000000..429b4094303e4 --- /dev/null +++ b/vendor/github.com/gobwas/glob/syntax/ast/parser.go @@ -0,0 +1,157 @@ +package ast + +import ( + "errors" + "fmt" + "github.com/gobwas/glob/syntax/lexer" + "unicode/utf8" +) + +type Lexer interface { + Next() lexer.Token +} + +type parseFn func(*Node, Lexer) (parseFn, *Node, error) + +func Parse(lexer Lexer) (*Node, error) { + var parser parseFn + + root := NewNode(KindPattern, nil) + + var ( + tree *Node + err error + ) + for parser, tree = parserMain, root; parser != nil; { + parser, tree, err = parser(tree, lexer) + if err != nil { + return nil, err + } + } + + return root, nil +} + +func parserMain(tree *Node, lex Lexer) (parseFn, *Node, error) { + for { + token := lex.Next() + switch token.Type { + case lexer.EOF: + return nil, tree, nil + + case lexer.Error: + return nil, tree, errors.New(token.Raw) + + case lexer.Text: + Insert(tree, NewNode(KindText, Text{token.Raw})) + return parserMain, tree, nil + + case lexer.Any: + Insert(tree, NewNode(KindAny, nil)) + return parserMain, tree, nil + + case lexer.Super: + Insert(tree, NewNode(KindSuper, nil)) + return parserMain, tree, nil + + case lexer.Single: + Insert(tree, NewNode(KindSingle, nil)) + return parserMain, tree, nil + + case lexer.RangeOpen: + return parserRange, tree, nil + + case lexer.TermsOpen: + a := NewNode(KindAnyOf, nil) + Insert(tree, a) + + p := NewNode(KindPattern, nil) + Insert(a, p) + + return parserMain, p, nil + + case lexer.Separator: + p := NewNode(KindPattern, nil) + Insert(tree.Parent, p) + + return parserMain, p, nil + + case lexer.TermsClose: + return parserMain, tree.Parent.Parent, nil + + default: + return nil, tree, fmt.Errorf("unexpected token: %s", token) + } + } + return nil, tree, fmt.Errorf("unknown error") +} + +func parserRange(tree *Node, lex Lexer) (parseFn, *Node, error) { + var ( + not bool + lo rune + hi rune + chars string + ) + for { + token := lex.Next() + switch token.Type { + case lexer.EOF: + return nil, tree, errors.New("unexpected end") + + case lexer.Error: + return nil, tree, errors.New(token.Raw) + + case lexer.Not: + not = true + + case lexer.RangeLo: + r, w := utf8.DecodeRuneInString(token.Raw) + if len(token.Raw) > w { + return nil, tree, fmt.Errorf("unexpected length of lo character") + } + lo = r + + case lexer.RangeBetween: + // + + case lexer.RangeHi: + r, w := utf8.DecodeRuneInString(token.Raw) + if len(token.Raw) > w { + return nil, tree, fmt.Errorf("unexpected length of lo character") + } + + hi = r + + if hi < lo { + return nil, tree, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo)) + } + + case lexer.Text: + chars = token.Raw + + case lexer.RangeClose: + isRange := lo != 0 && hi != 0 + isChars := chars != "" + + if isChars == isRange { + return nil, tree, fmt.Errorf("could not parse range") + } + + if isRange { + Insert(tree, NewNode(KindRange, Range{ + Lo: lo, + Hi: hi, + Not: not, + })) + } else { + Insert(tree, NewNode(KindList, List{ + Chars: chars, + Not: not, + })) + } + + return parserMain, tree, nil + } + } +} diff --git a/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go b/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go new file mode 100644 index 0000000000000..a1c8d1962a06f --- /dev/null +++ b/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go @@ -0,0 +1,273 @@ +package lexer + +import ( + "bytes" + "fmt" + "github.com/gobwas/glob/util/runes" + "unicode/utf8" +) + +const ( + char_any = '*' + char_comma = ',' + char_single = '?' + char_escape = '\\' + char_range_open = '[' + char_range_close = ']' + char_terms_open = '{' + char_terms_close = '}' + char_range_not = '!' + char_range_between = '-' +) + +var specials = []byte{ + char_any, + char_single, + char_escape, + char_range_open, + char_range_close, + char_terms_open, + char_terms_close, +} + +func Special(c byte) bool { + return bytes.IndexByte(specials, c) != -1 +} + +type tokens []Token + +func (i *tokens) shift() (ret Token) { + ret = (*i)[0] + copy(*i, (*i)[1:]) + *i = (*i)[:len(*i)-1] + return +} + +func (i *tokens) push(v Token) { + *i = append(*i, v) +} + +func (i *tokens) empty() bool { + return len(*i) == 0 +} + +var eof rune = 0 + +type lexer struct { + data string + pos int + err error + + tokens tokens + termsLevel int + + lastRune rune + lastRuneSize int + hasRune bool +} + +func NewLexer(source string) *lexer { + l := &lexer{ + data: source, + tokens: tokens(make([]Token, 0, 4)), + } + return l +} + +func (l *lexer) Next() Token { + if l.err != nil { + return Token{Error, l.err.Error()} + } + if !l.tokens.empty() { + return l.tokens.shift() + } + + l.fetchItem() + return l.Next() +} + +func (l *lexer) peek() (r rune, w int) { + if l.pos == len(l.data) { + return eof, 0 + } + + r, w = utf8.DecodeRuneInString(l.data[l.pos:]) + if r == utf8.RuneError { + l.errorf("could not read rune") + r = eof + w = 0 + } + + return +} + +func (l *lexer) read() rune { + if l.hasRune { + l.hasRune = false + l.seek(l.lastRuneSize) + return l.lastRune + } + + r, s := l.peek() + l.seek(s) + + l.lastRune = r + l.lastRuneSize = s + + return r +} + +func (l *lexer) seek(w int) { + l.pos += w +} + +func (l *lexer) unread() { + if l.hasRune { + l.errorf("could not unread rune") + return + } + l.seek(-l.lastRuneSize) + l.hasRune = true +} + +func (l *lexer) errorf(f string, v ...interface{}) { + l.err = fmt.Errorf(f, v...) +} + +func (l *lexer) inTerms() bool { + return l.termsLevel > 0 +} + +func (l *lexer) termsEnter() { + l.termsLevel++ +} + +func (l *lexer) termsLeave() { + l.termsLevel-- +} + +var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open} +var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma) + +func (l *lexer) fetchItem() { + r := l.read() + switch { + case r == eof: + l.tokens.push(Token{EOF, ""}) + + case r == char_terms_open: + l.termsEnter() + l.tokens.push(Token{TermsOpen, string(r)}) + + case r == char_comma && l.inTerms(): + l.tokens.push(Token{Separator, string(r)}) + + case r == char_terms_close && l.inTerms(): + l.tokens.push(Token{TermsClose, string(r)}) + l.termsLeave() + + case r == char_range_open: + l.tokens.push(Token{RangeOpen, string(r)}) + l.fetchRange() + + case r == char_single: + l.tokens.push(Token{Single, string(r)}) + + case r == char_any: + if l.read() == char_any { + l.tokens.push(Token{Super, string(r) + string(r)}) + } else { + l.unread() + l.tokens.push(Token{Any, string(r)}) + } + + default: + l.unread() + + var breakers []rune + if l.inTerms() { + breakers = inTermsBreakers + } else { + breakers = inTextBreakers + } + l.fetchText(breakers) + } +} + +func (l *lexer) fetchRange() { + var wantHi bool + var wantClose bool + var seenNot bool + for { + r := l.read() + if r == eof { + l.errorf("unexpected end of input") + return + } + + if wantClose { + if r != char_range_close { + l.errorf("expected close range character") + } else { + l.tokens.push(Token{RangeClose, string(r)}) + } + return + } + + if wantHi { + l.tokens.push(Token{RangeHi, string(r)}) + wantClose = true + continue + } + + if !seenNot && r == char_range_not { + l.tokens.push(Token{Not, string(r)}) + seenNot = true + continue + } + + if n, w := l.peek(); n == char_range_between { + l.seek(w) + l.tokens.push(Token{RangeLo, string(r)}) + l.tokens.push(Token{RangeBetween, string(n)}) + wantHi = true + continue + } + + l.unread() // unread first peek and fetch as text + l.fetchText([]rune{char_range_close}) + wantClose = true + } +} + +func (l *lexer) fetchText(breakers []rune) { + var data []rune + var escaped bool + +reading: + for { + r := l.read() + if r == eof { + break + } + + if !escaped { + if r == char_escape { + escaped = true + continue + } + + if runes.IndexRune(breakers, r) != -1 { + l.unread() + break reading + } + } + + escaped = false + data = append(data, r) + } + + if len(data) > 0 { + l.tokens.push(Token{Text, string(data)}) + } +} diff --git a/vendor/github.com/gobwas/glob/syntax/lexer/token.go b/vendor/github.com/gobwas/glob/syntax/lexer/token.go new file mode 100644 index 0000000000000..2797c4e83a4f9 --- /dev/null +++ b/vendor/github.com/gobwas/glob/syntax/lexer/token.go @@ -0,0 +1,88 @@ +package lexer + +import "fmt" + +type TokenType int + +const ( + EOF TokenType = iota + Error + Text + Char + Any + Super + Single + Not + Separator + RangeOpen + RangeClose + RangeLo + RangeHi + RangeBetween + TermsOpen + TermsClose +) + +func (tt TokenType) String() string { + switch tt { + case EOF: + return "eof" + + case Error: + return "error" + + case Text: + return "text" + + case Char: + return "char" + + case Any: + return "any" + + case Super: + return "super" + + case Single: + return "single" + + case Not: + return "not" + + case Separator: + return "separator" + + case RangeOpen: + return "range_open" + + case RangeClose: + return "range_close" + + case RangeLo: + return "range_lo" + + case RangeHi: + return "range_hi" + + case RangeBetween: + return "range_between" + + case TermsOpen: + return "terms_open" + + case TermsClose: + return "terms_close" + + default: + return "undef" + } +} + +type Token struct { + Type TokenType + Raw string +} + +func (t Token) String() string { + return fmt.Sprintf("%v<%q>", t.Type, t.Raw) +} diff --git a/vendor/github.com/gobwas/glob/syntax/syntax.go b/vendor/github.com/gobwas/glob/syntax/syntax.go new file mode 100644 index 0000000000000..1d168b1482990 --- /dev/null +++ b/vendor/github.com/gobwas/glob/syntax/syntax.go @@ -0,0 +1,14 @@ +package syntax + +import ( + "github.com/gobwas/glob/syntax/ast" + "github.com/gobwas/glob/syntax/lexer" +) + +func Parse(s string) (*ast.Node, error) { + return ast.Parse(lexer.NewLexer(s)) +} + +func Special(b byte) bool { + return lexer.Special(b) +} diff --git a/vendor/github.com/gobwas/glob/util/runes/runes.go b/vendor/github.com/gobwas/glob/util/runes/runes.go new file mode 100644 index 0000000000000..a72355641072e --- /dev/null +++ b/vendor/github.com/gobwas/glob/util/runes/runes.go @@ -0,0 +1,154 @@ +package runes + +func Index(s, needle []rune) int { + ls, ln := len(s), len(needle) + + switch { + case ln == 0: + return 0 + case ln == 1: + return IndexRune(s, needle[0]) + case ln == ls: + if Equal(s, needle) { + return 0 + } + return -1 + case ln > ls: + return -1 + } + +head: + for i := 0; i < ls && ls-i >= ln; i++ { + for y := 0; y < ln; y++ { + if s[i+y] != needle[y] { + continue head + } + } + + return i + } + + return -1 +} + +func LastIndex(s, needle []rune) int { + ls, ln := len(s), len(needle) + + switch { + case ln == 0: + if ls == 0 { + return 0 + } + return ls + case ln == 1: + return IndexLastRune(s, needle[0]) + case ln == ls: + if Equal(s, needle) { + return 0 + } + return -1 + case ln > ls: + return -1 + } + +head: + for i := ls - 1; i >= 0 && i >= ln; i-- { + for y := ln - 1; y >= 0; y-- { + if s[i-(ln-y-1)] != needle[y] { + continue head + } + } + + return i - ln + 1 + } + + return -1 +} + +// IndexAny returns the index of the first instance of any Unicode code point +// from chars in s, or -1 if no Unicode code point from chars is present in s. +func IndexAny(s, chars []rune) int { + if len(chars) > 0 { + for i, c := range s { + for _, m := range chars { + if c == m { + return i + } + } + } + } + return -1 +} + +func Contains(s, needle []rune) bool { + return Index(s, needle) >= 0 +} + +func Max(s []rune) (max rune) { + for _, r := range s { + if r > max { + max = r + } + } + + return +} + +func Min(s []rune) rune { + min := rune(-1) + for _, r := range s { + if min == -1 { + min = r + continue + } + + if r < min { + min = r + } + } + + return min +} + +func IndexRune(s []rune, r rune) int { + for i, c := range s { + if c == r { + return i + } + } + return -1 +} + +func IndexLastRune(s []rune, r rune) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == r { + return i + } + } + + return -1 +} + +func Equal(a, b []rune) bool { + if len(a) == len(b) { + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + + return true + } + + return false +} + +// HasPrefix tests whether the string s begins with prefix. +func HasPrefix(s, prefix []rune) bool { + return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) +} + +// HasSuffix tests whether the string s ends with suffix. +func HasSuffix(s, suffix []rune) bool { + return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) +} diff --git a/vendor/github.com/gobwas/glob/util/strings/strings.go b/vendor/github.com/gobwas/glob/util/strings/strings.go new file mode 100644 index 0000000000000..e8ee1920b17ea --- /dev/null +++ b/vendor/github.com/gobwas/glob/util/strings/strings.go @@ -0,0 +1,39 @@ +package strings + +import ( + "strings" + "unicode/utf8" +) + +func IndexAnyRunes(s string, rs []rune) int { + for _, r := range rs { + if i := strings.IndexRune(s, r); i != -1 { + return i + } + } + + return -1 +} + +func LastIndexAnyRunes(s string, rs []rune) int { + for _, r := range rs { + i := -1 + if 0 <= r && r < utf8.RuneSelf { + i = strings.LastIndexByte(s, byte(r)) + } else { + sub := s + for len(sub) > 0 { + j := strings.IndexRune(s, r) + if j == -1 { + break + } + i = j + sub = sub[i+1:] + } + } + if i != -1 { + return i + } + } + return -1 +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 703f161a8aabc..6db4d2664e79a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -158,6 +158,15 @@ github.com/go-redis/redis/internal/util github.com/go-sql-driver/mysql # github.com/go-xorm/xorm v0.7.4 github.com/go-xorm/xorm +# github.com/gobwas/glob v0.2.3 +github.com/gobwas/glob +github.com/gobwas/glob/compiler +github.com/gobwas/glob/syntax +github.com/gobwas/glob/match +github.com/gobwas/glob/syntax/ast +github.com/gobwas/glob/util/runes +github.com/gobwas/glob/syntax/lexer +github.com/gobwas/glob/util/strings # github.com/gogits/chardet v0.0.0-20150115103509-2404f7772561 github.com/gogits/chardet # github.com/gogs/cron v0.0.0-20171120032916-9f6c956d3e14 From 55a93a305e6d0c65cf46e5f6ec1f9ab34e83ec04 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 28 Aug 2019 19:30:32 -0300 Subject: [PATCH 09/15] manually set appengine new release --- vendor/modules.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/modules.txt b/vendor/modules.txt index c3ab1bf795dea..ffb0a0f7a8983 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -409,7 +409,7 @@ golang.org/x/text/internal/language/compact golang.org/x/text/internal/utf8internal golang.org/x/text/runes golang.org/x/text/internal/tag -# google.golang.org/appengine v1.6.1 +# google.golang.org/appengine v1.6.2 google.golang.org/appengine/cloudsql google.golang.org/appengine/urlfetch google.golang.org/appengine/internal From 6bd0ac8877682383bea8aa9b038b99669a4638e0 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Wed, 4 Sep 2019 19:41:41 -0300 Subject: [PATCH 10/15] Implement better REPO_INDEXER_INCLUDE and REPO_INDEXER_EXCLUDE --- .../doc/advanced/config-cheat-sheet.en-us.md | 4 ++-- models/repo_indexer.go | 15 ++++++++++----- modules/setting/indexer.go | 16 +++++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index df83d68e3bee7..74393baf9d345 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -181,8 +181,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_PATTERNS`: **empty**: A comma separated list of file name patterns (see https://github.com/gobwas/glob) to **exclude** from the index. An empty list means do not exclude any files. Use `**.txt` to match any files with .txt extension. -- `REPO_PATTERNS_INCLUDE`: **false**: If true, `REPO_INDEXER_PATTERNS` are the file extensions to **include** rather than exclude from the index. +- `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of file name or path patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files. +- `REPO_INDEXER_EXCLUDE`: **empty**: A comma separated list of file name or path patterns (see https://github.com/gobwas/glob) to **exclude** from the index. Files that match this list will not be indexed, even if they match in `REPO_INDEXER_INCLUDE`. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. diff --git a/models/repo_indexer.go b/models/repo_indexer.go index d78a1b63b5026..6b24d62b7bc02 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -237,16 +237,21 @@ func isIndexable(entry *git.TreeEntry) bool { if !entry.IsRegular() && !entry.IsExecutable() { return false } - if setting.Indexer.FilePatterns == nil { + name := strings.ToLower(entry.Name()) + for _, g := range setting.Indexer.ExcludePatterns { + if g.Match(name) { + return false + } + } + if setting.Indexer.IncludePatterns == nil { return true } - name := strings.ToLower(entry.Name()) - for _, g := range setting.Indexer.FilePatterns { + for _, g := range setting.Indexer.IncludePatterns { if g.Match(name) { - return setting.Indexer.IncludePatterns + return true } } - return !setting.Indexer.IncludePatterns + return false } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 3ee3b2e680582..b9af7962ecd8e 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/modules/log" "github.com/gobwas/glob" + ini "gopkg.in/ini.v1" ) // enumerates all the indexer queue types @@ -34,8 +35,8 @@ var ( IssueQueueDir string IssueQueueConnStr string IssueQueueBatchNumber int - FilePatterns []glob.Glob - IncludePatterns bool + IncludePatterns []glob.Glob + ExcludePatterns []glob.Glob }{ IssueType: "bleve", IssuePath: "indexers/issues.bleve", @@ -58,8 +59,8 @@ func newIndexerService() { if !filepath.IsAbs(Indexer.RepoPath) { Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath) } - Indexer.FilePatterns = extensionsFromString(sec.Key("REPO_INDEXER_PATTERNS").MustString("")) - Indexer.IncludePatterns = sec.Key("REPO_PATTERNS_INCLUDE").MustBool(false) + Indexer.IncludePatterns = globFromString(sec, "REPO_INDEXER_INCLUDE") + Indexer.ExcludePatterns = globFromString(sec, "REPO_INDEXER_EXCLUDE") Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) @@ -69,13 +70,14 @@ func newIndexerService() { Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20) } -func extensionsFromString(from string) []glob.Glob { +func globFromString(sec *ini.Section, valueName string) []glob.Glob { + globlist := sec.Key(valueName).MustString(""); extarr := make([]glob.Glob, 0, 10) - for _, expr := range strings.Split(strings.ToLower(from), ",") { + for _, expr := range strings.Split(strings.ToLower(globlist), ",") { expr = strings.TrimSpace(expr) if expr != "" { if g, err := glob.Compile(expr, '.', '/'); err != nil { - log.Trace("Index file extensions: '%s': bad pattern: %v", expr, err) + log.Info("Invalid expresion '%s' in %s (skipped): %v", expr, valueName, err) } else { extarr = append(extarr, g) } From 4de7be9c73f0965f8a1da4c4bed56dfd74eb10bd Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Thu, 5 Sep 2019 00:37:03 -0300 Subject: [PATCH 11/15] Add unit and integration tests --- integrations/api_repo_test.go | 6 +- .../user2/glob.git/HEAD | 1 + .../user2/glob.git/config | 4 + .../user2/glob.git/description | 1 + .../glob.git/hooks/applypatch-msg.sample | 15 ++ .../user2/glob.git/hooks/commit-msg.sample | 24 +++ .../glob.git/hooks/fsmonitor-watchman.sample | 114 ++++++++++++ .../user2/glob.git/hooks/post-update.sample | 8 + .../glob.git/hooks/pre-applypatch.sample | 14 ++ .../user2/glob.git/hooks/pre-commit.sample | 49 +++++ .../user2/glob.git/hooks/pre-push.sample | 53 ++++++ .../user2/glob.git/hooks/pre-rebase.sample | 169 ++++++++++++++++++ .../user2/glob.git/hooks/pre-receive.sample | 24 +++ .../glob.git/hooks/prepare-commit-msg.sample | 42 +++++ .../user2/glob.git/hooks/update.sample | 128 +++++++++++++ .../user2/glob.git/info/exclude | 6 + .../48/06cb9df135782b818c968c2fadbd2c150d23d6 | Bin 0 -> 21 bytes .../59/fee614e09d1f1cd1e15e4b2a7e9c8873a81498 | Bin 0 -> 34 bytes .../7c/8ac2f8d82a1eb5f6aaece6629ff11015f91eb4 | Bin 0 -> 21 bytes .../8e/592e636d27ac144f92f7fe8c33631cbdea594d | Bin 0 -> 78 bytes .../95/aff026f99a9ab76fbd01decb63dd3dbc03e498 | Bin 0 -> 34 bytes .../ae/d1ffed24cc3cf9b80490795e893cae4bddd684 | Bin 0 -> 108 bytes .../bf/d6a6583f9a9ac59bd726c1df26c64a89427ede | Bin 0 -> 21 bytes .../c8/eb3b6c767ccb68411d0a1f6c769be69fb4d95a | 1 + .../de/6be43fe8eb19ca3f4e934cb8b9a9a0b20fe865 | Bin 0 -> 50 bytes .../ef/6b814b610d8e7717aa0f71fbe5842bcf814697 | 2 + .../user2/glob.git/refs/heads/master | 1 + integrations/repo_search_test.go | 39 +++- models/fixtures/repo_unit.yml | 37 +++- models/fixtures/repository.yml | 11 ++ models/fixtures/user.yml | 2 +- models/repo_list_test.go | 6 +- modules/setting/indexer.go | 13 +- modules/setting/indexer_test.go | 73 ++++++++ 34 files changed, 820 insertions(+), 23 deletions(-) create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/HEAD create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/config create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/description create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/applypatch-msg.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/commit-msg.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/fsmonitor-watchman.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/post-update.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-applypatch.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-commit.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-push.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-rebase.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-receive.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/prepare-commit-msg.sample create mode 100755 integrations/gitea-repositories-meta/user2/glob.git/hooks/update.sample create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/info/exclude create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/48/06cb9df135782b818c968c2fadbd2c150d23d6 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/59/fee614e09d1f1cd1e15e4b2a7e9c8873a81498 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/7c/8ac2f8d82a1eb5f6aaece6629ff11015f91eb4 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/8e/592e636d27ac144f92f7fe8c33631cbdea594d create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/95/aff026f99a9ab76fbd01decb63dd3dbc03e498 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/ae/d1ffed24cc3cf9b80490795e893cae4bddd684 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/bf/d6a6583f9a9ac59bd726c1df26c64a89427ede create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/c8/eb3b6c767ccb68411d0a1f6c769be69fb4d95a create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/de/6be43fe8eb19ca3f4e934cb8b9a9a0b20fe865 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/objects/ef/6b814b610d8e7717aa0f71fbe5842bcf814697 create mode 100644 integrations/gitea-repositories-meta/user2/glob.git/refs/heads/master create mode 100644 modules/setting/indexer_test.go diff --git a/integrations/api_repo_test.go b/integrations/api_repo_test.go index 8dedfd1ae0d44..60fe4a3649576 100644 --- a/integrations/api_repo_test.go +++ b/integrations/api_repo_test.go @@ -70,9 +70,9 @@ func TestAPISearchRepo(t *testing.T) { expectedResults }{ {name: "RepositoriesMax50", requestURL: "/api/v1/repos/search?limit=50&private=false", expectedResults: expectedResults{ - nil: {count: 21}, - user: {count: 21}, - user2: {count: 21}}, + nil: {count: 22}, + user: {count: 22}, + user2: {count: 22}}, }, {name: "RepositoriesMax10", requestURL: "/api/v1/repos/search?limit=10&private=false", expectedResults: expectedResults{ nil: {count: 10}, diff --git a/integrations/gitea-repositories-meta/user2/glob.git/HEAD b/integrations/gitea-repositories-meta/user2/glob.git/HEAD new file mode 100644 index 0000000000000..cb089cd89a7d7 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/HEAD @@ -0,0 +1 @@ +ref: refs/heads/master diff --git a/integrations/gitea-repositories-meta/user2/glob.git/config b/integrations/gitea-repositories-meta/user2/glob.git/config new file mode 100644 index 0000000000000..07d359d07cf1e --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/config @@ -0,0 +1,4 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = true diff --git a/integrations/gitea-repositories-meta/user2/glob.git/description b/integrations/gitea-repositories-meta/user2/glob.git/description new file mode 100644 index 0000000000000..498b267a8c781 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/description @@ -0,0 +1 @@ +Unnamed repository; edit this file 'description' to name the repository. diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/applypatch-msg.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/applypatch-msg.sample new file mode 100755 index 0000000000000..a5d7b84a67345 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/applypatch-msg.sample @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script to check the commit log message taken by +# applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. The hook is +# allowed to edit the commit message file. +# +# To enable this hook, rename this file to "applypatch-msg". + +. git-sh-setup +commitmsg="$(git rev-parse --git-path hooks/commit-msg)" +test -x "$commitmsg" && exec "$commitmsg" ${1+"$@"} +: diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/commit-msg.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/commit-msg.sample new file mode 100755 index 0000000000000..b58d1184a9d43 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/commit-msg.sample @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to check the commit log message. +# Called by "git commit" with one argument, the name of the file +# that has the commit message. The hook should exit with non-zero +# status after issuing an appropriate message if it wants to stop the +# commit. The hook is allowed to edit the commit message file. +# +# To enable this hook, rename this file to "commit-msg". + +# Uncomment the below to add a Signed-off-by line to the message. +# Doing this in a hook is a bad idea in general, but the prepare-commit-msg +# hook is more suited to it. +# +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" + +# This example catches duplicate Signed-off-by lines. + +test "" = "$(grep '^Signed-off-by: ' "$1" | + sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { + echo >&2 Duplicate Signed-off-by lines. + exit 1 +} diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/fsmonitor-watchman.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/fsmonitor-watchman.sample new file mode 100755 index 0000000000000..e673bb3980f3c --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/fsmonitor-watchman.sample @@ -0,0 +1,114 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use IPC::Open2; + +# An example hook script to integrate Watchman +# (https://facebook.github.io/watchman/) with git to speed up detecting +# new and modified files. +# +# The hook is passed a version (currently 1) and a time in nanoseconds +# formatted as a string and outputs to stdout all files that have been +# modified since the given time. Paths must be relative to the root of +# the working tree and separated by a single NUL. +# +# To enable this hook, rename this file to "query-watchman" and set +# 'git config core.fsmonitor .git/hooks/query-watchman' +# +my ($version, $time) = @ARGV; + +# Check the hook interface version + +if ($version == 1) { + # convert nanoseconds to seconds + $time = int $time / 1000000000; +} else { + die "Unsupported query-fsmonitor hook version '$version'.\n" . + "Falling back to scanning...\n"; +} + +my $git_work_tree; +if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $git_work_tree = Win32::GetCwd(); + $git_work_tree =~ tr/\\/\//; +} else { + require Cwd; + $git_work_tree = Cwd::cwd(); +} + +my $retry = 1; + +launch_watchman(); + +sub launch_watchman { + + my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; + + # In the query expression below we're asking for names of files that + # changed since $time but were not transient (ie created after + # $time but no longer exist). + # + # To accomplish this, we're using the "since" generator to use the + # recency index to select candidate nodes and "fields" to limit the + # output to file names only. Then we're using the "expression" term to + # further constrain the results. + # + # The category of transient files that we want to ignore will have a + # creation clock (cclock) newer than $time_t value and will also not + # currently exist. + + my $query = <<" END"; + ["query", "$git_work_tree", { + "since": $time, + "fields": ["name"], + "expression": ["not", ["allof", ["since", $time, "cclock"], ["not", "exists"]]] + }] + END + + print CHLD_IN $query; + close CHLD_IN; + my $response = do {local $/; }; + + die "Watchman: command returned no output.\n" . + "Falling back to scanning...\n" if $response eq ""; + die "Watchman: command returned invalid output: $response\n" . + "Falling back to scanning...\n" unless $response =~ /^\{/; + + my $json_pkg; + eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; + } or do { + require JSON::PP; + $json_pkg = "JSON::PP"; + }; + + my $o = $json_pkg->new->utf8->decode($response); + + if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) { + print STDERR "Adding '$git_work_tree' to watchman's watch list.\n"; + $retry--; + qx/watchman watch "$git_work_tree"/; + die "Failed to make watchman watch '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + # Watchman will always return all files on the first query so + # return the fast "everything is dirty" flag to git and do the + # Watchman query just to get it over with now so we won't pay + # the cost in git to look up each individual file. + print "/\0"; + eval { launch_watchman() }; + exit 0; + } + + die "Watchman: $o->{error}.\n" . + "Falling back to scanning...\n" if $o->{error}; + + binmode STDOUT, ":utf8"; + local $, = "\0"; + print @{$o->{files}}; +} diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/post-update.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/post-update.sample new file mode 100755 index 0000000000000..ec17ec1939b7c --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/post-update.sample @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script to prepare a packed repository for use over +# dumb transports. +# +# To enable this hook, rename this file to "post-update". + +exec git update-server-info diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-applypatch.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-applypatch.sample new file mode 100755 index 0000000000000..4142082bcb939 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-applypatch.sample @@ -0,0 +1,14 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed +# by applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-applypatch". + +. git-sh-setup +precommit="$(git rev-parse --git-path hooks/pre-commit)" +test -x "$precommit" && exec "$precommit" ${1+"$@"} +: diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-commit.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-commit.sample new file mode 100755 index 0000000000000..6a756416384c2 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-commit.sample @@ -0,0 +1,49 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by "git commit" with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message if +# it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-commit". + +if git rev-parse --verify HEAD >/dev/null 2>&1 +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=$(git hash-object -t tree /dev/null) +fi + +# If you want to allow non-ASCII filenames set this variable to true. +allownonascii=$(git config --bool hooks.allownonascii) + +# Redirect output to stderr. +exec 1>&2 + +# Cross platform projects tend to avoid non-ASCII filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. +if [ "$allownonascii" != "true" ] && + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + test $(git diff --cached --name-only --diff-filter=A -z $against | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 +then + cat <<\EOF +Error: Attempt to add a non-ASCII file name. + +This can cause problems if you want to work with people on other platforms. + +To be portable it is advisable to rename the file. + +If you know what you are doing you can disable this check using: + + git config hooks.allownonascii true +EOF + exit 1 +fi + +# If there are whitespace errors, print the offending file names and fail. +exec git diff-index --check --cached $against -- diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-push.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-push.sample new file mode 100755 index 0000000000000..6187dbf4390fc --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-push.sample @@ -0,0 +1,53 @@ +#!/bin/sh + +# An example hook script to verify what is about to be pushed. Called by "git +# push" after it has checked the remote status, but before anything has been +# pushed. If this script exits with a non-zero status nothing will be pushed. +# +# This hook is called with the following parameters: +# +# $1 -- Name of the remote to which the push is being done +# $2 -- URL to which the push is being done +# +# If pushing without using a named remote those arguments will be equal. +# +# Information about the commits which are being pushed is supplied as lines to +# the standard input in the form: +# +# +# +# This sample shows how to prevent push of commits where the log message starts +# with "WIP" (work in progress). + +remote="$1" +url="$2" + +z40=0000000000000000000000000000000000000000 + +while read local_ref local_sha remote_ref remote_sha +do + if [ "$local_sha" = $z40 ] + then + # Handle delete + : + else + if [ "$remote_sha" = $z40 ] + then + # New branch, examine all commits + range="$local_sha" + else + # Update to existing branch, examine new commits + range="$remote_sha..$local_sha" + fi + + # Check for WIP commit + commit=`git rev-list -n 1 --grep '^WIP' "$range"` + if [ -n "$commit" ] + then + echo >&2 "Found WIP commit in $local_ref, not pushing" + exit 1 + fi + fi +done + +exit 0 diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-rebase.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-rebase.sample new file mode 100755 index 0000000000000..6cbef5c370d8c --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-rebase.sample @@ -0,0 +1,169 @@ +#!/bin/sh +# +# Copyright (c) 2006, 2008 Junio C Hamano +# +# The "pre-rebase" hook is run just before "git rebase" starts doing +# its job, and can prevent the command from running by exiting with +# non-zero status. +# +# The hook is called with the following parameters: +# +# $1 -- the upstream the series was forked from. +# $2 -- the branch being rebased (or empty when rebasing the current branch). +# +# This sample shows how to prevent topic branches that are already +# merged to 'next' branch from getting rebased, because allowing it +# would result in rebasing already published history. + +publish=next +basebranch="$1" +if test "$#" = 2 +then + topic="refs/heads/$2" +else + topic=`git symbolic-ref HEAD` || + exit 0 ;# we do not interrupt rebasing detached HEAD +fi + +case "$topic" in +refs/heads/??/*) + ;; +*) + exit 0 ;# we do not interrupt others. + ;; +esac + +# Now we are dealing with a topic branch being rebased +# on top of master. Is it OK to rebase it? + +# Does the topic really exist? +git show-ref -q "$topic" || { + echo >&2 "No such branch $topic" + exit 1 +} + +# Is topic fully merged to master? +not_in_master=`git rev-list --pretty=oneline ^master "$topic"` +if test -z "$not_in_master" +then + echo >&2 "$topic is fully merged to master; better remove it." + exit 1 ;# we could allow it, but there is no point. +fi + +# Is topic ever merged to next? If so you should not be rebasing it. +only_next_1=`git rev-list ^master "^$topic" ${publish} | sort` +only_next_2=`git rev-list ^master ${publish} | sort` +if test "$only_next_1" = "$only_next_2" +then + not_in_topic=`git rev-list "^$topic" master` + if test -z "$not_in_topic" + then + echo >&2 "$topic is already up to date with master" + exit 1 ;# we could allow it, but there is no point. + else + exit 0 + fi +else + not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"` + /usr/bin/perl -e ' + my $topic = $ARGV[0]; + my $msg = "* $topic has commits already merged to public branch:\n"; + my (%not_in_next) = map { + /^([0-9a-f]+) /; + ($1 => 1); + } split(/\n/, $ARGV[1]); + for my $elem (map { + /^([0-9a-f]+) (.*)$/; + [$1 => $2]; + } split(/\n/, $ARGV[2])) { + if (!exists $not_in_next{$elem->[0]}) { + if ($msg) { + print STDERR $msg; + undef $msg; + } + print STDERR " $elem->[1]\n"; + } + } + ' "$topic" "$not_in_next" "$not_in_master" + exit 1 +fi + +<<\DOC_END + +This sample hook safeguards topic branches that have been +published from being rewound. + +The workflow assumed here is: + + * Once a topic branch forks from "master", "master" is never + merged into it again (either directly or indirectly). + + * Once a topic branch is fully cooked and merged into "master", + it is deleted. If you need to build on top of it to correct + earlier mistakes, a new topic branch is created by forking at + the tip of the "master". This is not strictly necessary, but + it makes it easier to keep your history simple. + + * Whenever you need to test or publish your changes to topic + branches, merge them into "next" branch. + +The script, being an example, hardcodes the publish branch name +to be "next", but it is trivial to make it configurable via +$GIT_DIR/config mechanism. + +With this workflow, you would want to know: + +(1) ... if a topic branch has ever been merged to "next". Young + topic branches can have stupid mistakes you would rather + clean up before publishing, and things that have not been + merged into other branches can be easily rebased without + affecting other people. But once it is published, you would + not want to rewind it. + +(2) ... if a topic branch has been fully merged to "master". + Then you can delete it. More importantly, you should not + build on top of it -- other people may already want to + change things related to the topic as patches against your + "master", so if you need further changes, it is better to + fork the topic (perhaps with the same name) afresh from the + tip of "master". + +Let's look at this example: + + o---o---o---o---o---o---o---o---o---o "next" + / / / / + / a---a---b A / / + / / / / + / / c---c---c---c B / + / / / \ / + / / / b---b C \ / + / / / / \ / + ---o---o---o---o---o---o---o---o---o---o---o "master" + + +A, B and C are topic branches. + + * A has one fix since it was merged up to "next". + + * B has finished. It has been fully merged up to "master" and "next", + and is ready to be deleted. + + * C has not merged to "next" at all. + +We would want to allow C to be rebased, refuse A, and encourage +B to be deleted. + +To compute (1): + + git rev-list ^master ^topic next + git rev-list ^master next + + if these match, topic has not merged in next at all. + +To compute (2): + + git rev-list master..topic + + if this is empty, it is fully merged to "master". + +DOC_END diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-receive.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-receive.sample new file mode 100755 index 0000000000000..a1fd29ec14823 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/pre-receive.sample @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to make use of push options. +# The example simply echoes all push options that start with 'echoback=' +# and rejects all pushes when the "reject" push option is used. +# +# To enable this hook, rename this file to "pre-receive". + +if test -n "$GIT_PUSH_OPTION_COUNT" +then + i=0 + while test "$i" -lt "$GIT_PUSH_OPTION_COUNT" + do + eval "value=\$GIT_PUSH_OPTION_$i" + case "$value" in + echoback=*) + echo "echo from the pre-receive-hook: ${value#*=}" >&2 + ;; + reject) + exit 1 + esac + i=$((i + 1)) + done +fi diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/prepare-commit-msg.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/prepare-commit-msg.sample new file mode 100755 index 0000000000000..10fa14c5ab013 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/prepare-commit-msg.sample @@ -0,0 +1,42 @@ +#!/bin/sh +# +# An example hook script to prepare the commit log message. +# Called by "git commit" with the name of the file that has the +# commit message, followed by the description of the commit +# message's source. The hook's purpose is to edit the commit +# message file. If the hook fails with a non-zero status, +# the commit is aborted. +# +# To enable this hook, rename this file to "prepare-commit-msg". + +# This hook includes three examples. The first one removes the +# "# Please enter the commit message..." help message. +# +# The second includes the output of "git diff --name-status -r" +# into the message, just before the "git status" output. It is +# commented because it doesn't cope with --amend or with squashed +# commits. +# +# The third example adds a Signed-off-by line to the message, that can +# still be edited. This is rarely a good idea. + +COMMIT_MSG_FILE=$1 +COMMIT_SOURCE=$2 +SHA1=$3 + +/usr/bin/perl -i.bak -ne 'print unless(m/^. Please enter the commit message/..m/^#$/)' "$COMMIT_MSG_FILE" + +# case "$COMMIT_SOURCE,$SHA1" in +# ,|template,) +# /usr/bin/perl -i.bak -pe ' +# print "\n" . `git diff --cached --name-status -r` +# if /^#/ && $first++ == 0' "$COMMIT_MSG_FILE" ;; +# *) ;; +# esac + +# SOB=$(git var GIT_COMMITTER_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# git interpret-trailers --in-place --trailer "$SOB" "$COMMIT_MSG_FILE" +# if test -z "$COMMIT_SOURCE" +# then +# /usr/bin/perl -i.bak -pe 'print "\n" if !$first_line++' "$COMMIT_MSG_FILE" +# fi diff --git a/integrations/gitea-repositories-meta/user2/glob.git/hooks/update.sample b/integrations/gitea-repositories-meta/user2/glob.git/hooks/update.sample new file mode 100755 index 0000000000000..80ba94135cc37 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/hooks/update.sample @@ -0,0 +1,128 @@ +#!/bin/sh +# +# An example hook script to block unannotated tags from entering. +# Called by "git receive-pack" with arguments: refname sha1-old sha1-new +# +# To enable this hook, rename this file to "update". +# +# Config +# ------ +# hooks.allowunannotated +# This boolean sets whether unannotated tags will be allowed into the +# repository. By default they won't be. +# hooks.allowdeletetag +# This boolean sets whether deleting tags will be allowed in the +# repository. By default they won't be. +# hooks.allowmodifytag +# This boolean sets whether a tag may be modified after creation. By default +# it won't be. +# hooks.allowdeletebranch +# This boolean sets whether deleting branches will be allowed in the +# repository. By default they won't be. +# hooks.denycreatebranch +# This boolean sets whether remotely creating branches will be denied +# in the repository. By default this is allowed. +# + +# --- Command line +refname="$1" +oldrev="$2" +newrev="$3" + +# --- Safety check +if [ -z "$GIT_DIR" ]; then + echo "Don't run this script from the command line." >&2 + echo " (if you want, you could supply GIT_DIR then run" >&2 + echo " $0 )" >&2 + exit 1 +fi + +if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then + echo "usage: $0 " >&2 + exit 1 +fi + +# --- Config +allowunannotated=$(git config --bool hooks.allowunannotated) +allowdeletebranch=$(git config --bool hooks.allowdeletebranch) +denycreatebranch=$(git config --bool hooks.denycreatebranch) +allowdeletetag=$(git config --bool hooks.allowdeletetag) +allowmodifytag=$(git config --bool hooks.allowmodifytag) + +# check for no description +projectdesc=$(sed -e '1q' "$GIT_DIR/description") +case "$projectdesc" in +"Unnamed repository"* | "") + echo "*** Project description file hasn't been set" >&2 + exit 1 + ;; +esac + +# --- Check types +# if $newrev is 0000...0000, it's a commit to delete a ref. +zero="0000000000000000000000000000000000000000" +if [ "$newrev" = "$zero" ]; then + newrev_type=delete +else + newrev_type=$(git cat-file -t $newrev) +fi + +case "$refname","$newrev_type" in + refs/tags/*,commit) + # un-annotated tag + short_refname=${refname##refs/tags/} + if [ "$allowunannotated" != "true" ]; then + echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2 + echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2 + exit 1 + fi + ;; + refs/tags/*,delete) + # delete tag + if [ "$allowdeletetag" != "true" ]; then + echo "*** Deleting a tag is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/tags/*,tag) + # annotated tag + if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1 + then + echo "*** Tag '$refname' already exists." >&2 + echo "*** Modifying a tag is not allowed in this repository." >&2 + exit 1 + fi + ;; + refs/heads/*,commit) + # branch + if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then + echo "*** Creating a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/heads/*,delete) + # delete branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/remotes/*,commit) + # tracking branch + ;; + refs/remotes/*,delete) + # delete tracking branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a tracking branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + *) + # Anything else (is there anything else?) + echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2 + exit 1 + ;; +esac + +# --- Finished +exit 0 diff --git a/integrations/gitea-repositories-meta/user2/glob.git/info/exclude b/integrations/gitea-repositories-meta/user2/glob.git/info/exclude new file mode 100644 index 0000000000000..a5196d1be8fb5 --- /dev/null +++ b/integrations/gitea-repositories-meta/user2/glob.git/info/exclude @@ -0,0 +1,6 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. +# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ diff --git a/integrations/gitea-repositories-meta/user2/glob.git/objects/48/06cb9df135782b818c968c2fadbd2c150d23d6 b/integrations/gitea-repositories-meta/user2/glob.git/objects/48/06cb9df135782b818c968c2fadbd2c150d23d6 new file mode 100644 index 0000000000000000000000000000000000000000..a393a43282858079ea6983bdba600664969f025b GIT binary patch literal 21 ccmb<{9 literal 0 HcmV?d00001 diff --git a/integrations/gitea-repositories-meta/user2/glob.git/objects/59/fee614e09d1f1cd1e15e4b2a7e9c8873a81498 b/integrations/gitea-repositories-meta/user2/glob.git/objects/59/fee614e09d1f1cd1e15e4b2a7e9c8873a81498 new file mode 100644 index 0000000000000000000000000000000000000000..a55c8cc6e69bdfd2433025eb8961c2c9bc51ccd2 GIT binary patch literal 34 qcmb!NCp${7X literal 0 HcmV?d00001 diff --git a/integrations/gitea-repositories-meta/user2/glob.git/objects/7c/8ac2f8d82a1eb5f6aaece6629ff11015f91eb4 b/integrations/gitea-repositories-meta/user2/glob.git/objects/7c/8ac2f8d82a1eb5f6aaece6629ff11015f91eb4 new file mode 100644 index 0000000000000000000000000000000000000000..d5176e68c637f3eba8b8711529832d0a4a9d9c12 GIT binary patch literal 21 ccmb6WH2-^Ff%bxNYpE-C}G%tZCQl|D8E6aR};0H1yxy10rWrT_o{ literal 0 HcmV?d00001 diff --git a/integrations/gitea-repositories-meta/user2/glob.git/objects/95/aff026f99a9ab76fbd01decb63dd3dbc03e498 b/integrations/gitea-repositories-meta/user2/glob.git/objects/95/aff026f99a9ab76fbd01decb63dd3dbc03e498 new file mode 100644 index 0000000000000000000000000000000000000000..0883f2b3eb679bc51391d23bdffba8149f187cac GIT binary patch literal 34 qcmb Date: Fri, 6 Sep 2019 01:21:48 -0300 Subject: [PATCH 12/15] Update app.ini.sample and reword config-cheat-sheet --- custom/conf/app.ini.sample | 5 +++++ docs/content/doc/advanced/config-cheat-sheet.en-us.md | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index a2ac7248e770b..373f9b211dbe7 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -296,6 +296,11 @@ ISSUE_INDEXER_QUEUE_DIR = indexers/issues.queue ISSUE_INDEXER_QUEUE_CONN_STR = "addrs=127.0.0.1:6379 db=0" ; Batch queue number, default is 20 ISSUE_INDEXER_QUEUE_BATCH_NUMBER = 20 +; A comma separated list of glob patterns (see https://github.com/gobwas/glob) to include +; in the index; default is empty +REPO_INDEXER_INCLUDE = +; A comma separated list of glob patterns to exclude from the index; ; default is empty +REPO_INDEXER_EXCLUDE = ; repo indexer by default disabled, since it uses a lot of disk space REPO_INDEXER_ENABLED = false diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 74393baf9d345..e8251350fdb6f 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -181,8 +181,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of file name or path patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files. -- `REPO_INDEXER_EXCLUDE`: **empty**: A comma separated list of file name or path patterns (see https://github.com/gobwas/glob) to **exclude** from the index. Files that match this list will not be indexed, even if they match in `REPO_INDEXER_INCLUDE`. +- `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files. +- `REPO_INDEXER_EXCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **exclude** from the index. Files that match this list will not be indexed, even if they match in `REPO_INDEXER_INCLUDE`. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. From 59fe641ad500c42c2daa4f5e17f714ee960fefeb Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Fri, 6 Sep 2019 02:01:21 -0300 Subject: [PATCH 13/15] Add doc page and correct app.ini.sample --- custom/conf/app.ini.sample | 10 ++-- .../doc/advanced/repo-indexer.en-us.md | 57 +++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 docs/content/doc/advanced/repo-indexer.en-us.md diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index 373f9b211dbe7..9bfddc97e8f25 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -296,17 +296,17 @@ ISSUE_INDEXER_QUEUE_DIR = indexers/issues.queue ISSUE_INDEXER_QUEUE_CONN_STR = "addrs=127.0.0.1:6379 db=0" ; Batch queue number, default is 20 ISSUE_INDEXER_QUEUE_BATCH_NUMBER = 20 -; A comma separated list of glob patterns (see https://github.com/gobwas/glob) to include -; in the index; default is empty -REPO_INDEXER_INCLUDE = -; A comma separated list of glob patterns to exclude from the index; ; default is empty -REPO_INDEXER_EXCLUDE = ; repo indexer by default disabled, since it uses a lot of disk space REPO_INDEXER_ENABLED = false REPO_INDEXER_PATH = indexers/repos.bleve UPDATE_BUFFER_LEN = 20 MAX_FILE_SIZE = 1048576 +; A comma separated list of glob patterns (see https://github.com/gobwas/glob) to include +; in the index; default is empty +REPO_INDEXER_INCLUDE = +; A comma separated list of glob patterns to exclude from the index; ; default is empty +REPO_INDEXER_EXCLUDE = [admin] ; Disallow regular (non-admin) users from creating organizations. diff --git a/docs/content/doc/advanced/repo-indexer.en-us.md b/docs/content/doc/advanced/repo-indexer.en-us.md new file mode 100644 index 0000000000000..ef9b13f83846f --- /dev/null +++ b/docs/content/doc/advanced/repo-indexer.en-us.md @@ -0,0 +1,57 @@ +--- +date: "2019-09-06T01:35:00-03:00" +title: "Repository indexer" +slug: "repo-indexer" +weight: 45 +toc: true +draft: false +menu: + sidebar: + parent: "advanced" + name: "Repository indexer" + weight: 45 + identifier: "repo-indexer" +--- + +# Repository indexer + +## Setting up the repository indexer + +Gitea can search through the files of the repositories by enabling this function in your `app.ini`: + +``` +[indexer] +; ... +REPO_INDEXER_ENABLED = true +REPO_INDEXER_PATH = indexers/repos.bleve +UPDATE_BUFFER_LEN = 20 +MAX_FILE_SIZE = 1048576 +REPO_INDEXER_INCLUDE = +REPO_INDEXER_EXCLUDE = resources/bin/** +``` + +Please bear in mind that indexing the contents can consume a lot of system resources, especially when the index is created or globally updated (e.g. after upgrading Gitea). + +### Choosing the files for indexing by size + +The `MAX_FILE_SIZE` option will make the indexer to skip all files larger than the specified value. + +### Choosing the files for indexing by path + +Gitea applies glob pattern matching from the [`gobwas/glob` library](https://github.com/gobwas/glob) to choose which files will be included in the index. + +Limiting the list of files can help preventing the indexes to become polluted with derived or irrelevant files (e.g. lss, sym, map, etc.), so the search results are more relevant. It can also help reduce the index size. + +`REPO_INDEXER_INCLUDE` (default: empty) is a comma separated list of glob patterns to include in the index. An empty list means "include all files". +`REPO_INDEXER_EXCLUDE` (default: empty) is a comma separated list of glob patterns to exclude from the index. Files that match this list will not be indexed. `REPO_INDEXER_EXCLUDE` takes precedence over `REPO_INDEXER_INCLUDE`. + +Pattern matching works as follows: + +* To match all files with a `.txt` extension no matter what directory, use `**.txt`. +* To match all files with a `.txt` extension at the root level of the repository, use `*.txt`. +* To match all files inside `resources/bin` and below, use `resources/bin/**`. +* To match all files immediately inside `resources/bin`, use `resources/bin/*`. +* To match all files named `Makefile`, use `**Makefile`. +* Matching a directory has no effect; the pattern `resources/bin` will not include/exclude files inside that directory; `resources/bin/**` will. +* All files and patterns are normalized to lower case. + From a1f675b11e7d97326c5824cad895a361c748ef43 Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Fri, 6 Sep 2019 02:08:57 -0300 Subject: [PATCH 14/15] Some polish on the doc --- .../doc/advanced/repo-indexer.en-us.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/content/doc/advanced/repo-indexer.en-us.md b/docs/content/doc/advanced/repo-indexer.en-us.md index ef9b13f83846f..66f09df939a47 100644 --- a/docs/content/doc/advanced/repo-indexer.en-us.md +++ b/docs/content/doc/advanced/repo-indexer.en-us.md @@ -17,7 +17,7 @@ menu: ## Setting up the repository indexer -Gitea can search through the files of the repositories by enabling this function in your `app.ini`: +Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](https://docs.gitea.io/en-us/config-cheat-sheet/): ``` [indexer] @@ -30,28 +30,29 @@ REPO_INDEXER_INCLUDE = REPO_INDEXER_EXCLUDE = resources/bin/** ``` -Please bear in mind that indexing the contents can consume a lot of system resources, especially when the index is created or globally updated (e.g. after upgrading Gitea). +Please bear in mind that indexing the contents can consume a lot of system resources, especially when the index is created for the first time or globally updated (e.g. after upgrading Gitea). ### Choosing the files for indexing by size -The `MAX_FILE_SIZE` option will make the indexer to skip all files larger than the specified value. +The `MAX_FILE_SIZE` option will make the indexer skip all files larger than the specified value. ### Choosing the files for indexing by path Gitea applies glob pattern matching from the [`gobwas/glob` library](https://github.com/gobwas/glob) to choose which files will be included in the index. -Limiting the list of files can help preventing the indexes to become polluted with derived or irrelevant files (e.g. lss, sym, map, etc.), so the search results are more relevant. It can also help reduce the index size. +Limiting the list of files prevents the indexes from becoming polluted with derived or irrelevant files (e.g. lss, sym, map, etc.), so the search results are more relevant. It can also help reduce the index size. -`REPO_INDEXER_INCLUDE` (default: empty) is a comma separated list of glob patterns to include in the index. An empty list means "include all files". -`REPO_INDEXER_EXCLUDE` (default: empty) is a comma separated list of glob patterns to exclude from the index. Files that match this list will not be indexed. `REPO_INDEXER_EXCLUDE` takes precedence over `REPO_INDEXER_INCLUDE`. +`REPO_INDEXER_INCLUDE` (default: empty) is a comma separated list of glob patterns to **include** in the index. An empty list means "_include all files_". +`REPO_INDEXER_EXCLUDE` (default: empty) is a comma separated list of glob patterns to **exclude** from the index. Files that match this list will not be indexed. `REPO_INDEXER_EXCLUDE` takes precedence over `REPO_INDEXER_INCLUDE`. Pattern matching works as follows: * To match all files with a `.txt` extension no matter what directory, use `**.txt`. -* To match all files with a `.txt` extension at the root level of the repository, use `*.txt`. +* To match all files with a `.txt` extension _only at the root level of the repository_, use `*.txt`. * To match all files inside `resources/bin` and below, use `resources/bin/**`. -* To match all files immediately inside `resources/bin`, use `resources/bin/*`. +* To match all files _immediately inside_ `resources/bin`, use `resources/bin/*`. * To match all files named `Makefile`, use `**Makefile`. * Matching a directory has no effect; the pattern `resources/bin` will not include/exclude files inside that directory; `resources/bin/**` will. -* All files and patterns are normalized to lower case. +* All files and patterns are normalized to lower case, so `**Makefile`, `**makefile` and `**MAKEFILE` are equivalent. + From 480a406ae8831a9e1832ef77a25f9f240e2b2fdb Mon Sep 17 00:00:00 2001 From: Guillermo Prandi Date: Fri, 6 Sep 2019 02:52:24 -0300 Subject: [PATCH 15/15] Simplify code as suggested by @lafriks --- models/repo_indexer.go | 5 +---- modules/setting/indexer.go | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/models/repo_indexer.go b/models/repo_indexer.go index 6b24d62b7bc02..b842a1c87f229 100644 --- a/models/repo_indexer.go +++ b/models/repo_indexer.go @@ -243,15 +243,12 @@ func isIndexable(entry *git.TreeEntry) bool { return false } } - if setting.Indexer.IncludePatterns == nil { - return true - } for _, g := range setting.Indexer.IncludePatterns { if g.Match(name) { return true } } - return false + return len(setting.Indexer.IncludePatterns) == 0 } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go index 8aea3ed7eb07f..30c670d40765b 100644 --- a/modules/setting/indexer.go +++ b/modules/setting/indexer.go @@ -82,8 +82,5 @@ func IndexerGlobFromString(globstr string) []glob.Glob { } } } - if len(extarr) == 0 { - return nil - } return extarr }