5
5
package models
6
6
7
7
import (
8
- "io/ioutil"
9
- "os"
10
- "path"
8
+ "fmt"
11
9
"strconv"
12
10
"strings"
13
11
@@ -16,8 +14,6 @@ import (
16
14
"code.gitea.io/gitea/modules/indexer"
17
15
"code.gitea.io/gitea/modules/log"
18
16
"code.gitea.io/gitea/modules/setting"
19
-
20
- "github.com/Unknwon/com"
21
17
)
22
18
23
19
// RepoIndexerStatus status of a repo's entry in the repo indexer
@@ -132,77 +128,86 @@ func populateRepoIndexer(maxRepoID int64) {
132
128
}
133
129
134
130
func updateRepoIndexer (repo * Repository ) error {
135
- changes , err := getRepoChanges (repo )
131
+ sha , err := getDefaultBranchSha (repo )
132
+ if err != nil {
133
+ return err
134
+ }
135
+ changes , err := getRepoChanges (repo , sha )
136
136
if err != nil {
137
137
return err
138
138
} else if changes == nil {
139
139
return nil
140
140
}
141
141
142
142
batch := indexer .RepoIndexerBatch ()
143
- for _ , filename := range changes .UpdatedFiles {
144
- if err := addUpdate (filename , repo , batch ); err != nil {
143
+ for _ , update := range changes .Updates {
144
+ if err := addUpdate (update , repo , batch ); err != nil {
145
145
return err
146
146
}
147
147
}
148
- for _ , filename := range changes .RemovedFiles {
148
+ for _ , filename := range changes .RemovedFilenames {
149
149
if err := addDelete (filename , repo , batch ); err != nil {
150
150
return err
151
151
}
152
152
}
153
153
if err = batch .Flush (); err != nil {
154
154
return err
155
155
}
156
- return updateLastIndexSync ( repo )
156
+ return repo . updateIndexerStatus ( sha )
157
157
}
158
158
159
159
// repoChanges changes (file additions/updates/removals) to a repo
160
160
type repoChanges struct {
161
- UpdatedFiles [] string
162
- RemovedFiles []string
161
+ Updates [] fileUpdate
162
+ RemovedFilenames []string
163
163
}
164
164
165
- // getRepoChanges returns changes to repo since last indexer update
166
- func getRepoChanges ( repo * Repository ) ( * repoChanges , error ) {
167
- repoWorkingPool . CheckIn ( com . ToStr ( repo . ID ))
168
- defer repoWorkingPool . CheckOut ( com . ToStr ( repo . ID ))
165
// fileUpdate identifies one file whose contents should be (re)indexed.
type fileUpdate struct {
	// Filename is the path of the file as reported by git.
	Filename string
	// BlobSha is the sha of the git blob object holding the file contents.
	BlobSha string
}
169
169
170
- if err := repo .UpdateLocalCopyBranch ("" ); err != nil {
171
- return nil , err
172
- } else if ! git .IsBranchExist (repo .LocalCopyPath (), repo .DefaultBranch ) {
173
- // repo does not have any commits yet, so nothing to update
174
- return nil , nil
175
- } else if err = repo .UpdateLocalCopyBranch (repo .DefaultBranch ); err != nil {
176
- return nil , err
177
- } else if err = repo .getIndexerStatus (); err != nil {
170
+ func getDefaultBranchSha (repo * Repository ) (string , error ) {
171
+ stdout , err := git .NewCommand ("show-ref" , "-s" , repo .DefaultBranch ).RunInDir (repo .RepoPath ())
172
+ if err != nil {
173
+ return "" , err
174
+ }
175
+ return strings .TrimSpace (stdout ), nil
176
+ }
177
+
178
+ // getRepoChanges returns changes to repo since last indexer update
179
+ func getRepoChanges (repo * Repository , revision string ) (* repoChanges , error ) {
180
+ if err := repo .getIndexerStatus (); err != nil {
178
181
return nil , err
179
182
}
180
183
181
184
if len (repo .IndexerStatus .CommitSha ) == 0 {
182
- return genesisChanges (repo )
185
+ return genesisChanges (repo , revision )
183
186
}
184
- return nonGenesisChanges (repo )
187
+ return nonGenesisChanges (repo , revision )
185
188
}
186
189
187
- func addUpdate (filename string , repo * Repository , batch * indexer.Batch ) error {
188
- filepath := path .Join (repo .LocalCopyPath (), filename )
189
- if stat , err := os .Stat (filepath ); err != nil {
190
+ func addUpdate (update fileUpdate , repo * Repository , batch * indexer.Batch ) error {
191
+ stdout , err := git .NewCommand ("cat-file" , "-s" , update .BlobSha ).
192
+ RunInDir (repo .RepoPath ())
193
+ if err != nil {
190
194
return err
191
- } else if stat .Size () > setting .Indexer .MaxIndexerFileSize {
192
- return nil
193
- } else if stat .IsDir () {
194
- // file could actually be a directory, if it is the root of a submodule.
195
- // We do not index submodule contents, so don't do anything.
195
+ }
196
+ if size , err := strconv .Atoi (strings .TrimSpace (stdout )); err != nil {
197
+ return fmt .Errorf ("Misformatted git cat-file output: %v" , err )
198
+ } else if int64 (size ) > setting .Indexer .MaxIndexerFileSize {
196
199
return nil
197
200
}
198
- fileContents , err := ioutil .ReadFile (filepath )
201
+
202
+ fileContents , err := git .NewCommand ("cat-file" , "blob" , update .BlobSha ).
203
+ RunInDirBytes (repo .RepoPath ())
199
204
if err != nil {
200
205
return err
201
206
} else if ! base .IsTextFile (fileContents ) {
202
207
return nil
203
208
}
204
209
return batch .Add (indexer.RepoIndexerUpdate {
205
- Filepath : filename ,
210
+ Filepath : update . Filename ,
206
211
Op : indexer .RepoIndexerOpUpdate ,
207
212
Data : & indexer.RepoIndexerData {
208
213
RepoID : repo .ID ,
@@ -221,42 +226,76 @@ func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
221
226
})
222
227
}
223
228
224
- // genesisChanges get changes to add repo to the indexer for the first time
225
- func genesisChanges (repo * Repository ) (* repoChanges , error ) {
226
- var changes repoChanges
227
- stdout , err := git .NewCommand ("ls-files" ).RunInDir (repo .LocalCopyPath ())
228
- if err != nil {
229
- return nil , err
230
- }
231
- for _ , line := range strings .Split (stdout , "\n " ) {
232
- filename := strings .TrimSpace (line )
233
- if len (filename ) == 0 {
229
+ // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
230
+ func parseGitLsTreeOutput (stdout string ) ([]fileUpdate , error ) {
231
+ lines := strings .Split (stdout , "\n " )
232
+ updates := make ([]fileUpdate , 0 , len (lines ))
233
+ for _ , line := range lines {
234
+ // expect line to be "<mode> <object-type> <object-sha>\t<filename>"
235
+ line = strings .TrimSpace (line )
236
+ if len (line ) == 0 {
234
237
continue
235
- } else if filename [0 ] == '"' {
238
+ }
239
+ firstSpaceIndex := strings .IndexByte (line , ' ' )
240
+ if firstSpaceIndex < 0 {
241
+ log .Error (4 , "Misformatted git ls-tree output: %s" , line )
242
+ continue
243
+ }
244
+ tabIndex := strings .IndexByte (line , '\t' )
245
+ if tabIndex < 42 + firstSpaceIndex || tabIndex == len (line )- 1 {
246
+ log .Error (4 , "Misformatted git ls-tree output: %s" , line )
247
+ continue
248
+ }
249
+ if objectType := line [firstSpaceIndex + 1 : tabIndex - 41 ]; objectType != "blob" {
250
+ // submodules appear as commit objects, we do not index submodules
251
+ continue
252
+ }
253
+
254
+ blobSha := line [tabIndex - 40 : tabIndex ]
255
+ filename := line [tabIndex + 1 :]
256
+ if filename [0 ] == '"' {
257
+ var err error
236
258
filename , err = strconv .Unquote (filename )
237
259
if err != nil {
238
260
return nil , err
239
261
}
240
262
}
241
- changes .UpdatedFiles = append (changes .UpdatedFiles , filename )
263
+ updates = append (updates , fileUpdate {
264
+ Filename : filename ,
265
+ BlobSha : blobSha ,
266
+ })
267
+ }
268
+ return updates , nil
269
+ }
270
+
271
+ // genesisChanges get changes to add repo to the indexer for the first time
272
+ func genesisChanges (repo * Repository , revision string ) (* repoChanges , error ) {
273
+ var changes repoChanges
274
+ stdout , err := git .NewCommand ("ls-tree" , "--full-tree" , "-r" , revision ).
275
+ RunInDir (repo .RepoPath ())
276
+ if err != nil {
277
+ return nil , err
242
278
}
243
- return & changes , nil
279
+ changes .Updates , err = parseGitLsTreeOutput (stdout )
280
+ return & changes , err
244
281
}
245
282
246
283
// nonGenesisChanges get changes since the previous indexer update
247
- func nonGenesisChanges (repo * Repository ) (* repoChanges , error ) {
284
+ func nonGenesisChanges (repo * Repository , revision string ) (* repoChanges , error ) {
248
285
diffCmd := git .NewCommand ("diff" , "--name-status" ,
249
- repo .IndexerStatus .CommitSha , "HEAD" )
250
- stdout , err := diffCmd .RunInDir (repo .LocalCopyPath ())
286
+ repo .IndexerStatus .CommitSha , revision )
287
+ stdout , err := diffCmd .RunInDir (repo .RepoPath ())
251
288
if err != nil {
252
289
// previous commit sha may have been removed by a force push, so
253
290
// try rebuilding from scratch
291
+ log .Warn ("git diff: %v" , err )
254
292
if err = indexer .DeleteRepoFromIndexer (repo .ID ); err != nil {
255
293
return nil , err
256
294
}
257
- return genesisChanges (repo )
295
+ return genesisChanges (repo , revision )
258
296
}
259
297
var changes repoChanges
298
+ updatedFilenames := make ([]string , 0 , 10 )
260
299
for _ , line := range strings .Split (stdout , "\n " ) {
261
300
line = strings .TrimSpace (line )
262
301
if len (line ) == 0 {
@@ -274,23 +313,22 @@ func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
274
313
275
314
switch status := line [0 ]; status {
276
315
case 'M' , 'A' :
277
- changes . UpdatedFiles = append (changes . UpdatedFiles , filename )
316
+ updatedFilenames = append (updatedFilenames , filename )
278
317
case 'D' :
279
- changes .RemovedFiles = append (changes .RemovedFiles , filename )
318
+ changes .RemovedFilenames = append (changes .RemovedFilenames , filename )
280
319
default :
281
320
log .Warn ("Unrecognized status: %c (line=%s)" , status , line )
282
321
}
283
322
}
284
- return & changes , nil
285
- }
286
323
287
- func updateLastIndexSync (repo * Repository ) error {
288
- stdout , err := git .NewCommand ("rev-parse" , "HEAD" ).RunInDir (repo .LocalCopyPath ())
324
+ cmd := git .NewCommand ("ls-tree" , "--full-tree" , revision , "--" )
325
+ cmd .AddArguments (updatedFilenames ... )
326
+ stdout , err = cmd .RunInDir (repo .RepoPath ())
289
327
if err != nil {
290
- return err
328
+ return nil , err
291
329
}
292
- sha := strings . TrimSpace (stdout )
293
- return repo . updateIndexerStatus ( sha )
330
+ changes . Updates , err = parseGitLsTreeOutput (stdout )
331
+ return & changes , err
294
332
}
295
333
296
334
func processRepoIndexerOperationQueue () {
0 commit comments