5
5
package repofiles
6
6
7
7
import (
8
+ "bytes"
8
9
"fmt"
9
10
"path"
10
11
"strings"
11
12
13
+ "golang.org/x/net/html/charset"
14
+ "golang.org/x/text/transform"
15
+
12
16
"code.gitea.io/gitea/models"
17
+ "code.gitea.io/gitea/modules/base"
13
18
"code.gitea.io/gitea/modules/git"
14
19
"code.gitea.io/gitea/modules/lfs"
20
+ "code.gitea.io/gitea/modules/log"
15
21
"code.gitea.io/gitea/modules/setting"
16
22
"code.gitea.io/sdk/gitea"
17
23
)
@@ -37,6 +43,70 @@ type UpdateRepoFileOptions struct {
37
43
Committer * IdentityOptions
38
44
}
39
45
46
+ func detectEncodingAndBOM (entry * git.TreeEntry , repo * models.Repository ) (string , bool ) {
47
+ reader , err := entry .Blob ().DataAsync ()
48
+ if err != nil {
49
+ // return default
50
+ return "UTF-8" , false
51
+ }
52
+ defer reader .Close ()
53
+ buf := make ([]byte , 1024 )
54
+ n , err := reader .Read (buf )
55
+ if err != nil {
56
+ // return default
57
+ return "UTF-8" , false
58
+ }
59
+ buf = buf [:n ]
60
+
61
+ if setting .LFS .StartServer {
62
+ meta := lfs .IsPointerFile (& buf )
63
+ if meta != nil {
64
+ meta , err = repo .GetLFSMetaObjectByOid (meta .Oid )
65
+ if err != nil && err != models .ErrLFSObjectNotExist {
66
+ // return default
67
+ return "UTF-8" , false
68
+ }
69
+ }
70
+ if meta != nil {
71
+ dataRc , err := lfs .ReadMetaObject (meta )
72
+ if err != nil {
73
+ // return default
74
+ return "UTF-8" , false
75
+ }
76
+ defer dataRc .Close ()
77
+ buf = make ([]byte , 1024 )
78
+ n , err = dataRc .Read (buf )
79
+ if err != nil {
80
+ // return default
81
+ return "UTF-8" , false
82
+ }
83
+ buf = buf [:n ]
84
+ }
85
+
86
+ }
87
+
88
+ encoding , err := base .DetectEncoding (buf )
89
+ if err != nil {
90
+ // just default to utf-8 and no bom
91
+ return "UTF-8" , false
92
+ }
93
+ if encoding == "UTF-8" {
94
+ return encoding , bytes .Equal (buf [0 :3 ], base .UTF8BOM )
95
+ }
96
+ charsetEncoding , _ := charset .Lookup (encoding )
97
+ if charsetEncoding == nil {
98
+ return "UTF-8" , false
99
+ }
100
+
101
+ result , n , err := transform .String (charsetEncoding .NewDecoder (), string (buf ))
102
+
103
+ if n > 2 {
104
+ return encoding , bytes .Equal ([]byte (result )[0 :3 ], base .UTF8BOM )
105
+ }
106
+
107
+ return encoding , false
108
+ }
109
+
40
110
// CreateOrUpdateRepoFile adds or updates a file in the given repository
41
111
func CreateOrUpdateRepoFile (repo * models.Repository , doer * models.User , opts * UpdateRepoFileOptions ) (* gitea.FileResponse , error ) {
42
112
// If no branch name is set, assume master
@@ -118,6 +188,9 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
118
188
opts .LastCommitID = commit .ID .String ()
119
189
}
120
190
191
+ encoding := "UTF-8"
192
+ bom := false
193
+
121
194
if ! opts .IsNewFile {
122
195
fromEntry , err := commit .GetTreeEntryByPath (fromTreePath )
123
196
if err != nil {
@@ -151,6 +224,7 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
151
224
// haven't been made. We throw an error if one wasn't provided.
152
225
return nil , models.ErrSHAOrCommitIDNotProvided {}
153
226
}
227
+ encoding , bom = detectEncodingAndBOM (fromEntry , repo )
154
228
}
155
229
156
230
// For the path where this file will be created/updated, we need to make
@@ -235,9 +309,28 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
235
309
}
236
310
237
311
content := opts .Content
312
+ if bom {
313
+ content = string (base .UTF8BOM ) + content
314
+ }
315
+ if encoding != "UTF-8" {
316
+ charsetEncoding , _ := charset .Lookup (encoding )
317
+ if charsetEncoding != nil {
318
+ result , _ , err := transform .String (charsetEncoding .NewEncoder (), string (content ))
319
+ if err != nil {
320
+ // Look if we can't encode back in to the original we should just stick with utf-8
321
+ log .Error ("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v" , opts .TreePath , opts .FromTreePath , encoding , err )
322
+ result = content
323
+ }
324
+ content = result
325
+ } else {
326
+ log .Error ("Unknown encoding: %s" , encoding )
327
+ }
328
+ }
329
+ // Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
330
+ opts .Content = content
238
331
var lfsMetaObject * models.LFSMetaObject
239
332
240
- if filename2attribute2info [treePath ] != nil && filename2attribute2info [treePath ]["filter" ] == "lfs" {
333
+ if setting . LFS . StartServer && filename2attribute2info [treePath ] != nil && filename2attribute2info [treePath ]["filter" ] == "lfs" {
241
334
// OK so we are supposed to LFS this data!
242
335
oid , err := models .GenerateLFSOid (strings .NewReader (opts .Content ))
243
336
if err != nil {
0 commit comments