@@ -7,6 +7,7 @@ package cmp
77import (
88 "bytes"
99 "fmt"
10+ "math"
1011 "reflect"
1112 "strconv"
1213 "strings"
@@ -96,30 +97,29 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
9697 }
9798
9899 // Auto-detect the type of the data.
99- var isLinedText , isText , isBinary bool
100100 var sx , sy string
101101 var ssx , ssy []string
102+ var isString , isMostlyText , isPureLinedText , isBinary bool
102103 switch {
103104 case t .Kind () == reflect .String :
104105 sx , sy = vx .String (), vy .String ()
105- isText = true // Initial estimate, verify later
106+ isString = true
106107 case t .Kind () == reflect .Slice && t .Elem () == reflect .TypeOf (byte (0 )):
107108 sx , sy = string (vx .Bytes ()), string (vy .Bytes ())
108- isBinary = true // Initial estimate, verify later
109+ isString = true
109110 case t .Kind () == reflect .Array :
110111 // Arrays need to be addressable for slice operations to work.
111112 vx2 , vy2 := reflect .New (t ).Elem (), reflect .New (t ).Elem ()
112113 vx2 .Set (vx )
113114 vy2 .Set (vy )
114115 vx , vy = vx2 , vy2
115116 }
116- if isText || isBinary {
117- var numLines , lastLineIdx , maxLineLen int
118- isBinary = ! utf8 .ValidString (sx ) || ! utf8 .ValidString (sy )
117+ if isString {
118+ var numTotalRunes , numValidRunes , numLines , lastLineIdx , maxLineLen int
119119 for i , r := range sx + sy {
120- if ! ( unicode . IsPrint ( r ) || unicode . IsSpace ( r )) || r == utf8 . RuneError {
121- isBinary = true
122- break
120+ numTotalRunes ++
121+ if ( unicode . IsPrint ( r ) || unicode . IsSpace ( r )) && r != utf8 . RuneError {
122+ numValidRunes ++
123123 }
124124 if r == '\n' {
125125 if maxLineLen < i - lastLineIdx {
@@ -129,12 +129,14 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
129129 numLines ++
130130 }
131131 }
132- isText = ! isBinary
133- isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
132+ isPureText := numValidRunes == numTotalRunes
133+ isMostlyText = float64 (numValidRunes ) > math .Floor (0.90 * float64 (numTotalRunes ))
134+ isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
135+ isBinary = ! isMostlyText
134136
135137 // Avoid diffing by lines if it produces a significantly more complex
136138 // edit script than diffing by bytes.
137- if isLinedText {
139+ if isPureLinedText {
138140 ssx = strings .Split (sx , "\n " )
139141 ssy = strings .Split (sy , "\n " )
140142 esLines := diff .Difference (len (ssx ), len (ssy ), func (ix , iy int ) diff.Result {
@@ -145,7 +147,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
145147 })
146148 efficiencyLines := float64 (esLines .Dist ()) / float64 (len (esLines ))
147149 efficiencyBytes := float64 (esBytes .Dist ()) / float64 (len (esBytes ))
148- isLinedText = efficiencyLines < 4 * efficiencyBytes
150+ isPureLinedText = efficiencyLines < 4 * efficiencyBytes
149151 }
150152 }
151153
@@ -155,7 +157,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
155157 switch {
156158 // If the text appears to be multi-lined text,
157159 // then perform differencing across individual lines.
158- case isLinedText :
160+ case isPureLinedText :
159161 list = opts .formatDiffSlice (
160162 reflect .ValueOf (ssx ), reflect .ValueOf (ssy ), 1 , "line" ,
161163 func (v reflect.Value , d diffMode ) textRecord {
@@ -244,15 +246,14 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
244246 // If the text appears to be single-lined text,
245247 // then perform differencing in approximately fixed-sized chunks.
246248 // The output is printed as quoted strings.
247- case isText :
249+ case isMostlyText :
248250 list = opts .formatDiffSlice (
249251 reflect .ValueOf (sx ), reflect .ValueOf (sy ), 64 , "byte" ,
250252 func (v reflect.Value , d diffMode ) textRecord {
251253 s := formatString (v .String ())
252254 return textRecord {Diff : d , Value : textLine (s )}
253255 },
254256 )
255- delim = ""
256257
257258 // If the text appears to be binary data,
258259 // then perform differencing in approximately fixed-sized chunks.
@@ -314,7 +315,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
314315
315316 // Wrap the output with appropriate type information.
316317 var out textNode = & textWrap {Prefix : "{" , Value : list , Suffix : "}" }
317- if ! isText {
318+ if ! isMostlyText {
318319 // The "{...}" byte-sequence literal is not valid Go syntax for strings.
319320 // Emit the type for extra clarity (e.g. "string{...}").
320321 if t .Kind () == reflect .String {
0 commit comments