@@ -3243,11 +3243,25 @@ func walkcompare(n *Node, init *Nodes) *Node {
3243
3243
// inline or call an eq alg.
3244
3244
t := n .Left .Type
3245
3245
var inline bool
3246
+
3247
+ maxcmpsize := int64 (4 )
3248
+ unalignedLoad := false
3249
+ switch thearch .LinkArch .Family {
3250
+ case sys .AMD64 , sys .ARM64 , sys .S390X :
3251
+ // Keep this low enough to generate less code than a function call.
3252
+ maxcmpsize = 16
3253
+ unalignedLoad = true
3254
+ case sys .I386 :
3255
+ maxcmpsize = 8
3256
+ unalignedLoad = true
3257
+ }
3258
+
3246
3259
switch t .Etype {
3247
3260
default :
3248
3261
return n
3249
3262
case TARRAY :
3250
- inline = t .NumElem () <= 1 || (t .NumElem () <= 4 && issimple [t .Elem ().Etype ])
3263
+ // We can compare several elements at once using 2-, 4-, or 8-byte integer compares.
3264
+ inline = t .NumElem () <= 1 || (issimple [t .Elem ().Etype ] && (t .NumElem () <= 4 || t .Elem ().Width * t .NumElem () <= maxcmpsize ))
3251
3265
case TSTRUCT :
3252
3266
inline = t .NumFields () <= 4
3253
3267
}
@@ -3333,11 +3347,54 @@ func walkcompare(n *Node, init *Nodes) *Node {
3333
3347
)
3334
3348
}
3335
3349
} else {
3336
- for i := 0 ; int64 (i ) < t .NumElem (); i ++ {
3337
- compare (
3338
- nod (OINDEX , cmpl , nodintconst (int64 (i ))),
3339
- nod (OINDEX , cmpr , nodintconst (int64 (i ))),
3340
- )
3350
+ step := int64 (1 )
3351
+ remains := t .NumElem () * t .Elem ().Width
3352
+ combine64bit := unalignedLoad && Widthreg == 8 && t .Elem ().Width <= 4 && t .Elem ().IsInteger ()
3353
+ combine32bit := unalignedLoad && t .Elem ().Width <= 2 && t .Elem ().IsInteger ()
3354
+ combine16bit := unalignedLoad && t .Elem ().Width == 1 && t .Elem ().IsInteger ()
3355
+ for i := int64 (0 ); remains > 0 ; {
3356
+ var convType * types.Type
3357
+ switch {
3358
+ case remains >= 8 && combine64bit :
3359
+ convType = types .Types [TINT64 ]
3360
+ step = 8 / t .Elem ().Width
3361
+ case remains >= 4 && combine32bit :
3362
+ convType = types .Types [TUINT32 ]
3363
+ step = 4 / t .Elem ().Width
3364
+ case remains >= 2 && combine16bit :
3365
+ convType = types .Types [TUINT16 ]
3366
+ step = 2 / t .Elem ().Width
3367
+ default :
3368
+ step = 1
3369
+ }
3370
+ if step == 1 {
3371
+ compare (
3372
+ nod (OINDEX , cmpl , nodintconst (int64 (i ))),
3373
+ nod (OINDEX , cmpr , nodintconst (int64 (i ))),
3374
+ )
3375
+ i ++
3376
+ remains -= t .Elem ().Width
3377
+ } else {
3378
+ cmplw := nod (OINDEX , cmpl , nodintconst (int64 (i )))
3379
+ cmplw = conv (cmplw , convType )
3380
+ cmprw := nod (OINDEX , cmpr , nodintconst (int64 (i )))
3381
+ cmprw = conv (cmprw , convType )
3382
+ // For code like this: uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 ...
3383
+ // ssa will generate a single large load.
3384
+ for offset := int64 (1 ); offset < step ; offset ++ {
3385
+ lb := nod (OINDEX , cmpl , nodintconst (int64 (i + offset )))
3386
+ lb = conv (lb , convType )
3387
+ lb = nod (OLSH , lb , nodintconst (int64 (8 * t .Elem ().Width * offset )))
3388
+ cmplw = nod (OOR , cmplw , lb )
3389
+ rb := nod (OINDEX , cmpr , nodintconst (int64 (i + offset )))
3390
+ rb = conv (rb , convType )
3391
+ rb = nod (OLSH , rb , nodintconst (int64 (8 * t .Elem ().Width * offset )))
3392
+ cmprw = nod (OOR , cmprw , rb )
3393
+ }
3394
+ compare (cmplw , cmprw )
3395
+ i += step
3396
+ remains -= step * t .Elem ().Width
3397
+ }
3341
3398
}
3342
3399
}
3343
3400
if expr == nil {
0 commit comments