Skip to content

Commit 0be8280

Browse files
michalderkacz4a6f656c
authored and committed
cmd/compile: optimize small zeroing/moving on riscv64
Optimize small (s <= 32) zeroing/moving operations on riscv64. Avoid generating unaligned memory accesses. The code is almost a one-to-one translation of the corresponding mips64 rules, with an additional rule for s=32. Change-Id: I753b0b8e53cb9efcf43c8080cab90f3d03539fb8 Reviewed-on: https://go-review.googlesource.com/c/go/+/266217 Reviewed-by: Joel Sing <[email protected]> Reviewed-by: Cherry Zhang <[email protected]>
1 parent 5a267c8 commit 0be8280

File tree

2 files changed

+751
-36
lines changed

2 files changed

+751
-36
lines changed

src/cmd/compile/internal/ssa/gen/RISCV64.rules

Lines changed: 112 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
// * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
1010
// * Arrange for non-trivial Zero and Move lowerings to use aligned loads and stores.
1111
// * Eliminate zero immediate shifts, adds, etc.
12-
// * Use a Duff's device for some moves and zeros.
1312
// * Avoid using Neq32 for writeBarrier.enabled checks.
1413

1514
// Lowering arithmetic
@@ -352,18 +351,64 @@
352351
// with OffPtr -> ADDI.
353352
(ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)
354353

355-
// Zeroing
356-
// TODO: more optimized zeroing, including attempting to use aligned accesses.
357-
(Zero [0] _ mem) => mem
358-
(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst) mem)
359-
(Zero [2] ptr mem) => (MOVHstore ptr (MOVHconst) mem)
360-
(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
361-
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
362-
363-
// Medium zeroing uses a Duff's device
354+
// Small zeroing
355+
(Zero [0] _ mem) => mem
356+
(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst [0]) mem)
357+
(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 =>
358+
(MOVHstore ptr (MOVHconst [0]) mem)
359+
(Zero [2] ptr mem) =>
360+
(MOVBstore [1] ptr (MOVBconst [0])
361+
(MOVBstore ptr (MOVBconst [0]) mem))
362+
(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 =>
363+
(MOVWstore ptr (MOVWconst [0]) mem)
364+
(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 =>
365+
(MOVHstore [2] ptr (MOVHconst [0])
366+
(MOVHstore ptr (MOVHconst [0]) mem))
367+
(Zero [4] ptr mem) =>
368+
(MOVBstore [3] ptr (MOVBconst [0])
369+
(MOVBstore [2] ptr (MOVBconst [0])
370+
(MOVBstore [1] ptr (MOVBconst [0])
371+
(MOVBstore ptr (MOVBconst [0]) mem))))
372+
(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 =>
373+
(MOVDstore ptr (MOVDconst [0]) mem)
374+
(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 =>
375+
(MOVWstore [4] ptr (MOVWconst [0])
376+
(MOVWstore ptr (MOVWconst [0]) mem))
377+
(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 =>
378+
(MOVHstore [6] ptr (MOVHconst [0])
379+
(MOVHstore [4] ptr (MOVHconst [0])
380+
(MOVHstore [2] ptr (MOVHconst [0])
381+
(MOVHstore ptr (MOVHconst [0]) mem))))
382+
383+
(Zero [3] ptr mem) =>
384+
(MOVBstore [2] ptr (MOVBconst [0])
385+
(MOVBstore [1] ptr (MOVBconst [0])
386+
(MOVBstore ptr (MOVBconst [0]) mem)))
387+
(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 =>
388+
(MOVHstore [4] ptr (MOVHconst [0])
389+
(MOVHstore [2] ptr (MOVHconst [0])
390+
(MOVHstore ptr (MOVHconst [0]) mem)))
391+
(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
392+
(MOVWstore [8] ptr (MOVWconst [0])
393+
(MOVWstore [4] ptr (MOVWconst [0])
394+
(MOVWstore ptr (MOVWconst [0]) mem)))
395+
(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
396+
(MOVDstore [8] ptr (MOVDconst [0])
397+
(MOVDstore ptr (MOVDconst [0]) mem))
398+
(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
399+
(MOVDstore [16] ptr (MOVDconst [0])
400+
(MOVDstore [8] ptr (MOVDconst [0])
401+
(MOVDstore ptr (MOVDconst [0]) mem)))
402+
(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 =>
403+
(MOVDstore [24] ptr (MOVDconst [0])
404+
(MOVDstore [16] ptr (MOVDconst [0])
405+
(MOVDstore [8] ptr (MOVDconst [0])
406+
(MOVDstore ptr (MOVDconst [0]) mem))))
407+
408+
// Medium 8-aligned zeroing uses a Duff's device
364409
// 8 and 128 are magic constants, see runtime/mkduff.go
365410
(Zero [s] {t} ptr mem)
366-
&& s%8 == 0 && s >= 16 && s <= 8*128
411+
&& s%8 == 0 && s <= 8*128
367412
&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
368413
(DUFFZERO [8 * (128 - s/8)] ptr mem)
369414

@@ -377,7 +422,7 @@
377422
(Convert ...) => (MOVconvert ...)
378423

379424
// Checks
380-
(IsNonNil p) => (NeqPtr (MOVDconst) p)
425+
(IsNonNil p) => (NeqPtr (MOVDconst [0]) p)
381426
(IsInBounds ...) => (Less64U ...)
382427
(IsSliceInBounds ...) => (Leq64U ...)
383428

@@ -394,18 +439,64 @@
394439
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
395440
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
396441

397-
// Moves
398-
// TODO: more optimized moves, including attempting to use aligned accesses.
399-
(Move [0] _ _ mem) => mem
442+
// Small moves
443+
(Move [0] _ _ mem) => mem
400444
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
401-
(Move [2] dst src mem) => (MOVHstore dst (MOVHload src mem) mem)
402-
(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
403-
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
404-
405-
// Medium move uses a Duff's device
445+
(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 =>
446+
(MOVHstore dst (MOVHload src mem) mem)
447+
(Move [2] dst src mem) =>
448+
(MOVBstore [1] dst (MOVBload [1] src mem)
449+
(MOVBstore dst (MOVBload src mem) mem))
450+
(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 =>
451+
(MOVWstore dst (MOVWload src mem) mem)
452+
(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 =>
453+
(MOVHstore [2] dst (MOVHload [2] src mem)
454+
(MOVHstore dst (MOVHload src mem) mem))
455+
(Move [4] dst src mem) =>
456+
(MOVBstore [3] dst (MOVBload [3] src mem)
457+
(MOVBstore [2] dst (MOVBload [2] src mem)
458+
(MOVBstore [1] dst (MOVBload [1] src mem)
459+
(MOVBstore dst (MOVBload src mem) mem))))
460+
(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 =>
461+
(MOVDstore dst (MOVDload src mem) mem)
462+
(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 =>
463+
(MOVWstore [4] dst (MOVWload [4] src mem)
464+
(MOVWstore dst (MOVWload src mem) mem))
465+
(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 =>
466+
(MOVHstore [6] dst (MOVHload [6] src mem)
467+
(MOVHstore [4] dst (MOVHload [4] src mem)
468+
(MOVHstore [2] dst (MOVHload [2] src mem)
469+
(MOVHstore dst (MOVHload src mem) mem))))
470+
471+
(Move [3] dst src mem) =>
472+
(MOVBstore [2] dst (MOVBload [2] src mem)
473+
(MOVBstore [1] dst (MOVBload [1] src mem)
474+
(MOVBstore dst (MOVBload src mem) mem)))
475+
(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 =>
476+
(MOVHstore [4] dst (MOVHload [4] src mem)
477+
(MOVHstore [2] dst (MOVHload [2] src mem)
478+
(MOVHstore dst (MOVHload src mem) mem)))
479+
(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 =>
480+
(MOVWstore [8] dst (MOVWload [8] src mem)
481+
(MOVWstore [4] dst (MOVWload [4] src mem)
482+
(MOVWstore dst (MOVWload src mem) mem)))
483+
(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 =>
484+
(MOVDstore [8] dst (MOVDload [8] src mem)
485+
(MOVDstore dst (MOVDload src mem) mem))
486+
(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 =>
487+
(MOVDstore [16] dst (MOVDload [16] src mem)
488+
(MOVDstore [8] dst (MOVDload [8] src mem)
489+
(MOVDstore dst (MOVDload src mem) mem)))
490+
(Move [32] {t} dst src mem) && t.Alignment()%8 == 0 =>
491+
(MOVDstore [24] dst (MOVDload [24] src mem)
492+
(MOVDstore [16] dst (MOVDload [16] src mem)
493+
(MOVDstore [8] dst (MOVDload [8] src mem)
494+
(MOVDstore dst (MOVDload src mem) mem))))
495+
496+
// Medium 8-aligned move uses a Duff's device
406497
// 16 and 128 are magic constants, see runtime/mkduff.go
407498
(Move [s] {t} dst src mem)
408-
&& s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
499+
&& s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
409500
&& !config.noDuffDevice && logLargeCopy(v, s) =>
410501
(DUFFCOPY [16 * (128 - s/8)] dst src mem)
411502

0 commit comments

Comments
 (0)