|
9 | 9 | // * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants. |
10 | 10 | // * Arrange for non-trivial Zero and Move lowerings to use aligned loads and stores. |
11 | 11 | // * Eliminate zero immediate shifts, adds, etc. |
12 | | -// * Use a Duff's device for some moves and zeros. |
13 | 12 | // * Avoid using Neq32 for writeBarrier.enabled checks. |
14 | 13 |
|
15 | 14 | // Lowering arithmetic |
|
352 | 351 | // with OffPtr -> ADDI. |
353 | 352 | (ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x) |
354 | 353 |
|
355 | | -// Zeroing |
356 | | -// TODO: more optimized zeroing, including attempting to use aligned accesses. |
357 | | -(Zero [0] _ mem) => mem |
358 | | -(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst) mem) |
359 | | -(Zero [2] ptr mem) => (MOVHstore ptr (MOVHconst) mem) |
360 | | -(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem) |
361 | | -(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem) |
362 | | - |
363 | | -// Medium zeroing uses a Duff's device |
| 354 | +// Small zeroing: use the widest stores that the type's alignment permits |
| 355 | +(Zero [0] _ mem) => mem |
| 356 | +(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst [0]) mem) |
| 357 | +(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 => |
| 358 | + (MOVHstore ptr (MOVHconst [0]) mem) |
| 359 | +(Zero [2] ptr mem) => |
| 360 | + (MOVBstore [1] ptr (MOVBconst [0]) |
| 361 | + (MOVBstore ptr (MOVBconst [0]) mem)) |
| 362 | +(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 => |
| 363 | + (MOVWstore ptr (MOVWconst [0]) mem) |
| 364 | +(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 => |
| 365 | + (MOVHstore [2] ptr (MOVHconst [0]) |
| 366 | + (MOVHstore ptr (MOVHconst [0]) mem)) |
| 367 | +(Zero [4] ptr mem) => |
| 368 | + (MOVBstore [3] ptr (MOVBconst [0]) |
| 369 | + (MOVBstore [2] ptr (MOVBconst [0]) |
| 370 | + (MOVBstore [1] ptr (MOVBconst [0]) |
| 371 | + (MOVBstore ptr (MOVBconst [0]) mem)))) |
| 372 | +(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 => |
| 373 | + (MOVDstore ptr (MOVDconst [0]) mem) |
| 374 | +(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 => |
| 375 | + (MOVWstore [4] ptr (MOVWconst [0]) |
| 376 | + (MOVWstore ptr (MOVWconst [0]) mem)) |
| 377 | +(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 => |
| 378 | + (MOVHstore [6] ptr (MOVHconst [0]) |
| 379 | + (MOVHstore [4] ptr (MOVHconst [0]) |
| 380 | + (MOVHstore [2] ptr (MOVHconst [0]) |
| 381 | + (MOVHstore ptr (MOVHconst [0]) mem)))) |
| 382 | + |
| 383 | +(Zero [3] ptr mem) => |
| 384 | + (MOVBstore [2] ptr (MOVBconst [0]) |
| 385 | + (MOVBstore [1] ptr (MOVBconst [0]) |
| 386 | + (MOVBstore ptr (MOVBconst [0]) mem))) |
| 387 | +(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 => |
| 388 | + (MOVHstore [4] ptr (MOVHconst [0]) |
| 389 | + (MOVHstore [2] ptr (MOVHconst [0]) |
| 390 | + (MOVHstore ptr (MOVHconst [0]) mem))) |
| 391 | +(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 => |
| 392 | + (MOVWstore [8] ptr (MOVWconst [0]) |
| 393 | + (MOVWstore [4] ptr (MOVWconst [0]) |
| 394 | + (MOVWstore ptr (MOVWconst [0]) mem))) |
| 395 | +(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 => |
| 396 | + (MOVDstore [8] ptr (MOVDconst [0]) |
| 397 | + (MOVDstore ptr (MOVDconst [0]) mem)) |
| 398 | +(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 => |
| 399 | + (MOVDstore [16] ptr (MOVDconst [0]) |
| 400 | + (MOVDstore [8] ptr (MOVDconst [0]) |
| 401 | + (MOVDstore ptr (MOVDconst [0]) mem))) |
| 402 | +(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 => |
| 403 | + (MOVDstore [24] ptr (MOVDconst [0]) |
| 404 | + (MOVDstore [16] ptr (MOVDconst [0]) |
| 405 | + (MOVDstore [8] ptr (MOVDconst [0]) |
| 406 | + (MOVDstore ptr (MOVDconst [0]) mem)))) |
| 407 | + |
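| 408 | +// Medium 8-aligned zeroing uses a Duff's device. Sizes 0, 8, 16, 24 and 32 are matched by the small zeroing rules above before this rule is tried. |
364 | 409 | // 8 is the size in bytes of the code for one duffzero step and 128 is the number of steps, see runtime/mkduff.go |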
365 | 410 | (Zero [s] {t} ptr mem) |
366 | | - && s%8 == 0 && s >= 16 && s <= 8*128 |
| 411 | + && s%8 == 0 && s <= 8*128 |
367 | 412 | && t.Alignment()%8 == 0 && !config.noDuffDevice => |
368 | 413 | (DUFFZERO [8 * (128 - s/8)] ptr mem) |
369 | 414 |
|
|
377 | 422 | (Convert ...) => (MOVconvert ...) |
378 | 423 |
|
379 | 424 | // Checks |
380 | | -(IsNonNil p) => (NeqPtr (MOVDconst) p) |
| 425 | +(IsNonNil p) => (NeqPtr (MOVDconst [0]) p) |
381 | 426 | (IsInBounds ...) => (Less64U ...) |
382 | 427 | (IsSliceInBounds ...) => (Leq64U ...) |
383 | 428 |
|
|
394 | 439 | (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem) |
395 | 440 | (PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem) |
396 | 441 |
|
397 | | -// Moves |
398 | | -// TODO: more optimized moves, including attempting to use aligned accesses. |
399 | | -(Move [0] _ _ mem) => mem |
| 442 | +// Small moves: use the widest loads and stores that the type's alignment permits |
| 443 | +(Move [0] _ _ mem) => mem |
400 | 444 | (Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem) |
401 | | -(Move [2] dst src mem) => (MOVHstore dst (MOVHload src mem) mem) |
402 | | -(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) |
403 | | -(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) |
404 | | - |
405 | | -// Medium move uses a Duff's device |
| 445 | +(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 => |
| 446 | + (MOVHstore dst (MOVHload src mem) mem) |
| 447 | +(Move [2] dst src mem) => |
| 448 | + (MOVBstore [1] dst (MOVBload [1] src mem) |
| 449 | + (MOVBstore dst (MOVBload src mem) mem)) |
| 450 | +(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 => |
| 451 | + (MOVWstore dst (MOVWload src mem) mem) |
| 452 | +(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 => |
| 453 | + (MOVHstore [2] dst (MOVHload [2] src mem) |
| 454 | + (MOVHstore dst (MOVHload src mem) mem)) |
| 455 | +(Move [4] dst src mem) => |
| 456 | + (MOVBstore [3] dst (MOVBload [3] src mem) |
| 457 | + (MOVBstore [2] dst (MOVBload [2] src mem) |
| 458 | + (MOVBstore [1] dst (MOVBload [1] src mem) |
| 459 | + (MOVBstore dst (MOVBload src mem) mem)))) |
| 460 | +(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 => |
| 461 | + (MOVDstore dst (MOVDload src mem) mem) |
| 462 | +(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 => |
| 463 | + (MOVWstore [4] dst (MOVWload [4] src mem) |
| 464 | + (MOVWstore dst (MOVWload src mem) mem)) |
| 465 | +(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 => |
| 466 | + (MOVHstore [6] dst (MOVHload [6] src mem) |
| 467 | + (MOVHstore [4] dst (MOVHload [4] src mem) |
| 468 | + (MOVHstore [2] dst (MOVHload [2] src mem) |
| 469 | + (MOVHstore dst (MOVHload src mem) mem)))) |
| 470 | + |
| 471 | +(Move [3] dst src mem) => |
| 472 | + (MOVBstore [2] dst (MOVBload [2] src mem) |
| 473 | + (MOVBstore [1] dst (MOVBload [1] src mem) |
| 474 | + (MOVBstore dst (MOVBload src mem) mem))) |
| 475 | +(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 => |
| 476 | + (MOVHstore [4] dst (MOVHload [4] src mem) |
| 477 | + (MOVHstore [2] dst (MOVHload [2] src mem) |
| 478 | + (MOVHstore dst (MOVHload src mem) mem))) |
| 479 | +(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 => |
| 480 | + (MOVWstore [8] dst (MOVWload [8] src mem) |
| 481 | + (MOVWstore [4] dst (MOVWload [4] src mem) |
| 482 | + (MOVWstore dst (MOVWload src mem) mem))) |
| 483 | +(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 => |
| 484 | + (MOVDstore [8] dst (MOVDload [8] src mem) |
| 485 | + (MOVDstore dst (MOVDload src mem) mem)) |
| 486 | +(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 => |
| 487 | + (MOVDstore [16] dst (MOVDload [16] src mem) |
| 488 | + (MOVDstore [8] dst (MOVDload [8] src mem) |
| 489 | + (MOVDstore dst (MOVDload src mem) mem))) |
| 490 | +(Move [32] {t} dst src mem) && t.Alignment()%8 == 0 => |
| 491 | + (MOVDstore [24] dst (MOVDload [24] src mem) |
| 492 | + (MOVDstore [16] dst (MOVDload [16] src mem) |
| 493 | + (MOVDstore [8] dst (MOVDload [8] src mem) |
| 494 | + (MOVDstore dst (MOVDload src mem) mem)))) |
| 495 | + |
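| 496 | +// Medium 8-aligned move uses a Duff's device. Sizes 0, 8, 16, 24 and 32 are matched by the small move rules above before this rule is tried. |
406 | 497 | // 16 is the size in bytes of the code for one duffcopy step and 128 is the number of steps, see runtime/mkduff.go |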
407 | 498 | (Move [s] {t} dst src mem) |
408 | | - && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 |
| 499 | + && s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0 |
409 | 500 | && !config.noDuffDevice && logLargeCopy(v, s) => |
410 | 501 | (DUFFCOPY [16 * (128 - s/8)] dst src mem) |
411 | 502 |
|
|
0 commit comments