|
30 | 30 | #include "basic-block.h"
|
31 | 31 | #include "function.h"
|
32 | 32 | #include "gimple.h"
|
| 33 | +#include "emit-rtl.h" |
33 | 34 | #include "arm-mve-builtins.h"
|
34 | 35 | #include "arm-mve-builtins-shapes.h"
|
35 | 36 | #include "arm-mve-builtins-base.h"
|
@@ -402,6 +403,115 @@ class vcvtxq_impl : public function_base
|
402 | 403 | }
|
403 | 404 | };
|
404 | 405 |
|
/* Map the vidup / vddup function directly to CODE (UNSPEC, M) where M is the
   vector mode associated with type suffix 0.  We need this special case
   because in MODE_wb the builtins dereference the first parameter and update
   its contents.  We also have to insert the two additional parameters needed
   by the builtins compared to the intrinsics.  */
class viddup_impl : public function_base
{
public:
  CONSTEXPR viddup_impl (bool inc_dec)
    : m_inc_dec (inc_dec)
  {}

  /* Increment (true) or decrement (false).  */
  bool m_inc_dec;

  /* In MODE_wb the intrinsic dereferences its pointer argument to read the
     start offset and writes the updated offset back through it, so report
     both memory effects; the other modes touch no memory.  */
  unsigned int
  call_properties (const function_instance &fi) const override
  {
    if (fi.mode_suffix_id == MODE_wb)
      return CP_WRITE_MEMORY | CP_READ_MEMORY;
    else
      return 0;
  }

  /* The scalar accessed through the MODE_wb pointer is a uint32_t
     offset.  */
  tree
  memory_scalar_type (const function_instance &) const override
  {
    return get_typenode_from_name (UINT32_TYPE);
  }

  /* Expand a vidupq/vddupq call: load the offset (dereferencing it first
     in MODE_wb), insert the extra "new offset" output and "total
     increment" arguments the builtin pattern expects, emit the insn for
     the predication kind, and in MODE_wb store the updated offset back
     through the pointer.  */
  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    insn_code code;
    rtx insns, offset_ptr;
    rtx new_offset;
    int offset_arg_no;
    rtx incr, total_incr;

    /* These intrinsics exist only for integer type suffixes.  */
    if (! e.type_suffix (0).integer_p)
      gcc_unreachable ();

    if ((e.mode_suffix_id != MODE_n)
	&& (e.mode_suffix_id != MODE_wb))
      gcc_unreachable ();

    /* NOTE(review): with PRED_m the offset is argument 1, not 0 —
       presumably because the "inactive values" vector comes first in the
       _m intrinsics; confirm against the shape definition.  */
    offset_arg_no = (e.pred == PRED_m) ? 1 : 0;

    /* In _wb mode, the start offset is passed via a pointer,
       dereference it.  */
    if (e.mode_suffix_id == MODE_wb)
      {
	rtx offset = gen_reg_rtx (SImode);
	offset_ptr = e.args[offset_arg_no];
	emit_insn (gen_rtx_SET (offset, gen_rtx_MEM (SImode, offset_ptr)));
	e.args[offset_arg_no] = offset;
      }

    /* We have to shuffle parameters because the builtin needs additional
       arguments:
       - the updated "new_offset"
       - total increment (incr * number of lanes) */
    new_offset = gen_reg_rtx (SImode);
    e.args.quick_insert (offset_arg_no, new_offset);

    /* INTVAL requires the increment to be a CONST_INT; the intrinsics
       restrict it to an immediate, so that holds here.  Scale it by the
       lane count to get the per-vector increment.  */
    incr = e.args[offset_arg_no + 2];
    total_incr = gen_int_mode (INTVAL (incr)
			       * GET_MODE_NUNITS (e.vector_mode (0)),
			       SImode);
    e.args.quick_push (total_incr);

    /* _wb mode uses the _n builtins and adds code to update the
       offset.  */
    switch (e.pred)
      {
      case PRED_none:
	/* No predicate.  */
	code = m_inc_dec
	  ? code_for_mve_q_u_insn (VIDUPQ, mode)
	  : code_for_mve_q_u_insn (VDDUPQ, mode);
	insns = e.use_exact_insn (code);
	break;

      case PRED_m:
      case PRED_x:
	/* "m" or "x" predicate.  */
	code = m_inc_dec
	  ? code_for_mve_q_m_wb_u_insn (VIDUPQ_M, mode)
	  : code_for_mve_q_m_wb_u_insn (VDDUPQ_M, mode);

	if (e.pred == PRED_m)
	  insns = e.use_cond_insn (code, 0);
	else
	  insns = e.use_pred_x_insn (code);
	break;

      default:
	gcc_unreachable ();
      }

    /* Update offset as appropriate.  NOTE(review): the load above uses an
       SImode MEM while this store uses Pmode; on 32-bit Arm Pmode is
       SImode so behavior matches, but SImode here would be more
       consistent.  */
    if (e.mode_suffix_id == MODE_wb)
      emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, offset_ptr), new_offset));

    return insns;
  }
};
| 514 | + |
405 | 515 | } /* end anonymous namespace */
|
406 | 516 |
|
407 | 517 | namespace arm_mve {
|
@@ -614,7 +724,9 @@ FUNCTION_WITHOUT_N_NO_F (vcvtmq, VCVTMQ)
|
FUNCTION_WITHOUT_N_NO_F (vcvtnq, VCVTNQ)
FUNCTION_WITHOUT_N_NO_F (vcvtpq, VCVTPQ)
FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16))
/* vddupq / vidupq share viddup_impl; the boolean is m_inc_dec:
   false = decrement (vddup), true = increment (vidup).  */
FUNCTION (vddupq, viddup_impl, (false))
FUNCTION (vdupq, vdupq_impl, (VDUPQ_M_N_S, VDUPQ_M_N_U, VDUPQ_M_N_F))
FUNCTION (vidupq, viddup_impl, (true))
FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_F, -1, -1, -1, -1, -1, VFMASQ_M_N_F))
|
|
0 commit comments