@@ -2665,8 +2665,6 @@ let mayLoad=1, hasSideEffects=0 in {
// Vector-load instantiations of LD_VEC, one defm per register class.
// The two removed entries (LDV_f16, LDV_f16x2) passed Int16Regs and
// Int32Regs, the same register classes already used by LDV_i16 and LDV_i32.
2665
2665
defm LDV_i16 : LD_VEC<Int16Regs>;
2666
2666
defm LDV_i32 : LD_VEC<Int32Regs>;
2667
2667
defm LDV_i64 : LD_VEC<Int64Regs>;
2668
- defm LDV_f16 : LD_VEC<Int16Regs>;
2669
- defm LDV_f16x2 : LD_VEC<Int32Regs>;
2670
2668
defm LDV_f32 : LD_VEC<Float32Regs>;
2671
2669
defm LDV_f64 : LD_VEC<Float64Regs>;
2672
2670
}
@@ -2760,8 +2758,6 @@ let mayStore=1, hasSideEffects=0 in {
// Vector-store instantiations of ST_VEC, one defm per register class.
// The two removed entries (STV_f16, STV_f16x2) passed Int16Regs and
// Int32Regs, the same register classes already used by STV_i16 and STV_i32.
2760
2758
defm STV_i16 : ST_VEC<Int16Regs>;
2761
2759
defm STV_i32 : ST_VEC<Int32Regs>;
2762
2760
defm STV_i64 : ST_VEC<Int64Regs>;
2763
- defm STV_f16 : ST_VEC<Int16Regs>;
2764
- defm STV_f16x2 : ST_VEC<Int32Regs>;
2765
2761
defm STV_f32 : ST_VEC<Float32Regs>;
2766
2762
defm STV_f64 : ST_VEC<Float64Regs>;
2767
2763
}
@@ -3074,6 +3070,10 @@ let hasSideEffects = false in {
3074
3070
(ins Int32Regs:$s),
3075
3071
"{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
3076
3072
[]>;
// I32toI16L: takes a 32-bit register $s and produces a 16-bit register $low.
// The asm declares a scoped scratch register `tmp` and unpacks $s with
// mov.b32 into {$low, tmp}; tmp is never read afterwards.
// NOTE(review): presumably the first unpacked element is the least-significant
// half, matching the "L" in the name -- confirm against the PTX ISA.
// The selection pattern list is empty, so this instruction is only chosen via
// separate Pat records.
3073
+ def I32toI16L : NVPTXInst<(outs Int16Regs:$low),
3074
+ (ins Int32Regs:$s),
3075
+ "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
3076
+ []>;
3077
3077
def I64toI32H : NVPTXInst<(outs Int32Regs:$high),
3078
3078
(ins Int64Regs:$s),
3079
3079
"{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
@@ -3091,47 +3091,12 @@ def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))),
// Select I64toI32H for an i32 truncation of a 64-bit value arithmetically
// shifted right by 32. Only the low 32 bits of the shifted value survive the
// trunc, so the sign bits that sra shifts in are discarded.
3091
3091
def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))),
3092
3092
(I64toI32H Int64Regs:$s)>;
3093
3093
// Removed block: f16x2-specific extract / build / split instructions, all
// defined under hasSideEffects = false. The extractelt and build_vector
// selections are re-expressed by the added Pat records on I32toI16L,
// I32toI16H and V2I16toI32.
3094
- let hasSideEffects = false in {
3095
- // Extract element of f16x2 register. PTX does not provide any way
3096
- // to access elements of f16x2 vector directly, so we need to
3097
- // extract it using a temporary register.
3098
- def F16x2toF16_0 : NVPTXInst<(outs Int16Regs:$dst),
3099
- (ins Int32Regs:$src),
3100
- "{{ .reg .b16 \t%tmp_hi;\n\t"
3101
- " mov.b32 \t{$dst, %tmp_hi}, $src; }}",
3102
- [(set Int16Regs:$dst,
3103
- (extractelt (v2f16 Int32Regs:$src), 0))]>;
3104
- def F16x2toF16_1 : NVPTXInst<(outs Int16Regs:$dst),
3105
- (ins Int32Regs:$src),
3106
- "{{ .reg .b16 \t%tmp_lo;\n\t"
3107
- " mov.b32 \t{%tmp_lo, $dst}, $src; }}",
3108
- [(set Int16Regs:$dst,
3109
- (extractelt (v2f16 Int32Regs:$src), 1))]>;
3110
-
3111
- // Coalesce two f16 registers into f16x2
3112
- def BuildF16x2 : NVPTXInst<(outs Int32Regs:$dst),
3113
- (ins Int16Regs:$a, Int16Regs:$b),
3114
- "mov.b32 \t$dst, {{$a, $b}};",
3115
- [(set (v2f16 Int32Regs:$dst),
3116
- (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>;
3117
-
3118
- // Directly initializing the underlying b32 register is one less SASS
3119
- // instruction than a vector-packing move.
3120
- def BuildF16x2i : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
3121
- "mov.b32 \t$dst, $src;",
3122
- []>;
3123
-
3124
- // Split f16x2 into two f16 registers.
3125
- def SplitF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi),
3126
- (ins Int32Regs:$src),
3127
- "mov.b32 \t{{$lo, $hi}}, $src;",
3128
- []>;
3129
- // Split an i32 into two f16
3130
- def SplitI32toF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi),
3131
- (ins Int32Regs:$src),
3132
- "mov.b32 \t{{$lo, $hi}}, $src;",
3133
- []>;
3134
- }
// f16 element access on a v2f16 value held in Int32Regs, mapped onto the
// integer pack/unpack instructions:
//   extractelt index 0 -> I32toI16L
//   extractelt index 1 -> I32toI16H
//   build_vector of two f16 (Int16Regs) -> V2I16toI32
3094
+ def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 0)),
3095
+ (I32toI16L Int32Regs:$src)>;
3096
+ def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 1)),
3097
+ (I32toI16H Int32Regs:$src)>;
3098
+ def : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
3099
+ (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
3135
3100
3136
3101
// Count leading zeros
3137
3102
let hasSideEffects = false in {
0 commit comments