|
| 1 | +; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_BEFORE |
| 2 | +; RUN: opt < %s -passes='loop(loop-rotate),print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_AFTER |
| 3 | +; RUN: opt < %s -passes='loop(loop-rotate)' -S | FileCheck %s --check-prefixes=IR |
| 4 | + |
| 5 | +@g = global i32 0 ; target of the volatile stores in @func0, @func1 and @func2 |
| 6 | + |
| 7 | +; We should get the same "count =" results for "outer_loop_body" and |
| 8 | +; "inner_loop_body" before and after the transformation. |
| 9 | + |
| 10 | +; BFI_BEFORE-LABEL: block-frequency-info: func0 |
| 11 | +; BFI_BEFORE: - entry: {{.*}} count = 1 |
| 12 | +; BFI_BEFORE: - outer_loop_header: {{.*}} count = 1001 |
| 13 | +; BFI_BEFORE: - outer_loop_body: {{.*}} count = 1000 |
| 14 | +; BFI_BEFORE: - inner_loop_header: {{.*}} count = 4000 |
| 15 | +; BFI_BEFORE: - inner_loop_body: {{.*}} count = 3000 |
| 16 | +; BFI_BEFORE: - inner_loop_exit: {{.*}} count = 1000 |
| 17 | +; BFI_BEFORE: - outer_loop_exit: {{.*}} count = 1 |
| 18 | + |
| 19 | +; BFI_AFTER-LABEL: block-frequency-info: func0 |
| 20 | +; BFI_AFTER: - entry: {{.*}} count = 1 |
| 21 | +; BFI_AFTER: - outer_loop_body: {{.*}} count = 1000 |
| 22 | +; BFI_AFTER: - inner_loop_body: {{.*}} count = 3000 |
| 23 | +; BFI_AFTER: - inner_loop_exit: {{.*}} count = 1000 |
| 24 | +; BFI_AFTER: - outer_loop_exit: {{.*}} count = 1 |
| 25 | + |
| 26 | +; IR: inner_loop_body: |
| 27 | +; IR: br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof [[PROF_FUNC0_0:![0-9]+]] |
| 28 | +; IR: inner_loop_exit: |
| 29 | +; IR: br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof [[PROF_FUNC0_1:![0-9]+]] |
| 30 | +; |
| 31 | +; @func0: nested loops with constant bounds (outer: %i0 < 1000, inner: %i1 < 3). After |
| 32 | +; loop-rotation we expect to end with an unconditional branch in the pre-header. |
| 33 | +define void @func0() !prof !0 { |
| 34 | +entry: |
| 35 | + br label %outer_loop_header |
| 36 | + |
| 37 | +outer_loop_header: |
| 38 | + %i0 = phi i32 [0, %entry], [%i0_inc, %inner_loop_exit] |
| 39 | + %cmp0 = icmp slt i32 %i0, 1000 |
| 40 | + br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof !1 ; !1 = weights 1000 (body) vs 1 (exit) |
| 41 | + |
| 42 | +outer_loop_body: |
| 43 | + store volatile i32 %i0, ptr @g, align 4 ; volatile store of the outer counter to @g |
| 44 | + br label %inner_loop_header |
| 45 | + |
| 46 | +inner_loop_header: |
| 47 | + %i1 = phi i32 [0, %outer_loop_body], [%i1_inc, %inner_loop_body] |
| 48 | + %cmp1 = icmp slt i32 %i1, 3 |
| 49 | + br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof !2 ; !2 = weights 3000 (body) vs 1000 (exit) |
| 50 | + |
| 51 | +inner_loop_body: |
| 52 | + store volatile i32 %i1, ptr @g, align 4 ; volatile store of the inner counter to @g |
| 53 | + %i1_inc = add i32 %i1, 1 |
| 54 | + br label %inner_loop_header |
| 55 | + |
| 56 | +inner_loop_exit: |
| 57 | + %i0_inc = add i32 %i0, 1 |
| 58 | + br label %outer_loop_header |
| 59 | + |
| 60 | +outer_loop_exit: |
| 61 | + ret void |
| 62 | +} |
| 63 | + |
| 64 | +; BFI_BEFORE-LABEL: block-frequency-info: func1 |
| 65 | +; BFI_BEFORE: - entry: {{.*}} count = 1024 |
| 66 | +; BFI_BEFORE: - loop_header: {{.*}} count = 21504 |
| 67 | +; BFI_BEFORE: - loop_body: {{.*}} count = 20480 |
| 68 | +; BFI_BEFORE: - loop_exit: {{.*}} count = 1024 |
| 69 | + |
| 70 | +; BFI_AFTER-LABEL: block-frequency-info: func1 |
| 71 | +; BFI_AFTER: - entry: {{.*}} count = 1024 |
| 72 | +; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024 |
| 73 | +; BFI_AFTER: - loop_body: {{.*}} count = 20608 |
| 74 | +; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024 |
| 75 | +; BFI_AFTER: - loop_exit: {{.*}} count = 1024 |
| 76 | + |
| 77 | +; IR: entry: |
| 78 | +; IR: br i1 %cmp1, label %loop_body.lr.ph, label %loop_exit, !prof [[PROF_FUNC1_0:![0-9]+]] |
| 79 | + |
| 80 | +; IR: loop_body: |
| 81 | +; IR: br i1 %cmp, label %loop_body, label %loop_header.loop_exit_crit_edge, !prof [[PROF_FUNC1_1:![0-9]+]] |
| 82 | + |
| 83 | +; @func1: a loop with an unknown bound (%n), so loop-rotation ends up with a conditional |
| 84 | +; jump in both the pre-header and the loop body. The branch weights (!4 = 40:2) favor |
| 85 | +; entering the body over leaving the loop. |
| 86 | +define void @func1(i32 %n) !prof !3 { |
| 87 | +entry: |
| 88 | + br label %loop_header |
| 89 | + |
| 90 | +loop_header: |
| 91 | + %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| 92 | + %cmp = icmp slt i32 %i, %n |
| 93 | + br i1 %cmp, label %loop_body, label %loop_exit, !prof !4 ; true edge enters the body; !4 = weights 40 (body) vs 2 (exit) |
| 94 | + |
| 95 | +loop_body: |
| 96 | + store volatile i32 %i, ptr @g, align 4 ; volatile store of the counter to @g |
| 97 | + %i_inc = add i32 %i, 1 |
| 98 | + br label %loop_header |
| 99 | + |
| 100 | +loop_exit: |
| 101 | + ret void |
| 102 | +} |
| 103 | + |
| 104 | +; BFI_BEFORE-LABEL: block-frequency-info: func2 |
| 105 | +; BFI_BEFORE: - entry: {{.*}} count = 1024 |
| 106 | +; BFI_BEFORE: - loop_header: {{.*}} count = 1056 |
| 107 | +; BFI_BEFORE: - loop_body: {{.*}} count = 32 |
| 108 | +; BFI_BEFORE: - loop_exit: {{.*}} count = 1024 |
| 109 | + |
| 110 | +; BFI_AFTER-LABEL: block-frequency-info: func2 |
| 111 | +; BFI_AFTER: - entry: {{.*}} count = 1024 |
| 112 | +; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 32 |
| 113 | +; BFI_AFTER: - loop_body: {{.*}} count = 32 |
| 114 | +; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 32 |
| 115 | +; BFI_AFTER: - loop_exit: {{.*}} count = 1024 |
| 116 | + |
| 117 | +; IR: entry: |
| 118 | +; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC2_0:![0-9]+]] |
| 119 | + |
| 120 | +; IR: loop_body: |
| 121 | +; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC2_1:![0-9]+]] |
| 122 | + |
| 123 | +; @func2: a loop with an unknown bound (%n), so loop-rotation ends up with a conditional |
| 124 | +; jump in both the pre-header and the loop body. Similar to `func1`, but here the branch |
| 125 | +; weights (!5 = 10240:320) make the loop exit far more likely than entering the body. |
| 126 | +define void @func2(i32 %n) !prof !3 { |
| 127 | +entry: |
| 128 | + br label %loop_header |
| 129 | + |
| 130 | +loop_header: |
| 131 | + %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| 132 | + %cmp = icmp slt i32 %i, %n |
| 133 | + br i1 %cmp, label %loop_exit, label %loop_body, !prof !5 ; true edge leaves the loop; !5 = weights 10240 (exit) vs 320 (body) |
| 134 | + |
| 135 | +loop_body: |
| 136 | + store volatile i32 %i, ptr @g, align 4 ; volatile store of the counter to @g |
| 137 | + %i_inc = add i32 %i, 1 |
| 138 | + br label %loop_header |
| 139 | + |
| 140 | +loop_exit: |
| 141 | + ret void |
| 142 | +} |
| 143 | + |
| 144 | +!0 = !{!"function_entry_count", i64 1} ; entry count attached to @func0 |
| 145 | +!1 = !{!"branch_weights", i32 1000, i32 1} ; @func0 outer_loop_header branch (body vs exit) |
| 146 | +!2 = !{!"branch_weights", i32 3000, i32 1000} ; @func0 inner_loop_header branch (body vs exit) |
| 147 | +!3 = !{!"function_entry_count", i64 1024} ; entry count shared by @func1 and @func2 |
| 148 | +!4 = !{!"branch_weights", i32 40, i32 2} ; @func1 loop_header branch (body vs exit) |
| 149 | +!5 = !{!"branch_weights", i32 10240, i32 320} ; @func2 loop_header branch (exit vs body) |
| 150 | + |
| 151 | +; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000} |
| 152 | +; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1} |
| 153 | +; IR: [[PROF_FUNC1_0]] = !{!"branch_weights", i32 127, i32 1} |
| 154 | +; IR: [[PROF_FUNC1_1]] = !{!"branch_weights", i32 2433, i32 127} |
| 155 | +; IR: [[PROF_FUNC2_0]] = !{!"branch_weights", i32 9920, i32 320} |
| 156 | +; IR: [[PROF_FUNC2_1]] = !{!"branch_weights", i32 320, i32 0} |
0 commit comments