5
5
#include " FlatbuffersWriters.h"
6
6
#include " common.h"
7
7
8
- /* * C++ implementation of a non-oblivious sort merge join.
8
+ /* *
9
+ * C++ implementation of a non-oblivious sort merge join.
9
10
* Rows MUST be tagged primary or secondary for this to work.
10
11
*/
12
+ /**
+  * Invariant check: verifies that `primary` and `current` belong to the same
+  * join group according to `join_expr_eval.is_same_group`.
+  *
+  * @param join_expr_eval evaluator used to compare the two rows
+  * @param primary        row taken from the buffered primary group
+  * @param current        row it is being compared against
+  * @throws std::runtime_error if the evaluator reports that the two rows are
+  *         not in the same group; the message embeds to_string() of both rows.
+  */
13
+ void test_rows_same_group (FlatbuffersJoinExprEvaluator &join_expr_eval,
14
+ const tuix::Row *primary,
15
+ const tuix::Row *current) {
16
+ if (!join_expr_eval.is_same_group (primary, current)) { // group mismatch is treated as a fatal error
17
+ throw std::runtime_error (
18
+ std::string (" Invariant violation: rows of primary_group "
19
+ " are not of the same group: " )
20
+ + to_string (primary)
21
+ + std::string (" vs " )
22
+ + to_string (current));
23
+ }
24
+ }
25
+ /**
+  * Appends every row held by `group` to `w`, in the order the reader yields them.
+  *
+  * The rows are read back through a RowReader over `group.output_buffer ()`.
+  * NOTE(review): whether output_buffer () leaves `group` usable for further
+  * appends is not visible here - confirm against RowWriter.
+  *
+  * @param group writer whose buffered rows are copied out
+  * @param w     destination writer that receives each row via append ()
+  */
26
+ void write_output_rows (RowWriter &group, RowWriter &w) {
27
+ auto group_buffer = group.output_buffer ();
28
+ RowReader group_reader (group_buffer.view ());
29
+
30
+ while (group_reader.has_next ()) {
31
+ const tuix::Row *row = group_reader.next ();
32
+ w.append (row);
33
+ }
34
+ }
35
+
36
+ /* *
37
+ * Sort merge equi join algorithm
38
+ * Input: the rows are unioned from both the primary (or left) table and the non-primary (or right) table
39
+ *
40
+ * Outer loop: iterate over all input rows
41
+ *
42
+ * If it's a row from the left table
43
+ * - Add it to the current group if it is in the same group as the last primary row
44
+ * - Otherwise start a new group
45
+ * - When a new group starts in a left semi/anti join, first output the previous group's primary_matched_rows (semi) or primary_unmatched_rows (anti)
46
+ *
47
+ * If it's a row from the right table
48
+ * - Inner join: iterate over current left group, output the joined row only if the condition is satisfied
49
+ * - Left semi/anti join: iterate over `primary_unmatched_rows`, add a matched row to `primary_matched_rows`
50
+ * and remove from `primary_unmatched_rows`
51
+ *
52
+ * After loop: output the last group for a left semi/anti join
53
+ */
54
+
11
55
void non_oblivious_sort_merge_join (
12
56
uint8_t *join_expr, size_t join_expr_length,
13
57
uint8_t *input_rows, size_t input_rows_length,
@@ -20,81 +64,84 @@ void non_oblivious_sort_merge_join(
20
64
21
65
RowWriter primary_group;
22
66
FlatbuffersTemporaryRow last_primary_of_group;
23
-
24
- bool pk_fk_match = false ;
67
+ RowWriter primary_matched_rows, primary_unmatched_rows; // This is only used for left semi/anti join
25
68
26
69
while (r.has_next ()) {
27
70
const tuix::Row *current = r.next ();
28
71
29
72
if (join_expr_eval.is_primary (current)) {
30
73
if (last_primary_of_group.get ()
31
- && join_expr_eval.eval_condition (last_primary_of_group.get (), current)) {
74
+ && join_expr_eval.is_same_group (last_primary_of_group.get (), current)) {
75
+
32
76
// Add this primary row to the current group
77
+ // If this is a left semi/anti join, also add the rows to primary_unmatched_rows
33
78
primary_group.append (current);
79
+ if (join_type == tuix::JoinType_LeftSemi || join_type == tuix::JoinType_LeftAnti) {
80
+ primary_unmatched_rows.append (current);
81
+ }
34
82
last_primary_of_group.set (current);
83
+
35
84
} else {
36
85
// If a new primary group is encountered
37
- if (join_type == tuix::JoinType_LeftAnti && !pk_fk_match) {
38
- auto primary_group_buffer = primary_group.output_buffer ();
39
- RowReader primary_group_reader (primary_group_buffer.view ());
40
-
41
- while (primary_group_reader.has_next ()) {
42
- const tuix::Row *primary = primary_group_reader.next ();
43
- w.append (primary);
44
- }
86
+ if (join_type == tuix::JoinType_LeftSemi) {
87
+ write_output_rows (primary_matched_rows, w);
88
+ } else if (join_type == tuix::JoinType_LeftAnti) {
89
+ write_output_rows (primary_unmatched_rows, w);
45
90
}
46
91
47
92
primary_group.clear ();
93
+ primary_unmatched_rows.clear ();
94
+ primary_matched_rows.clear ();
95
+
48
96
primary_group.append (current);
97
+ primary_unmatched_rows.append (current);
49
98
last_primary_of_group.set (current);
50
-
51
- pk_fk_match = false ;
52
99
}
53
100
} else {
54
- // Output the joined rows resulting from this foreign row
55
101
if (last_primary_of_group.get ()
56
- && join_expr_eval.eval_condition (last_primary_of_group.get (), current)) {
57
- auto primary_group_buffer = primary_group.output_buffer ();
58
- RowReader primary_group_reader (primary_group_buffer.view ());
59
- while (primary_group_reader.has_next ()) {
60
- const tuix::Row *primary = primary_group_reader.next ();
102
+ && join_expr_eval.is_same_group (last_primary_of_group.get (), current)) {
103
+ if (join_type == tuix::JoinType_Inner) {
104
+ auto primary_group_buffer = primary_group.output_buffer ();
105
+ RowReader primary_group_reader (primary_group_buffer.view ());
106
+ while (primary_group_reader.has_next ()) {
107
+ const tuix::Row *primary = primary_group_reader.next ();
108
+ test_rows_same_group (join_expr_eval, primary, current);
61
109
62
- if (!join_expr_eval.eval_condition (primary, current)) {
63
- throw std::runtime_error (
64
- std::string (" Invariant violation: rows of primary_group "
65
- " are not of the same group: " )
66
- + to_string (primary)
67
- + std::string (" vs " )
68
- + to_string (current));
110
+ if (join_expr_eval.eval_condition (primary, current)) {
111
+ w.append (primary, current);
112
+ }
69
113
}
114
+ } else if (join_type == tuix::JoinType_LeftSemi || join_type == tuix::JoinType_LeftAnti) {
115
+ auto primary_unmatched_rows_buffer = primary_unmatched_rows.output_buffer ();
116
+ RowReader primary_unmatched_rows_reader (primary_unmatched_rows_buffer.view ());
117
+ RowWriter new_primary_unmatched_rows;
70
118
71
- if (join_type == tuix::JoinType_Inner) {
72
- w.append (primary, current);
73
- } else if (join_type == tuix::JoinType_LeftSemi) {
74
- // Only output the pk group ONCE
75
- if (!pk_fk_match) {
76
- w.append (primary);
119
+ while (primary_unmatched_rows_reader.has_next ()) {
120
+ const tuix::Row *primary = primary_unmatched_rows_reader.next ();
121
+ test_rows_same_group (join_expr_eval, primary, current);
122
+ if (join_expr_eval.eval_condition (primary, current)) {
123
+ primary_matched_rows.append (primary);
124
+ } else {
125
+ new_primary_unmatched_rows.append (primary);
77
126
}
78
127
}
128
+
129
+ // Reset primary_unmatched_rows
130
+ primary_unmatched_rows.clear ();
131
+ auto new_primary_unmatched_rows_buffer = new_primary_unmatched_rows.output_buffer ();
132
+ RowReader new_primary_unmatched_rows_reader (new_primary_unmatched_rows_buffer.view ());
133
+ while (new_primary_unmatched_rows_reader.has_next ()) {
134
+ primary_unmatched_rows.append (new_primary_unmatched_rows_reader.next ());
135
+ }
79
136
}
80
-
81
- pk_fk_match = true ;
82
- } else {
83
- // If pk_fk_match were true, and the code got to here, then that means the group match has not been "cleared" yet
84
- // It will be processed when the code advances to the next pk group
85
- pk_fk_match &= true ;
86
137
}
87
138
}
88
139
}
89
140
90
- if (join_type == tuix::JoinType_LeftAnti && !pk_fk_match) {
91
- auto primary_group_buffer = primary_group.output_buffer ();
92
- RowReader primary_group_reader (primary_group_buffer.view ());
93
-
94
- while (primary_group_reader.has_next ()) {
95
- const tuix::Row *primary = primary_group_reader.next ();
96
- w.append (primary);
97
- }
141
+ if (join_type == tuix::JoinType_LeftSemi) {
142
+ write_output_rows (primary_matched_rows, w);
143
+ } else if (join_type == tuix::JoinType_LeftAnti) {
144
+ write_output_rows (primary_unmatched_rows, w);
98
145
}
99
146
100
147
w.output_buffer (output_rows, output_rows_length);
0 commit comments