@@ -138,71 +138,6 @@ impl CachingDeleteFileManager {
138138 }
139139 }
140140
141- /// Load the deletes for all the specified tasks
142- ///
143- /// Returned future completes once all loading has finished.
144- ///
145- /// * Create a single stream of all delete file tasks irrespective of type,
146- /// so that we can respect the combined concurrency limit
147- /// * We then process each in two phases: load and parse.
148- /// * for positional deletes the load phase instantiates an ArrowRecordBatchStream to
149- /// stream the file contents out
150- /// * for eq deletes, we first check if the EQ delete is already loaded or being loaded by
151- /// another concurrently processing data file scan task. If it is, we return a future
152- /// for the pre-existing task from the load phase. If not, we create such a future
153- /// and store it in the state to prevent other data file tasks from starting to load
154- /// the same equality delete file, and return a record batch stream from the load phase
155- /// as per the other delete file types - only this time it is accompanied by a one-shot
156- /// channel sender that we will eventually use to resolve the shared future that we stored
157- /// in the state.
158- /// * When this gets updated to add support for delete vectors, the load phase will return
159- /// a PuffinReader for them.
160- /// * The parse phase parses each record batch stream according to its associated data type.
161- /// The result of this is a map of data file paths to delete vectors for the positional
162- /// delete tasks (and in future for the delete vector tasks). For equality delete
163- /// file tasks, this results in an unbound Predicate.
164- /// * The unbound Predicates resulting from equality deletes are sent to their associated oneshot
165- /// channel to store them in the right place in the delete file managers state.
166- /// * The results of all of these futures are awaited on in parallel with the specified
167- /// level of concurrency and collected into a vec. We then combine all of the delete
168- /// vector maps that resulted from any positional delete or delete vector files into a
169- /// single map and persist it in the state.
170- ///
171- ///
172- /// Conceptually, the data flow is like this:
173- ///
174- /// FileScanTaskDeleteFile
175- /// |
176- /// Already-loading EQ Delete | Everything Else
177- /// +---------------------------------------------------+
178- /// | |
179- /// [get existing future] [load recordbatch stream / puffin]
180- /// DeleteFileContext::InProgEqDel DeleteFileContext
181- /// | |
182- /// | |
183- /// | +-----------------------------+--------------------------+
184- /// | Pos Del Del Vec (Not yet Implemented) EQ Del
185- /// | | | |
186- /// | [parse pos del stream] [parse del vec puffin] [parse eq del]
187- /// | HashMap<String, RoaringTreeMap> HashMap<String, RoaringTreeMap> (Predicate, Sender)
188- /// | | | |
189- /// | | | [persist to state]
190- /// | | | ()
191- /// | | | |
192- /// | +-----------------------------+--------------------------+
193- /// | |
194- /// | [buffer unordered]
195- /// | |
196- /// | [combine del vectors]
197- /// | HashMap<String, RoaringTreeMap>
198- /// | |
199- /// | [persist del vectors to state]
200- /// | ()
201- /// | |
202- /// +-------------------------+-------------------------+
203- /// |
204- /// [join!]
205- /// */
206141 pub ( crate ) async fn load_deletes (
207142 & self ,
208143 delete_file_entries : & [ FileScanTaskDeleteFile ] ,
0 commit comments