-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Subgraph composition sql more entities #5614
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -132,36 +132,52 @@ impl<'a> QueryFragment<Pg> for BlockRangeUpperBoundClause<'a> { | |
| } | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, Copy)] | ||
| pub enum BoundSide { | ||
| Lower, | ||
| Upper, | ||
| } | ||
|
|
||
| /// Helper for generating SQL fragments for selecting entities in a specific block range | ||
| #[derive(Debug, Clone, Copy)] | ||
| pub enum EntityBlockRange { | ||
| Mutable(BlockRange), // TODO: check if this is a proper type here (maybe Range<BlockNumber>?) | ||
| Mutable((BlockRange, BoundSide)), | ||
| Immutable(BlockRange), | ||
| } | ||
|
|
||
| impl EntityBlockRange { | ||
| pub fn new(table: &Table, block_range: std::ops::Range<BlockNumber>) -> Self { | ||
| pub fn new( | ||
| immutable: bool, | ||
| block_range: std::ops::Range<BlockNumber>, | ||
| bound_side: BoundSide, | ||
| ) -> Self { | ||
| let start: Bound<BlockNumber> = Bound::Included(block_range.start); | ||
| let end: Bound<BlockNumber> = Bound::Excluded(block_range.end); | ||
| let block_range: BlockRange = BlockRange(start, end); | ||
| if table.immutable { | ||
| if immutable { | ||
| Self::Immutable(block_range) | ||
| } else { | ||
| Self::Mutable(block_range) | ||
| Self::Mutable((block_range, bound_side)) | ||
| } | ||
| } | ||
|
|
||
| /// Output SQL that matches only rows whose block range contains `block`. | ||
| /// Outputs SQL that matches only rows whose entities would trigger a change | ||
| /// event (Create, Modify, Delete) in a given interval of blocks. Otherwise said | ||
| /// a block_range border is contained in an interval of blocks. For instance | ||
| /// one of the following: | ||
| /// lower(block_range) >= $1 and lower(block_range) <= $2 | ||
| /// upper(block_range) >= $1 and upper(block_range) <= $2 | ||
| /// block$ >= $1 and block$ <= $2 | ||
| pub fn contains<'b>(&'b self, out: &mut AstPass<'_, 'b, Pg>) -> QueryResult<()> { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find this very confusing. It would help if the comment described what the SQL text is that can be generated by this method
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added some sensible comment. Hope it's clear now. |
||
| out.unsafe_to_cache_prepared(); | ||
| let block_range = match self { | ||
| EntityBlockRange::Mutable(br) => br, | ||
| EntityBlockRange::Mutable((br, _)) => br, | ||
| EntityBlockRange::Immutable(br) => br, | ||
| }; | ||
| let BlockRange(start, finish) = block_range; | ||
|
|
||
| self.compare_column(out); | ||
| out.push_sql(" >= "); | ||
| out.push_sql(">= "); | ||
| match start { | ||
| Bound::Included(block) => out.push_bind_param::<Integer, _>(block)?, | ||
| Bound::Excluded(block) => { | ||
|
|
@@ -170,9 +186,9 @@ impl EntityBlockRange { | |
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could just push the bind param
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wasn't able to make the compiler borrower happy as the addition creates a temporary variable that can't outlive it's block... |
||
| Bound::Unbounded => unimplemented!(), | ||
| }; | ||
| out.push_sql(" AND "); | ||
| out.push_sql(" and"); | ||
| self.compare_column(out); | ||
| out.push_sql(" <= "); | ||
| out.push_sql("<= "); | ||
| match finish { | ||
| Bound::Included(block) => { | ||
| out.push_bind_param::<Integer, _>(block)?; | ||
|
|
@@ -186,7 +202,12 @@ impl EntityBlockRange { | |
|
|
||
| pub fn compare_column(&self, out: &mut AstPass<Pg>) { | ||
| match self { | ||
| EntityBlockRange::Mutable(_) => out.push_sql(" lower(block_range) "), | ||
| EntityBlockRange::Mutable((_, BoundSide::Lower)) => { | ||
| out.push_sql(" lower(block_range) ") | ||
| } | ||
| EntityBlockRange::Mutable((_, BoundSide::Upper)) => { | ||
| out.push_sql(" upper(block_range) ") | ||
| } | ||
| EntityBlockRange::Immutable(_) => out.push_sql(" block$ "), | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ use diesel::{connection::SimpleConnection, Connection}; | |
| use diesel::{ | ||
| debug_query, sql_query, OptionalExtension, PgConnection, QueryDsl, QueryResult, RunQueryDsl, | ||
| }; | ||
| use graph::blockchain::block_stream::{EntitySubgraphOperation, EntityWithType}; | ||
| use graph::blockchain::BlockTime; | ||
| use graph::cheap_clone::CheapClone; | ||
| use graph::components::store::write::{RowGroup, WriteChunk}; | ||
|
|
@@ -57,8 +58,8 @@ use std::time::{Duration, Instant}; | |
|
|
||
| use crate::relational::value::{FromOidRow, OidRow}; | ||
| use crate::relational_queries::{ | ||
| ConflictingEntitiesData, ConflictingEntitiesQuery, FindChangesQuery, FindDerivedQuery, | ||
| FindPossibleDeletionsQuery, ReturnedEntityData, | ||
| ConflictingEntitiesData, ConflictingEntitiesQuery, EntityDataExt, FindChangesQuery, | ||
| FindDerivedQuery, FindPossibleDeletionsQuery, ReturnedEntityData, | ||
| }; | ||
| use crate::{ | ||
| primary::{Namespace, Site}, | ||
|
|
@@ -75,7 +76,7 @@ use graph::prelude::{ | |
| QueryExecutionError, StoreError, StoreEvent, ValueType, BLOCK_NUMBER_MAX, | ||
| }; | ||
|
|
||
| use crate::block_range::{BLOCK_COLUMN, BLOCK_RANGE_COLUMN}; | ||
| use crate::block_range::{BoundSide, BLOCK_COLUMN, BLOCK_RANGE_COLUMN}; | ||
| pub use crate::catalog::Catalog; | ||
| use crate::connection_pool::ForeignServer; | ||
| use crate::{catalog, deployment}; | ||
|
|
@@ -545,21 +546,129 @@ impl Layout { | |
| pub fn find_range( | ||
| &self, | ||
| conn: &mut PgConnection, | ||
| entity_type: &EntityType, | ||
| entity_types: Vec<EntityType>, | ||
| causality_region: CausalityRegion, | ||
| block_range: Range<BlockNumber>, | ||
| ) -> Result<BTreeMap<BlockNumber, Vec<Entity>>, StoreError> { | ||
| let table = self.table_for_entity(entity_type)?; | ||
| let mut entities: BTreeMap<BlockNumber, Vec<Entity>> = BTreeMap::new(); | ||
| if let Some(vec) = FindRangeQuery::new(table.as_ref(), block_range) | ||
| .get_results::<EntityData>(conn) | ||
| .optional()? | ||
| { | ||
| for e in vec { | ||
| let block = e.clone().deserialize_block_number::<Entity>()?; | ||
| let en = e.deserialize_with_layout::<Entity>(self, None)?; | ||
| entities.entry(block).or_default().push(en); | ||
| } | ||
| ) -> Result<BTreeMap<BlockNumber, Vec<EntityWithType>>, StoreError> { | ||
| let mut tables = vec![]; | ||
| for et in entity_types { | ||
| tables.push(self.table_for_entity(&et)?.as_ref()); | ||
| } | ||
| let mut entities: BTreeMap<BlockNumber, Vec<EntityWithType>> = BTreeMap::new(); | ||
|
|
||
| // Collect all entities that have their 'lower(block_range)' attribute in the | ||
| // interval of blocks defined by the variable block_range. For the immutable | ||
| // entities the respective attribute is 'block$'. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This helps, but somehow buries the lede: |
||
| // Here are all entities that are created or modified in the block_range. | ||
| let lower_vec = FindRangeQuery::new( | ||
| &tables, | ||
| causality_region, | ||
| BoundSide::Lower, | ||
| block_range.clone(), | ||
| ) | ||
| .get_results::<EntityDataExt>(conn) | ||
| .optional()? | ||
| .unwrap_or_default(); | ||
| // Collect all entities that have their 'upper(block_range)' attribute in the | ||
| // interval of blocks defined by the variable block_range. For the immutable | ||
| // entities no entries are returned. | ||
| // Here are all entities that are modified or deleted in the block_range, | ||
| // but will have the previous versions, i.e. in the case of an update, it's | ||
| // the version before the update, and lower_vec will have a corresponding | ||
| // entry with the new version. | ||
| let upper_vec = | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll just copy this text. |
||
| FindRangeQuery::new(&tables, causality_region, BoundSide::Upper, block_range) | ||
| .get_results::<EntityDataExt>(conn) | ||
| .optional()? | ||
| .unwrap_or_default(); | ||
| let mut lower_iter = lower_vec.iter().fuse().peekable(); | ||
| let mut upper_iter = upper_vec.iter().fuse().peekable(); | ||
| let mut lower_now = lower_iter.next(); | ||
| let mut upper_now = upper_iter.next(); | ||
| // A closure to convert the entity data from the database into entity operation. | ||
| let transform = |ede: &EntityDataExt, | ||
| entity_op: EntitySubgraphOperation| | ||
| -> Result<(EntityWithType, BlockNumber), StoreError> { | ||
| let e = EntityData::new(ede.entity.clone(), ede.data.clone()); | ||
| let block = ede.block_number; | ||
| let entity_type = e.entity_type(&self.input_schema); | ||
| let entity = e.deserialize_with_layout::<Entity>(self, None)?; | ||
| let vid = ede.vid; | ||
| let ewt = EntityWithType { | ||
| entity_op, | ||
| entity_type, | ||
| entity, | ||
| vid, | ||
| }; | ||
| Ok((ewt, block)) | ||
| }; | ||
|
|
||
| // The algorithm is a similar to merge sort algorithm and it relays on the fact that both vectors | ||
| // are ordered by (block_number, entity_type, entity_id). It advances simultaneously entities from | ||
| // both lower_vec and upper_vec and tries to match entities that have entries in both vectors for | ||
| // a particular block. The match is successful if an entry in one array has the same values in the | ||
| // other one for the number of the block, entity type and the entity id. The comparison operation | ||
| // over the EntityDataExt implements that check. If there is a match it’s a modification operation, | ||
| // since both sides of a range are present for that block, entity type and id. If one side of the | ||
| // range exists and the other is missing it is a creation or deletion depending on which side is | ||
| // present. For immutable entities the entries in upper_vec are missing, hence they are considered | ||
| // having a lower bound at particular block and upper bound at infinity. | ||
| while lower_now.is_some() || upper_now.is_some() { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The body of this is pretty intricate. There should be a comment about what the strategy here is and how it works. |
||
| let (ewt, block) = match (lower_now, upper_now) { | ||
| (Some(lower), Some(upper)) => { | ||
| match lower.cmp(&upper) { | ||
| std::cmp::Ordering::Greater => { | ||
| // we have upper bound at this block, but no lower bounds at the same block so it's deletion | ||
| let (ewt, block) = transform(upper, EntitySubgraphOperation::Delete)?; | ||
| // advance upper_vec pointer | ||
| upper_now = upper_iter.next(); | ||
| (ewt, block) | ||
| } | ||
| std::cmp::Ordering::Less => { | ||
| // we have lower bound at this block but no upper bound at the same block so its creation | ||
| let (ewt, block) = transform(lower, EntitySubgraphOperation::Create)?; | ||
| // advance lower_vec pointer | ||
| lower_now = lower_iter.next(); | ||
| (ewt, block) | ||
| } | ||
| std::cmp::Ordering::Equal => { | ||
| let (ewt, block) = transform(lower, EntitySubgraphOperation::Modify)?; | ||
| // advance both lower_vec and upper_vec pointers | ||
| lower_now = lower_iter.next(); | ||
| upper_now = upper_iter.next(); | ||
| (ewt, block) | ||
| } | ||
| } | ||
| } | ||
| (Some(lower), None) => { | ||
| // we have lower bound at this block but no upper bound at the same block so its creation | ||
| let (ewt, block) = transform(lower, EntitySubgraphOperation::Create)?; | ||
| // advance lower_vec pointer | ||
| lower_now = lower_iter.next(); | ||
| (ewt, block) | ||
| } | ||
| (None, Some(upper)) => { | ||
| let (ewt, block) = transform(upper, EntitySubgraphOperation::Delete)?; | ||
| // advance upper_vec pointer | ||
| upper_now = upper_iter.next(); | ||
| (ewt, block) | ||
| } | ||
| _ => panic!("Imposible case to happen"), | ||
| }; | ||
|
|
||
| match entities.get_mut(&block) { | ||
| Some(vec) => vec.push(ewt), | ||
| None => { | ||
| let _ = entities.insert(block, vec![ewt]); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| // sort the elements in each blocks bucket by vid | ||
| for (_, vec) in &mut entities { | ||
| vec.sort_by(|a, b| a.vid.cmp(&b.vid)); | ||
| } | ||
|
|
||
| Ok(entities) | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure if this makes sense, but might be better to stick the entity into the operation, i.e. to have
Create(Entity),Modify(Entity),Delete(Entity); that way, users of this enum are forced to consider each of these cases, and it would make it easy to, e.g., change toDelete(Id), i.e., not load the entire entity if that's useful.But I don't understand how all of composition fits together, so I am not sure if this suggestions is really an improvement.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess here the naming is poor. The enum
EntitySubgraphOperationis a kind of operation, with definition more akin to the C style enums. Where the struct just bellow itEntityWithTypecontains the whole 'entity event' with all the fields including this operation kind and the entity itself.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did some renaming which I believe is closer to the semantics. I done it in a separate PR in order to save on rebasing.