Commit 74291bb

chain/ethereum, graph, store: use block cache to load blocks by numbers

1 parent b4ad24f · commit 74291bb

3 files changed (+282 −15 lines):

- chain/ethereum/src/ethereum_adapter.rs
- graph/src/components/store/traits.rs
- store/postgres/src/chain_store.rs

chain/ethereum/src/ethereum_adapter.rs

Lines changed: 85 additions & 11 deletions

@@ -782,6 +782,45 @@ impl EthereumAdapter {
             .buffered(ENV_VARS.block_batch_size)
     }

+    /// Request blocks by number through JSON-RPC.
+    fn load_blocks_by_numbers_rpc(
+        &self,
+        logger: Logger,
+        numbers: Vec<BlockNumber>,
+    ) -> impl Stream<Item = Arc<LightEthereumBlock>, Error = Error> + Send {
+        let web3 = self.web3.clone();
+
+        stream::iter_ok::<_, Error>(numbers.into_iter().map(move |number| {
+            let web3 = web3.clone();
+            retry(format!("load block {}", number), &logger)
+                .limit(ENV_VARS.request_retries)
+                .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs())
+                .run(move || {
+                    Box::pin(
+                        web3.eth()
+                            .block_with_txs(BlockId::Number(Web3BlockNumber::Number(
+                                number.into(),
+                            ))),
+                    )
+                    .compat()
+                    .from_err::<Error>()
+                    .and_then(move |block| {
+                        block.map(Arc::new).ok_or_else(|| {
+                            anyhow::anyhow!(
+                                "Ethereum node did not find block with number {:?}",
+                                number
+                            )
+                        })
+                    })
+                    .compat()
+                })
+                .boxed()
+                .compat()
+                .from_err()
+        }))
+        .buffered(ENV_VARS.block_batch_size)
+    }
+
     /// Request blocks ptrs for numbers through JSON-RPC.
     ///
     /// Reorg safety: If ids are numbers, they must be a final blocks.
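The new helper issues one `block_with_txs` request per number and caps how many requests are in flight at once. Below is a minimal, self-contained sketch of that buffering pattern in `futures` 0.3 style; `fetch_block` and `BATCH_SIZE` are illustrative stand-ins for the retried JSON-RPC call and `ENV_VARS.block_batch_size`, not graph-node APIs.

```rust
use anyhow::Error;
use futures::stream::{self, StreamExt, TryStreamExt};

const BATCH_SIZE: usize = 10; // stand-in for ENV_VARS.block_batch_size

// Placeholder for the retried JSON-RPC call made in the diff above.
async fn fetch_block(number: u64) -> Result<String, Error> {
    Ok(format!("block #{}", number))
}

async fn load_blocks_by_numbers(numbers: Vec<u64>) -> Result<Vec<String>, Error> {
    stream::iter(numbers)
        .map(fetch_block)     // one future per requested block number
        .buffered(BATCH_SIZE) // at most BATCH_SIZE requests in flight, order preserved
        .try_collect()        // stop at the first error
        .await
}
```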
@@ -1650,26 +1689,61 @@ impl EthereumAdapterTrait for EthereumAdapter {
         Ok(decoded)
     }

-    // This is a ugly temporary implementation to get the block ptrs for a range of blocks
+    /// Load Ethereum blocks in bulk by number, returning results as they come back as a Stream.
     async fn load_blocks_by_numbers(
         &self,
         logger: Logger,
         chain_store: Arc<dyn ChainStore>,
         block_numbers: HashSet<BlockNumber>,
     ) -> Box<dyn Stream<Item = Arc<LightEthereumBlock>, Error = Error> + Send> {
-        let block_hashes = block_numbers
+        let blocks_map: BTreeMap<i32, Vec<json::Value>> = chain_store
+            .cheap_clone()
+            .blocks_by_numbers(block_numbers.iter().map(|&b| b.into()).collect::<Vec<_>>())
+            .await
+            .map_err(|e| {
+                error!(&logger, "Error accessing block cache {}", e);
+                e
+            })
+            .unwrap_or_default();
+
+        let mut blocks: Vec<Arc<LightEthereumBlock>> = blocks_map
             .into_iter()
-            .map(|number| {
-                chain_store
-                    .block_hashes_by_block_number(number)
-                    .unwrap()
-                    .first()
-                    .unwrap()
-                    .as_h256()
+            .filter_map(|(_number, values)| {
+                if values.len() == 1 {
+                    json::from_value(values[0].clone()).ok()
+                } else {
+                    None
+                }
             })
-            .collect::<HashSet<_>>();
+            .collect::<Vec<_>>();

-        self.load_blocks(logger, chain_store, block_hashes).await
+        let missing_blocks: Vec<i32> = block_numbers
+            .into_iter()
+            .filter(|&number| !blocks.iter().any(|block| block.number() == number))
+            .collect();
+
+        debug!(logger, "Loading {} block(s)", missing_blocks.len());
+        Box::new(
+            self.load_blocks_by_numbers_rpc(logger.clone(), missing_blocks)
+                .collect()
+                .map(move |new_blocks| {
+                    let upsert_blocks: Vec<_> = new_blocks
+                        .iter()
+                        .map(|block| BlockFinality::Final(block.clone()))
+                        .collect();
+                    let block_refs: Vec<_> = upsert_blocks
+                        .iter()
+                        .map(|block| block as &dyn graph::blockchain::Block)
+                        .collect();
+                    if let Err(e) = chain_store.upsert_light_blocks(block_refs.as_slice()) {
+                        error!(logger, "Error writing to block cache {}", e);
+                    }
+                    blocks.extend(new_blocks);
+                    blocks.sort_by_key(|block| block.number);
+                    stream::iter_ok(blocks)
+                })
+                .flatten_stream(),
+        )
     }

     /// Load Ethereum blocks in bulk, returning results as they come back as a Stream.
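Taken together, the rewritten `load_blocks_by_numbers` is a read-through cache: serve what the chain store already has, fetch only the missing numbers over JSON-RPC, write those back with `upsert_light_blocks`, then return everything sorted by block number. The sketch below distills that flow with plain standard-library types; `Block`, the `cache` map, and `fetch_from_rpc` are illustrative stand-ins, not graph-node types.

```rust
use std::collections::{BTreeMap, HashSet};

#[derive(Clone, Debug)]
struct Block {
    number: i32,
    data: String,
}

fn load_blocks(
    cache: &mut BTreeMap<i32, Block>,
    numbers: &HashSet<i32>,
    fetch_from_rpc: impl Fn(i32) -> Block,
) -> Vec<Block> {
    // 1. Serve whatever the cache already holds.
    let mut blocks: Vec<Block> = numbers.iter().filter_map(|n| cache.get(n).cloned()).collect();

    // 2. Fetch only the numbers the cache could not serve.
    let missing: Vec<i32> = numbers
        .iter()
        .copied()
        .filter(|n| !blocks.iter().any(|b| b.number == *n))
        .collect();
    for number in missing {
        let block = fetch_from_rpc(number);
        // 3. Write the fetched block back so later calls become cache hits.
        cache.insert(number, block.clone());
        blocks.push(block);
    }

    // 4. Hand back the merged set in block order, as the adapter does before
    //    wrapping it in a stream.
    blocks.sort_by_key(|b| b.number);
    blocks
}
```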

graph/src/components/store/traits.rs

Lines changed: 6 additions & 0 deletions

@@ -494,6 +494,12 @@ pub trait ChainStore: Send + Sync + 'static {
         hashes: Vec<BlockHash>,
     ) -> Result<Vec<serde_json::Value>, Error>;

+    /// Returns the blocks present in the store for the given block numbers.
+    async fn blocks_by_numbers(
+        self: Arc<Self>,
+        numbers: Vec<BlockNumber>,
+    ) -> Result<BTreeMap<BlockNumber, Vec<serde_json::Value>>, Error>;
+
     /// Get the `offset`th ancestor of `block_hash`, where offset=0 means the block matching
     /// `block_hash` and offset=1 means its parent. If `root` is passed, short-circuit upon finding
     /// a child of `root`. Returns None if unable to complete due to missing blocks in the chain
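A hypothetical caller of the new trait method, mirroring how the Ethereum adapter consumes it in the first file of this commit: a number is only trusted when the store returned exactly one candidate JSON block for it, and anything else falls through to the JSON-RPC path. `unambiguous_blocks` is an illustrative helper, not part of the commit.

```rust
use std::collections::BTreeMap;

// Keep only numbers for which the store returned exactly one candidate block.
fn unambiguous_blocks(
    by_number: BTreeMap<i32, Vec<serde_json::Value>>,
) -> BTreeMap<i32, serde_json::Value> {
    by_number
        .into_iter()
        .filter_map(|(number, mut candidates)| {
            if candidates.len() == 1 {
                Some((number, candidates.remove(0)))
            } else {
                None // zero or several stored blocks: leave this number to JSON-RPC
            }
        })
        .collect()
}
```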

store/postgres/src/chain_store.rs

Lines changed: 191 additions & 4 deletions

@@ -13,6 +13,7 @@ use graph::slog::Logger;
 use graph::stable_hash::crypto_stable_hash;
 use graph::util::herd_cache::HerdCache;

+use std::collections::BTreeMap;
 use std::{
     collections::HashMap,
     convert::{TryFrom, TryInto},
@@ -579,6 +580,50 @@ mod data {
             Ok(())
         }

+        pub(super) fn blocks_by_numbers(
+            &self,
+            conn: &mut PgConnection,
+            chain: &str,
+            numbers: &[BlockNumber],
+        ) -> Result<Vec<JsonBlock>, StoreError> {
+            let x = match self {
+                Storage::Shared => {
+                    use public::ethereum_blocks as b;
+
+                    b::table
+                        .select((
+                            b::hash,
+                            b::number,
+                            b::parent_hash,
+                            sql::<Jsonb>("coalesce(data -> 'block', data)"),
+                        ))
+                        .filter(b::network_name.eq(chain))
+                        .filter(b::number.eq_any(Vec::from_iter(numbers.iter().map(|&n| n as i64))))
+                        .load::<(BlockHash, i64, BlockHash, json::Value)>(conn)
+                }
+                Storage::Private(Schema { blocks, .. }) => blocks
+                    .table()
+                    .select((
+                        blocks.hash(),
+                        blocks.number(),
+                        blocks.parent_hash(),
+                        sql::<Jsonb>("coalesce(data -> 'block', data)"),
+                    ))
+                    .filter(
+                        blocks
+                            .number()
+                            .eq_any(Vec::from_iter(numbers.iter().map(|&n| n as i64))),
+                    )
+                    .load::<(BlockHash, i64, BlockHash, json::Value)>(conn),
+            }?;
+
+            Ok(x.into_iter()
+                .map(|(hash, nr, parent, data)| {
+                    JsonBlock::new(BlockPtr::new(hash, nr as i32), parent, Some(data))
+                })
+                .collect())
+        }
+
         pub(super) fn blocks(
             &self,
             conn: &mut PgConnection,
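Both query arms select `coalesce(data -> 'block', data)`: a cached row may either nest the block JSON under a `block` key or store it directly, and the query returns whichever is present. The same fallback expressed over a `serde_json` value, as an illustrative helper rather than anything in the commit:

```rust
// In-memory mirror of the SQL `coalesce(data -> 'block', data)` fallback.
fn extract_block(data: serde_json::Value) -> serde_json::Value {
    match data.get("block") {
        Some(block) => block.clone(),
        None => data,
    }
}
```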
@@ -1651,7 +1696,10 @@ impl ChainStoreMetrics {
 }

 #[derive(Clone, CheapClone)]
-struct BlocksLookupResult(Arc<Result<Vec<JsonBlock>, StoreError>>);
+enum BlocksLookupResult {
+    ByHash(Arc<Result<Vec<JsonBlock>, StoreError>>),
+    ByNumber(Arc<Result<BTreeMap<BlockNumber, Vec<JsonBlock>>, StoreError>>),
+}

 pub struct ChainStore {
     logger: Logger,
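The existing by-hash lookup and the new by-number lookup are deduplicated through the same `lookup_herd` cache, so the cached value now has to carry either result shape; each call site matches on its own variant and treats the other as unreachable. A minimal model of that pattern, with plain types standing in for the real result payloads:

```rust
#[derive(Clone)]
enum LookupResult {
    ByHash(Vec<String>),
    ByNumber(Vec<u64>),
}

fn unwrap_by_number(cached: LookupResult) -> Vec<u64> {
    match cached {
        LookupResult::ByNumber(res) => res,
        // The by-number path only ever stores ByNumber values under its keys,
        // so any other variant would indicate a logic error.
        _ => unreachable!("by-number lookup returned a by-hash result"),
    }
}
```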
@@ -1870,6 +1918,35 @@ impl ChainStore {
             .await?;
         Ok(values)
     }
+
+    async fn blocks_from_store_by_numbers(
+        self: &Arc<Self>,
+        numbers: Vec<BlockNumber>,
+    ) -> Result<BTreeMap<BlockNumber, Vec<JsonBlock>>, StoreError> {
+        let store = self.cheap_clone();
+        let pool = self.pool.clone();
+
+        let values = pool
+            .with_conn(move |conn, _| {
+                store
+                    .storage
+                    .blocks_by_numbers(conn, &store.chain, &numbers)
+                    .map_err(CancelableError::from)
+            })
+            .await?;
+
+        let mut block_map = BTreeMap::new();
+
+        for block in values {
+            let block_number = block.ptr.block_number();
+            block_map
+                .entry(block_number)
+                .or_insert_with(Vec::new)
+                .push(block);
+        }
+
+        Ok(block_map)
+    }
 }

 #[async_trait]
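`blocks_from_store_by_numbers` buckets rows per block number rather than assuming one block per number, since the store can hold more than one block at a given height (for example around a reorg). The grouping step in isolation, with a `(number, hash)` pair standing in for `JsonBlock`:

```rust
use std::collections::BTreeMap;

fn group_by_number(rows: Vec<(i32, String)>) -> BTreeMap<i32, Vec<String>> {
    let mut map: BTreeMap<i32, Vec<String>> = BTreeMap::new();
    for (number, hash) in rows {
        // entry().or_default() creates the Vec the first time a number is seen.
        map.entry(number).or_default().push(hash);
    }
    map
}
```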
@@ -2065,6 +2142,85 @@ impl ChainStoreTrait for ChainStore {
         Ok(())
     }

+    async fn blocks_by_numbers(
+        self: Arc<Self>,
+        numbers: Vec<BlockNumber>,
+    ) -> Result<BTreeMap<BlockNumber, Vec<json::Value>>, Error> {
+        if ENV_VARS.store.disable_block_cache_for_lookup {
+            let values = self
+                .blocks_from_store_by_numbers(numbers)
+                .await?
+                .into_iter()
+                .map(|(num, blocks)| {
+                    (
+                        num,
+                        blocks
+                            .into_iter()
+                            .filter_map(|block| block.data)
+                            .collect::<Vec<_>>(),
+                    )
+                })
+                .collect();
+            Ok(values)
+        } else {
+            let cached = self.recent_blocks_cache.get_blocks_by_numbers(&numbers);
+
+            let stored = if cached.len() < numbers.len() {
+                let missing_numbers = numbers
+                    .iter()
+                    .filter(|num| !cached.iter().any(|(ptr, _)| ptr.block_number() == **num))
+                    .cloned()
+                    .collect::<Vec<_>>();
+
+                let hash = crypto_stable_hash(&missing_numbers);
+                let this = self.clone();
+                let lookup_fut = async move {
+                    let res = this.blocks_from_store_by_numbers(missing_numbers).await;
+                    BlocksLookupResult::ByNumber(Arc::new(res))
+                };
+                let lookup_herd = self.lookup_herd.cheap_clone();
+                let logger = self.logger.cheap_clone();
+                let res = match lookup_herd.cached_query(hash, lookup_fut, &logger).await {
+                    (BlocksLookupResult::ByNumber(res), _) => res,
+                    _ => unreachable!(),
+                };
+                let res = Arc::try_unwrap(res).unwrap_or_else(|arc| (*arc).clone());
+
+                match res {
+                    Ok(blocks) => {
+                        for (_, blocks_for_num) in &blocks {
+                            if blocks.len() == 1 {
+                                self.recent_blocks_cache
+                                    .insert_block(blocks_for_num[0].clone());
+                            }
+                        }
+                        blocks
+                    }
+                    Err(e) => {
+                        return Err(e.into());
+                    }
+                }
+            } else {
+                BTreeMap::new()
+            };
+
+            let cached_map = cached
+                .into_iter()
+                .map(|(ptr, data)| (ptr.block_number(), vec![data]))
+                .collect::<BTreeMap<_, _>>();
+
+            let mut result: BTreeMap<BlockNumber, Vec<json::Value>> = cached_map;
+            for (num, blocks) in stored {
+                result
+                    .entry(num)
+                    .or_default()
+                    .extend(blocks.into_iter().filter_map(|block| block.data));
+            }
+
+            Ok(result)
+        }
+    }
+
     async fn blocks(self: Arc<Self>, hashes: Vec<BlockHash>) -> Result<Vec<json::Value>, Error> {
         if ENV_VARS.store.disable_block_cache_for_lookup {
             let values = self
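The `Arc::try_unwrap(res).unwrap_or_else(|arc| (*arc).clone())` line follows the same idea the by-hash path comments on in the next hunk: if no other task is still holding the herd-cached result, move it out of the `Arc` without cloning, otherwise fall back to a clone. The idiom on its own:

```rust
use std::sync::Arc;

fn take_or_clone<T: Clone>(shared: Arc<T>) -> T {
    // Succeeds (no clone) when this is the only strong reference;
    // otherwise clones the shared value.
    Arc::try_unwrap(shared).unwrap_or_else(|arc| (*arc).clone())
}
```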
@@ -2094,12 +2250,15 @@
             let this = self.clone();
             let lookup_fut = async move {
                 let res = this.blocks_from_store(hashes).await;
-                BlocksLookupResult(Arc::new(res))
+                BlocksLookupResult::ByHash(Arc::new(res))
             };
             let lookup_herd = self.lookup_herd.cheap_clone();
             let logger = self.logger.cheap_clone();
-            let (BlocksLookupResult(res), _) =
-                lookup_herd.cached_query(hash, lookup_fut, &logger).await;
+            //TODO(krishna): Add comments explaining the return value of cached_query
+            let res = match lookup_herd.cached_query(hash, lookup_fut, &logger).await {
+                (BlocksLookupResult::ByHash(res), _) => res,
+                _ => unreachable!(),
+            };
             // Try to avoid cloning a non-concurrent lookup; it's not
             // entirely clear whether that will actually avoid a clone
             // since it depends on a lot of the details of how the
@@ -2361,6 +2520,12 @@ mod recent_blocks_cache {
                 .and_then(|block| block.data.as_ref().map(|data| (&block.ptr, data)))
         }

+        fn get_block_by_number(&self, number: BlockNumber) -> Option<(&BlockPtr, &json::Value)> {
+            self.blocks
+                .get(&number)
+                .and_then(|block| block.data.as_ref().map(|data| (&block.ptr, data)))
+        }
+
         fn get_ancestor(
             &self,
             child_ptr: &BlockPtr,
@@ -2483,6 +2648,28 @@
             blocks
         }

+        pub fn get_blocks_by_numbers(
+            &self,
+            numbers: &[BlockNumber],
+        ) -> Vec<(BlockPtr, json::Value)> {
+            let inner = self.inner.read();
+            let mut blocks: Vec<(BlockPtr, json::Value)> = Vec::new();
+
+            for &number in numbers {
+                if let Some((ptr, block)) = inner.get_block_by_number(number) {
+                    blocks.push((ptr.clone(), block.clone()));
+                }
+            }
+
+            inner.metrics.record_hit_and_miss(
+                &inner.network,
+                blocks.len(),
+                numbers.len() - blocks.len(),
+            );
+
+            blocks
+        }
+
         /// Tentatively caches the `ancestor` of a [`BlockPtr`] (`child`), together with
         /// its associated `data`. Note that for this to work, `child` must be
         /// in the cache already. The first block in the cache should be
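`get_blocks_by_numbers` reports one cache hit per block it could serve and one miss per number it could not, so misses are simply `numbers.len() - blocks.len()`. A stand-alone version of that probe over a plain map (the real cache and its metrics types are not shown):

```rust
use std::collections::BTreeMap;

fn probe_cache(cache: &BTreeMap<i32, String>, numbers: &[i32]) -> (Vec<(i32, String)>, usize) {
    let mut found = Vec::new();
    for &number in numbers {
        if let Some(data) = cache.get(&number) {
            found.push((number, data.clone()));
        }
    }
    let misses = numbers.len() - found.len(); // every requested-but-absent number is a miss
    (found, misses)
}
```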
