-
Notifications
You must be signed in to change notification settings - Fork 933
Attestation processing optimization with batching #8285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
hopinheimer
wants to merge
18
commits into
sigp:unstable
Choose a base branch
from
hopinheimer:attestation-optimisation
base: unstable
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+259
−33
Open
Changes from 11 commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
7575986
initial commit
hopinheimer 364ffd3
clean up
hopinheimer aa2b4b0
send directly to reprocessing queue
hopinheimer fe3f9bc
Merge branch 'unstable' into attestation-optimisation
hopinheimer 3ed5a0a
clean up
hopinheimer 62d91b8
Merge branch 'attestation-optimisation' of github.com:hopinheimer/lig…
hopinheimer 3abbf39
some work unoptimised code
hopinheimer fc85634
changes
hopinheimer 4c7d304
Merge branch 'unstable' into attestation-optimisation
hopinheimer 6eacf0f
latest compiler changes
hopinheimer 1033f07
Merge branch 'attestation-optimisation' of github.com:hopinheimer/lig…
hopinheimer d7c587b
removing some complexity
hopinheimer c173dc1
adding metrics
hopinheimer 84e2163
fixing janky metrics
hopinheimer 519805d
addressing comment and fmt
hopinheimer fbccd8c
fix
hopinheimer e4f29c9
making queue params configurable
hopinheimer ad24a79
clippy
hopinheimer File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ | |
| //! block will be re-queued until their block is imported, or until they expire. | ||
| use crate::metrics; | ||
| use crate::{AsyncFn, BlockingFn, Work, WorkEvent}; | ||
| use crate::{GossipAttestationBatch, GossipAttestationPackage, SingleAttestation}; | ||
| use fnv::FnvHashMap; | ||
| use futures::task::Poll; | ||
| use futures::{Stream, StreamExt}; | ||
|
|
@@ -48,6 +49,9 @@ pub const ADDITIONAL_QUEUED_BLOCK_DELAY: Duration = Duration::from_millis(5); | |
| /// For how long to queue aggregated and unaggregated attestations for re-processing. | ||
| pub const QUEUED_ATTESTATION_DELAY: Duration = Duration::from_secs(12); | ||
|
|
||
| /// Batching interval: queued attestation batches expire on multiples of this delay from the slot start. | ||
| pub const QUEUED_BATCH_ATTESTATION_DELAY: Duration = Duration::from_millis(50); | ||
hopinheimer marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| /// For how long to queue light client updates for re-processing. | ||
| pub const QUEUED_LIGHT_CLIENT_UPDATE_DELAY: Duration = Duration::from_secs(12); | ||
|
|
||
|
|
@@ -60,6 +64,9 @@ pub const QUEUED_SAMPLING_REQUESTS_DELAY: Duration = Duration::from_secs(12); | |
| /// For how long to queue delayed column reconstruction. | ||
| pub const QUEUED_RECONSTRUCTION_DELAY: Duration = Duration::from_millis(150); | ||
|
|
||
| /// Maximum number of attestations held in a single batch. | ||
| pub const MAXIMUM_BATCHED_ATTESTATIONS: usize = 1_024; | ||
|
|
||
| /// Set an arbitrary upper-bound on the number of queued blocks to avoid DoS attacks. The fact that | ||
| /// we signature-verify blocks before putting them in the queue *should* protect against this, but | ||
| /// it's nice to have extra protection. | ||
|
|
@@ -115,6 +122,8 @@ pub enum ReprocessQueueMessage { | |
| BackfillSync(QueuedBackfillBatch), | ||
| /// A delayed column reconstruction that needs checking | ||
| DelayColumnReconstruction(QueuedColumnReconstruction), | ||
| /// A delayed attestation which will be batched for optimization. | ||
| BatchedAttestation(QueuedBatchedAttestation), | ||
| } | ||
|
|
||
| /// Events sent by the scheduler once they are ready for re-processing. | ||
|
|
@@ -127,6 +136,7 @@ pub enum ReadyWork { | |
| LightClientUpdate(QueuedLightClientUpdate), | ||
| BackfillSync(QueuedBackfillBatch), | ||
| ColumnReconstruction(QueuedColumnReconstruction), | ||
| DelayedAttestationBatch(QueuedAttestationBatch), | ||
| } | ||
|
|
||
| /// An Attestation for which the corresponding block was not seen while processing, queued for | ||
|
|
@@ -173,6 +183,18 @@ pub struct IgnoredRpcBlock { | |
| pub process_fn: BlockingFn, | ||
| } | ||
|
|
||
| pub struct QueuedBatchedAttestation { | ||
| pub attestation: Box<GossipAttestationPackage<SingleAttestation>>, | ||
| pub process_individual: | ||
| Box<dyn FnOnce(GossipAttestationPackage<SingleAttestation>) + Send + Sync>, | ||
| pub process_batch: Box<dyn FnOnce(GossipAttestationBatch) + Send + Sync>, | ||
| } | ||
|
|
||
| pub struct QueuedAttestationBatch { | ||
| pub attestations: GossipAttestationBatch, | ||
| pub process_batch: Box<dyn FnOnce(GossipAttestationBatch) + Send + Sync>, | ||
| } | ||
|
|
||
| /// A backfill batch work that has been queued for processing later. | ||
| pub struct QueuedBackfillBatch(pub BlockingFn); | ||
|
|
||
|
|
@@ -220,6 +242,8 @@ enum InboundEvent { | |
| ReadyBackfillSync(QueuedBackfillBatch), | ||
| /// A column reconstruction that was queued is ready for processing. | ||
| ReadyColumnReconstruction(QueuedColumnReconstruction), | ||
| /// A batch of attestations is now ready for processing. | ||
| ReadyBatchedAttestation(QueuedAttestationId), | ||
| /// A message sent to the `ReprocessQueue` | ||
| Msg(ReprocessQueueMessage), | ||
| } | ||
|
|
@@ -242,6 +266,8 @@ struct ReprocessQueue<S> { | |
| lc_updates_delay_queue: DelayQueue<QueuedLightClientUpdateId>, | ||
| /// Queue to manage scheduled column reconstructions. | ||
| column_reconstructions_delay_queue: DelayQueue<QueuedColumnReconstruction>, | ||
| /// Queue to manage scheduled attestation batches. | ||
| batched_attestation_queue: DelayQueue<QueuedAttestationId>, | ||
|
|
||
| /* Queued items */ | ||
| /// Queued blocks. | ||
|
|
@@ -250,6 +276,8 @@ struct ReprocessQueue<S> { | |
| queued_aggregates: FnvHashMap<usize, (QueuedAggregate, DelayKey)>, | ||
| /// Queued attestations. | ||
| queued_unaggregates: FnvHashMap<usize, (QueuedUnaggregate, DelayKey)>, | ||
| /// Queued attestation batches, keyed by batch id. | ||
| queued_batch_attestations: FnvHashMap<usize, (Vec<QueuedBatchedAttestation>, DelayKey)>, | ||
| /// Attestations (aggregated and unaggregated) per root. | ||
| awaiting_attestations_per_root: HashMap<Hash256, Vec<QueuedAttestationId>>, | ||
| /// Queued Light Client Updates. | ||
|
|
@@ -264,6 +292,7 @@ struct ReprocessQueue<S> { | |
| /* Aux */ | ||
| /// Next attestation id, used for both aggregated and unaggregated attestations | ||
| next_attestation: usize, | ||
| current_attestation_batch: usize, | ||
| next_lc_update: usize, | ||
| early_block_debounce: TimeLatch, | ||
| rpc_block_debounce: TimeLatch, | ||
|
|
@@ -279,6 +308,7 @@ pub type QueuedLightClientUpdateId = usize; | |
| enum QueuedAttestationId { | ||
| Aggregate(usize), | ||
| Unaggregate(usize), | ||
| Batched(usize), | ||
| } | ||
|
|
||
| impl QueuedAggregate { | ||
|
|
@@ -335,6 +365,17 @@ impl<S: SlotClock> Stream for ReprocessQueue<S> { | |
| Poll::Ready(None) | Poll::Pending => (), | ||
| } | ||
|
|
||
| match self.batched_attestation_queue.poll_expired(cx) { | ||
| Poll::Ready(Some(attestation_id)) => { | ||
| return Poll::Ready(Some(InboundEvent::ReadyBatchedAttestation( | ||
| attestation_id.into_inner(), | ||
| ))); | ||
| } | ||
| // `Poll::Ready(None)` means that there are no more entries in the delay queue and we | ||
| // will continue to get this result until something else is added into the queue. | ||
| Poll::Ready(None) | Poll::Pending => (), | ||
| } | ||
|
|
||
| match self.lc_updates_delay_queue.poll_expired(cx) { | ||
| Poll::Ready(Some(lc_id)) => { | ||
| return Poll::Ready(Some(InboundEvent::ReadyLightClientUpdate( | ||
|
|
@@ -420,17 +461,20 @@ impl<S: SlotClock> ReprocessQueue<S> { | |
| gossip_block_delay_queue: DelayQueue::new(), | ||
| rpc_block_delay_queue: DelayQueue::new(), | ||
| attestations_delay_queue: DelayQueue::new(), | ||
| batched_attestation_queue: DelayQueue::new(), | ||
| lc_updates_delay_queue: DelayQueue::new(), | ||
| column_reconstructions_delay_queue: DelayQueue::new(), | ||
| queued_gossip_block_roots: HashSet::new(), | ||
| queued_lc_updates: FnvHashMap::default(), | ||
| queued_aggregates: FnvHashMap::default(), | ||
| queued_unaggregates: FnvHashMap::default(), | ||
| queued_batch_attestations: FnvHashMap::default(), | ||
| awaiting_attestations_per_root: HashMap::new(), | ||
| awaiting_lc_updates_per_parent_root: HashMap::new(), | ||
| queued_backfill_batches: Vec::new(), | ||
| queued_column_reconstructions: HashMap::new(), | ||
| next_attestation: 0, | ||
| current_attestation_batch: 0, | ||
| next_lc_update: 0, | ||
| early_block_debounce: TimeLatch::default(), | ||
| rpc_block_debounce: TimeLatch::default(), | ||
|
|
@@ -670,6 +714,10 @@ impl<S: SlotClock> ReprocessQueue<S> { | |
| .map(|(unaggregate, delay_key)| { | ||
| (ReadyWork::Unaggregate(unaggregate), delay_key) | ||
| }), | ||
| QueuedAttestationId::Batched(_) => { | ||
| error!("this should never occur"); | ||
| None | ||
| } | ||
| } { | ||
| // Remove the delay. | ||
| self.attestations_delay_queue.remove(&delay_key); | ||
|
|
@@ -784,6 +832,68 @@ impl<S: SlotClock> ReprocessQueue<S> { | |
| } | ||
| } | ||
| } | ||
| InboundEvent::Msg(BatchedAttestation(queued_batch_attestation)) => { | ||
| let batch_processing_delay = QUEUED_BATCH_ATTESTATION_DELAY; | ||
|
|
||
| let mut time_to_next_batch = 0; | ||
|
|
||
| if let Some(batched_queue) = self | ||
| .queued_batch_attestations | ||
| .get_mut(&self.current_attestation_batch) | ||
| { | ||
| if batched_queue.0.len() >= MAXIMUM_BATCHED_ATTESTATIONS { | ||
michaelsproul marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| self.current_attestation_batch += 1; | ||
| if let Some(current_slot_time) = | ||
| self.slot_clock.millis_from_current_slot_start() | ||
| { | ||
| let slot_time = current_slot_time.as_millis() as usize; | ||
| let total_slot_duration = | ||
| self.slot_clock.slot_duration().as_millis() as usize; | ||
|
|
||
| time_to_next_batch = (0..=total_slot_duration) | ||
| .step_by(batch_processing_delay.as_millis() as usize) | ||
| .find(|&t| t > slot_time) | ||
| .map_or(0, |t| t - slot_time); | ||
| } | ||
|
|
||
| let delay_key = self.batched_attestation_queue.insert( | ||
| QueuedAttestationId::Batched(self.current_attestation_batch), | ||
| Duration::from_millis(time_to_next_batch as u64), | ||
| ); | ||
|
|
||
| self.queued_batch_attestations.insert( | ||
| self.current_attestation_batch, | ||
| (vec![queued_batch_attestation], delay_key), | ||
| ); | ||
| } else { | ||
| batched_queue.0.push(queued_batch_attestation); | ||
| } | ||
| } else { | ||
| self.current_attestation_batch += 1; | ||
| if let Some(current_slot_time) = | ||
| self.slot_clock.millis_from_current_slot_start() | ||
| { | ||
| let slot_time = current_slot_time.as_millis() as usize; | ||
| let total_slot_duration = | ||
| self.slot_clock.slot_duration().as_millis() as usize; | ||
|
|
||
| time_to_next_batch = (0..=total_slot_duration) | ||
| .step_by(batch_processing_delay.as_millis() as usize) | ||
| .find(|&t| t > slot_time) | ||
| .map_or(0, |t| t - slot_time); | ||
| } | ||
|
|
||
| let delay_key = self.batched_attestation_queue.insert( | ||
| QueuedAttestationId::Batched(self.current_attestation_batch), | ||
| Duration::from_millis(time_to_next_batch as u64), | ||
| ); | ||
|
|
||
| self.queued_batch_attestations.insert( | ||
| self.current_attestation_batch, | ||
| (vec![queued_batch_attestation], delay_key), | ||
| ); | ||
| } | ||
| } | ||
| // A block that was queued for later processing is now ready to be processed. | ||
| InboundEvent::ReadyGossipBlock(ready_block) => { | ||
| let block_root = ready_block.beacon_block_root; | ||
|
|
@@ -827,6 +937,10 @@ impl<S: SlotClock> ReprocessQueue<S> { | |
| ReadyWork::Unaggregate(unaggregate), | ||
| ) | ||
| }), | ||
| QueuedAttestationId::Batched(_) => { | ||
| error!("batched attestation ID reached ReadyAttestation handler"); | ||
| None | ||
| } | ||
| } { | ||
| if self.ready_work_tx.try_send(work).is_err() { | ||
| error!( | ||
|
|
@@ -852,6 +966,42 @@ impl<S: SlotClock> ReprocessQueue<S> { | |
| } | ||
| } | ||
| } | ||
| InboundEvent::ReadyBatchedAttestation(queued_id) => { | ||
| metrics::inc_counter( | ||
| &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_EXPIRED_ATTESTATIONS, | ||
| ); | ||
|
|
||
| let QueuedAttestationId::Batched(batch_id) = queued_id else { | ||
| crit!("Invalid attestation Id batched for attestation"); | ||
| return; | ||
| }; | ||
|
|
||
| if let Some(batch_attestation) = self.queued_batch_attestations.remove(&batch_id) { | ||
| let mut attestations = GossipAttestationBatch::new(); | ||
| let mut iter = batch_attestation.0.into_iter(); | ||
|
|
||
| if let Some(first) = iter.next() { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it easier to read to do |
||
| attestations.push(*first.attestation); | ||
| let process_batch = first.process_batch; | ||
|
|
||
| for unaggregate in iter { | ||
| attestations.push(*unaggregate.attestation); | ||
| } | ||
|
|
||
| if self | ||
| .ready_work_tx | ||
| .try_send(ReadyWork::DelayedAttestationBatch(QueuedAttestationBatch { | ||
| attestations, | ||
| process_batch, | ||
| })) | ||
| .is_err() | ||
| { | ||
| error!("Failed to send batched attestations"); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| InboundEvent::ReadyLightClientUpdate(queued_id) => { | ||
| metrics::inc_counter( | ||
| &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_EXPIRED_OPTIMISTIC_UPDATES, | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.