Skip to content

Commit d2ed734

Browse files
tersec authored and zah committed
pre-emptive duplicate validator detection heuristic
1 parent 921fe5a commit d2ed734

File tree

8 files changed

+132
-9
lines changed

8 files changed

+132
-9
lines changed

beacon_chain/attestation_pool.nim

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,18 +135,18 @@ proc updateCurrent(pool: var AttestationPool, wallSlot: Slot) =
135135

136136
func addToAggregates(pool: var AttestationPool, attestation: Attestation) =
  ## Merge `attestation` into the aggregate kept for the same slot and the
  ## same attestation-data hash tree root. When the pool holds no entry for
  ## that data yet, `attestation` itself is stored as the initial aggregate.
  # Do a lookup for the current slot and get its associated htrs/attestations,
  # then do a lookup for the same attestation data htr and get the attestation.
  #
  # `mgetOrPut` returns a `var` location, but binding it with `var x = ...`
  # copies the value, so every update below would be applied to a temporary
  # and never reach the table. Hold the address of the table entry instead.
  let aggregated_attestation = addr pool.attestationAggregates.mgetOrPut(
    attestation.data.slot, Table[Eth2Digest, Attestation]()).
    mgetOrPut(attestation.data.hash_tree_root, attestation)
  # If the aggregation bits differ (we didn't just insert it into the table)
  # and only if there is no overlap of the signatures ==> aggregate!
  if not aggregated_attestation.aggregation_bits.overlaps(
      attestation.aggregation_bits):
    var agg {.noInit.}: AggregateSignature
    agg.init(aggregated_attestation.signature)
    aggregated_attestation.aggregation_bits.combine(attestation.aggregation_bits)
    agg.aggregate(attestation.signature)
    # Stored straight into the pool's entry through the pointer.
    aggregated_attestation.signature = agg.finish()
150150

151151
proc addAttestation*(pool: var AttestationPool,
152152
attestation: Attestation,

beacon_chain/conf.nim

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ type
5151
enabled = "Always enabled"
5252
disabled = "Always disabled"
5353

54+
# Selects what the node does when gossip shows another instance attesting
# with validator keys that are also attached locally.
GossipSlashingProtectionMode* {.pure.} = enum
55+
# Deliberately undocumented escape hatch for local testnets and similar
# development and testing use cases.
dontcheck
56+
# Default: log a warning and bump the metric, but keep running.
warn
57+
# Hold back validator duties until the broadcast start epoch (except during
# the probe epoch) and quit the process when a duplicate is detected.
stop
58+
5459
BeaconNodeConf* = object
5560
logLevel* {.
5661
defaultValue: "INFO"
@@ -255,6 +260,12 @@ type
255260
desc: "Write SSZ dumps of blocks, attestations and states to data dir"
256261
name: "dump" }: bool
257262

263+
gossipSlashingProtection* {.
264+
defaultValue: GossipSlashingProtectionMode.warn
265+
desc: "[=warn*|stop] What to do when another validator is detected to be running the same validator keys (default `warn`, will become `stop` in the future)"
266+
name: "gossip-slashing-protection"
267+
}: GossipSlashingProtectionMode
268+
258269
of createTestnet:
259270
testnetDepositsFile* {.
260271
desc: "A LaunchPad deposits file for the genesis state validators"

beacon_chain/eth2_processor.nim

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import
1313
chronicles, chronos, metrics,
1414
./spec/[crypto, datatypes, digest],
1515
./block_pools/[clearance, chain_dag],
16-
./attestation_aggregation, ./exit_pool,
16+
./attestation_aggregation, ./exit_pool, ./validator_pool,
1717
./beacon_node_types, ./attestation_pool,
1818
./time, ./conf, ./sszdump
1919

@@ -31,6 +31,9 @@ declareCounter beacon_proposer_slashings_received,
3131
declareCounter beacon_voluntary_exits_received,
3232
"Number of beacon chain voluntary exits received by this peer"
3333

34+
declareCounter beacon_duplicate_validator_protection_activated,
35+
"Number of times duplicate validator protection was activated"
36+
3437
const delayBuckets = [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, Inf]
3538

3639
declareHistogram beacon_attestation_delay,
@@ -67,13 +70,16 @@ type
6770
chainDag*: ChainDAGRef
6871
attestationPool*: ref AttestationPool
6972
exitPool: ref ExitPool
73+
validatorPool: ref ValidatorPool
7074
quarantine*: QuarantineRef
7175
blockReceivedDuringSlot*: Future[void]
7276

7377
blocksQueue*: AsyncQueue[BlockEntry]
7478
attestationsQueue*: AsyncQueue[AttestationEntry]
7579
aggregatesQueue*: AsyncQueue[AggregateEntry]
7680

81+
gossipSlashingProtection*: DupProtection
82+
7783
proc updateHead*(self: var Eth2Processor, wallSlot: Slot) =
7884
## Trigger fork choice and returns the new head block.
7985
## Can return `nil`
@@ -298,6 +304,42 @@ proc blockValidator*(
298304

299305
{.push raises: [Defect].}
300306

307+
proc checkForPotentialSelfSlashing(
308+
self: var Eth2Processor, attestationData: AttestationData,
309+
attesterIndices: HashSet[ValidatorIndex], wallSlot: Slot) =
310+
# Attestations remain valid for 32 slots, so avoid confusing with one's own
311+
# reflections, for a ATTESTATION_PROPAGATION_SLOT_RANGE div SLOTS_PER_EPOCH
312+
# period after the attestation slot. For mainnet this can be one additional
313+
# epoch, and for minimal, four epochs. Unlike in the attestation validation
314+
# checks, use the spec version of the constant here.
315+
const
316+
# https://github.com/ethereum/eth2.0-specs/blob/v1.0.0/specs/phase0/p2p-interface.md#configuration
317+
ATTESTATION_PROPAGATION_SLOT_RANGE = 32
318+
319+
GUARD_EPOCHS = ATTESTATION_PROPAGATION_SLOT_RANGE div SLOTS_PER_EPOCH
320+
321+
# If gossipSlashingProtection not dontcheck or stop, it's the default "warn".
322+
let epoch = wallSlot.epoch
323+
if epoch < self.gossipSlashingProtection.broadcastStartEpoch and
324+
epoch >= self.gossipSlashingProtection.probeEpoch and
325+
epoch <= self.gossipSlashingProtection.probeEpoch + GUARD_EPOCHS:
326+
let tgtBlck = self.chainDag.getRef(attestationData.target.root)
327+
doAssert not tgtBlck.isNil # because attestation is valid above
328+
329+
let epochRef = self.chainDag.getEpochRef(
330+
tgtBlck, attestationData.target.epoch)
331+
for validatorIndex in attesterIndices:
332+
let validatorPubkey = epochRef.validator_keys[validatorIndex]
333+
if self.validatorPool[].getValidator(validatorPubkey) !=
334+
default(AttachedValidator):
335+
warn "Duplicate validator detected; would be slashed",
336+
validatorIndex,
337+
validatorPubkey
338+
beacon_duplicate_validator_protection_activated.inc()
339+
if self.config.gossipSlashingProtection == GossipSlashingProtectionMode.stop:
340+
warn "We believe you are currently running another instance of the same validator. We've disconnected you from the network as this presents a significant slashing risk. Possible next steps are (a) making sure you've disconnected your validator from your old machine before restarting the client; and (b) running the client again with the gossip-slashing-protection option disabled, only if you are absolutely sure this is the only instance of your validator running, and reporting the issue at https://github.com/status-im/nimbus-eth2/issues."
341+
quit QuitFailure
342+
301343
proc attestationValidator*(
302344
self: var Eth2Processor,
303345
attestation: Attestation,
@@ -329,6 +371,8 @@ proc attestationValidator*(
329371
beacon_attestations_received.inc()
330372
beacon_attestation_delay.observe(delay.toFloatSeconds())
331373

374+
self.checkForPotentialSelfSlashing(attestation.data, v.value, wallSlot)
375+
332376
while self.attestationsQueue.full():
333377
try:
334378
notice "Queue full, dropping attestation",
@@ -381,6 +425,9 @@ proc aggregateValidator*(
381425
beacon_aggregates_received.inc()
382426
beacon_aggregate_delay.observe(delay.toFloatSeconds())
383427

428+
self.checkForPotentialSelfSlashing(
429+
signedAggregateAndProof.message.aggregate.data, v.value, wallSlot)
430+
384431
while self.aggregatesQueue.full():
385432
try:
386433
notice "Queue full, dropping aggregate",
@@ -500,6 +547,7 @@ proc new*(T: type Eth2Processor,
500547
chainDag: ChainDAGRef,
501548
attestationPool: ref AttestationPool,
502549
exitPool: ref ExitPool,
550+
validatorPool: ref ValidatorPool,
503551
quarantine: QuarantineRef,
504552
getWallTime: GetWallTimeFn): ref Eth2Processor =
505553
(ref Eth2Processor)(
@@ -508,6 +556,7 @@ proc new*(T: type Eth2Processor,
508556
chainDag: chainDag,
509557
attestationPool: attestationPool,
510558
exitPool: exitPool,
559+
validatorPool: validatorPool,
511560
quarantine: quarantine,
512561
blockReceivedDuringSlot: newFuture[void](),
513562
blocksQueue: newAsyncQueue[BlockEntry](1),

beacon_chain/nimbus_beacon_node.nim

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,8 @@ proc init*(T: type BeaconNode,
322322
proc getWallTime(): BeaconTime = res.beaconClock.now()
323323

324324
res.processor = Eth2Processor.new(
325-
conf, chainDag, attestationPool, exitPool, quarantine, getWallTime)
325+
conf, chainDag, attestationPool, exitPool, newClone(res.attachedValidators),
326+
quarantine, getWallTime)
326327

327328
res.requestManager = RequestManager.init(
328329
network, res.processor.blocksQueue)
@@ -556,6 +557,45 @@ proc removeMessageHandlers(node: BeaconNode) =
556557
for subnet in 0'u64 ..< ATTESTATION_SUBNET_COUNT:
557558
node.network.unsubscribe(getAttestationTopic(node.forkDigest, subnet))
558559

560+
proc setupSelfSlashingProtection(node: BeaconNode, slot: Slot) =
  ## Choose the probe epoch and the broadcast start epoch used by the
  ## duplicate-validator heuristic, counted from the epoch of `slot`, and
  ## store both on the node's processor.
  # When another client's already running, this is very likely to detect
  # potential duplicate validators, which can trigger slashing. Assuming the
  # most pessimal case of two validators started simultaneously, the
  # probability of triggering a slashable condition is up to 1/n, with n
  # being the number of epochs one waits before proposing or attesting.
  #
  # Every missed attestation costs approximately 3*get_base_reward(), which
  # can be up to around 10,000 Gwei. Skipping attestations therefore isn't
  # cheap, and one should gauge the likelihood of such a simultaneous launch
  # to tune the epoch delay to one's perceived risk.
  #
  # This approach catches both startup and network outage conditions.
  const duplicateValidatorEpochs = 2

  template protection: untyped = node.processor.gossipSlashingProtection

  let startEpoch = slot.epoch

  protection.broadcastStartEpoch = startEpoch + duplicateValidatorEpochs

  # randomize() has already been called. Never probe on the first epoch of
  # the guard period, so that existing, running validators can be picked up.
  # This reduces entropy for overlapping-start cases and raises their
  # collision likelihood, which can be compensated for by lengthening the
  # guard period by one epoch. As a corollary, 1 guard epoch won't detect a
  # duplicate pair that overlaps exactly, only the running/starting case.
  # Even 2 epochs is dangerous because it guarantees colliding probes in the
  # overlapping case.
  #
  # So dPE == 2 -> epoch + 1, always; dPE == 3 -> epoch + (1 or 2), etc.
  let probeOffset = rand(duplicateValidatorEpochs.int - 2).uint64
  protection.probeEpoch = startEpoch + 1 + probeOffset

  doAssert protection.probeEpoch < protection.broadcastStartEpoch

  debug "Setting up self-slashing protection",
    epoch = startEpoch,
    probeEpoch = protection.probeEpoch,
    broadcastStartEpoch = protection.broadcastStartEpoch
598+
559599
proc updateGossipStatus(node: BeaconNode, slot: Slot) =
560600
# Syncing tends to be ~1 block/s, and allow for an epoch of time for libp2p
561601
# subscribing to spin up. The faster the sync, the more wallSlot - headSlot
@@ -589,6 +629,7 @@ proc updateGossipStatus(node: BeaconNode, slot: Slot) =
589629
headSlot = node.chainDag.head.slot,
590630
syncQueueLen
591631

632+
node.setupSelfSlashingProtection(slot)
592633
node.addMessageHandlers()
593634
doAssert node.getTopicSubscriptionEnabled()
594635
elif
@@ -907,6 +948,7 @@ proc run*(node: BeaconNode) =
907948
node.startSyncManager()
908949

909950
if not node.beaconClock.now().toSlot().afterGenesis:
951+
node.setupSelfSlashingProtection(curSlot)
910952
node.addMessageHandlers()
911953
doAssert node.getTopicSubscriptionEnabled()
912954

beacon_chain/spec/datatypes.nim

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,10 @@ type
526526
current_justified_checkpoint*: Checkpoint
527527
finalized_checkpoint*: Checkpoint
528528

529+
# Epoch markers used by the duplicate-validator detection heuristic; both are
# filled in by setupSelfSlashingProtection.
DupProtection* = object
530+
# Duplicate checks on incoming gossip only run in epochs before this one. In
# `stop` mode validator duties are also held back until it is reached.
broadcastStartEpoch*: Epoch
531+
# First epoch of the window in which incoming attestations are checked for
# locally attached keys. Duties are not held back during this epoch.
probeEpoch*: Epoch
532+
529533
func shortValidatorKey*(state: BeaconState, validatorIdx: int): string =
530534
($state.validators[validatorIdx].pubkey)[0..7]
531535

beacon_chain/validator_duties.nim

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,21 @@ proc handleValidatorDuties*(node: BeaconNode, lastSlot, slot: Slot) {.async.} =
615615

616616
var curSlot = lastSlot + 1
617617

618+
# The dontcheck option's a deliberately undocumented escape hatch for the
619+
# local testnets and similar development and testing use cases.
620+
doAssert node.config.gossipSlashingProtection == GossipSlashingProtectionMode.dontcheck or (
621+
node.processor[].gossipSlashingProtection.probeEpoch <
622+
node.processor[].gossipSlashingProtection.broadcastStartEpoch)
623+
if curSlot.epoch <
624+
node.processor[].gossipSlashingProtection.broadcastStartEpoch and
625+
curSlot.epoch != node.processor[].gossipSlashingProtection.probeEpoch and
626+
node.config.gossipSlashingProtection == GossipSlashingProtectionMode.stop:
627+
notice "Waiting to gossip out to detect potential duplicate validators",
628+
broadcastStartEpoch =
629+
node.processor[].gossipSlashingProtection.broadcastStartEpoch,
630+
probeEpoch = node.processor[].gossipSlashingProtection.probeEpoch
631+
return
632+
618633
# Start by checking if there's work we should have done in the past that we
619634
# can still meaningfully do
620635
while curSlot < slot:

scripts/launch_local_testnet.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ for NUM_NODE in $(seq 0 $(( NUM_NODES - 1 ))); do
385385
--metrics \
386386
--metrics-address="127.0.0.1" \
387387
--metrics-port="$(( BASE_METRICS_PORT + NUM_NODE ))" \
388+
--gossip-slashing-protection=dontcheck \
388389
${EXTRA_ARGS} \
389390
> "${DATA_DIR}/log${NUM_NODE}.txt" 2>&1 &
390391

tests/simulation/run_node.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ shift
99
# shellcheck source=/dev/null
1010
source "$(dirname "$0")/vars.sh"
1111

12-
if [[ ! -z "$1" ]]; then
12+
if [[ -n "$1" ]]; then
1313
ADDITIONAL_BEACON_NODE_ARGS=$1
1414
shift
1515
else
@@ -18,7 +18,7 @@ fi
1818

1919
BOOTSTRAP_ARG=""
2020

21-
if [[ ! -z "$1" ]]; then
21+
if [[ -n "$1" ]]; then
2222
BOOTSTRAP_NODE_ID=$1
2323
shift
2424
else
@@ -105,5 +105,6 @@ $BEACON_NODE_BIN \
105105
--metrics \
106106
--metrics-address="127.0.0.1" \
107107
--metrics-port="$(( $BASE_METRICS_PORT + $NODE_ID ))" \
108+
--gossip-slashing-protection=dontcheck \
108109
${ADDITIONAL_BEACON_NODE_ARGS} \
109110
"$@"

0 commit comments

Comments
 (0)