From 3d473bd3897dc4e6c3579722c413961e00bdc033 Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Tue, 12 May 2026 14:04:40 -0700 Subject: [PATCH 1/4] plt-329 Added otel metrics for x/evm package --- app/app.go | 2 + x/evm/ante/fee.go | 5 +- x/evm/ante/metrics.go | 59 ++++++++++++++++++ x/evm/ante/metrics_test.go | 32 ++++++++++ x/evm/ante/preprocess.go | 9 ++- x/evm/ante/sig.go | 25 ++++++-- x/evm/keeper/abci.go | 17 +++-- x/evm/keeper/evm.go | 8 ++- x/evm/keeper/metrics.go | 107 ++++++++++++++++++++++++++++++++ x/evm/keeper/metrics_test.go | 41 ++++++++++++ x/evm/keeper/msg_server.go | 59 ++++++------------ x/evm/keeper/storage_cleanup.go | 9 ++- 12 files changed, 314 insertions(+), 59 deletions(-) create mode 100644 x/evm/ante/metrics.go create mode 100644 x/evm/ante/metrics_test.go create mode 100644 x/evm/keeper/metrics.go create mode 100644 x/evm/keeper/metrics_test.go diff --git a/app/app.go b/app/app.go index a737c31108..2499261810 100644 --- a/app/app.go +++ b/app/app.go @@ -512,6 +512,8 @@ func New( if err := utilmetrics.SetupOtelMetricsProvider(); err != nil { logger.Error(err.Error()) } + evmkeeper.InitEvmKeeperMetrics() + evmante.InitEvmAnteMetrics() keys := sdk.NewKVStoreKeys(kvStoreKeyNames...) tkeys := sdk.NewTransientStoreKeys(paramstypes.TStoreKey, evmtypes.TransientStoreKey) diff --git a/x/evm/ante/fee.go b/x/evm/ante/fee.go index d127a61bdd..95cac951a6 100644 --- a/x/evm/ante/fee.go +++ b/x/evm/ante/fee.go @@ -12,7 +12,7 @@ import ( sdkerrors "github.com/sei-protocol/sei-chain/sei-cosmos/types/errors" upgradekeeper "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/keeper" "github.com/sei-protocol/sei-chain/utils" - "github.com/sei-protocol/sei-chain/utils/metrics" + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" "github.com/sei-protocol/sei-chain/x/evm/derived" evmkeeper "github.com/sei-protocol/sei-chain/x/evm/keeper" "github.com/sei-protocol/sei-chain/x/evm/state" @@ -127,7 +127,8 @@ func (fc EVMFeeCheckDecorator) getMinimumFee(ctx sdk.Context) *big.Int { func (fc EVMFeeCheckDecorator) CalculatePriority(ctx sdk.Context, txData ethtx.TxData) *big.Int { gp := txData.EffectiveGasPrice(utils.Big0) if !ctx.IsCheckTx() && !ctx.IsReCheckTx() { - metrics.HistogramEvmEffectiveGasPrice(gp) + utilmetrics.HistogramEvmEffectiveGasPrice(gp) // TODO(PLT-330): remove once evm_effective_gas_price verified + evmAnteMetrics.effectiveGasPrice.Record(ctx.Context(), float64(gp.Uint64())) } priority := sdk.NewDecFromBigInt(gp).Quo(fc.evmKeeper.GetPriorityNormalizer(ctx)).TruncateInt().BigInt() if priority.Cmp(big.NewInt(antedecorators.MaxPriority)) > 0 { diff --git a/x/evm/ante/metrics.go b/x/evm/ante/metrics.go new file mode 100644 index 0000000000..ae59ed4a74 --- /dev/null +++ b/x/evm/ante/metrics.go @@ -0,0 +1,59 @@ +package ante + +import ( + "sync" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +type evmAnteMetricsType struct { + once sync.Once + + // Nonce tracking + pendingNonce metric.Int64Counter + nonceMismatch metric.Int64Counter + + // Gas price histogram + effectiveGasPrice metric.Float64Histogram + + // Association errors + associationError metric.Int64Counter +} + +var evmAnteMetrics evmAnteMetricsType + +func mustAnteMetric[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} + +// InitEvmAnteMetrics registers all OTel instruments for the x/evm ante package. +// Safe to call concurrently; instruments are registered exactly once. +func InitEvmAnteMetrics() { + evmAnteMetrics.once.Do(func() { + meter := otel.Meter("evm_ante") + + evmAnteMetrics.pendingNonce = mustAnteMetric(meter.Int64Counter( + "evm_pending_nonce_total", + metric.WithDescription("EVM pending nonce events by type (added, expired, rejected, accepted)"), + )) + + evmAnteMetrics.nonceMismatch = mustAnteMetric(meter.Int64Counter( + "evm_nonce_mismatch_total", + metric.WithDescription("EVM nonce mismatches by cause (too_high, too_low)"), + )) + + evmAnteMetrics.effectiveGasPrice = mustAnteMetric(meter.Float64Histogram( + "evm_effective_gas_price", + metric.WithDescription("Effective gas price for EVM transactions"), + )) + + evmAnteMetrics.associationError = mustAnteMetric(meter.Int64Counter( + "evm_association_error_total", + metric.WithDescription("EVM address association errors by scenario and address type"), + )) + }) +} diff --git a/x/evm/ante/metrics_test.go b/x/evm/ante/metrics_test.go new file mode 100644 index 0000000000..f9f9944ef9 --- /dev/null +++ b/x/evm/ante/metrics_test.go @@ -0,0 +1,32 @@ +package ante + +import ( + "context" + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" +) + +func TestInitEvmAnteMetricsNoPanic(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + InitEvmAnteMetrics() +} + +func TestEvmAnteMetricsAllInstrumentsUsable(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + InitEvmAnteMetrics() + + ctx := context.Background() + + for _, event := range []string{"added", "expired", "rejected", "accepted"} { + evmAnteMetrics.pendingNonce.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("event", event))) + } + for _, cause := range []string{"too_high", "too_low"} { + evmAnteMetrics.nonceMismatch.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("cause", cause))) + } + evmAnteMetrics.effectiveGasPrice.Record(ctx, 1e9) + evmAnteMetrics.associationError.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("scenario", "associate_tx_insufficient_funds"), attribute.String("type", "sei"))) +} diff --git a/x/evm/ante/preprocess.go b/x/evm/ante/preprocess.go index 4ca9e8a3e6..eb8f13a3c2 100644 --- a/x/evm/ante/preprocess.go +++ b/x/evm/ante/preprocess.go @@ -18,8 +18,11 @@ import ( sdkerrors "github.com/sei-protocol/sei-chain/sei-cosmos/types/errors" accountkeeper "github.com/sei-protocol/sei-chain/sei-cosmos/x/auth/keeper" authsigning "github.com/sei-protocol/sei-chain/sei-cosmos/x/auth/signing" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "github.com/sei-protocol/sei-chain/utils" - "github.com/sei-protocol/sei-chain/utils/metrics" + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" "github.com/sei-protocol/sei-chain/x/evm/derived" evmkeeper "github.com/sei-protocol/sei-chain/x/evm/keeper" evmtypes "github.com/sei-protocol/sei-chain/x/evm/types" @@ -77,7 +80,9 @@ func (p *EVMPreprocessDecorator) AnteHandle(ctx sdk.Context, tx sdk.Tx, simulate } else if isAssociateTx { // check if the account has enough balance (without charging) if !p.IsAccountBalancePositive(ctx, seiAddr, evmAddr) { - metrics.IncrementAssociationError("associate_tx_insufficient_funds", evmtypes.NewAssociationMissingErr(seiAddr.String())) + assocErr := evmtypes.NewAssociationMissingErr(seiAddr.String()) + utilmetrics.IncrementAssociationError("associate_tx_insufficient_funds", assocErr) // TODO(PLT-330): remove once evm_association_error_total verified + evmAnteMetrics.associationError.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("scenario", "associate_tx_insufficient_funds"), attribute.String("type", assocErr.AddressType()))) return ctx, sdkerrors.Wrap(sdkerrors.ErrInsufficientFunds, "account needs to have at least 1 wei to force association") } if err := associateHelper.AssociateAddresses(ctx, seiAddr, evmAddr, pubkey, false); err != nil { diff --git a/x/evm/ante/sig.go b/x/evm/ante/sig.go index 867ac0c04b..82acf90428 100644 --- a/x/evm/ante/sig.go +++ b/x/evm/ante/sig.go @@ -10,7 +10,10 @@ import ( tmtypes "github.com/sei-protocol/sei-chain/sei-tendermint/types" "github.com/sei-protocol/seilog" - "github.com/sei-protocol/sei-chain/utils/metrics" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" evmkeeper "github.com/sei-protocol/sei-chain/x/evm/keeper" "github.com/sei-protocol/sei-chain/x/evm/types" ) @@ -74,14 +77,16 @@ func (svd *EVMSigVerifyDecorator) AnteHandle(ctx sdk.Context, tx sdk.Tx, simulat ctx = ctx.WithCheckTxCallback(func(priority int64) { txHash := tmtypes.Tx(ctx.TxBytes()).Hash() svd.evmKeeper.AddPendingNonce(txHash, evmAddr, txNonce, priority) - metrics.IncrementPendingNonce("added") + utilmetrics.IncrementPendingNonce("added") // TODO(PLT-330): remove once evm_pending_nonce_total verified + evmAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "added"))) }) // if the mempool expires a transaction, this handler is invoked ctx = ctx.WithExpireTxHandler(func() { txHash := tmtypes.Tx(ctx.TxBytes()).Hash() svd.evmKeeper.RemovePendingNonce(txHash) - metrics.IncrementPendingNonce("expired") + utilmetrics.IncrementPendingNonce("expired") // TODO(PLT-330): remove once evm_pending_nonce_total verified + evmAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "expired"))) }) if txNonce > nextNonce { @@ -100,7 +105,8 @@ func (svd *EVMSigVerifyDecorator) AnteHandle(ctx sdk.Context, tx sdk.Tx, simulat if txNonce < nextNonceToBeMined { // this nonce has already been mined, we cannot accept it again - metrics.IncrementPendingNonce("rejected") + utilmetrics.IncrementPendingNonce("rejected") // TODO(PLT-330): remove once evm_pending_nonce_total verified + evmAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "rejected"))) return abci.Rejected } else if txNonce < nextPendingNonce { // check if the sender still has enough funds to pay for gas @@ -112,14 +118,21 @@ func (svd *EVMSigVerifyDecorator) AnteHandle(ctx sdk.Context, tx sdk.Tx, simulat // this nonce is allowed to process as it is part of the // consecutive nonces from nextNonceToBeMined to nextPendingNonce // This logic allows multiple nonces from an account to be processed in a block. - metrics.IncrementPendingNonce("accepted") + utilmetrics.IncrementPendingNonce("accepted") // TODO(PLT-330): remove once evm_pending_nonce_total verified + evmAnteMetrics.pendingNonce.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("event", "accepted"))) return abci.Accepted } return abci.Pending }) } } else if txNonce != nextNonce { - metrics.IncrementNonceMismatch(txNonce > nextNonce) + tooHigh := txNonce > nextNonce + utilmetrics.IncrementNonceMismatch(tooHigh) // TODO(PLT-330): remove once evm_nonce_mismatch_total verified + cause := "too_low" + if tooHigh { + cause = "too_high" + } + evmAnteMetrics.nonceMismatch.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("cause", cause))) return ctx, sdkerrors.ErrWrongSequence } diff --git a/x/evm/keeper/abci.go b/x/evm/keeper/abci.go index 60e950f83b..c1032f0318 100644 --- a/x/evm/keeper/abci.go +++ b/x/evm/keeper/abci.go @@ -14,13 +14,17 @@ import ( authtypes "github.com/sei-protocol/sei-chain/sei-cosmos/x/auth/types" abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" "github.com/sei-protocol/sei-chain/utils" - "github.com/sei-protocol/sei-chain/utils/metrics" + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" "github.com/sei-protocol/sei-chain/x/evm/state" "github.com/sei-protocol/sei-chain/x/evm/types" ) func (k *Keeper) BeginBlock(ctx sdk.Context) { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + beginBlockerStart := time.Now() + defer func() { + telemetry.ModuleMeasureSince(types.ModuleName, beginBlockerStart, telemetry.MetricKeyBeginBlocker) // TODO(PLT-330): remove once evm_abci_begin_blocker_duration_seconds verified + evmKeeperMetrics.beginBlockerDuration.Record(ctx.Context(), time.Since(beginBlockerStart).Seconds()) + }() // clear tx/tx responses from last block if !ctx.IsTracing() { k.SetMsgs([]*types.MsgEVMTransaction{}) @@ -59,7 +63,11 @@ func (k *Keeper) BeginBlock(ctx sdk.Context) { } func (k *Keeper) EndBlock(ctx sdk.Context, height int64, blockGasUsed int64) { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker) + endBlockerStart := time.Now() + defer func() { + telemetry.ModuleMeasureSince(types.ModuleName, endBlockerStart, telemetry.MetricKeyEndBlocker) // TODO(PLT-330): remove once evm_abci_end_blocker_duration_seconds verified + evmKeeperMetrics.endBlockerDuration.Record(ctx.Context(), time.Since(endBlockerStart).Seconds()) + }() // Bake height-1: at EndBlock(N) the indexer's safe latest is N-1. When // the snapshot store is wired, also Put a memiavl snapshot keyed by // its committed version (= N-1, since Commit fires after EndBlock); @@ -90,7 +98,8 @@ func (k *Keeper) EndBlock(ctx sdk.Context, height int64, blockGasUsed int64) { newBaseFee := k.AdjustDynamicBaseFeePerGas(ctx, uint64(blockGasUsed)) // nolint:gosec if newBaseFee != nil { - metrics.GaugeEvmBlockBaseFee(newBaseFee.TruncateInt().BigInt(), height) + utilmetrics.GaugeEvmBlockBaseFee(newBaseFee.TruncateInt().BigInt(), height) // TODO(PLT-330): remove once evm_block_base_fee verified + evmKeeperMetrics.blockBaseFee.Record(ctx.Context(), float64(newBaseFee.TruncateInt().BigInt().Uint64())) } var coinbase sdk.AccAddress if k.EthBlockTestConfig.Enabled { diff --git a/x/evm/keeper/evm.go b/x/evm/keeper/evm.go index eacfd85f02..3614b7878d 100644 --- a/x/evm/keeper/evm.go +++ b/x/evm/keeper/evm.go @@ -14,9 +14,12 @@ import ( sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" sdkerrors "github.com/sei-protocol/sei-chain/sei-cosmos/types/errors" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "github.com/sei-protocol/sei-chain/precompiles/solo" "github.com/sei-protocol/sei-chain/utils" - "github.com/sei-protocol/sei-chain/utils/metrics" + utilmetrics "github.com/sei-protocol/sei-chain/utils/metrics" "github.com/sei-protocol/sei-chain/x/evm/state" "github.com/sei-protocol/sei-chain/x/evm/types" ) @@ -64,7 +67,8 @@ func (k *Keeper) HandleInternalEVMDelegateCall(ctx sdk.Context, req *types.MsgIn senderEvmAddr, found := k.GetEVMAddress(ctx, senderAddr) if !found { err := types.NewAssociationMissingErr(req.Sender) - metrics.IncrementAssociationError("evm_handle_internal_evm_delegate_call", err) + utilmetrics.IncrementAssociationError("evm_handle_internal_evm_delegate_call", err) // TODO(PLT-330): remove once evm_association_error_total verified + evmKeeperMetrics.associationError.Add(ctx.Context(), 1, otelmetric.WithAttributes(attribute.String("scenario", "evm_handle_internal_evm_delegate_call"), attribute.String("type", err.AddressType()))) return nil, err } ret, err := k.CallEVM(ctx, senderEvmAddr, to, &zeroInt, req.Data) diff --git a/x/evm/keeper/metrics.go b/x/evm/keeper/metrics.go new file mode 100644 index 0000000000..23bb90fef7 --- /dev/null +++ b/x/evm/keeper/metrics.go @@ -0,0 +1,107 @@ +package keeper + +import ( + "sync" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +var evmMillisecondBuckets = metric.WithExplicitBucketBoundaries( + 0.000025, 0.000050, 0.0001, 0.0005, 0.001, 0.0025, 0.005, 0.010, 0.020, 0.050, 0.075, 0.1, 0.25, 0.5, 1, 10, +) + +type evmKeeperMetricsType struct { + once sync.Once + + // ABCI phase durations + beginBlockerDuration metric.Float64Histogram + endBlockerDuration metric.Float64Histogram + + // Block base fee (set each EndBlock) + blockBaseFee metric.Float64Gauge + + // EVMTransaction error counters + panics metric.Int64Counter + errors metric.Int64Counter + receiptStatus metric.Int64Counter + + // Association errors + associationError metric.Int64Counter + + // Zero-storage cleanup counters + zeroStorageProcessedKeys metric.Int64Counter + zeroStoragePrunedKeys metric.Int64Counter + zeroStoragePrunedBytes metric.Int64Counter +} + +var evmKeeperMetrics evmKeeperMetricsType + +func mustMetric[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} + +// InitEvmKeeperMetrics registers all OTel instruments for the x/evm keeper package. +// Safe to call concurrently; instruments are registered exactly once. +func InitEvmKeeperMetrics() { + evmKeeperMetrics.once.Do(func() { + meter := otel.Meter("evm_keeper") + + evmKeeperMetrics.beginBlockerDuration = mustMetric(meter.Float64Histogram( + "evm_abci_begin_blocker_duration_seconds", + metric.WithDescription("Duration of EVM module BeginBlock"), + metric.WithUnit("s"), + evmMillisecondBuckets, + )) + + evmKeeperMetrics.endBlockerDuration = mustMetric(meter.Float64Histogram( + "evm_abci_end_blocker_duration_seconds", + metric.WithDescription("Duration of EVM module EndBlock"), + metric.WithUnit("s"), + evmMillisecondBuckets, + )) + + evmKeeperMetrics.blockBaseFee = mustMetric(meter.Float64Gauge( + "evm_block_base_fee", + metric.WithDescription("Current EVM block base fee per gas"), + )) + + evmKeeperMetrics.panics = mustMetric(meter.Int64Counter( + "evm_panics_total", + metric.WithDescription("Number of panics recovered during EVM transaction processing"), + )) + + evmKeeperMetrics.errors = mustMetric(meter.Int64Counter( + "evm_errors_total", + metric.WithDescription("EVM processing errors by type (state_transition, stateDB_finalize, write_receipt, apply_message, vm_execution)"), + )) + + evmKeeperMetrics.receiptStatus = mustMetric(meter.Int64Counter( + "evm_receipt_status_total", + metric.WithDescription("EVM transaction receipt outcomes by status (success, failed)"), + )) + + evmKeeperMetrics.associationError = mustMetric(meter.Int64Counter( + "evm_association_error_total", + metric.WithDescription("EVM address association errors by scenario and address type"), + )) + + evmKeeperMetrics.zeroStorageProcessedKeys = mustMetric(meter.Int64Counter( + "evm_zero_storage_processed_keys_total", + metric.WithDescription("Storage slots scanned during zero-value cleanup"), + )) + + evmKeeperMetrics.zeroStoragePrunedKeys = mustMetric(meter.Int64Counter( + "evm_zero_storage_pruned_keys_total", + metric.WithDescription("Zero-value storage slots deleted during cleanup"), + )) + + evmKeeperMetrics.zeroStoragePrunedBytes = mustMetric(meter.Int64Counter( + "evm_zero_storage_pruned_bytes_total", + metric.WithDescription("Bytes reclaimed by zero-value storage slot cleanup"), + )) + }) +} diff --git a/x/evm/keeper/metrics_test.go b/x/evm/keeper/metrics_test.go new file mode 100644 index 0000000000..d74740448b --- /dev/null +++ b/x/evm/keeper/metrics_test.go @@ -0,0 +1,41 @@ +package keeper + +import ( + "context" + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" +) + +func TestInitEvmKeeperMetricsNoPanic(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + InitEvmKeeperMetrics() +} + +func TestEvmKeeperMetricsAllInstrumentsUsable(t *testing.T) { + otel.SetMeterProvider(noop.NewMeterProvider()) + InitEvmKeeperMetrics() + + ctx := context.Background() + + evmKeeperMetrics.beginBlockerDuration.Record(ctx, 0.1) + evmKeeperMetrics.endBlockerDuration.Record(ctx, 0.2) + evmKeeperMetrics.blockBaseFee.Record(ctx, 1e9) + + evmKeeperMetrics.panics.Add(ctx, 1) + evmKeeperMetrics.errors.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "state_transition"))) + evmKeeperMetrics.errors.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "stateDB_finalize"))) + evmKeeperMetrics.errors.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "write_receipt"))) + evmKeeperMetrics.errors.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "apply_message"))) + evmKeeperMetrics.errors.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("type", "vm_execution"))) + evmKeeperMetrics.receiptStatus.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("status", "success"))) + evmKeeperMetrics.receiptStatus.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("status", "failed"))) + evmKeeperMetrics.associationError.Add(ctx, 1, otelmetric.WithAttributes(attribute.String("scenario", "evm_handle_internal_evm_delegate_call"), attribute.String("type", "sei"))) + + evmKeeperMetrics.zeroStorageProcessedKeys.Add(ctx, 10) + evmKeeperMetrics.zeroStoragePrunedKeys.Add(ctx, 5) + evmKeeperMetrics.zeroStoragePrunedBytes.Add(ctx, 100) +} diff --git a/x/evm/keeper/msg_server.go b/x/evm/keeper/msg_server.go index d7f55a62d0..374d45f377 100644 --- a/x/evm/keeper/msg_server.go +++ b/x/evm/keeper/msg_server.go @@ -10,17 +10,18 @@ import ( "runtime/debug" "strings" - "github.com/armon/go-metrics" + armonmetrics "github.com/armon/go-metrics" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core" ethtypes "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" - "github.com/sei-protocol/sei-chain/sei-cosmos/telemetry" sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" occtypes "github.com/sei-protocol/sei-chain/sei-cosmos/types/occ" bankkeeper "github.com/sei-protocol/sei-chain/sei-cosmos/x/bank/keeper" banktypes "github.com/sei-protocol/sei-chain/sei-cosmos/x/bank/types" "github.com/sei-protocol/seilog" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" "github.com/sei-protocol/sei-chain/precompiles/wasmd" "github.com/sei-protocol/sei-chain/utils" @@ -82,20 +83,16 @@ func (server msgServer) EVMTransaction(goCtx context.Context, msg *types.MsgEVMT if !strings.Contains(fmt.Sprintf("%s", pe), occtypes.ErrReadEstimate.Error()) { debug.PrintStack() logger.Error("EVM PANIC", "err", pe) - seimetrics.SafeTelemetryIncrCounter(1, types.ModuleName, "panics") + seimetrics.SafeTelemetryIncrCounter(1, types.ModuleName, "panics") // TODO(PLT-330): remove once evm_panics_total verified + evmKeeperMetrics.panics.Add(goCtx, 1) } panic(pe) } if err != nil { logger.Error("Got EVM state transition error (not VM error)", "err", err) - seimetrics.SafeTelemetryIncrCounterWithLabels( - []string{types.ModuleName, "errors", "state_transition"}, - 1, - []metrics.Label{ - telemetry.NewLabel("type", err.Error()), - }, - ) + seimetrics.SafeTelemetryIncrCounterWithLabels([]string{types.ModuleName, "errors", "state_transition"}, 1, []armonmetrics.Label{{Name: "type", Value: err.Error()}}) // TODO(PLT-330): remove once evm_errors_total verified + evmKeeperMetrics.errors.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("type", "state_transition"))) return } extraSurplus := sdk.ZeroInt() @@ -104,13 +101,8 @@ func (server msgServer) EVMTransaction(goCtx context.Context, msg *types.MsgEVMT err = ferr logger.Error("failed to finalize EVM stateDB", "err", err) - seimetrics.SafeTelemetryIncrCounterWithLabels( - []string{types.ModuleName, "errors", "stateDB_finalize"}, - 1, - []metrics.Label{ - telemetry.NewLabel("type", err.Error()), - }, - ) + seimetrics.SafeTelemetryIncrCounterWithLabels([]string{types.ModuleName, "errors", "stateDB_finalize"}, 1, []armonmetrics.Label{{Name: "type", Value: err.Error()}}) // TODO(PLT-330): remove once evm_errors_total verified + evmKeeperMetrics.errors.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("type", "stateDB_finalize"))) return } if ctx.EVMEntryViaWasmdPrecompile() { @@ -138,21 +130,18 @@ func (server msgServer) EVMTransaction(goCtx context.Context, msg *types.MsgEVMT err = rerr logger.Error("failed to write EVM receipt", "err", err) - seimetrics.SafeTelemetryIncrCounterWithLabels( - []string{types.ModuleName, "errors", "write_receipt"}, - 1, - []metrics.Label{ - telemetry.NewLabel("type", err.Error()), - }, - ) + seimetrics.SafeTelemetryIncrCounterWithLabels([]string{types.ModuleName, "errors", "write_receipt"}, 1, []armonmetrics.Label{{Name: "type", Value: err.Error()}}) // TODO(PLT-330): remove once evm_errors_total verified + evmKeeperMetrics.errors.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("type", "write_receipt"))) return } // Add metrics for receipt status if receipt.Status == uint32(ethtypes.ReceiptStatusFailed) { - seimetrics.SafeTelemetryIncrCounter(1, "receipt", "status", "failed") + seimetrics.SafeTelemetryIncrCounter(1, "receipt", "status", "failed") // TODO(PLT-330): remove once evm_receipt_status_total verified + evmKeeperMetrics.receiptStatus.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("status", "failed"))) } else { - seimetrics.SafeTelemetryIncrCounter(1, "receipt", "status", "success") + seimetrics.SafeTelemetryIncrCounter(1, "receipt", "status", "success") // TODO(PLT-330): remove once evm_receipt_status_total verified + evmKeeperMetrics.receiptStatus.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("status", "success"))) } surplus = surplus.Add(extraSurplus) @@ -178,13 +167,8 @@ func (server msgServer) EVMTransaction(goCtx context.Context, msg *types.MsgEVMT // be checked in CheckTx first err = applyErr - seimetrics.SafeTelemetryIncrCounterWithLabels( - []string{types.ModuleName, "errors", "apply_message"}, - 1, - []metrics.Label{ - telemetry.NewLabel("type", err.Error()), - }, - ) + seimetrics.SafeTelemetryIncrCounterWithLabels([]string{types.ModuleName, "errors", "apply_message"}, 1, []armonmetrics.Label{{Name: "type", Value: err.Error()}}) // TODO(PLT-330): remove once evm_errors_total verified + evmKeeperMetrics.errors.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("type", "apply_message"))) return } @@ -193,13 +177,8 @@ func (server msgServer) EVMTransaction(goCtx context.Context, msg *types.MsgEVMT if res.Err != nil { serverRes.VmError = res.Err.Error() - seimetrics.SafeTelemetryIncrCounterWithLabels( - []string{types.ModuleName, "errors", "vm_execution"}, - 1, - []metrics.Label{ - telemetry.NewLabel("type", serverRes.VmError), - }, - ) + seimetrics.SafeTelemetryIncrCounterWithLabels([]string{types.ModuleName, "errors", "vm_execution"}, 1, []armonmetrics.Label{{Name: "type", Value: serverRes.VmError}}) // TODO(PLT-330): remove once evm_errors_total verified + evmKeeperMetrics.errors.Add(goCtx, 1, otelmetric.WithAttributes(attribute.String("type", "vm_execution"))) } serverRes.GasUsed = res.UsedGas diff --git a/x/evm/keeper/storage_cleanup.go b/x/evm/keeper/storage_cleanup.go index acd709d0a9..612cd1aa9f 100644 --- a/x/evm/keeper/storage_cleanup.go +++ b/x/evm/keeper/storage_cleanup.go @@ -85,11 +85,14 @@ func (k *Keeper) PruneZeroStorageSlots(ctx sdk.Context, limit int) (int, int) { k.setZeroStorageCleanupCheckpoint(ctx, nil) } - seimetrics.IncrEvmZeroStorageProcessedKeys(processedMetric) + seimetrics.IncrEvmZeroStorageProcessedKeys(processedMetric) // TODO(PLT-330): remove once evm_zero_storage_processed_keys_total verified + evmKeeperMetrics.zeroStorageProcessedKeys.Add(ctx.Context(), int64(processedMetric)) //nolint:gosec if deleted > 0 { - seimetrics.IncrEvmZeroStoragePrunedKeys(deletedMetric) - seimetrics.IncrEvmZeroStoragePrunedBytes(bytesPruned) + seimetrics.IncrEvmZeroStoragePrunedKeys(deletedMetric) // TODO(PLT-330): remove once evm_zero_storage_pruned_keys_total verified + seimetrics.IncrEvmZeroStoragePrunedBytes(bytesPruned) // TODO(PLT-330): remove once evm_zero_storage_pruned_bytes_total verified + evmKeeperMetrics.zeroStoragePrunedKeys.Add(ctx.Context(), int64(deletedMetric)) //nolint:gosec + evmKeeperMetrics.zeroStoragePrunedBytes.Add(ctx.Context(), int64(bytesPruned)) //nolint:gosec logger.Info("pruned zero storage slots", "processed", processed, "deleted", deleted, "bytes_saved", bytesPruned) } return processed, deleted From f54efecc57ace70fd519788a488c15fa10ac4e44 Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Tue, 12 May 2026 15:21:47 -0700 Subject: [PATCH 2/4] plt-329 Added new makefile target for bringing up grafana and prometheus containers with docker compose --- Makefile | 19 ++++++++++++ docker/docker-compose.monitoring.yml | 37 +++++++++++++++++++++++ docker/monitornode/config/grafana.yaml | 2 +- docker/monitornode/config/prometheus.yaml | 11 +++++-- 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 docker/docker-compose.monitoring.yml diff --git a/Makefile b/Makefile index c9f42798de..38fad9a582 100644 --- a/Makefile +++ b/Makefile @@ -318,6 +318,25 @@ docker-cluster-stop: @cd docker && DOCKER_PLATFORM=$(DOCKER_PLATFORM) USERID=$(shell id -u) GROUPID=$(shell id -g) GOCACHE=$(shell go env GOCACHE) docker compose down .PHONY: localnet-stop +# Start 4-node cluster with Prometheus and Grafana monitoring +docker-cluster-start-monitoring: docker-cluster-stop build-docker-node + @rm -rf $(PROJECT_HOME)/build/generated + @mkdir -p $(shell go env GOPATH)/pkg/mod + @mkdir -p $(shell go env GOCACHE) + @cd docker && \ + if [ "$${DOCKER_DETACH:-}" = "true" ]; then \ + DETACH_FLAG="-d"; \ + else \ + DETACH_FLAG=""; \ + fi; \ + DOCKER_PLATFORM=$(DOCKER_PLATFORM) USERID=$(shell id -u) GROUPID=$(shell id -g) GOCACHE=$(shell go env GOCACHE) NUM_ACCOUNTS=10 INVARIANT_CHECK_INTERVAL=${INVARIANT_CHECK_INTERVAL} UPGRADE_VERSION_LIST=${UPGRADE_VERSION_LIST} MOCK_BALANCES=${MOCK_BALANCES} GIGA_EXECUTOR=${GIGA_EXECUTOR} GIGA_OCC=${GIGA_OCC} RECEIPT_BACKEND=${RECEIPT_BACKEND} AUTOBAHN=${AUTOBAHN} GIGA_STORAGE=${GIGA_STORAGE} docker compose -f docker-compose.yml -f docker-compose.monitoring.yml up --no-attach grafana --no-attach prometheus $$DETACH_FLAG +.PHONY: docker-cluster-start-monitoring + +# Stop monitoring containers (Prometheus and Grafana) and cluster +docker-cluster-stop-monitoring: + @cd docker && DOCKER_PLATFORM=$(DOCKER_PLATFORM) USERID=$(shell id -u) GROUPID=$(shell id -g) GOCACHE=$(shell go env GOCACHE) docker compose -f docker-compose.yml -f docker-compose.monitoring.yml down +.PHONY: docker-cluster-stop-monitoring + # Run GIGA EVM integration tests with a GIGA-enabled cluster # This starts a fresh cluster with GIGA_EXECUTOR and GIGA_OCC enabled, # runs the EVM GIGA tests, then stops the cluster. diff --git a/docker/docker-compose.monitoring.yml b/docker/docker-compose.monitoring.yml new file mode 100644 index 0000000000..c12bfe19a7 --- /dev/null +++ b/docker/docker-compose.monitoring.yml @@ -0,0 +1,37 @@ +services: + prometheus: + container_name: sei-prometheus + image: prom/prometheus:latest + ports: + - "9099:9090" + volumes: + - ./monitornode/config/prometheus.yaml:/etc/prometheus/prometheus.yml:ro + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + networks: + - localnet + + grafana: + container_name: sei-grafana + image: grafana/grafana:latest + ports: + - "3000:3000" + volumes: + - ./monitornode/config/grafana.yaml:/etc/grafana/provisioning/datasources/grafana.yaml:ro + - ./monitornode/config/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/grafana-dashboards.yaml:ro + - ./monitornode/dashboards:/var/lib/grafana/dashboards:ro + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + extra_hosts: + - "host.docker.internal:host-gateway" + depends_on: + - prometheus + networks: + - localnet + +networks: + localnet: diff --git a/docker/monitornode/config/grafana.yaml b/docker/monitornode/config/grafana.yaml index 49c5cb5b58..6cf1204668 100644 --- a/docker/monitornode/config/grafana.yaml +++ b/docker/monitornode/config/grafana.yaml @@ -3,6 +3,6 @@ datasources: - name: Prometheus type: prometheus access: proxy - url: http://host.docker.internal:9091 + url: http://host.docker.internal:9099 isDefault: true editable: false diff --git a/docker/monitornode/config/prometheus.yaml b/docker/monitornode/config/prometheus.yaml index e8c478f098..8e108bcfb2 100644 --- a/docker/monitornode/config/prometheus.yaml +++ b/docker/monitornode/config/prometheus.yaml @@ -3,7 +3,14 @@ global: evaluation_interval: 15s scrape_configs: - - job_name: 'cryptosim' + - job_name: 'sei-localnet' + metrics_path: '/metrics' + params: + format: ['prometheus'] static_configs: - - targets: ['host.docker.internal:9090'] + - targets: + - 'sei-node-0:1317' + - 'sei-node-1:1317' + - 'sei-node-2:1317' + - 'sei-node-3:1317' scrape_interval: 5s From 191e9ed39831e65d06ea9425f85f3aad96fb89e2 Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Tue, 12 May 2026 15:24:51 -0700 Subject: [PATCH 3/4] plt-329 Made metric names unique --- x/evm/ante/metrics.go | 2 +- x/evm/keeper/metrics.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/x/evm/ante/metrics.go b/x/evm/ante/metrics.go index ae59ed4a74..9ac2ddc428 100644 --- a/x/evm/ante/metrics.go +++ b/x/evm/ante/metrics.go @@ -52,7 +52,7 @@ func InitEvmAnteMetrics() { )) evmAnteMetrics.associationError = mustAnteMetric(meter.Int64Counter( - "evm_association_error_total", + "evm_ante_association_error_total", metric.WithDescription("EVM address association errors by scenario and address type"), )) }) diff --git a/x/evm/keeper/metrics.go b/x/evm/keeper/metrics.go index 23bb90fef7..db8d6032d6 100644 --- a/x/evm/keeper/metrics.go +++ b/x/evm/keeper/metrics.go @@ -85,7 +85,7 @@ func InitEvmKeeperMetrics() { )) evmKeeperMetrics.associationError = mustMetric(meter.Int64Counter( - "evm_association_error_total", + "evm_keeper_association_error_total", metric.WithDescription("EVM address association errors by scenario and address type"), )) From f9e0114d18ba2fc6e6fed361fb678c729c5abd40 Mon Sep 17 00:00:00 2001 From: Amir Deris Date: Tue, 12 May 2026 15:46:31 -0700 Subject: [PATCH 4/4] plt-329 Used new configs for grafana and prometheus instead of modifying existing ones --- docker/docker-compose.monitoring.yml | 8 +++----- .../grafana-dashboards.yaml | 11 +++++++++++ .../grafana-datasource.yaml | 8 ++++++++ docker/docker_compose_monitoring/prometheus.yaml | 16 ++++++++++++++++ docker/monitornode/config/grafana.yaml | 2 +- docker/monitornode/config/prometheus.yaml | 11 ++--------- 6 files changed, 41 insertions(+), 15 deletions(-) create mode 100644 docker/docker_compose_monitoring/grafana-dashboards.yaml create mode 100644 docker/docker_compose_monitoring/grafana-datasource.yaml create mode 100644 docker/docker_compose_monitoring/prometheus.yaml diff --git a/docker/docker-compose.monitoring.yml b/docker/docker-compose.monitoring.yml index c12bfe19a7..556a1dcd99 100644 --- a/docker/docker-compose.monitoring.yml +++ b/docker/docker-compose.monitoring.yml @@ -5,7 +5,7 @@ services: ports: - "9099:9090" volumes: - - ./monitornode/config/prometheus.yaml:/etc/prometheus/prometheus.yml:ro + - ./docker_compose_monitoring/prometheus.yaml:/etc/prometheus/prometheus.yml:ro command: - --config.file=/etc/prometheus/prometheus.yml - --storage.tsdb.path=/prometheus @@ -19,15 +19,13 @@ services: ports: - "3000:3000" volumes: - - ./monitornode/config/grafana.yaml:/etc/grafana/provisioning/datasources/grafana.yaml:ro - - ./monitornode/config/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/grafana-dashboards.yaml:ro + - ./docker_compose_monitoring/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/grafana-datasource.yaml:ro + - ./docker_compose_monitoring/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/grafana-dashboards.yaml:ro - ./monitornode/dashboards:/var/lib/grafana/dashboards:ro environment: - GF_SECURITY_ADMIN_USER=admin - GF_SECURITY_ADMIN_PASSWORD=admin - GF_USERS_ALLOW_SIGN_UP=false - extra_hosts: - - "host.docker.internal:host-gateway" depends_on: - prometheus networks: diff --git a/docker/docker_compose_monitoring/grafana-dashboards.yaml b/docker/docker_compose_monitoring/grafana-dashboards.yaml new file mode 100644 index 0000000000..ee29cc7112 --- /dev/null +++ b/docker/docker_compose_monitoring/grafana-dashboards.yaml @@ -0,0 +1,11 @@ +apiVersion: 1 +providers: + - name: default + orgId: 1 + folder: "" + type: file + disableDeletion: false + updateIntervalSeconds: 30 + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: false diff --git a/docker/docker_compose_monitoring/grafana-datasource.yaml b/docker/docker_compose_monitoring/grafana-datasource.yaml new file mode 100644 index 0000000000..2d433996f6 --- /dev/null +++ b/docker/docker_compose_monitoring/grafana-datasource.yaml @@ -0,0 +1,8 @@ +apiVersion: 1 +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/docker/docker_compose_monitoring/prometheus.yaml b/docker/docker_compose_monitoring/prometheus.yaml new file mode 100644 index 0000000000..8e108bcfb2 --- /dev/null +++ b/docker/docker_compose_monitoring/prometheus.yaml @@ -0,0 +1,16 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'sei-localnet' + metrics_path: '/metrics' + params: + format: ['prometheus'] + static_configs: + - targets: + - 'sei-node-0:1317' + - 'sei-node-1:1317' + - 'sei-node-2:1317' + - 'sei-node-3:1317' + scrape_interval: 5s diff --git a/docker/monitornode/config/grafana.yaml b/docker/monitornode/config/grafana.yaml index 6cf1204668..49c5cb5b58 100644 --- a/docker/monitornode/config/grafana.yaml +++ b/docker/monitornode/config/grafana.yaml @@ -3,6 +3,6 @@ datasources: - name: Prometheus type: prometheus access: proxy - url: http://host.docker.internal:9099 + url: http://host.docker.internal:9091 isDefault: true editable: false diff --git a/docker/monitornode/config/prometheus.yaml b/docker/monitornode/config/prometheus.yaml index 8e108bcfb2..e8c478f098 100644 --- a/docker/monitornode/config/prometheus.yaml +++ b/docker/monitornode/config/prometheus.yaml @@ -3,14 +3,7 @@ global: evaluation_interval: 15s scrape_configs: - - job_name: 'sei-localnet' - metrics_path: '/metrics' - params: - format: ['prometheus'] + - job_name: 'cryptosim' static_configs: - - targets: - - 'sei-node-0:1317' - - 'sei-node-1:1317' - - 'sei-node-2:1317' - - 'sei-node-3:1317' + - targets: ['host.docker.internal:9090'] scrape_interval: 5s