diff --git a/app/legacyabci/begin_block.go b/app/legacyabci/begin_block.go index b0262ab46e..5114dd931f 100644 --- a/app/legacyabci/begin_block.go +++ b/app/legacyabci/begin_block.go @@ -47,7 +47,12 @@ func BeginBlock( byzantineValidators []abci.Misbehavior, keepers BeginBlockKeepers, ) { - defer telemetry.MeasureSince(time.Now(), "module", "total_begin_block") + start := time.Now() + defer func() { + legacyAbciMetrics.totalBeginBlockDuration.Record(ctx.Context(), time.Since(start).Seconds()) + // TODO(PLT-343): remove once begin_blocker_duration verified + telemetry.MeasureSince(start, "module", "total_begin_block") + }() keepers.EpochKeeper.BeginBlock(ctx) upgrade.BeginBlocker(*keepers.UpgradeKeeper, ctx) @@ -57,7 +62,12 @@ func BeginBlock( evidence.BeginBlocker(ctx, byzantineValidators, *keepers.EvidenceKeeper) staking.BeginBlocker(ctx, *keepers.StakingKeeper) func() { - defer telemetry.ModuleMeasureSince("ibc", time.Now(), telemetry.MetricKeyBeginBlocker) + ibcStart := time.Now() + defer func() { + legacyAbciMetrics.ibcBeginBlockerDuration.Record(ctx.Context(), time.Since(ibcStart).Seconds()) + // TODO(PLT-343): remove once ibc_begin_blocker_duration verified + telemetry.ModuleMeasureSince("ibc", ibcStart, telemetry.MetricKeyBeginBlocker) + }() ibcclient.BeginBlocker(ctx, keepers.IBCKeeper.ClientKeeper) }() keepers.EvmKeeper.BeginBlock(ctx) diff --git a/app/legacyabci/check_tx.go b/app/legacyabci/check_tx.go index 1f95f56e7d..bec4c1564a 100644 --- a/app/legacyabci/check_tx.go +++ b/app/legacyabci/check_tx.go @@ -24,6 +24,7 @@ import ( upgradekeeper "github.com/sei-protocol/sei-chain/sei-cosmos/x/upgrade/keeper" ibckeeper "github.com/sei-protocol/sei-chain/sei-ibc-go/modules/core/keeper" oraclekeeper "github.com/sei-protocol/sei-chain/x/oracle/keeper" + otelmetric "go.opentelemetry.io/otel/metric" ) var defaultRecoveryMiddleware = newDefaultRecoveryMiddleware() @@ -58,13 +59,18 @@ func CheckTx( if ctx.IsReCheckTx() { label = "recheck" } - defer telemetry.MeasureThroughputSinceWithLabels( - telemetry.TxCount, - []gometrics.Label{ - telemetry.NewLabel("mode", label), - }, - time.Now(), - ) + txStart := time.Now() + defer func() { + legacyAbciMetrics.txDuration.Record(ctx.Context(), time.Since(txStart).Seconds(), otelmetric.WithAttributes(attribute.String("mode", label))) + // TODO(PLT-343): remove once tx_duration verified + telemetry.MeasureThroughputSinceWithLabels( + telemetry.TxCount, + []gometrics.Label{ + telemetry.NewLabel("mode", label), + }, + txStart, + ) + }() spanCtx, span := tracingInfo.StartWithContext("CheckTx", ctx.TraceSpanContext()) defer span.End() ctx = ctx.WithTraceSpanContext(spanCtx) diff --git a/app/legacyabci/deliver_tx.go b/app/legacyabci/deliver_tx.go index 314f78534f..0d73f4b25d 100644 --- a/app/legacyabci/deliver_tx.go +++ b/app/legacyabci/deliver_tx.go @@ -21,6 +21,7 @@ import ( evmkeeper "github.com/sei-protocol/sei-chain/x/evm/keeper" oraclekeeper "github.com/sei-protocol/sei-chain/x/oracle/keeper" "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/trace" ) @@ -51,13 +52,18 @@ func DeliverTx( txCtx sdk.Context, err error, ) { - defer telemetry.MeasureThroughputSinceWithLabels( - telemetry.TxCount, - []metrics.Label{ - telemetry.NewLabel("mode", "deliver"), - }, - time.Now(), - ) + txStart := time.Now() + defer func() { + legacyAbciMetrics.txDuration.Record(ctx.Context(), time.Since(txStart).Seconds(), otelmetric.WithAttributes(attribute.String("mode", "deliver"))) + // TODO(PLT-343): remove once tx_duration verified + telemetry.MeasureThroughputSinceWithLabels( + telemetry.TxCount, + []metrics.Label{ + telemetry.NewLabel("mode", "deliver"), + }, + txStart, + ) + }() // check for existing parent tracer, and if applicable, use it spanCtx, span := tracingInfo.StartWithContext("DeliverTx", ctx.TraceSpanContext()) defer span.End() diff --git a/app/legacyabci/metrics.go b/app/legacyabci/metrics.go new file mode 100644 index 0000000000..2e1c4320f4 --- /dev/null +++ b/app/legacyabci/metrics.go @@ -0,0 +1,47 @@ +package legacyabci + +import ( + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +var ( + meter = otel.Meter("legacyabci") + + // finerGrainedBuckets units are in seconds + finerGrainedBuckets = metric.WithExplicitBucketBoundaries( + 0.000025, 0.000050, 0.0001, 0.0005, 0.001, 0.0025, 0.005, 0.010, 0.020, 0.050, 0.075, 0.1, 0.25, 0.5, 1, 10, + ) + + legacyAbciMetrics = struct { + totalBeginBlockDuration metric.Float64Histogram + ibcBeginBlockerDuration metric.Float64Histogram + txDuration metric.Float64Histogram + }{ + totalBeginBlockDuration: must(meter.Float64Histogram( + "begin_blocker_duration", + metric.WithDescription("Total duration of begin-block execution in seconds"), + finerGrainedBuckets, + metric.WithUnit("s"), + )), + ibcBeginBlockerDuration: must(meter.Float64Histogram( + "ibc_begin_blocker_duration", + metric.WithDescription("Duration of IBC begin-blocker execution in seconds"), + finerGrainedBuckets, + metric.WithUnit("s"), + )), + txDuration: must(meter.Float64Histogram( + "tx_duration", + metric.WithDescription("Duration of tx processing by mode (check, recheck, deliver)"), + finerGrainedBuckets, + metric.WithUnit("s"), + )), + } +) + +func must[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} diff --git a/loadtest/loadtest_client.go b/loadtest/loadtest_client.go index 9028494ee8..0ca0ee0646 100644 --- a/loadtest/loadtest_client.go +++ b/loadtest/loadtest_client.go @@ -20,7 +20,8 @@ import ( "github.com/sei-protocol/sei-chain/sei-cosmos/types" typestx "github.com/sei-protocol/sei-chain/sei-cosmos/types/tx" stakingtypes "github.com/sei-protocol/sei-chain/sei-cosmos/x/staking/types" - "github.com/sei-protocol/sei-chain/utils/metrics" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" "golang.org/x/sync/semaphore" "google.golang.org/grpc" "google.golang.org/grpc/connectivity" @@ -194,7 +195,7 @@ func (c *LoadTestClient) BuildTxs( } // Generate a message type first messageType := c.getRandomMessageType(config.MessageTypes) - metrics.IncrProducerEventCount(messageType) + loadtestMetrics.produceCount.Add(context.Background(), 1, otelmetric.WithAttributes(attribute.String("msg_type", messageType))) var signedTx SignedTx // Sign EVM and Cosmos TX differently switch messageType { @@ -264,7 +265,7 @@ func (c *LoadTestClient) SendTxs( return case tx, ok := <-txQueue: atomic.AddInt64(sentCountPerMsgType[tx.MsgType], 1) - metrics.IncrConsumerEventCount(tx.MsgType) + loadtestMetrics.consumeCount.Add(context.Background(), 1, otelmetric.WithAttributes(attribute.String("msg_type", tx.MsgType))) if !ok { fmt.Printf("Stopping consumers\n") return diff --git a/loadtest/main.go b/loadtest/main.go index 55b766719f..df0c80869f 100644 --- a/loadtest/main.go +++ b/loadtest/main.go @@ -39,8 +39,9 @@ import ( "golang.org/x/time/rate" "github.com/sei-protocol/sei-chain/app" - "github.com/sei-protocol/sei-chain/utils/metrics" tokenfactorytypes "github.com/sei-protocol/sei-chain/x/tokenfactory/types" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" ) var TestConfig EncodingConfig @@ -273,7 +274,7 @@ func printStats( //nolint:gosec tps := float64(sentCount-prevTotalSent) / elapsed.Seconds() totalTps += tps - defer metrics.SetThroughputMetricByType("tps", float32(tps), msgType) + defer loadtestMetrics.tps.Record(context.Background(), tps, otelmetric.WithAttributes(attribute.String("msg_type", msgType))) } var totalDuration time.Duration diff --git a/loadtest/metrics.go b/loadtest/metrics.go index 8e1a4d9241..0c77ecec29 100644 --- a/loadtest/metrics.go +++ b/loadtest/metrics.go @@ -9,8 +9,43 @@ import ( "github.com/sei-protocol/sei-chain/sei-cosmos/telemetry" "github.com/sei-protocol/sei-chain/sei-cosmos/types/rest" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" ) +var ( + ltMeter = otel.Meter("loadtest") + + loadtestMetrics = struct { + produceCount metric.Int64Counter + consumeCount metric.Int64Counter + tps metric.Float64Gauge + }{ + produceCount: must(ltMeter.Int64Counter( + "produce", + metric.WithDescription("Number of transactions produced by message type"), + metric.WithUnit("{count}"), + )), + consumeCount: must(ltMeter.Int64Counter( + "consume", + metric.WithDescription("Number of transactions consumed by message type"), + metric.WithUnit("{count}"), + )), + tps: must(ltMeter.Float64Gauge( + "tps", + metric.WithDescription("Transactions per second by message type"), + metric.WithUnit("{tps}"), + )), + } +) + +func must[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} + const ( defaultListenAddress = "0.0.0.0" defaultMetricsPort = 9696 diff --git a/utils/logging/metrics.go b/utils/logging/metrics.go new file mode 100644 index 0000000000..56dae1b176 --- /dev/null +++ b/utils/logging/metrics.go @@ -0,0 +1,27 @@ +package logging + +import ( + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" +) + +var ( + meter = otel.Meter("utils_logging") + + loggingMetrics = struct { + logNotDoneAfter metric.Int64Counter + }{ + logNotDoneAfter: must(meter.Int64Counter( + "log_not_done_after", + metric.WithDescription("Number of times an operation was still not finished after the expected duration by label"), + metric.WithUnit("{count}"), + )), + } +) + +func must[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} diff --git a/utils/logging/time.go b/utils/logging/time.go index c8fd5caa47..a192761c37 100644 --- a/utils/logging/time.go +++ b/utils/logging/time.go @@ -1,10 +1,12 @@ package logging import ( + "context" "time" - "github.com/sei-protocol/sei-chain/utils/metrics" "github.com/sei-protocol/seilog" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" ) var logger = seilog.NewLogger("utils", "logging") @@ -37,7 +39,7 @@ func LogIfNotDoneAfter[R any](task func() (R, error), after time.Duration, label // reraise panic in main goroutine panic(err) case <-time.After(after): - metrics.IncrLogIfNotDoneAfter(label) + loggingMetrics.logNotDoneAfter.Add(context.Background(), 1, otelmetric.WithAttributes(attribute.String("label", label))) logger.Error("operation still not finished", "label", label, "after", after) } } diff --git a/utils/metrics/metrics_util.go b/utils/metrics/metrics_util.go index 695b734717..4de74139ed 100644 --- a/utils/metrics/metrics_util.go +++ b/utils/metrics/metrics_util.go @@ -149,20 +149,6 @@ func IncrFailedConcurrentDeliverTxCounter() { ) } -// Counts the number of operations that failed due to operation timeout -// Metric Names: -// -// sei_log_not_done_after_counter -func IncrLogIfNotDoneAfter(label string) { - SafeMetricsIncrCounterWithLabels( - []string{"sei", "log", "not", "done", "after"}, - 1, - []metrics.Label{ - telemetry.NewLabel("label", label), - }, - ) -} - // Measures the time taken to execute a sudo msg // Metric Names: // @@ -256,18 +242,6 @@ func IncrPriceUpdateDenom(denom string) { ) } -// Measures throughput per message type -// Metric Name: -// -// sei_throughput_ -func SetThroughputMetricByType(metricName string, value float32, msgType string) { - telemetry.SetGaugeWithLabels( - []string{"sei", "loadtest", "tps", metricName}, - value, - []metrics.Label{telemetry.NewLabel("msg_type", msgType)}, - ) -} - // Measures the number of times the total block gas wanted in the proposal exceeds the max // Metric Name: // @@ -445,32 +419,6 @@ func IncrementPendingNonce(event string) { ) } -// IncrProducerEventCount increments the counter for events produced. -// This metric counts the number of events produced by the system. -// Metric Name: -// -// sei_loadtest_produce_count -func IncrProducerEventCount(msgType string) { - SafeTelemetryIncrCounterWithLabels( - []string{"sei", "loadtest", "produce", "count"}, - 1, - []metrics.Label{telemetry.NewLabel("msg_type", msgType)}, - ) -} - -// IncrConsumerEventCount increments the counter for events consumed. -// This metric counts the number of events consumed by the system. -// Metric Name: -// -// sei_loadtest_consume_count -func IncrConsumerEventCount(msgType string) { - SafeTelemetryIncrCounterWithLabels( - []string{"sei", "loadtest", "consume", "count"}, - 1, - []metrics.Label{telemetry.NewLabel("msg_type", msgType)}, - ) -} - func AddHistogramMetric(key []string, value float32) { metrics.AddSample(key, value) } diff --git a/utils/panic.go b/utils/panic.go index fcb173f2f8..f142a0d620 100644 --- a/utils/panic.go +++ b/utils/panic.go @@ -5,8 +5,6 @@ import ( "runtime/debug" "strings" - "github.com/armon/go-metrics" - "github.com/sei-protocol/sei-chain/sei-cosmos/telemetry" sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" "github.com/sei-protocol/seilog" ) @@ -35,23 +33,6 @@ func LogPanicCallback(ctx sdk.Context, r any) func(any) { } } -func MetricsPanicCallback(err any, ctx sdk.Context, key string) { - logger.Error("panic occurred during order matching for key", "key", key, "err", err) - defer func() { - if e := recover(); e != nil { - return - } - }() - telemetry.IncrCounterWithLabels( - []string{"panic"}, - 1, - []metrics.Label{ - telemetry.NewLabel("error", fmt.Sprintf("%s", err)), - telemetry.NewLabel("module", key), - }, - ) -} - func DecorateHardFailError(err error) error { return fmt.Errorf("%s:%s", HardFailPrefix, err.Error()) } diff --git a/wasmbinding/metrics.go b/wasmbinding/metrics.go new file mode 100644 index 0000000000..953896610b --- /dev/null +++ b/wasmbinding/metrics.go @@ -0,0 +1,62 @@ +package wasmbinding + +import ( + "context" + "errors" + "fmt" + + sdkerrors "github.com/sei-protocol/sei-chain/sei-cosmos/types/errors" + "github.com/sei-protocol/sei-chain/utils/metrics" + evmtypes "github.com/sei-protocol/sei-chain/x/evm/types" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +var ( + meter = otel.Meter("wasmbinding") + + wasmQueryMetrics = struct { + associationError metric.Int64Counter + sdkError metric.Int64Counter + }{ + associationError: must(meter.Int64Counter( + "wasm_query_association_error", + metric.WithDescription("Association errors during wasm query handling by scenario and address type"), + metric.WithUnit("{count}"), + )), + sdkError: must(meter.Int64Counter( + "wasm_query_sdk_error", + metric.WithDescription("SDK errors during wasm query handling by scenario, codespace, and code"), + metric.WithUnit("{count}"), + )), + } +) + +func must[V any](v V, err error) V { + if err != nil { + panic(err) + } + return v +} + +func recordQueryError(ctx context.Context, scenario string, err error) { + if err == nil { + return + } + var assocErr evmtypes.AssociationMissingErr + if errors.As(err, &assocErr) { + wasmQueryMetrics.associationError.Add(ctx, 1, metric.WithAttributes( + attribute.String("scenario", scenario), + attribute.String("type", assocErr.AddressType()), + )) + } else if codespace, code, _ := sdkerrors.ABCIInfo(err, false); codespace != sdkerrors.UndefinedCodespace { + wasmQueryMetrics.sdkError.Add(ctx, 1, metric.WithAttributes( + attribute.String("scenario", scenario), + attribute.String("codespace", codespace), + attribute.String("code", fmt.Sprintf("%d", code)), + )) + } + // TODO(PLT-343): remove once wasm_query_association_error and wasm_query_sdk_error verified + metrics.IncrementErrorMetrics(scenario, err) +} diff --git a/wasmbinding/queries.go b/wasmbinding/queries.go index a45b7d49e9..61f337c693 100644 --- a/wasmbinding/queries.go +++ b/wasmbinding/queries.go @@ -9,7 +9,6 @@ import ( sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" stakingkeeper "github.com/sei-protocol/sei-chain/sei-cosmos/x/staking/keeper" - "github.com/sei-protocol/sei-chain/utils/metrics" epochwasm "github.com/sei-protocol/sei-chain/x/epoch/client/wasm" epochbindings "github.com/sei-protocol/sei-chain/x/epoch/client/wasm/bindings" epochtypes "github.com/sei-protocol/sei-chain/x/epoch/types" @@ -138,9 +137,7 @@ func (qp QueryPlugin) HandleEVMQuery(ctx sdk.Context, queryData json.RawMessage) } queryType = parsedQuery.GetQueryType() - defer func() { - metrics.IncrementErrorMetrics(string(queryType), err) - }() + defer func() { recordQueryError(ctx.Context(), string(queryType), err) }() switch queryType { case evmbindings.StaticCallType: @@ -284,9 +281,7 @@ func (qp QueryPlugin) HandleStakingExtQuery(ctx sdk.Context, queryData json.RawM } queryType = parsedQuery.GetQueryType() - defer func() { - metrics.IncrementErrorMetrics(string(queryType), err) - }() + defer func() { recordQueryError(ctx.Context(), string(queryType), err) }() switch queryType { case UnbondingDelegationsType: