diff --git a/relay/cmd/config.go b/relay/cmd/config.go index 4931b69635..c76227f946 100644 --- a/relay/cmd/config.go +++ b/relay/cmd/config.go @@ -94,7 +94,10 @@ func NewConfig(ctx *cli.Context) (Config, error) { InternalGetProofsTimeout: ctx.Duration(flags.InternalGetProofsTimeoutFlag.Name), InternalGetCoefficientsTimeout: ctx.Duration(flags.InternalGetCoefficientsTimeoutFlag.Name), }, - MetricsPort: ctx.Int(flags.MetricsPortFlag.Name), + MetricsPort: ctx.Int(flags.MetricsPortFlag.Name), + EnableMetrics: ctx.Bool(flags.EnableMetricsFlag.Name), + EnablePprof: ctx.Bool(flags.EnablePprofFlag.Name), + PprofHttpPort: ctx.Int(flags.PprofHttpPortFlag.Name), }, EthClientConfig: geth.ReadEthClientConfigRPCOnly(ctx), BLSOperatorStateRetrieverAddr: ctx.String(flags.BlsOperatorStateRetrieverAddrFlag.Name), diff --git a/relay/cmd/flags/flags.go b/relay/cmd/flags/flags.go index 2c82a1c128..6e3f205193 100644 --- a/relay/cmd/flags/flags.go +++ b/relay/cmd/flags/flags.go @@ -1,13 +1,13 @@ package flags import ( - "github.com/docker/go-units" "time" "github.com/Layr-Labs/eigenda/common" "github.com/Layr-Labs/eigenda/common/aws" "github.com/Layr-Labs/eigenda/common/geth" "github.com/Layr-Labs/eigenda/core/thegraph" + "github.com/docker/go-units" "github.com/urfave/cli" ) @@ -290,6 +290,25 @@ var ( EnvVar: common.PrefixEnvVar(envVarPrefix, "METRICS_PORT"), Value: 9101, } + EnableMetricsFlag = cli.BoolFlag{ + Name: common.PrefixFlag(FlagPrefix, "enable-metrics"), + Usage: "Enable prometheus metrics collection", + Required: true, + EnvVar: common.PrefixEnvVar(envVarPrefix, "ENABLE_METRICS"), + } + EnablePprofFlag = cli.BoolFlag{ + Name: common.PrefixFlag(FlagPrefix, "enable-pprof"), + Usage: "Enable pprof profiling", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "ENABLE_PPROF"), + } + PprofHttpPortFlag = cli.IntFlag{ + Name: common.PrefixFlag(FlagPrefix, "pprof-port"), + Usage: "Port to listen on for pprof", + Required: false, + EnvVar: common.PrefixEnvVar(envVarPrefix, "PPROF_PORT"), + Value: 6060, + } ) var requiredFlags = []cli.Flag{ @@ -299,6 +318,7 @@ var requiredFlags = []cli.Flag{ RelayKeysFlag, BlsOperatorStateRetrieverAddrFlag, EigenDAServiceManagerAddrFlag, + EnableMetricsFlag, } var optionalFlags = []cli.Flag{ @@ -336,6 +356,8 @@ var optionalFlags = []cli.Flag{ InternalGetCoefficientsTimeoutFlag, OnchainStateRefreshIntervalFlag, MetricsPortFlag, + EnablePprofFlag, + PprofHttpPortFlag, } var Flags []cli.Flag diff --git a/relay/config.go b/relay/config.go index dd1ca4c4d3..834f50fb07 100644 --- a/relay/config.go +++ b/relay/config.go @@ -1,9 +1,10 @@ package relay import ( + "time" + v2 "github.com/Layr-Labs/eigenda/core/v2" "github.com/Layr-Labs/eigenda/relay/limiter" - "time" ) // Config is the configuration for the relay Server. @@ -64,4 +65,13 @@ type Config struct { // MetricsPort is the port that the relay metrics server listens on. MetricsPort int + + // EnableMetrics enables the metrics HTTP server for prometheus metrics collection + EnableMetrics bool + + // EnablePprof enables the pprof HTTP server for profiling + EnablePprof bool + + // PprofHttpPort is the port that the pprof HTTP server listens on + PprofHttpPort int } diff --git a/relay/server.go b/relay/server.go index 47aaf14ce1..db19500442 100644 --- a/relay/server.go +++ b/relay/server.go @@ -11,6 +11,7 @@ import ( "github.com/Layr-Labs/eigenda/api" pb "github.com/Layr-Labs/eigenda/api/grpc/relay" "github.com/Layr-Labs/eigenda/common/healthcheck" + "github.com/Layr-Labs/eigenda/common/pprof" "github.com/Layr-Labs/eigenda/core" v2 "github.com/Layr-Labs/eigenda/core/v2" "github.com/Layr-Labs/eigenda/disperser/common/v2/blobstore" @@ -489,7 +490,18 @@ func buildInsufficientGetChunksBandwidthError( // Start starts the server listening for requests. This method will block until the server is stopped. func (s *Server) Start(ctx context.Context) error { - s.metrics.Start() + // Start metrics server if enabled + if s.config.EnableMetrics { + s.metrics.Start() + s.logger.Info("Enabled metrics for relay server", "port", s.config.MetricsPort) + } + + // Start pprof server if enabled + if s.config.EnablePprof { + pprofProfiler := pprof.NewPprofProfiler(fmt.Sprintf("%d", s.config.PprofHttpPort), s.logger) + go pprofProfiler.Start() + s.logger.Info("Enabled pprof for relay server", "port", s.config.PprofHttpPort) + } if s.chainReader != nil && s.metadataProvider != nil { go func() { @@ -548,9 +560,11 @@ func (s *Server) Stop() error { s.grpcServer.GracefulStop() } - err := s.metrics.Stop() - if err != nil { - return fmt.Errorf("error stopping metrics server: %w", err) + if s.config.EnableMetrics { + err := s.metrics.Stop() + if err != nil { + return fmt.Errorf("error stopping metrics server: %w", err) + } } return nil