Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add clickhouse-cluster and clickhouse-use-replication flags to setup tables #28

Merged
merged 3 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cmd/substreams-sink-sql/common_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ func newDBLoader(
batchRowFlushInterval int,
liveBlockFlushInterval int,
cursorsTable string,
clickhouseCluster string,
useClickhouseReplication bool,
handleReorgs bool,
) (*db.Loader, error) {
moduleMismatchMode, err := db.ParseOnModuleHashMismatch(sflags.MustGetString(cmd, onModuleHashMistmatchFlag))
cli.NoError(err, "invalid mistmatch mode")

dbLoader, err := db.NewLoader(psqlDSN, batchBlockFlushInterval, batchRowFlushInterval, liveBlockFlushInterval, cursorsTable, moduleMismatchMode, &handleReorgs, zlog, tracer)
dbLoader, err := db.NewLoader(psqlDSN, batchBlockFlushInterval, batchRowFlushInterval, liveBlockFlushInterval, cursorsTable, clickhouseCluster, useClickhouseReplication, moduleMismatchMode, &handleReorgs, zlog, tracer)
if err != nil {
return nil, fmt.Errorf("new psql loader: %w", err)
}
Expand Down Expand Up @@ -97,6 +99,8 @@ func AddCommonSinkerFlags(flags *pflag.FlagSet) {
updates to the cursor will overwrite the module hash in the database.
`))
flags.String("cursors-table", "cursors", "name of the cursors table")
flags.String("clickhouse-cluster", "", "if set. this will append the ON CLUSTER clause when setting up tables in Clickhouse")
flags.Bool("clickhouse-use-replication", false, "if set, this will replace non-replicated table engines with replicated ones when setting up tables in Clickhouse")
}

func readBlockRangeArgument(in string) (blockRange *bstream.Range, err error) {
Expand Down
6 changes: 5 additions & 1 deletion cmd/substreams-sink-sql/create_user.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ func createUserE(cmd *cobra.Command, args []string) error {
}

if err := retry(ctx, func(ctx context.Context) error {
dbLoader, err := db.NewLoader(dsn, 0, 0, 0, "cursors", db.OnModuleHashMismatchError, nil, zlog, tracer)
dbLoader, err := db.NewLoader(dsn, 0, 0, 0,
"cursors",
sflags.MustGetString(cmd, "clickhouse-cluster"),
sflags.MustGetBool(cmd, "clickhouse-use-replication"),
db.OnModuleHashMismatchError, nil, zlog, tracer)
if err != nil {
return fmt.Errorf("new psql loader: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/substreams-sink-sql/generate_csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func generateCsvE(cmd *cobra.Command, args []string) error {
return fmt.Errorf("new base sinker: %w", err)
}

dbLoader, err := newDBLoader(cmd, dsn, 0, 0, 0, sflags.MustGetString(cmd, "cursors-table"), false) // flush interval not used in CSV mode
dbLoader, err := newDBLoader(cmd, dsn, 0, 0, 0, sflags.MustGetString(cmd, "cursors-table"), "", false, false) // flush interval not used in CSV mode
if err != nil {
return fmt.Errorf("new db loader: %w", err)
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/substreams-sink-sql/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ func sinkRunE(cmd *cobra.Command, args []string) error {
sflags.MustGetInt(cmd, "batch-row-flush-interval"),
sflags.MustGetInt(cmd, "live-block-flush-interval"),
sflags.MustGetString(cmd, "cursors-table"),
sflags.MustGetString(cmd, "clickhouse-cluster"),
sflags.MustGetBool(cmd, "clickhouse-use-replication"),
handleReorgs,
)
if err != nil {
Expand Down
6 changes: 5 additions & 1 deletion cmd/substreams-sink-sql/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ func sinkSetupE(cmd *cobra.Command, args []string) error {
return fmt.Errorf("extract sink config: %w", err)
}

dbLoader, err := db.NewLoader(dsn, 0, 0, 0, sflags.MustGetString(cmd, "cursors-table"), db.OnModuleHashMismatchError, nil, zlog, tracer)
dbLoader, err := db.NewLoader(dsn, 0, 0, 0,
sflags.MustGetString(cmd, "cursors-table"),
sflags.MustGetString(cmd, "clickhouse-cluster"),
sflags.MustGetBool(cmd, "clickhouse-use-replication"),
db.OnModuleHashMismatchError, nil, zlog, tracer)
if err != nil {
return fmt.Errorf("new psql loader: %w", err)
}
Expand Down
7 changes: 6 additions & 1 deletion cmd/substreams-sink-sql/tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,12 @@ func toolsDeleteCursorE(cmd *cobra.Command, args []string) error {

func toolsCreateLoader(cmd *cobra.Command) *db.Loader {
dsn := viper.GetString("tools-global-dsn")
loader, err := db.NewLoader(dsn, 0, 0, 0, sflags.MustGetString(cmd, "cursors-table"), db.OnModuleHashMismatchIgnore, nil, zlog, tracer)
loader, err := db.NewLoader(
dsn, 0, 0, 0,
sflags.MustGetString(cmd, "cursors-table"),
sflags.MustGetString(cmd, "clickhouse-cluster"),
sflags.MustGetBool(cmd, "clickhouse-use-replication"),
db.OnModuleHashMismatchIgnore, nil, zlog, tracer)
cli.NoError(err, "Unable to instantiate database manager from DSN %q", dsn)

if err := loader.LoadTables(); err != nil {
Expand Down
33 changes: 20 additions & 13 deletions db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ type Loader struct {
cursorTableName string
cursorTable *TableInfo

clickhouseCluster string
useClickhouseReplication bool

handleReorgs bool
batchBlockFlushInterval int
batchRowFlushInterval int
Expand All @@ -56,6 +59,8 @@ func NewLoader(
batchRowFlushInterval int,
liveBlockFlushInterval int,
cursorTableName string,
clickhouseCluster string,
useClickhouseReplication bool,
moduleMismatchMode OnModuleHashMismatch,
handleReorgs *bool,
logger *zap.Logger,
Expand All @@ -72,18 +77,20 @@ func NewLoader(
}

l := &Loader{
DB: db,
database: dsn.database,
schema: dsn.schema,
entries: NewOrderedMap[string, *OrderedMap[string, *Operation]](),
tables: map[string]*TableInfo{},
cursorTableName: cursorTableName,
batchBlockFlushInterval: batchBlockFlushInterval,
batchRowFlushInterval: batchRowFlushInterval,
liveBlockFlushInterval: liveBlockFlushInterval,
moduleMismatchMode: moduleMismatchMode,
logger: logger,
tracer: tracer,
DB: db,
database: dsn.database,
schema: dsn.schema,
entries: NewOrderedMap[string, *OrderedMap[string, *Operation]](),
tables: map[string]*TableInfo{},
cursorTableName: cursorTableName,
batchBlockFlushInterval: batchBlockFlushInterval,
batchRowFlushInterval: batchRowFlushInterval,
liveBlockFlushInterval: liveBlockFlushInterval,
clickhouseCluster: clickhouseCluster,
useClickhouseReplication: useClickhouseReplication,
moduleMismatchMode: moduleMismatchMode,
logger: logger,
tracer: tracer,
}
_, err = l.tryDialect()
if err != nil {
Expand Down Expand Up @@ -327,7 +334,7 @@ func (l *Loader) Setup(ctx context.Context, schemaSql string, withPostgraphile b
}

func (l *Loader) setupCursorTable(ctx context.Context, withPostgraphile bool) error {
query := l.getDialect().GetCreateCursorQuery(l.schema, l.cursorTableName, withPostgraphile)
query := l.getDialect().GetCreateCursorQuery(l, withPostgraphile)
_, err := l.ExecContext(ctx, query)
return err
}
Expand Down
2 changes: 1 addition & 1 deletion db/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func (e UnknownDriverError) Error() string {
}

type dialect interface {
GetCreateCursorQuery(schema string, cursorsTable string, withPostgraphile bool) string
GetCreateCursorQuery(l *Loader, withPostgraphile bool) string
GetCreateHistoryQuery(schema string, withPostgraphile bool) string
ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error
DriverSupportRowsAffected() bool
Expand Down
61 changes: 44 additions & 17 deletions db/dialect_clickhouse.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import (
"strings"
"time"

clickhouse "github.com/AfterShip/clickhouse-sql-parser/parser"
_ "github.com/ClickHouse/clickhouse-go/v2"

"github.com/streamingfast/cli"
sink "github.com/streamingfast/substreams-sink"
"go.uber.org/zap"
Expand Down Expand Up @@ -85,30 +85,57 @@ func (d clickhouseDialect) Revert(tx Tx, ctx context.Context, l *Loader, lastVal
return fmt.Errorf("clickhouse driver does not support reorg management.")
}

func (d clickhouseDialect) GetCreateCursorQuery(schema string, cursorsTable string, withPostgraphile bool) string {
_ = withPostgraphile // TODO: see if this can work
func (d clickhouseDialect) GetCreateCursorQuery(l *Loader, _ bool) string {

clusterClause := ""
if l.clickhouseCluster != "" {
clusterClause = fmt.Sprintf(" ON CLUSTER %s", EscapeIdentifier(l.clickhouseCluster))
}
engine := "ReplacingMergeTree()"
if l.useClickhouseReplication {
engine = "Replicated" + engine
}

return fmt.Sprintf(cli.Dedent(`
CREATE TABLE IF NOT EXISTS %s.%s
(
id String,
cursor String,
block_num Int64,
block_id String
) Engine = ReplacingMergeTree() ORDER BY id;
`), EscapeIdentifier(schema), EscapeIdentifier(cursorsTable))
CREATE TABLE IF NOT EXISTS %s.%s %s
(
id String,
cursor String,
block_num Int64,
block_id String
) Engine = %s ORDER BY id;
`), EscapeIdentifier(l.schema), EscapeIdentifier(l.cursorTableName), clusterClause, engine)
}

func (d clickhouseDialect) GetCreateHistoryQuery(schema string, withPostgraphile bool) string {
panic("clickhouse does not support reorg management")
}

func (d clickhouseDialect) ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error {
for _, query := range strings.Split(schemaSql, ";") {
if len(strings.TrimSpace(query)) == 0 {
continue
}
if _, err := l.ExecContext(ctx, query); err != nil {
return fmt.Errorf("exec schema: %w", err)

stmts, err := clickhouse.NewParser(schemaSql).ParseStmts()
if err != nil {
return fmt.Errorf("parsing schema: %w", err)
}

for _, stmt := range stmts {

if createTable, ok := stmt.(*clickhouse.CreateTable); ok {
if l.clickhouseCluster != "" {
l.logger.Info("appending 'ON CLUSTER' clause to 'CREATE TABLE'", zap.String("cluster", l.clickhouseCluster), zap.String("table", createTable.Name.String()))
createTable.OnCluster = &clickhouse.ClusterClause{Expr: &clickhouse.StringLiteral{Literal: l.clickhouseCluster}}
}
if l.useClickhouseReplication &&
!strings.HasPrefix(createTable.Engine.Name, "Replicated") &&
strings.HasSuffix(createTable.Engine.Name, "MergeTree") {
newEngine := "Replicated" + createTable.Engine.Name
createTable.Engine.Name = newEngine
l.logger.Info("replacing table engine with replicated one", zap.String("engine", createTable.Engine.Name), zap.String("new_engine", newEngine))
}

if _, err := l.ExecContext(ctx, stmt.String()); err != nil {
return fmt.Errorf("exec schema: %w", err)
}
}
}
return nil
Expand Down
6 changes: 3 additions & 3 deletions db/dialect_postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ func (d postgresDialect) pruneReversibleSegment(tx Tx, ctx context.Context, sche
return nil
}

func (d postgresDialect) GetCreateCursorQuery(schema string, cursorsTable string, withPostgraphile bool) string {
func (d postgresDialect) GetCreateCursorQuery(l *Loader, withPostgraphile bool) string {
out := fmt.Sprintf(cli.Dedent(`
create table if not exists %s.%s
(
Expand All @@ -192,10 +192,10 @@ func (d postgresDialect) GetCreateCursorQuery(schema string, cursorsTable string
block_num bigint,
block_id text
);
`), EscapeIdentifier(schema), EscapeIdentifier(cursorsTable))
`), EscapeIdentifier(l.schema), EscapeIdentifier(l.cursorTableName))
if withPostgraphile {
out += fmt.Sprintf("COMMENT ON TABLE %s.%s IS E'@omit';",
EscapeIdentifier(schema), EscapeIdentifier(cursorsTable))
EscapeIdentifier(l.schema), EscapeIdentifier(l.cursorTableName))
}
return out
}
Expand Down
4 changes: 2 additions & 2 deletions db/operations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func TestEscapeColumns(t *testing.T) {
t.Skip(`PG_DSN not set, please specify PG_DSN to run this test, example: PG_DSN="psql://dev-node:insecure-change-me-in-prod@localhost:5432/dev-node?enable_incremental_sort=off&sslmode=disable"`)
}

dbLoader, err := NewLoader(dsn, 1, 1, 1, "cursors", OnModuleHashMismatchIgnore, nil, zlog, tracer)
dbLoader, err := NewLoader(dsn, 1, 1, 1, "cursors", "", false, OnModuleHashMismatchIgnore, nil, zlog, tracer)
require.NoError(t, err)

tx, err := dbLoader.DB.Begin()
Expand Down Expand Up @@ -68,7 +68,7 @@ func TestEscapeValues(t *testing.T) {
t.Skip(`PG_DSN not set, please specify PG_DSN to run this test, example: PG_DSN="psql://dev-node:insecure-change-me-in-prod@localhost:5432/dev-node?enable_incremental_sort=off&sslmode=disable"`)
}

dbLoader, err := NewLoader(dsn, 1, 1, 1, "cursors", OnModuleHashMismatchIgnore, nil, zlog, tracer)
dbLoader, err := NewLoader(dsn, 1, 1, 1, "cursors", "", false, OnModuleHashMismatchIgnore, nil, zlog, tracer)
require.NoError(t, err)

tx, err := dbLoader.DB.Begin()
Expand Down
2 changes: 1 addition & 1 deletion db/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func NewTestLoader(
tables map[string]*TableInfo,
) (*Loader, *TestTx) {

loader, err := NewLoader("psql://x:5432/x", 1, 1, 1, "cursors", OnModuleHashMismatchIgnore, nil, zlog, tracer)
loader, err := NewLoader("psql://x:5432/x", 1, 1, 1, "cursors", "", false, OnModuleHashMismatchIgnore, nil, zlog, tracer)
if err != nil {
panic(err)
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/streamingfast/substreams-sink-sql
go 1.23.2

require (
github.com/AfterShip/clickhouse-sql-parser v0.3.9-0.20241023040204-2e25715e0d1e
github.com/ClickHouse/clickhouse-go/v2 v2.30.0
github.com/bobg/go-generics/v2 v2.2.2
github.com/drone/envsubst v1.0.3
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ connectrpc.com/connect v1.16.1/go.mod h1:XpZAduBQUySsb4/KO5JffORVkDI4B6/EYPi7N8x
contrib.go.opencensus.io/exporter/stackdriver v0.12.6/go.mod h1:8x999/OcIPy5ivx/wDiV7Gx4D+VUPODf0mWRGRc5kSk=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20210715213245-6c3934b029d8/go.mod h1:CzsSbkDixRphAF5hS6wbMKq0eI6ccJRb7/A0M6JBnwg=
github.com/AfterShip/clickhouse-sql-parser v0.3.9-0.20241023040204-2e25715e0d1e h1:XwVfP5J2Dh1NFStG+OMKa88pO6u0tXBmnVo14i+eS/U=
github.com/AfterShip/clickhouse-sql-parser v0.3.9-0.20241023040204-2e25715e0d1e/go.mod h1:W0Z82wJWkJxz2RVun/RMwxue3g7ut47Xxl+SFqdJGus=
github.com/Azure/azure-pipeline-go v0.2.3 h1:7U9HBg1JFK3jHl5qmo4CTZKFTVgMwdFHMVtCdfBE21U=
github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k=
github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
Expand Down Expand Up @@ -1084,13 +1086,17 @@ github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtm
github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw=
github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24q7p+U=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y=
github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/semiotic-ai/schema v0.0.0-20231204185452-913e2b1c41d6 h1:SS6C0GUr75PJoMp6OHxGjPiiLJqWTM+QsQAg4ucsrDQ=
github.com/semiotic-ai/schema v0.0.0-20231204185452-913e2b1c41d6/go.mod h1:dwA6tLbVjXHg8H9yXxUTdmwo77miO9m8NBqyhlGK074=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sethvargo/go-retry v0.2.3 h1:oYlgvIvsju3jNbottWABtbnoLC+GDtLdBHxKWxQm/iU=
github.com/sethvargo/go-retry v0.2.3/go.mod h1:1afjQuvh7s4gflMObvjLPaWgluLLyhA1wmVZ6KLpICw=
github.com/shirou/gopsutil v2.19.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
Expand Down