diff --git a/README.md b/README.md index d9104b6..d77901a 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,10 @@ dodo diff --min-duration-diff 200ms --original-sqls 'output/sql/*.sql' output/re # diff of two replay result directories dodo diff replay1/ replay2/ + + +# Export table data +dodo export --help ``` ### Config diff --git a/cmd/create.go b/cmd/create.go index d51ebc8..a420ae0 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -16,11 +16,10 @@ limitations under the License. package cmd import ( - "errors" "fmt" + "os" "path/filepath" "slices" - "strings" "github.com/emirpasic/gods/queues/circularbuffer" "github.com/samber/lo" @@ -151,30 +150,13 @@ func completeCreateConfig() (err error) { } } createTableDDLs = tableDDLs - - return err + return nil } ddldir := filepath.Join(GlobalConfig.OutputDir, "ddl") - GlobalConfig.DBs, GlobalConfig.Tables = lo.Uniq(GlobalConfig.DBs), lo.Uniq(GlobalConfig.Tables) - dbs, tables := GlobalConfig.DBs, GlobalConfig.Tables - if len(dbs) == 0 && len(tables) == 0 { - return errors.New("expected at least one database or tables, please use --dbs/--tables flag or --ddl flag") - } else if len(dbs) == 1 { - // prepend default database if only one database specified - prefix := dbs[0] + "." - for i, t := range GlobalConfig.Tables { - if !strings.Contains(t, ".") { - GlobalConfig.Tables[i] = prefix + t - } - } - } else { - for _, t := range tables { - if !strings.Contains(t, ".") { - return errors.New("expected database in table name when zero/multiple databases specified, e.g. --tables db1.table1,db2.table2") - } - } + if err := completeDBTables(); err != nil { + return err } if len(GlobalConfig.Tables) == 0 { @@ -198,6 +180,14 @@ func completeCreateConfig() (err error) { } else { for _, table := range GlobalConfig.Tables { tableddl := filepath.Join(ddldir, fmt.Sprintf("%s.table.sql", table)) + if _, err := os.Stat(tableddl); err != nil { + // maybe a view + fmatch := filepath.Join(ddldir, fmt.Sprintf("%s.*view.sql", table)) + if viewddls, err := src.FileGlob([]string{fmatch}); err == nil && len(viewddls) > 0 { + createOtherDDLs = append(createOtherDDLs, viewddls...) + } + continue + } createTableDDLs = append(createTableDDLs, tableddl) } } diff --git a/cmd/dump.go b/cmd/dump.go index 7f794f1..dc5d286 100644 --- a/cmd/dump.go +++ b/cmd/dump.go @@ -162,6 +162,10 @@ func init() { pFlags.StringVar(&DumpConfig.SSHPrivateKey, "ssh-private-key", "~/.ssh/id_rsa", "File path of SSH private key for '--ssh-address'") addAnonymizeBaseFlags(pFlags, false) + dumpCmd.RegisterFlagCompletionFunc("query-states", func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return []string{"ok", "eof", "err"}, cobra.ShellCompDirectiveNoFileComp | cobra.ShellCompDirectiveDefault + }) + flags := dumpCmd.Flags() flags.BoolVar(&DumpConfig.Clean, "clean", false, "Clean previous data and output directory") } diff --git a/cmd/export.go b/cmd/export.go new file mode 100644 index 0000000..f19745d --- /dev/null +++ b/cmd/export.go @@ -0,0 +1,201 @@ +/* +Copyright © 2025 Thearas thearas850@gmail.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package cmd + +import ( + "context" + "errors" + "fmt" + "os/signal" + "strings" + "syscall" + + "github.com/jmoiron/sqlx" + "github.com/samber/lo" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + "github.com/valyala/fasttemplate" + + "github.com/Thearas/dodo/src" +) + +// ExportConfig holds the configuration values +var ExportConfig = Export{} + +// Export holds the configuration for the export command +type Export struct { + Target string + ToURL string + Properties map[string]string + With map[string]string + + dbconn *sqlx.DB +} + +// TODO: Support BROKER export? +// exportCmd represents the export command +var exportCmd = &cobra.Command{ + Use: "export", + Short: "Export data from Doris", + Long: `Export data from Doris via [Export](https://doris.apache.org/docs/sql-manual/sql-statements/data-modification/load-and-export/EXPORT) command. + +Example: + dodo export --target s3 --url 's3://bucket/export/{db}/{table}_' -p timeout=60 -w s3.endpoint=xxx -w s3.access_key=xxx -w s3.secret_key=xxx + dodo export --target hdfs --url 'hdfs://path/to/export/{db}/{table}_' -w fs.defaultFS=hdfs://HDFS8000871 -w hadoop.username=xxx`, + Aliases: []string{"e"}, + PersistentPreRunE: func(cmd *cobra.Command, _ []string) error { + return initConfig(cmd) + }, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, _ []string) (err error) { + ctx, _ := signal.NotifyContext(cmd.Context(), syscall.SIGABRT, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) + + if err := completeExportConfig(ctx); err != nil { + return err + } + GlobalConfig.Parallel = min(GlobalConfig.Parallel, len(GlobalConfig.Tables)) + + logrus.Infof("Export data for %d table(s) to '%s', parallel: %d\n", len(GlobalConfig.Tables), ExportConfig.ToURL, GlobalConfig.Parallel) + if len(GlobalConfig.Tables) == 0 { + return nil + } + if !src.Confirm("Confirm") { + return nil + } + + g := src.ParallelGroup(GlobalConfig.Parallel) + for _, t := range GlobalConfig.Tables { + dbtable := strings.SplitN(t, ".", 2) + if len(dbtable) != 2 { + return fmt.Errorf("invalid table format '%s', expected 'db.table'", t) + } + dbname, table := dbtable[0], dbtable[1] + toURL := fasttemplate.ExecuteString(ExportConfig.ToURL, "{", "}", map[string]any{"db": dbname, "table": table}) + + g.Go(func() error { + logrus.Infof("Exporting table '%s.%s' to '%s'", dbname, table, toURL) + if err := src.Export(ctx, ExportConfig.dbconn, dbname, table, ExportConfig.Target, toURL, ExportConfig.With, ExportConfig.Properties); err != nil { + return fmt.Errorf("export table '%s.%s' failed: %w", dbname, table, err) + } + logrus.Infof("Export completed for table '%s.%s'", dbname, table) + return nil + }) + } + + return g.Wait() + }, +} + +func init() { + rootCmd.AddCommand(exportCmd) + exportCmd.PersistentFlags().SortFlags = false + exportCmd.Flags().SortFlags = false + + pFlags := exportCmd.PersistentFlags() + pFlags.StringVarP(&ExportConfig.Target, "target", "t", "s3", "Target storage for the export, e.g. 's3', 'hdfs'") + pFlags.StringVarP(&ExportConfig.ToURL, "url", "u", "", "Target URL that Doris export to, can use placeholders {db} and {table}, e.g. 's3://bucket/export/{db}/{table}_', 'hdfs://path/to/{db}/{table}_'") + pFlags.StringToStringVarP(&ExportConfig.Properties, "props", "p", map[string]string{}, "Additional properties, e.g. 'format=parquet'") + pFlags.StringToStringVarP(&ExportConfig.With, "with", "w", map[string]string{}, "Additional options for export target, e.g. 's3.endpoint=xxx'") + + exportCmd.RegisterFlagCompletionFunc("target", func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return []string{"s3", "hdfs", "local"}, cobra.ShellCompDirectiveNoFileComp | cobra.ShellCompDirectiveDefault + }) + + exportCmd.RegisterFlagCompletionFunc("url", func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + compopts := cobra.ShellCompDirectiveNoFileComp | cobra.ShellCompDirectiveDefault + switch ExportConfig.Target { + case "s3": + return []string{"s3://"}, compopts + case "hdfs": + return []string{"hdfs://"}, compopts + case "local": + return []string{"file://"}, compopts + } + return []string{}, cobra.ShellCompDirectiveError + }) + + compopts := cobra.ShellCompDirectiveNoFileComp | cobra.ShellCompDirectiveDefault | cobra.ShellCompDirectiveNoSpace | cobra.ShellCompDirectiveKeepOrder + exportCmd.RegisterFlagCompletionFunc("props", func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + // https://doris.apache.org/docs/sql-manual/sql-statements/data-modification/load-and-export/EXPORT#optional-parameters + return []string{ + "label=", + "column_separator=", + "line_delimiter=", + "timeout=", + "columns=", + "format=", + "parallelism=", + "delete_existing_files=", + "max_file_size=", + "with_bom=", + "compress_type=", + }, + compopts + }) + + exportCmd.RegisterFlagCompletionFunc("with", func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + switch ExportConfig.Target { + case "s3": + return []string{"s3.endpoint=", "s3.access_key=", "s3.secret_key=", "s3.region="}, compopts + case "hdfs": + return []string{"fs.defaultFS=", "hadoop.username=", "fs.", "dfs.", "hadoop."}, compopts + } + return []string{}, cobra.ShellCompDirectiveError + }) +} + +func completeExportConfig(ctx context.Context) (err error) { + if err = completeDBTables(); err != nil { + return err + } + + ExportConfig.Target = strings.ToLower(ExportConfig.Target) + if ExportConfig.Target == "" { + return errors.New("export target is required, use --target or -t to specify it") + } + urlPrefix := ExportConfig.Target + if urlPrefix == "local" { + urlPrefix = "file" + } + if !strings.HasPrefix(ExportConfig.ToURL, urlPrefix+"://") { + return fmt.Errorf("export URL must start with '%s://', got: '%s'", urlPrefix, ExportConfig.ToURL) + } + + ExportConfig.dbconn, err = connectDBWithoutDBName() + if err != nil { + return fmt.Errorf("failed to connect to database: %w", err) + } + + // find tables if not provided + if len(GlobalConfig.Tables) > 0 { + return nil + } + for _, db := range GlobalConfig.DBs { + schemas, err := src.ShowTables(ctx, ExportConfig.dbconn, db) + if err != nil { + return fmt.Errorf("failed to get tables for database '%s': %w", db, err) + } + tables := lo.FilterMap(schemas, func(s *src.Schema, _ int) (string, bool) { + return s.Name, s.Type == src.SchemaTypeTable + }) + logrus.Infof("Found %d table(s) in database '%s'", len(tables), db) + for _, table := range tables { + GlobalConfig.Tables = append(GlobalConfig.Tables, db+"."+table) + } + } + + return nil +} diff --git a/cmd/gendata.go b/cmd/gendata.go index 9a62172..fab06db 100644 --- a/cmd/gendata.go +++ b/cmd/gendata.go @@ -268,6 +268,10 @@ func init() { pFlags.StringVarP(&GendataConfig.Query, "query", "q", "", "SQL query file to generate data, only can be used when LLM is on") pFlags.StringVarP(&GendataConfig.Prompt, "prompt", "p", "", "Additional user prompt for LLM") addAnonymizeBaseFlags(pFlags, false) + + gendataCmd.RegisterFlagCompletionFunc("llm", func(_ *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) { + return []string{"deepseek-reasoner", "deepseek-chat"}, cobra.ShellCompDirectiveNoFileComp | cobra.ShellCompDirectiveDefault + }) } // completeGendataConfig validates and completes the gendata configuration @@ -299,24 +303,8 @@ func completeGendataConfig() (err error) { return nil } - GlobalConfig.DBs, GlobalConfig.Tables = lo.Uniq(GlobalConfig.DBs), lo.Uniq(GlobalConfig.Tables) - dbs, tables := GlobalConfig.DBs, GlobalConfig.Tables - if len(dbs) == 0 && len(tables) == 0 { - return errors.New("expected at least one database or tables, please use --dbs/--tables flag or --ddl flag with '.sql' file(s)") - } else if len(dbs) == 1 { - // prepend default database if only one database specified - prefix := dbs[0] + "." - for i, t := range GlobalConfig.Tables { - if !strings.Contains(t, ".") { - GlobalConfig.Tables[i] = prefix + t - } - } - } else { - for _, t := range tables { - if !strings.Contains(t, ".") { - return errors.New("expected database in table name when zero/multiple databases specified, e.g. --tables db1.table1,db2.table2") - } - } + if err := completeDBTables(); err != nil { + return err } ddls := []string{} diff --git a/cmd/import.go b/cmd/import.go index b0980f2..632701d 100644 --- a/cmd/import.go +++ b/cmd/import.go @@ -105,24 +105,8 @@ func completeImportConfig() (err error) { ImportConfig.Data = filepath.Join(GlobalConfig.OutputDir, "gendata") } - GlobalConfig.DBs, GlobalConfig.Tables = lo.Uniq(GlobalConfig.DBs), lo.Uniq(GlobalConfig.Tables) - dbs, tables := GlobalConfig.DBs, GlobalConfig.Tables - if len(dbs) == 0 && len(tables) == 0 { - return errors.New("expected at least one database or tables, please use --dbs/--tables flag") - } else if len(dbs) == 1 { - // prepend default database if only one database specified - prefix := dbs[0] + "." - for i, t := range GlobalConfig.Tables { - if !strings.Contains(t, ".") { - GlobalConfig.Tables[i] = prefix + t - } - } - } else { - for _, t := range tables { - if !strings.Contains(t, ".") { - return errors.New("expected database in table name when zero/multiple databases specified, e.g. --tables db1.table1,db2.table2") - } - } + if err := completeDBTables("expected at least one database or tables, please use --dbs/--tables flag"); err != nil { + return err } table2datafiles := map[string][]string{} diff --git a/cmd/root.go b/cmd/root.go index ddc4bb4..9cd9797 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -16,21 +16,29 @@ limitations under the License. package cmd import ( + "errors" "fmt" "os" "reflect" "runtime" "strings" + "sync/atomic" + "github.com/jmoiron/sqlx" + "github.com/samber/lo" "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/spf13/viper" + + "github.com/Thearas/dodo/src" ) var ( GlobalConfig = Global{} DefaultParallel = 10 + + completionDB *sqlx.DB ) type Global struct { @@ -88,10 +96,7 @@ func init() { rootCmd.PersistentFlags().SortFlags = false rootCmd.Flags().SortFlags = false - parallel := runtime.NumCPU() - if parallel > DefaultParallel { - parallel = DefaultParallel - } + parallel := min(runtime.NumCPU(), DefaultParallel) pFlags := rootCmd.PersistentFlags() pFlags.StringVarP(&GlobalConfig.ConfigFile, "config", "C", "", "Config file (default is $HOME/.dodo.yaml)") @@ -109,10 +114,77 @@ func init() { pFlags.StringVar(&GlobalConfig.Catalog, "catalog", "", "Catalog to work on") pFlags.StringSliceVarP(&GlobalConfig.DBs, "dbs", "D", []string{}, "DBs to work on") pFlags.StringSliceVarP(&GlobalConfig.Tables, "tables", "T", []string{}, "Tables to work on") + + compInit := func(cmd *cobra.Command) error { + if err := initConfig(cmd); err != nil { + return err + } + db := setupCompletionDB() + if db == nil { + return errors.New("failed to connect to database for completion") + } + return nil + } + compopts := cobra.ShellCompDirectiveNoFileComp | cobra.ShellCompDirectiveDefault | cobra.ShellCompDirectiveKeepOrder + + rootCmd.RegisterFlagCompletionFunc("catalog", func(cmd *cobra.Command, _ []string, tocomplete string) ([]string, cobra.ShellCompDirective) { + if err := compInit(cmd); err != nil { + return nil, cobra.ShellCompDirectiveError + } + items, err := src.ShowCatalogs(cmd.Context(), completionDB, tocomplete) + if len(items) == 0 || err != nil { + return []string{"No catalog found"}, cobra.ShellCompDirectiveError + } + return items, compopts + }) + + rootCmd.RegisterFlagCompletionFunc("dbs", func(cmd *cobra.Command, _ []string, tocomplete string) ([]string, cobra.ShellCompDirective) { + if err := compInit(cmd); err != nil { + return nil, cobra.ShellCompDirectiveError + } + items, err := src.ShowDatabases(cmd.Context(), completionDB, tocomplete) + if len(items) == 0 || err != nil { + return []string{"No database found"}, cobra.ShellCompDirectiveError + } + return items, compopts + }) + + rootCmd.RegisterFlagCompletionFunc("tables", func(cmd *cobra.Command, _ []string, tocomplete string) ([]string, cobra.ShellCompDirective) { + if err := compInit(cmd); err != nil { + return nil, cobra.ShellCompDirectiveError + } + + var dbname string + dbtable := strings.SplitN(tocomplete, ".", 2) + if len(dbtable) == 2 { + dbname = dbtable[0] + tocomplete = dbtable[1] + } else if len(GlobalConfig.DBs) == 0 { + dbCompF, _ := rootCmd.GetFlagCompletionFunc("dbs") + items, compopts := dbCompF(cmd, nil, tocomplete) + return lo.Map(items, func(item string, _ int) string { return item + "." }), compopts + } else if len(GlobalConfig.DBs) == 1 { + dbname = GlobalConfig.DBs[0] + } else { + return lo.Map(GlobalConfig.DBs, func(db string, _ int) string { return db + "." }), compopts + } + + items, err := src.ShowTables(cmd.Context(), completionDB, dbname, tocomplete) + if len(items) == 0 || err != nil { + return []string{"No table found"}, cobra.ShellCompDirectiveError + } + return lo.Map(items, func(item *src.Schema, _ int) string { return dbname + "." + item.Name }), compopts + }) } +var isCfgInited atomic.Bool + // initConfig reads in config file and ENV variables if set. func initConfig(cmd *cobra.Command, prefixs ...string) error { + if isCfgInited.Swap(true) { + return nil + } + cfgFile := GlobalConfig.ConfigFile if cfgFile != "" { // Use config file from the flag. @@ -195,3 +267,42 @@ func initLog() error { logrus.SetFormatter(&logrus.TextFormatter{}) return nil } + +func completeDBTables(dbtableNotFoundErr ...string) error { + GlobalConfig.DBs, GlobalConfig.Tables = lo.Uniq(GlobalConfig.DBs), lo.Uniq(GlobalConfig.Tables) + dbs, tables := GlobalConfig.DBs, GlobalConfig.Tables + if len(dbs) == 0 && len(tables) == 0 { + if len(dbtableNotFoundErr) > 0 { + return errors.New(dbtableNotFoundErr[0]) + } + return errors.New("expected at least one database or tables, please use --dbs/--tables flag or --ddl flag with '.sql' file(s)") + } else if len(dbs) == 1 { + // prepend default database if only one database specified + prefix := dbs[0] + "." + for i, t := range GlobalConfig.Tables { + if !strings.Contains(t, ".") { + GlobalConfig.Tables[i] = prefix + t + } + } + } else { + for _, t := range tables { + if !strings.Contains(t, ".") { + return errors.New("expected database in table name when zero/multiple databases specified, e.g. --tables db1.table1,db2.table2") + } + } + } + return nil +} + +func setupCompletionDB() *sqlx.DB { + if completionDB != nil { + return completionDB + } + + db, err := connectDBWithoutDBName() + if err != nil { + return nil + } + completionDB = db + return db +} diff --git a/go.mod b/go.mod index 6ee5428..3edb66e 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/jmoiron/sqlx v1.3.5 github.com/manifoldco/promptui v0.9.0 github.com/openai/openai-go v1.7.0 - github.com/samber/lo v1.49.1 + github.com/samber/lo v1.51.0 github.com/sirupsen/logrus v1.9.3 github.com/spf13/cast v1.6.0 github.com/spf13/cobra v1.8.1 @@ -29,8 +29,8 @@ require ( github.com/zeebo/blake3 v0.2.4 golang.org/x/crypto v0.35.0 golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 - golang.org/x/sync v0.11.0 - golang.org/x/text v0.22.0 + golang.org/x/sync v0.16.0 + golang.org/x/text v0.27.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 8cc3e0c..04af59d 100644 --- a/go.sum +++ b/go.sum @@ -68,8 +68,6 @@ github.com/mattn/go-sqlite3 v1.14.6 h1:dNPt6NO46WmLVt2DLNpwczCmdV5boIZ6g/tlDrlRU github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/openai/openai-go v1.7.0 h1:M1JfDjQgo3d3PsLyZgpGUG0wUAaUAitqJPM4Rl56dCA= github.com/openai/openai-go v1.7.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= @@ -84,8 +82,8 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= -github.com/samber/lo v1.49.1 h1:4BIFyVfuQSEpluc7Fua+j1NolZHiEHEpaSEKdsH0tew= -github.com/samber/lo v1.49.1/go.mod h1:dO6KHFzUKXgP8LDhU0oI8d2hekjXnGOu0DB8Jecxd6o= +github.com/samber/lo v1.51.0 h1:kysRYLbHy/MB7kQZf5DSN50JHmMsNEdeY24VzJFu7wI= +github.com/samber/lo v1.51.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v1.13.1 h1:Ef7KhSmjZcK6AVf9YbJdvPYG9avaF0ZxudX+ThRdWfU= @@ -145,8 +143,8 @@ golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA= golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= -golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= -golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= +golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -155,11 +153,11 @@ golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= -golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= -golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= +golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/introduction-zh.md b/introduction-zh.md index be45da1..48ec3d5 100644 --- a/introduction-zh.md +++ b/introduction-zh.md @@ -28,6 +28,7 @@ - [回放速度和并发](#回放速度和并发) - [其他回放参数](#其他回放参数) - [对比回放结果](#对比回放结果) +- [导出表数据](#导出表数据) - [最佳实践](#最佳实践) - [命令行提示与自动补全](#命令行提示与自动补全) - [环境变量和配置文件](#环境变量和配置文件) @@ -634,6 +635,22 @@ dodo replay -f output/q0.sql > `--min-duration-diff` 表示打印执行时长差异超过此值的 SQL,默认 `100ms` +## 导出表数据 + +`dodo export --help` + +对 [Export](https://doris.apache.org/docs/sql-manual/sql-statements/data-modification/load-and-export/EXPORT) 语句的封装,导出表数据到 `s3`、`hdfs` 或 `local` 存储。 + +> [!NOTE] +> +> - 命令会等待导出跑完后返回,中途终止会取消导出 +> - CSV 格式下默认的列分隔符是 `☆`,可通过 `-p column_separator=xxx` 指定 + +```sh +# 导出目标 `--url` 中可使用占位符 `{db}` 和 `{table}`,分别代表数据库名和表名 +dodo export --dbs db1 --tables t1,t2 --target s3 --url 's3://bucket/export/{db}/{table}_' -p timeout=60 -w s3.endpoint=xxx -w s3.access_key=xxx -w s3.secret_key=xxx +``` + ## 最佳实践 ### 命令行提示与自动补全 diff --git a/introduction.md b/introduction.md index 677fe58..b099f9e 100644 --- a/introduction.md +++ b/introduction.md @@ -28,6 +28,7 @@ - [Replay Speed and Concurrency](#replay-speed-and-concurrency) - [Other Replay Parameters](#other-replay-parameters) - [Diff Replay Results](#diff-replay-results) +- [Export table data](#export-table-data) - [Best Practices](#best-practices) - [Command-line Prompts and Autocompletion](#command-line-prompts-and-autocompletion) - [Environment Variables and Configuration Files](#environment-variables-and-configuration-files) @@ -637,6 +638,22 @@ There are two ways: > `--min-duration-diff` means print SQLs whose execution duration difference exceeds this value. Default is `100ms`. +## Export table data + +`dodo export --help` + +Encapsulates the [Export](https://doris.apache.org/docs/sql-manual/sql-statements/data-modification/load-and-export/EXPORT) SQL statement, exporting table data to `s3`, `hdfs` or `local` storage. + +> [!NOTE] +> +> - The command will wait for the export to complete, and the export will be canceled if the command is terminated. +> - The column separator is `☆` by default in CSV format. You can specify them with `-p column_separator=xxx` + +```sh +# The placeholders `{db}` and `{table}` can be used in the export target `--url`, representing the database name and table name respectively +dodo export --dbs db1 --tables t1,t2 --target s3 --url 's3://bucket/export/{db}/{table}_' -p timeout=60 -w s3.endpoint=xxx -w s3.access_key=xxx -w s3.secret_key=xxx +``` + ## Best Practices ### Command-line Prompts and Autocompletion diff --git a/src/db.go b/src/db.go index 210b6f7..a80b14c 100644 --- a/src/db.go +++ b/src/db.go @@ -201,6 +201,26 @@ func getStmtfromShowCreate(r *sqlx.Rows) (schema string, err error) { return } +func ShowCatalogs(ctx context.Context, conn *sqlx.DB, namePrefix string) ([]string, error) { + r, err := conn.QueryxContext(ctx, InternalSqlComment+`SHOW CATALOGS LIKE ?`, SanitizeLike(namePrefix)+"%") + if err != nil { + return nil, err + } + defer r.Close() + + catalogs := []string{} + for r.Next() { + catalog := map[string]any{} + if err := r.MapScan(catalog); err != nil { + return nil, err + } + // cobra.CompDebug(fmt.Sprintln("asdadad", catalog), true) + catalogs = append(catalogs, cast.ToString(catalog["CatalogName"])) + } + + return catalogs, r.Err() +} + func ShowDatabases(ctx context.Context, conn *sqlx.DB, dbnamePrefix string) ([]string, error) { dbs := []string{} err := conn.SelectContext(ctx, &dbs, InternalSqlComment+`SELECT SCHEMA_NAME FROM information_schema.schemata WHERE SCHEMA_NAME not in ('__internal_schema', 'information_schema', 'mysql') AND SCHEMA_NAME like ? ORDER BY SCHEMA_NAME`, SanitizeLike(dbnamePrefix)+"%") @@ -274,6 +294,58 @@ func ShowFronendsDisksDir(ctx context.Context, conn *sqlx.DB, diskType string) ( return dir, r.Err() } +func exportTable(ctx context.Context, conn *sqlx.DB, dbname, table, target, toURL string, with, props map[string]string) error { + strKV := func(k string, v string) string { + if !strings.HasPrefix(k, `"`) && !strings.HasSuffix(k, `'`) { + k = string(MustJsonMarshal(strings.TrimSpace(k))) + } + if !strings.HasPrefix(v, `"`) && !strings.HasSuffix(v, `'`) { + v = string(MustJsonMarshal(strings.TrimSpace(v))) + } + return fmt.Sprintf(" %s = %s", k, v) + } + + stmt := fmt.Sprintf("EXPORT TABLE `%s`.`%s` TO '%s'\nPROPERTIES (\n%s\n)\nWITH %s (\n%s\n);", + dbname, table, toURL, + strings.Join(lo.MapToSlice(props, strKV), ",\n"), + strings.ToUpper(target), + strings.Join(lo.MapToSlice(with, strKV), ",\n"), + ) + + _, err := conn.ExecContext(ctx, InternalSqlComment+stmt) + return err +} + +func showExportTable(ctx context.Context, conn *sqlx.DB, dbname string, label string) (completed bool, progress string, err error) { + r, err := conn.QueryxContext(ctx, InternalSqlComment+fmt.Sprintf("SHOW EXPORT FROM `%s` WHERE Label = '%s' ORDER BY CreateTime desc LIMIT 1", dbname, label)) + if err != nil { + return false, "", err + } + defer r.Close() + if !r.Next() { + return false, "", fmt.Errorf("no rows returned from SHOW EXPORT, db: %s, label: %s", dbname, label) + } + + vals := map[string]any{} + if err := r.MapScan(vals); err != nil { + return false, "", err + } + + // https://doris.apache.org/docs/sql-manual/sql-statements/data-modification/load-and-export/SHOW-EXPORT#return-value + state := cast.ToString(vals["State"]) + progress = cast.ToString(vals["Progress"]) + errMsg := cast.ToString(vals["ErrorMsg"]) + if state == "CANCELLED" || errMsg != "" { + return false, "", fmt.Errorf("export failed: %s", errMsg) + } + return state == "FINISHED", progress, nil +} + +func cancelExportTable(ctx context.Context, conn *sqlx.DB, dbname string, label string) error { + _, err := conn.ExecContext(ctx, InternalSqlComment+fmt.Sprintf("CANCEL EXPORT FROM `%s` WHERE Label = '%s'", dbname, label)) + return err +} + //nolint:revive func GetTablesStats(ctx context.Context, conn *sqlx.DB, analyze bool, dbname string, tables ...string) ([]*TableStats, error) { if len(tables) == 0 { diff --git a/src/export.go b/src/export.go new file mode 100644 index 0000000..ae87d7f --- /dev/null +++ b/src/export.go @@ -0,0 +1,80 @@ +package src + +import ( + "context" + "errors" + "fmt" + "strconv" + "time" + + "github.com/jmoiron/sqlx" + "github.com/sirupsen/logrus" + "github.com/spf13/cast" + + gen "github.com/Thearas/dodo/src/generator" +) + +const ( + ExportLabelPrefix = "dodo_export_" + DefaultExportTimeoutSec = 7200 +) + +func Export( + ctx context.Context, + conn *sqlx.DB, + dbname, table, target, toURL string, + with, props map[string]string, +) error { + label := props["label"] + if label == "" { + // label format: dodo_export___ + label = fmt.Sprintf("%s%s_%s_%s", ExportLabelPrefix, dbname, table, gen.RandomStr(3, 3)) + } + + timeout := cast.ToInt(props["timeout"]) + if timeout == 0 { + timeout = DefaultExportTimeoutSec + } + + colSep := props["column_separator"] + if colSep == "" { + colSep = string(ColumnSeparator) + } + + // set default properties + props["label"] = label + props["timeout"] = strconv.Itoa(timeout) + props["column_separator"] = colSep + + // execute EXPORT statement + if err := exportTable(ctx, conn, dbname, table, target, toURL, with, props); err != nil { + return err + } + + // wait for export to complete + var ( + now = time.Now() + waitSec = 5 + ) + for int(time.Since(now).Seconds()) <= timeout+waitSec { + select { + case <-ctx.Done(): + // cancel export job + err := cancelExportTable(ctx, conn, dbname, label) + return errors.Join(ctx.Err(), err) + case <-time.After(time.Duration(waitSec) * time.Second): + // continue + } + + completed, progress, err := showExportTable(ctx, conn, dbname, label) + if err != nil { + return fmt.Errorf("show export failed: %w", err) + } + if completed { + return nil + } + logrus.Debugf("Exporting table '%s.%s', progress: %s\n", dbname, table, progress) + } + + return fmt.Errorf("export table '%s.%s' timed out after %d seconds", dbname, table, timeout) +} diff --git a/src/gendata.go b/src/gendata.go index d5708ab..167865d 100644 --- a/src/gendata.go +++ b/src/gendata.go @@ -5,7 +5,6 @@ import ( "fmt" "strings" - "github.com/goccy/go-json" "github.com/samber/lo" "github.com/sirupsen/logrus" @@ -69,10 +68,12 @@ func NewTableGen(ddlfile, createTableStmt string, stats *TableStats, rows int, s streamLoadCols := make([]string, 0, colCount) // construct for streamload header `curl -H 'columns: xxx'` hasStreamLoadColMapping := false for i, col := range c.ColumnDefs().GetCols() { - colName := strings.Trim(col.GetColName().GetText(), "`") - colType_ := col.GetType_() - visitor := gen.NewTypeVisitor(fmt.Sprintf("%s.%s", table, colName), nil) - colBaseType := visitor.GetBaseType(colType_) + var ( + colName = strings.Trim(col.GetColName().GetText(), "`") + colType_ = col.GetType_() + visitor = gen.NewTypeVisitor(fmt.Sprintf("%s.%s", table, colName), nil) + colBaseType = visitor.GetBaseType(colType_) + ) // get column gen rule visitor.GenRule = newColGenRule(col, colName, colBaseType, colStats, customColumnRule) @@ -84,25 +85,10 @@ func NewTableGen(ddlfile, createTableStmt string, stats *TableStats, rows int, s tg.Columns = append(tg.Columns, colName) // column mapping in streamload header - var loadMapping string - loadCol := colName - if len(streamloadColNames) > 0 { - loadCol = streamloadColNames[i] - } - switch colBaseType { - case "BITMAP": - hasStreamLoadColMapping = true - loadMapping = fmt.Sprintf("raw_%s,`%s`=bitmap_from_array(cast(raw_%s as ARRAY))", loadCol, loadCol, loadCol) - case "HLL": - hasStreamLoadColMapping = true - loadMapping = fmt.Sprintf("raw_%s,`%s`=hll_empty()", loadCol, loadCol) - if from := visitor.GetRule("from"); from != nil { - loadMapping = fmt.Sprintf("raw_%s,`%s`=hll_hash(%v)", loadCol, loadCol, from) - } - default: - loadMapping = "`" + loadCol + "`" - } - streamLoadCols = append(streamLoadCols, loadMapping) + loadCol := lo.NthOr(streamloadColNames, i, colName) + mapping, needMapping := buildStreamLoadMapping(visitor, loadCol, colBaseType) + streamLoadCols = append(streamLoadCols, mapping) + hasStreamLoadColMapping = hasStreamLoadColMapping || needMapping } if hasStreamLoadColMapping { @@ -112,7 +98,12 @@ func NewTableGen(ddlfile, createTableStmt string, stats *TableStats, rows int, s return tg, nil } -func newColGenRule(col parser.IColumnDefContext, colName, colType string, colStats map[string]*ColumnStats, customColumnRule map[string]GenRule) GenRule { +func newColGenRule( + col parser.IColumnDefContext, + colName, colBaseType string, + colStats map[string]*ColumnStats, + customColumnRule map[string]GenRule, +) GenRule { genRule := GenRule{} // 1. Merge rules in stats @@ -125,15 +116,21 @@ func newColGenRule(col parser.IColumnDefContext, colName, colType string, colSta genRule["null_frequency"] = nullFreq } - if IsStringType(colType) { + if IsStringType(colBaseType) { avgLen := colstats.AvgSizeByte genRule["length"] = avgLen - // HACK: +-5 on string avg size as length - if avgLen > 5 && colType != "CHAR" { + // HACK: +-5/10 on string avg size as length + if colBaseType != "CHAR" && len(colstats.Min) != len(colstats.Max) { + var extent int64 + if avgLen > 10 { + extent = 10 + } else if avgLen > 5 { + extent = 5 + } genRule["length"] = GenRule{ - "min": avgLen - 5, - "max": avgLen + 5, + "min": avgLen - extent, + "max": avgLen + extent, } } } else { @@ -161,6 +158,27 @@ func newColGenRule(col parser.IColumnDefContext, colName, colType string, colSta return genRule } +func buildStreamLoadMapping(visitor *gen.TypeVisitor, loadColName, colBaseType string) (string, bool) { + var ( + mapping string + needMapping bool + ) + switch colBaseType { + case "BITMAP": + needMapping = true + mapping = fmt.Sprintf("raw_%s,`%s`=bitmap_from_array(cast(raw_%s as ARRAY))", loadColName, loadColName, loadColName) + case "HLL": + needMapping = true + mapping = fmt.Sprintf("raw_%s,`%s`=hll_empty()", loadColName, loadColName) + if from := visitor.GetRule("from"); from != nil { + mapping = fmt.Sprintf("raw_%s,`%s`=hll_hash(%v)", loadColName, loadColName, from) + } + default: + mapping = "`" + loadColName + "`" + } + return mapping, needMapping +} + type TableGen struct { Name string Columns []string @@ -215,15 +233,7 @@ func (tg *TableGen) genOne(w *bufio.Writer, colIdxRefGens map[int]*gen.RefGen) { } } - if val == nil { - w.WriteString(`\N`) - } else if v, ok := val.(json.RawMessage); ok { - w.Write(v) - } else if s, ok := val.(string); ok { - w.WriteString(s) - } else { - fmt.Fprint(w, val) - } + gen.WriteColVal(w, val) if i != len(tg.colGens)-1 { w.WriteRune(ColumnSeparator) } diff --git a/src/generator/format.go b/src/generator/format.go index 6bf9c5c..34b0984 100644 --- a/src/generator/format.go +++ b/src/generator/format.go @@ -58,12 +58,7 @@ func (g *FormatGen) Gen() any { } result := tagF.Call(nil)[0].Interface() - if result == nil { - return w.Write([]byte(`\N`)) - } else if s, ok := result.(string); ok { - return w.Write([]byte(s)) - } - return w.Write(fmt.Append(nil, result)) + return WriteColVal(w.(ColValWriter), result) }) if err != nil { logrus.Errorf("format execute templace failed, err: %v\n", err) diff --git a/src/generator/generator.go b/src/generator/generator.go index 8ba5eeb..7e02db5 100644 --- a/src/generator/generator.go +++ b/src/generator/generator.go @@ -63,47 +63,24 @@ func NewTypeVisitor(colpath string, genRule GenRule) *TypeVisitor { } func (v *TypeVisitor) GetGen(type_ parser.IDataTypeContext) Gen { - baseType := v.GetBaseType(type_) - v.MergeDefaultRule(baseType) // Merge global (aka. default) generation rules. + var ( + g Gen + err error + baseType = v.GetBaseType(type_) + ) + + // Merge global (aka. default) generation rules. + v.MergeDefaultRule(baseType) if logrus.GetLevel() > logrus.DebugLevel { logrus.Tracef("gen rule of '%s': %s\n", v.Colpath, string(MustJSONMarshal(v.GenRule))) } - var ( - g Gen - err error - ) - - // 1. custom generator if customGenRule, ok := v.GetRule("gen").(GenRule); ok { - var ( - g_ Gen - genName string - ) - for name, newCustomGen := range CustomGenConstructors { - if _, ok := customGenRule[name]; !ok { - continue - } - if g_ != nil { - logrus.Fatalf("Multiple custom generators found for column '%s', only one is allowed, but got both: %s and %s\n", v.Colpath, genName, name) - } - - g_, err = newCustomGen(v, type_, customGenRule) - if err != nil { - logrus.Fatalf("Invalid custom generator '%s' for column '%s', err: %v\n", name, v.Colpath, err) - } - genName = name - } - g = g_ - if g == nil { - logrus.Fatalf("Custom generator not found for column '%s', expect one of %v\n", - v.Colpath, - lo.MapToSlice(CustomGenConstructors, func(name string, _ CustomGenConstructor) string { return name }), - ) - } + // 1. custom generator + g = v.getCustomGen(type_, customGenRule) } else { // 2. type generator - g = v.getTypeGen(baseType, type_) + g = v.getTypeGen(type_, baseType) } // format generator @@ -130,7 +107,36 @@ func (v *TypeVisitor) GetGen(type_ parser.IDataTypeContext) Gen { return g } -func (v *TypeVisitor) getTypeGen(baseType string, type_ parser.IDataTypeContext) Gen { +func (v *TypeVisitor) getCustomGen(type_ parser.IDataTypeContext, customGenRule GenRule) Gen { + var ( + g Gen + genName string + err error + ) + for name, newCustomGen := range CustomGenConstructors { + if _, ok := customGenRule[name]; !ok { + continue + } + if g != nil { + logrus.Fatalf("Multiple custom generators found for column '%s', only one is allowed, but got both: %s and %s\n", v.Colpath, genName, name) + } + + g, err = newCustomGen(v, type_, customGenRule) + if err != nil { + logrus.Fatalf("Invalid custom generator '%s' for column '%s', err: %v\n", name, v.Colpath, err) + } + genName = name + } + if g == nil { + logrus.Fatalf("Custom generator not found for column '%s', expect one of %v\n", + v.Colpath, + lo.MapToSlice(CustomGenConstructors, func(name string, _ CustomGenConstructor) string { return name }), + ) + } + return g +} + +func (v *TypeVisitor) getTypeGen(type_ parser.IDataTypeContext, baseType string) Gen { var g Gen switch ty := type_.(type) { case *parser.ComplexDataTypeContext: @@ -212,7 +218,7 @@ func (v *TypeVisitor) getTypeGen(baseType string, type_ parser.IDataTypeContext) genRule = maps.Clone(v.GenRule) delete(genRule, "structure") } else { - logrus.Fatalf("JSON/JSONB/VARIANT must have gen rule 'structure' at column '%s'\n", v.Colpath) + logrus.Fatalf("JSON/JSONB/VARIANT must have gen rule 'structure' or 'gen' at column '%s'\n", v.Colpath) } p := parser.NewParser(v.Colpath, structure) diff --git a/src/generator/misc.go b/src/generator/misc.go index ad2803e..c8bd471 100644 --- a/src/generator/misc.go +++ b/src/generator/misc.go @@ -2,6 +2,7 @@ package generator import ( "fmt" + "io" "math/rand/v2" "time" "unsafe" @@ -14,6 +15,28 @@ import ( "gopkg.in/yaml.v3" ) +type ColValWriter interface { + io.Writer + io.StringWriter +} + +func WriteColVal(w ColValWriter, val any) (int, error) { + if val == nil { + return w.WriteString(`\N`) + } + + switch v := val.(type) { + case string: + return w.WriteString(v) + case json.RawMessage: + return w.Write(v) + case []byte: + return w.Write(v) + default: + return fmt.Fprint(w, val) + } +} + //nolint:revive func MergeGenRules(dst, src GenRule, overwrite bool) { for k, v := range src { diff --git a/src/prompt/gendata.xml b/src/prompt/gendata.xml index bcd15f3..6bb598c 100644 --- a/src/prompt/gendata.xml +++ b/src/prompt/gendata.xml @@ -5,10 +5,11 @@ So your task is generating YAML configurations for the data generation tool dodo -1. The generated data must be able to be queried by user's queries -2. The YAML configurations should according to 'usage' below. Do not use rule key in `gendata.yaml` that haven't been documented in 'usage' -3. When column stats conflict with queries conditions, prioritize queries conditions and ignore column stats -4. Output should be a valid YAML and do not output anything else except YAML +1. The generated data must be able to be inserted into the tables in user prompt, constraints like UNIQUE KEY and PARTITIONS must be satisfied +2. The generated data must be able to be queried by user's queries +3. The YAML configurations should according to 'usage' below. Do not use rule key in `gendata.yaml` that haven't been documented in 'usage' +4. When column stats conflict with queries conditions, prioritize queries conditions and ignore column stats +5. Output should be a valid YAML and do not output anything else except YAML @@ -1187,8 +1188,7 @@ tables: gen: ref: employees.employee_id - - + @@ -1199,7 +1199,7 @@ Do not generation rules for those columns that not been used as condition (like -The list of generation rule `format` built-in tags (placeholder like {{month}}) in Markdown table: +The list of built-in tags (placeholder like {{month}}) for generation rule `format` in Markdown table: | Name | Return Type | diff --git a/src/prompt/gendata.xml.tpl b/src/prompt/gendata.xml.tpl index 7914236..ddad318 100644 --- a/src/prompt/gendata.xml.tpl +++ b/src/prompt/gendata.xml.tpl @@ -5,10 +5,11 @@ So your task is generating YAML configurations for the data generation tool dodo -1. The generated data must be able to be queried by user's queries -2. The YAML configurations should according to 'usage' below. Do not use rule key in `gendata.yaml` that haven't been documented in 'usage' -3. When column stats conflict with queries conditions, prioritize queries conditions and ignore column stats -4. Output should be a valid YAML and do not output anything else except YAML +1. The generated data must be able to be inserted into the tables in user prompt, constraints like UNIQUE KEY and PARTITIONS must be satisfied +2. The generated data must be able to be queried by user's queries +3. The YAML configurations should according to 'usage' below. Do not use rule key in `gendata.yaml` that haven't been documented in 'usage' +4. When column stats conflict with queries conditions, prioritize queries conditions and ignore column stats +5. Output should be a valid YAML and do not output anything else except YAML @@ -121,8 +122,7 @@ All kinds of generation rules example(without queries): 「example」 - - + @@ -133,7 +133,7 @@ Do not generation rules for those columns that not been used as condition (like -The list of generation rule `format` built-in tags (placeholder like {{month}}) in Markdown table: +The list of built-in tags (placeholder like {{month}}) for generation rule `format` in Markdown table: 「format-tags」