Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactor mc2mc to use query builder #51

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 4 additions & 91 deletions mc2mc/internal/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,20 @@ import (
e "errors"
"fmt"
"log/slog"
"os"
"strings"
"time"

"github.com/pkg/errors"

"github.com/goto/transformers/mc2mc/internal/loader"
)

type Loader interface {
GetQuery(tableID, query string) string
GetPartitionedQuery(tableID, query string, partitionName []string) string
}

type OdpsClient interface {
GetOrderedColumns(tableID string) ([]string, error)
GetPartitionNames(ctx context.Context, tableID string) ([]string, error)
ExecSQL(ctx context.Context, query string) error
}

type Client struct {
OdpsClient OdpsClient
Loader Loader

appCtx context.Context
logger *slog.Logger
shutdownFns []func() error

// TODO: remove this temporary capability after 15 nov
enablePartitionValue bool
enableAutoPartition bool
}

func NewClient(ctx context.Context, setupFns ...SetupFn) (*Client, error) {
Expand All @@ -60,83 +43,13 @@ func (c *Client) Close() error {
return errors.WithStack(err)
}

func (c *Client) Execute(ctx context.Context, tableID, queryFilePath string, dstart string) error {
// read query from filepath
c.logger.Info(fmt.Sprintf("executing query from %s", queryFilePath))
queryRaw, err := os.ReadFile(queryFilePath)
if err != nil {
return errors.WithStack(err)
}

// get column names
if tableID != "" {
columnNames, err := c.OdpsClient.GetOrderedColumns(tableID)
if err != nil {
return errors.WithStack(err)
}
// convert time 2024-11-04T00:00:00Z to 2024-11-04 00:00:00
start, err := time.Parse(time.RFC3339, dstart)
if err != nil {
return errors.WithStack(err)
}
dstart = start.Format(time.DateTime)
// construct query with ordered columns and BQ pseudo columns for ingestion time (based on dstart)
queryRaw = constructQueryWithOrderedColumnsWithBQIngestionTime(queryRaw, columnNames, dstart)
}

if c.enablePartitionValue && !c.enableAutoPartition {
queryRaw = addPartitionValueColumn(queryRaw)
}

// check if table is partitioned
partitionNames, err := c.OdpsClient.GetPartitionNames(ctx, tableID)
if err != nil {
return errors.WithStack(err)
}

// prepare query
queryToExec := c.Loader.GetQuery(tableID, string(queryRaw))
if len(partitionNames) > 0 && !c.enableAutoPartition {
// when table is partitioned and auto partition is disabled, then we need to specify partition columns explicitly
c.logger.Info(fmt.Sprintf("table %s is partitioned by %s", tableID, strings.Join(partitionNames, ", ")))
queryToExec = c.Loader.GetPartitionedQuery(tableID, string(queryRaw), partitionNames)
}

func (c *Client) Execute(ctx context.Context, query string) error {
// execute query with odps client
c.logger.Info(fmt.Sprintf("execute: %s", queryToExec))
if err := c.OdpsClient.ExecSQL(ctx, queryToExec); err != nil {
c.logger.Info(fmt.Sprintf("execute: %s", query))
if err := c.OdpsClient.ExecSQL(ctx, query); err != nil {
return errors.WithStack(err)
}

c.logger.Info("execution done")
return errors.WithStack(err)
}

// TODO: remove this temporary support after 15 nov
func addPartitionValueColumn(rawQuery []byte) []byte {
header, qr := loader.SeparateHeadersAndQuery(string(rawQuery))
return []byte(fmt.Sprintf("%s SELECT *, STRING(CURRENT_DATE()) as __partitionvalue FROM (%s)", header, qr))
}

// constructQueryWithOrderedColumnsWithBQIngestionTime constructs query with ordered columns and BQ pseudo columns for ingestion time
// based on dstart.
// ref: https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
func constructQueryWithOrderedColumnsWithBQIngestionTime(query []byte, orderedColumns []string, dstart string) []byte {
var orderedColumnsWithBQIngestionTime []string
for _, col := range orderedColumns {
val := col
switch col {
case "_partitiontime":
val = fmt.Sprintf("TIMESTAMP('%s') as _partitiontime", dstart)
case "_partitiondate":
val = fmt.Sprintf("DATE(TIMESTAMP('%s')) as _partitiondate", dstart)
}
orderedColumnsWithBQIngestionTime = append(orderedColumnsWithBQIngestionTime, val)
}
return constructQueryWithOrderedColumns(query, orderedColumnsWithBQIngestionTime)
}

func constructQueryWithOrderedColumns(query []byte, orderedColumns []string) []byte {
header, qr := loader.SeparateHeadersAndQuery(string(query))
return []byte(fmt.Sprintf("%s %s", header, loader.ConstructQueryWithOrderedColumns(qr, orderedColumns)))
return nil
}
174 changes: 0 additions & 174 deletions mc2mc/internal/client/client_test.go

This file was deleted.

36 changes: 3 additions & 33 deletions mc2mc/internal/client/setup.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
package client

import (
"log/slog"

"github.com/aliyun/aliyun-odps-go-sdk/odps"
"github.com/pkg/errors"

"github.com/goto/transformers/mc2mc/internal/loader"
"github.com/goto/transformers/mc2mc/internal/logger"
)

type SetupFn func(c *Client) error

func SetupLogger(logLevel string) SetupFn {
func SetupLogger(logger *slog.Logger) SetupFn {
return func(c *Client) error {
logger, err := logger.NewLogger(logLevel)
if err != nil {
return errors.WithStack(err)
}
c.logger = logger
return nil
}
Expand All @@ -41,28 +36,3 @@ func SetupOTelSDK(collectorGRPCEndpoint, jobName, scheduledTime string) SetupFn
return nil
}
}

func SetupLoader(loadMethod string) SetupFn {
return func(c *Client) error {
loader, err := loader.GetLoader(loadMethod, c.logger)
if err != nil {
return errors.WithStack(err)
}
c.Loader = loader
return nil
}
}

func EnablePartitionValue(enabled bool) SetupFn {
return func(c *Client) error {
c.enablePartitionValue = enabled
return nil
}
}

func EnableAutoPartition(enabled bool) SetupFn {
return func(c *Client) error {
c.enableAutoPartition = enabled
return nil
}
}
27 changes: 0 additions & 27 deletions mc2mc/internal/loader/append.go

This file was deleted.

7 changes: 0 additions & 7 deletions mc2mc/internal/loader/const.go

This file was deleted.

Loading
Loading