Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable included fields feature for search assets api #61

Merged
merged 1 commit into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/asset/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ type SearchConfig struct {
// Offset parameter defines the offset from the first result you want to fetch
// Note that MaxResults + Offset can not be more than the `index.max_result_window` index setting in ES cluster, which defaults to 10,000
Offset int

// IncludeFields specifies the fields to return in response
IncludeFields []string
}

// SearchResult represents an item/result in a list of search results
Expand Down
15 changes: 8 additions & 7 deletions internal/server/v1beta1/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ func (server *APIServer) SearchAssets(ctx context.Context, req *compassv1beta1.S
}

cfg := asset.SearchConfig{
Text: text,
MaxResults: int(req.GetSize()),
Filters: filterConfigFromValues(req.GetFilter()),
RankBy: req.GetRankby(),
Queries: req.GetQuery(),
Flags: getSearchFlagsFromFlags(req.GetFlags()),
Offset: int(req.GetOffset()),
Text: text,
MaxResults: int(req.GetSize()),
Filters: filterConfigFromValues(req.GetFilter()),
RankBy: req.GetRankby(),
Queries: req.GetQuery(),
Flags: getSearchFlagsFromFlags(req.GetFlags()),
Offset: int(req.GetOffset()),
IncludeFields: req.GetIncludeFields(),
}

results, err := server.assetService.SearchAssets(ctx, cfg)
Expand Down
14 changes: 12 additions & 2 deletions internal/store/elasticsearch/discovery_search_repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ const (
suggesterName = "name-phrase-suggest"
)

var returnedAssetFieldsResult = []string{"id", "urn", "type", "service", "name", "description", "data", "labels", "created_at", "updated_at"}

// Search the asset store
func (repo *DiscoveryRepository) Search(ctx context.Context, cfg asset.SearchConfig) (results []asset.SearchResult, err error) {
if strings.TrimSpace(cfg.Text) == "" {
return nil, asset.DiscoveryError{Op: "Search", Err: errors.New("search text cannot be empty")}
}
var returnedAssetFieldsResult []string

maxResults := cfg.MaxResults
if maxResults <= 0 {
maxResults = defaultMaxResults
Expand All @@ -38,6 +38,15 @@ func (repo *DiscoveryRepository) Search(ctx context.Context, cfg asset.SearchCon
offset = 0
}

if len(cfg.IncludeFields) == 0 {
returnedAssetFieldsResult = []string{
"id", "urn", "type", "service", "name", "description", "data", "labels",
"created_at", "updated_at",
}
} else {
returnedAssetFieldsResult = cfg.IncludeFields
}

defer func(start time.Time) {
const op = "search"
repo.cli.instrumentOp(ctx, instrumentParams{
Expand Down Expand Up @@ -350,6 +359,7 @@ func toSearchResults(hits []searchHit) []asset.SearchResult {
"_highlight": hit.HighLight,
}
}

results[i] = asset.SearchResult{
Type: r.Type.String(),
ID: id,
Expand Down
80 changes: 63 additions & 17 deletions internal/store/elasticsearch/discovery_search_repository_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func TestSearcherSearch(t *testing.T) {
Type string
AssetID string
Data map[string]interface{}
Service string
}
type searchTest struct {
Description string
Expand All @@ -68,10 +69,38 @@ func TestSearcherSearch(t *testing.T) {

tests := []searchTest{
{
Description: "should fetch assets which has text in any of its fields",
Description: "should fetch assets with fields mentioned in included fields",
Config: asset.SearchConfig{
Text: "topic",
IncludeFields: []string{"id", "data.company", "type"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic", Data: map[string]interface{}{"company": "gotocompany"}},
{Type: "topic", AssetID: "order-topic", Data: map[string]interface{}{"company": "gotocompany"}},
{Type: "topic", AssetID: "purchase-topic", Data: map[string]interface{}{"company": "microsoft"}},
{Type: "topic", AssetID: "consumer-mq-2", Data: map[string]interface{}{"company": "gotocompany"}},
{Type: "topic", AssetID: "transaction", Data: map[string]interface{}{"company": "gotocompany"}},
},
},
{
Description: "should fetch assets with default fields if included fields is empty",
Config: asset.SearchConfig{
Text: "topic",
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic", Service: "rabbitmq", Data: map[string]interface{}{"company": "gotocompany", "country": "id", "description": "Update on every rabbitmq customer creation/update", "environment": "production", "partition": float64(50), "topic_name": "consumer-topic"}},
{Type: "topic", AssetID: "order-topic", Service: "kafka", Data: map[string]interface{}{"company": "gotocompany", "country": "us", "description": "Topic for each submitted order", "environment": "integration", "malformed": "2023-05-23T04:35:42Z", "partition": float64(250), "topic_name": "order-topic"}},
{Type: "topic", AssetID: "purchase-topic", Service: "kafka", Data: map[string]interface{}{"company": "microsoft", "country": "id", "description": "Topic for each submitted purchase", "environment": "integration", "malformed": "", "partition": float64(100), "topic_name": "purchase-topic"}},
{Type: "topic", AssetID: "consumer-mq-2", Service: "rabbitmq", Data: map[string]interface{}{"company": "gotocompany", "country": "id", "description": "Another rabbitmq topic", "environment": "production", "partition": float64(50), "topic_name": "consumer-mq-2"}},
{Type: "topic", AssetID: "transaction", Service: "rabbitmq", Data: map[string]interface{}{"company": "gotocompany", "description": "This publishes all the invoices from each of invoice storage where the invoice will be filtered and checked using invoice filterer and invoice checker", "environment": "production", "partition": float64(1), "topic_name": "transaction"}},
},
},
{
Description: "should fetch assets which has text in any of its fields",
Config: asset.SearchConfig{
Text: "topic",
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
{Type: "topic", AssetID: "order-topic"},
Expand All @@ -83,7 +112,8 @@ func TestSearcherSearch(t *testing.T) {
{
Description: "should enable fuzzy search",
Config: asset.SearchConfig{
Text: "tpic",
Text: "tpic",
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -96,15 +126,17 @@ func TestSearcherSearch(t *testing.T) {
{
Description: "should disable fuzzy search",
Config: asset.SearchConfig{
Text: "tpic",
Flags: asset.SearchFlags{DisableFuzzy: true},
Text: "tpic",
Flags: asset.SearchFlags{DisableFuzzy: true},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{},
},
{
Description: "should put more weight on id fields",
Config: asset.SearchConfig{
Text: "invoice",
Text: "invoice",
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "table", AssetID: "us1-apple-invoice"},
Expand All @@ -119,6 +151,7 @@ func TestSearcherSearch(t *testing.T) {
Filters: map[string][]string{
"service": {"rabbitmq", "postgres"},
},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "table", AssetID: "au2-microsoft-invoice"},
Expand All @@ -132,6 +165,7 @@ func TestSearcherSearch(t *testing.T) {
Filters: map[string][]string{
"data.company": {"gotocompany"},
},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -149,6 +183,7 @@ func TestSearcherSearch(t *testing.T) {
"data.environment": {"production"},
"data.company": {"gotocompany"},
},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -162,6 +197,7 @@ func TestSearcherSearch(t *testing.T) {
Filters: map[string][]string{
"owners.email": {"[email protected]"},
},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -170,8 +206,9 @@ func TestSearcherSearch(t *testing.T) {
{
Description: "should return a descendingly sorted based on usage count in search results if rank by usage in the config",
Config: asset.SearchConfig{
Text: "bigquery",
RankBy: "data.profile.usage_count",
Text: "bigquery",
RankBy: "data.profile.usage_count",
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
Expand All @@ -190,6 +227,7 @@ func TestSearcherSearch(t *testing.T) {
"description": "rabbitmq",
"owners.email": "john.doe",
},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -198,9 +236,10 @@ func TestSearcherSearch(t *testing.T) {
{
Description: "should return 5 records with offset of 0",
Config: asset.SearchConfig{
Text: "topic",
Offset: 0,
MaxResults: 5,
Text: "topic",
Offset: 0,
MaxResults: 5,
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -213,9 +252,10 @@ func TestSearcherSearch(t *testing.T) {
{
Description: "should return 4 records with offset of 1",
Config: asset.SearchConfig{
Text: "topic",
Offset: 1,
MaxResults: 5,
Text: "topic",
Offset: 1,
MaxResults: 5,
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
// {Type: "topic", AssetID: "consumer-topic"},
Expand All @@ -232,6 +272,7 @@ func TestSearcherSearch(t *testing.T) {
Queries: map[string]string{
"data.schema.columns.name": "common",
},
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"},
Expand All @@ -241,8 +282,9 @@ func TestSearcherSearch(t *testing.T) {
{
Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if searched for text 'tablename-abc-common-test'",
Config: asset.SearchConfig{
Text: "tablename-abc-common-test",
RankBy: "data.profile.usage_count",
Text: "tablename-abc-common-test",
RankBy: "data.profile.usage_count",
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
Expand All @@ -261,6 +303,7 @@ func TestSearcherSearch(t *testing.T) {
Flags: asset.SearchFlags{
EnableHighlight: true,
},
IncludeFields: []string{"type", "id"},
},

Expected: []expectedRow{
Expand All @@ -284,8 +327,9 @@ func TestSearcherSearch(t *testing.T) {
Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if " +
"searched for text 'abc-test' as it has both the keywords searched",
Config: asset.SearchConfig{
Text: "abc-test",
RankBy: "data.profile.usage_count",
Text: "abc-test",
RankBy: "data.profile.usage_count",
IncludeFields: []string{"type", "id"},
},
Expected: []expectedRow{
{Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"},
Expand All @@ -302,6 +346,8 @@ func TestSearcherSearch(t *testing.T) {
for i, res := range test.Expected {
assert.Equal(t, res.Type, results[i].Type)
assert.Equal(t, res.AssetID, results[i].ID)
assert.Equal(t, res.Data, results[i].Data)
assert.Equal(t, res.Service, results[i].Service)
if test.Config.Flags.EnableHighlight {
assert.Equal(t, res.Data["_highlight"], results[i].Data["_highlight"])
}
Expand Down
Loading