Skip to content

Commit

Permalink
Feature/datasets (#9)
Browse files Browse the repository at this point in the history
* Add support for Dataset CRUD operations
  • Loading branch information
Telemaco019 authored Jan 6, 2022
1 parent 5747d56 commit 3d7db22
Show file tree
Hide file tree
Showing 20 changed files with 1,435 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: main
name: test

on:
push:
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Azure ML Go SDK

[![Actions Status](https://github.com/telemaco019/azureml-go-sdk/workflows/main/badge.svg)](https://github.com/telemaco019/azureml-go-sdk/actions)
[![Actions Status](https://github.com/telemaco019/azureml-go-sdk/workflows/test/badge.svg)](https://github.com/telemaco019/azureml-go-sdk/actions)
[![codecov](https://codecov.io/gh/telemaco019/azureml-go-sdk/branch/main/graph/badge.svg)](https://codecov.io/gh/telemaco019/azureml-go-sdk)

Go SDK for configuring [Azure Machine Learning](https://azure.microsoft.com/en-us/services/machine-learning/)
Expand Down Expand Up @@ -53,5 +53,5 @@ datastores, err := ws.GetDatastores( "rg-name", "workspace-name" )
### Get a specific Datastore of a workspace

```go
datastore, err := ws.GetDatastores( "rg-name", "workspace-name", "datastore-name" )
datastore, err := ws.GetDatastore( "rg-name", "workspace-name", "datastore-name" )
```
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/google/uuid v1.1.1 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/objx v0.1.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
Expand Down
3 changes: 3 additions & 0 deletions workspace/assets/example_resp_create_or_update_dataset.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions workspace/assets/example_resp_get_dataset.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions workspace/assets/example_resp_get_dataset_next_version.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions workspace/assets/example_resp_get_dataset_versions.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions workspace/assets/example_resp_get_datasets.json
Git LFS file not shown
1 change: 1 addition & 0 deletions workspace/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ package workspace

const (
	// DefaultAmlOauthScope is the OAuth2 scope requested when acquiring a
	// token for the Azure management API.
	DefaultAmlOauthScope string = "https://management.azure.com/.default"
	// NConcurrentWorkers bounds the number of concurrent workers.
	// NOTE(review): its consumer is outside this view — presumably used to
	// parallelize workspace API calls; confirm at the call site.
	NConcurrentWorkers = 8
)
110 changes: 93 additions & 17 deletions workspace/converters.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@ package workspace
import (
"fmt"
"github.com/tidwall/gjson"
"go.uber.org/zap"
"regexp"
)

func unmarshalDatastore(json []byte) *Datastore {
sysData := SystemData{
CreationDate: gjson.GetBytes(json, "systemData.createdAt").Time(),
CreationUserType: gjson.GetBytes(json, "systemData.createdByType").Str,
CreationUser: gjson.GetBytes(json, "systemData.createdBy").Str,
LastModifiedDate: gjson.GetBytes(json, "systemData.lastModifiedAt").Time(),
LastModifiedUserType: gjson.GetBytes(json, "systemData.lastModifiedByType").Str,
LastModifiedUser: gjson.GetBytes(json, "systemData.lastModifiedBy").Str,
// unmarshalDatastoreArray converts a list-datastores JSON response into a
// slice of Datastore values: each element of the top-level "value" array is
// unmarshalled through unmarshalDatastore.
func unmarshalDatastoreArray(json []byte) []Datastore {
	jsonDatastoreArray := gjson.GetBytes(json, "value").Array()
	// Size the result from the already-parsed array instead of issuing a
	// second "value.#" query against the raw JSON.
	datastoreSlice := make([]Datastore, len(jsonDatastoreArray))
	for i, jsonDatastore := range jsonDatastoreArray {
		datastoreSlice[i] = *unmarshalDatastore([]byte(jsonDatastore.Raw))
	}
	return datastoreSlice
}

func unmarshalDatastore(json []byte) *Datastore {
auth := DatastoreAuth{
CredentialsType: gjson.GetBytes(json, "properties.contents.credentials.credentialsType").Str,
TenantId: gjson.GetBytes(json, "properties.contents.credentials.tenantId").Str,
Expand All @@ -32,20 +36,75 @@ func unmarshalDatastore(json []byte) *Datastore {
StorageContainerName: gjson.GetBytes(json, "properties.contents.containerName").Str,
StorageType: gjson.GetBytes(json, "properties.contents.contentsType").Str,

SystemData: &sysData,
SystemData: unmarshalSystemData(json),
Auth: &auth,
}
}

func unmarshalDatastoreArray(json []byte) []Datastore {
jsonDatastoreArray := gjson.GetBytes(json, "value").Array()
datastoreSlice := make([]Datastore, gjson.GetBytes(json, "value.#").Int())
for i, jsonDatastore := range jsonDatastoreArray {
datastore := unmarshalDatastore([]byte(jsonDatastore.Raw))
datastoreSlice[i] = *datastore
fmt.Println(datastore)
// DatasetConverter unmarshals Azure ML REST API JSON payloads into Dataset
// values. Unmarshalling problems are reported through the logger rather than
// returned as errors, so a single malformed entry does not abort conversion.
type DatasetConverter struct {
	logger *zap.SugaredLogger
}

// unmarshalDatasetVersionArray converts a list-dataset-versions JSON response
// into a slice of Dataset values, all carrying the given datasetName.
func (d DatasetConverter) unmarshalDatasetVersionArray(datasetName string, json []byte) []Dataset {
	jsonDatasetArray := gjson.GetBytes(json, "value").Array()
	// Size the result from the already-parsed array instead of issuing a
	// second "value.#" query against the raw JSON.
	datasetSlice := make([]Dataset, len(jsonDatasetArray))
	for i, jsonDataset := range jsonDatasetArray {
		datasetSlice[i] = *d.unmarshalDatasetVersion(datasetName, []byte(jsonDataset.Raw))
	}
	return datasetSlice
}

// unmarshalDatasetVersion converts a single dataset-version JSON object into a
// Dataset. The dataset name is not part of the version payload (the payload's
// "name" field holds the version number), so it is supplied by the caller.
func (d DatasetConverter) unmarshalDatasetVersion(datasetName string, json []byte) *Dataset {
	// Parse "properties.paths" once and reuse the result for both path types,
	// instead of querying the raw JSON twice.
	paths := gjson.GetBytes(json, "properties.paths")
	return &Dataset{
		Id:          gjson.GetBytes(json, "id").Str,
		Name:        datasetName,
		Description: gjson.GetBytes(json, "properties.description").Str,
		DatastoreId: gjson.GetBytes(json, "properties.datastoreId").Str,
		// The API encodes the version number in the "name" field.
		Version:        int(gjson.GetBytes(json, "name").Int()),
		FilePaths:      d.unmarshalDatasetPaths(paths, "file"),
		DirectoryPaths: d.unmarshalDatasetPaths(paths, "folder"),
		SystemData:     unmarshalSystemData(json),
	}
}

// unmarshalDatasetNextVersion reads the "properties.nextVersion" field from
// the given JSON payload and returns it as an int (0 when absent).
func (d DatasetConverter) unmarshalDatasetNextVersion(json []byte) int {
	nextVersion := gjson.GetBytes(json, "properties.nextVersion")
	return int(nextVersion.Int())
}

// unmarshalDatasetPaths extracts the paths of the given type (e.g. "file" or
// "folder") from a gjson array of dataset path objects. Entries whose value is
// null or that do not look like datastore paths are skipped; malformed
// datastore paths are logged and skipped. Iteration stops at the first entry
// missing the requested path type, since every entry shares the same shape.
func (d DatasetConverter) unmarshalDatasetPaths(jsonDatasetPaths gjson.Result, pathType string) []DatasetPath {
	result := make([]DatasetPath, 0)
	jsonDatasetPaths.ForEach(func(key, value gjson.Result) bool {
		path := value.Get(pathType)
		if !path.Exists() {
			// Bug fix: the message previously read "does exist" even though
			// this branch fires when the key is MISSING.
			d.logger.Errorf("cannot unmarshal dataset path: path type %q does not exist", pathType)
			return false
		}
		if path.Type != gjson.Null {
			// Error deliberately ignored: the pattern is built from a constant
			// prefix and cannot fail to compile at runtime.
			isDatastorePath, _ := regexp.MatchString(fmt.Sprintf("%s.*", datastorePathPrefix), path.Str)
			if isDatastorePath {
				datastorePath, err := NewDatastorePath(path.Str)
				if err != nil {
					d.logger.Errorf("error unmarshalling dataset path: %s", err.Error())
				} else {
					result = append(result, datastorePath)
				}
			}
		}
		return true
	})
	return result
}

// unmarshalSystemData extracts the Azure "systemData" audit block
// (creation and last-modification metadata) from the given JSON payload.
func unmarshalSystemData(json []byte) *SystemData {
	systemData := gjson.GetBytes(json, "systemData")
	return &SystemData{
		CreationDate:         systemData.Get("createdAt").Time(),
		CreationUserType:     systemData.Get("createdByType").Str,
		CreationUser:         systemData.Get("createdBy").Str,
		LastModifiedDate:     systemData.Get("lastModifiedAt").Time(),
		LastModifiedUserType: systemData.Get("lastModifiedByType").Str,
		LastModifiedUser:     systemData.Get("lastModifiedBy").Str,
	}
}

func toWriteDatastoreSchema(datastore *Datastore) *SchemaWrapper {
Expand Down Expand Up @@ -81,3 +140,20 @@ func toWriteDatastoreSchema(datastore *Datastore) *SchemaWrapper {
},
}
}

// toWriteDatasetSchema converts a Dataset into the wrapped write schema
// expected by the Azure ML REST API when creating or updating a dataset
// version. Both file and directory paths are included in the schema's Paths.
//
// Bug fix: the original second loop reused index i starting from 0, so
// directory paths overwrote the file-path entries and the tail of the slice
// was left as zero-valued schemas. Paths are now appended in order.
func toWriteDatasetSchema(dataset *Dataset) *SchemaWrapper {
	pathSchemas := make([]DatasetPathsSchema, 0, len(dataset.FilePaths)+len(dataset.DirectoryPaths))
	for _, filePath := range dataset.FilePaths {
		pathSchemas = append(pathSchemas, DatasetPathsSchema{FilePath: filePath.String()})
	}
	for _, directoryPath := range dataset.DirectoryPaths {
		pathSchemas = append(pathSchemas, DatasetPathsSchema{DirectoryPath: directoryPath.String()})
	}

	return &SchemaWrapper{
		Properties: WriteDatasetSchema{
			Description: dataset.Description,
			Paths:       pathSchemas,
		},
	}
}
176 changes: 176 additions & 0 deletions workspace/convertes_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package workspace

import (
"fmt"
"github.com/stretchr/testify/assert"
"github.com/tidwall/gjson"
"go.uber.org/zap"
"testing"
"time"
)
Expand Down Expand Up @@ -153,3 +156,176 @@ func TestToWriteDatastoreSchema_NilAuth(t *testing.T) {
}
assert.Equal(t, expected, writeSchema)
}

// TestToWriteDatasetSchema verifies the Dataset -> WriteDatasetSchema
// conversion performed by toWriteDatasetSchema. Cases run as t.Run subtests
// so failures are attributed to the specific case.
func TestToWriteDatasetSchema(t *testing.T) {
	a := assert.New(t)

	testCases := []struct {
		testCaseName string
		testCase     func(t *testing.T)
	}{
		{
			testCaseName: "Test convert empty dataset",
			testCase: func(t *testing.T) {
				d := &Dataset{}
				schema := toWriteDatasetSchema(d)
				props := schema.Properties.(WriteDatasetSchema)
				a.Empty(props.Description)
				a.Empty(props.Paths)
			},
		},
		{
			testCaseName: "Test convert dataset with datastore paths",
			testCase: func(t *testing.T) {
				d := &Dataset{
					Id:          "id",
					Name:        "name",
					Description: "description",
					DatastoreId: "datastore-id",
					Version:     1,
					FilePaths: []DatasetPath{
						DatastorePath{
							DatastoreName: "foo",
							Path:          "file.json",
						},
						DatastorePath{
							DatastoreName: "foo2",
							Path:          "file2.json",
						},
						DatastorePath{
							DatastoreName: "foo3",
							Path:          "file3.json",
						},
					},
					DirectoryPaths: []DatasetPath{
						DatastorePath{
							DatastoreName: "foo1",
							Path:          "/dir1",
						},
						DatastorePath{
							DatastoreName: "foo2",
							Path:          "/dir2",
						},
					},
					SystemData: &SystemData{},
				}
				props := toWriteDatasetSchema(d)
				writeSchema := props.Properties.(WriteDatasetSchema)

				a.Equal(d.Description, writeSchema.Description)
				a.Equal(len(d.DirectoryPaths)+len(d.FilePaths), len(writeSchema.Paths))
			},
		},
		{
			testCaseName: "Test datastore directory paths conversion",
			testCase: func(t *testing.T) {
				d := &Dataset{
					DirectoryPaths: []DatasetPath{
						DatastorePath{
							DatastoreName: "datastore",
							Path:          "/foo/bar/",
						},
					},
				}
				props := toWriteDatasetSchema(d)
				schema := props.Properties.(WriteDatasetSchema)
				schemaPath := schema.Paths[0]
				a.Empty(schemaPath.FilePath)
				a.Equal(d.DirectoryPaths[0].String(), schemaPath.DirectoryPath)
			},
		},
		{
			testCaseName: "Test file paths conversion",
			testCase: func(t *testing.T) {
				// Previously an empty (dead) test case; mirrors the directory
				// paths case for file paths.
				d := &Dataset{
					FilePaths: []DatasetPath{
						DatastorePath{
							DatastoreName: "datastore",
							Path:          "foo/bar.json",
						},
					},
				}
				props := toWriteDatasetSchema(d)
				schema := props.Properties.(WriteDatasetSchema)
				schemaPath := schema.Paths[0]
				a.Empty(schemaPath.DirectoryPath)
				a.Equal(d.FilePaths[0].String(), schemaPath.FilePath)
			},
		},
	}
	for _, test := range testCases {
		// t.Run replaces the previous zap logger announcements and attributes
		// failures to the named subtest.
		t.Run(test.testCaseName, test.testCase)
	}
}

// TestUnmarshalDatasetPaths verifies DatasetConverter.unmarshalDatasetPaths
// against empty input, missing path types, non-datastore paths, and valid or
// malformed datastore paths. Cases run as t.Run subtests so failures are
// attributed to the specific case.
func TestUnmarshalDatasetPaths(t *testing.T) {
	a := assert.New(t)
	l, _ := zap.NewDevelopment()
	logger := l.Sugar()
	converter := &DatasetConverter{logger: logger}

	testCases := []struct {
		testCaseName string
		testCase     func(t *testing.T)
	}{
		{
			testCaseName: "Test unmarshal dataset paths empty list",
			testCase: func(t *testing.T) {
				paths := gjson.Parse("[]")
				result := converter.unmarshalDatasetPaths(paths, "")
				a.Empty(result)
			},
		},
		{
			testCaseName: "Test unmarshal dataset paths invalid path type",
			testCase: func(t *testing.T) {
				paths := gjson.Parse("[{\"file\": null, \"folder\": \"azureml://datastores/datastore/foo\"}]")
				result := converter.unmarshalDatasetPaths(paths, "foo")
				a.Empty(result)
			},
		},
		{
			testCaseName: "Test unmarshal dataset paths not matching datastore regex",
			testCase: func(t *testing.T) {
				paths := gjson.Parse("[{\"file\": null, \"folder\": \"path\"}]")
				result := converter.unmarshalDatasetPaths(paths, "folder")
				a.Empty(result)
			},
		},
		{
			testCaseName: "Test unmarshal dataset folder datastore paths",
			testCase: func(t *testing.T) {
				firstPath := "azureml://datastores/datastore/paths/path/bar"
				secondPath := "azureml://datastores/datastore2/paths/foo2"
				paths := gjson.Parse(fmt.Sprintf("[{\"file\": null, \"folder\": \"%s\"}, {\"file\": null, \"folder\": \"%s\"}]", firstPath, secondPath))
				filePaths := converter.unmarshalDatasetPaths(paths, "file")
				folderPaths := converter.unmarshalDatasetPaths(paths, "folder")
				a.Empty(filePaths)
				a.Equal(2, len(folderPaths))
				a.Equal(firstPath, folderPaths[0].String())
			},
		},
		{
			testCaseName: "Test unmarshal dataset file datastore paths",
			testCase: func(t *testing.T) {
				firstPath := "azureml://datastores/datastore/paths/foo/bar/foo"
				secondPath := "azureml://datastores/datastore2/paths/foo2"
				paths := gjson.Parse(fmt.Sprintf("[{\"folder\": null, \"file\": \"%s\"}, {\"folder\": null, \"file\": \"%s\"}]", firstPath, secondPath))
				folderPaths := converter.unmarshalDatasetPaths(paths, "folder")
				filePaths := converter.unmarshalDatasetPaths(paths, "file")
				a.Empty(folderPaths)
				a.Equal(2, len(filePaths))
				a.Equal(firstPath, filePaths[0].String())
			},
		},
		{
			testCaseName: "Test unmarshal dataset malformed datastore paths",
			testCase: func(t *testing.T) {
				firstPath := "azureml://datastores/datastore/paths/foo/bar/foo"
				secondPath := "azureml://datastores/malformed"
				paths := gjson.Parse(fmt.Sprintf("[{\"folder\": null, \"file\": \"%s\"}, {\"folder\": null, \"file\": \"%s\"}]", firstPath, secondPath))
				folderPaths := converter.unmarshalDatasetPaths(paths, "folder")
				filePaths := converter.unmarshalDatasetPaths(paths, "file")
				a.Empty(folderPaths)
				// The malformed second path is logged and skipped.
				a.Equal(1, len(filePaths))
				a.Equal(firstPath, filePaths[0].String())
			},
		},
	}

	for _, test := range testCases {
		// t.Run replaces the previous zap logger announcements and attributes
		// failures to the named subtest.
		t.Run(test.testCaseName, test.testCase)
	}
}
Loading

0 comments on commit 3d7db22

Please sign in to comment.