Skip to content

Commit

Permalink
feat: update mlflow-related metadata models (#12174)
Browse files Browse the repository at this point in the history
Co-authored-by: John Joyce <[email protected]>
Co-authored-by: John Joyce <[email protected]>
  • Loading branch information
3 people authored Dec 24, 2024
1 parent 21ddb55 commit 047644b
Show file tree
Hide file tree
Showing 12 changed files with 568 additions and 95 deletions.
196 changes: 193 additions & 3 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,16 @@ type Query {
Fetch all Business Attributes
"""
listBusinessAttributes(input: ListBusinessAttributesInput!): ListBusinessAttributesResult

"""
Fetch a Data Process Instance by primary key (urn)
"""
dataProcessInstance(urn: String!): DataProcessInstance


}


"""
An ERModelRelationship is a high-level abstraction that dictates which dataset fields are joined.
"""
Expand Down Expand Up @@ -9832,15 +9840,45 @@ type MLModelGroup implements EntityWithRelationships & Entity & BrowsableEntity
privileges: EntityPrivileges
}

"""
Properties describing a group of related ML models
"""
type MLModelGroupProperties {
  """
  Display name of the model group
  """
  name: String

  """
  Detailed description of the model group's purpose and contents
  """
  description: String

  """
  When this model group was created
  """
  created: AuditStamp

  """
  When this model group was last modified
  """
  lastModified: AuditStamp

  """
  Version identifier for this model group
  """
  version: VersionTag

  """
  Custom key-value properties for the model group
  """
  customProperties: [CustomPropertiesEntry!]

  # `createdAt` is declared exactly once. A GraphQL object type may not define
  # the same field name twice, so the earlier un-annotated `createdAt: Long`
  # declaration is folded into this deprecated one.
  """
  Deprecated creation timestamp
  @deprecated Use the 'created' field instead
  """
  createdAt: Long @deprecated(reason: "Use `created` instead")
}

"""
Expand Down Expand Up @@ -9990,40 +10028,103 @@ description: String
}

"""
A single named metric value recorded for an ML model or training run
"""
type MLMetric {
  """
  Name of the metric (e.g. accuracy, precision, recall)
  """
  name: String

  """
  Description of what this metric measures
  """
  description: String

  """
  The computed value of the metric
  """
  value: String

  """
  Timestamp when this metric was recorded
  """
  createdAt: Long
}

"""
Properties associated with an ML model
"""
type MLModelProperties {
  """
  The display name of the model used in the UI
  """
  name: String!

  """
  Detailed description of the model's purpose and characteristics
  """
  description: String

  """
  When the model was last modified
  """
  lastModified: AuditStamp

  """
  Version identifier for this model
  """
  version: String

  """
  The type/category of ML model (e.g. classification, regression)
  """
  type: String

  """
  Mapping of hyperparameter configurations
  """
  hyperParameters: HyperParameterMap

  # `hyperParams` is declared exactly once; a GraphQL object type may not
  # define the same field name twice.
  """
  List of hyperparameter settings used to train this model
  """
  hyperParams: [MLHyperParam]

  """
  Performance metrics from model training
  """
  trainingMetrics: [MLMetric]

  """
  Names of ML features used by this model
  """
  mlFeatures: [String!]

  """
  Tags for categorizing and searching models
  """
  tags: [String!]

  """
  Model groups this model belongs to
  """
  groups: [MLModelGroup]

  """
  Additional custom properties specific to this model
  """
  customProperties: [CustomPropertiesEntry!]

  """
  URL to view this model in external system
  """
  externalUrl: String

  """
  When this model was created
  """
  created: AuditStamp

  # `date` is declared exactly once, carrying the deprecation directive; the
  # earlier un-annotated `date: Long` declaration is folded into this one.
  """
  Deprecated timestamp for model creation
  @deprecated Use 'created' field instead
  """
  date: Long @deprecated(reason: "Use `created` instead")
}

type MLFeatureProperties {
Expand Down Expand Up @@ -12804,3 +12905,92 @@ type CronSchedule {
"""
timezone: String!
}


# Exposed through the `properties` field of the `DataProcessInstance` type
# extension defined later in this schema.
# NOTE(review): the field set appears to parallel the PDL record of the same
# name (`created`, plus the included custom-properties / external-reference
# fields) — confirm against the GraphQL mapper, which is not visible here.
"""
Properties describing a data process instance's execution metadata
"""
type DataProcessInstanceProperties {
"""
The display name of this process instance
"""
name: String!

"""
URL to view this process instance in the external system
"""
externalUrl: String

"""
When this process instance was created
"""
created: AuditStamp

"""
Additional custom properties specific to this process instance
"""
customProperties: [CustomPropertiesEntry!]
}

# Exposed through the `mlTrainingRunProperties` field of the
# `DataProcessInstance` type extension, which documents it as applying when
# the subtype is Training Run.
# `hyperParams` and `trainingMetrics` reuse the MLHyperParam / MLMetric types
# that MLModelProperties also uses.
"""
Properties specific to an ML model training run instance
"""
type MLTrainingRunProperties {
"""
Unique identifier for this training run
"""
id: String

# NOTE(review): list items are nullable here (`[String]`), unlike the
# `[String!]` lists on MLModelProperties (`mlFeatures`, `tags`) — confirm
# this is intended.
"""
List of URLs to access training run outputs (e.g. model artifacts, logs)
"""
outputUrls: [String]

"""
Hyperparameters used in this training run
"""
hyperParams: [MLHyperParam]

"""
Performance metrics recorded during this training run
"""
trainingMetrics: [MLMetric]
}

extend type DataProcessInstance {
  """
  Additional read only properties associated with the Data Process Instance
  """
  properties: DataProcessInstanceProperties

  """
  The specific instance of the data platform that this entity belongs to
  """
  dataPlatformInstance: DataPlatformInstance

  """
  Sub Types that this entity implements
  """
  subTypes: SubTypes

  """
  The parent container in which the entity resides
  """
  container: Container

  """
  Standardized platform urn where the data process instance is defined
  """
  platform: DataPlatform!

  """
  Recursively get the lineage of containers for this entity
  """
  parentContainers: ParentContainersResult

  """
  Additional properties when subtype is Training Run
  """
  mlTrainingRunProperties: MLTrainingRunProperties
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ record DataProcessInstanceOutput {
@Relationship = {
"/*": {
"name": "Produces",
"entityTypes": [ "dataset" ]
"entityTypes": [ "dataset", "mlModel" ]
}
}
@Searchable = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ record DataProcessInstanceProperties includes CustomProperties, ExternalReferenc
}
created: AuditStamp

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import com.linkedin.common.Urn
import com.linkedin.common.Time
import com.linkedin.common.VersionTag
import com.linkedin.common.CustomProperties
import com.linkedin.common.TimeStamp

/**
* Properties associated with an ML Model Group
Expand All @@ -13,6 +14,17 @@ import com.linkedin.common.CustomProperties
}
record MLModelGroupProperties includes CustomProperties {

/**
* Display name of the MLModelGroup
*/
@Searchable = {
"fieldType": "WORD_GRAM",
"enableAutocomplete": true,
"boostScore": 10.0,
"queryByDefault": true,
}
name: optional string

/**
* Documentation of the MLModelGroup
*/
Expand All @@ -25,8 +37,31 @@ record MLModelGroupProperties includes CustomProperties {
/**
* Date when the MLModelGroup was developed
*/
@deprecated
createdAt: optional Time

/**
* Time and Actor who created the MLModelGroup
*/
created: optional TimeStamp

/**
* Date when the MLModelGroup was last modified
*/
lastModified: optional TimeStamp

/**
* List of jobs (if any) used to train the model group. Visible in Lineage.
*/
@Relationship = {
"/*": {
"name": "TrainedBy",
"entityTypes": [ "dataJob" ],
"isLineage": true
}
}
trainingJobs: optional array[Urn]

/**
* Version of the MLModelGroup
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.linkedin.common.Time
import com.linkedin.common.VersionTag
import com.linkedin.common.CustomProperties
import com.linkedin.common.ExternalReference
import com.linkedin.common.TimeStamp

/**
* Properties associated with a ML Model
Expand All @@ -15,6 +16,18 @@ import com.linkedin.common.ExternalReference
}
record MLModelProperties includes CustomProperties, ExternalReference {

/**
* Display name of the MLModel
*/
@Searchable = {
"fieldType": "WORD_GRAM",
"enableAutocomplete": true,
"boostScore": 10.0,
"queryByDefault": true,
}
name: optional string


/**
* Documentation of the MLModel
*/
Expand All @@ -27,8 +40,19 @@ record MLModelProperties includes CustomProperties, ExternalReference {
/**
* Date when the MLModel was developed
*/
@deprecated
date: optional Time

/**
* Time and Actor who created the MLModel
*/
created: optional TimeStamp

/**
* Date when the MLModel was last modified
*/
lastModified: optional TimeStamp

/**
* Version of the MLModel
*/
Expand Down Expand Up @@ -93,12 +117,12 @@ record MLModelProperties includes CustomProperties, ExternalReference {
deployments: optional array[Urn]

/**
* List of jobs (if any) used to train the model
* List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of specific Data Process Instances (runs) via the DataProcessInstanceOutput aspect.
*/
@Relationship = {
"/*": {
"name": "TrainedBy",
"entityTypes": [ "dataJob" ],
"entityTypes": [ "dataJob", "dataProcessInstance" ],
"isLineage": true
}
}
Expand Down
Loading

0 comments on commit 047644b

Please sign in to comment.