Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update mlflow-related metadata models #12174

Merged
merged 22 commits into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 193 additions & 3 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,16 @@ type Query {
Fetch all Business Attributes
"""
listBusinessAttributes(input: ListBusinessAttributesInput!): ListBusinessAttributesResult

"""
Fetch a Data Process Instance by primary key (urn)
"""
dataProcessInstance(urn: String!): DataProcessInstance


}


"""
An ERModelRelationship is a high-level abstraction that dictates what datasets fields are erModelRelationshiped.
"""
Expand Down Expand Up @@ -9832,15 +9840,45 @@ type MLModelGroup implements EntityWithRelationships & Entity & BrowsableEntity
privileges: EntityPrivileges
}

"""
Properties describing a group of related ML models
"""
type MLModelGroupProperties {
  """
  Display name of the model group
  """
  name: String

  """
  Detailed description of the model group's purpose and contents
  """
  description: String

  """
  When this model group was created
  """
  created: AuditStamp

  """
  When this model group was last modified
  """
  lastModified: AuditStamp

  """
  Version identifier for this model group
  """
  version: VersionTag

  """
  Custom key-value properties for the model group
  """
  customProperties: [CustomPropertiesEntry!]

  # `createdAt` is declared exactly once, here, carrying the deprecation
  # directive. Field names must be unique within an object type, so the
  # older undecorated `createdAt: Long` declaration is not repeated.
  """
  Deprecated creation timestamp
  @deprecated Use the 'created' field instead
  """
  createdAt: Long @deprecated(reason: "Use `created` instead")
}

"""
Expand Down Expand Up @@ -9990,40 +10028,103 @@ description: String
}

# A single named metric. The value is carried as a String and the
# recording time as a Long.
type MLMetric {
  """
  Name of the metric (e.g. accuracy, precision, recall)
  """
  name: String

  """
  Description of what this metric measures
  """
  description: String

  """
  The computed value of the metric
  """
  value: String

  """
  Timestamp when this metric was recorded
  """
  createdAt: Long
}

# Properties of an ML model. Each field is declared exactly once; the
# legacy `date` timestamp is kept only in its deprecated form at the end.
type MLModelProperties {
  """
  The display name of the model used in the UI
  """
  name: String!

  """
  Detailed description of the model's purpose and characteristics
  """
  description: String

  """
  When the model was last modified
  """
  lastModified: AuditStamp

  """
  Version identifier for this model
  """
  version: String

  """
  The type/category of ML model (e.g. classification, regression)
  """
  type: String

  """
  Mapping of hyperparameter configurations
  """
  hyperParameters: HyperParameterMap

  """
  List of hyperparameter settings used to train this model
  """
  hyperParams: [MLHyperParam]

  """
  Performance metrics from model training
  """
  trainingMetrics: [MLMetric]

  """
  Names of ML features used by this model
  """
  mlFeatures: [String!]

  """
  Tags for categorizing and searching models
  """
  tags: [String!]

  """
  Model groups this model belongs to
  """
  groups: [MLModelGroup]

  """
  Additional custom properties specific to this model
  """
  customProperties: [CustomPropertiesEntry!]

  """
  URL to view this model in external system
  """
  externalUrl: String

  """
  When this model was created
  """
  created: AuditStamp

  """
  Deprecated timestamp for model creation
  @deprecated Use 'created' field instead
  """
  date: Long @deprecated(reason: "Use `created` instead")
}

type MLFeatureProperties {
Expand Down Expand Up @@ -12804,3 +12905,92 @@ type CronSchedule {
"""
timezone: String!
}


"""
Properties describing a data process instance's execution metadata
"""
type DataProcessInstanceProperties {
  # `name` is the only non-null field on this type.
  """
  The display name of this process instance
  """
  name: String!

  """
  URL to view this process instance in the external system
  """
  externalUrl: String

  """
  When this process instance was created
  """
  created: AuditStamp

  """
  Additional custom properties specific to this process instance
  """
  customProperties: [CustomPropertiesEntry!]
}

"""
Properties specific to an ML model training run instance
"""
type MLTrainingRunProperties {
  # Every field on this type is nullable.
  """
  Unique identifier for this training run
  """
  id: String

  """
  List of URLs to access training run outputs (e.g. model artifacts, logs)
  """
  outputUrls: [String]

  """
  Hyperparameters used in this training run
  """
  hyperParams: [MLHyperParam]

  """
  Performance metrics recorded during this training run
  """
  trainingMetrics: [MLMetric]
}

# Extends DataProcessInstance with properties, platform, container and
# ML-training-run fields.
extend type DataProcessInstance {
  """
  Additional read only properties associated with the Data Process Instance
  """
  properties: DataProcessInstanceProperties

  """
  The specific instance of the data platform that this entity belongs to
  """
  dataPlatformInstance: DataPlatformInstance

  """
  Sub Types that this entity implements
  """
  subTypes: SubTypes

  """
  The parent container in which the entity resides
  """
  container: Container

  """
  Standardized platform urn where the data process instance is defined
  """
  platform: DataPlatform!

  """
  Recursively get the lineage of containers for this entity
  """
  parentContainers: ParentContainersResult

  """
  Additional properties when subtype is Training Run
  """
  mlTrainingRunProperties: MLTrainingRunProperties
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ record DataProcessInstanceOutput {
@Relationship = {
"/*": {
"name": "Produces",
"entityTypes": [ "dataset" ]
"entityTypes": [ "dataset", "mlModel" ]
}
}
@Searchable = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ record DataProcessInstanceProperties includes CustomProperties, ExternalReferenc
}
created: AuditStamp

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import com.linkedin.common.Urn
import com.linkedin.common.Time
import com.linkedin.common.VersionTag
import com.linkedin.common.CustomProperties
import com.linkedin.common.TimeStamp

/**
* Properties associated with an ML Model Group
Expand All @@ -13,6 +14,17 @@ import com.linkedin.common.CustomProperties
}
record MLModelGroupProperties includes CustomProperties {

/**
* Display name of the MLModelGroup
*/
@Searchable = {
"fieldType": "WORD_GRAM",
"enableAutocomplete": true,
"boostScore": 10.0,
"queryByDefault": true,
}
name: optional string

/**
* Documentation of the MLModelGroup
*/
Expand All @@ -25,8 +37,31 @@ record MLModelGroupProperties includes CustomProperties {
/**
* Date when the MLModelGroup was developed
*/
@deprecated
createdAt: optional Time

/**
* Time and Actor who created the MLModelGroup
*/
created: optional TimeStamp

/**
* Date when the MLModelGroup was last modified
*/
lastModified: optional TimeStamp

/**
* List of jobs (if any) used to train the model group. Visible in Lineage.
*/
@Relationship = {
"/*": {
"name": "TrainedBy",
"entityTypes": [ "dataJob" ],
"isLineage": true
}
}
trainingJobs: optional array[Urn]

/**
* Version of the MLModelGroup
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.linkedin.common.Time
import com.linkedin.common.VersionTag
import com.linkedin.common.CustomProperties
import com.linkedin.common.ExternalReference
import com.linkedin.common.TimeStamp

/**
* Properties associated with a ML Model
Expand All @@ -15,6 +16,18 @@ import com.linkedin.common.ExternalReference
}
record MLModelProperties includes CustomProperties, ExternalReference {

/**
* Display name of the MLModel
*/
@Searchable = {
"fieldType": "WORD_GRAM",
"enableAutocomplete": true,
"boostScore": 10.0,
"queryByDefault": true,
}
name: optional string


/**
* Documentation of the MLModel
*/
Expand All @@ -27,8 +40,19 @@ record MLModelProperties includes CustomProperties, ExternalReference {
/**
* Date when the MLModel was developed
*/
@deprecated
date: optional Time

/**
* Audit stamp containing who created this and when
*/
created: optional TimeStamp

/**
* Date when the MLModel was last modified
*/
lastModified: optional TimeStamp

/**
* Version of the MLModel
*/
Expand Down Expand Up @@ -93,12 +117,12 @@ record MLModelProperties includes CustomProperties, ExternalReference {
deployments: optional array[Urn]

/**
* List of jobs (if any) used to train the model
* List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.
*/
@Relationship = {
"/*": {
"name": "TrainedBy",
"entityTypes": [ "dataJob" ],
"entityTypes": [ "dataJob", "dataProcessInstance" ],
yoonhyejin marked this conversation as resolved.
Show resolved Hide resolved
"isLineage": true
}
}
Expand Down
Loading
Loading