Skip to content

Commit

Permalink
ml xgboost class spec
Browse files Browse the repository at this point in the history
  • Loading branch information
PondiB committed Dec 12, 2023
1 parent b162040 commit d063bbd
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `load_ml_model`
- `load_url`
- `ml_fit_class_random_forest`
- `ml_fit_class_xgboost`
- `ml_fit_regr_random_forest`
- `ml_predict`
- `save_ml_model`
Expand Down
115 changes: 115 additions & 0 deletions proposals/ml_fit_class_xgboost.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"id": "ml_fit_class_xgboost",
"summary": "Train an XGBoost classification model",
"description": "Fit an XGBoost classification model to training data. XGBoost is a high-performance, flexible, and portable distributed gradient boosting library. It implements machine learning algorithms within the Gradient Boosting framework, featuring parallel tree boosting for efficiency.",
"categories": [
"machine learning"
],
"experimental": true,
"parameters": [
{
"name": "predictors",
"description": "The predictors for the XGBoost classification model as a vector data cube. They are the independent variables that the XGBoost algorithm analyses to learn patterns and relationships within the data.",
"schema": {
"type": "object",
"subtype": "datacube",
"dimensions": [
{
"type": "geometry"
},
{
"type": "bands"
}
]
}
},
{
"name": "target",
"description": "Labeled data for XGBoost classification, aligning with predictor values based on a shared geometry dimension. This ensures a clear connection between predictor rows and labels.",
"schema": {
"type": "object",
"subtype": "datacube",
"dimensions": [
{
"type": "geometry"
}
]
}
},
{
"name": "learning_rate",
"description": "Step size shrinkage used in update to prevent overfitting.",
"optional": true,
"default": 0.15,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "max_depth",
"description": "Maximum depth of a tree.",
"optional": true,
"default": 5,
"schema": {
"type": "integer",
"minimum": 1
}
},
{
"name": "min_child_weight",
"description": "Minimum sum of instance weight (hessian) needed in a child.",
"optional": true,
"default": 1,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "subsample",
"description": "Subsample ratio of the training instances. A value of 0.5 means that half of the training data is randomly sampled prior to growing trees.",
"optional": true,
"default": 0.8,
"schema": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
{
"name": "min_split_loss",
"description": "Minimum loss reduction required to make a further partition on a leaf node of the tree.",
"optional": true,
"default": 1,
"schema": {
"type": "number",
"minimum": 0
}
},
{
"name": "seed",
"description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
"optional": true,
"default": null,
"schema": {
"type": [
"integer",
"null"
]
}
}
],
"returns": {
"description": "A model object that can be saved with `save_ml_model()` and restored with `load_ml_model()`.",
"schema": {
"type": "object",
"subtype": "ml-model"
}
},
"links": [
{
"href": "https://dl.acm.org/doi/10.1145/2939672.2939785",
"title": "Chen and Guestrin (2016), XGBoost: A Scalable Tree Boosting System",
"type": "text/html",
"rel": "about"
}
]
}
6 changes: 6 additions & 0 deletions tests/.words
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,9 @@ Hyndman
date1
date2
favor
XGBoost
Chen
Guestrin
Subsample
hessian
overfitting

0 comments on commit d063bbd

Please sign in to comment.