Create the cognoml package to implement an MVP API #51

Merged · 20 commits · Oct 11, 2016

Changes from 1 commit
7 changes: 7 additions & 0 deletions cognoml/analysis.py
@@ -11,6 +11,7 @@
from sklearn.cross_validation import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDClassifier
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler

import utils
@@ -85,6 +86,11 @@ def classify(sample_id, mutation_status, **kwargs):
performance[part] = utils.value_map(metrics, round, ndigits=5)
performance['cv'] = {'auroc': round(clf_grid.best_score_, 5)}
results['performance'] = performance

results['model'] = utils.model_info(clf_grid.best_estimator_)

feature_df = utils.get_feature_df(clf_grid.best_estimator_, X.columns)
Member Author:

@yl565 Currently I'm retrieving feature names using X.columns, which gets the feature names of X before it enters the pipeline. However, since VarianceThreshold or other feature selection/transformation steps will alter the feature set, do you know how we can get the feature names at the end of the pipeline? In other words, we want the feature names corresponding to clf_grid.best_estimator_.coef_. I searched for about an hour and couldn't figure this out.
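One possible answer (a sketch under assumptions, not code from this PR): the fitted VarianceThreshold step exposes get_support(), a boolean mask over the input columns, which can map X.columns to the features that survive selection and therefore align with coef_. The toy data, column names, and pipeline below are made up for illustration.

```python
# Sketch: recovering post-selection feature names via get_support().
# The DataFrame, column names, and pipeline here are illustrative only.
import numpy as np
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline

X = pd.DataFrame({
    'constant': [1.0, 1.0, 1.0, 1.0],   # zero variance; dropped by VarianceThreshold
    'informative': [0.0, 1.0, 0.0, 1.0],
    'noisy': [0.2, 0.8, 0.1, 0.9],
})
y = np.array([0, 1, 0, 1])

pipeline = make_pipeline(VarianceThreshold(), SGDClassifier(random_state=0))
pipeline.fit(X, y)

# Boolean mask of columns retained by the fitted selector step
mask = pipeline.named_steps['variancethreshold'].get_support()
surviving = X.columns[mask]

# coef_ of the final estimator lines up with the surviving columns
coefficients, = pipeline.named_steps['sgdclassifier'].coef_
assert len(surviving) == len(coefficients)
```

If several selection/transformation steps are chained, each selector's mask would need to be applied in sequence to recover the final column set.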

results['model']['features'] = utils.df_to_datatables(feature_df)

results['observations'] = utils.df_to_datatables(obs_df)
return results
Member:

Maybe at the beginning of this script you could describe what results should look like? I'm having some difficulty interpreting what results actually entails and its format.

Member Author (@dhimmel) · Sep 26, 2016:

I generated a JSON schema (hippo-output-schema.json) using:

genson --indent=2 data/api/hippo-output.json > data/api/hippo-output-schema.json

I can add descriptions for each field here. Do you think that's a good solution?

Member:

Maybe just a link to the hippo-output-schema and the genson command would suffice.

@@ -107,6 +113,7 @@ def classify(sample_id, mutation_status, **kwargs):
clf_grid = grid_search.GridSearchCV(estimator=clf, param_grid=param_grid, n_jobs=-1, scoring='roc_auc')

pipeline = make_pipeline(
VarianceThreshold(),
StandardScaler(),
clf_grid
)
15 changes: 15 additions & 0 deletions cognoml/utils.py
@@ -97,3 +97,18 @@ def threshold_metrics(y_true, y_pred):
metrics['auroc'] = sklearn.metrics.roc_auc_score(y_true, y_pred)
metrics['auprc'] = sklearn.metrics.average_precision_score(y_true, y_pred)
return metrics

def model_info(estimator):
model = collections.OrderedDict()
model['class'] = type(estimator).__name__
model['module'] = estimator.__module__
model['parameters'] = estimator.get_params()
return model

def get_feature_df(estimator, features):
coefficients, = estimator.coef_
feature_df = pd.DataFrame.from_items([
('feature', features),
('coefficient', coefficients),
])
return feature_df
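For context (a hedged sketch, not code from this diff): get_feature_df pairs each input feature name with the corresponding entry of a fitted linear estimator's coef_, which sklearn returns with shape (1, n_features) for a binary classifier. The stand-in estimator and gene names below are hypothetical, and the frame is built with a plain constructor rather than the diff's pd.DataFrame.from_items (a 2016-era pandas API).

```python
# Illustration of the get_feature_df idea with a stand-in estimator.
# FakeEstimator and the feature names 'TP53'/'KRAS' are hypothetical.
import pandas as pd

class FakeEstimator:
    # Shape (1, n_features), as for a fitted binary linear model
    coef_ = [[0.5, -0.25]]

def get_feature_df(estimator, features):
    # Unpack the single row of coef_ and pair it with feature names
    coefficients, = estimator.coef_
    return pd.DataFrame({'feature': list(features),
                         'coefficient': list(coefficients)},
                        columns=['feature', 'coefficient'])

feature_df = get_feature_df(FakeEstimator(), ['TP53', 'KRAS'])
```

The resulting two-column frame is what later feeds utils.df_to_datatables for the "features" block of the JSON output.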
91 changes: 91 additions & 0 deletions data/api/hippo-output.json
@@ -31,6 +31,97 @@
"auroc": 0.62524
}
},
"model": {
"class": "SGDClassifier",
"module": "sklearn.linear_model.stochastic_gradient",
"parameters": {
"warm_start": false,
"alpha": 0.1,
"random_state": 0,
"learning_rate": "optimal",
"shuffle": true,
"epsilon": 0.1,
"power_t": 0.5,
"n_iter": 5,
"penalty": "elasticnet",
"class_weight": "balanced",
"loss": "log",
"n_jobs": 1,
"eta0": 0.0,
"fit_intercept": true,
"average": false,
"l1_ratio": 0.0,
"verbose": 0
},
"features": {
"columns": [
"feature",
"coefficient"
],
"data": [
[
"1421",
-0.04357
],
[
"5203",
0.10076
],
[
"5818",
0.09927
],
[
"9875",
0.07751
],
[
"10675",
0.03264
],
[
"10919",
0.02275
],
[
"23262",
-0.02254
],
[
"23467",
-0.21388
],
[
"54941",
0.0073
],
[
"79622",
0.00158
],
[
"147746",
-0.10429
],
[
"255167",
-0.03445
],
[
"284123",
-0.0188
],
[
"646851",
-0.05939
],
[
"728689",
0.00557
]
]
}
},
"observations": {
"columns": [
"sample_id",