From 643cc8d59913720f5846d12711c046c9710a57ed Mon Sep 17 00:00:00 2001
From: Huilin Qu <huilin.qu@cern.ch>
Date: Fri, 29 Mar 2019 14:47:48 +0100
Subject: [PATCH] Add instructions for deploying models into CMSSW.

---
 scripts/deploy.md    | 29 +++++++++++++++++++++++++++++
 scripts/json2pset.py | 22 ++++++++++++++++++++--
 2 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 scripts/deploy.md

diff --git a/scripts/deploy.md b/scripts/deploy.md
new file mode 100644
index 0000000..0b3b92e
--- /dev/null
+++ b/scripts/deploy.md
@@ -0,0 +1,29 @@
+Deploy a trained model to CMSSW
+======
+
+To deploy the trained model to CMSSW, the following three files from the model directory are needed:
+
+ - `preprocessing.json`: this file contains the list of input variables and the input preprocessing information. It can be converted to a CMSSW python configuration fragment (e.g., [pfDeepBoostedJetPreprocessParams_cfi](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfDeepBoostedJetPreprocessParams_cfi.py)) using the [json2pset](json2pset.py) script.
+ - `modelname-symbol.json`: this file describes the neural network architecture. 
+    - [**Note**] For the mass-decorrelated model, more than one model JSON file exists; use the one named `modelname-symbol-softmax.json`.
+ - `modelname-xxxx.params`: this file contains the trained parameters (i.e., weights) of the neural network. It needs to match the epoch number you intend to deploy.
+
+
+[**Step 1**] Convert the `preprocessing.json` to a CMSSW python configuration fragment. This step needs to be done under a CMSSW release area.
+
+```bash
+cd CMSSW_X_Y_Z/src
+cmsenv
+# for the nominal tagger
+python json2pset.py -i preprocessing.json -o pfDeepBoostedJetPreprocessParams_cfi.py -n pfDeepBoostedJetPreprocessParams
+# for the mass decorrelated tagger
+python json2pset.py -i preprocessing.json -o pfMassDecorrelatedDeepBoostedJetPreprocessParams_cfi.py -n pfMassDecorrelatedDeepBoostedJetPreprocessParams
+```
+
+The output file (the name passed to `-o` above) should replace the corresponding file in CMSSW: either [pfDeepBoostedJetPreprocessParams_cfi](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfDeepBoostedJetPreprocessParams_cfi.py) or [pfMassDecorrelatedDeepBoostedJetPreprocessParams_cfi](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfMassDecorrelatedDeepBoostedJetPreprocessParams_cfi.py).
+
+
+[**Step 2**] Gather the model files (`modelname-symbol.json` and `modelname-xxxx.params`) and integrate them into CMSSW. 
+To have the models officially integrated in CMSSW, you need to add them to the [cms-data](https://github.com/cms-data/RecoBTag-Combined/tree/master/DeepBoostedJet/) repo via a pull request. 
+
+[**Step 3**] Update the CMSSW configuration [pfDeepBoostedJet_cff.py](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfDeepBoostedJet_cff.py). Change `model_path` and `param_path` to point to the model files you intend to use. Then verify that `pfDeepBoostedJetPreprocessParams` and `pfMassDecorrelatedDeepBoostedJetPreprocessParams` are consistent with the preprocessing parameters in `preprocessing.json`.
\ No newline at end of file
diff --git a/scripts/json2pset.py b/scripts/json2pset.py
index 8351b53..e21e627 100644
--- a/scripts/json2pset.py
+++ b/scripts/json2pset.py
@@ -41,6 +41,10 @@ def _byteify(data, ignore_dicts = False):
 parser.add_argument('-o', '--output',
     help='Output file.'
 )
+parser.add_argument('-n', '--name',
+    default='pfDeepBoostedJetPreprocessParams',
+    help='Params name.'
+)
 args = parser.parse_args()
 
 with open(args.input) as fp:
@@ -49,6 +53,12 @@ def _byteify(data, ignore_dicts = False):
 cfg = cms.PSet()
 cfg.input_names = cms.vstring(*j['input_names'])
 
+try:
+    scale_method = j['scale_method']
+except KeyError:
+    scale_method = 'upper'
+    print('scale_method not found! Use `upper` by default.')
+
 for name in j['input_names']:
     p = cms.PSet(
         input_shape=cms.vuint32(j['input_shapes'][name]),
@@ -60,12 +70,20 @@ def _byteify(data, ignore_dicts = False):
     
     for v in j['var_names'][name]:
         info = j['var_info'][v]
+        if scale_method == 'upper':
+            scale = info['upper'] - info['median']
+        elif scale_method == 'max':
+            scale = max(info['upper'] - info['median'], info['median'] - info['lower'])
+        else:  # unknown method: fail loudly instead of writing a wrong norm_factor
+            raise NotImplementedError('scale_method=%s is not implemented!' % scale_method)
+        if scale == 0:  # keep the `1. / scale` used for norm_factor from dividing by zero
+            scale = 1
         pvar = cms.PSet(
             median=cms.double(info['median']),
-            upper=cms.double(info['upper']),
+            norm_factor=cms.double(1. / scale),
             )
         setattr(p.var_infos, v, pvar)
 
 with open(args.output, 'w') as fout:
     fout.write('import FWCore.ParameterSet.Config as cms\n\n')
-    fout.write('pfDeepBoostedJetPreprocessParams = '+str(cfg))
+    fout.write(args.name + ' = ' + str(cfg))