From 643cc8d59913720f5846d12711c046c9710a57ed Mon Sep 17 00:00:00 2001 From: Huilin Qu Date: Fri, 29 Mar 2019 14:47:48 +0100 Subject: [PATCH] Add instructions for deploying models into CMSSW. --- scripts/deploy.md | 29 +++++++++++++++++++++++++++++ scripts/json2pset.py | 22 ++++++++++++++++++++-- 2 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 scripts/deploy.md diff --git a/scripts/deploy.md b/scripts/deploy.md new file mode 100644 index 0000000..0b3b92e --- /dev/null +++ b/scripts/deploy.md @@ -0,0 +1,29 @@ +Deploy a trained model to CMSSW +====== + +To deploy the trained model to CMSSW, the following three files from the model directory are needed: + + - `preprocessing.json`: this file contains the list of input variables and the input preprocessing information. It can be converted to a CMSSW python configuration fragment (e.g., [pfDeepBoostedJetPreprocessParams_cfi](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfDeepBoostedJetPreprocessParams_cfi.py)) using the [json2pset](json2pset.py) script. + - `modelname-symbol.json`: this file describes the neural network architecture. + - [**Note**] For the mass decorrelated model, several model JSON files exist; use the one named `modelname-symbol-softmax.json`. + - `modelname-xxxx.params`: this file contains the trained parameters (i.e., weights) of the neural network. It needs to match the epoch number you intend to deploy. + + +[**Step 1**] Convert the `preprocessing.json` to a CMSSW python configuration fragment. This step needs to be done under a CMSSW release area. 
+ +```bash +cd CMSSW_X_Y_Z/src +cmsenv +# for the nominal tagger +python json2pset.py -i preprocessing.json -o pfDeepBoostedJetPreprocessParams_cfi.py -n pfDeepBoostedJetPreprocessParams +# for the mass decorrelated tagger +python json2pset.py -i preprocessing.json -o pfMassDecorrelatedDeepBoostedJetPreprocessParams_cfi.py -n pfMassDecorrelatedDeepBoostedJetPreprocessParams +``` + +The output file (the `*_cfi.py` file passed to `-o`) should replace the corresponding fragment in CMSSW: either [pfDeepBoostedJetPreprocessParams_cfi](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfDeepBoostedJetPreprocessParams_cfi.py) or [pfMassDecorrelatedDeepBoostedJetPreprocessParams_cfi](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfMassDecorrelatedDeepBoostedJetPreprocessParams_cfi.py). + + +[**Step 2**] Gather the model files (`modelname-symbol.json` and `modelname-xxxx.params`) and integrate them into CMSSW. +To have the models officially integrated in CMSSW, you need to add them to the [cms-data](https://github.com/cms-data/RecoBTag-Combined/tree/master/DeepBoostedJet/) repo via a pull request. + +[**Step 3**] Update the CMSSW configuration [pfDeepBoostedJet_cff.py](https://github.com/cms-sw/cmssw/blob/master/RecoBTag/MXNet/python/pfDeepBoostedJet_cff.py). Change the `model_path` and `param_path` to the model you intend to use. Also verify that `pfDeepBoostedJetPreprocessParams` and `pfMassDecorrelatedDeepBoostedJetPreprocessParams` are consistent with the preprocessing parameters in `preprocessing.json`. \ No newline at end of file diff --git a/scripts/json2pset.py b/scripts/json2pset.py index 8351b53..e21e627 100644 --- a/scripts/json2pset.py +++ b/scripts/json2pset.py @@ -41,6 +41,10 @@ def _byteify(data, ignore_dicts = False): parser.add_argument('-o', '--output', help='Output file.' ) +parser.add_argument('-n', '--name', + default='pfDeepBoostedJetPreprocessParams', + help='Params name.' 
+) args = parser.parse_args() with open(args.input) as fp: @@ -49,6 +53,12 @@ def _byteify(data, ignore_dicts = False): cfg = cms.PSet() cfg.input_names = cms.vstring(*j['input_names']) +try: + scale_method = j['scale_method'] +except KeyError: + scale_method = 'upper' + print('scale_method not found! Use `upper` by default.') + for name in j['input_names']: p = cms.PSet( input_shape=cms.vuint32(j['input_shapes'][name]), @@ -60,12 +70,20 @@ def _byteify(data, ignore_dicts = False): for v in j['var_names'][name]: info = j['var_info'][v] + if scale_method == 'upper': + scale = info['upper'] - info['median'] + elif scale_method == 'max': + scale = max(info['upper'] - info['median'], info['median'] - info['lower']) + else: + raise NotImplementedError('scale_method=%s is not implemented!' % scale_method) + if scale == 0: + scale = 1 pvar = cms.PSet( median=cms.double(info['median']), - upper=cms.double(info['upper']), + norm_factor=cms.double(1. / scale), ) setattr(p.var_infos, v, pvar) with open(args.output, 'w') as fout: fout.write('import FWCore.ParameterSet.Config as cms\n\n') - fout.write('pfDeepBoostedJetPreprocessParams = '+str(cfg)) + fout.write(args.name + ' = ' + str(cfg))