Basenji Documentation #91

Open
wants to merge 25 commits into base: master
Changes from all commits (25 commits)
75ad09c
ignore downloaded files
moodyRahman Aug 2, 2021
73993ba
display basset default model
moodyRahman Aug 2, 2021
8548ec0
directly invoke python in script
moodyRahman Aug 2, 2021
198edb0
misc cleanup files
moodyRahman Aug 2, 2021
9781599
annotated basset configs
moodyRahman Aug 4, 2021
ff86736
docstring build_block
moodyRahman Aug 4, 2021
94e0f32
global_var in build_block explanation
moodyRahman Aug 4, 2021
6875716
builkd_model documentation
moodyRahman Aug 4, 2021
0abb2ff
training script comments
moodyRahman Aug 4, 2021
e0b5368
Merge branch 'calico:master' into master
moodyRahman Aug 4, 2021
6abb24c
moved generic parms to proper folder
moodyRahman Aug 4, 2021
fe7a345
formatting
moodyRahman Aug 10, 2021
cce77d5
name_func explanation
moodyRahman Aug 10, 2021
b2aaace
docs for seqnn.py
moodyRahman Aug 10, 2021
8286fd3
test function for SeqNN functionality
moodyRahman Aug 10, 2021
e298c90
Merge branch 'calico:master' into master
moodyRahman Aug 10, 2021
d9994b5
script to generate params_react.json
moodyRahman Aug 24, 2021
f16f1b8
generated function definitions for the react frontend
moodyRahman Aug 24, 2021
ca48e8f
seqnn comments
moodyRahman Aug 24, 2021
00ec27f
annotated params
moodyRahman Aug 24, 2021
97becc4
removed debug file
moodyRahman Aug 24, 2021
0fb6632
removed relative path call
moodyRahman Aug 24, 2021
040a5a7
deleted unnecessary files
moodyRahman Aug 24, 2021
96f7d72
reverted .gitignore
moodyRahman Aug 24, 2021
2fd739a
Merge branch 'master' into moody
moodyRahman Sep 27, 2021
2 changes: 1 addition & 1 deletion .gitignore
@@ -9,4 +9,4 @@ basenji.egg-info/
*/.DS_Store
*/._.DS_Store
**/.ipynb_checkpoints/
data/hg19.fa*
3 changes: 3 additions & 0 deletions basenji/blocks.py
@@ -1340,6 +1340,9 @@ def concat_to_2d(inputs, **kwargs):
############################################################
# Dictionary
############################################################
"""
dictionary that maps block-name strings from the params JSON to their constructor functions in this module
"""
name_func = {
'attention': attention,
'center_slice': center_slice,
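A note on the pattern documented here: name_func is the lookup table that seqnn.SeqNN.build_block consults, mapping each lowercase block name in a params file to its constructor function. A minimal, self-contained sketch of that dispatch pattern (toy block functions, not the real ones in blocks.py):

```python
# Toy sketch of the name_func dispatch pattern; these block functions are
# stand-ins, not the real ones in blocks.py.
def conv_block(current, filters=128, **kwargs):
    # the real version wraps Conv1D, batch norm, pooling, etc.
    return f"conv_block(filters={filters})({current})"

def dense_block(current, units=64, **kwargs):
    return f"dense_block(units={units})({current})"

name_func = {
    'conv_block': conv_block,
    'dense_block': dense_block,
}

# build_block-style dispatch: 'name' selects the constructor and the
# remaining keys become keyword arguments.
block_params = {'name': 'conv_block', 'filters': 288}
block_func = name_func[block_params.pop('name')]
current = block_func('sequence_input', **block_params)
print(current)  # conv_block(filters=288)(sequence_input)
```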
54 changes: 50 additions & 4 deletions basenji/seqnn.py
@@ -13,6 +13,7 @@
# limitations under the License.
# =========================================================================
from __future__ import print_function
import json

import pdb
import sys
@@ -43,11 +44,20 @@ def set_defaults(self):
self.augment_shift = [0]

def build_block(self, current, block_params):
"""Construct a SeqNN block.
"""
Construct a SeqNN block (a series of layers), and set it's previous layer
to be the `current` parameter.

Args:
current (tf.keras.layers.Layer): The previous later to attach this new
block to.

block_params (dict): The parameters for this specific block, in order
of the elements of model[trunk]

Returns:
current
"""
tf.keras.layers.Layer: the final layer generated by this method
"""
block_args = {}

# extract name
@@ -64,6 +74,8 @@ def build_block(self, current, block_params):
block_varnames = block_func.__init__.__code__.co_varnames

# set global defaults
# check whether the SeqNN object defines each of the following attributes:
# if it does, copy its value into the block_args dictionary
global_vars = ['activation', 'batch_norm', 'bn_momentum', 'bn_type',
'l2_scale', 'l1_scale', 'padding', 'kernel_initializer']
for gv in global_vars:
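The body of this loop is cut off by the diff view, but the comment above describes what it does. A standalone sketch of that attribute fallthrough, with assumed names (FakeSeqNN is illustrative only):

```python
# Standalone sketch of the "set global defaults" step described above:
# attributes set on the model object become per-block keyword arguments.
class FakeSeqNN:          # illustrative stand-in for seqnn.SeqNN
    activation = 'gelu'
    batch_norm = True
    # bn_momentum deliberately unset

model = FakeSeqNN()
global_vars = ['activation', 'batch_norm', 'bn_momentum']
block_args = {}
for gv in global_vars:
    gv_value = getattr(model, gv, None)   # None when the attribute is absent
    if gv_value is not None:
        block_args[gv] = gv_value

print(block_args)  # {'activation': 'gelu', 'batch_norm': True}
```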
@@ -93,17 +105,37 @@ def build_block(self, current, block_params):
block_args['concat'] = concat_repr

# switch for block
# look up the constructor for this block in the name_func or keras_func dict
# and use it to generate the next layer
#
# if the name is capitalized, use the keras definition for that layer;
# otherwise, use the layer defined in blocks.py
#
# pass all of the parameters associated with this block as keyword arguments

# basenji custom layer
if block_name[0].islower():
block_func = blocks.name_func[block_name]
current = block_func(current, **block_args)

# keras functional style layer
else:
block_func = blocks.keras_func[block_name]
current = block_func(**block_args)(current)

return current
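The capitalization switch above is the whole routing rule, so a compact illustration may help. This sketch uses toy registries in place of blocks.name_func and blocks.keras_func; only the dispatch logic mirrors the code:

```python
import tensorflow as tf

# Toy registries standing in for blocks.name_func and blocks.keras_func.
def dense_block(current, units=8, **kwargs):
    return tf.keras.layers.Dense(units)(current)

name_func = {'dense_block': dense_block}
keras_func = {'Dropout': tf.keras.layers.Dropout}

current = tf.keras.Input(shape=(16,))
for block_params in [{'name': 'dense_block', 'units': 8},
                     {'name': 'Dropout', 'rate': 0.2}]:
    block_name = block_params.pop('name')
    if block_name[0].islower():
        # basenji custom block: called directly on the previous layer
        current = name_func[block_name](current, **block_params)
    else:
        # keras layer class: instantiated first, then applied
        current = keras_func[block_name](**block_params)(current)

print(current.shape)  # (None, 8)
```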

def build_model(self, save_reprs=True):
def build_model(self, save_reprs=False):
"""
Using the functional API for tensorflow, programmatically builds a neural
network, according to the attributes set under the 'model' key in
the config JSON.

see generic_params.json for further details

Args:
save_reprs (bool, optional): [description]. Defaults to False.
"""
###################################################
# inputs
###################################################
@@ -120,6 +152,7 @@ def build_model(self, save_reprs=True):
###################################################
# build convolution blocks
###################################################

self.reprs = []
for bi, block_params in enumerate(self.trunk):
current = self.build_block(current, block_params)
@@ -136,6 +169,8 @@ def build_model(self, save_reprs=True):
###################################################
# heads
###################################################
# iterate through the elements of params['head'] and create those layers;
# these are typically the final layers of the network
head_keys = natsorted([v for v in vars(self) if v.startswith('head')])
self.heads = [getattr(self, hk) for hk in head_keys]

@@ -196,6 +231,7 @@ def build_model(self, save_reprs=True):


def build_embed(self, conv_layer_i, batch_norm=True):

if conv_layer_i == -1:
self.embed = tf.keras.Model(inputs=self.model.inputs,
outputs=self.model.inputs)
@@ -376,3 +412,13 @@ def save(self, model_file, trunk=False):
self.model_trunk.save(model_file, include_optimizer=False)
else:
self.model.save(model_file, include_optimizer=False)

def main():
with open("/home/moody/basset/basenji/manuscripts/basset/model_basset/params.json") as params_open:
params = json.load(params_open)
params_model = params['model']
params_train = params['train']
seqnn_model = SeqNN(params_model)

if __name__ == "__main__":
main()
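The main() above hardcodes an absolute path; the same smoke test can be written with an inline params dict. The keys below are copied from manuscripts/basset/models/heart/params.json, and treating this minimal subset of keys as sufficient on its own is an assumption:

```python
from basenji import seqnn

# Inline variant of the smoke test in main(); keys copied from
# manuscripts/basset/models/heart/params.json. Whether this subset of
# keys is sufficient on its own is an assumption.
params_model = {
    "seq_length": 1344,
    "activation": "gelu",
    "batch_norm": True,
    "trunk": [
        {"name": "conv_block", "filters": 288, "kernel_size": 17, "pool_size": 3},
        {"name": "dense_block", "flatten": True, "units": 768, "dropout": 0.2},
    ],
    "head": [
        {"name": "final", "units": 164, "activation": "sigmoid"},
    ],
}

seqnn_model = seqnn.SeqNN(params_model)
seqnn_model.model.summary()  # prints a table like manuscripts/basset/out.log
```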
4 changes: 4 additions & 0 deletions bin/basenji_train.py
@@ -41,6 +41,9 @@
# main
################################################################################
def main():
"""
Runner script that builds a model according to the JSON params file and trains it.
"""
usage = 'usage: %prog [options] <params_file> <data1_dir> ...'
parser = OptionParser(usage)
parser.add_option('-k', dest='keras_fit',
@@ -108,6 +111,7 @@ def main():
# one GPU

# initialize model
# the keys from params_model become direct attributes of seqnn_model
seqnn_model = seqnn.SeqNN(params_model)

# restore
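One consequence of the comment above, easy to verify in an interpreter, is that config values become readable as attributes on the model object. A small check using the params file added in this PR (path relative to the repo root):

```python
import json
from basenji import seqnn

# Illustration of the comment above: params JSON keys become attributes.
with open('manuscripts/basset/models/heart/params.json') as params_open:
    params = json.load(params_open)

seqnn_model = seqnn.SeqNN(params['model'])
print(seqnn_model.seq_length)  # 1344, straight from the JSON
print(seqnn_model.augment_rc)  # True
```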
61 changes: 61 additions & 0 deletions docs/generic_params.json
@@ -0,0 +1,61 @@
// THIS IS NOT A VALID JSON PARAMETER FILE
// THIS IS AN ANNOTATED JSON SHOWING ALL THE POSSIBLE CONFIGURATIONS, AND WHAT THEIR TYPES ARE

{
"train": {
"batch_size": 64 [int],
"shuffle_buffer": 8192 [int],
"optimizer": "sgd" [enum],
"loss": "bce" [enum],
"learning_rate": 0.005 [float],
"momentum": 0.98 [float],
"patience": 12 [int],
"train_epochs_min": 10 [int: default 1],
"train_epochs_max": 10 [int: default 10000]
},
"model": {
"seq_length": 1344 [int],

"augment_rc": true [bool],
"augment_shift": 3 [int],

"activation": "gelu" [enum],
"batch_norm": true [bool],
"bn_momentum": 0.90 [float],

"trunk": [
{
"name": "conv_block", [enum: see module variables in blocks.py]
"filters": 288,
"kernel_size": 17,
"pool_size": 3
},
{
"name": "conv_tower",
"filters_init": 288,
"filters_mult": 1.122,
"kernel_size": 5,
"pool_size": 2,
"repeat": 6
},
{
"name": "conv_block",
"filters": 256,
"kernel_size": 1
},
{
"name": "dense_block",
"flatten": true,
"units": 768,
"dropout": 0.2
}
] [array: SeqNN block definitions],
"head": [
{
"name": "final",
"units": 164,
"activation": "sigmoid"
}
]
}
}
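Because the annotated file above is deliberately not valid JSON, it cannot be loaded directly; one way to put it to work is as a checklist against a real params file. A hedged validator sketch follows; the choice of required keys and types is drawn from the annotations, not enforced by basenji itself:

```python
import json

# Hedged validator sketch based on the annotated schema above; the set of
# required keys and types is an assumption drawn from the annotations.
EXPECTED_TYPES = {
    ("train", "batch_size"): int,
    ("train", "learning_rate"): float,
    ("model", "seq_length"): int,
    ("model", "augment_rc"): bool,
    ("model", "trunk"): list,
    ("model", "head"): list,
}

with open("manuscripts/basset/models/heart/params.json") as f:
    params = json.load(f)

for (section, key), expected in EXPECTED_TYPES.items():
    value = params[section][key]
    assert isinstance(value, expected), f"{section}.{key}: expected {expected.__name__}"

for block in params["model"]["trunk"]:
    assert "name" in block, "every trunk block needs a 'name'"
```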
57 changes: 57 additions & 0 deletions manuscripts/basset/models/heart/params.json
@@ -0,0 +1,57 @@
{
"train": {
"batch_size": 64,
"shuffle_buffer": 8192,
"optimizer": "sgd",
"loss": "bce",
"learning_rate": 0.005,
"momentum": 0.98,
"patience": 12,
"train_epochs_min": 10
},
"model": {
"seq_length": 1344,

"augment_rc": true,
"augment_shift": 3,

"activation": "gelu",
"batch_norm": true,
"bn_momentum": 0.90,

"trunk": [
{
"name": "conv_block",
"filters": 288,
"kernel_size": 17,
"pool_size": 3
},
{
"name": "conv_tower",
"filters_init": 288,
"filters_mult": 1.122,
"kernel_size": 5,
"pool_size": 2,
"repeat": 6
},
{
"name": "conv_block",
"filters": 256,
"kernel_size": 1
},
{
"name": "dense_block",
"flatten": true,
"units": 768,
"dropout": 0.2
}
],
"head": [
{
"name": "final",
"units": 164,
"activation": "sigmoid"
}
]
}
}
83 changes: 83 additions & 0 deletions manuscripts/basset/out.log
@@ -0,0 +1,83 @@
Model: "model_1"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
sequence (InputLayer) [(None, 1344, 4)] 0
__________________________________________________________________________________________________
stochastic_reverse_complement ( ((None, 1344, 4), () 0 sequence[0][0]
__________________________________________________________________________________________________
stochastic_shift (StochasticShi (None, 1344, 4) 0 stochastic_reverse_complement[0][
__________________________________________________________________________________________________
gelu (GELU) (None, 1344, 4) 0 stochastic_shift[0][0]
__________________________________________________________________________________________________
conv1d (Conv1D) (None, 1344, 288) 19584 gelu[0][0]
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 1344, 288) 1152 conv1d[0][0]
__________________________________________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 448, 288) 0 batch_normalization[0][0]
__________________________________________________________________________________________________
gelu_1 (GELU) (None, 448, 288) 0 max_pooling1d[0][0]
__________________________________________________________________________________________________
conv1d_1 (Conv1D) (None, 448, 288) 414720 gelu_1[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 448, 288) 1152 conv1d_1[0][0]
__________________________________________________________________________________________________
max_pooling1d_1 (MaxPooling1D) (None, 224, 288) 0 batch_normalization_1[0][0]
__________________________________________________________________________________________________
gelu_2 (GELU) (None, 224, 288) 0 max_pooling1d_1[0][0]
__________________________________________________________________________________________________
conv1d_2 (Conv1D) (None, 224, 323) 465120 gelu_2[0][0]
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 224, 323) 1292 conv1d_2[0][0]
__________________________________________________________________________________________________
max_pooling1d_2 (MaxPooling1D) (None, 112, 323) 0 batch_normalization_2[0][0]
__________________________________________________________________________________________________
gelu_3 (GELU) (None, 112, 323) 0 max_pooling1d_2[0][0]
__________________________________________________________________________________________________
conv1d_3 (Conv1D) (None, 112, 363) 586245 gelu_3[0][0]
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 112, 363) 1452 conv1d_3[0][0]
__________________________________________________________________________________________________
max_pooling1d_3 (MaxPooling1D) (None, 56, 363) 0 batch_normalization_3[0][0]
__________________________________________________________________________________________________
gelu_4 (GELU) (None, 56, 363) 0 max_pooling1d_3[0][0]
__________________________________________________________________________________________________
conv1d_4 (Conv1D) (None, 56, 407) 738705 gelu_4[0][0]
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 56, 407) 1628 conv1d_4[0][0]
__________________________________________________________________________________________________
max_pooling1d_4 (MaxPooling1D) (None, 28, 407) 0 batch_normalization_4[0][0]
__________________________________________________________________________________________________
gelu_5 (GELU) (None, 28, 407) 0 max_pooling1d_4[0][0]
__________________________________________________________________________________________________
conv1d_5 (Conv1D) (None, 28, 456) 927960 gelu_5[0][0]
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 28, 456) 1824 conv1d_5[0][0]
__________________________________________________________________________________________________
max_pooling1d_5 (MaxPooling1D) (None, 14, 456) 0 batch_normalization_5[0][0]
__________________________________________________________________________________________________
gelu_6 (GELU) (None, 14, 456) 0 max_pooling1d_5[0][0]
__________________________________________________________________________________________________
conv1d_6 (Conv1D) (None, 14, 512) 1167360 gelu_6[0][0]
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 14, 512) 2048 conv1d_6[0][0]
__________________________________________________________________________________________________
max_pooling1d_6 (MaxPooling1D) (None, 7, 512) 0 batch_normalization_6[0][0]
__________________________________________________________________________________________________
gelu_7 (GELU) (None, 7, 512) 0 max_pooling1d_6[0][0]
__________________________________________________________________________________________________
conv1d_7 (Conv1D) (None, 7, 256) 131072 gelu_7[0][0]
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 7, 256) 1024 conv1d_7[0][0]
__________________________________________________________________________________________________
gelu_8 (GELU) (None, 7, 256) 0 batch_normalization_7[0][0]
__________________________________________________________________________________________________
reshape (Reshape) (None, 1, 1792) 0 gelu_8[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 1, 768) 1376256 reshape[0][0]
__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 1, 768) 3072 dense[0][0]
__________________________________________________________________________________________________
dropout (Dropout) (None, 1, 768) 0 batch_normalization_8[0][0]
__________________________________________________________________________________________________
gelu_9 (GELU) (None, 1, 768) 0 dropout[0][0]
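The Conv1D filter counts in this summary (288, 323, 363, 407, 456, 512) follow from the conv_tower entry in the params file: filters_init 288 scaled by filters_mult 1.122 for repeat 6 rounds. A quick check, assuming the running float is rounded to the nearest integer at each repeat:

```python
# Reproduce the conv_tower Conv1D filter counts seen in out.log.
# Assumption: basenji rounds the running float to the nearest int each repeat.
filters = 288.0
for _ in range(6):
    print(round(filters))  # 288, 323, 363, 407, 456, 512
    filters *= 1.122
```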
4 changes: 3 additions & 1 deletion manuscripts/basset/params_basset.json
@@ -9,6 +9,7 @@
"patience": 12,
"train_epochs_min": 10
},

"model": {
"seq_length": 1344,

@@ -41,11 +42,12 @@
},
{
"name": "dense_block",
"flatten": true,
"flatten": true,
"units": 768,
"dropout": 0.2
}
],

"head": [
{
"name": "final",