Skip to content

Commit

Permalink
fix regression
Browse files Browse the repository at this point in the history
  • Loading branch information
mfumagalli committed Apr 8, 2020
1 parent 5408051 commit d4e6f77
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 276 deletions.
36 changes: 20 additions & 16 deletions ImaGene.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ class ImaNet:
"""
def __init__(self, name=None, model=None):
self.name = name
self.scores = {'val_loss': [], 'val_acc': [], 'loss': [], 'acc': [], 'val_mse': [], 'val_mae': [], 'mse': [], 'mae': []}
self.scores = {'val_loss': [], 'val_acc': [], 'loss': [], 'acc': [], 'mae': [], 'val_mae': []}
self.test = np.zeros(2)
self.values = None # matrix(3,nr_test) true, map, mle
return None
Expand All @@ -621,32 +621,33 @@ def plot_train(self, file=None):
"""
Plot training accuracy/mae and loss/mse
"""
if 'loss' in self.scores.keys():
loss = self.scores['loss']
val_loss = self.scores['val_loss']
acc = self.scores['acc']
val_acc = self.scores['val_acc']
else:
loss = self.scores['mse']
val_loss = self.scores['val_mse']
loss = self.scores['loss']
val_loss = self.scores['val_loss']
# if regression
if 'mae' in self.scores.keys():
acc = self.scores['mae']
val_acc = self.scores['val_mae']
label = 'mae'
else: # if not
acc = self.scores['acc']
val_acc = self.scores['val_acc']
label = 'acc'
epochs = range(1, len(loss) + 1)

plt.figure()
plt.subplots_adjust(wspace = 0, hspace = 0.4)
plt.subplot(211)

plt.plot(epochs, loss, 'bo', label='Training loss/mse')
plt.plot(epochs, val_loss, 'b', label='Validation loss/mse')
plt.title('Training and validation loss/mse')
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.subplot(212)

plt.plot(epochs, acc, 'bo', label='Training acc/mae')
plt.plot(epochs, val_acc, 'b', label='Validation acc/mae')
plt.title('Training and validation accuracy/mae')
plt.plot(epochs, acc, 'bo', label='Training '+label)
plt.plot(epochs, val_acc, 'b', label='Validation '+label)
plt.title('Training and validation '+label)
plt.legend()

if file==None:
Expand All @@ -661,6 +662,7 @@ def predict(self, gene, model):
Calculate predicted values (many, I assume this is for testing not for single prediction); output is a matrix with nrows=3, row 0 is true, row 1 is MAP, row 2 is posterior mean
"""
self.values = np.zeros((3, gene.data.shape[0]), dtype='float32')
# if binary or regression
if len(gene.targets.shape) == 1:
probs = model.predict(gene.data, batch_size=None)[:,0]
self.values[1,:] = np.where(probs < 0.5, 0., 1.)
Expand All @@ -671,15 +673,17 @@ def predict(self, gene, model):
self.values[1,:] = gene.classes[np.argmax(probs, axis=1)]
self.values[0,:] = gene.classes[np.argmax(gene.targets, axis=1)]
self.values[2,:] = [np.average(gene.classes, weights=probs[i]) for i in range(probs.shape[0])]

return 0

def plot_scatter(self, MAP=True, file=None):
"""
Plot scatter plot (on testing set)
"""
# if MAP
if MAP == True:
plt.scatter(self.values[0,:], self.values[1,:], marker='o')
else:
else: # if regression
plt.scatter(self.values[0,:], self.values[2,:], marker='o')
#plt.title('Relationship between true and predicted values')
plt.xlabel('True')
Expand Down
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# ImaGene

**ImaGene** implements a supervised machine learning algorithm to predict natural selection and estimate selection coefficients from population genomic data.
It can be used to estimate any parameter of interest from an evolutionary model.
**ImaGene** is a supervised machine learning algorithm to predict natural selection and estimate selection coefficients from population genomic data.
It can be used to estimate any parameter of interest from an evolutionary population genetics model.

**ImaGene** implements a convolutional neural network (CNN) which takes as input haplotypes of a _locus_ of interest for a population.
It outputs confusion matrices as well as point estimates of the selection coefficient (or any parameter of interst) along with its posterior distribution and various metrics of confidence.
It outputs confusion matrices as well as point estimates of the selection coefficient (or any parameter of interest) along with its posterior distribution and various metrics of confidence.

### Citation

Expand All @@ -27,7 +27,7 @@ git clone https://github.com/mfumagalli/ImaGene
**ImaGene** runs under Python3 and it is interfaced with [tensorflow](https://www.tensorflow.org) and [keras](https://keras.io/).
We recommend using [conda](https://conda.io/docs/index.html) to set the environment and take care of all dependencies.
There are detailed instructions on how to download conda for [linux](https://conda.io/docs/user-guide/install/linux.html) and [macOS](https://conda.io/docs/user-guide/install/macos.html).
For instance, a suitable environment can be created with
A suitable environment can be created with

`conda create -n ImaGene python=3.6 tensorflow=2 keras numpy scipy scikit-image scikit-learn matplotlib pydot pymc3 ipython jupyter`

Expand All @@ -49,12 +49,10 @@ Otherwise follow the link [here](https://www.java.com/en/download/) if you need
Remember that java must be in your /usr/bin folder.
In unix systems you can create a symbolic link with `ln -s ~/Downloads/java-XXX/jre/bin/java /usr/bin/java`, as an example.



### Usage

Please look at the jupyter notebook `01_binary.ipynb` for a tutorial on how to use **ImaGene** for predicting natural selection with a simple binary classification.
We also provide examples on how **ImaGene** can be used for multiclass classification in `02_multiclass.ipynb` and `03_multiclass_for_continuous.ipynb`.
We also provide examples of how **ImaGene** can be used for multiclass classification in `02_multiclass.ipynb` and `03_multiclass_for_continuous.ipynb`, and for regression in `04_regression.ipynb`.

Finally, we provide a utility `generate_dataset.sh` to quickly generate simulations with msms to be used for training.
This script takes an input file with all parameters needed for the simulations.
Expand All @@ -64,6 +62,8 @@ More information can be found in the tutorials.
The folder `Reproduce` contains all scripts used for the analyses shown in the manuscript.

### Contributors (in alphabetical order)
Alice Beddis, Matteo Fumagalli, Ulas Isildak, Lucrezia Lorenzon, Luis Torada

- main: Matteo Fumagalli (m.fumagalli [at] imperial [dot] ac [dot] uk)
- others (in alphabetical order): Alice Beddis, Ulas Isildak, Lucrezia Lorenzon, Luis Torada


Loading

0 comments on commit d4e6f77

Please sign in to comment.