diff --git a/.shippable.yml b/.shippable.yml
index 40f1300e..20cb5c23 100755
--- a/.shippable.yml
+++ b/.shippable.yml
@@ -62,7 +62,7 @@ script:
   # SEGMENTATION section
   - rm -r -f results && mkdir results
-  - python experiments_segmentation/run_compute_stat_annot_segm.py --visual
+  - python experiments_segmentation/run_compute_stat_annot_segm.py -a "data_images/drosophila_ovary_slice/annot_struct/*.png" -s "data_images/drosophila_ovary_slice/segm/*.png" --visual
   - python experiments_segmentation/run_segm_slic_model_graphcut.py -i "data_images/drosophila_disc/image/img_[5,6].jpg" -cfg ./experiments_segmentation/sample_config.json --visual
   - python experiments_segmentation/run_segm_slic_classif_graphcut.py -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" -cfg ./experiments_segmentation/sample_config.json --visual
@@ -96,3 +96,5 @@ after_success:
   - coverage xml -o $COVERAGE_REPORTS/coverage.xml
   - codecov # public repository on Travis CI
   - coverage report
+
+  - cd .. && python -c "import imsegm.descriptors"
diff --git a/.travis.yml b/.travis.yml
index bd90008e..c2c8be5d 100755
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,3 +54,4 @@ after_success:
   - coverage xml
   - python-codacy-coverage -r coverage.xml
   - coverage report
+  - cd .. && python -c "import imsegm.descriptors"
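The `cd .. && python -c "import imsegm.descriptors"` step added to both CI configs is a smoke test that the package also imports outside the source tree, i.e. after the Cython extensions have been built. A minimal sketch of the import fallback such a test exercises; the compiled module name `features_cython` is a hypothetical illustration, not taken from this diff:

```python
# Hedged sketch: prefer the compiled Cython descriptors, fall back to numpy.
# `features_cython` is a hypothetical module name used for illustration only.
import logging

try:
    from imsegm import features_cython  # fast, compiled descriptor kernels
    HAS_CYTHON = True
except ImportError:
    # the pure-numpy implementations give the same results, only slower
    logging.warning('compiled descriptors not found, using numpy fallback')
    HAS_CYTHON = False
```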
diff --git a/README.md b/README.md
index 66c7c3a3..cf7def78 100755
--- a/README.md
+++ b/README.md
@@ -10,14 +10,14 @@

 ## Superpixel segmentation with GraphCut regularisation

-Image segmentation is widely used as an initial phase of many image processing tasks in computer vision and image analysis. Many recent segmentation methods use superpixels because they reduce the size of the segmentation problem by order of magnitude. Also, features on superpixels are much more robust than features on pixels only. We use spatial regularization on superpixels to make segmented regions more compact. The segmentation pipeline comprises (i) computation of superpixels; (ii) extraction of descriptors such as color and texture; (iii) soft classification, using a standard classifier for supervised learning, or the Gaussian Mixture Model for unsupervised learning; (iv) final segmentation using Graph Cut. We use this segmentation pipeline on real-world applications in medical imaging (see a sample [images](./images)). We also show that [unsupervised segmentation](./notebooks/segment-2d_slic-fts-model-gc.ipynb) is sufficient for some situations, and provides similar results to those obtained using [trained segmentation](notebooks/segment-2d_slic-fts-classif-gc.ipynb).
+Image segmentation is widely used as an initial phase of many image processing tasks in computer vision and image analysis. Many recent segmentation methods use superpixels because they reduce the size of the segmentation problem by an order of magnitude. Also, features on superpixels are much more robust than features on pixels only. We use spatial regularization on superpixels to make segmented regions more compact. The segmentation pipeline comprises (i) computation of superpixels; (ii) extraction of descriptors such as color and texture; (iii) soft classification, using a standard classifier for supervised learning, or the Gaussian Mixture Model for unsupervised learning; (iv) final segmentation using Graph Cut. We use this segmentation pipeline on real-world applications in medical imaging (see sample [images](./images)).
+We also show that [unsupervised segmentation](./notebooks/segment-2d_slic-fts-model-gc.ipynb) is sufficient for some situations, and provides similar results to those obtained using [trained segmentation](notebooks/segment-2d_slic-fts-classif-gc.ipynb).

 ![schema](figures/schema_slic-fts-clf-gc.jpg)

 **Sample ipython notebooks:**
 * [Supervised segmentation](notebooks/segment-2d_slic-fts-classif-gc.ipynb) requires training anottaion
 * [Unsupervised segmentation](notebooks/segment-2d_slic-fts-model-gc.ipynb) just asks for expected number of classes
-* **partially annotated images** with missing annotatio is marked by a negative number
+* **partially annotated images** where missing annotation is marked by a negative number

 **Illustration**

@@ -44,7 +44,7 @@ Borovec J., Kybic J., Nava R. (2017) **Detection and Localization of Drosophila

 ## Superpixel Region Growing with Shape prior

-Region growing is a classical image segmentation method based on hierarchical region aggregation using local similarity rules. Our proposed approach differs from standard region growing in three essential aspects. First, it works on the level of superpixels instead of pixels, which leads to a substantial speedup. Second, our method uses learned statistical shape properties which encourage growing leading to plausible shapes. In particular, we use ray features to describe the object boundary. Third, our method can segment multiple objects and ensure that the segmentations do not overlap. The problem is represented as an energy minimization and is solved either greedily, or iteratively using GraphCuts.
+Region growing is a classical image segmentation method based on hierarchical region aggregation using local similarity rules. Our proposed approach differs from standard region growing in three essential aspects. First, it works on the level of superpixels instead of pixels, which leads to a substantial speedup. Second, our method uses learned statistical shape properties which encourage growing leading to plausible shapes. In particular, we use ray features to describe the object boundary. Third, our method can segment multiple objects and ensure that the segmentations do not overlap. The problem is represented as energy minimization and is solved either greedily, or iteratively using GraphCuts.

 **Sample ipython notebooks:**
 * [General GraphCut](notebooks/egg_segment_graphcut.ipynb) from given centers and initial structure segmentation.
@@ -93,7 +93,7 @@ We have implemented cython version of some functions, especially computing descr
 ```bash
 python setup.py build_ext --inplace
 ```
-If loading of compiled descriptors in cython fails, it is automatically swapped to numpy which gives the same results, but it is significantly slower.
+If loading of compiled descriptors in `cython` fails, it is automatically swapped to `numpy`, which gives the same results but is significantly slower.

 **Installation**

@@ -191,32 +191,32 @@ We utilize (un)supervised segmentation according to given training examples or s
 * For both experiment you can evaluate segmentation results.
 ```bash
 python experiments_segmentation/run_compute-stat_annot-segm.py \
-    -annot "./data_images/drosophila_ovary_slice/annot_struct/*.png" \
-    -segm "./results/experiment_segm-supervise_ovary/*.png" \
-    -img "./data_images/drosophila_ovary_slice/image/*.jpg" \
-    -out ./results/evaluation
+    -a "./data_images/drosophila_ovary_slice/annot_struct/*.png" \
+    -s "./results/experiment_segm-supervise_ovary/*.png" \
+    -i "./data_images/drosophila_ovary_slice/image/*.jpg" \
+    -o ./results/evaluation --visual
 ```
 ![vusial](figures/segm-visual_D03_sy04_100x.jpg)

 The previous two (un)segmentation accept [configuration file](experiments_segmentation/sample_config.json) (JSON) by parameter `-cfg` with some extra parameters which was not passed in arguments, for instance:
 ```json
 {
-  "slic_size": 35,
-  "slic_regul": 0.2,
-  "features": {"color_hsv": ["mean", "std", "eng"]},
-  "classif": "SVM",
-  "nb_classif_search": 150,
-  "gc_edge_type": "model",
-  "gc_regul": 3.0,
-  "run_LOO": false,
-  "run_LPO": true,
-  "cross_val": 0.1
+  "slic_size": 35,
+  "slic_regul": 0.2,
+  "features": {"color_hsv": ["mean", "std", "eng"]},
+  "classif": "SVM",
+  "nb_classif_search": 150,
+  "gc_edge_type": "model",
+  "gc_regul": 3.0,
+  "run_LOO": false,
+  "run_LPO": true,
+  "cross_val": 0.1
 }
 ```

 ### Center detection and ellipse fitting

-In general, the input is a formatted list (CSV file) of input images and annotations. Another option is set `-list none` and then the list is paired from given paths to images and annotations.
+In general, the input is a formatted list (CSV file) of input images and annotations. Another option is to set `-list none`, in which case the list is paired with the given paths to images and annotations.

 **Experiment sequence is following:**

@@ -224,7 +224,7 @@ In general, the input is a formatted list (CSV file) of input images and annotat
 ```bash
 python experiments_ovary_centres/run_create_annotation.py
 ```
-1. With zone annotation, we train a classifier for center candidate prediction. The annotation can be a CSV file with annotated centers as points, and the zone of positive examples is set uniformly as the circular neighborhood around these points. Another way (preferable) is to use annotated image with marked zones for positive, negative and neutral examples.
+1. With zone annotation, we train a classifier for center candidate prediction. The annotation can be a CSV file with annotated centers as points, and the zone of positive examples is set uniformly as the circular neighborhood around these points. Another way (preferable) is to use an annotated image with marked zones for positive, negative and neutral examples.
 ```bash
 python experiments_ovary_centres/run_center_candidate_training.py -list none \
     -segs "./data_images/drosophila_ovary_slice/segm/*.png" \
@@ -269,7 +269,7 @@ In general, the input is a formatted list (CSV file) of input images and annotat

 ![ellipse fitting](figures/insitu7544_ellipses.jpg)

-### Region growing with shape prior
+### Region growing with a shape prior

 In case you do not have estimated object centers, you can use [plugins](ij_macros) for landmarks import/export for [Fiji](http://fiji.sc/).
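The README paragraph above summarises the four-stage pipeline. A minimal sketch of stages (i)-(iv) using only scikit-image and scikit-learn; the final Graph Cut regularisation is replaced here by a plain argmax, so this is an illustration of the idea, not the repo's implementation:

```python
# Hedged sketch of the (un)supervised pipeline: SLIC superpixels, a simple
# colour descriptor, soft GMM classification, and (here) argmax instead of GC.
import numpy as np
from skimage import data, segmentation
from sklearn import mixture

img = data.astronaut()                                   # (H, W, 3) RGB image
slic = segmentation.slic(img, n_segments=300, compactness=10)  # (i) superpixels
lbs, inv = np.unique(slic, return_inverse=True)
slic = inv.reshape(slic.shape)                           # 0-based contiguous ids
# (ii) one descriptor per superpixel: mean colour
fts = np.array([img[slic == i].mean(axis=0) for i in range(len(lbs))])
# (iii) soft classification with a Gaussian Mixture Model
gmm = mixture.GaussianMixture(n_components=2).fit(fts)
proba = gmm.predict_proba(fts)                           # class membership
# (iv) would be Graph Cut over the superpixel adjacency graph; argmax here
segm = np.argmax(proba, axis=1)[slic]
```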
diff --git a/circle.yml b/circle.yml
index ec04586f..490fbfbb 100755
--- a/circle.yml
+++ b/circle.yml
@@ -48,7 +48,7 @@ test:
   - python handling_annotations/run_segm_annot_relabel.py -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" -out ./results/relabel_center_levels

   # SEGMENTATION section
-  - python experiments_segmentation/run_compute_stat_annot_segm.py --visual
+  - python experiments_segmentation/run_compute_stat_annot_segm.py -a "data_images/drosophila_ovary_slice/annot_struct/*.png" -s "data_images/drosophila_ovary_slice/segm/*.png" --visual
   - python experiments_segmentation/run_segm_slic_model_graphcut.py -i "data_images/drosophila_disc/image/img_[5,6].jpg" -cfg ./experiments_segmentation/sample_config.json --visual
   - python experiments_segmentation/run_segm_slic_classif_graphcut.py -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" -cfg ./experiments_segmentation/sample_config.json --visual
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index 3f93cf36..c53f7c20 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -57,10 +57,8 @@ Here's the long and short of it:
 * Refer to array dimensions as (plane), row, column, not as x, y, z. See :ref:`Coordinate conventions ` in the user guide for more information.
 * Functions should support all input image dtypes. Use utility functions such as ``img_as_float`` to help convert to an appropriate type. The output format can be whatever is most efficient. This allows us to string together several functions into a pipeline
 * Use ``Py_ssize_t`` as data type for all indexing, shape and size variables in C/C++ and Cython code.
-* Use relative module imports, i.e. ``from .._shared import xyz`` rather than ``from skimage._shared import xyz``.
 * Wrap Cython code in a pure Python function, which defines the API. This improves compatibility with code introspection tools, which are often not aware of Cython code.
-* For Cython functions, release the GIL whenever possible, using
-  ``with nogil:``.
+* For Cython functions, release the GIL whenever possible, using ``with nogil:``.

 ## Testing

@@ -76,12 +74,12 @@ the library is installed in development mode::
 ```
 Now, run all tests using::
 ```
-  $ PYTHONPATH=. pytest pyImSegm
+  $ pytest -v pyImSegm
 ```
 Use ``--doctest-modules`` to run doctests. For example, run all tests and all doctests using::
 ```
-  $ PYTHONPATH=. pytest --doctest-modules --with-xunit --with-coverage pyImSegm
+  $ pytest -v --doctest-modules --with-xunit --with-coverage pyImSegm
 ```

 ## Test coverage

@@ -92,7 +90,7 @@ To measure the test coverage, install `pytest-cov

 >> python run_compute_stat_annot_segm.py \
-    -annot "data_images/drosophila_ovary_slice/annot_struct/*.png" \
-    -segm "results/experiment_segm-supervise_ovary/*.png" \
-    -img "data_images/drosophila_ovary_slice/image/*.jpg" \
-    -out results/evaluation
+    -a "data_images/drosophila_ovary_slice/annot_struct/*.png" \
+    -s "results/experiment_segm-supervise_ovary/*.png" \
+    -i "data_images/drosophila_ovary_slice/image/*.jpg" \
+    -o results/evaluation --visual

 Copyright (C) 2016-2018 Jiri Borovec
 """

@@ -18,7 +19,15 @@
 import multiprocessing as mproc
 from functools import partial

+import matplotlib
+if os.environ.get('DISPLAY', '') == '' \
+        and matplotlib.rcParams['backend'] != 'agg':
+    logging.warning('No display found. Using non-interactive Agg backend.')
+    matplotlib.use('Agg')
+
 import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
 from skimage.segmentation import relabel_sequential

 sys.path += [os.path.abspath('.'), os.path.abspath('..')]  # Add path to root

@@ -49,60 +58,78 @@ def aparse_params(dict_paths):
     :return ({str: str}, obj):
     """
     parser = argparse.ArgumentParser()
-    parser.add_argument('-annot', '--path_annot', type=str, required=False,
+    parser.add_argument('-a', '--path_annot', type=str, required=True,
                         help='path to directory with annotations & name pattern',
                         default=dict_paths['annot'])
-    parser.add_argument('-segm', '--path_segm', type=str, required=False,
+    parser.add_argument('-s', '--path_segm', type=str, required=True,
                         help='path to directory & name pattern for segmentation',
                         default=dict_paths['segm'])
-    parser.add_argument('-imgs', '--path_image', type=str, required=False,
+    parser.add_argument('-i', '--path_image', type=str, required=False,
                         help='path to directory & name pattern for images',
                         default=dict_paths['image'])
-    parser.add_argument('-out', '--path_out', type=str, required=False,
+    parser.add_argument('-o', '--path_output', type=str, required=False,
                         help='path to the output directory',
                         default=dict_paths['output'])
+    parser.add_argument('--drop_labels', type=int, required=False, nargs='*',
+                        help='list of labels skipped from the statistic')
     parser.add_argument('--nb_jobs', type=int, required=False,
-                        default=NB_THREADS,
-                        help='number of processes in parallel')
+                        help='number of processes in parallel',
+                        default=NB_THREADS)
+    parser.add_argument('--relabel', required=False, action='store_true',
+                        help='relabel to find label relations', default=False)
     parser.add_argument('--visual', required=False, action='store_true',
                         help='export visualisations', default=False)
-    args = parser.parse_args()
+    args = vars(parser.parse_args())
     logging.info('ARG PARAMETERS: \n %s', repr(args))
-    dict_paths = {
-        'annot': tl_data.update_path(args.path_annot),
-        'segm': tl_data.update_path(args.path_segm),
-        'image': '',
-        'output': tl_data.update_path(args.path_out),
-    }
-    if isinstance(args.path_image, str) and args.path_image.lower() != 'none':
-        dict_paths['image'] = tl_data.update_path(args.path_image)
+    if not isinstance(args['path_image'], str) \
+            or args['path_image'].lower() == 'none':
+        args['path_image'] = None
+    dict_paths = {k.split('_')[-1]:
+                      os.path.join(tl_data.update_path(os.path.dirname(args[k])),
+                                   os.path.basename(args[k]))
+                  for k in args if k.startswith('path_') and args[k] is not None}
     for k in dict_paths:
-        if dict_paths[k] == '' or k == 'output':
-            continue
-        p = os.path.dirname(dict_paths[k]) if '*' in dict_paths[k] else dict_paths[k]
-        assert os.path.exists(p), 'missing: (%s) "%s"' % (k, p)
+        assert os.path.isdir(os.path.dirname(dict_paths[k])), \
+            'missing: (%s) "%s"' % (k, os.path.dirname(dict_paths[k]))
+    if args['drop_labels'] is None:
+        args['drop_labels'] = []
     return dict_paths, args


-def export_visual(df_row, path_out, relabel=True):
+def fill_lut(lut, segm, offset=0):
+    uq_lbs = np.unique(lut).tolist()
+    for i, lb in enumerate(lut[1:]):
+        j = i + 1 + offset
+        if lb == 0 and j in segm:
+            lut[j] = max(uq_lbs) + 1
+            uq_lbs += [lut[j]]
+    return lut
+
+
+def export_visual(n_annot_seg_img, path_out):
     """ given visualisation of segmented image and annotation

     :param {str: ...} df_row:
     :param str path_out: path to the visualisation directory
-    :param bool relabel: whether relabel segmentation as sequential
+    :param [int] drop_labels: list of labels to be skipped
     """
-    annot, _ = tl_data.load_image_2d(df_row['path_1'])
-    segm, _ = tl_data.load_image_2d(df_row['path_2'])
-    img = None
-    if 'path_3' in df_row:
-        img, _ = tl_data.load_image_2d(df_row['path_3'])
-    if relabel:
-        annot = relabel_sequential(annot)[0]
-        segm = seg_lbs.relabel_max_overlap_unique(annot, segm)
-    fig = seg_visu.figure_overlap_annot_segm_image(annot, segm, img)
-    name = os.path.splitext(os.path.basename(df_row['path_1']))[0]
+    name, annot, segm, img = n_annot_seg_img
+    # relabel for simpler visualisations of class differences
+    if np.sum(annot < 0) > 0:
+        annot[annot < 0] = -1
+        _, lut, _ = relabel_sequential(annot + 1)
+        lut = fill_lut(lut, segm, offset=1)
+        annot = lut[annot.astype(int) + 1] - 1
+        segm = lut[segm.astype(int) + 1] - 1
+    else:
+        annot, lut, _ = relabel_sequential(annot)
+        lut = fill_lut(lut, segm, offset=0)
+        segm = lut[segm.astype(int)]
+    fig = seg_visu.figure_overlap_annot_segm_image(annot, segm, img,
+                                                   drop_labels=[-1])
     logging.debug('>> exporting -> %s', name)
     fig.savefig(os.path.join(path_out, '%s.png' % name))
+    plt.close(fig)


 def wrapper_relabel_segm(annot_segm):
@@ -114,7 +141,8 @@ def wrapper_relabel_segm(annot_segm):
     return segm


-def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
+def main(dict_paths, visual=True, drop_labels=None, relabel=True,
+         nb_jobs=NB_THREADS):
     """ main evaluation

     :param {str: str} dict_paths:
@@ -129,7 +157,7 @@ def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
     name = os.path.basename(os.path.dirname(dict_paths['segm']))
     list_dirs = [dict_paths['annot'], dict_paths['segm']]
-    if dict_paths['image'] != '':
+    if dict_paths.get('image', '') != '':
         list_dirs.append(dict_paths['image'])
     df_paths = tl_data.find_files_match_names_across_dirs(list_dirs)
     path_csv = os.path.join(dict_paths['output'], NAME_CVS_PER_IMAGE % name)
@@ -141,9 +169,18 @@ def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
     segms, names = tl_data.load_images_list(df_paths['path_2'].values.tolist())
     logging.info('loaded %i annots and %i segms', len(annots), len(segms))

+    if drop_labels is not None:
+        annots = [np.array(annot, dtype=float) for annot in annots]
+        for lb in drop_labels:
+            for i, annot in enumerate(annots):
+                annots[i][annot == lb] = np.nan
+        annots = [np.nan_to_num(annot + 1).astype(int) - 1 for annot in annots]
+    segms = [seg.astype(int) for seg in segms]
+
     if relabel:
         logging.info('relabel annotations and segmentations')
-        annots = [relabel_sequential(annot)[0] for annot in annots]
+        if drop_labels is None:
+            annots = [relabel_sequential(annot)[0] for annot in annots]
         iterate = tl_expt.WrapExecuteSequence(wrapper_relabel_segm,
                                               zip(annots, segms),
                                               nb_jobs=nb_jobs, ordered=True,
@@ -153,7 +190,8 @@ def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
     logging.info('compute statistic per image')
     path_csv = os.path.join(dict_paths['output'], NAME_CVS_PER_IMAGE % name)
     logging.debug('export to "%s"', path_csv)
-    df_stat = seg_clf.compute_stat_per_image(segms, annots, names, nb_jobs)
+    df_stat = seg_clf.compute_stat_per_image(segms, annots, names, nb_jobs,
+                                             drop_labels=[-1])
     df_stat.to_csv(path_csv)

     logging.info('summarise statistic')
@@ -165,6 +203,9 @@ def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
     df_desc.to_csv(path_csv)

     if visual:
+        images = [None] * len(annots)
+        if 'path_3' in df_paths:
+            images, _ = tl_data.load_images_list(df_paths['path_3'].values)
         path_visu = os.path.join(dict_paths['output'],
                                  '%s%s' % (name, SUFFIX_VISUAL))
         if not os.path.isdir(path_visu):
@@ -172,7 +213,7 @@ def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
         # for idx, row in df_paths.iterrows():
         #     export_visual(row, path_visu)
         _wrapper_visual = partial(export_visual, path_out=path_visu)
-        it_values = (row for idx, row in df_paths.iterrows())
+        it_values = zip(names, annots, segms, images)
         iterate = tl_expt.WrapExecuteSequence(_wrapper_visual, it_values,
                                               desc='visualisations',
                                               nb_jobs=nb_jobs)
@@ -184,4 +225,5 @@ def main(dict_paths, nb_jobs=NB_THREADS, visual=True, relabel=True):
 if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
     dict_paths, args = aparse_params(PATHS)
-    main(dict_paths, nb_jobs=args.nb_jobs, visual=args.visual)
+    main(dict_paths, nb_jobs=args['nb_jobs'], visual=args['visual'],
+         drop_labels=args['drop_labels'], relabel=args['relabel'])
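The new `fill_lut` extends the look-up table produced by `relabel_sequential(annot)` so that labels occurring only in the segmentation keep a distinct id instead of collapsing to background. A self-contained toy walk-through of that idea; the LUT is written out by hand here rather than produced by skimage:

```python
# Toy version of the fill_lut idea: segmentation-only labels get fresh ids.
import numpy as np

annot = np.array([[0, 0, 3, 3]])      # annotation uses labels {0, 3}
segm = np.array([[0, 2, 3, 3]])       # segmentation additionally uses label 2
lut = np.array([0, 0, 0, 1])          # forward map of relabel_sequential(annot)
for j in range(1, len(lut)):          # same loop as fill_lut(lut, segm)
    if lut[j] == 0 and j in segm:
        lut[j] = lut.max() + 1        # give segm-only labels a fresh id
print(lut[annot])                     # [[0 0 1 1]]
print(lut[segm])                      # [[0 2 1 1]]  (label 2 not merged into 0)
```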
diff --git a/experiments_segmentation/run_eval_superpixels.py b/experiments_segmentation/run_eval_superpixels.py
index 07bae034..4cd777f2 100644
--- a/experiments_segmentation/run_eval_superpixels.py
+++ b/experiments_segmentation/run_eval_superpixels.py
@@ -1,5 +1,7 @@
 """ Evaluate superpixels quality regarding given annotation
+Perform experiment with specified parameters and export output statistic
+per image if the output path is given

 SAMPLE run:
 >> python run_eval_superpixels.py \
@@ -18,8 +20,15 @@
 import multiprocessing as mproc
 from functools import partial

+import matplotlib
+if os.environ.get('DISPLAY', '') == '' \
+        and matplotlib.rcParams['backend'] != 'agg':
+    logging.warning('No display found. Using non-interactive Agg backend.')
+    matplotlib.use('Agg')
+
 import numpy as np
 import pandas as pd
+import matplotlib.pyplot as plt

 sys.path += [os.path.abspath('.'), os.path.abspath('..')]  # Add path to root
 import imsegm.utils.data_io as tl_data
@@ -32,10 +41,12 @@
 NB_THREADS = max(1, int(mproc.cpu_count() * 0.9))

-PATH_IMAGES = tl_data.update_path(os.path.join('data_images', 'drosophila_ovary_slice'))
+PATH_IMAGES = os.path.join(tl_data.update_path('data_images'),
+                           'drosophila_ovary_slice')
 PATH_RESULTS = tl_data.update_path('results', absolute=True)
-NAME_CSV_DISTANCES = 'measured_boundary_distances.csv'
-PARAMS = {
+NAME_CSV_DISTANCES = 'measured_boundary_distances' \
+                     '_SLIC_size-%i_regul-%.2f_slico-%i.csv'
+DEFAULT_PARAMS = {
     'path_images': os.path.join(PATH_IMAGES, 'image', '*.jpg'),
     'path_segms': os.path.join(PATH_IMAGES, 'annot_eggs', '*.png'),
     'path_out': os.path.join(PATH_RESULTS, 'compute_boundary_distances'),
@@ -106,6 +117,7 @@ def compute_boundary_distance(idx_row, params, path_out=''):
         logging.debug('visualise results...')
         fig = tl_visu.figure_segm_boundary_dist(segm, slic)
         fig.savefig(os.path.join(path_out, name + '.jpg'))
+        plt.close(fig)

     return name, np.mean(dists)

@@ -137,7 +149,10 @@ def main(params):
     df_dist.set_index('name', inplace=True)

     if os.path.isdir(params['path_out']):
-        df_dist.to_csv(os.path.join(params['path_out'], NAME_CSV_DISTANCES))
+        csv_name = NAME_CSV_DISTANCES % (params['slic_size'],
+                                         params['slic_regul'],
+                                         params['slico'])
+        df_dist.to_csv(os.path.join(params['path_out'], csv_name))
     logging.info('STATISTIC:')
     logging.info(df_dist.describe())

@@ -147,6 +162,6 @@
 if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
-    params = arg_parse_params(PARAMS)
+    params = arg_parse_params(DEFAULT_PARAMS)
     main(params)
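Both scripts above now guard against running on a display-less machine (such as CI) by forcing the non-interactive Agg backend; the same pattern as a standalone snippet — the key point is that `matplotlib.use` must run before `pyplot` is imported:

```python
# Standalone version of the headless-backend guard added in this diff.
import os
import logging
import matplotlib

if os.environ.get('DISPLAY', '') == '' \
        and matplotlib.rcParams['backend'] != 'agg':
    logging.warning('No display found. Using non-interactive Agg backend.')
    matplotlib.use('Agg')

import matplotlib.pyplot as plt  # safe to import once the backend is set
```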
diff --git a/experiments_segmentation/run_segm_slic_classif_graphcut.py b/experiments_segmentation/run_segm_slic_classif_graphcut.py
index 51c6fde6..582d7abc 100644
--- a/experiments_segmentation/run_segm_slic_classif_graphcut.py
+++ b/experiments_segmentation/run_segm_slic_classif_graphcut.py
@@ -1,7 +1,7 @@
 """
-Run supervised segmentation with superpixels and training examples
+Run supervised segmentation experiment with superpixels and training examples

-1) train classifier on annotated images with some statistic
+1) train classifier on annotated images with some statistic - LPO
 2) segment new images in specified folder

 Segmentation pipeline:
@@ -70,8 +70,6 @@
 TYPES_LOAD_IMAGE = ['2d_rgb', '2d_split']
 NAME_FIG_LABEL_HISTO = 'fig_histogram_annot_segments.png'
 NAME_CSV_SEGM_STAT_SLIC_ANNOT = 'statistic_segm_slic_annot.csv'
-NAME_CSV_SEGM_STAT_RESULT_LOO = 'statistic_segm_LOO.csv'
-NAME_CSV_SEGM_STAT_RESULT_LOO_GC = 'statistic_segm_LOO_gc.csv'
 NAME_CSV_SEGM_STAT_RESULT_LPO = 'statistic_segm_L-%i-O.csv'
 NAME_CSV_SEGM_STAT_RESULT_LPO_GC = 'statistic_segm_L-%i-O_gc.csv'
 NAME_CSV_SEGM_STAT_RESULTS = 'statistic_segm_results.csv'
@@ -82,16 +80,14 @@
 FOLDER_ANNOT = 'annotations'
 FOLDER_SLIC = 'slic'
 FOLDER_SLIC_ANNOT = 'annot_slic'
-FOLDER_SEGM = 'segmentation_trained'
+FOLDER_TRAIN = 'segmentation_trained'
 SUFFIX_VISUAL = '___visual'
-FOLDER_SEGM_VISU = FOLDER_SEGM + SUFFIX_VISUAL
-FOLDER_LOO = 'segmentation_leave-one-out'
-FOLDER_LOO_VISU = FOLDER_LOO + SUFFIX_VISUAL
+FOLDER_TRAIN_VISU = FOLDER_TRAIN + SUFFIX_VISUAL
 FOLDER_LPO = 'segmentation_leave-P-out'
 FOLDER_LPO_VISU = FOLDER_LPO + SUFFIX_VISUAL
 LIST_FOLDERS_BASE = (FOLDER_IMAGE, FOLDER_ANNOT, FOLDER_SLIC, FOLDER_SLIC_ANNOT,
-                     FOLDER_SEGM, FOLDER_LOO, FOLDER_LPO)
-LIST_FOLDERS_DEBUG = (FOLDER_SEGM_VISU, FOLDER_LOO_VISU, FOLDER_LPO_VISU)
+                     FOLDER_TRAIN, FOLDER_LPO)
+LIST_FOLDERS_DEBUG = (FOLDER_TRAIN_VISU, FOLDER_LPO_VISU)

 # unique experiment means adding timestemp on the end of folder name
 EACH_UNIQUE_EXPERIMENT = False
@@ -100,19 +96,17 @@
 # relabel annotation such that labels are in sequence no gaps in between them
 ANNOT_RELABEL_SEQUENCE = False
 # whether skip loading config from previous fun
-FORCE_RELOAD = True
+FORCE_RELOAD = False
 # even you have dumped data from previous time, all wil be recomputed
-FORCE_RECOMP_DATA = True
+FORCE_RECOMP_DATA = False
 # even you have saved classif. data from previous time, all wil be retrained
-FORCE_RETRAIN_CLASSIF = True
+FORCE_RETRAIN_CLASSIF = False
 # ration of fold size for LPO for hyper-parameter search
 CROSS_VAL_LEAVE_OUT_SEARCH = 0.2
 # ration of fold size for LPO for evaluation
 CROSS_VAL_LEAVE_OUT_EVAL = 0.1
-# perform the Leave-One-Out experiment
-RUN_CROSS_VAL_LOO = True
-# perform the Leave-P-Out experiment
-RUN_CROSS_VAL_LPO = True
+# run prediction on training data, should be overfitting
+RUN_TRAIN_PREDICT = False

 FEATURES_SET_COLOR = {'color': ('mean', 'std', 'energy')}
@@ -146,8 +140,8 @@
 PATH_RESULTS = tl_data.update_path('results', absolute=True)
 SEGM_PARAMS.update({
     'path_train_list': os.path.join(PATH_IMAGES, 'list_imgs-annot-struct.csv'),
-    # 'path_predict_imgs': os.path.join(PATH_IMAGES, 'image', 'insitu43*.tif'),
-    'path_predict_imgs': '',
+    'path_predict_imgs': os.path.join(PATH_IMAGES, 'image', 'insitu43*.tif'),
+    # 'path_predict_imgs': '',
     'path_out': PATH_RESULTS,
 })

@@ -247,6 +241,7 @@ def dataset_load_images_annot_compute_features(params,

     # compute features
     df_paths = pd.read_csv(params['path_train_list'], index_col=0)
+    df_paths.reset_index(inplace=True)
     assert all(n in df_paths.columns for n in ['path_image', 'path_annot']), \
         'missing required columns in loaded csv file'
     _wrapper_load_compute = partial(load_image_annot_compute_features_labels,
@@ -351,7 +346,10 @@ def segment_image(imgs_idx_path, params, classif, path_out, path_visu=None,
     for segm, suffix in [(segm_gc, ''), (segm_map, '_MAP')]:
         path_img = os.path.join(path_out, idx_name + suffix + '.png')
         logging.debug('export segmentation: %s', path_img)
-        img_seg = Image.fromarray(segm.astype(np.uint8))
+        if np.max(segm) <= 1:
+            img_seg = Image.fromarray((segm * 255).astype(np.uint8))
+        else:
+            img_seg = Image.fromarray(segm.astype(np.uint8))
         img_seg.convert('L').save(path_img)
         # io.imsave(path_img, segm_gc)

@@ -359,7 +357,8 @@ def segment_image(imgs_idx_path, params, classif, path_out, path_visu=None,
     np.savez_compressed(path_npz, segm_soft)
     # plt.imsave(os.path.join(path_out, idx_name + '_rgb.png'), seg_pipe)

-    if path_visu is not None and os.path.isdir(path_visu):
+    if params.get('visual', False) and path_visu is not None \
+            and os.path.isdir(path_visu):
         export_draw_image_segm_contour(img, segm_gc, path_visu,
                                        idx_name, '_GC')
         export_draw_image_segm_contour(img, segm_map, path_visu,
@@ -370,12 +369,14 @@ def segment_image(imgs_idx_path, params, classif, path_out, path_visu=None,
             fig = tl_visu.figure_segm_graphcut_debug(debug_visual)
             fig.savefig(path_fig, bbox_inches='tight', pad_inches=0.1)
             plt.close(fig)
-    # gc.collect(), time.sleep(1)
+    gc.collect()
+    time.sleep(1)
     return idx_name, segm_map, segm_gc


 def eval_segment_with_annot(params, dict_annot, dict_segm, dict_label_hist=None,
-                            name_csv=NAME_CSV_SEGM_STAT_SLIC_ANNOT, nb_jobs=1):
+                            name_csv='statistic___.csv', drop_labels=None,
+                            nb_jobs=1):
     """ evaluate the segmentation results according given annotation

     :param {str: ...} params:
@@ -394,10 +395,11 @@ def eval_segment_with_annot(params, dict_annot, dict_segm, dict_label_hist=None,
     list_annot = [dict_annot[n] for n in dict_annot]
     list_segm = [dict_segm[n] for n in dict_annot]
     df_stat = seg_clf.compute_stat_per_image(list_segm, list_annot,
-                                             [n for n in dict_annot], nb_jobs)
+                                             [n for n in dict_annot], nb_jobs,
+                                             drop_labels=drop_labels)

     path_csv = os.path.join(params['path_exp'], name_csv)
-    logging.info('STAT on seg_pipe and annot (%s):', name_csv)
+    logging.info('STATISTIC on segm and annot (%s):', name_csv)
     df_stat.to_csv(path_csv)

     logging.info(metrics.classification_report(
@@ -407,45 +409,8 @@ def eval_segment_with_annot(params, dict_annot, dict_segm, dict_label_hist=None,
     return df_stat


-def retrain_loo_segment_image(imgs_idx_path, path_classif, path_dump,
-                              path_out, path_visu,
-                              show_debug_imgs=SHOW_DEBUG_IMAGES):
-    """ load the classifier, and dumped data, subtract the image,
-    retrain the classif. without it and do the segmentation
-
-    :param () imgs_idx_path: path to input image
-    :param str path_classif: path to saved classifier
-    :param str path_dump: path to dumped data
-    :param, str path_out: path to segmentation outputs
-    :param bool show_debug_imgs: whether show debug images
-    :return (str, ndarray, ndarray):
-    """
-    idx, path_img = parse_imgs_idx_path(imgs_idx_path)
-    dict_imgs, _, _, dict_features, dict_labels, _, _ = \
-        load_dump_data(path_dump)
-    dict_classif = seg_clf.load_classifier(path_classif)
-    classif = dict_classif['clf_pipeline']
-    params = dict_classif['params']
-
-    idx_name = get_idx_name(idx, path_img)
-    for d in [dict_features, dict_labels]:
-        _ = d.pop(idx_name, None)
-    assert (len(dict_imgs) - len(dict_features)) == 1, \
-        'no image was dropped from training set'
-
-    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
-        dict_features, dict_labels, balance_type=params['balance'], drop_labels=[-1])
-    classif.fit(features, labels)
-
-    idx_name, segm, segm_gc = segment_image(imgs_idx_path, params, classif,
-                                            path_out, path_visu,
-                                            show_debug_imgs=show_debug_imgs)
-    # gc.collect(), time.sleep(1)
-    return idx_name, segm, segm_gc
-
-
-def retrain_lpo_segment_image(list_imgs_idx_path, path_classif, path_dump,
-                              path_out, path_visu,
+def retrain_lpo_segment_image(list_imgs_idx_path,
+                              path_classif, path_dump, path_out, path_visu,
                               show_debug_imgs=SHOW_DEBUG_IMAGES):
     """ load the classifier, and dumped data, subtract the image,
     retrain the classif without it and do the segmentation
@@ -465,15 +430,15 @@ def retrain_lpo_segment_image(list_imgs_idx_path, path_classif, path_dump,

     for idx, path_img in list_imgs_idx_path:
         idx_name = get_idx_name(idx, path_img)
-        for d in [dict_features, dict_labels]:
-            _ = d.pop(idx_name, None)
+        _ = dict_features.pop(idx_name, None)
+        _ = dict_labels.pop(idx_name, None)
     assert (len(dict_imgs) - len(dict_features)) == len(list_imgs_idx_path), \
-        'no (%i) images of (%i) was dropped from training set (%i)' \
-        % (len(list_imgs_idx_path), len(dict_imgs), len(dict_features))
+        'subset of %i images was not dropped, training set %i from total %i' \
+        % (len(list_imgs_idx_path), len(dict_features), len(dict_imgs))

     features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
-        dict_features, dict_labels, balance_type=params['balance'],
-        drop_labels=[-1])
+        dict_features, dict_labels, balance_type=params['balance'],
+        drop_labels=[-1, np.nan] + params.get('drop_labels', []))
     classif.fit(features, labels)

     dict_segm, dict_segm_gc = {}, {}
@@ -483,7 +448,8 @@ def retrain_lpo_segment_image(list_imgs_idx_path, path_classif, path_dump,
                                                 show_debug_imgs=show_debug_imgs)
         dict_segm[idx_name] = segm
         dict_segm_gc[idx_name] = segm_gc
-    # gc.collect(), time.sleep(1)
+    gc.collect()
+    time.sleep(1)
     return dict_segm, dict_segm_gc

@@ -508,14 +474,14 @@ def get_summary(df, name, list_stat=('mean', 'std', 'median')):
     return dict_state


-def perform_predictions(params, paths_img, classif,
-                        show_debug_imgs=SHOW_DEBUG_IMAGES):
+def perform_train_predictions(params, paths_img, classif,
+                              show_debug_imgs=SHOW_DEBUG_IMAGES):
     logging.info('run prediction on training images...')
     imgs_idx_path = list(zip(range(1, len(paths_img) + 1), paths_img))

     dict_segms, dict_segms_gc = dict(), dict()
-    path_out = os.path.join(params['path_exp'], FOLDER_SEGM)
-    path_visu = os.path.join(params['path_exp'], FOLDER_SEGM_VISU)
+    path_out = os.path.join(params['path_exp'], FOLDER_TRAIN)
+    path_visu = os.path.join(params['path_exp'], FOLDER_TRAIN_VISU)
     _wrapper_segment = partial(segment_image, params=params, classif=classif,
                                path_out=path_out, path_visu=path_visu,
                                show_debug_imgs=show_debug_imgs)
@@ -528,53 +494,7 @@ def perform_train_predictions(params, paths_img, classif,
     return dict_segms, dict_segms_gc


-def experiment_loo(params, df_stat, dict_annot, paths_img, path_classif,
-                   path_dump, show_debug_imgs=SHOW_DEBUG_IMAGES):
-    """ experiment Leave-One-Out
-
-    :param {str: ...} params:
-    :param DF df_stat:
-    :param {str: ndarray} dict_annot:
-    :param [str] paths_img:
-    :param str path_classif:
-    :param str path_dump:
-    :param bool show_debug_imgs: whether show debug images
-    :return {}:
-    """
-    imgs_idx_path = list(zip(range(1, len(paths_img) + 1), paths_img))
-    logging.info('run prediction on training images as Leave-One-Out...')
-    dict_segms, dict_segms_gc = dict(), dict()
-    path_out = os.path.join(params['path_exp'], FOLDER_LOO)
-    path_visu = os.path.join(params['path_exp'], FOLDER_LOO_VISU)
-    _wrapper_segment = partial(retrain_loo_segment_image,
-                               path_classif=path_classif, path_dump=path_dump,
-                               path_out=path_out, path_visu=path_visu,
-                               show_debug_imgs=show_debug_imgs)
-    iterate = tl_expt.WrapExecuteSequence(_wrapper_segment, imgs_idx_path,
-                                          nb_jobs=params['nb_jobs'],
-                                          desc='experiment LOO')
-    for name, segm, segm_gc in iterate:
-        dict_segms[name] = segm
-        dict_segms_gc[name] = segm_gc
-    gc.collect()
-    time.sleep(1)
-
-    df = eval_segment_with_annot(params, dict_annot, dict_segms, None,
-                                 NAME_CSV_SEGM_STAT_RESULT_LOO,
-                                 params['nb_jobs'])
-    df_stat = df_stat.append(get_summary(df, 'segm (LOO)'),
-                             ignore_index=True)
-    df = eval_segment_with_annot(params, dict_annot, dict_segms_gc, None,
-                                 NAME_CSV_SEGM_STAT_RESULT_LOO_GC,
-                                 params['nb_jobs'])
-    df_stat = df_stat.append(get_summary(df, 'segm GC (LOO)'),
-                             ignore_index=True)
-    path_csv_stat = os.path.join(params['path_exp'], NAME_CSV_SEGM_STAT_RESULTS)
-    df_stat.set_index(['name']).to_csv(path_csv_stat)
-    return df_stat
-
-
-def experiment_lpo(params, df_stat, dict_annot, paths_img, path_classif,
+def experiment_lpo(params, df_stat, dict_annot, idx_paths_img, path_classif,
                    path_dump, nb_holdout, show_debug_imgs=SHOW_DEBUG_IMAGES):
     """ experiment Leave-P-samples-Out

@@ -588,14 +508,14 @@ def experiment_lpo(params, df_stat, dict_annot, paths_img, path_classif,
     :param bool show_debug_imgs: whether show debug images
     :return {}:
     """
-    imgs_idx_path = list(zip(range(1, len(paths_img) + 1), paths_img))
     logging.info('run prediction on training images as Leave-%i-Out...',
                  nb_holdout)
     dict_segms, dict_segms_gc = dict(), dict()
-    cv = seg_clf.CrossValidatePOut(len(paths_img), nb_hold_out=nb_holdout)
-    test_imgs_idx_path = [[imgs_idx_path[i] for i in ids] for _, ids in cv]
+    cv = seg_clf.CrossValidatePOut(len(idx_paths_img), nb_hold_out=nb_holdout)
+    test_imgs_idx_path = [[idx_paths_img[i] for i in ids] for _, ids in cv]
     path_out = os.path.join(params['path_exp'], FOLDER_LPO)
-    path_visu = os.path.join(params['path_exp'], FOLDER_LPO_VISU)
+    path_visu = os.path.join(params['path_exp'], FOLDER_LPO_VISU) \
+        if params.get('visual', False) else None
    _wrapper_segment = partial(retrain_lpo_segment_image,
                               path_classif=path_classif,
                               path_dump=path_dump,
                               path_out=path_out, path_visu=path_visu,
@@ -611,11 +531,13 @@ def experiment_lpo(params, df_stat, dict_annot, paths_img, path_classif,

     df = eval_segment_with_annot(params, dict_annot, dict_segms, None,
                                  NAME_CSV_SEGM_STAT_RESULT_LPO % nb_holdout,
+                                 params.get('drop_labels', None),
                                  params['nb_jobs'])
     df_stat = df_stat.append(get_summary(df, 'segm (L-%i-O)' % nb_holdout),
                              ignore_index=True)
     df = eval_segment_with_annot(params, dict_annot, dict_segms_gc, None,
                                  NAME_CSV_SEGM_STAT_RESULT_LPO_GC % nb_holdout,
+                                 params.get('drop_labels', None),
                                  params['nb_jobs'])
     df_stat = df_stat.append(get_summary(df, 'segm GC (L-%i-O)' % nb_holdout),
                              ignore_index=True)
@@ -646,8 +568,10 @@ def load_train_classifier(params, features, labels, feature_names, sizes,
     classif, path_classif = seg_clf.create_classif_train_export(
         params['classif'], features, labels, cross_val=cv, params=params,
         feature_names=feature_names,
+        pca_coef=params['pca_coef'],
+        eval_metric=params.get('classif_metric', 'f1'),
         nb_search_iter=params.get('nb_classif_search', 1),
-        nb_jobs=params['nb_jobs'], pca_coef=params['pca_coef'],
+        nb_jobs=params['nb_jobs'],
         path_out=params['path_exp'])
     params['path_classif'] = path_classif
     cv = seg_clf.CrossValidatePSetsOut(sizes, nb_hold_out=nb_holdout)
@@ -658,6 +582,41 @@ def load_train_classifier(params, features, labels, feature_names, sizes,
     return params, classif, path_classif


+def wrapper_filter_labels(name_img_labels_slic_label_hist, label_purity,
+                          drop_labels=None, path_visu=None):
+    name, img, labels, slic, label_hist = name_img_labels_slic_label_hist
+    weights = np.max(label_hist, axis=1)
+
+    if path_visu is not None and os.path.isdir(path_visu):
+        used = np.zeros(len(weights))
+        used[np.asarray(weights) >= label_purity] = 1
+        if drop_labels is not None:
+            for lb in drop_labels:
+                used[labels == lb] = 0
+        fig = tl_visu.figure_used_samples(img, labels, slic, used)
+        path_fig = os.path.join(path_visu, name + '___training.jpg')
+        fig.savefig(path_fig)
+        plt.close(fig)
+
+    labels[weights < label_purity] = -1
+    return name, labels
+
+
+def filter_train_with_purity(dict_imgs, dict_labels, dict_label_hist,
+                             label_purity, dict_slics, drop_labels=None,
+                             path_visu=None, nb_jobs=NB_THREADS):
+    _w_filter = partial(wrapper_filter_labels, label_purity=label_purity,
+                        drop_labels=drop_labels, path_visu=path_visu)
+    iter_vals = ((n, dict_imgs[n], dict_labels[n], dict_slics[n],
+                  dict_label_hist[n]) for n in dict_labels)
+    iterate = tl_expt.WrapExecuteSequence(_w_filter, iter_vals, nb_jobs=nb_jobs,
+                                          desc='filter labels (purity)')
+    for n, lbs in iterate:
+        dict_labels[n] = lbs
+
+    return dict_labels
+
+
 def main_train(params):
     """ the main composed from following steps:
     1) load already computed data (features and labels) or compute them now
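The new `filter_train_with_purity` above replaces an inline loop in `main_train`: superpixels whose dominant annotation class covers less than `label_purity` of their pixels get label -1 and are later dropped from the training set. A toy illustration of the purity test:

```python
# Toy illustration of the purity filter: impure superpixels are relabelled -1.
import numpy as np

label_hist = np.array([[0.9, 0.1],    # superpixel 0: 90% class 0 -> kept
                       [0.6, 0.4],    # superpixel 1: impure -> dropped
                       [0.2, 0.8]])   # superpixel 2: 80% class 1 -> kept
labels = np.argmax(label_hist, axis=1)
weights = np.max(label_hist, axis=1)  # purity = share of the dominant class
labels[weights < 0.75] = -1           # with label_purity = 0.75
print(labels)                         # [ 0 -1  1]
```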
@@ -670,7 +629,7 @@ def main_train(params):
     """
     logging.getLogger().setLevel(logging.DEBUG)
     logging.info('running TRAINING...')
-    show_debug_imgs = params.get('visual', False)
+    show_visual = params.get('visual', False)

     reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                          or FORCE_RELOAD)
@@ -680,7 +639,7 @@
     tl_expt.set_experiment_logger(params['path_exp'])
     logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
     tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
-    if show_debug_imgs:
+    if show_visual:
         tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

     df_stat = pd.DataFrame()
@@ -692,7 +651,7 @@
         dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
             dict_label_hist, feature_names = \
                 dataset_load_images_annot_compute_features(params,
-                                                           show_debug_imgs)
+                                                           show_visual)
         save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                        dict_features, dict_labels, dict_label_hist,
                        feature_names)
@@ -702,6 +661,7 @@
                        for n in dict_annot}
     df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                  dict_label_hist, NAME_CSV_SEGM_STAT_SLIC_ANNOT,
+                                 params.get('drop_labels', None),
                                  params['nb_jobs'])
     df_stat = df_stat.append(get_summary(df, 'SLIC-annot'), ignore_index=True)
     path_csv_stat = os.path.join(params['path_exp'], NAME_CSV_SEGM_STAT_RESULTS)
@@ -714,14 +674,19 @@
     logging.info('summary on edge-label transitions: \n %s',
                  repr(params['label_transitions']))

-    for name in dict_labels:
-        weights = np.max(dict_label_hist[name], axis=1)
-        dict_labels[name][weights < params['label_purity']] = -1
+    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
+        if show_visual else None
+    dict_labels = filter_train_with_purity(dict_imgs, dict_labels, dict_label_hist,
+                                           params['label_purity'], dict_slics,
+                                           drop_labels=params.get('drop_labels', None),
+                                           path_visu=path_purity_visu,
+                                           nb_jobs=params['nb_jobs'])

     logging.info('prepare features...')
     # concentrate features, labels
     features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
-        dict_features, dict_labels, balance_type=params['balance'], drop_labels=[-1])
+        dict_features, dict_labels, balance_type=params['balance'],
+        drop_labels=[-1, np.nan] + params.get('drop_labels', []))

     # drop "do not care" label which are -1
     features = np.nan_to_num(features)
@@ -732,39 +697,39 @@
                                                        labels, feature_names,
                                                        sizes, nb_holdout)

-    # test classif. on images
-    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
-    paths_img = df_paths['path_image'].tolist()
-    perform_predictions(params, paths_img, classif,
-                        show_debug_imgs=show_debug_imgs)
-
     def _path_expt(n):
         return os.path.join(params['path_exp'], n)

-    # LEAVE ONE OUT
-    if params.get('run_LOO', RUN_CROSS_VAL_LOO):
-        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
-                                 path_classif, path_dump,
-                                 show_debug_imgs=show_debug_imgs)
+    # test classif. on training images
+    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
+    df_paths.reset_index(inplace=True)
+    paths_img = df_paths['path_image'].tolist()
+    if RUN_TRAIN_PREDICT:
+        perform_train_predictions(params, paths_img, classif,
+                                  show_debug_imgs=show_visual)
     else:
-        write_skip_file(_path_expt(FOLDER_LOO))
-        write_skip_file(_path_expt(FOLDER_LOO_VISU))
+        write_skip_file(_path_expt(FOLDER_TRAIN))
+
+    gc.collect()
+    time.sleep(1)

     # LEAVE P OUT
-    if params.get('run_LPO', RUN_CROSS_VAL_LPO):
-        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
+    if params.get('run_LPO', True):
+        idx_paths_img = list(zip(df_paths.index.tolist(),
+                                 df_paths['path_image'].tolist()))
+        df_stat = experiment_lpo(params, df_stat, dict_annot, idx_paths_img,
                                  path_classif, path_dump, nb_holdout,
-                                 show_debug_imgs=show_debug_imgs)
+                                 show_debug_imgs=show_visual)
     else:
         write_skip_file(_path_expt(FOLDER_LPO))
-        write_skip_file(_path_expt(FOLDER_LPO_VISU))
+        # write_skip_file(_path_expt(FOLDER_LPO_VISU))

     logging.info('Statistic: \n %s', repr(df_stat.describe()))
     logging.info('training DONE')
     return params


-def prepare_output_dir(path_pattern_imgs, path_out, name):
+def prepare_output_dir(path_pattern_imgs, path_out, name, visual=True):
     """ prepare output directory for segmenting new images

     :param str path_pattern_imgs:
@@ -778,9 +743,12 @@ def prepare_output_dir(path_pattern_imgs, path_out, name):
     path_out = os.path.join(path_out, name)
     if not os.path.isdir(path_out):
         os.mkdir(path_out)
-    path_visu = path_out + SUFFIX_VISUAL
-    if not os.path.isdir(path_visu):
-        os.mkdir(path_visu)
+    if visual:
+        path_visu = path_out + SUFFIX_VISUAL
+        if not os.path.isdir(path_visu):
+            os.mkdir(path_visu)
+    else:
+        path_visu = None
     return path_out, path_visu


@@ -815,7 +783,8 @@ def main_predict(path_classif, path_pattern_imgs, path_out, name='SEGMENT___',
     params.update({k: params_local[k] for k in params_local
                    if k.startswith('path_') or k.startswith('gc_')})

-    path_out, path_visu = prepare_output_dir(path_pattern_imgs, path_out, name)
+    path_out, path_visu = prepare_output_dir(path_pattern_imgs, path_out, name,
+                                             visual=params.get('visual', False))
     tl_expt.set_experiment_logger(path_out)
     logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
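`experiment_lpo` relies on `seg_clf.CrossValidatePOut` to split the training images into hold-out folds. A minimal stand-in for it, assuming it yields disjoint hold-out folds of size P; this is an assumption about the class's behaviour, not verified from this diff:

```python
# Hedged stand-in for CrossValidatePOut: disjoint hold-out folds of size P.
def leave_p_out(nb_samples, nb_hold_out):
    indexes = list(range(nb_samples))
    for start in range(0, nb_samples, nb_hold_out):
        test = indexes[start:start + nb_hold_out]
        train = indexes[:start] + indexes[start + nb_hold_out:]
        yield train, test

for train, test in leave_p_out(5, 2):
    print(train, '->', test)
# [2, 3, 4] -> [0, 1]
# [0, 1, 4] -> [2, 3]
# [0, 1, 2, 3] -> [4]
```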
diff --git a/experiments_segmentation/run_segm_slic_model_graphcut.py b/experiments_segmentation/run_segm_slic_model_graphcut.py
index a2d7def1..5b47383c 100644
--- a/experiments_segmentation/run_segm_slic_model_graphcut.py
+++ b/experiments_segmentation/run_segm_slic_model_graphcut.py
@@ -1,5 +1,5 @@
 """
-Run supervised segmentation with superpixels and training examples
+Run supervised segmentation experiment with superpixels and training examples

 Pipeline:
 1. segment SLIC superpixels
@@ -14,7 +14,7 @@
 >> python run_segm_slic_model_graphcut.py \
    -l data_images/langerhans_islets/list_lang-isl_imgs-annot.csv \
    -i "data_images/langerhans_islets/image/*.jpg" \
-   -o results -n LangIsl --nb_classes 3 --visual --nb_jobs 2
+   -o results -n LangIsl --nb_classes 3 --nb_jobs 2 --visual

 Copyright (C) 2016-2018 Jiri Borovec
 """
@@ -142,8 +142,8 @@ def arg_parse_params(params):
                         help='number of classes for segmentation',
                         default=params.get('nb_classes', 2))
     parser.add_argument('--nb_jobs', type=int, required=False,
-                        default=NB_THREADS,
-                        help='number of processes in parallel')
+                        help='number of processes in parallel',
+                        default=NB_THREADS)
     parser.add_argument('--visual', required=False, action='store_true',
                         help='export debug visualisations', default=False)
     parser.add_argument('--unique', required=False, action='store_true',
@@ -495,7 +495,7 @@ def main(params):
     """
     logging.getLogger().setLevel(logging.DEBUG)
     logging.info('running...')
-    show_debug_imgs = params.get('visual', False)
+    show_visual = params.get('visual', False)

     reload_dir_config = (os.path.isfile(params['path_config']) or FORCE_RELOAD)
     params = tl_expt.create_experiment_folder(params, dir_name=NAME_EXPERIMENT,
@@ -504,7 +504,7 @@ def main(params):
     tl_expt.set_experiment_logger(params['path_exp'])
     logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
     tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
-    if show_debug_imgs:
+    if show_visual:
         tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

     paths_img = load_path_images(params)
@@ -514,22 +514,24 @@ def main(params):
         return os.path.join(params['path_exp'], n)

     # Segment as single model per image
+    path_visu = _path_expt(FOLDER_SEGM_GMM_VISU) if show_visual else None
     dict_segms_gmm = experiment_single_gmm(params, paths_img,
                                            _path_expt(FOLDER_SEGM_GMM),
-                                           _path_expt(FOLDER_SEGM_GMM_VISU),
-                                           show_debug_imgs=show_debug_imgs)
+                                           path_visu,
+                                           show_debug_imgs=show_visual)
     gc.collect()
     time.sleep(1)

     # Segment as model ober set of images
     if params.get('run_groupGMM', False):
+        path_visu = _path_expt(FOLDER_SEGM_GROUP_VISU) if show_visual else None
         dict_segms_group = experiment_group_gmm(params, paths_img,
                                                 _path_expt(FOLDER_SEGM_GROUP),
-                                                _path_expt(FOLDER_SEGM_GROUP_VISU),
-                                                show_debug_imgs=show_debug_imgs)
+                                                path_visu,
+                                                show_debug_imgs=show_visual)
     else:
         write_skip_file(_path_expt(FOLDER_SEGM_GROUP))
-        write_skip_file(_path_expt(FOLDER_SEGM_GROUP_VISU))
+        # write_skip_file(_path_expt(FOLDER_SEGM_GROUP_VISU))
         dict_segms_group = None

     if dict_segms_group is not None:
diff --git a/experiments_segmentation/sample_config.json b/experiments_segmentation/sample_config.json
index 4a9620ee..22d933b0 100755
--- a/experiments_segmentation/sample_config.json
+++ b/experiments_segmentation/sample_config.json
@@ -28,11 +28,14 @@
   "label_purity---OPTIONS": "float between 0 ad 1",

   "balance": "unique",
-  "balance---OPTIONS": ["random", "kmeans", "unique"],
+  "balance---OPTIONS": ["None", "random", "kmeans", "unique"],

   "classif": "RandForest",
   "classif---OPTIONS": ["RandForest", "KNN", "SVM", "DecTree", "GradBoost", "LogistRegr", "AdaBoost"],
+
+  "classif_metric": "f1",
+  "classif_metric---OPTIONS": ["f1", "accuracy", "precision", "recall"],

   "nb_classif_search": 50,
   "nb_classif_search---OPTIONS": "unsigned integer",
@@ -40,6 +43,9 @@
   "gc_edge_type": "model",
   "gc_edge_type---OPTIONS": ["spatial", "color", "features", "model", "model_l1", "model_l2", "model_lT"],
+
+  "drop_labels": [],
+  "drop_labels---OPTIONS": "list of labels to be skipped",

   "gc_regul": 3.0,
   "gc_regul--OPTIONS": "positive float",
@@ -47,7 +53,6 @@
   "cross_val": 0.1,
   "cross_val---OPTIONS": "float between 0 and 1",

-  "run_LOO": true,
   "run_LPO": true,
   "run_groupGMM": true
 }
\ No newline at end of file
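A sketch of how such a JSON config is typically merged over the script defaults via `-cfg`; the `DEFAULT_PARAMS` dict here is illustrative only, and the `---OPTIONS` keys are documentation entries a loader would skip:

```python
# Hedged sketch: load sample_config.json and overlay it on default params.
import json

DEFAULT_PARAMS = {'classif': 'RandForest', 'gc_regul': 1.0, 'run_LPO': True}
with open('experiments_segmentation/sample_config.json') as fp:
    config = json.load(fp)
# drop the "---OPTIONS" documentation keys, keep the real parameters
config = {k: v for k, v in config.items() if not k.endswith('---OPTIONS')}
params = dict(DEFAULT_PARAMS, **config)   # values from the config win
```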
skipped", "gc_regul": 3.0, "gc_regul--OPTIONS": "positive float", @@ -47,7 +53,6 @@ "cross_val": 0.1, "cross_val---OPTIONS": "float between 0 and 1", - "run_LOO": true, "run_LPO": true, "run_groupGMM": true } \ No newline at end of file diff --git a/handling_annotations/run_image_color_quantization.py b/handling_annotations/run_image_color_quantization.py index bc8192aa..970ead91 100644 --- a/handling_annotations/run_image_color_quantization.py +++ b/handling_annotations/run_image_color_quantization.py @@ -43,8 +43,8 @@ def parse_arg_params(): parser.add_argument('-imgs', '--path_images', type=str, required=True, help='path to dir with images', default=PATH_IMAGES) parser.add_argument('-m', '--method', type=str, required=False, - help='method for quantisation color/position', default='color', - choices=['color', 'position']) + help='method for quantisation color/position', + default='color', choices=['color', 'position']) parser.add_argument('-thr', '--px_threshold', type=float, required=False, help='percentage of pixels of a color to be removed', default=THRESHOLD_INVALID_PIXELS) diff --git a/handling_annotations/run_overlap_images_segms.py b/handling_annotations/run_overlap_images_segms.py index d16416a9..73a21de0 100644 --- a/handling_annotations/run_overlap_images_segms.py +++ b/handling_annotations/run_overlap_images_segms.py @@ -43,6 +43,8 @@ BOOL_ANNOT_RELABEL = True SIZE_SUB_FIGURE = 9 COLOR_CONTOUR = (0., 0., 1.) +MIDDLE_ALPHA_OVERLAP = 0. +MIDDLE_IMAGE_GRAY = False def parse_arg_params(): @@ -52,7 +54,7 @@ def parse_arg_params(): """ parser = argparse.ArgumentParser() parser.add_argument('-imgs', '--path_images', type=str, required=True, - help='path to the input images') + help='path to the input images + name pattern') parser.add_argument('-segs', '--path_segms', type=str, required=True, help='path to the input segms') parser.add_argument('-out', '--path_output', type=str, required=True, @@ -83,7 +85,7 @@ def visualise_overlap(path_img, path_seg, path_out, img = np.rollaxis(np.tile(img, (3, 1, 1)), 0, 3) if b_img_scale: - p_low, p_high = np.percentile(img, (3, 98)) + p_low, p_high = np.percentile(img, q=(3, 98)) # plt.imshow(255 - img, cmap='Greys') img = exposure.rescale_intensity(img, in_range=(p_low, p_high), out_range='uint8') @@ -97,7 +99,9 @@ def visualise_overlap(path_img, path_seg, path_out, # mask = (np.sum(img, axis=2) == 0) # img[mask] = [255, 255, 255] - fig = tl_visu.figure_image_segm_results(img, seg, SIZE_SUB_FIGURE) + fig = tl_visu.figure_image_segm_results(img, seg, SIZE_SUB_FIGURE, + MIDDLE_ALPHA_OVERLAP, + MIDDLE_IMAGE_GRAY) fig.savefig(path_out) plt.close(fig) @@ -125,6 +129,9 @@ def perform_visu_overlap(path_img, paths): def main(paths, nb_jobs=NB_THREADS): logging.info('running...') + assert paths['segms'] != paths['output'], 'overwriting segmentation dir' + assert os.path.basename(paths['images']) != paths['output'], \ + 'overwriting image dir' logging.info(tl_expt.string_dict(paths, desc='PATHS')) if not os.path.exists(paths['output']): diff --git a/imsegm/classification.py b/imsegm/classification.py index 1be00330..964c2cfd 100755 --- a/imsegm/classification.py +++ b/imsegm/classification.py @@ -12,7 +12,7 @@ import collections import traceback import itertools -# import multiprocessing as mproc +from functools import partial import numpy as np import pandas as pd @@ -48,6 +48,14 @@ ROUND_UNIQUE_FTS_DIGITS = 3 +DICT_SCORING = { + 'f1': metrics.f1_score, + 'accuracy': metrics.accuracy_score, + 'precision': metrics.precision_score, + 'recall': 
diff --git a/imsegm/classification.py b/imsegm/classification.py
index 1be00330..964c2cfd 100755
--- a/imsegm/classification.py
+++ b/imsegm/classification.py
@@ -12,7 +12,7 @@
 import collections
 import traceback
 import itertools
-# import multiprocessing as mproc
+from functools import partial

 import numpy as np
 import pandas as pd
@@ -48,6 +48,14 @@
 ROUND_UNIQUE_FTS_DIGITS = 3

+DICT_SCORING = {
+    'f1': metrics.f1_score,
+    'accuracy': metrics.accuracy_score,
+    'precision': metrics.precision_score,
+    'recall': metrics.recall_score,
+}
+

 def create_classifiers(nb_jobs=-1):
     """ create all classifiers with default parameters
@@ -115,6 +123,9 @@ def create_clf_param_search_grid(name_classif=DEFAULT_CLASSIF_NAME):

     >>> create_clf_param_search_grid('RandForest') # doctest: +ELLIPSIS
     {'classif__...': ...}
+    >>> dict_classif = create_classifiers()
+    >>> all(len(create_clf_param_search_grid(k)) > 0 for k in dict_classif)
+    True
     """
     def log_space(b, e, n):
         return np.unique(np.logspace(b, e, n).astype(int)).tolist()
@@ -174,6 +185,9 @@ def create_clf_param_search_distrib(name_classif=DEFAULT_CLASSIF_NAME):

     >>> create_clf_param_search_distrib() # doctest: +ELLIPSIS
     {...}
+    >>> dict_classif = create_classifiers()
+    >>> all(len(create_clf_param_search_distrib(k)) > 0 for k in dict_classif)
+    True
     """
     clf_params = {
         'RandForest': {
@@ -327,7 +341,8 @@ def compute_classif_metrics(y_true, y_pred, metric_averages=METRIC_AVERAGES):
     return dict_metrics


-def compute_classif_stat_segm_annot(annot_segm_name, relabel=False):
+def compute_classif_stat_segm_annot(annot_segm_name, drop_labels=None,
+                                    relabel=False):
     """ compute classification statistic between annotation and segmentation

     :param (ndarray, ndarray, str) annot_segm_name:
@@ -337,19 +352,40 @@ def compute_classif_stat_segm_annot(annot_segm_name, relabel=False):
     >>> np.random.seed(0)
     >>> annot = np.random.randint(0, 2, (5, 10))
     >>> segm = np.random.randint(0, 2, (5, 10))
-    >>> d = compute_classif_stat_segm_annot((annot, segm, 'ttt'), relabel=True)
+    >>> d = compute_classif_stat_segm_annot((annot, annot, 'ttt'), relabel=True,
+    ...                                     drop_labels=[5])
+    >>> d['(FP+FN)/(TP+FN)']  # doctest: +ELLIPSIS
+    0.0
+    >>> d['(TP+FP)/(TP+FN)']  # doctest: +ELLIPSIS
+    1.0
+    >>> d = compute_classif_stat_segm_annot((annot, segm, 'ttt'), relabel=True,
+    ...                                     drop_labels=[5])
     >>> d['(FP+FN)/(TP+FN)']  # doctest: +ELLIPSIS
     0.846...
     >>> d['(TP+FP)/(TP+FN)']  # doctest: +ELLIPSIS
     1.153...
+    >>> d = compute_classif_stat_segm_annot((annot, segm + 1, 'ttt'),
+    ...                                     relabel=False, drop_labels=[0])
+    >>> d['confusion']
+    [[13, 17], [0, 0]]
     """
     annot, segm, name = annot_segm_name
     assert segm.shape == annot.shape, 'dimension do not match for ' \
                                       'segm: %s - annot: %s' \
                                       % (repr(segm.shape), repr(annot.shape))
-    if relabel:
-        segm = seg_lbs.relabel_max_overlap_unique(annot, segm, keep_bg=False)
     y_true, y_pred = annot.ravel(), segm.ravel()
+    # filter particular labels
+    if drop_labels is not None:
+        mask = np.ones(y_true.shape, dtype=bool)
+        for lb in drop_labels:
+            mask[y_true == lb] = 0
+            mask[y_pred == lb] = 0
+        y_true = y_true[mask]
+        y_pred = y_pred[mask]
+    # relabel such that the classes maximally match
+    if relabel:
+        y_pred = seg_lbs.relabel_max_overlap_unique(y_true, y_pred,
+                                                    keep_bg=False)
     dict_stat = compute_classif_metrics(y_true, y_pred,
                                         metric_averages=['macro'])
     # add binary metric
@@ -361,7 +397,8 @@ def compute_classif_stat_segm_annot(annot_segm_name, relabel=False):
     return dict_stat


-def compute_stat_per_image(segms, annots, names=None, nb_jobs=1):
+def compute_stat_per_image(segms, annots, names=None, nb_jobs=1,
+                           drop_labels=None, relabel=False):
     """ compute statistic over multiple segmentations with annotation

     :param [ndarray] segms:
@@ -374,7 +411,8 @@ def compute_stat_per_image(segms, annots, names=None, nb_jobs=1):
     >>> np.random.seed(0)
     >>> img_true = np.random.randint(0, 3, (50, 100))
     >>> img_pred = np.random.randint(0, 2, (50, 100))
-    >>> df = compute_stat_per_image([img_true], [img_true], nb_jobs=2)
+    >>> df = compute_stat_per_image([img_true], [img_true], nb_jobs=2,
+    ...                             relabel=True)
     >>> df.iloc[0]  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
     ARS               1
     accuracy          1
@@ -384,7 +422,7 @@ def compute_stat_per_image(segms, annots, names=None, nb_jobs=1):
     recall_macro      1
     support_macro  None
     Name: 0, dtype: object
-    >>> df = compute_stat_per_image([img_true], [img_pred])
+    >>> df = compute_stat_per_image([img_true], [img_pred], drop_labels=[-1])
     >>> df.iloc[0]  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
     ARS            0.0...
     accuracy       0.3384
@@ -400,13 +438,14 @@ def compute_stat_per_image(segms, annots, names=None, nb_jobs=1):
        % (len(segms), len(annots))
     if names is None:
         names = map(str, range(len(segms)))
-    df_stat = pd.DataFrame()
-    iterate = tl_expt.WrapExecuteSequence(compute_classif_stat_segm_annot,
+    _compute_stat = partial(compute_classif_stat_segm_annot,
+                            drop_labels=drop_labels, relabel=relabel)
+    iterate = tl_expt.WrapExecuteSequence(_compute_stat,
                                           zip(annots, segms, names),
                                           nb_jobs=nb_jobs,
                                           desc='statistic per image')
-    for dict_stat in iterate:
-        df_stat = df_stat.append(dict_stat, ignore_index=True)
+    list_stat = list(iterate)
+    df_stat = pd.DataFrame(list_stat)
     df_stat.set_index('name', inplace=True)
     return df_stat
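The `drop_labels` masking added to `compute_classif_stat_segm_annot` excludes a pixel when either the annotation or the prediction carries an ignored label; the same filter as a standalone snippet:

```python
# Standalone version of the drop_labels mask: filter before any metric.
import numpy as np

annot = np.array([0, 1, -1, 1, 0])
segm = np.array([0, 1, 1, -1, 1])
mask = np.ones(annot.shape, dtype=bool)
for lb in [-1]:                          # drop_labels
    mask &= (annot != lb) & (segm != lb)
y_true, y_pred = annot[mask], segm[mask]
print(y_true, y_pred)                    # [0 1 0] [0 1 1]
```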
@@ -641,9 +680,13 @@
     # find the best params for the classif.
     logging.debug('Performing param search...')
     nb_labels = len(np.unique(labels))
-    clf_search = create_classif_search(clf_name, clf_pipeline, nb_labels,
-                                       search_type, cross_val,
-                                       nb_search_iter, nb_jobs)
+    clf_search = create_classif_search(clf_name, clf_pipeline,
+                                       nb_labels=nb_labels,
+                                       search_type=search_type,
+                                       cross_val=cross_val,
+                                       eval_scoring=eval_metric,
+                                       nb_iter=nb_search_iter,
+                                       nb_jobs=nb_jobs)
     # NOTE, this is temporal just for purposes of computing statistic
     clf_search.fit(features, relabel_sequential(labels))
@@ -823,8 +866,8 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
         classif.fit(features_train, labels_train)
         proba = classif.predict_proba(features_test)
         # Compute ROC curve and area the curve
-        for i, j in enumerate(unique_labels):
-            fpr, tpr, _ = metrics.roc_curve(labels_bin[test, j], proba[:, i])
+        for i, lb in enumerate(unique_labels):
+            fpr, tpr, _ = metrics.roc_curve(labels_bin[test, lb], proba[:, i])
             fpr = [0.] + fpr.tolist() + [1.]
             tpr = [0.] + tpr.tolist() + [1.]
             mean_tpr += interp(mean_fpr, fpr, tpr)
@@ -874,6 +917,7 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):

 def create_classif_search(name_clf, clf_pipeline, nb_labels,
                           search_type='random', cross_val=10,
+                          eval_scoring='f1',
                           nb_iter=NB_CLASSIF_SEARCH_ITER,
                           nb_jobs=NB_JOBS_CLASSIF_SEARCH):
     """ create sklearn search depending on spec. random or grid
@@ -885,25 +929,22 @@
     :param nb_jobs: int, nb jobs running in parallel
     :return:
     """
-    scoring = 'weighted' if nb_labels > 2 else 'binary'
-    f1_scoring = metrics.make_scorer(metrics.f1_score, average=scoring)
-
+    score_weight = 'weighted' if nb_labels > 2 else 'binary'
+    scoring = metrics.make_scorer(DICT_SCORING[eval_scoring.lower()],
+                                  average=score_weight)
     if search_type == 'grid':
         clf_parameters = create_clf_param_search_grid(name_clf)
         logging.info('init Grid search...')
-        clf_search = grid_search.GridSearchCV(clf_pipeline, clf_parameters,
-                                              scoring=f1_scoring, cv=cross_val,
-                                              n_jobs=nb_jobs, verbose=1,
-                                              refit=True)
+        clf_search = grid_search.GridSearchCV(
+            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
+            n_jobs=nb_jobs, verbose=1, refit=True)
     else:
         clf_parameters = create_clf_param_search_distrib(name_clf)
         nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
         logging.info('init Randomized search...')
-        clf_search = grid_search.RandomizedSearchCV(clf_pipeline, clf_parameters,
-                                                    scoring=f1_scoring,
-                                                    cv=cross_val, n_jobs=nb_jobs,
-                                                    n_iter=nb_iter, verbose=1,
-                                                    refit=True)
+        clf_search = grid_search.RandomizedSearchCV(
+            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
+            n_jobs=nb_jobs, n_iter=nb_iter, verbose=1, refit=True)
     return clf_search
@@ -1088,16 +1129,16 @@ def balance_dataset_by_(features, labels, balance_type='random',
     min_samples = min(hist_labels.values())
     dict_features = compose_dict_label_features(features, labels)

-    if balance_type == 'random':
+    if balance_type.lower() == 'random':
         dict_features = down_sample_dict_features_random(dict_features,
                                                          min_samples)
-    elif balance_type == 'kmeans':
+    elif balance_type.lower() == 'kmeans':
         dict_features = down_sample_dict_features_kmean(dict_features,
                                                         min_samples)
-    elif balance_type == 'unique':
+    elif balance_type.lower() == 'unique':
         dict_features = down_sample_dict_features_unique(dict_features)
     else:
-        logging.warning('not defined balacing method "%s"', balance_type)
+        logging.warning('not defined balancing method "%s"', balance_type)
     features, labels = convert_dict_label_features_2_vectors(dict_features)
     # features, labels = shuffle_features_labels(features, labels)
@@ -1135,10 +1176,10 @@ def convert_set_features_labels_2_dataset(imgs_features, imgs_labels,
         features = np.array(imgs_features[name])
         labels = np.array(imgs_labels[name].astype(int))

-        if drop_labels is not None:
-            for lb in drop_labels:
-                features = features[labels != lb]
-                labels = labels[labels != lb]
+        drop_labels = [] if drop_labels is None else drop_labels
+        for lb in drop_labels:
+            features = features[labels != lb]
+            labels = labels[labels != lb]

         if balance_type is not None:
             # balance_type dataset to have comparable nb of samples
@@ -1168,6 +1209,8 @@ def compute_tp_tn_fp_fn(annot, segm, label_positive=None):
            [-9,  0, -9, -9, -9,  0,  0],
            [ 0,  9,  0, -9,  0,  9,  0],
            [ 9, -9,  9,  0,  9,  0,  9]])
+    >>> compute_tp_tn_fp_fn(annot, annot)
+    (20, 15, 0, 0)
     >>> compute_tp_tn_fp_fn(annot, segm)
     (9, 5, 11, 10)
     >>> compute_tp_tn_fp_fn(annot, np.ones((5, 7)))
@@ -1180,7 +1223,7 @@ def compute_tp_tn_fp_fn(annot, segm, label_positive=None):
     uq_labels = np.unique([y_true, y_pred]).tolist()
     if len(uq_labels) > 2:
         logging.debug('too many labels: %s', repr(uq_labels))
-        return np.nan,np.nan, np.nan, np.nan
+        return np.nan, np.nan, np.nan, np.nan
     elif len(uq_labels) < 2:
         logging.debug('only one label: %s', repr(uq_labels))
         return len(y_true), 0, 0, 0
@@ -1242,14 +1285,14 @@ def compute_metric_tpfp_tpfn(annot, segm, label_positive=None):
     >>> compute_metric_tpfp_tpfn(annot, segm)  # doctest: +ELLIPSIS
     1.03...
     >>> compute_metric_tpfp_tpfn(annot, annot)
-    0.0
+    1.0
     >>> compute_metric_tpfp_tpfn(annot, np.ones((50, 75)))
     nan
     """
     tp, _, fp, fn = compute_tp_tn_fp_fn(annot, segm, label_positive)
     if tp == np.nan:
         return np.nan
-    elif (fp + fn) == 0:
+    elif (tp + fn) == 0:
         return 0.
     measure = float(tp + fp) / float(tp + fn)
     return measure
diff --git a/imsegm/graph_cuts.py b/imsegm/graph_cuts.py
index c872139b..86674132 100755
--- a/imsegm/graph_cuts.py
+++ b/imsegm/graph_cuts.py
@@ -19,6 +19,8 @@ DEFAULT_GC_ITERATIONS = 25
 COEF_INT_CONVERSION = 1e6
 DEBUG_NB_SHOW_SAMPLES = 15
+MIN_UNARY_PROB = 1e-2
+MAX_PAIRWISE_COST = 1e3


 def estim_gmm_params(features, prob):
@@ -507,10 +509,11 @@
     return pairwise


-def compute_unary_cost(proba):
+def compute_unary_cost(proba, min_proba=MIN_UNARY_PROB):
     """ compute the GC unary cost with some threshold on minimal values

     :param ndarray proba:
+    :param float min_proba: minimal probability left for any class
     :return ndarray:

     >>> compute_unary_cost(np.random.random((50, 2))).shape
@@ -518,23 +521,25 @@
     """
     proba = proba.copy()
     # constrain that each class should have at least 1.%
-    proba[proba < 1e-2] = 1e-2
+    proba[proba < min_proba] = min_proba
     # unary_cost = np.array(1. / proba , dtype=np.float64)
     unary_cost = np.abs(np.array(-np.log(proba), dtype=np.float64))
     return unary_cost


-def compute_pairwise_cost(gc_regul, proba_shape):
+def compute_pairwise_cost(gc_regul, proba_shape,
+                          max_pairwise_cost=MAX_PAIRWISE_COST):
     """ wrapper for creating GC pairwise cost

     :param gc_regul:
     :param (int, int) proba_shape:
+    :param float max_pairwise_cost: upper bound on the pairwise cost
     :return ndarray:
     """
     # original and the right way...
     pairwise = create_pairwise_matrix(gc_regul, proba_shape[1])
     pairwise_cost = np.array(pairwise, dtype=np.float64)
-    pairwise_cost[pairwise_cost > 1e2] = 1e2
+    pairwise_cost[pairwise_cost > max_pairwise_cost] = max_pairwise_cost
     return pairwise_cost
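
The unary cost above is the negative log-likelihood of the class probabilities, clipped from below so that no class falls under `min_proba`; with the default `MIN_UNARY_PROB = 1e-2` the cost is therefore bounded by `-log(1e-2) ≈ 4.6`. A standalone sketch of the same computation (toy data, numpy only; not the library function itself):

```python
import numpy as np

def unary_cost(proba, min_proba=1e-2):
    # clipping bounds the cost at -log(min_proba) ~= 4.6 for min_proba=1e-2
    proba = np.maximum(proba, min_proba)
    return -np.log(proba)

proba = np.array([[0.001, 0.999],
                  [0.500, 0.500]])
print(unary_cost(proba).round(2))
# [[4.61 0.  ]
#  [0.69 0.69]]
```
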
diff --git a/imsegm/labeling.py b/imsegm/labeling.py
index 062d60ee..4f31745f 100755
--- a/imsegm/labeling.py
+++ b/imsegm/labeling.py
@@ -496,9 +496,8 @@ def compute_labels_overlap_matrix(seg1, seg2):
         % (repr(seg1.shape), repr(seg2.shape))
     maxims = [np.max(seg1) + 1, np.max(seg2) + 1]
     overlap = np.zeros(maxims, dtype=int)
-    for i in range(seg1.shape[0]):
-        for j in range(seg1.shape[1]):
-            lb1, lb2 = seg1[i, j], seg2[i, j]
+    for lb1, lb2 in zip(seg1.ravel(), seg2.ravel()):
+        if lb1 >= 0 and lb2 >= 0:
             overlap[lb1, lb2] += 1
     # logging.debug(res)
     return overlap
@@ -545,9 +544,18 @@ def relabel_max_overlap_unique(seg_ref, seg_relabel, keep_bg=False):
            [0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 0],
            [0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 0],
            [0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 0]])
+    >>> atlas2[0, 0] = -1
+    >>> relabel_max_overlap_unique(atlas1, atlas2, keep_bg=True)
+    array([[-1,  5,  5,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0],
+           [ 5,  5,  5,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0],
+           [ 0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0],
+           [ 0,  3,  3,  3,  3,  3,  3,  0,  0,  0,  0,  0,  0,  0,  0],
+           [ 0,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  0],
+           [ 0,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  0],
+           [ 0,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  0]])
     """
     assert seg_ref.shape == seg_relabel.shape, \
-        'Ref segm (%s) and segm (%s) should match' \
+        'Reference segm (%s) and input segm (%s) should match' \
         % (repr(seg_ref.shape), repr(seg_relabel.shape))
     overlap = compute_labels_overlap_matrix(seg_ref, seg_relabel)

@@ -556,23 +564,30 @@
         lut[0] = 0
         overlap[0, :] = 0
         overlap[:, 0] = 0
+    # always select the maximal remaining overlap and reset its row and column
     for i in range(max(overlap.shape) + 1):
-        if np.sum(overlap) == 0: break
+        if np.sum(overlap) == 0:
+            break
         lb_ref, lb_est = np.argwhere(overlap.max() == overlap)[0]
         lut[lb_est] = lb_ref
         overlap[lb_ref, :] = 0
         overlap[:, lb_est] = 0
-
+    # map each unassigned label to its own id if that id is still free
     for i, lb in enumerate(lut):
         if lb == -1 and i not in lut:
             lut[i] = i
+    # assign any remaining labels to ids which are not used yet
     for i, lb in enumerate(lut):
-        if lb > -1: continue
+        if lb > -1:
+            continue
         for j in range(len(lut)):
             if j not in lut:
                 lut[i] = j
+
     # lut[lut == -1] = 0
-    seg_new = np.array(lut)[seg_relabel]
+    seg_new = np.array(lut)[seg_relabel].astype(int)
+    # keep all negative labels untouched
+    seg_new[seg_relabel < 0] = seg_relabel[seg_relabel < 0]
     return seg_new
@@ -636,7 +651,9 @@ def relabel_max_overlap_merge(seg_ref, seg_relabel, keep_bg=False):
     ptn_sum = np.sum(overlap, axis=0)
     if 0 in ptn_sum:
         lut[ptn_sum == 0] = np.arange(len(lut))[ptn_sum == 0]
-    seg_new = lut[seg_relabel]
+    seg_new = lut[seg_relabel].astype(int)
+    # keep all negative labels untouched
+    seg_new[seg_relabel < 0] = seg_relabel[seg_relabel < 0]
     return seg_new
diff --git a/imsegm/pipelines.py b/imsegm/pipelines.py
index cd1cfead..2f16654c 100755
--- a/imsegm/pipelines.py
+++ b/imsegm/pipelines.py
@@ -339,10 +339,9 @@ def train_classif_color2d_slic_features(list_images, list_annots, dict_features,
     cv = 10
     classif, _ = seg_clf.create_classif_train_export(clf_name, features, labels,
+                                                     pca_coef=pca_coef,
                                                      cross_val=cv,
                                                      nb_search_iter=nb_classif_search,
-                                                     cross_val=cv,
-                                                     nb_jobs=nb_jobs,
-                                                     pca_coef=pca_coef)
+                                                     nb_jobs=nb_jobs)

     return classif, list_slic, list_features, list_labels
diff --git a/imsegm/tests/test-classification.py b/imsegm/tests/test-classification.py
index 278d44c3..30cc8033 100644
--- a/imsegm/tests/test-classification.py
+++ b/imsegm/tests/test-classification.py
@@ -78,7 +78,8 @@ def test_classif_pipeline(self):
         data_test, labels_test = generate_data()
         for n in CLASSIFIER_NAMES:
             logging.info('created classif.: %s', n)
-            clf, _ = seg_clf.create_classif_train_export(n, data_train, labels_train)
+            clf, _ = seg_clf.create_classif_train_export(n, data_train,
+                                                         labels_train)
             self.classif_eval(clf, data_train, labels_train,
                               data_test, labels_test)
diff --git a/imsegm/tests/test-ellipse_fitting.py b/imsegm/tests/test-ellipse_fitting.py
index 6fa4438b..1a80d108 100644
--- a/imsegm/tests/test-ellipse_fitting.py
+++ b/imsegm/tests/test-ellipse_fitting.py
@@ -11,6 +11,7 @@
 import numpy as np
 import pandas as pd
+import matplotlib.pyplot as plt
 from sklearn.metrics import adjusted_rand_score

 sys.path.append(os.path.abspath(os.path.join('..', '..')))  # Add path to root
@@ -75,6 +76,7 @@ def test_ellipse_fitting(self, name='insitu7545',
         fig_name = 'ellipse-fitting_%s.pdf' % name
         fig.savefig(os.path.join(PATH_OUTPUT, fig_name),
                     bbox_inches='tight', pad_inches=0)
+        plt.close(fig)

         score = adjusted_rand_score(annot.ravel(), segm.ravel())
         self.assertGreaterEqual(score, 0.5)
diff --git a/imsegm/tests/test-region_growing.py b/imsegm/tests/test-region_growing.py
index 0da458cf..e88b8988 100644
--- a/imsegm/tests/test-region_growing.py
+++ b/imsegm/tests/test-region_growing.py
@@ -23,7 +23,7 @@
 import imsegm.region_growing as seg_rg

 PATH_OVARY = os.path.join(tl_data.update_path('data_images', absolute=True),
-                           'drosophila_ovary_slice')
+                          'drosophila_ovary_slice')
 PATH_IMAGE = os.path.join(PATH_OVARY, 'image')
 PATH_SEGM = os.path.join(PATH_OVARY, 'segm')
 PATH_ANNOT = os.path.join(PATH_OVARY, 'annot_eggs')
@@ -70,6 +70,7 @@ def expert_segm(name, img, seg, segm_obj, annot, str_type='xxx'):
     ax[1].contour(annot, cmap=plt.cm.jet)
     fig.savefig(os.path.join(PATH_OUTPUT, '%s_%s.pdf' % (str_type, name)),
                 bbox_inches='tight', pad_inches=0)
+    plt.close(fig)


 class TestRegionGrowing(unittest.TestCase):
@@ -120,6 +121,7 @@ def test_shape_modeling(self, dir_annot=PATH_ANNOT):

         fig.savefig(os.path.join(PATH_OUTPUT, 'RG2Sp_shape-modeling.pdf'),
                     bbox_inches='tight', pad_inches=0)
+        plt.close(fig)

     def test_region_growing_greedy(self, name='insitu7545'):
         """ """
diff --git a/imsegm/utils/drawing.py b/imsegm/utils/drawing.py
index 0289aba9..308adfc4 100755
--- a/imsegm/utils/drawing.py
+++ b/imsegm/utils/drawing.py
@@ -197,8 +197,9 @@ def figure_image_adjustment(fig, img_size):
     :param (int, int) img_size:
     :return:

-    >>> figure_image_adjustment(plt.figure(), (150, 200))  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_image_adjustment(plt.figure(), (150, 200))
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     ax = fig.gca()
     ax.set_xlim([0, img_size[1]])
@@ -211,20 +212,24 @@
     return fig


-def figure_image_segm_results(img, seg, subfig_size=9):
+def figure_image_segm_results(img, seg, subfig_size=9, mid_labels_alpha=0.2,
+                              mid_image_gray=True):
     """ creating subfigure with original image, overlapped segmentation contours
     and clean result segmentation...
     it turns the sequence in vertical / horizontal according major image dim

-    :param ndarray img:
-    :param ndarray seg:
-    :param int subfig_size:
+    :param ndarray img: image
+    :param ndarray seg: segmentation
+    :param int subfig_size: maximal figure size
+    :param bool mid_image_gray: show the middle image as grayscale background
+    :param float mid_labels_alpha: alpha for the middle segmentation overlap
     :return Figure:

     >>> img = np.random.random((100, 150, 3))
     >>> seg = np.random.randint(0, 2, (100, 150))
-    >>> figure_image_segm_results(img, seg)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_image_segm_results(img, seg)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     assert img.shape[:2] == seg.shape[:2], \
         'different image %s & seg_pipe %s sizes' % (repr(img.shape), repr(seg.shape))
@@ -232,22 +237,17 @@
     # img = np.rollaxis(np.tile(img, (3, 1, 1)), 0, 3)
     img = color.gray2rgb(img)

-    norm_size = np.array(img.shape[:2]) / float(np.max(img.shape))
-    # reverse dimensions and scale by fig size
-    if norm_size[0] >= norm_size[1]:  # horizontal
-        fig_size = norm_size[::-1] * subfig_size * np.array([3, 1])
-        fig, axarr = plt.subplots(ncols=3, figsize=fig_size)
-    else:  # vertical
-        fig_size = norm_size[::-1] * subfig_size * np.array([1, 3])
-        fig, axarr = plt.subplots(nrows=3, figsize=fig_size)
+    fig, axarr = create_figure_by_image(img.shape[:2], subfig_size,
+                                        nb_subfigs=3)

     axarr[0].set_title('original image')
     axarr[0].imshow(img)

     # visualise the 3th label
     axarr[1].set_title('original image w. segment overlap')
-    axarr[1].imshow(color.rgb2gray(img), cmap=plt.cm.Greys_r)
-    axarr[1].imshow(seg, alpha=0.2, cmap=plt.cm.jet)
+    img_bg = color.rgb2gray(img) if mid_image_gray else img
+    axarr[1].imshow(img_bg, cmap=plt.cm.Greys_r)
+    axarr[1].imshow(seg, alpha=mid_labels_alpha, cmap=plt.cm.jet)
     axarr[1].contour(seg, levels=np.unique(seg), linewidths=2, cmap=plt.cm.jet)

     axarr[2].set_title('segmentation of all labels')
@@ -263,7 +263,8 @@
     return fig


-def figure_overlap_annot_segm_image(annot, segm, img=None, subfig_size=9):
+def figure_overlap_annot_segm_image(annot, segm, img=None, subfig_size=9,
+                                    drop_labels=None):
     """ figure showing overlap annotation - segmentation - image

     :param ndarray annot:
@@ -274,8 +275,9 @@
     >>> img = np.random.random((100, 150, 3))
     >>> seg = np.random.randint(0, 2, (100, 150))
-    >>> figure_overlap_annot_segm_image(seg, seg, img)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_overlap_annot_segm_image(seg, seg, img, drop_labels=[5])
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     norm_size = np.array(annot.shape) / float(np.max(annot.shape))
     fig_size = norm_size[::-1] * subfig_size * np.array([3, 1])
@@ -300,8 +302,12 @@
     axarr[2].set_title('difference annot & segment')
     # axarr[2].imshow(~(annot == segm), cmap=plt.cm.Reds)
     max_val = np.max(annot.astype(int))
-    cax = axarr[2].imshow(annot - segm, alpha=0.5,
-                          vmin=-max_val, vmax=max_val, cmap=plt.cm.bwr)
+    diff = annot - segm
+    if drop_labels is not None:
+        for lb in drop_labels:
+            diff[annot == lb] = 0
+    cax = axarr[2].imshow(diff, vmin=-max_val, vmax=max_val, alpha=0.5,
+                          cmap=plt.cm.bwr)
     # vals = np.linspace(-max_val, max_val, max_val * 2 + 1)
     plt.colorbar(cax, ticks=np.linspace(-max_val, max_val, max_val * 2 + 1),
                  boundaries=np.linspace(-max_val - 0.5, max_val + 0.5,
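
The `drop_labels` handling in the difference panel zeroes the signed `annot - segm` map wherever the annotation carries a dropped label, so unlabelled regions do not show up as false disagreement. A toy sketch of that masking (the values here are made up purely for illustration):

```python
import numpy as np

annot = np.array([[0, 1, 5],
                  [2, 1, 0]])
segm = np.array([[0, 2, 1],
                 [2, 0, 0]])

diff = annot - segm
for lb in [5]:  # drop_labels
    diff[annot == lb] = 0  # a dropped label contributes no difference
print(diff)
# [[ 0 -1  0]
#  [ 0  1  0]]
```
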
@@ -336,8 +342,9 @@ def figure_segm_graphcut_debug(dict_imgs, subfig_size=9):
     ...           'img_graph_segm': np.random.random((100, 150, 3)),
     ...           'imgs_unary_cost': [np.random.random((100, 150, 3))],
     ...           }
-    >>> figure_segm_graphcut_debug(dict_imgs)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_segm_graphcut_debug(dict_imgs)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     assert all(n in dict_imgs for n in ['image', 'slic', 'slic_mean',
                                         'img_graph_edges', 'img_graph_segm',
@@ -372,6 +379,20 @@
     return fig


+def create_figure_by_image(img_size, subfig_size, nb_subfigs=1, extend=0.):
+    norm_size = np.array(img_size) / float(np.max(img_size))
+    # reverse dimensions and scale by fig size
+    if norm_size[0] >= norm_size[1]:  # horizontal
+        fig_size = norm_size[::-1] * subfig_size * np.array([nb_subfigs, 1])
+        fig_size[0] += extend * fig_size[0]
+        fig, axarr = plt.subplots(ncols=nb_subfigs, figsize=fig_size)
+    else:  # vertical
+        fig_size = norm_size[::-1] * subfig_size * np.array([1, nb_subfigs])
+        fig_size[0] += extend * fig_size[0]
+        fig, axarr = plt.subplots(nrows=nb_subfigs, figsize=fig_size)
+    return fig, axarr
+
+
 def figure_ellipse_fitting(img, seg, ellipses, centers, crits, fig_size=9):
     """ show figure with result of the ellipse fitting
@@ -388,15 +409,15 @@
     >>> ells = np.random.random((3, 5)) * 25
     >>> centers = np.random.random((3, 2)) * 25
     >>> crits = np.random.random(3)
-    >>> figure_ellipse_fitting(img[:, :, 0], seg, ells, centers, crits)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_ellipse_fitting(img[:, :, 0], seg, ells, centers, crits)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     assert len(ellipses) == len(centers) == len(crits), \
-        'number of ellipses (%i) and centers (%i) and criteria (%i) should match' \
-        % (len(ellipses), len(centers), len(crits))
+        'number of ellipses (%i) and centers (%i) and criteria (%i) ' \
+        'should match' % (len(ellipses), len(centers), len(crits))

-    fig_size = (fig_size * np.array(img.shape[:2]) / np.max(img.shape))[::-1]
-    fig, ax = plt.subplots(figsize=fig_size)
+    fig, ax = create_figure_by_image(img.shape[:2], fig_size)
     assert img.ndim == 2, \
         'required image dimension is 2 to instead %s' % repr(img.shape)
     ax.imshow(img, cmap=plt.cm.Greys_r)
@@ -431,8 +452,9 @@ def figure_annot_slic_histogram_labels(dict_label_hist, slic_size=-1,
     >>> np.random.seed(0)
     >>> dict_label_hist = {'a': np.tile([1, 0, 0, 0, 1], (25, 1)),
     ...                    'b': np.tile([0, 1, 0, 0, 1], (30, 1))}
-    >>> figure_annot_slic_histogram_labels(dict_label_hist)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_annot_slic_histogram_labels(dict_label_hist)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     matrix_hist_all = np.concatenate(tuple(dict_label_hist.values()), axis=0)
     lb_sums = np.sum(matrix_hist_all, axis=0)
@@ -490,6 +512,45 @@
     return fig


+def figure_used_samples(img, labels, slic, used_samples, fig_size=12):
+    """ visualise used samples (superpixels)
+
+    :param ndarray img:
+    :param [int] labels:
+    :param ndarray slic:
+    :param [bool] used_samples:
+    :param int fig_size:
+    :return Figure:
+
+    >>> img = np.random.random((50, 75, 3))
+    >>> labels = [-1, 0, 2]
+    >>> used = [1, 0, 0]
+    >>> seg = np.random.randint(0, 3, img.shape[:2])
+    >>> fig = figure_used_samples(img, labels, seg, used)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
+    """
+    w_samples = np.asarray(used_samples)[slic]
+    img = color.gray2rgb(img) if img.ndim == 2 else img
+
+    fig, axarr = create_figure_by_image(img.shape[:2], fig_size, nb_subfigs=2,
+                                        extend=0.15)
+    axarr[0].imshow(np.asarray(labels)[slic], cmap=plt.cm.jet)
+    axarr[0].contour(slic, levels=np.unique(slic), colors='w', linewidths=0.5)
+    axarr[0].axis('off')
+
+    axarr[1].imshow(img)
+    axarr[1].contour(slic, levels=np.unique(slic), colors='w', linewidths=0.5)
+    cax = axarr[1].imshow(w_samples, cmap=plt.cm.RdYlGn,
+                          vmin=0, vmax=1, alpha=0.5)
+    cbar = plt.colorbar(cax, ticks=[0, 1], boundaries=[-0.5, 0.5, 1.5])
+    cbar.ax.set_yticklabels(['drop', 'used'])
+    axarr[1].axis('off')
+
+    fig.tight_layout()
+    return fig
+
+
 def draw_color_labeling(segments, lut_labels):
     """ visualise the graph cut results
@@ -774,8 +835,9 @@ def figure_image_segm_centres(img, segm, centers=None,
     >>> img = np.random.random((100, 150, 3))
     >>> seg = np.random.randint(0, 2, (100, 150))
     >>> centre = [[55, 60]]
-    >>> figure_image_segm_centres(img, seg, centre)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_image_segm_centres(img, seg, centre)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     fig, ax = plt.subplots()
@@ -901,8 +963,9 @@ def figure_rg2sp_debug_complete(seg, slic, dict_rg2sp_debug, iter_index=-1,
     ...     'shifts': np.random.random((15, 3)),
     ...     'energy': np.random.random(15),
     ... }
-    >>> figure_rg2sp_debug_complete(seg, slic, dict_debug)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_rg2sp_debug_complete(seg, slic, dict_debug)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     nb_objects = dict_rg2sp_debug['lut_data_cost'].shape[1] - 1
     nb_subfigs = max(3, nb_objects)
@@ -1076,8 +1139,9 @@ def figure_segm_boundary_dist(segm_ref, segm, subfig_size=9):
     >>> seg = np.zeros((100, 100))
     >>> seg[35:80, 10:65] = 1
-    >>> figure_segm_boundary_dist(seg, seg.T)  # doctest: +ELLIPSIS
-    <matplotlib.figure.Figure object at ...>
+    >>> fig = figure_segm_boundary_dist(seg, seg.T)
+    >>> isinstance(fig, matplotlib.figure.Figure)
+    True
     """
     assert segm_ref.shape == segm.shape, \
         'ref segm (%s) and segm (%s) should match' \
diff --git a/imsegm/utils/experiments.py b/imsegm/utils/experiments.py
index d96e20f1..796ee107 100755
--- a/imsegm/utils/experiments.py
+++ b/imsegm/utils/experiments.py
@@ -334,7 +334,7 @@ def __iter__(self):
         tqdm_bar = None

         if self.nb_jobs > 1:
-            logging.debug('perform sequential in %i threads', self.nb_jobs)
+            logging.debug('perform parallel in %i processes', self.nb_jobs)
             pool = mproc.Pool(self.nb_jobs)
             pooling = pool.imap if self.ordered else pool.imap_unordered
diff --git a/requirements.txt b/requirements.txt
index a4a02056..680223a3 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,12 +4,12 @@
 pandas>=0.14.1
 six>=1.7.3
 pillow>=2.1.0
 Cython>=0.23
-matplotlib==2.0.2
+matplotlib>=2.0.2
 scikit-learn>=0.18.1
 scikit-image>=0.13.0
-ipython==4.2.0
+ipython>=4.2.0
 tqdm>=4.7.4
-nibabel==2.1.0
+nibabel>=2.1.0
 olefile==0.44  # new version - ImportError: No module named OleFileIO_PL
 planar>=0.4
 gco-wrapper>=3.0.2c
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 65c42012..a8329c61 100644
--- a/setup.py
+++ b/setup.py
@@ -21,10 +21,10 @@
 import logging
 import pkg_resources
 try:
-    from setuptools import setup, Extension  # , Command, find_packages
+    from setuptools import setup, Extension, find_packages  # , Command
     from setuptools.command.build_ext import build_ext
 except ImportError:
-    from distutils.core import setup, Extension  # , Command, find_packages
+    from distutils.core import setup, Extension, find_packages  # , Command
     from distutils.command.build_ext import build_ext
 # from Cython.Distutils import build_ext
@@ -67,7 +67,7 @@ def _parse_requirements(file_path):

 setup(
     name='ImSegm',
-    version='0.1',
+    version='0.1.2',

     url='https://borda.github.com/pyImSegm',
     author='Jiri Borovec',
@@ -76,7 +76,7 @@
     description='superpixel image segmentation: '
                 '(un)supervised, center detection, region growing',

-    packages=["imsegm"],
+    packages=find_packages(),
     cmdclass={'build_ext': BuildExt},
     ext_modules=[Extension('imsegm.features_cython',
                            language='c++',
@@ -91,7 +91,7 @@
     include_package_data=True,
     long_description="""Image segmentation package contains several useful features:
- * supervised and unsupervised segmentation on superpixels using GrapCut,
+ * supervised and unsupervised segmentation on superpixels using GraphCut,
 * detection object centres and cluster candidates,
 * region growing on superpixel level with a shape prior.""",
     classifiers=[
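
Switching the hard-coded `packages=["imsegm"]` to `packages=find_packages()` lets setuptools discover sub-packages (presumably including `imsegm.utils`) that the explicit list silently left out of the distribution. A quick sanity check of what will actually be shipped, as a sketch to run from the repository root; the exact listing depends on the checkout:

```python
from setuptools import find_packages

# expected to print something like ['imsegm', 'imsegm.utils', ...]
print(find_packages())
```
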