From 944e9905cc0fa0235f3f447cb166227bec9a0544 Mon Sep 17 00:00:00 2001 From: Oscar Esteban Date: Fri, 16 Aug 2024 06:29:00 +0200 Subject: [PATCH] sty: format jupyter notebooks --- docs/notebooks/MRIQC Web API.ipynb | 73 ++- docs/notebooks/Paper-v1.0.ipynb | 258 +++++++--- docs/notebooks/Paper-v2.0.ipynb | 522 ++++++++++++++++---- docs/notebooks/SpikesPlotter.ipynb | 48 +- docs/notebooks/Supplemental Materials.ipynb | 16 +- docs/notebooks/finding_spikes.ipynb | 63 ++- 6 files changed, 726 insertions(+), 254 deletions(-) diff --git a/docs/notebooks/MRIQC Web API.ipynb b/docs/notebooks/MRIQC Web API.ipynb index bc64528c1..b0e6cfcd9 100644 --- a/docs/notebooks/MRIQC Web API.ipynb +++ b/docs/notebooks/MRIQC Web API.ipynb @@ -21,12 +21,13 @@ "source": [ "import pandas as pd\n", "from json import load\n", - "import urllib.request, json \n", + "import urllib.request, json\n", "from pandas.io.json import json_normalize\n", "import seaborn as sns\n", "import pylab as plt\n", "import multiprocessing as mp\n", "import numpy as np\n", + "\n", "%matplotlib inline" ] }, @@ -52,23 +53,22 @@ " url_root = 'https://mriqc.nimh.nih.gov/api/v1/{modality}?{query}'\n", " page = 1\n", " dfs = []\n", - " \n", + "\n", " if versions is None:\n", " versions = ['*']\n", "\n", " for version in versions:\n", " while True:\n", " query = []\n", - " \n", + "\n", " if software is not None:\n", " query.append('\"provenance.software\":\"%s\"' % software)\n", - " \n", + "\n", " if version != '*':\n", " query.append('\"provenance.version\":\"%s\"' % version)\n", - " \n", + "\n", " page_url = url_root.format(\n", - " modality=modality,\n", - " query='where={%s}&page=%d' % (','.join(query), page)\n", + " modality=modality, query='where={%s}&page=%d' % (','.join(query), page)\n", " )\n", " with urllib.request.urlopen(page_url) as url:\n", " data = json.loads(url.read().decode())\n", @@ -87,13 +87,13 @@ " Distribution plot of a given measure\n", " \"\"\"\n", " sns.distplot(data, ax=ax, label=label)\n", - " \n", + "\n", " if xlabel is not None:\n", " ax.set_xlabel(xlabel)\n", - " \n", + "\n", " if min is None:\n", " min = np.percentile(data, 0.5)\n", - " \n", + "\n", " if max is None:\n", " max = np.percentile(data, 99.5)\n", " ax.set_xlim((min, max))" @@ -165,7 +165,7 @@ "ax.set_title('Number of T1w records in database')\n", "ax.legend()\n", "\n", - "plt.savefig(\"fig03a-0.svg\", bbox_inches='tight', transparent=False, pad_inches=0)" + "plt.savefig('fig03a-0.svg', bbox_inches='tight', transparent=False, pad_inches=0)" ] }, { @@ -195,7 +195,7 @@ "ax.plot(dates_bold_u, list(range(1, len(dates_bold_u) + 1)), label='unique')\n", "ax.set_title('Number of BOLD records in database')\n", "ax.legend()\n", - "plt.savefig(\"fig03a-1.svg\", bbox_inches='tight', transparent=False, pad_inches=0)" + "plt.savefig('fig03a-1.svg', bbox_inches='tight', transparent=False, pad_inches=0)" ] }, { @@ -221,8 +221,17 @@ } ], "source": [ - "print(','.join([l for l in df_t1w.columns \n", - " if not l.startswith('_') and not l.startswith('bids_meta') and not l.startswith('provenance')]))" + "print(\n", + " ','.join(\n", + " [\n", + " l\n", + " for l in df_t1w.columns\n", + " if not l.startswith('_')\n", + " and not l.startswith('bids_meta')\n", + " and not l.startswith('provenance')\n", + " ]\n", + " )\n", + ")" ] }, { @@ -242,14 +251,18 @@ } ], "source": [ - "f, ax = plt.subplots(1, 5, figsize=(25,5))\n", + "f, ax = plt.subplots(1, 5, figsize=(25, 5))\n", "plot_measure(df_t1w_unique.cjv, xlabel='Coefficient of joint variation (CJV)', ax=ax[0])\n", "plot_measure(df_t1w_unique.cnr, xlabel='Contrast-to-noise ratio (CNR)', ax=ax[1])\n", - "plot_measure(df_t1w_unique.snr_wm, xlabel='Signal-to-noise ratio estimated on the white-matter (SNR)', ax=ax[2])\n", + "plot_measure(\n", + " df_t1w_unique.snr_wm,\n", + " xlabel='Signal-to-noise ratio estimated on the white-matter (SNR)',\n", + " ax=ax[2],\n", + ")\n", "plot_measure(df_t1w_unique.fwhm_avg, xlabel='Smoothness (FWHM)', ax=ax[3])\n", "plot_measure(df_t1w_unique.wm2max, xlabel='WM-to-max intensity ratio (WM2MAX)', ax=ax[4])\n", "plt.suptitle('Distributions of some IQMs extracted from T1-weighted MRI')\n", - "plt.savefig(\"fig03b-0.svg\", bbox_inches='tight', transparent=False, pad_inches=0)" + "plt.savefig('fig03b-0.svg', bbox_inches='tight', transparent=False, pad_inches=0)" ] }, { @@ -275,8 +288,17 @@ } ], "source": [ - "print(','.join([l for l in df_bold.columns \n", - " if not l.startswith('_') and not l.startswith('bids_meta') and not l.startswith('provenance')]))" + "print(\n", + " ','.join(\n", + " [\n", + " l\n", + " for l in df_bold.columns\n", + " if not l.startswith('_')\n", + " and not l.startswith('bids_meta')\n", + " and not l.startswith('provenance')\n", + " ]\n", + " )\n", + ")" ] }, { @@ -296,8 +318,13 @@ } ], "source": [ - "f, ax = plt.subplots(1, 5, figsize=(25,5))\n", - "plot_measure(df_bold_unique[df_bold_unique.fd_mean < 10].fd_mean, xlabel='Framewise Displacement (mm)', max=2, ax=ax[0])\n", + "f, ax = plt.subplots(1, 5, figsize=(25, 5))\n", + "plot_measure(\n", + " df_bold_unique[df_bold_unique.fd_mean < 10].fd_mean,\n", + " xlabel='Framewise Displacement (mm)',\n", + " max=2,\n", + " ax=ax[0],\n", + ")\n", "plot_measure(df_bold_unique[df_bold_unique.dvars_nstd < 100].dvars_nstd, xlabel='DVARS', ax=ax[1])\n", "plot_measure(df_bold_unique.gcor, xlabel='Global correlation', ax=ax[2])\n", "plot_measure(df_bold_unique.gsr_x, label='x-axis', ax=ax[3])\n", @@ -305,7 +332,9 @@ "ax[3].legend()\n", "plot_measure(df_bold_unique.tsnr, xlabel='Temporal SNR (tSNR)', ax=ax[4])\n", "plt.suptitle('Distributions of some IQMs extracted from BOLD fMRI')\n", - "plt.savefig(\"fig03b-1.png\", bbox_inches='tight', transparent=False, pad_inches=0, facecolor='white')" + "plt.savefig(\n", + " 'fig03b-1.png', bbox_inches='tight', transparent=False, pad_inches=0, facecolor='white'\n", + ")" ] } ], diff --git a/docs/notebooks/Paper-v1.0.ipynb b/docs/notebooks/Paper-v1.0.ipynb index c46707c1d..5fbda5fee 100644 --- a/docs/notebooks/Paper-v1.0.ipynb +++ b/docs/notebooks/Paper-v1.0.ipynb @@ -17,7 +17,7 @@ "import numpy as np\n", "import seaborn as sn\n", "\n", - "sn.set(style=\"whitegrid\")" + "sn.set(style='whitegrid')" ] }, { @@ -29,6 +29,7 @@ "outputs": [], "source": [ "from mriqc.classifier import data as mcd\n", + "\n", "abide, _ = mcd.read_dataset(x_path, y_path, rate_label='rater_1')\n", "sites = list(sorted(set(abide.site.values.ravel())))\n", "\n", @@ -38,18 +39,27 @@ "\n", "for site in sites:\n", " subabide = abide.loc[abide.site.str.contains(site)]\n", - " \n", - " medians = np.median(subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']],\n", - " axis=0)\n", - " \n", - " mins = np.abs(medians - np.min(\n", - " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0))\n", "\n", - " maxs = np.abs(medians - np.max(\n", - " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0))\n", + " medians = np.median(\n", + " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0\n", + " )\n", + "\n", + " mins = np.abs(\n", + " medians\n", + " - np.min(\n", + " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0\n", + " )\n", + " )\n", + "\n", + " maxs = np.abs(\n", + " medians\n", + " - np.max(\n", + " subabide[['size_x', 'size_y', 'size_z', 'spacing_x', 'spacing_y', 'spacing_z']], axis=0\n", + " )\n", + " )\n", "\n", " ranges = np.max(np.vstack((maxs, mins)), axis=0)\n", - " \n", + "\n", " print(\n", " fmt.format(\n", " site=site,\n", @@ -57,8 +67,8 @@ " sr=tuple(ranges[:3].astype(int)),\n", " sp=tuple(medians[3:]),\n", " spr=tuple(ranges[3:]),\n", - "\n", - "))" + " )\n", + " )" ] }, { @@ -69,7 +79,7 @@ }, "outputs": [], "source": [ - "#data_path = '/home/oesteban/Google Drive/mriqc'\n", + "# data_path = '/home/oesteban/Google Drive/mriqc'\n", "data_path = '/home/oesteban/tmp/mriqc-ml-tests-2/'\n", "out_path = data_path\n", "loso = pd.read_csv(op.join(data_path, 'cv_loso_inner.csv'), index_col=False)\n", @@ -78,6 +88,7 @@ "kfold_outer = pd.read_csv(op.join(data_path, 'cv_kfold_outer.csv'), index_col=False)\n", "loso_outer = pd.read_csv(op.join(data_path, 'cv_loso_outer.csv'), index_col=False)\n", "\n", + "\n", "def gen_newparams(dataframe):\n", " thisdf = dataframe.copy()\n", " thisdf['zscored_str'] = ['nzs'] * len(thisdf['zscored'])\n", @@ -86,6 +97,7 @@ " del thisdf['zscored_str']\n", " return thisdf\n", "\n", + "\n", "loso = gen_newparams(loso)\n", "kfold = gen_newparams(kfold)" ] @@ -108,41 +120,55 @@ "for i, split_cv in enumerate([loso, kfold]):\n", " best_models[spstr[i]] = {}\n", " splitcols = [col for col in split_cv.columns.ravel() if col.startswith('split0')]\n", - " for clf in ['svc_linear-nzs', 'svc_rbf-nzs', 'rfc-nzs', 'svc_linear-zs', 'svc_rbf-zs', 'rfc-zs']:\n", + " for clf in [\n", + " 'svc_linear-nzs',\n", + " 'svc_rbf-nzs',\n", + " 'rfc-nzs',\n", + " 'svc_linear-zs',\n", + " 'svc_rbf-zs',\n", + " 'rfc-zs',\n", + " ]:\n", " thismodeldf = split_cv.loc[split_cv.params.str.contains(clf)]\n", " max_auc = thismodeldf.mean_auc.max()\n", " best = thismodeldf.loc[thismodeldf.mean_auc >= max_auc]\n", " best_list = best.params.values.ravel().tolist()\n", - " \n", + "\n", " if len(best_list) == 1:\n", " best_models[spstr[i]][clf] = best_list[0]\n", " else:\n", - " overall_means = [thismodeldf.loc[thismodeldf.params.str.contains(pset), 'mean_auc'].mean()\n", - " for pset in best_list]\n", + " overall_means = [\n", + " thismodeldf.loc[thismodeldf.params.str.contains(pset), 'mean_auc'].mean()\n", + " for pset in best_list\n", + " ]\n", " overall_max = np.max(overall_means)\n", " if sum([val >= overall_max for val in overall_means]) == 1:\n", " best_models[spstr[i]][clf] = best_list[np.argmax(overall_means)]\n", " else:\n", " best_models[spstr[i]][clf] = best_list[0]\n", - " \n", + "\n", "newdict = {'AUC': [], 'Classifier': [], 'Split scheme': []}\n", "\n", - "modelnames = {'rfc-nzs': 'RFC-nzs', 'rfc-zs': 'RFC-zs',\n", - " 'svc_linear-nzs': 'SVC_lin-nzs', 'svc_linear-zs': 'SVC_lin-zs',\n", - " 'svc_rbf-nzs': 'SVC_rbf-nzs', 'svc_rbf-zs': 'SVC_rbf-zs'}\n", + "modelnames = {\n", + " 'rfc-nzs': 'RFC-nzs',\n", + " 'rfc-zs': 'RFC-zs',\n", + " 'svc_linear-nzs': 'SVC_lin-nzs',\n", + " 'svc_linear-zs': 'SVC_lin-zs',\n", + " 'svc_rbf-nzs': 'SVC_rbf-nzs',\n", + " 'svc_rbf-zs': 'SVC_rbf-zs',\n", + "}\n", "\n", "for key, val in list(best_models['LoSo'].items()):\n", " scores = loso.loc[loso.params.str.contains(val), 'mean_auc'].values.ravel().tolist()\n", " nscores = len(scores)\n", - " \n", + "\n", " newdict['AUC'] += scores\n", " newdict['Classifier'] += [modelnames[key]] * nscores\n", " newdict['Split scheme'] += ['LoSo (16 folds)'] * nscores\n", - " \n", + "\n", "for key, val in list(best_models['10-fold'].items()):\n", " scores = kfold.loc[kfold.params.str.contains(val), 'mean_auc'].values.ravel().tolist()\n", " nscores = len(scores)\n", - " \n", + "\n", " newdict['AUC'] += scores\n", " newdict['Classifier'] += [modelnames[key]] * nscores\n", " newdict['Split scheme'] += ['10-fold'] * nscores\n", @@ -158,70 +184,120 @@ }, "outputs": [], "source": [ - "def plot_cv_outer(data, score='auc', zscored=0, ax=None, ds030_score=None,\n", - " split_type='LoSo', color='dodgerblue'):\n", - " \n", + "def plot_cv_outer(\n", + " data, score='auc', zscored=0, ax=None, ds030_score=None, split_type='LoSo', color='dodgerblue'\n", + "):\n", " if ax is None:\n", " ax = plt.gca()\n", - " \n", + "\n", " outer_score = data.loc[data[score].notnull(), [score, 'zscored']]\n", - " sn.distplot(outer_score.loc[outer_score.zscored==zscored, score],\n", - " hist=True, norm_hist=True, ax=ax, color=color, label=split_type)\n", + " sn.distplot(\n", + " outer_score.loc[outer_score.zscored == zscored, score],\n", + " hist=True,\n", + " norm_hist=True,\n", + " ax=ax,\n", + " color=color,\n", + " label=split_type,\n", + " )\n", " ax.set_xlim([0.4, 1.0])\n", " ax.grid(False)\n", " ax.set_yticklabels([])\n", - " \n", - " mean = outer_score.loc[outer_score.zscored==zscored, score].mean()\n", - " std = outer_score.loc[outer_score.zscored==zscored, score].std()\n", + "\n", + " mean = outer_score.loc[outer_score.zscored == zscored, score].mean()\n", + " std = outer_score.loc[outer_score.zscored == zscored, score].std()\n", "\n", " mean_coord = draw_line(mean, ax=ax, color=color, lw=2.0, marker='o', extend=True)\n", - " \n", + "\n", " ymax = ax.get_ylim()[1]\n", " draw_line(mean - std, ax=ax, color=color, extend=True)\n", " draw_line(mean + std, ax=ax, color=color, extend=True)\n", - " \n", - " \n", + "\n", " ax.annotate(\n", - " '$\\mu$=%0.3f' % mean, xy=(mean_coord[0], 0.75*ymax), xytext=(-35, 30),\n", - " textcoords='offset points', va='center', color='w', size=14,\n", + " '$\\mu$=%0.3f' % mean,\n", + " xy=(mean_coord[0], 0.75 * ymax),\n", + " xytext=(-35, 30),\n", + " textcoords='offset points',\n", + " va='center',\n", + " color='w',\n", + " size=14,\n", " bbox=dict(boxstyle='round', fc=color, ec='none', color='none', lw=0),\n", " arrowprops=dict(\n", - " arrowstyle='wedge,tail_width=0.8', lw=0, patchA=None, patchB=None,\n", - " fc=color, ec='none', relpos=(0.5, 0.5)))\n", - " sigmay = 0.70*ymax\n", - " ax.annotate(s='', xy=(mean - std, sigmay), xytext=(mean + std, sigmay), arrowprops=dict(arrowstyle='<->'))\n", + " arrowstyle='wedge,tail_width=0.8',\n", + " lw=0,\n", + " patchA=None,\n", + " patchB=None,\n", + " fc=color,\n", + " ec='none',\n", + " relpos=(0.5, 0.5),\n", + " ),\n", + " )\n", + " sigmay = 0.70 * ymax\n", + " ax.annotate(\n", + " s='',\n", + " xy=(mean - std, sigmay),\n", + " xytext=(mean + std, sigmay),\n", + " arrowprops=dict(arrowstyle='<->'),\n", + " )\n", " ax.annotate(\n", - " '$2\\sigma$=%0.3f' % (2 * std), xy=(mean_coord[0], 0.70*ymax), xytext=(-25, -12),\n", - " textcoords='offset points', va='center', color='k', size=12,\n", - " bbox=dict(boxstyle='round', fc='w', ec='none', color='none', alpha=.7, lw=0))\n", - " \n", + " '$2\\sigma$=%0.3f' % (2 * std),\n", + " xy=(mean_coord[0], 0.70 * ymax),\n", + " xytext=(-25, -12),\n", + " textcoords='offset points',\n", + " va='center',\n", + " color='k',\n", + " size=12,\n", + " bbox=dict(boxstyle='round', fc='w', ec='none', color='none', alpha=0.7, lw=0),\n", + " )\n", + "\n", " if ds030_score is not None:\n", " ds030_coord = draw_line(ds030_score, ax=ax, color='k', marker='o')\n", " ax.annotate(\n", - " 'DS030', xy=ds030_coord, xytext=(-100, 0),\n", - " textcoords='offset points', va='center', color='w', size=16,\n", + " 'DS030',\n", + " xy=ds030_coord,\n", + " xytext=(-100, 0),\n", + " textcoords='offset points',\n", + " va='center',\n", + " color='w',\n", + " size=16,\n", " bbox=dict(boxstyle='round', fc=color, ec='none', color='none', lw=0),\n", " arrowprops=dict(\n", - " arrowstyle='wedge,tail_width=0.8', lw=0, patchA=None, patchB=None,\n", - " fc=color, ec='none', relpos=(0.5, 0.5)))\n", - " \n", - " \n", - "def draw_line(score, ax=None, color='k', marker=None, lw=.7, extend=False):\n", + " arrowstyle='wedge,tail_width=0.8',\n", + " lw=0,\n", + " patchA=None,\n", + " patchB=None,\n", + " fc=color,\n", + " ec='none',\n", + " relpos=(0.5, 0.5),\n", + " ),\n", + " )\n", + "\n", + "\n", + "def draw_line(score, ax=None, color='k', marker=None, lw=0.7, extend=False):\n", " if ax is None:\n", " ax = plt.gca()\n", - " \n", + "\n", " if score > 1.0:\n", " score = 1.0\n", - " \n", + "\n", " coords = [score, -1]\n", " pdf_points = ax.lines[0].get_data()\n", " coords[1] = np.interp([coords[0]], pdf_points[0], pdf_points[1])\n", - " \n", + "\n", " if extend:\n", - " ax.axvline(coords[0], ymin=coords[1] / ax.get_ylim()[1], ymax=0.75, color='gray', lw=.7)\n", - " \n", - " ax.axvline(coords[0], ymin=coords[1] / ax.get_ylim()[1], ymax=0, color=color, marker=marker, markevery=2,\n", - " markeredgewidth=1.5, markerfacecolor='w', markeredgecolor=color, lw=lw)\n", + " ax.axvline(coords[0], ymin=coords[1] / ax.get_ylim()[1], ymax=0.75, color='gray', lw=0.7)\n", + "\n", + " ax.axvline(\n", + " coords[0],\n", + " ymin=coords[1] / ax.get_ylim()[1],\n", + " ymax=0,\n", + " color=color,\n", + " marker=marker,\n", + " markevery=2,\n", + " markeredgewidth=1.5,\n", + " markerfacecolor='w',\n", + " markeredgecolor=color,\n", + " lw=lw,\n", + " )\n", "\n", " return coords" ] @@ -234,43 +310,67 @@ }, "outputs": [], "source": [ - "sn.set(style=\"whitegrid\")\n", + "sn.set(style='whitegrid')\n", "\n", - "fig = plt.figure(figsize=(20, 8)) \n", - "ax1 = plt.subplot2grid((2,4), (0,0), colspan=2, rowspan=2)\n", + "fig = plt.figure(figsize=(20, 8))\n", + "ax1 = plt.subplot2grid((2, 4), (0, 0), colspan=2, rowspan=2)\n", "\n", - "sn.violinplot(x='Classifier', y='AUC', hue='Split scheme', data=newdf, split=True,\n", - " palette=['dodgerblue', 'darkorange'], ax=ax1)\n", + "sn.violinplot(\n", + " x='Classifier',\n", + " y='AUC',\n", + " hue='Split scheme',\n", + " data=newdf,\n", + " split=True,\n", + " palette=['dodgerblue', 'darkorange'],\n", + " ax=ax1,\n", + ")\n", "ax1.set_ylim([0.70, 1.0])\n", "ax1.set_ylabel('AUC')\n", "ax1.set_xlabel('Model')\n", "ax1.set_title('Model selection - Inner loop of nested cross-validation')\n", "\n", - "ax2 = plt.subplot2grid((2,4), (0, 2))\n", + "ax2 = plt.subplot2grid((2, 4), (0, 2))\n", "plot_cv_outer(kfold_outer, zscored=0, score='auc', ax=ax2, ds030_score=0.695, split_type='10-fold')\n", "ax2.set_xlabel('')\n", "ax2.legend()\n", "ax2.set_title('Evaluation - Outer loop of nested cross-validation')\n", "ax2.title.set_position([1.1, 1.0])\n", "\n", - "ax3 = plt.subplot2grid((2,4), (1, 2))\n", - "plot_cv_outer(loso_outer, zscored=0, score='auc', ax=ax3, ds030_score=0.695, color='darkorange', split_type='LoSo (17 folds)')\n", + "ax3 = plt.subplot2grid((2, 4), (1, 2))\n", + "plot_cv_outer(\n", + " loso_outer,\n", + " zscored=0,\n", + " score='auc',\n", + " ax=ax3,\n", + " ds030_score=0.695,\n", + " color='darkorange',\n", + " split_type='LoSo (17 folds)',\n", + ")\n", "ax3.legend()\n", "ax3.set_xlabel('AUC')\n", "\n", - "ax4 = plt.subplot2grid((2,4), (0, 3))\n", - "plot_cv_outer(kfold_outer, zscored=0, score='acc', ax=ax4, ds030_score=0.7283, split_type='10-fold')\n", + "ax4 = plt.subplot2grid((2, 4), (0, 3))\n", + "plot_cv_outer(\n", + " kfold_outer, zscored=0, score='acc', ax=ax4, ds030_score=0.7283, split_type='10-fold'\n", + ")\n", "ax4.set_xlabel('')\n", "ax4.legend()\n", "\n", - "ax5 = plt.subplot2grid((2,4), (1, 3))\n", - "plot_cv_outer(loso_outer, zscored=0, score='acc', ax=ax5, ds030_score=0.7283, color='darkorange', split_type='LoSo (17 folds)')\n", + "ax5 = plt.subplot2grid((2, 4), (1, 3))\n", + "plot_cv_outer(\n", + " loso_outer,\n", + " zscored=0,\n", + " score='acc',\n", + " ax=ax5,\n", + " ds030_score=0.7283,\n", + " color='darkorange',\n", + " split_type='LoSo (17 folds)',\n", + ")\n", "ax5.legend()\n", "ax5.set_xlabel('Accuracy')\n", "\n", "\n", - "fig.savefig(op.join(out_path, 'crossvalidation.pdf'),\n", - " bbox_inches='tight', pad_inches=0, dpi=300)" + "fig.savefig(op.join(out_path, 'crossvalidation.pdf'), bbox_inches='tight', pad_inches=0, dpi=300)" ] }, { @@ -282,8 +382,10 @@ "outputs": [], "source": [ "zscoreddf = loso_outer.loc[loso_outer.zscored == 0, ['auc', 'acc', 'site']]\n", - "palette = sn.color_palette(\"cubehelix\", len(set(zscoreddf.site)))\n", - "sn.pairplot(zscoreddf.loc[zscoreddf.auc.notnull(), ['auc', 'acc', 'site']], hue='site', palette=palette)" + "palette = sn.color_palette('cubehelix', len(set(zscoreddf.site)))\n", + "sn.pairplot(\n", + " zscoreddf.loc[zscoreddf.auc.notnull(), ['auc', 'acc', 'site']], hue='site', palette=palette\n", + ")" ] }, { @@ -295,11 +397,11 @@ "outputs": [], "source": [ "sites = sorted(list(set(loso_outer.site.ravel().tolist())))\n", - "palette = sn.color_palette(\"husl\", len(sites))\n", + "palette = sn.color_palette('husl', len(sites))\n", "fig = plt.figure()\n", "for i, site in enumerate(sites):\n", " sitedf = loso_outer.loc[loso_outer.site == site]\n", - " accdf = sitedf.loc[sitedf.zscored==0]\n", + " accdf = sitedf.loc[sitedf.zscored == 0]\n", " sn.distplot(accdf.acc.values.ravel(), bins=20, kde=0, label=site, color=palette[i])\n", "\n", "fig.gca().legend()\n", diff --git a/docs/notebooks/Paper-v2.0.ipynb b/docs/notebooks/Paper-v2.0.ipynb index 8df1256c2..02f2c03e5 100644 --- a/docs/notebooks/Paper-v2.0.ipynb +++ b/docs/notebooks/Paper-v2.0.ipynb @@ -86,7 +86,8 @@ "mviz.figure1(\n", " op.join(abide_path, 'sub-50137', 'anat', 'sub-50137_T1w.nii.gz'),\n", " op.join(abide_path, 'sub-50110', 'anat', 'sub-50110_T1w.nii.gz'),\n", - " out_file)" + " out_file,\n", + ")" ] }, { @@ -121,47 +122,141 @@ "from mriqc.classifier.sklearn import preprocessing as mcsp\n", "\n", "# Concatenate ABIDE & DS030\n", - "fulldata = combine_datasets([\n", - " (x_path, y_path, 'ABIDE'),\n", - " (ds030_x_path, ds030_y_path, 'DS030'),\n", - "])\n", + "fulldata = combine_datasets(\n", + " [\n", + " (x_path, y_path, 'ABIDE'),\n", + " (ds030_x_path, ds030_y_path, 'DS030'),\n", + " ]\n", + ")\n", "\n", "# Names of all features\n", - "features =[\n", - " 'cjv', 'cnr', 'efc', 'fber',\n", - " 'fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z',\n", - " 'icvs_csf', 'icvs_gm', 'icvs_wm',\n", - " 'inu_med', 'inu_range', \n", - " 'qi_1', 'qi_2',\n", - " 'rpve_csf', 'rpve_gm', 'rpve_wm',\n", - " 'size_x', 'size_y', 'size_z',\n", - " 'snr_csf', 'snr_gm', 'snr_total', 'snr_wm',\n", - " 'snrd_csf', 'snrd_gm', 'snrd_total', 'snrd_wm',\n", - " 'spacing_x', 'spacing_y', 'spacing_z',\n", - " 'summary_bg_k', 'summary_bg_mad', 'summary_bg_mean', 'summary_bg_median', 'summary_bg_n', 'summary_bg_p05', 'summary_bg_p95', 'summary_bg_stdv',\n", - " 'summary_csf_k', 'summary_csf_mad', 'summary_csf_mean', 'summary_csf_median', 'summary_csf_n', 'summary_csf_p05', 'summary_csf_p95', 'summary_csf_stdv',\n", - " 'summary_gm_k', 'summary_gm_mad', 'summary_gm_mean', 'summary_gm_median', 'summary_gm_n', 'summary_gm_p05', 'summary_gm_p95', 'summary_gm_stdv',\n", - " 'summary_wm_k', 'summary_wm_mad', 'summary_wm_mean', 'summary_wm_median', 'summary_wm_n', 'summary_wm_p05', 'summary_wm_p95', 'summary_wm_stdv',\n", - " 'tpm_overlap_csf', 'tpm_overlap_gm', 'tpm_overlap_wm',\n", - " 'wm2max'\n", + "features = [\n", + " 'cjv',\n", + " 'cnr',\n", + " 'efc',\n", + " 'fber',\n", + " 'fwhm_avg',\n", + " 'fwhm_x',\n", + " 'fwhm_y',\n", + " 'fwhm_z',\n", + " 'icvs_csf',\n", + " 'icvs_gm',\n", + " 'icvs_wm',\n", + " 'inu_med',\n", + " 'inu_range',\n", + " 'qi_1',\n", + " 'qi_2',\n", + " 'rpve_csf',\n", + " 'rpve_gm',\n", + " 'rpve_wm',\n", + " 'size_x',\n", + " 'size_y',\n", + " 'size_z',\n", + " 'snr_csf',\n", + " 'snr_gm',\n", + " 'snr_total',\n", + " 'snr_wm',\n", + " 'snrd_csf',\n", + " 'snrd_gm',\n", + " 'snrd_total',\n", + " 'snrd_wm',\n", + " 'spacing_x',\n", + " 'spacing_y',\n", + " 'spacing_z',\n", + " 'summary_bg_k',\n", + " 'summary_bg_mad',\n", + " 'summary_bg_mean',\n", + " 'summary_bg_median',\n", + " 'summary_bg_n',\n", + " 'summary_bg_p05',\n", + " 'summary_bg_p95',\n", + " 'summary_bg_stdv',\n", + " 'summary_csf_k',\n", + " 'summary_csf_mad',\n", + " 'summary_csf_mean',\n", + " 'summary_csf_median',\n", + " 'summary_csf_n',\n", + " 'summary_csf_p05',\n", + " 'summary_csf_p95',\n", + " 'summary_csf_stdv',\n", + " 'summary_gm_k',\n", + " 'summary_gm_mad',\n", + " 'summary_gm_mean',\n", + " 'summary_gm_median',\n", + " 'summary_gm_n',\n", + " 'summary_gm_p05',\n", + " 'summary_gm_p95',\n", + " 'summary_gm_stdv',\n", + " 'summary_wm_k',\n", + " 'summary_wm_mad',\n", + " 'summary_wm_mean',\n", + " 'summary_wm_median',\n", + " 'summary_wm_n',\n", + " 'summary_wm_p05',\n", + " 'summary_wm_p95',\n", + " 'summary_wm_stdv',\n", + " 'tpm_overlap_csf',\n", + " 'tpm_overlap_gm',\n", + " 'tpm_overlap_wm',\n", + " 'wm2max',\n", "]\n", "\n", "# Names of features that can be normalized\n", "coi = [\n", - " 'cjv', 'cnr', 'efc', 'fber', 'fwhm_avg', 'fwhm_x', 'fwhm_y', 'fwhm_z',\n", - " 'snr_csf', 'snr_gm', 'snr_total', 'snr_wm', 'snrd_csf', 'snrd_gm', 'snrd_total', 'snrd_wm',\n", - " 'summary_csf_mad', 'summary_csf_mean', 'summary_csf_median', 'summary_csf_p05', 'summary_csf_p95', 'summary_csf_stdv', 'summary_gm_k', 'summary_gm_mad', 'summary_gm_mean', 'summary_gm_median', 'summary_gm_p05', 'summary_gm_p95', 'summary_gm_stdv', 'summary_wm_k', 'summary_wm_mad', 'summary_wm_mean', 'summary_wm_median', 'summary_wm_p05', 'summary_wm_p95', 'summary_wm_stdv'\n", + " 'cjv',\n", + " 'cnr',\n", + " 'efc',\n", + " 'fber',\n", + " 'fwhm_avg',\n", + " 'fwhm_x',\n", + " 'fwhm_y',\n", + " 'fwhm_z',\n", + " 'snr_csf',\n", + " 'snr_gm',\n", + " 'snr_total',\n", + " 'snr_wm',\n", + " 'snrd_csf',\n", + " 'snrd_gm',\n", + " 'snrd_total',\n", + " 'snrd_wm',\n", + " 'summary_csf_mad',\n", + " 'summary_csf_mean',\n", + " 'summary_csf_median',\n", + " 'summary_csf_p05',\n", + " 'summary_csf_p95',\n", + " 'summary_csf_stdv',\n", + " 'summary_gm_k',\n", + " 'summary_gm_mad',\n", + " 'summary_gm_mean',\n", + " 'summary_gm_median',\n", + " 'summary_gm_p05',\n", + " 'summary_gm_p95',\n", + " 'summary_gm_stdv',\n", + " 'summary_wm_k',\n", + " 'summary_wm_mad',\n", + " 'summary_wm_mean',\n", + " 'summary_wm_median',\n", + " 'summary_wm_p05',\n", + " 'summary_wm_p95',\n", + " 'summary_wm_stdv',\n", "]\n", "\n", "# Plot batches\n", - "fig = mviz.plot_batches(fulldata, cols=list(reversed(coi)),\n", - " out_file=op.join(outputs_path, 'figures/fig02-batches-a.pdf'))\n", + "fig = mviz.plot_batches(\n", + " fulldata,\n", + " cols=list(reversed(coi)),\n", + " out_file=op.join(outputs_path, 'figures/fig02-batches-a.pdf'),\n", + ")\n", "\n", "# Apply new site-wise scaler\n", "scaler = mcsp.BatchRobustScaler(by='site', columns=coi)\n", "scaled = scaler.fit_transform(fulldata)\n", - "fig = mviz.plot_batches(scaled, cols=coi, site_labels='right',\n", - " out_file=op.join(outputs_path, 'figures/fig02-batches-b.pdf'))" + "fig = mviz.plot_batches(\n", + " scaled,\n", + " cols=coi,\n", + " site_labels='right',\n", + " out_file=op.join(outputs_path, 'figures/fig02-batches-b.pdf'),\n", + ")" ] }, { @@ -180,10 +275,13 @@ "outputs": [], "source": [ "from sklearn.metrics import cohen_kappa_score\n", + "\n", "overlap = mdata[np.all(~np.isnan(mdata[['rater_1', 'rater_2']]), axis=1)]\n", "y1 = overlap.rater_1.values.ravel().tolist()\n", "y2 = overlap.rater_2.values.ravel().tolist()\n", - "fig = mviz.inter_rater_variability(y1, y2, out_file=op.join(outputs_path, 'figures', 'fig02-irv.pdf'))\n", + "fig = mviz.inter_rater_variability(\n", + " y1, y2, out_file=op.join(outputs_path, 'figures', 'fig02-irv.pdf')\n", + ")\n", "\n", "print(\"Cohen's Kappa %f\" % cohen_kappa_score(y1, y2))\n", "\n", @@ -192,7 +290,7 @@ "\n", "y2 = overlap.rater_2.values.ravel()\n", "y2[y2 == 0] = 1\n", - "print(\"Cohen's Kappa (binarized): %f\" % cohen_kappa_score(y1, y2))\n" + "print(\"Cohen's Kappa (binarized): %f\" % cohen_kappa_score(y1, y2))" ] }, { @@ -212,16 +310,72 @@ "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sn\n", - "rfc_acc=[0.842, 0.815, 0.648, 0.609, 0.789, 0.761, 0.893, 0.833, 0.842, 0.767, 0.806, 0.850, 0.878, 0.798, 0.559, 0.881, 0.375]\n", - "svc_lin_acc=[0.947, 0.667, 0.870, 0.734, 0.754, 0.701, 0.750, 0.639, 0.877, 0.767, 0.500, 0.475, 0.837, 0.768, 0.717, 0.050, 0.429]\n", - "svc_rbf_acc=[0.947, 0.852, 0.500, 0.578, 0.772, 0.712, 0.821, 0.583, 0.912, 0.767, 0.500, 0.450, 0.837, 0.778, 0.441, 0.950, 0.339]\n", "\n", - "df = pd.DataFrame({\n", - " 'site': list(range(len(sites))) * 3,\n", - " 'accuracy': rfc_acc + svc_lin_acc + svc_rbf_acc,\n", - " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites)\n", - " \n", - "})\n", + "rfc_acc = [\n", + " 0.842,\n", + " 0.815,\n", + " 0.648,\n", + " 0.609,\n", + " 0.789,\n", + " 0.761,\n", + " 0.893,\n", + " 0.833,\n", + " 0.842,\n", + " 0.767,\n", + " 0.806,\n", + " 0.850,\n", + " 0.878,\n", + " 0.798,\n", + " 0.559,\n", + " 0.881,\n", + " 0.375,\n", + "]\n", + "svc_lin_acc = [\n", + " 0.947,\n", + " 0.667,\n", + " 0.870,\n", + " 0.734,\n", + " 0.754,\n", + " 0.701,\n", + " 0.750,\n", + " 0.639,\n", + " 0.877,\n", + " 0.767,\n", + " 0.500,\n", + " 0.475,\n", + " 0.837,\n", + " 0.768,\n", + " 0.717,\n", + " 0.050,\n", + " 0.429,\n", + "]\n", + "svc_rbf_acc = [\n", + " 0.947,\n", + " 0.852,\n", + " 0.500,\n", + " 0.578,\n", + " 0.772,\n", + " 0.712,\n", + " 0.821,\n", + " 0.583,\n", + " 0.912,\n", + " 0.767,\n", + " 0.500,\n", + " 0.450,\n", + " 0.837,\n", + " 0.778,\n", + " 0.441,\n", + " 0.950,\n", + " 0.339,\n", + "]\n", + "\n", + "df = pd.DataFrame(\n", + " {\n", + " 'site': list(range(len(sites))) * 3,\n", + " 'accuracy': rfc_acc + svc_lin_acc + svc_rbf_acc,\n", + " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites),\n", + " }\n", + ")\n", "\n", "\n", "x = np.arange(len(sites))\n", @@ -237,9 +391,11 @@ "\n", "fig = plt.figure(figsize=(10, 3))\n", "ax2 = plt.subplot2grid((1, 4), (0, 3))\n", - "plot = sn.violinplot(data=df, x='Model', y=\"accuracy\", ax=ax2, palette=colors, bw=.1, linewidth=.7)\n", + "plot = sn.violinplot(\n", + " data=df, x='Model', y='accuracy', ax=ax2, palette=colors, bw=0.1, linewidth=0.7\n", + ")\n", "for i in range(dim):\n", - " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", + " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", "# ax2.axhline(np.percentile(allvals[i], 50), ls='--', color=colors[i], lw=.8)\n", "# sn.swarmplot(x=\"model\", y=\"accuracy\", data=df, color=\"w\", alpha=.5, ax=ax2);\n", "ax2.yaxis.tick_right()\n", @@ -247,14 +403,13 @@ "ax2.set_xticklabels(ax2.get_xticklabels(), rotation=40)\n", "ax2.set_ylim([0.0, 1.0])\n", "\n", - "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3) \n", + "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3)\n", "for i in range(dim):\n", " y = [d[i] for d in data]\n", - " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=.6)\n", + " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=0.6)\n", " print(np.average(allvals[i]), np.std(allvals[i]))\n", - " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", - " \n", - " \n", + " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", + "\n", "\n", "plt.xlim([-0.2, 16.75])\n", "plt.grid(False)\n", @@ -272,16 +427,71 @@ }, "outputs": [], "source": [ - "rfc_roc_auc=[0.597, 0.380, 0.857, 0.610, 0.698, 0.692, 0.963, 0.898, 0.772, 0.596, 0.873, 0.729, 0.784, 0.860, 0.751, 0.900, 0.489]\n", - "svc_lin_roc_auc=[0.583, 0.304, 0.943, 0.668, 0.691, 0.754, 1.000, 0.778, 0.847, 0.590, 0.857, 0.604, 0.604, 0.838, 0.447, 0.650, 0.501]\n", - "svc_rbf_roc_auc=[0.681, 0.217, 0.827, 0.553, 0.738, 0.616, 0.889, 0.813, 0.845, 0.658, 0.779, 0.493, 0.726, 0.510, 0.544, 0.500, 0.447]\n", + "rfc_roc_auc = [\n", + " 0.597,\n", + " 0.380,\n", + " 0.857,\n", + " 0.610,\n", + " 0.698,\n", + " 0.692,\n", + " 0.963,\n", + " 0.898,\n", + " 0.772,\n", + " 0.596,\n", + " 0.873,\n", + " 0.729,\n", + " 0.784,\n", + " 0.860,\n", + " 0.751,\n", + " 0.900,\n", + " 0.489,\n", + "]\n", + "svc_lin_roc_auc = [\n", + " 0.583,\n", + " 0.304,\n", + " 0.943,\n", + " 0.668,\n", + " 0.691,\n", + " 0.754,\n", + " 1.000,\n", + " 0.778,\n", + " 0.847,\n", + " 0.590,\n", + " 0.857,\n", + " 0.604,\n", + " 0.604,\n", + " 0.838,\n", + " 0.447,\n", + " 0.650,\n", + " 0.501,\n", + "]\n", + "svc_rbf_roc_auc = [\n", + " 0.681,\n", + " 0.217,\n", + " 0.827,\n", + " 0.553,\n", + " 0.738,\n", + " 0.616,\n", + " 0.889,\n", + " 0.813,\n", + " 0.845,\n", + " 0.658,\n", + " 0.779,\n", + " 0.493,\n", + " 0.726,\n", + " 0.510,\n", + " 0.544,\n", + " 0.500,\n", + " 0.447,\n", + "]\n", "\n", - "df = pd.DataFrame({\n", - " 'site': list(range(len(sites))) * 3,\n", - " 'auc': rfc_roc_auc + svc_lin_roc_auc + svc_rbf_roc_auc,\n", - " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites)\n", - " \n", - "})\n", + "df = pd.DataFrame(\n", + " {\n", + " 'site': list(range(len(sites))) * 3,\n", + " 'auc': rfc_roc_auc + svc_lin_roc_auc + svc_rbf_roc_auc,\n", + " 'Model': ['RFC'] * len(sites) + ['SVC_lin'] * len(sites) + ['SVC_rbf'] * len(sites),\n", + " }\n", + ")\n", "\n", "x = np.arange(len(sites))\n", "data = list(zip(rfc_roc_auc, svc_lin_roc_auc, svc_rbf_roc_auc))\n", @@ -296,23 +506,22 @@ "\n", "fig = plt.figure(figsize=(10, 3))\n", "ax2 = plt.subplot2grid((1, 4), (0, 3))\n", - "plot = sn.violinplot(data=df, x='Model', y=\"auc\", ax=ax2, palette=colors, bw=.1, linewidth=.7)\n", + "plot = sn.violinplot(data=df, x='Model', y='auc', ax=ax2, palette=colors, bw=0.1, linewidth=0.7)\n", "for i in range(dim):\n", - " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", + " ax2.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", "\n", "ax2.yaxis.tick_right()\n", "ax2.set_ylabel('')\n", "ax2.set_xticklabels(ax2.get_xticklabels(), rotation=40)\n", "ax2.set_ylim([0.0, 1.0])\n", "\n", - "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3) \n", + "ax1 = plt.subplot2grid((1, 4), (0, 0), colspan=3)\n", "for i in range(dim):\n", " y = [d[i] for d in data]\n", - " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=.6)\n", + " b = ax1.bar(x + i * dimw, y, dimw, bottom=0.001, color=colors[i], alpha=0.6)\n", " print(np.average(allvals[i]), np.std(allvals[i]))\n", - " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=.8)\n", - " \n", - " \n", + " ax1.axhline(np.average(allvals[i]), ls='--', color=colors[i], lw=0.8)\n", + "\n", "\n", "plt.xlim([-0.2, 16.75])\n", "plt.grid(False)\n", @@ -343,15 +552,21 @@ "source": [ "from sklearn.metrics import confusion_matrix\n", "\n", - "pred_file = op.abspath(op.join(\n", - " '..', 'mriqc/data/csv',\n", - " 'mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-test_pred.csv'))\n", + "pred_file = op.abspath(\n", + " op.join(\n", + " '..',\n", + " 'mriqc/data/csv',\n", + " 'mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-test_pred.csv',\n", + " )\n", + ")\n", "\n", "pred_y = pd.read_csv(pred_file)\n", "true_y = pd.read_csv(ds030_y_path)\n", "true_y.rater_1 *= -1\n", "true_y.rater_1[true_y.rater_1 < 0] = 0\n", - "print(confusion_matrix(true_y.rater_1.tolist(), pred_y.pred_y.values.ravel().tolist(), labels=[0, 1]))" + "print(\n", + " confusion_matrix(true_y.rater_1.tolist(), pred_y.pred_y.values.ravel().tolist(), labels=[0, 1])\n", + ")" ] }, { @@ -371,27 +586,76 @@ "source": [ "import seaborn as sn\n", "from sklearn.externals.joblib import load as loadpkl\n", - "sn.set_style(\"white\")\n", + "\n", + "sn.set_style('white')\n", "\n", "# Get the RFC\n", - "estimator = loadpkl(pkgrf('mriqc', 'data/mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-train_estimator.pklz'))\n", + "estimator = loadpkl(\n", + " pkgrf(\n", + " 'mriqc',\n", + " 'data/mclf_run-20170724-191452_mod-rfc_ver-0.9.7-rc8_class-2_cv-loso_data-train_estimator.pklz',\n", + " )\n", + ")\n", "forest = estimator.named_steps['rfc']\n", "\n", "# Features selected in cross-validation\n", "features = [\n", - " \"cjv\", \"cnr\", \"efc\", \"fber\", \"fwhm_avg\", \"fwhm_x\", \"fwhm_y\", \"fwhm_z\", \"icvs_csf\", \"icvs_gm\", \"icvs_wm\",\n", - " \"qi_1\", \"qi_2\", \"rpve_csf\", \"rpve_gm\", \"rpve_wm\", \"snr_csf\", \"snr_gm\", \"snr_total\", \"snr_wm\", \"snrd_csf\",\n", - " \"snrd_gm\", \"snrd_total\", \"snrd_wm\", \"summary_bg_k\", \"summary_bg_stdv\", \"summary_csf_k\", \"summary_csf_mad\",\n", - " \"summary_csf_mean\", \"summary_csf_median\", \"summary_csf_p05\", \"summary_csf_p95\", \"summary_csf_stdv\",\n", - " \"summary_gm_k\", \"summary_gm_mad\", \"summary_gm_mean\", \"summary_gm_median\", \"summary_gm_p05\", \"summary_gm_p95\",\n", - " \"summary_gm_stdv\", \"summary_wm_k\", \"summary_wm_mad\", \"summary_wm_mean\", \"summary_wm_median\", \"summary_wm_p05\",\n", - " \"summary_wm_p95\", \"summary_wm_stdv\", \"tpm_overlap_csf\", \"tpm_overlap_gm\", \"tpm_overlap_wm\"]\n", + " 'cjv',\n", + " 'cnr',\n", + " 'efc',\n", + " 'fber',\n", + " 'fwhm_avg',\n", + " 'fwhm_x',\n", + " 'fwhm_y',\n", + " 'fwhm_z',\n", + " 'icvs_csf',\n", + " 'icvs_gm',\n", + " 'icvs_wm',\n", + " 'qi_1',\n", + " 'qi_2',\n", + " 'rpve_csf',\n", + " 'rpve_gm',\n", + " 'rpve_wm',\n", + " 'snr_csf',\n", + " 'snr_gm',\n", + " 'snr_total',\n", + " 'snr_wm',\n", + " 'snrd_csf',\n", + " 'snrd_gm',\n", + " 'snrd_total',\n", + " 'snrd_wm',\n", + " 'summary_bg_k',\n", + " 'summary_bg_stdv',\n", + " 'summary_csf_k',\n", + " 'summary_csf_mad',\n", + " 'summary_csf_mean',\n", + " 'summary_csf_median',\n", + " 'summary_csf_p05',\n", + " 'summary_csf_p95',\n", + " 'summary_csf_stdv',\n", + " 'summary_gm_k',\n", + " 'summary_gm_mad',\n", + " 'summary_gm_mean',\n", + " 'summary_gm_median',\n", + " 'summary_gm_p05',\n", + " 'summary_gm_p95',\n", + " 'summary_gm_stdv',\n", + " 'summary_wm_k',\n", + " 'summary_wm_mad',\n", + " 'summary_wm_mean',\n", + " 'summary_wm_median',\n", + " 'summary_wm_p05',\n", + " 'summary_wm_p95',\n", + " 'summary_wm_stdv',\n", + " 'tpm_overlap_csf',\n", + " 'tpm_overlap_gm',\n", + " 'tpm_overlap_wm',\n", + "]\n", "\n", "nft = len(features)\n", "\n", "forest = estimator.named_steps['rfc']\n", - "importances = np.median([tree.feature_importances_ for tree in forest.estimators_],\n", - " axis=0)\n", + "importances = np.median([tree.feature_importances_ for tree in forest.estimators_], axis=0)\n", "# importances = np.median(, axis=0)\n", "indices = np.argsort(importances)[::-1]\n", "\n", @@ -410,8 +674,12 @@ "plt.gca().set_xticklabels([features[i] for i in indices], rotation=90)\n", "plt.xlim([-1, nft])\n", "plt.show()\n", - "fig.savefig(op.join(outputs_path, 'figures', 'fig06-exp2-fi.pdf'),\n", - " bbox_inches='tight', pad_inches=0, dpi=300)" + "fig.savefig(\n", + " op.join(outputs_path, 'figures', 'fig06-exp2-fi.pdf'),\n", + " bbox_inches='tight',\n", + " pad_inches=0,\n", + " dpi=300,\n", + ")" ] }, { @@ -429,17 +697,63 @@ }, "outputs": [], "source": [ - "fn = ['10225', '10235', '10316', '10339', '10365', '10376',\n", - " '10429', '10460', '10506', '10527', '10530', '10624',\n", - " '10696', '10891', '10948', '10968', '10977', '11050',\n", - " '11052', '11142', '11143', '11149', '50004', '50005',\n", - " '50008', '50010', '50016', '50027', '50029', '50033',\n", - " '50034', '50036', '50043', '50047', '50049', '50053',\n", - " '50054', '50055', '50085', '60006', '60010', '60012',\n", - " '60014', '60016', '60021', '60046', '60052', '60072',\n", - " '60073', '60084', '60087', '70051', '70060', '70072']\n", - "fp = ['10280', '10455', '10523', '11112', '50020', '50048',\n", - " '50052', '50061', '50073', '60077']" + "fn = [\n", + " '10225',\n", + " '10235',\n", + " '10316',\n", + " '10339',\n", + " '10365',\n", + " '10376',\n", + " '10429',\n", + " '10460',\n", + " '10506',\n", + " '10527',\n", + " '10530',\n", + " '10624',\n", + " '10696',\n", + " '10891',\n", + " '10948',\n", + " '10968',\n", + " '10977',\n", + " '11050',\n", + " '11052',\n", + " '11142',\n", + " '11143',\n", + " '11149',\n", + " '50004',\n", + " '50005',\n", + " '50008',\n", + " '50010',\n", + " '50016',\n", + " '50027',\n", + " '50029',\n", + " '50033',\n", + " '50034',\n", + " '50036',\n", + " '50043',\n", + " '50047',\n", + " '50049',\n", + " '50053',\n", + " '50054',\n", + " '50055',\n", + " '50085',\n", + " '60006',\n", + " '60010',\n", + " '60012',\n", + " '60014',\n", + " '60016',\n", + " '60021',\n", + " '60046',\n", + " '60052',\n", + " '60072',\n", + " '60073',\n", + " '60084',\n", + " '60087',\n", + " '70051',\n", + " '70060',\n", + " '70072',\n", + "]\n", + "fp = ['10280', '10455', '10523', '11112', '50020', '50048', '50052', '50061', '50073', '60077']" ] }, { @@ -450,12 +764,7 @@ }, "outputs": [], "source": [ - "fn_clear = [\n", - " ('10316', 98),\n", - " ('10968', 122),\n", - " ('11050', 110),\n", - " ('11149', 111)\n", - "]" + "fn_clear = [('10316', 98), ('10968', 122), ('11050', 110), ('11149', 111)]" ] }, { @@ -469,14 +778,18 @@ "import matplotlib.pyplot as plt\n", "from mriqc.viz.utils import plot_slice\n", "import nibabel as nb\n", + "\n", "for im, z in fn_clear:\n", " image_path = op.join(ds030_path, 'sub-%s' % im, 'anat', 'sub-%s_T1w.nii.gz' % im)\n", " imdata = nb.load(image_path).get_data()\n", - " \n", + "\n", " fig, ax = plt.subplots()\n", " plot_slice(imdata[..., z], annotate=True)\n", - " fig.savefig(op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", - " dpi=300, bbox_inches='tight')\n", + " fig.savefig(\n", + " op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", + " dpi=300,\n", + " bbox_inches='tight',\n", + " )\n", " plt.clf()\n", " plt.close()" ] @@ -496,11 +809,14 @@ "for im, z in fp_clear:\n", " image_path = op.join(ds030_path, 'sub-%s' % im, 'anat', 'sub-%s_T1w.nii.gz' % im)\n", " imdata = nb.load(image_path).get_data()\n", - " \n", + "\n", " fig, ax = plt.subplots()\n", " plot_slice(imdata[..., z], annotate=True)\n", - " fig.savefig(op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", - " dpi=300, bbox_inches='tight')\n", + " fig.savefig(\n", + " op.join(outputs_path, 'figures', 'fig-06_sub-%s_slice-%03d.svg' % (im, z)),\n", + " dpi=300,\n", + " bbox_inches='tight',\n", + " )\n", " plt.clf()\n", " plt.close()" ] diff --git a/docs/notebooks/SpikesPlotter.ipynb b/docs/notebooks/SpikesPlotter.ipynb index 7ab764d2c..dc9f48595 100644 --- a/docs/notebooks/SpikesPlotter.ipynb +++ b/docs/notebooks/SpikesPlotter.ipynb @@ -22,15 +22,25 @@ "outputs": [], "source": [ "import os.path as op\n", + "\n", "wf_path = '/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/'\n", "\n", - "in_fft = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01'\n", - " '..func..sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_zsfft.nii.gz')\n", + "in_fft = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01'\n", + " '..func..sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_zsfft.nii.gz',\n", + ")\n", "\n", - "in_file = op.join(wf_path, '_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..sub-ben01_'\n", - " 'task-unknown_bold.nii.gz/reorient_and_discard/sub-ben01_task-unknown_bold.nii.gz')\n", - "in_spikes = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..'\n", - " 'sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_spikes.tsv')" + "in_file = op.join(\n", + " wf_path,\n", + " '_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..sub-ben01_'\n", + " 'task-unknown_bold.nii.gz/reorient_and_discard/sub-ben01_task-unknown_bold.nii.gz',\n", + ")\n", + "in_spikes = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben01..func..'\n", + " 'sub-ben01_task-unknown_bold.nii.gz/SpikesFinderFFT/sub-ben01_task-unknown_bold_spikes.tsv',\n", + ")" ] }, { @@ -39,13 +49,22 @@ "metadata": {}, "outputs": [], "source": [ - "in_fft = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", - " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_zsfft.nii.gz')\n", + "in_fft = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", + " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_zsfft.nii.gz',\n", + ")\n", "\n", - "in_file = op.join(wf_path, '_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09..func..sub-ds205s09_'\n", - " 'task-view_acq-LR_run-02_bold.nii.gz/reorient_and_discard/sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz')\n", - "in_spikes = op.join(wf_path, 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", - " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_spikes.tsv')" + "in_file = op.join(\n", + " wf_path,\n", + " '_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09..func..sub-ds205s09_'\n", + " 'task-view_acq-LR_run-02_bold.nii.gz/reorient_and_discard/sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz',\n", + ")\n", + "in_spikes = op.join(\n", + " wf_path,\n", + " 'ComputeIQMs/_in_file_..home..oesteban..Data..circle-tests..sub-ds205s09'\n", + " '..func..sub-ds205s09_task-view_acq-LR_run-02_bold.nii.gz/SpikesFinderFFT/sub-ds205s09_task-view_acq-LR_run-02_bold_spikes.tsv',\n", + ")" ] }, { @@ -95,8 +114,8 @@ "\n", "data_path = '/home/oesteban/Data/ABIDE/sub-50465/anat/sub-50465_T1w.nii.gz'\n", "data_path = '/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/_in_file_..home..oesteban..Data..example_artifacts_dataset..sub-ben04..func..sub-ben04_task-unknown_bold.nii.gz/compute_tsnr/stdev.nii.gz'\n", - "#data_path = '/home/oesteban/Data/rewardBeastBIDS2/sub-119/func/sub-119_task-rest_sbref.nii.gz'\n", - "#data_path ='/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/_in_file_..home..oesteban..Data..circle-tests..sub-ds003s03..func..sub-ds003s03_task-rhymejudgment_bold.nii.gz/compute_tsnr/stdev.nii.gz'" + "# data_path = '/home/oesteban/Data/rewardBeastBIDS2/sub-119/func/sub-119_task-rest_sbref.nii.gz'\n", + "# data_path ='/home/oesteban/tmp/mriqc-newcircle/work/workflow_enumerator/funcMRIQC/_in_file_..home..oesteban..Data..circle-tests..sub-ds003s03..func..sub-ds003s03_task-rhymejudgment_bold.nii.gz/compute_tsnr/stdev.nii.gz'" ] }, { @@ -106,6 +125,7 @@ "outputs": [], "source": [ "from mriqc.viz import utils as mvu\n", + "\n", "mvu.plot_mosaic(data_path, cmap='viridis')" ] }, diff --git a/docs/notebooks/Supplemental Materials.ipynb b/docs/notebooks/Supplemental Materials.ipynb index 7399c4587..0758f3489 100644 --- a/docs/notebooks/Supplemental Materials.ipynb +++ b/docs/notebooks/Supplemental Materials.ipynb @@ -27,6 +27,7 @@ "import pandas as pd\n", "from mriqc.viz import misc as mviz\n", "from pkg_resources import resource_filename as pkgrf\n", + "\n", "outputs_path = '../../mriqc-data/'" ] }, @@ -58,10 +59,12 @@ "outputs": [], "source": [ "fig = mviz.raters_variability_plot(\n", - " mdata, raters=['rater_1', 'rater_2', 'rater_3'], \n", + " mdata,\n", + " raters=['rater_1', 'rater_2', 'rater_3'],\n", " rater_names=['Rater 1', 'Rater 2A', 'Rater 2B'],\n", " out_file=op.join(outputs_path, 'figures', 'suppl-fig02.pdf'),\n", - " only_overlap=False)" + " only_overlap=False,\n", + ")" ] }, { @@ -71,12 +74,17 @@ "outputs": [], "source": [ "from sklearn.metrics import cohen_kappa_score\n", + "\n", "overlap = mdata[np.all(~np.isnan(mdata[['rater_2', 'rater_3']]), axis=1)]\n", "y1 = overlap.rater_2.values.ravel().tolist()\n", "y2 = overlap.rater_3.values.ravel().tolist()\n", "\n", - "fig = mviz.inter_rater_variability(y1, y2, raters=['Protocol A', 'Protocol B'],\n", - " out_file=op.join(outputs_path, 'figures', 'suppl-intrarv.pdf'))\n", + "fig = mviz.inter_rater_variability(\n", + " y1,\n", + " y2,\n", + " raters=['Protocol A', 'Protocol B'],\n", + " out_file=op.join(outputs_path, 'figures', 'suppl-intrarv.pdf'),\n", + ")\n", "\n", "print(\"Cohen's Kappa %f\" % cohen_kappa_score(y1, y2))\n", "\n", diff --git a/docs/notebooks/finding_spikes.ipynb b/docs/notebooks/finding_spikes.ipynb index a89fc657e..a2e688673 100644 --- a/docs/notebooks/finding_spikes.ipynb +++ b/docs/notebooks/finding_spikes.ipynb @@ -16,6 +16,7 @@ "import seaborn as sns\n", "from nilearn.plotting import plot_epi, plot_anat, plot_roi\n", "import numpy as np\n", + "\n", "%matplotlib inline" ] }, @@ -38,7 +39,7 @@ }, "outputs": [], "source": [ - "in_file = \"data/sub-ben01_task-unknown_bold.nii.gz\"" + "in_file = 'data/sub-ben01_task-unknown_bold.nii.gz'" ] }, { @@ -72,7 +73,7 @@ }, "outputs": [], "source": [ - "#mask_nii = compute_epi_mask(in_4d_nii)\n", + "# mask_nii = compute_epi_mask(in_4d_nii)\n", "\n", "mask_data = compute_mask(mean_nii.get_data())\n", "mask_nii = new_img_like(mean_nii, mask_data)\n", @@ -106,14 +107,14 @@ "from scipy import ndimage\n", "\n", "# Input here is a binarized and intersected mask data from previous section\n", - "dil_mask = ndimage.binary_dilation(mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis]/8))\n", + "dil_mask = ndimage.binary_dilation(\n", + " mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis] / 8)\n", + ")\n", "\n", "# Now, we visualize the same using `plot_roi` with data being converted to Nifti\n", "# image. In all new image like, reference image is the same but second argument\n", "# varies with data specific\n", - "dil_mask_nii = new_img_like(\n", - " mask_nii,\n", - " dil_mask.astype(np.int))\n", + "dil_mask_nii = new_img_like(mask_nii, dil_mask.astype(np.int))\n", "plot_roi(dil_mask_nii, mean_nii)" ] }, @@ -150,10 +151,10 @@ }, "outputs": [], "source": [ - "rep = [1,1,1]\n", + "rep = [1, 1, 1]\n", "rep[longest_axis] = mask_nii.shape[longest_axis]\n", "new_mask_3d = np.logical_not(np.tile(new_mask_2d, rep))\n", - "#new_mask_3d = " + "# new_mask_3d =" ] }, { @@ -164,9 +165,7 @@ }, "outputs": [], "source": [ - "new_mask_nii = new_img_like(\n", - " mask_nii,\n", - " new_mask_3d.astype(np.int))\n", + "new_mask_nii = new_img_like(mask_nii, new_mask_3d.astype(np.int))\n", "plot_roi(new_mask_nii, mean_nii)" ] }, @@ -182,7 +181,7 @@ "\n", "data4d = in_4d_nii.get_data()\n", "for slice_i in range(in_4d_nii.shape[2]):\n", - " slice_data = data4d[:,:,slice_i,:][new_mask_3d[:,:,slice_i]].mean(axis=0)\n", + " slice_data = data4d[:, :, slice_i, :][new_mask_3d[:, :, slice_i]].mean(axis=0)\n", " slice_data = zscore(slice_data)\n", " plt.plot(slice_data)" ] @@ -195,7 +194,7 @@ }, "outputs": [], "source": [ - "data4d[:,:,slice_i,:][new_mask_3d[:,:,slice_i]].shape" + "data4d[:, :, slice_i, :][new_mask_3d[:, :, slice_i]].shape" ] }, { @@ -236,47 +235,45 @@ " mask_nii = new_img_like(mean_nii, mask_data)\n", "\n", " plot_roi(mask_nii, mean_nii)\n", - " \n", + "\n", " a = np.where(mask_nii.get_data() != 0)\n", " bbox = np.max(a[0]) - np.min(a[0]), np.max(a[1]) - np.min(a[1]), np.max(a[2]) - np.min(a[2])\n", " print(bbox)\n", " print(np.argmax(bbox))\n", " longest_axis = np.argmax(bbox)\n", - " \n", + "\n", " from scipy import ndimage\n", "\n", " # Input here is a binarized and intersected mask data from previous section\n", - " dil_mask = ndimage.binary_dilation(mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis]/8))\n", + " dil_mask = ndimage.binary_dilation(\n", + " mask_nii.get_data(), iterations=int(mask_nii.shape[longest_axis] / 8)\n", + " )\n", "\n", " # Now, we visualize the same using `plot_roi` with data being converted to Nifti\n", " # image. In all new image like, reference image is the same but second argument\n", " # varies with data specific\n", - " dil_mask_nii = new_img_like(\n", - " mask_nii,\n", - " dil_mask.astype(np.int))\n", + " dil_mask_nii = new_img_like(mask_nii, dil_mask.astype(np.int))\n", " plot_roi(dil_mask_nii, mean_nii)\n", - " \n", + "\n", " rep = list(mask_nii.shape)\n", " rep[longest_axis] = -1\n", " new_mask_2d = dil_mask.max(axis=longest_axis).reshape(rep)\n", " new_mask_2d.shape\n", - " \n", - " rep = [1,1,1]\n", + "\n", + " rep = [1, 1, 1]\n", " rep[longest_axis] = mask_nii.shape[longest_axis]\n", " new_mask_3d = np.logical_not(np.tile(new_mask_2d, rep))\n", - " \n", - " new_mask_nii = new_img_like(\n", - " mask_nii,\n", - " new_mask_3d.astype(np.int))\n", + "\n", + " new_mask_nii = new_img_like(mask_nii, new_mask_3d.astype(np.int))\n", " plot_roi(new_mask_nii, mean_nii)\n", - " \n", + "\n", " from scipy.stats.mstats import zscore\n", "\n", - " data4d = in_4d_nii.get_data()[:,:,:,skip:]\n", + " data4d = in_4d_nii.get_data()[:, :, :, skip:]\n", " plt.figure()\n", " for slice_i in range(in_4d_nii.shape[2]):\n", - " slice_data = data4d[:,:,slice_i,:][new_mask_3d[:,:,slice_i]].mean(axis=0)\n", - " #slice_data = zscore(slice_data)\n", + " slice_data = data4d[:, :, slice_i, :][new_mask_3d[:, :, slice_i]].mean(axis=0)\n", + " # slice_data = zscore(slice_data)\n", " plt.plot(slice_data)\n", " plt.title(in_file)" ] @@ -289,7 +286,7 @@ }, "outputs": [], "source": [ - "plot_spikes(\"D:/example_artifacts_dataset/sub-ben01/func/sub-ben01_task-unknown_bold.nii.gz\")" + "plot_spikes('D:/example_artifacts_dataset/sub-ben01/func/sub-ben01_task-unknown_bold.nii.gz')" ] }, { @@ -312,7 +309,7 @@ }, "outputs": [], "source": [ - "for file in glob(\"D:/*/sub-*/func/sub-*_task-*_bold.nii.gz\"):\n", + "for file in glob('D:/*/sub-*/func/sub-*_task-*_bold.nii.gz'):\n", " plot_spikes(file)" ] }, @@ -335,7 +332,7 @@ }, "outputs": [], "source": [ - "glob(\"D:/*/sub-*/func/sub-*_task-*_bold.nii.gz\")" + "glob('D:/*/sub-*/func/sub-*_task-*_bold.nii.gz')" ] } ],