From f516935e9439150d2081b1fe367e170fee20079b Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Fri, 28 Jun 2024 17:09:22 +0200 Subject: [PATCH 01/32] CTX-5783: Fixed coretex-jobs linter errors --- .../resources/function/function.py | 10 ++++-- .../resources/function/load_data.py | 8 +++-- .../resources/function/load_data_std.py | 6 ++-- .../resources/function/model.py | 10 +++--- .../resources/function/utils.py | 21 +++++++----- tasks/bio-bodysite-prediction-nn/src/cache.py | 25 +++++++++++---- .../bio-bodysite-prediction-nn/src/dataset.py | 6 ++-- .../src/load_data.py | 16 ++++++---- .../src/load_data_std.py | 18 +++++------ tasks/bio-bodysite-prediction-nn/src/model.py | 24 ++++++++++---- .../bio-bodysite-prediction-nn/src/objects.py | 2 +- tasks/bio-bodysite-prediction-nn/src/utils.py | 32 +++++++++++-------- tasks/bio-bodysite-prediction/main.py | 4 +-- .../resources/function/function.py | 10 ++++-- .../resources/function/load_data.py | 16 +++++++--- .../resources/function/load_data_std.py | 2 +- .../bio-bodysite-prediction/src/cache_json.py | 5 ++- .../src/cache_matrix.py | 7 ++-- .../bio-bodysite-prediction/src/load_data.py | 27 +++++++++------- .../src/load_data_std.py | 24 +++++++------- tasks/bio-bodysite-prediction/src/objects.py | 2 +- tasks/bio-bodysite-prediction/src/utils.py | 4 +-- tasks/bio-bodysite-prediction/task.yaml | 4 +-- tasks/bio-read-quality/main.py | 12 +++---- tasks/bio-region-seperation/src/__init__.py | 0 tasks/bio-region-seperation/src/separate.py | 4 +-- tasks/contextual-targeting/main.py | 2 +- tasks/image-augmentation/src/augmentation.py | 32 ++++++++++++------- tasks/image-augmentation/src/utils.py | 9 ++++-- tasks/image-augmentation/task.yaml | 4 +-- tasks/image-segmentation/src/dataset.py | 2 +- tasks/image-segmentation/src/detect.py | 7 +++- tasks/image-segmentation/src/utils.py | 2 +- tasks/llama2-lora/src/configurations.py | 2 +- tasks/llama2-lora/src/model.py | 2 ++ tasks/llm-text-processing/main.py | 4 ++- tasks/object-detection-yolov10/src/predict.py | 8 ++--- tasks/object-detection-yolov8/src/predict.py | 2 +- .../resources/function/function.py | 6 ++-- .../resources/function/model.py | 4 ++- tasks/ollama-rag-index/main.py | 2 +- tasks/ollama-rag-index/src/model.py | 4 ++- tasks/qiime-import/src/__init__.py | 0 tasks/qiime-import/src/utils.py | 2 +- 44 files changed, 242 insertions(+), 151 deletions(-) create mode 100644 tasks/bio-region-seperation/src/__init__.py create mode 100644 tasks/qiime-import/src/__init__.py diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/function.py b/tasks/bio-bodysite-prediction-nn/resources/function/function.py index b1ac9f77..c2cd0fb3 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/function.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/function.py @@ -6,6 +6,8 @@ from coretex import folder_manager, functions +import numpy as np + from load_data import loadDataAtlas from load_data_std import loadDataStd @@ -29,7 +31,7 @@ def unzip(inputPath: Path, dataFormat: int) -> Path: return inputPath -def inference(modelInput: Path, model: Model, uniqueTaxons: dict[str, int]) -> list[str]: +def inference(modelInput: Path, model: Model, uniqueTaxons: dict[str, int]) -> np.ndarray: BATCHE_SIZE = 562 sampleCount = len(list(modelInput.iterdir())) @@ -45,7 +47,11 @@ def response(requestData: dict[str, Any]) -> dict[str, Any]: with open(modelDir / "model_descriptor.json", "r") as jsonFile: modelDescriptor = json.load(jsonFile) - dataFormat = int(requestData.get("dataFormat")) # 0 - 
MBA, 1 - Microbiome Forensics Institute Zuric + dataFormatRaw = requestData.get("dataFormat") + if not isinstance(dataFormatRaw, str) and not isinstance(dataFormatRaw, int): + return functions.badRequest("Invalid dataFormat. (0 - MBA, 1 - Microbiome Forensics Institute Zuric)") + + dataFormat = int(dataFormatRaw) # 0 - MBA, 1 - Microbiome Forensics Institute Zuric inputPath = requestData.get("inputFile") if not isinstance(inputPath, Path): diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/load_data.py b/tasks/bio-bodysite-prediction-nn/resources/function/load_data.py index 104588f0..fac6d629 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/load_data.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/load_data.py @@ -73,6 +73,9 @@ def loadDataAtlas( ) -> tuple[Path, dict[str, int], dict[str, int], list[str]]: workerCount = os.cpu_count() # This value should not exceed the total number of CPU cores + if workerCount is None: + workerCount = 1 + logging.info(f">> [MicrobiomeForensics] Using {workerCount} CPU cores to read the file") fileSize = inputPath.stat().st_size @@ -89,8 +92,9 @@ def loadDataAtlas( uniqueBodySites = pickle.load(f) def onProcessingFinished(future: Future) -> None: - if future.exception() is not None: - raise future.exception() + exception = future.exception() + if exception is not None: + raise exception logging.info(f">> [MicrobiomeForensics] Reading: {inputPath}") diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/load_data_std.py b/tasks/bio-bodysite-prediction-nn/resources/function/load_data_std.py index 8956ff99..a986dc50 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/load_data_std.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/load_data_std.py @@ -9,12 +9,12 @@ from objects import Sample, Taxon -def loadDataStd(inputPath: Path, modelDir: Path, level: int) -> tuple[int, int, dict[str, int], list[int]]: +def loadDataStd(inputPath: Path, modelDir: Path, level: int) -> tuple[Path, dict[str, int], dict[str, int], list[str]]: with open(modelDir / "uniqueTaxons.pkl", "rb") as f: - uniqueTaxons = pickle.load(f) + uniqueTaxons: dict[str, int] = pickle.load(f) with open(modelDir / "uniqueBodySites.pkl", "rb") as f: - uniqueBodySites = pickle.load(f) + uniqueBodySites: dict[str, int] = pickle.load(f) datasetPath = folder_manager.createTempFolder("dataset") diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/model.py b/tasks/bio-bodysite-prediction-nn/resources/function/model.py index 88ad4676..8ecb16ab 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/model.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/model.py @@ -148,7 +148,7 @@ def __init__( self.lam = lam self._activation_gating = activation_gating - self.activation_gating = activation_gating # will overwrite _activation_gating + self.activation_gating = activation_gating # type: ignore[assignment] self.activation_pred = activation_pred @@ -337,7 +337,7 @@ def predict(self, data: tf.data.Dataset, batches: int): return convertFromOneHot(np.array(y_pred)) - def test(self, data: tf.data.Dataset, batches: int) -> tuple[np.ndarray, np.ndarray, float]: + def test(self, data: tf.data.Dataset, batches: int) -> tuple[np.ndarray, np.ndarray]: y_pred: list[list[int]] = [] # List of one hot vectors y_true: list[list[int]] = [] @@ -374,11 +374,11 @@ def _predict_from_array(self, X: ArrayLike) -> Tensor: @property def activation_gating(self) -> Callable: - return self._activation_gating + return 
self._activation_gating # type: ignore[return-value] @activation_gating.setter - def activation_gating(self, value: str) -> Callable: + def activation_gating(self, value: str) -> Callable: # type: ignore[return] if value == 'relu': self._activation_gating = tf.nn.relu elif value == 'l_relu': @@ -388,7 +388,7 @@ def activation_gating(self, value: str) -> Callable: elif value == 'tanh': self._activation_gating = tf.nn.tanh elif value == 'none': - self._activation_gating = lambda x: x + self._activation_gating = lambda x: x # type: ignore[assignment] else: raise NotImplementedError('activation for the gating network not recognized') diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/utils.py b/tasks/bio-bodysite-prediction-nn/resources/function/utils.py index 130b52b6..a524321a 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/utils.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/utils.py @@ -1,10 +1,9 @@ -from typing import Optional - -from numpy.typing import ArrayLike +from typing import Optional, Union import numpy as np -def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.ndarray: + +def oneHotEncoding(vector: Union[np.ndarray, int], numClasses: Optional[int] = None) -> np.ndarray: """ Converts an input 1-D vector of integers into an output @@ -16,7 +15,7 @@ def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.n ---------- vector : ArrayLike A vector of integers - num_classes : int + numClasses : int Optionally declare the number of classes (can not exceed the maximum value of the vector) Returns @@ -26,7 +25,7 @@ def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.n Example ------- - >>> v = np.array((1, 0, 4)) + >>> v = np.array([1, 0, 4]) >>> one_hot_v = oneHotEncoding(v) >>> print one_hot_v [[0 1 0 0 0] @@ -34,9 +33,15 @@ def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.n [0 0 0 0 1]] """ - vecLen = 1 if isinstance(vector, int) else len(vector) + if isinstance(vector, int): + vector = np.array([vector]) + + vecLen = vector.shape[0] + + if numClasses is None: + numClasses = vector.max() + 1 - result = np.zeros(shape = (vecLen, num_classes)) + result = np.zeros(shape = (vecLen, numClasses)) result[np.arange(vecLen), vector] = 1 return result.astype(int) diff --git a/tasks/bio-bodysite-prediction-nn/src/cache.py b/tasks/bio-bodysite-prediction-nn/src/cache.py index fe3acbd9..3da8c16b 100644 --- a/tasks/bio-bodysite-prediction-nn/src/cache.py +++ b/tasks/bio-bodysite-prediction-nn/src/cache.py @@ -69,7 +69,7 @@ def cacheDataset( logging.info(">> [MicrobiomeForensics] Successfuly cached assembled dataset") -def loadCache(taskRun: TaskRun[CustomDataset], cacheName: str) -> tuple[Path, dict[str, int], dict[str, int]]: +def loadCache(taskRun: TaskRun[CustomDataset], cacheName: str) -> tuple[dict[str, int], dict[str, int], int]: logging.info(">> [MicrobiomeForensics] Loading assembled dataset to cache") start = time.time() @@ -77,6 +77,9 @@ def loadCache(taskRun: TaskRun[CustomDataset], cacheName: str) -> tuple[Path, di datasetPath.mkdir(parents = True, exist_ok = True) cache = getCache(cacheName) + if cache is None: + raise ValueError(">> [MicrobiomeForensics] Failed to retrieve cache") + cache.download() samples = cache.getSamples(lambda sample: sample.name != "taxonDistribution" and sample.name != "classDistribution") @@ -89,16 +92,24 @@ def loadCache(taskRun: TaskRun[CustomDataset], cacheName: str) -> tuple[Path, di with 
datasetPath.joinpath(sample.name).open("wb") as file: pickle.dump(content, file) - taxonDistribution = cache.getSample("taxonDistribution") - classDistribution = cache.getSample("classDistribution") + taxonDistributionCache = cache.getSample("taxonDistribution") + classDistributionCache = cache.getSample("classDistribution") - if taxonDistribution is None and classDistribution is None: + if taxonDistributionCache is None and classDistributionCache is None: raise RuntimeError(">> [MicrobiomeForensics] Could not find taxonDistribution and classDistribution files in cache") - elif taxonDistribution is None: + elif taxonDistributionCache is None: raise RuntimeError(">> [MicrobiomeForensics] Could not find taxonDistribution file in cache") - elif classDistribution is None: + elif classDistributionCache is None: raise RuntimeError(">> [MicrobiomeForensics] Could not find classDistribution file in cache") + taxonDistributionCache.unzip() + with taxonDistributionCache.path.joinpath("taxonDistribution.pkl").open("rb") as file: + taxonDistribution: dict[str, int] = pickle.load(file) + + classDistributionCache.unzip() + with classDistributionCache.path.joinpath("classDistribution.pkl").open("rb") as file: + classDistribution: dict[str, int] = pickle.load(file) + uniqueTaxons = generateTaxonEncoding(taxonDistribution) uniqueBodySites = generateClassEncodings(classDistribution) @@ -108,7 +119,7 @@ def loadCache(taskRun: TaskRun[CustomDataset], cacheName: str) -> tuple[Path, di plots(taskRun, classDistribution, taxonDistribution, datasetLen) - return datasetPath, uniqueBodySites, uniqueTaxons, datasetLen + return uniqueBodySites, uniqueTaxons, datasetLen def generateTaxonEncoding(taxonDistribution: dict[str, int]) -> dict[str, int]: diff --git a/tasks/bio-bodysite-prediction-nn/src/dataset.py b/tasks/bio-bodysite-prediction-nn/src/dataset.py index 94cf6655..db858a1d 100644 --- a/tasks/bio-bodysite-prediction-nn/src/dataset.py +++ b/tasks/bio-bodysite-prediction-nn/src/dataset.py @@ -43,12 +43,12 @@ def generatorFunc() -> Generator: for taxon in sample.taxons: x[uniqueTaxons[taxon.taxonId]] = np.log(taxon.count + 0.5) - y = oneHotEncoding(y, len(uniqueBodySites)) - y = y.reshape(len(uniqueBodySites), ) + yOneHot = oneHotEncoding(y, len(uniqueBodySites)) + yOneHot = yOneHot.reshape(len(uniqueBodySites), ) yield { "features": tf.convert_to_tensor(x, dtype = tf.float32), - "labels": tf.convert_to_tensor(y, dtype = tf.float32) + "labels": tf.convert_to_tensor(yOneHot, dtype = tf.float32) } return tf.data.Dataset.from_generator( diff --git a/tasks/bio-bodysite-prediction-nn/src/load_data.py b/tasks/bio-bodysite-prediction-nn/src/load_data.py index efe73ca4..6a510d98 100644 --- a/tasks/bio-bodysite-prediction-nn/src/load_data.py +++ b/tasks/bio-bodysite-prediction-nn/src/load_data.py @@ -187,9 +187,9 @@ def loadDataAtlas( sampleOrigin: list[str], sequencingTechnique: list[str], useCache: bool, - validBodySites: dict[str, int] = None, - validTaxons: dict[str, int] = None -) -> tuple[Path, dict[str, int], dict[str, int], int]: + validBodySites: Optional[dict[str, int]] = None, + validTaxons: Optional[dict[str, int]] = None +) -> tuple[dict[str, int], dict[str, int], int]: """ Loads the dataset and returns it ready for training. 
@@ -230,6 +230,9 @@ def loadDataAtlas( sampleInfoObj = readEnvInfo(infoPath, sampleOrigin, sequencingTechnique) workerCount = os.cpu_count() # This value should not exceed the total number of CPU cores + if workerCount is None: + workerCount = 1 + logging.info(f">> [MicrobiomeForensics] Using {workerCount} CPU cores to read the dataset") fileSize = mappedPath.stat().st_size @@ -261,8 +264,9 @@ def onProcessingFinished(future: Future) -> None: The future object of the process from ProcessPoolExecutor """ - if future.exception() is not None: - raise future.exception() + exception = future.exception() + if exception is not None: + raise exception processClassDistribution, processTaxonDistribution = future.result() @@ -341,4 +345,4 @@ def onProcessingFinished(future: Future) -> None: taskRun.projectId ) - return uniqueBodySite, uniqueTaxons, datasetLen + return uniqueBodySite, uniqueTaxons, datasetLen diff --git a/tasks/bio-bodysite-prediction-nn/src/load_data_std.py b/tasks/bio-bodysite-prediction-nn/src/load_data_std.py index 0addfca4..ec7f4b1f 100644 --- a/tasks/bio-bodysite-prediction-nn/src/load_data_std.py +++ b/tasks/bio-bodysite-prediction-nn/src/load_data_std.py @@ -21,7 +21,7 @@ def loadDataStd( level: int, validBodySites: Optional[dict[str, int]] = None, validTaxons: Optional[dict[str, int]] = None -) -> tuple[int, int, dict[str, int], dict[str, int], list[int]]: +) -> tuple[dict[str, int], dict[str, int], int]: logging.info(">> [MicrobiomeForensics] Downloading dataset...") taskRun.updateStatus(TaskRunStatus.inProgress, "Downloading dataset...") @@ -50,19 +50,19 @@ def loadDataStd( samplePath = glob.glob(os.path.join(sample.path, f"*.json"))[0] with open(samplePath, "r") as f: - sample = json.load(f) + sampleDict = json.load(f) - if validBodySites is not None and sample["body_site"] not in validBodySites: + if validBodySites is not None and sampleDict["body_site"] not in validBodySites: continue - sampleObj = Sample(sample["_id"]["$oid"], sample["body_site"], None, []) + sampleObj = Sample(sampleDict["_id"]["$oid"], sampleDict["body_site"], None, []) - if not sample["body_site"] in classDistribution: - classDistribution[sample["body_site"]] = 1 + if not sampleDict["body_site"] in classDistribution: + classDistribution[sampleDict["body_site"]] = 1 else: - classDistribution[sample["body_site"]] += 1 + classDistribution[sampleDict["body_site"]] += 1 - taxons = loadTaxons(sample, level) + taxons = loadTaxons(sampleDict, level) if validTaxons is not None and any(taxon not in validTaxons for taxon in taxons.keys()): continue @@ -78,7 +78,7 @@ def loadDataStd( with datasetPath.joinpath(sampleObj.sampleId).open("wb") as file: pickle.dump(sampleObj, file) - if validBodySites is not None and uniqueTaxons is not None: + if validBodySites is not None and validTaxons is not None: uniqueBodySites = validBodySites uniqueTaxons = validTaxons else: diff --git a/tasks/bio-bodysite-prediction-nn/src/model.py b/tasks/bio-bodysite-prediction-nn/src/model.py index 6ecc0d30..8ecb16ab 100644 --- a/tasks/bio-bodysite-prediction-nn/src/model.py +++ b/tasks/bio-bodysite-prediction-nn/src/model.py @@ -13,7 +13,7 @@ from coretex import TaskRun -from .utils import convertFromOneHot +from utils import convertFromOneHot class GatingLayer(tf.keras.layers.Layer): @@ -148,7 +148,7 @@ def __init__( self.lam = lam self._activation_gating = activation_gating - self.activation_gating = activation_gating # will overwrite _activation_gating + self.activation_gating = activation_gating # type: ignore[assignment] 
self.activation_pred = activation_pred @@ -325,7 +325,19 @@ def _valid_step(self, X: Tensor, y: Tensor) -> Tensor: return y_pred_hot - def test(self, data: tf.data.Dataset, batches: int) -> tuple[np.ndarray, np.ndarray, float]: + def predict(self, data: tf.data.Dataset, batches: int): + y_pred: list[list[int]] = [] + + for i, batch in enumerate(data): + if i == batches: + break + + y_pred.extend(list(self._test_step(batch["features"]))) + + return convertFromOneHot(np.array(y_pred)) + + + def test(self, data: tf.data.Dataset, batches: int) -> tuple[np.ndarray, np.ndarray]: y_pred: list[list[int]] = [] # List of one hot vectors y_true: list[list[int]] = [] @@ -362,11 +374,11 @@ def _predict_from_array(self, X: ArrayLike) -> Tensor: @property def activation_gating(self) -> Callable: - return self._activation_gating + return self._activation_gating # type: ignore[return-value] @activation_gating.setter - def activation_gating(self, value: str) -> Callable: + def activation_gating(self, value: str) -> Callable: # type: ignore[return] if value == 'relu': self._activation_gating = tf.nn.relu elif value == 'l_relu': @@ -376,7 +388,7 @@ def activation_gating(self, value: str) -> Callable: elif value == 'tanh': self._activation_gating = tf.nn.tanh elif value == 'none': - self._activation_gating = lambda x: x + self._activation_gating = lambda x: x # type: ignore[assignment] else: raise NotImplementedError('activation for the gating network not recognized') diff --git a/tasks/bio-bodysite-prediction-nn/src/objects.py b/tasks/bio-bodysite-prediction-nn/src/objects.py index 39cfc0ca..8cd25880 100644 --- a/tasks/bio-bodysite-prediction-nn/src/objects.py +++ b/tasks/bio-bodysite-prediction-nn/src/objects.py @@ -10,7 +10,7 @@ def __init__(self, taxonId: str, count: int): class Sample: - def __init__(self, sampleId: str, bodySite: str, associationSite: str, taxons: Optional[list[Taxon]] = None) -> None: + def __init__(self, sampleId: str, bodySite: str, associationSite: Optional[str], taxons: Optional[list[Taxon]] = None) -> None: self.sampleId = sampleId self.bodySite = bodySite self.associationSite = associationSite diff --git a/tasks/bio-bodysite-prediction-nn/src/utils.py b/tasks/bio-bodysite-prediction-nn/src/utils.py index e48b5144..20f65dfd 100644 --- a/tasks/bio-bodysite-prediction-nn/src/utils.py +++ b/tasks/bio-bodysite-prediction-nn/src/utils.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Union from pathlib import Path import csv @@ -6,8 +6,6 @@ import shutil import logging -from numpy.typing import ArrayLike - import numpy as np import matplotlib.pyplot as plt @@ -97,7 +95,7 @@ def saveFeatureTable(taskRun: TaskRun[CustomDataset], featureTablePath: str, tab def savePlotFig( taskRun: TaskRun[CustomDataset], distributionDict: dict, - savePath: str, + savePath: Path, fileName: str, xLabelRotation: bool, xLabel: str, @@ -127,15 +125,15 @@ def savePlotFig( def savePredictionFile( taskRun: TaskRun[CustomDataset], - savePath: str, + savePath: Path, trainCount: int, testCount: int, sampleIds: list, uniqueBodySite: dict, - yTrain: list, - yTest: list, - yPred: list, - zPred: list + yTrain: np.ndarray, + yTest: np.ndarray, + yPred: np.ndarray, + zPred: np.ndarray ) -> None: with folder_manager.temp.joinpath("body_site_predictions.csv").open("a+") as f: @@ -220,7 +218,7 @@ def plots(taskRun: TaskRun[CustomDataset], classDistribution: dict[str, int], ta logging.info(f">> [MicrobiomeForensics] Loading data and matching finished. 
Successfully matched {datasetLen} samples") -def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.ndarray: +def oneHotEncoding(vector: Union[np.ndarray, int], numClasses: Optional[int] = None) -> np.ndarray: """ Converts an input 1-D vector of integers into an output @@ -232,7 +230,7 @@ def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.n ---------- vector : ArrayLike A vector of integers - num_classes : int + numClasses : int Optionally declare the number of classes (can not exceed the maximum value of the vector) Returns @@ -242,7 +240,7 @@ def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.n Example ------- - >>> v = np.array((1, 0, 4)) + >>> v = np.array([1, 0, 4]) >>> one_hot_v = oneHotEncoding(v) >>> print one_hot_v [[0 1 0 0 0] @@ -250,9 +248,15 @@ def oneHotEncoding(vector: ArrayLike, num_classes: Optional[int] = None) -> np.n [0 0 0 0 1]] """ - vecLen = 1 if isinstance(vector, int) else len(vector) + if isinstance(vector, int): + vector = np.array([vector]) + + vecLen = vector.shape[0] + + if numClasses is None: + numClasses = vector.max() + 1 - result = np.zeros(shape = (vecLen, num_classes)) + result = np.zeros(shape = (vecLen, numClasses)) result[np.arange(vecLen), vector] = 1 return result.astype(int) diff --git a/tasks/bio-bodysite-prediction/main.py b/tasks/bio-bodysite-prediction/main.py index 43f5fb3e..e874e639 100644 --- a/tasks/bio-bodysite-prediction/main.py +++ b/tasks/bio-bodysite-prediction/main.py @@ -10,8 +10,8 @@ def validation(taskRun: TaskRun[CustomDataset]) -> None: - trainedModel: Model = taskRun.parameters.get("trainedModel") - if trainedModel is None: + trainedModel= taskRun.parameters.get("trainedModel") + if not isinstance(trainedModel, Model): raise RuntimeError(">> [MicrobiomeForensics] In order to start the validation process You have to type in \"trainedModel\" in TaskRun parameters") trainedModel.download() diff --git a/tasks/bio-bodysite-prediction/resources/function/function.py b/tasks/bio-bodysite-prediction/resources/function/function.py index 890cc160..c1e80067 100644 --- a/tasks/bio-bodysite-prediction/resources/function/function.py +++ b/tasks/bio-bodysite-prediction/resources/function/function.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Union from pathlib import Path from zipfile import ZipFile, is_zipfile @@ -36,13 +36,17 @@ def response(requestData: dict[str, Any]) -> dict[str, Any]: with open(modelDir / "model_descriptor.json", "r") as jsonFile: modelDescriptor = json.load(jsonFile) - dataFormat = int(requestData.get("dataFormat")) # 0 - MBA, 1 - Microbiome Forensics Institute Zuric + dataFormatRaw = requestData.get("dataFormat") + if not isinstance(dataFormatRaw, str) and not isinstance(dataFormatRaw, int): + return functions.badRequest("Invalid dataFormat. 
(0 - MBA, 1 - Microbiome Forensics Institute Zuric)") + + dataFormat = int(dataFormatRaw) # 0 - MBA, 1 - Microbiome Forensics Institute Zuric inputPath = requestData.get("inputFile") if not isinstance(inputPath, Path): return functions.badRequest("Invalid input data") - inputPath = unzip(inputPath) + inputPath = unzip(inputPath, dataFormat) if dataFormat == 0 and inputPath.is_file(): percentile = modelDescriptor.get("percentile") diff --git a/tasks/bio-bodysite-prediction/resources/function/load_data.py b/tasks/bio-bodysite-prediction/resources/function/load_data.py index a6ad5272..dffa4507 100644 --- a/tasks/bio-bodysite-prediction/resources/function/load_data.py +++ b/tasks/bio-bodysite-prediction/resources/function/load_data.py @@ -10,6 +10,8 @@ from scipy import sparse from sklearn.feature_selection import SelectPercentile +import numpy as np + from objects import Sample, Taxon @@ -93,14 +95,17 @@ def loadDataAtlas( inputPath: Path, modelDir: Path, percentile: int -) -> tuple[list[Sample], dict[str, int], dict[str, int]]: +) -> tuple[np.ndarray, dict[str, int], list[str]]: workerCount = os.cpu_count() # This value should not exceed the total number of CPU cores + if workerCount is None: + workerCount = 1 + logging.info(f">> [MicrobiomeForensics] Using {workerCount} CPU cores to read the file") fileSize = inputPath.stat().st_size # Smaller file size - used for testing - # fileSize = 100 * 1024 * 1024 + fileSize = 100 * 1024 * 1024 step = fileSize // workerCount remainder = fileSize % workerCount @@ -124,8 +129,9 @@ def onProcessingFinished(future: Future) -> None: The future object of the process from ProcessPoolExecutor """ - if future.exception() is not None: - raise future.exception() + exception = future.exception() + if exception is not None: + raise exception processSampleData = future.result() sampleData.extend(processSampleData) @@ -160,7 +166,7 @@ def prepareForInferenceAtlas( uniqueTaxons: dict[str, int], uniqueBodySites: dict[str, int], percentile: Optional[int] -) -> tuple[sparse.csr_matrix, dict[str, int], list[str]]: +) -> tuple[np.ndarray, dict[str, int], list[str]]: sampleIdList: list[str] = [] rowIndices: list[int] = [] diff --git a/tasks/bio-bodysite-prediction/resources/function/load_data_std.py b/tasks/bio-bodysite-prediction/resources/function/load_data_std.py index c686460d..78ad542b 100644 --- a/tasks/bio-bodysite-prediction/resources/function/load_data_std.py +++ b/tasks/bio-bodysite-prediction/resources/function/load_data_std.py @@ -6,7 +6,7 @@ import numpy as np -def loadDataStd(inputPath: Path, modelDir: Path, level: int) -> tuple[int, int, dict[str, int], list[int]]: +def loadDataStd(inputPath: Path, modelDir: Path, level: int) -> tuple[np.ndarray, dict[str, int], list[str]]: with open(modelDir / "uniqueTaxons.pkl", "rb") as f: uniqueTaxons = pickle.load(f) diff --git a/tasks/bio-bodysite-prediction/src/cache_json.py b/tasks/bio-bodysite-prediction/src/cache_json.py index edee9b22..29f37e21 100644 --- a/tasks/bio-bodysite-prediction/src/cache_json.py +++ b/tasks/bio-bodysite-prediction/src/cache_json.py @@ -18,13 +18,16 @@ def getJsonName(datasetName: str, sampleOrigin: list[str], sequencingTechnique: suffix = f"{origins}-{techniques}" - return hashCacheName(datasetName, suffix) + return hashCacheName(datasetName, suffix)[:20] def loadJsonCache(cacheName: str) -> JsonTuple: logging.info(">> [MicrobiomeForensics] Loading assembled dataset from cache") cache = getJsonCache(cacheName) + if cache is None: + raise ValueError(">> [MicrobiomeForensics] Failed to 
retrieve cache") + cache.download() cache.samples[0].unzip() cachePath = Path(cache.samples[0].path) diff --git a/tasks/bio-bodysite-prediction/src/cache_matrix.py b/tasks/bio-bodysite-prediction/src/cache_matrix.py index 060e12f2..7db387ea 100644 --- a/tasks/bio-bodysite-prediction/src/cache_matrix.py +++ b/tasks/bio-bodysite-prediction/src/cache_matrix.py @@ -30,13 +30,16 @@ def getMatrixName( suffix = f"{origins}-{techniques}-{percentile}-{quantize}" - return hashCacheName(datasetName, suffix) + return hashCacheName(datasetName, suffix)[:20] def loadMatrixCache(cacheName: str, validation: bool) -> MatrixTuple: logging.info(">> [MicrobiomeForensics] Loading processed data from cache") cache = getMatrixCache(cacheName) + if cache is None: + raise ValueError(">> [MicrobiomeForensics] Failed to retrieve cache") + cache.download() cache.samples[0].unzip() cachePath = Path(cache.samples[0].path) @@ -104,7 +107,7 @@ def cacheMatrix( archive.write(cachePath.joinpath(f"{item}.pkl"), f"{item}.pkl") with createDataset(CustomDataset, cacheName, projectId) as cacheDataset: - if CustomSample().createCustomSample("zipedCache", cacheDataset.id, zipPath): + if cacheDataset.add(zipPath, "zipedCache"): logging.info(">> [MicrobiomeForensics] Successfuly cached processed data") else: logging.warning(">> [MicrobiomeForensics] Failed to cache processed data") diff --git a/tasks/bio-bodysite-prediction/src/load_data.py b/tasks/bio-bodysite-prediction/src/load_data.py index a7fe8692..a191475a 100644 --- a/tasks/bio-bodysite-prediction/src/load_data.py +++ b/tasks/bio-bodysite-prediction/src/load_data.py @@ -70,7 +70,7 @@ def readByteBlockUntilNewLine(file: BinaryIO, blockSize: int) -> Optional[bytes] return content + remainder -def processByteBatch(envInfoData: dict[str, str], filePath: Path, start: int, end: int) -> JsonTuple: +def processByteBatch(envInfoData: dict[str, str], filePath: Path, start: int, end: int) -> tuple[list[Sample], set[str], set[str]]: """ Called as a process by ProcessPoolExecutor for parallel processing. 
@@ -149,7 +149,7 @@ def processByteBatch(envInfoData: dict[str, str], filePath: Path, start: int, en int(count) )) - return JsonTuple(sampleData, uniqueBodySites, uniqueTaxons) + return sampleData, uniqueBodySites, uniqueTaxons def removeBadSamples(sampleData: list[Sample], uniqueTaxons: dict[str, int], uniqueBodySites: dict[str, int]) -> list[Sample]: @@ -321,7 +321,7 @@ def loadDataAtlas( validate = taskRun.parameters["validation"] cacheNameMatrix = getMatrixName( - dataset.name, + dataset.name[:42], sampleOrigin, sequencingTechnique, taskRun.parameters["percentile"], @@ -334,7 +334,7 @@ def loadDataAtlas( return loadMatrixCache(cacheNameMatrix, validate) cacheNameJson = getJsonName( - dataset.name, + dataset.name[:42], sampleOrigin, sequencingTechnique ) @@ -358,6 +358,9 @@ def loadDataAtlas( sampleInfoObj = readEnvInfo(infoPath, sampleOrigin, sequencingTechnique) workerCount = os.cpu_count() # This value should not exceed the total number of CPU cores + if workerCount is None: + workerCount = 1 + logging.info(f">> [MicrobiomeForensics] Using {workerCount} CPU cores to read the dataset") fileSize = mappedPath.stat().st_size @@ -367,11 +370,11 @@ def loadDataAtlas( step = fileSize // workerCount remainder = fileSize % workerCount - sampleData: list[Sample] = [] + sampleData: list[Sample] = [] # type: ignore[no-redef] # These two dictionaries represent the mapping between the names and encoded integers of the bodysites and taxons respectively - uniqueBodySite: dict[str, int] = {} - uniqueTaxons: dict[str, int] = {} + uniqueBodySite: dict[str, int] = {} # type: ignore[no-redef] + uniqueTaxons: dict[str, int] = {} # type: ignore[no-redef] if validate: # In the case of validation the same dictionaries will be used as during training @@ -395,8 +398,9 @@ def onProcessingFinished(future: Future) -> None: The future object of the process from ProcessPoolExecutor """ - if future.exception() is not None: - raise future.exception() + exception = future.exception() + if exception is not None: + raise exception processSampleData, processUniqueBodySite, processUniqueTaxons = future.result() @@ -510,10 +514,11 @@ def prepareForTrainingAtlas( if quantize: for i, num in enumerate(matrixData): if num > 65535: matrixData[i] = 65535 - matrixData = np.array(matrixData).astype(np.ushort) + + matrixDataU16 = np.array(matrixData).astype(np.ushort) # Assemble the input matrix in a sparse representation - inputMatrix = sparse.csr_matrix((matrixData, (rowIndices, columnIndices)), inputMatrixShape, dtype = np.ushort) + inputMatrix = sparse.csr_matrix((matrixDataU16, (rowIndices, columnIndices)), inputMatrixShape, dtype = np.ushort) else: inputMatrix = sparse.csr_matrix((matrixData, (rowIndices, columnIndices)), inputMatrixShape, dtype = np.int32) diff --git a/tasks/bio-bodysite-prediction/src/load_data_std.py b/tasks/bio-bodysite-prediction/src/load_data_std.py index 347bb4ae..96b2b310 100644 --- a/tasks/bio-bodysite-prediction/src/load_data_std.py +++ b/tasks/bio-bodysite-prediction/src/load_data_std.py @@ -10,7 +10,7 @@ from .utils import savePlotFig -def loadDataStd(dataset: CustomDataset, taskRun: TaskRun[CustomDataset]) -> tuple[int, int, dict[str, int], dict[str, int], list[int]]: +def loadDataStd(dataset: CustomDataset, taskRun: TaskRun[CustomDataset]) -> tuple[int, int, dict[str, int], dict[str, int]]: logging.info(">> [MicrobiomeForensics] Downloading dataset...") taskRun.updateStatus(TaskRunStatus.inProgress, "Downloading dataset...") dataset.download() @@ -37,15 +37,15 @@ def loadDataStd(dataset: 
CustomDataset, taskRun: TaskRun[CustomDataset]) -> tupl samplePath = glob.glob(os.path.join(sample.path, f"*.json"))[0] with open(samplePath, "r") as f: - sample = json.load(f) + sampleDict = json.load(f) - if not sample["body_site"] in uniqueBodySites: - uniqueBodySites[sample["body_site"]] = len(uniqueBodySites) - classDistribution[sample["body_site"]] = 1 + if not sampleDict["body_site"] in uniqueBodySites: + uniqueBodySites[sampleDict["body_site"]] = len(uniqueBodySites) + classDistribution[sampleDict["body_site"]] = 1 else: - classDistribution[sample["body_site"]] += 1 + classDistribution[sampleDict["body_site"]] += 1 - for bacteria in sample["97"]: + for bacteria in sampleDict["97"]: taxons = bacteria["taxon"].split(";") taxon = taxons[level] @@ -83,7 +83,7 @@ def loadDataStd(dataset: CustomDataset, taskRun: TaskRun[CustomDataset]) -> tupl return level, datasetLen, uniqueTaxons, uniqueBodySites -def prepareForTrainingStd(level: int, datasetLen: int, uniqueTaxons: dict, uniqueBodySites: dict, taskRun: TaskRun[CustomDataset]) -> tuple[np.ndarray, np.ndarray]: +def prepareForTrainingStd(level: int, datasetLen: int, uniqueTaxons: dict, uniqueBodySites: dict, taskRun: TaskRun[CustomDataset]) -> tuple[np.ndarray, np.ndarray, list[str]]: inputMatrix = np.zeros((datasetLen, len(uniqueTaxons))) outputMatrix = np.zeros((datasetLen, 1)) @@ -97,10 +97,10 @@ def prepareForTrainingStd(level: int, datasetLen: int, uniqueTaxons: dict, uniqu samplePath = glob.glob(os.path.join(sample.path, f"*.json"))[0] with open(samplePath, "r") as f: - sample = json.load(f) + sampleDict = json.load(f) - for bacteria in sample["97"]: - sampleIdList.append(sample["_id"]["$oid"]) + for bacteria in sampleDict["97"]: + sampleIdList.append(sampleDict["_id"]["$oid"]) taxons = bacteria["taxon"].split(";") taxon = taxons[level] @@ -109,6 +109,6 @@ def prepareForTrainingStd(level: int, datasetLen: int, uniqueTaxons: dict, uniqu c = bacteria["count"] inputMatrix[i, encodedTaxon] += c - outputMatrix[i, 0] = uniqueBodySites[sample["body_site"]] + outputMatrix[i, 0] = uniqueBodySites[sampleDict["body_site"]] return inputMatrix, outputMatrix, sampleIdList diff --git a/tasks/bio-bodysite-prediction/src/objects.py b/tasks/bio-bodysite-prediction/src/objects.py index 4b4d6776..9e573fc7 100644 --- a/tasks/bio-bodysite-prediction/src/objects.py +++ b/tasks/bio-bodysite-prediction/src/objects.py @@ -14,7 +14,7 @@ def __init__(self, taxonId: str, count: int): class Sample: - def __init__(self, sampleId: str, bodySite: str, associationSite: str, taxons: Optional[list[Taxon]] = []) -> None: + def __init__(self, sampleId: str, bodySite: str, associationSite: str, taxons: list[Taxon] = []) -> None: self.sampleId = sampleId self.bodySite = bodySite self.associationSite = associationSite diff --git a/tasks/bio-bodysite-prediction/src/utils.py b/tasks/bio-bodysite-prediction/src/utils.py index 3771ad00..f51193ea 100644 --- a/tasks/bio-bodysite-prediction/src/utils.py +++ b/tasks/bio-bodysite-prediction/src/utils.py @@ -103,7 +103,7 @@ def saveFeatureTable(featureTablePath: str, tableInput: np.ndarray, taskRun: Tas def savePlotFig( taskRun: TaskRun[CustomDataset], distributionDict: dict, - savePath: str, + savePath: Path, fileName: str, xLabelRotation: bool, xLabel: str, @@ -133,7 +133,7 @@ def savePlotFig( def savePredictionFile( taskRun: TaskRun[CustomDataset], - savePath: str, + savePath: Path, xTrain: csr_matrix, xTest: csr_matrix, sampleIdList: list, diff --git a/tasks/bio-bodysite-prediction/task.yaml 
b/tasks/bio-bodysite-prediction/task.yaml index a49e393a..07fe8d8a 100644 --- a/tasks/bio-bodysite-prediction/task.yaml +++ b/tasks/bio-bodysite-prediction/task.yaml @@ -31,7 +31,7 @@ param_groups: \ \"Microbiome Forensics\r\n Institute Zurich\". Coretex.ai sample must\ \ contain only a single json file\r\n which contains data for a single\ \ BioInformatics sample." - value: null + value: 6327 data_type: dataset required: true - name: trainedModel @@ -166,6 +166,6 @@ param_groups: \ uploaded to\r\n Coretex.ai just before training has started. This cache\ \ depends on\r\n the same parameters as the 1st step cache, as well as\ \ \"quantize\" and\r\n \"percentile\" parameters" - value: true + value: false data_type: bool required: true diff --git a/tasks/bio-read-quality/main.py b/tasks/bio-read-quality/main.py index e84d65c4..efec9786 100644 --- a/tasks/bio-read-quality/main.py +++ b/tasks/bio-read-quality/main.py @@ -9,7 +9,7 @@ from coretex import currentTaskRun, SequenceDataset, folder_manager, TaskRun -def calculateAverageScores(qualityScores: list[list[int]]) -> list[float]: +def calculateAverageScores(qualityScores: list[list[int]]) -> list[int]: maxLength = max(len(readScores) for readScores in qualityScores) totalScores = [0] * maxLength @@ -17,10 +17,10 @@ def calculateAverageScores(qualityScores: list[list[int]]) -> list[float]: for i in range(len(readScores)): totalScores[i] += readScores[i] - return [score / len(qualityScores) for score in totalScores] + return [int(score / len(qualityScores)) for score in totalScores] -def analyzeFastq(sequencePath: Path) -> list[float]: +def analyzeFastq(sequencePath: Path) -> list[int]: qualityScores: list[list[int]] = [] with sequencePath.open("r") as file: for record in SeqIO.parse(file, "fastq"): @@ -29,7 +29,7 @@ def analyzeFastq(sequencePath: Path) -> list[float]: return calculateAverageScores(qualityScores) -def createPlot(scores: list[float], title: str, plotPath: Path) -> Path: +def createPlot(scores: list[int], title: str, plotPath: Path) -> Path: fig, ax = plt.subplots(figsize = (10, 6)) ax.plot(range(len(scores)), scores, linestyle = "-", color = "b", linewidth = 2, label = "Phred Scores") @@ -52,8 +52,8 @@ def main() -> None: taskRun.setDatasetType(SequenceDataset) taskRun.dataset.download() - forwardScores: list[list[float]] = [] - reverseScores: list[list[float]] = [] + forwardScores: list[list[int]] = [] + reverseScores: list[list[int]] = [] for sample in taskRun.dataset.samples: logging.info(f">> [Quality Scores] Analysing sample \"{sample.name}\"") diff --git a/tasks/bio-region-seperation/src/__init__.py b/tasks/bio-region-seperation/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/bio-region-seperation/src/separate.py b/tasks/bio-region-seperation/src/separate.py index a3b2ae93..52674a4b 100644 --- a/tasks/bio-region-seperation/src/separate.py +++ b/tasks/bio-region-seperation/src/separate.py @@ -1,5 +1,5 @@ from pathlib import Path -from io import BufferedWriter +from io import TextIOWrapper import logging @@ -13,7 +13,7 @@ def argmax(array: list) -> int: def splitToFiles(inputFile: Path, readClasses: list[int], groups: list[Path]) -> None: - outFiles: list[BufferedWriter] = [] + outFiles: list[TextIOWrapper] = [] for group in groups: outFiles.append(open(group / inputFile.name, "a")) diff --git a/tasks/contextual-targeting/main.py b/tasks/contextual-targeting/main.py index 89880950..a5f8a848 100644 --- a/tasks/contextual-targeting/main.py +++ b/tasks/contextual-targeting/main.py @@ -17,7 
+17,7 @@ MODEL_SCORER_NAME = "deepspeech-0.8.2-model.scorer" -def transcribe(dataset: CustomDataset, parameters: Dict[str, Any]) -> List[Tuple[CustomSample, Transcription]]: +def transcribe(dataset: CustomDataset, parameters: Dict[str, Any]) -> Transcription: modelFile = fetchModelFile(parameters["modelUrl"], MODEL_NAME, ".pbmm") modelScorerFile = fetchModelFile(parameters["modelScorerUrl"], MODEL_SCORER_NAME, ".scorer") diff --git a/tasks/image-augmentation/src/augmentation.py b/tasks/image-augmentation/src/augmentation.py index fa94d7d8..4406f61e 100644 --- a/tasks/image-augmentation/src/augmentation.py +++ b/tasks/image-augmentation/src/augmentation.py @@ -1,3 +1,5 @@ +from typing import Optional + import logging import cv2 @@ -10,7 +12,7 @@ from .utils import uploadAugmentedImage -def mask2poly(mask: np.ndarray) -> list[int]: +def mask2poly(mask: np.ndarray) -> Optional[list[int]]: contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) if len(contours) == 0: logging.warning(">> [Image Augmentation] Could not find annotated area on augmented image") @@ -27,13 +29,17 @@ def mask2poly(mask: np.ndarray) -> list[int]: return segmentation -def transformAnnotationInstances(sampleData: AnnotatedImageSampleData, pipeline: iaa.Sequential) -> CoretexImageAnnotation: +def transformAnnotationInstances(sampleData: AnnotatedImageSampleData, pipeline: iaa.Sequential) -> Optional[list[CoretexSegmentationInstance]]: augmentedInstances: list[CoretexSegmentationInstance] = [] - for instance in sampleData.annotation.instances: + annotation = sampleData.annotation + if annotation is None: + return None + + for instance in annotation.instances: mask = instance.extractSegmentationMask( - sampleData.annotation.width, - sampleData.annotation.height + annotation.width, + annotation.height ) mask = np.repeat(mask[..., None] * 255, 3, axis = -1) @@ -74,13 +80,15 @@ def augmentImage( augmentedImage = firstPipeline_.augment_image(image) augmentedImage = secondPipeline.augment_image(augmentedImage) augmentedInstances = transformAnnotationInstances(sampleData, firstPipeline_) - - annotation = CoretexImageAnnotation.create( - sample.name, - augmentedImage.shape[1], - augmentedImage.shape[0], - augmentedInstances - ) + if augmentedInstances is not None: + annotation = CoretexImageAnnotation.create( + sample.name, + augmentedImage.shape[1], + augmentedImage.shape[0], + augmentedInstances + ) + else: + annotation = None augmentedImageName = f"{sample.name}-{i}" + sample.imagePath.suffix uploadAugmentedImage(augmentedImageName, augmentedImage, annotation, outputDataset) diff --git a/tasks/image-augmentation/src/utils.py b/tasks/image-augmentation/src/utils.py index 967eaac7..f9f3c887 100644 --- a/tasks/image-augmentation/src/utils.py +++ b/tasks/image-augmentation/src/utils.py @@ -1,3 +1,5 @@ +from typing import Optional + import logging from numpy import ndarray @@ -10,7 +12,7 @@ def uploadAugmentedImage( imageName: str, augmentedImage: ndarray, - annotation: CoretexImageAnnotation, + annotation: Optional[CoretexImageAnnotation], outputDataset: ImageDataset ) -> None: @@ -23,8 +25,9 @@ def uploadAugmentedImage( logging.error(f">> [Image Augmentation] Failed to upload sample {imagePath} - \"{ex}\"") return - if not augmentedSample.saveAnnotation(annotation): - logging.error(f">> [Image Augmentation] Failed to update sample annotation {imagePath}") + if annotation is not None: + if not augmentedSample.saveAnnotation(annotation): + logging.error(f">> [Image Augmentation] Failed to update 
sample annotation {imagePath}") def copySample(sample: ImageSample, dataset: ImageDataset) -> None: diff --git a/tasks/image-augmentation/task.yaml b/tasks/image-augmentation/task.yaml index 7fe19739..4dc4eb00 100644 --- a/tasks/image-augmentation/task.yaml +++ b/tasks/image-augmentation/task.yaml @@ -13,7 +13,7 @@ param_groups: params: - name: dataset description: The image dataset from which images will be taken for augmentation. - value: null + value: 38168 data_type: dataset required: true - name: outputs @@ -27,7 +27,7 @@ param_groups: params: - name: numOfImages description: The number of augmented images each of the input images will generate. - value: 5 + value: 1 data_type: int required: true - name: flipHorizontalPrc diff --git a/tasks/image-segmentation/src/dataset.py b/tasks/image-segmentation/src/dataset.py index 6f265862..8ff069ce 100644 --- a/tasks/image-segmentation/src/dataset.py +++ b/tasks/image-segmentation/src/dataset.py @@ -14,7 +14,7 @@ class Augment(tf.keras.layers.Layer): - def __init__(self, seed=42): + def __init__(self, seed = 42): super().__init__() self.augmentInputs = RandomFlip( diff --git a/tasks/image-segmentation/src/detect.py b/tasks/image-segmentation/src/detect.py index 71b31df5..9f91c96f 100644 --- a/tasks/image-segmentation/src/detect.py +++ b/tasks/image-segmentation/src/detect.py @@ -18,7 +18,12 @@ def run(taskRun: TaskRun, model: KerasModel, dataset: ImageDataset) -> None: sampleData = sample.load() - if hasDotAnnotation(sampleData.annotation): + annotation = sampleData.annotation + if annotation is None: + logging.warning(f">> [Image Segmentation] Sample \"{sample.name}\" (ID: {sample.id}) has no annotation. Skipping Sample") + continue + + if hasDotAnnotation(annotation): logging.warning(f">> [Image Segmentation] Sample \"{sample.name}\" (ID: {sample.id}) has invalid annotation (too few coordinates). 
Skipping Sample") continue diff --git a/tasks/image-segmentation/src/utils.py b/tasks/image-segmentation/src/utils.py index eca7f28a..20f34376 100644 --- a/tasks/image-segmentation/src/utils.py +++ b/tasks/image-segmentation/src/utils.py @@ -28,7 +28,7 @@ def createMask(predictionMask: np.ndarray) -> tf.Tensor: def saveDatasetPredictions(group: str, model: KerasModel, dataset: tf.data.Dataset, classes: ImageDatasetClasses) -> None: predictions = model.predict(dataset) for index, prediction in enumerate(predictions): - mask: np.ndarray = createMask([prediction]).numpy() + mask: np.ndarray = createMask(np.array([prediction])).numpy() coloredMask = np.empty(shape = (mask.shape[0], mask.shape[1], 3)) for h, row in enumerate(mask): diff --git a/tasks/llama2-lora/src/configurations.py b/tasks/llama2-lora/src/configurations.py index 6b8fa5bf..f4503d94 100644 --- a/tasks/llama2-lora/src/configurations.py +++ b/tasks/llama2-lora/src/configurations.py @@ -6,7 +6,7 @@ def getPeftParameters(loraAlpha: float, loraDropout: float, rank: int) -> LoraConfig: return LoraConfig( - lora_alpha = loraAlpha, + lora_alpha = int(loraAlpha), lora_dropout = loraDropout, r = rank, bias = "none", diff --git a/tasks/llama2-lora/src/model.py b/tasks/llama2-lora/src/model.py index fd642d41..bac2d4ef 100644 --- a/tasks/llama2-lora/src/model.py +++ b/tasks/llama2-lora/src/model.py @@ -17,6 +17,8 @@ def getModelName(modelVersion: str) -> str: if modelVersion == "70b-chat": return "NousResearch/Llama-2-70b-chat-hf" + raise ValueError("Invalid model version") + def loadTokenizer(modelName: str, context: Optional[int] = None) -> AutoTokenizer: tokenizer = AutoTokenizer.from_pretrained(modelName, trust_remote_code=True, model_max_length = context) diff --git a/tasks/llm-text-processing/main.py b/tasks/llm-text-processing/main.py index 99657843..df79ad5d 100644 --- a/tasks/llm-text-processing/main.py +++ b/tasks/llm-text-processing/main.py @@ -1,3 +1,5 @@ +from typing import Optional + import time import logging import subprocess @@ -38,7 +40,7 @@ def checkOllamaServer() -> bool: return False -def launchOllamaServer() -> subprocess.Popen[bytes]: +def launchOllamaServer() -> Optional[subprocess.Popen[bytes]]: if not isOllamaInstalled(): installOllama() diff --git a/tasks/object-detection-yolov10/src/predict.py b/tasks/object-detection-yolov10/src/predict.py index e512d03b..bdcdf95b 100644 --- a/tasks/object-detection-yolov10/src/predict.py +++ b/tasks/object-detection-yolov10/src/predict.py @@ -19,7 +19,7 @@ def classByLabelId(labelId: int, classes: ImageDatasetClasses) -> Optional[Image return classes.classByLabel(classes.labels[labelId]) -def processResult(result: Results, classes: list[ImageDatasetClasses], savePath: Path) -> None: +def processResult(result: Results, classes: ImageDatasetClasses, savePath: Path) -> None: fig = plt.figure(num = 1, clear = True) plt.imshow(result.orig_img) @@ -45,11 +45,11 @@ def processResult(result: Results, classes: list[ImageDatasetClasses], savePath: def isSampleValid(sample: ImageSample) -> bool: try: - instances = sample.load().annotation.instances - if instances is None: + annotation = sample.load().annotation + if annotation is None: return False - for instance in instances: + for instance in annotation.instances: if any(len(segmentation) < DIMENSION_THRESHOLD for segmentation in instance.segmentations): return False except Exception as e: diff --git a/tasks/object-detection-yolov8/src/predict.py b/tasks/object-detection-yolov8/src/predict.py index 8d03efd0..ed97cacf 100644 --- 
a/tasks/object-detection-yolov8/src/predict.py +++ b/tasks/object-detection-yolov8/src/predict.py @@ -19,7 +19,7 @@ def classByLabelId(labelId: int, classes: ImageDatasetClasses) -> Optional[Image return classes.classByLabel(classes.labels[labelId]) -def processResult(result: Results, classes: list[ImageDatasetClasses], savePath: Path): +def processResult(result: Results, classes: ImageDatasetClasses, savePath: Path) -> None: fig = plt.figure(num = 1, clear = True) plt.imshow(result.orig_img) diff --git a/tasks/ollama-chatbot-fn/resources/function/function.py b/tasks/ollama-chatbot-fn/resources/function/function.py index f64362ff..233a344d 100644 --- a/tasks/ollama-chatbot-fn/resources/function/function.py +++ b/tasks/ollama-chatbot-fn/resources/function/function.py @@ -30,8 +30,8 @@ def response(requestData: dict[str, Any]) -> dict[str, Any]: sessionPath = memoryFolder / f"{sessionId}.json" query = requestData.get("query") - if query == None: - functions.badRequest("Query cannot be empty") + if not isinstance(query, str): + return functions.badRequest("Query cannot be empty") if inputSessionId is None or not sessionPath.exists(): logging.debug(">>> Creating new session") @@ -55,7 +55,7 @@ def response(requestData: dict[str, Any]) -> dict[str, Any]: }] else: with sessionPath.open("r") as file: - messages: list[dict[str, str]] = json.load(file) + messages: list[dict[str, str]] = json.load(file) # type: ignore[no-redef] messages.append({ "role": "user", diff --git a/tasks/ollama-chatbot-fn/resources/function/model.py b/tasks/ollama-chatbot-fn/resources/function/model.py index 38d2a319..4dd3eabd 100644 --- a/tasks/ollama-chatbot-fn/resources/function/model.py +++ b/tasks/ollama-chatbot-fn/resources/function/model.py @@ -1,3 +1,5 @@ +from typing import Optional + import logging import subprocess @@ -50,7 +52,7 @@ def checkOllamaServer() -> bool: return False -def launchOllamaServer() -> subprocess.Popen[bytes]: +def launchOllamaServer() -> Optional[subprocess.Popen[bytes]]: if not isOllamaInstalled(): installOllama() diff --git a/tasks/ollama-rag-index/main.py b/tasks/ollama-rag-index/main.py index 8712a96a..afcbe763 100644 --- a/tasks/ollama-rag-index/main.py +++ b/tasks/ollama-rag-index/main.py @@ -64,7 +64,7 @@ def loadCorpus(dataset: CustomDataset) -> np.ndarray: with open(txtPath, "r") as f: corpus.append(f.read()) - return np.array(corpus) + return np.array(corpus) def main(): diff --git a/tasks/ollama-rag-index/src/model.py b/tasks/ollama-rag-index/src/model.py index 38d2a319..4dd3eabd 100644 --- a/tasks/ollama-rag-index/src/model.py +++ b/tasks/ollama-rag-index/src/model.py @@ -1,3 +1,5 @@ +from typing import Optional + import logging import subprocess @@ -50,7 +52,7 @@ def checkOllamaServer() -> bool: return False -def launchOllamaServer() -> subprocess.Popen[bytes]: +def launchOllamaServer() -> Optional[subprocess.Popen[bytes]]: if not isOllamaInstalled(): installOllama() diff --git a/tasks/qiime-import/src/__init__.py b/tasks/qiime-import/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tasks/qiime-import/src/utils.py b/tasks/qiime-import/src/utils.py index 050f92f0..01a87707 100644 --- a/tasks/qiime-import/src/utils.py +++ b/tasks/qiime-import/src/utils.py @@ -27,7 +27,7 @@ def detectFileEncoding(path: Path) -> Optional[str]: raise ValueError(">> [Qiime: Import] Metadate file is too small") with path.open("rb") as file: - encoding = cchardet.detect(file.read())["encoding"] + encoding: Optional[str] = cchardet.detect(file.read())["encoding"] if encoding is 
None: logging.warning(">> [Qiime: Import] Could not determine metadata encoding") From 7c1fe66e74272e59c8cc1a08c8c6490b354eb6d5 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Mon, 1 Jul 2024 14:50:02 +0200 Subject: [PATCH 02/32] CTX-5783: Additional mypy error fixes --- tasks/annotated-image-extractor/.mypy.ini | 36 +++++++++++++++++++ tasks/audio-analytics/main.py | 2 +- tasks/audio-analytics/src/text_search.py | 2 +- tasks/audio-analytics/src/utils.py | 2 +- tasks/bio-bodysite-prediction-nn/.mypy.ini | 33 +++++++++++++++++ .../resources/function/model.py | 8 ++--- .../resources/function/utils.py | 2 +- tasks/bio-bodysite-prediction-nn/src/model.py | 8 ++--- tasks/bio-bodysite-prediction-nn/src/train.py | 2 +- tasks/bio-bodysite-prediction-nn/src/utils.py | 6 ++-- tasks/bio-bodysite-prediction/.mypy.ini | 33 +++++++++++++++++ tasks/bio-bodysite-prediction/src/train.py | 8 +++-- tasks/bio-bodysite-prediction/src/utils.py | 4 +-- tasks/bio-primer-removal/.mypy.ini | 22 ++++++++++++ tasks/bio-primer-removal/main.py | 2 +- tasks/bio-read-quality/.mypy.ini | 27 ++++++++++++++ tasks/bio-region-seperation/mypy.ini | 2 +- tasks/body-tracking/.mypy.ini | 30 ++++++++++++++++ tasks/contextual-targeting/.mypy.ini | 6 ++++ tasks/contextual-targeting/main.py | 2 +- tasks/image-augmentation/.mypy.ini | 24 +++++++++++++ tasks/image-extractor/.mypy.ini | 6 ++++ tasks/image-quality-predictor/.mypy.ini | 3 ++ tasks/image-segmentation/.mypy.ini | 6 ++++ tasks/image-segmentation/src/callbacks.py | 4 +-- tasks/image-segmentation/src/dataset.py | 6 ++-- tasks/image-segmentation/src/model.py | 2 +- tasks/llama2-lora/.mypy.ini | 33 +++++++++++++++++ tasks/llama2-lora/main.py | 2 +- tasks/llm-text-processing/.mypy.ini | 24 +++++++++++++ tasks/model-comparison/.mypy.ini | 18 ++++++++++ tasks/model-transfer/.mypy.ini | 18 ++++++++++ tasks/object-detection-yolov10/.mypy.ini | 6 ++++ tasks/object-detection-yolov8/.mypy.ini | 6 ++++ tasks/object-detection-yolov8/src/predict.py | 8 ++--- tasks/ollama-chatbot-fn/.mypy.ini | 27 ++++++++++++++ tasks/ollama-chatbot-fn/main.py | 4 +-- .../resources/function/utils.py | 2 +- tasks/ollama-rag-index/.mypy.ini | 30 ++++++++++++++++ tasks/ollama-rag-index/main.py | 2 +- tasks/ollama-sentiment-analysis-fn/.mypy.ini | 27 ++++++++++++++ tasks/ollama-sentiment-analysis-fn/main.py | 2 +- tasks/qiime-alpha-beta-diversity/.mypy.ini | 24 +++++++++++++ tasks/qiime-alpha-beta-diversity/main.py | 13 +++---- tasks/qiime-dada2/.mypy.ini | 18 ++++++++++ tasks/qiime-demultiplexing/.mypy.ini | 18 ++++++++++ tasks/qiime-import/mypy.ini | 9 +++++ tasks/qiime-taxonomic-analysis/.mypy.ini | 18 ++++++++++ tasks/qiime-taxonomic-analysis/main.py | 2 +- tasks/s3-model-upload/.mypy.ini | 5 +++ tasks/stable-diffusion-fn/.mypy.ini | 24 +++++++++++++ tasks/stable-diffusion-fn/main.py | 2 +- tasks/stable-diffusion/.mypy.ini | 30 ++++++++++++++++ tasks/stable-diffusion/main.py | 2 +- tasks/synthetic-image-generator/.mypy.ini | 6 ++++ tasks/tabular-data-diagnostics/.mypy.ini | 33 +++++++++++++++++ tasks/tabular-data-diagnostics/main.py | 2 +- 57 files changed, 653 insertions(+), 50 deletions(-) create mode 100644 tasks/annotated-image-extractor/.mypy.ini create mode 100644 tasks/bio-bodysite-prediction-nn/.mypy.ini create mode 100644 tasks/bio-bodysite-prediction/.mypy.ini create mode 100644 tasks/bio-primer-removal/.mypy.ini create mode 100644 tasks/bio-read-quality/.mypy.ini create mode 100644 tasks/body-tracking/.mypy.ini create mode 100644 tasks/image-augmentation/.mypy.ini create mode 100644 
tasks/llama2-lora/.mypy.ini create mode 100644 tasks/llm-text-processing/.mypy.ini create mode 100644 tasks/model-comparison/.mypy.ini create mode 100644 tasks/model-transfer/.mypy.ini create mode 100644 tasks/ollama-chatbot-fn/.mypy.ini create mode 100644 tasks/ollama-rag-index/.mypy.ini create mode 100644 tasks/ollama-sentiment-analysis-fn/.mypy.ini create mode 100644 tasks/qiime-alpha-beta-diversity/.mypy.ini create mode 100644 tasks/qiime-dada2/.mypy.ini create mode 100644 tasks/qiime-demultiplexing/.mypy.ini create mode 100644 tasks/qiime-taxonomic-analysis/.mypy.ini create mode 100644 tasks/stable-diffusion-fn/.mypy.ini create mode 100644 tasks/stable-diffusion/.mypy.ini create mode 100644 tasks/tabular-data-diagnostics/.mypy.ini diff --git a/tasks/annotated-image-extractor/.mypy.ini b/tasks/annotated-image-extractor/.mypy.ini new file mode 100644 index 00000000..b6edd0df --- /dev/null +++ b/tasks/annotated-image-extractor/.mypy.ini @@ -0,0 +1,36 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-ultralytics.*] +ignore_missing_imports = True + +[mypy-tensorflow.*] +ignore_missing_imports = True + +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True + +[mypy-coretex.*] +ignore_missing_imports = True diff --git a/tasks/audio-analytics/main.py b/tasks/audio-analytics/main.py index 1c79ff3b..40ba7cf9 100644 --- a/tasks/audio-analytics/main.py +++ b/tasks/audio-analytics/main.py @@ -3,7 +3,7 @@ import logging from coretex import CustomDataset, TaskRun, currentTaskRun -from coretex.nlp import AudioTranscriber +from coretex.nlp import AudioTranscriber # type: ignore[attr-defined] from src import text_search from src.utils import createTranscriptionArtfacts, fetchModelFile diff --git a/tasks/audio-analytics/src/text_search.py b/tasks/audio-analytics/src/text_search.py index f58f8fe0..4057c949 100644 --- a/tasks/audio-analytics/src/text_search.py +++ b/tasks/audio-analytics/src/text_search.py @@ -1,4 +1,4 @@ -from coretex.nlp import Token +from coretex.nlp import Token # type: ignore[attr-defined] from .occurence import EntityOccurrence diff --git a/tasks/audio-analytics/src/utils.py b/tasks/audio-analytics/src/utils.py index 9cd92f41..645e58ea 100644 --- a/tasks/audio-analytics/src/utils.py +++ b/tasks/audio-analytics/src/utils.py @@ -7,7 +7,7 @@ import logging from coretex import CustomSample, cache, TaskRun, folder_manager -from coretex.nlp import Token +from coretex.nlp import Token # type: ignore[attr-defined] from .occurence import NamedEntityRecognitionResult diff --git a/tasks/bio-bodysite-prediction-nn/.mypy.ini b/tasks/bio-bodysite-prediction-nn/.mypy.ini new file mode 100644 index 00000000..bc21b36c --- /dev/null +++ b/tasks/bio-bodysite-prediction-nn/.mypy.ini @@ -0,0 +1,33 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True 
+disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-tensorflow.*] +ignore_missing_imports = True + +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-sklearn.*] +ignore_missing_imports = True diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/model.py b/tasks/bio-bodysite-prediction-nn/resources/function/model.py index 8ecb16ab..6ca81653 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/model.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/model.py @@ -16,7 +16,7 @@ from utils import convertFromOneHot -class GatingLayer(tf.keras.layers.Layer): +class GatingLayer(tf.keras.layers.Layer): # type: ignore[misc] def __init__( self, @@ -83,7 +83,7 @@ def hard_sigmoid(self, x: Tensor, a: Tensor) -> Tensor: return x -class Model(tf.keras.Model): +class Model(tf.keras.Model): # type: ignore[misc] def __init__( self, @@ -325,7 +325,7 @@ def _valid_step(self, X: Tensor, y: Tensor) -> Tensor: return y_pred_hot - def predict(self, data: tf.data.Dataset, batches: int): + def predict(self, data: tf.data.Dataset, batches: int) -> np.ndarray: y_pred: list[list[int]] = [] for i, batch in enumerate(data): @@ -363,7 +363,7 @@ def test_from_array(self, X: ArrayLike) -> np.ndarray: if type(X) == sparse.csr_matrix: X = X.toarray().astype(np.float32) - return self.soft_to_hot(self._predict_from_array(X)).numpy() + return self.soft_to_hot(self._predict_from_array(X)).numpy() # type: ignore[no-any-return] @tf.function diff --git a/tasks/bio-bodysite-prediction-nn/resources/function/utils.py b/tasks/bio-bodysite-prediction-nn/resources/function/utils.py index a524321a..bbdbbe98 100644 --- a/tasks/bio-bodysite-prediction-nn/resources/function/utils.py +++ b/tasks/bio-bodysite-prediction-nn/resources/function/utils.py @@ -47,7 +47,7 @@ def oneHotEncoding(vector: Union[np.ndarray, int], numClasses: Optional[int] = N def convertFromOneHot(matrix: np.ndarray) -> np.ndarray: - numOfRows = len(matrix) if isinstance(matrix, list) else matrix.shape[0] + numOfRows = matrix.shape[0] if not numOfRows > 0: raise RuntimeError(f">> [MicrobiomeForensics] Encountered array with {numOfRows} rows when decoding one hot vector") diff --git a/tasks/bio-bodysite-prediction-nn/src/model.py b/tasks/bio-bodysite-prediction-nn/src/model.py index 8ecb16ab..6ca81653 100644 --- a/tasks/bio-bodysite-prediction-nn/src/model.py +++ b/tasks/bio-bodysite-prediction-nn/src/model.py @@ -16,7 +16,7 @@ from utils import convertFromOneHot -class GatingLayer(tf.keras.layers.Layer): +class GatingLayer(tf.keras.layers.Layer): # type: ignore[misc] def __init__( self, @@ -83,7 +83,7 @@ def hard_sigmoid(self, x: Tensor, a: Tensor) -> Tensor: return x -class Model(tf.keras.Model): +class Model(tf.keras.Model): # type: ignore[misc] def __init__( self, @@ -325,7 +325,7 @@ def _valid_step(self, X: Tensor, y: Tensor) -> Tensor: return y_pred_hot - def predict(self, data: tf.data.Dataset, batches: int): + def predict(self, data: tf.data.Dataset, batches: int) -> np.ndarray: y_pred: list[list[int]] = [] for i, batch in enumerate(data): @@ -363,7 +363,7 @@ def test_from_array(self, X: ArrayLike) -> np.ndarray: if type(X) == sparse.csr_matrix: X = X.toarray().astype(np.float32) - return self.soft_to_hot(self._predict_from_array(X)).numpy() + 
return self.soft_to_hot(self._predict_from_array(X)).numpy() # type: ignore[no-any-return] @tf.function diff --git a/tasks/bio-bodysite-prediction-nn/src/train.py b/tasks/bio-bodysite-prediction-nn/src/train.py index 476469f0..363e0918 100644 --- a/tasks/bio-bodysite-prediction-nn/src/train.py +++ b/tasks/bio-bodysite-prediction-nn/src/train.py @@ -108,4 +108,4 @@ def train(taskRun: TaskRun[CustomDataset], datasetPath: Path, uniqueBodySites: d with open(modelPath / "uniqueBodySites.pkl", "wb") as f: pickle.dump(uniqueBodySites, f) - return accuracy + return float(accuracy) diff --git a/tasks/bio-bodysite-prediction-nn/src/utils.py b/tasks/bio-bodysite-prediction-nn/src/utils.py index 20f65dfd..95a60013 100644 --- a/tasks/bio-bodysite-prediction-nn/src/utils.py +++ b/tasks/bio-bodysite-prediction-nn/src/utils.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, Union, Any from pathlib import Path import csv @@ -12,7 +12,7 @@ from coretex import CustomDataset, TaskRun, Model, folder_manager -def jsonPretty(data, savePath) -> None: +def jsonPretty(data: dict[str, Any], savePath: Path) -> None: with open(savePath, "w") as write_file: json.dump(data, write_file, indent=4) @@ -262,7 +262,7 @@ def oneHotEncoding(vector: Union[np.ndarray, int], numClasses: Optional[int] = N def convertFromOneHot(matrix: np.ndarray) -> np.ndarray: - numOfRows = len(matrix) if isinstance(matrix, list) else matrix.shape[0] + numOfRows = matrix.shape[0] if not numOfRows > 0: raise RuntimeError(f">> [MicrobiomeForensics] Encountered array with {numOfRows} rows when decoding one hot vector") diff --git a/tasks/bio-bodysite-prediction/.mypy.ini b/tasks/bio-bodysite-prediction/.mypy.ini new file mode 100644 index 00000000..0bc37ed1 --- /dev/null +++ b/tasks/bio-bodysite-prediction/.mypy.ini @@ -0,0 +1,33 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-sklearn.*] +ignore_missing_imports = True + +[mypy-xgboost.*] +ignore_missing_imports = True diff --git a/tasks/bio-bodysite-prediction/src/train.py b/tasks/bio-bodysite-prediction/src/train.py index 61c5986c..c06caf11 100644 --- a/tasks/bio-bodysite-prediction/src/train.py +++ b/tasks/bio-bodysite-prediction/src/train.py @@ -1,3 +1,5 @@ +from typing import Any + import logging import pickle import time @@ -16,7 +18,7 @@ from .utils import savePredictionFile -class Progress(TrainingCallback): +class Progress(TrainingCallback): # type: ignore[misc] """ Callback function for XGBoost showing the loss and accuracy of the model at the current epoch.\n @@ -34,7 +36,7 @@ def __init__( self.eval = xgb.DMatrix(evalSet[0][0], label = evalSet[0][1]) self.yEval = evalSet[0][1] - def after_iteration(self, model: XGBClassifier, epoch: int, evals_log) -> bool: + def after_iteration(self, model: XGBClassifier, epoch: int, evals_log: Any) -> bool: for data, metric in evals_log.items(): for metricName, log in metric.items(): loss = log[-1] @@ -134,4 +136,4 @@ def train( with 
open(modelPath / "uniqueBodySites.pkl", "wb") as f: pickle.dump(uniqueBodySites, f) - return accuracy + return float(accuracy) diff --git a/tasks/bio-bodysite-prediction/src/utils.py b/tasks/bio-bodysite-prediction/src/utils.py index f51193ea..342a579c 100644 --- a/tasks/bio-bodysite-prediction/src/utils.py +++ b/tasks/bio-bodysite-prediction/src/utils.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Any from pathlib import Path import csv @@ -16,7 +16,7 @@ from .objects import Sample -def jsonPretty(data, savePath) -> None: +def jsonPretty(data: dict[str, Any], savePath: Path) -> None: with open(savePath, "w") as write_file: json.dump(data, write_file, indent=4) diff --git a/tasks/bio-primer-removal/.mypy.ini b/tasks/bio-primer-removal/.mypy.ini new file mode 100644 index 00000000..44cabdf1 --- /dev/null +++ b/tasks/bio-primer-removal/.mypy.ini @@ -0,0 +1,22 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports diff --git a/tasks/bio-primer-removal/main.py b/tasks/bio-primer-removal/main.py index c7d376d0..384f4572 100644 --- a/tasks/bio-primer-removal/main.py +++ b/tasks/bio-primer-removal/main.py @@ -8,7 +8,7 @@ from coretex.bioinformatics import cutadaptTrim -def uploadTrimmedReads(sampleName: str, dataset: SequenceDataset, forwardFile: Path, reverseFile: Optional[Path] = None): +def uploadTrimmedReads(sampleName: str, dataset: SequenceDataset, forwardFile: Path, reverseFile: Optional[Path] = None) -> None: zipPath = folder_manager.temp / f"{sampleName}.zip" with ZipFile(zipPath, 'w', ZIP_DEFLATED) as archive: archive.write(forwardFile, forwardFile.name) diff --git a/tasks/bio-read-quality/.mypy.ini b/tasks/bio-read-quality/.mypy.ini new file mode 100644 index 00000000..595a74e1 --- /dev/null +++ b/tasks/bio-read-quality/.mypy.ini @@ -0,0 +1,27 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-Bio.*] +ignore_missing_imports = True diff --git a/tasks/bio-region-seperation/mypy.ini b/tasks/bio-region-seperation/mypy.ini index 21446fac..6f60cbe8 100644 --- a/tasks/bio-region-seperation/mypy.ini +++ b/tasks/bio-region-seperation/mypy.ini @@ -2,7 +2,7 @@ [mypy] exclude = venv -python_version = 3.8 +python_version = 3.9 pretty = True warn_return_any = True warn_no_return = True diff --git a/tasks/body-tracking/.mypy.ini b/tasks/body-tracking/.mypy.ini new file mode 100644 index 00000000..43a9b317 --- /dev/null +++ b/tasks/body-tracking/.mypy.ini @@ -0,0 +1,30 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True 
+warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-tensorflow.*] +ignore_missing_imports = True + +[mypy-tensorflowjs.*] +ignore_missing_imports = True + +[mypy-coremltools.*] +ignore_missing_imports = True diff --git a/tasks/contextual-targeting/.mypy.ini b/tasks/contextual-targeting/.mypy.ini index a9714b0e..dbf10734 100644 --- a/tasks/contextual-targeting/.mypy.ini +++ b/tasks/contextual-targeting/.mypy.ini @@ -24,3 +24,9 @@ exclude = venv [mypy-deepspeech.*] ignore_missing_imports = True + +[mypy-sentence_transformers.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True diff --git a/tasks/contextual-targeting/main.py b/tasks/contextual-targeting/main.py index a5f8a848..db40704c 100644 --- a/tasks/contextual-targeting/main.py +++ b/tasks/contextual-targeting/main.py @@ -4,7 +4,7 @@ import logging from coretex import CustomDataset, TaskRun, CustomSample, currentTaskRun, folder_manager -from coretex.nlp import AudioTranscriber, Transcription +from coretex.nlp import AudioTranscriber, Transcription # type: ignore[attr--defined] import matplotlib.pyplot as plt diff --git a/tasks/image-augmentation/.mypy.ini b/tasks/image-augmentation/.mypy.ini new file mode 100644 index 00000000..5d213555 --- /dev/null +++ b/tasks/image-augmentation/.mypy.ini @@ -0,0 +1,24 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-imgaug.*] +ignore_missing_imports = True diff --git a/tasks/image-extractor/.mypy.ini b/tasks/image-extractor/.mypy.ini index 3fe41755..c77de26e 100644 --- a/tasks/image-extractor/.mypy.ini +++ b/tasks/image-extractor/.mypy.ini @@ -31,3 +31,9 @@ ignore_missing_imports = True [mypy-transformers.*] ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-PIL.*] +ignore_missing_imports = True diff --git a/tasks/image-quality-predictor/.mypy.ini b/tasks/image-quality-predictor/.mypy.ini index 173ed2cc..44a7bc89 100644 --- a/tasks/image-quality-predictor/.mypy.ini +++ b/tasks/image-quality-predictor/.mypy.ini @@ -25,3 +25,6 @@ ignore_missing_imports = True [mypy-torchvision.*] ignore_missing_imports = True + +[mypy-PIL.*] +ignore_missing_imports = True diff --git a/tasks/image-segmentation/.mypy.ini b/tasks/image-segmentation/.mypy.ini index 8f55c84a..aa3114d6 100644 --- a/tasks/image-segmentation/.mypy.ini +++ b/tasks/image-segmentation/.mypy.ini @@ -31,3 +31,9 @@ ignore_missing_imports = True [mypy-tensorflowjs.*] ignore_missing_imports = True + +[mypy-PIL.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True diff --git a/tasks/image-segmentation/src/callbacks.py b/tasks/image-segmentation/src/callbacks.py index 
d60e7d66..0292e5fc 100644 --- a/tasks/image-segmentation/src/callbacks.py +++ b/tasks/image-segmentation/src/callbacks.py @@ -7,14 +7,14 @@ from coretex import currentTaskRun -class DisplayCallback(Callback): +class DisplayCallback(Callback): # type: ignore[misc] def __init__(self, epochs: int) -> None: super().__init__() self.epochs = epochs - def on_epoch_end(self, epoch: int, logs: Optional[dict[str, Any]] = None): + def on_epoch_end(self, epoch: int, logs: Optional[dict[str, Any]] = None) -> None: if logs is None: return diff --git a/tasks/image-segmentation/src/dataset.py b/tasks/image-segmentation/src/dataset.py index 8ff069ce..eac3314a 100644 --- a/tasks/image-segmentation/src/dataset.py +++ b/tasks/image-segmentation/src/dataset.py @@ -12,9 +12,9 @@ from .utils import hasDotAnnotation -class Augment(tf.keras.layers.Layer): +class Augment(tf.keras.layers.Layer): # type: ignore[misc] - def __init__(self, seed = 42): + def __init__(self, seed: int = 42) -> None: super().__init__() self.augmentInputs = RandomFlip( @@ -27,7 +27,7 @@ def __init__(self, seed = 42): seed=seed ) - def call(self, inputs, labels): + def call(self, inputs: tf.Tensor, labels: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor]: inputs = self.augmentInputs(inputs) labels = self.augmentLabels(labels) diff --git a/tasks/image-segmentation/src/model.py b/tasks/image-segmentation/src/model.py index 3e458331..c528ac53 100644 --- a/tasks/image-segmentation/src/model.py +++ b/tasks/image-segmentation/src/model.py @@ -7,7 +7,7 @@ from keras.applications.mobilenet_v2 import MobileNetV2 -class UpSampler(Sequential): +class UpSampler(Sequential): # type: ignore[misc] def __init__(self, filters: int, size: int): super(UpSampler, self).__init__() diff --git a/tasks/llama2-lora/.mypy.ini b/tasks/llama2-lora/.mypy.ini new file mode 100644 index 00000000..fdbd6aa0 --- /dev/null +++ b/tasks/llama2-lora/.mypy.ini @@ -0,0 +1,33 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-torch.*] +ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True + +[mypy-datasets.*] +ignore_missing_imports = True + +[mypy-trl.*] +ignore_missing_imports = True diff --git a/tasks/llama2-lora/main.py b/tasks/llama2-lora/main.py index 54a45972..b48d43db 100644 --- a/tasks/llama2-lora/main.py +++ b/tasks/llama2-lora/main.py @@ -31,7 +31,7 @@ def loadData(dataset: CustomDataset) -> Dataset: def runInference(trainer: SFTTrainer, tokenizer: AutoTokenizer, prompt: str) -> str: textGenerator = pipeline(task = "text-generation", model = trainer.model, tokenizer = tokenizer, max_length = 200) output = textGenerator(f"[INST] {prompt} [/INST]") - return output[0]['generated_text'] + return str(output[0]['generated_text']) def main() -> None: diff --git a/tasks/llm-text-processing/.mypy.ini b/tasks/llm-text-processing/.mypy.ini new file mode 100644 index 00000000..930ede5f --- /dev/null +++ b/tasks/llm-text-processing/.mypy.ini @@ -0,0 +1,24 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True 
+warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-ollama.*] +ignore_missing_imports = True diff --git a/tasks/model-comparison/.mypy.ini b/tasks/model-comparison/.mypy.ini new file mode 100644 index 00000000..d3e1fa10 --- /dev/null +++ b/tasks/model-comparison/.mypy.ini @@ -0,0 +1,18 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False diff --git a/tasks/model-transfer/.mypy.ini b/tasks/model-transfer/.mypy.ini new file mode 100644 index 00000000..d3e1fa10 --- /dev/null +++ b/tasks/model-transfer/.mypy.ini @@ -0,0 +1,18 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False diff --git a/tasks/object-detection-yolov10/.mypy.ini b/tasks/object-detection-yolov10/.mypy.ini index 85078e4e..537f682d 100644 --- a/tasks/object-detection-yolov10/.mypy.ini +++ b/tasks/object-detection-yolov10/.mypy.ini @@ -22,3 +22,9 @@ allow_redefinition = False # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports [mypy-ultralytics.*] ignore_missing_imports = True + +[mypy-yaml.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True diff --git a/tasks/object-detection-yolov8/.mypy.ini b/tasks/object-detection-yolov8/.mypy.ini index 85078e4e..05b98bf1 100644 --- a/tasks/object-detection-yolov8/.mypy.ini +++ b/tasks/object-detection-yolov8/.mypy.ini @@ -22,3 +22,9 @@ allow_redefinition = False # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports [mypy-ultralytics.*] ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-yaml.*] +ignore_missing_imports = True diff --git a/tasks/object-detection-yolov8/src/predict.py b/tasks/object-detection-yolov8/src/predict.py index ed97cacf..854454e8 100644 --- a/tasks/object-detection-yolov8/src/predict.py +++ b/tasks/object-detection-yolov8/src/predict.py @@ -45,11 +45,11 @@ def processResult(result: Results, classes: ImageDatasetClasses, savePath: Path) def isSampleValid(sample: ImageSample) -> bool: try: - instances = sample.load().annotation.instances - if instances is None: + annotation = sample.load().annotation + if annotation is None: return False - for instance in instances: + for instance in annotation.instances: if any(len(segmentation) < DIMENSION_THRESHOLD for segmentation in instance.segmentations): return False except Exception as e: @@ -59,7 +59,7 @@ def isSampleValid(sample: ImageSample) -> bool: return True -def predictBatch(model: YOLO, 
dataset: ImageDataset, startIdx: int, endIdx: int, resultPath: Path): +def predictBatch(model: YOLO, dataset: ImageDataset, startIdx: int, endIdx: int, resultPath: Path) -> None: batch = [sample for sample in dataset.samples[startIdx:endIdx] if isSampleValid(sample)] results: Results = model.predict([sample.imagePath for sample in batch], save = True, project = "./results") diff --git a/tasks/ollama-chatbot-fn/.mypy.ini b/tasks/ollama-chatbot-fn/.mypy.ini new file mode 100644 index 00000000..13cd8bc6 --- /dev/null +++ b/tasks/ollama-chatbot-fn/.mypy.ini @@ -0,0 +1,27 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-faiss.*] +ignore_missing_imports = True + +[mypy-ollama.*] +ignore_missing_imports = True diff --git a/tasks/ollama-chatbot-fn/main.py b/tasks/ollama-chatbot-fn/main.py index 9b3326c0..25ba7791 100644 --- a/tasks/ollama-chatbot-fn/main.py +++ b/tasks/ollama-chatbot-fn/main.py @@ -11,14 +11,14 @@ def copyDir(src: Path, dst: Path, directoryName: str) -> None: shutil.copytree(src, dst / directoryName, copy_function = os.link) -def getIndexPath(dataset: CustomDataset): +def getIndexPath(dataset: CustomDataset) -> Path: sample = dataset.samples[0] sample.unzip() return sample.path / "embeddings.index" -def main(): +def main() -> None: taskRun = currentTaskRun() model = Model.createModel(f"{taskRun.id}-rag-chatbot", taskRun.id, 1.0, {}) diff --git a/tasks/ollama-chatbot-fn/resources/function/utils.py b/tasks/ollama-chatbot-fn/resources/function/utils.py index 6fc27952..1012f8af 100644 --- a/tasks/ollama-chatbot-fn/resources/function/utils.py +++ b/tasks/ollama-chatbot-fn/resources/function/utils.py @@ -15,6 +15,6 @@ def loadCorpusAndIndex(dirPath: Path) -> tuple[np.ndarray, Any]: return corpus, index -def retrieveDocuments(queryEmbedding: np.ndarray, index: faiss.IndexFlatL2, corpus: np.ndarray, k: int): +def retrieveDocuments(queryEmbedding: np.ndarray, index: faiss.IndexFlatL2, corpus: np.ndarray, k: int) -> list[tuple[str, int]]: distances, indices = index.search(queryEmbedding, k) return [(corpus[i], distances[0][j]) for j, i in enumerate(indices[0])] diff --git a/tasks/ollama-rag-index/.mypy.ini b/tasks/ollama-rag-index/.mypy.ini new file mode 100644 index 00000000..7b6c87aa --- /dev/null +++ b/tasks/ollama-rag-index/.mypy.ini @@ -0,0 +1,30 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-ollama.*] +ignore_missing_imports = True + +[mypy-faiss.*] +ignore_missing_imports = True + +[mypy-fitz.*] +ignore_missing_imports = True diff --git a/tasks/ollama-rag-index/main.py b/tasks/ollama-rag-index/main.py index 
afcbe763..a5c5bc27 100644 --- a/tasks/ollama-rag-index/main.py +++ b/tasks/ollama-rag-index/main.py @@ -67,7 +67,7 @@ def loadCorpus(dataset: CustomDataset) -> np.ndarray: return np.array(corpus) -def main(): +def main() -> None: taskRun = currentTaskRun() taskRun.dataset.download() diff --git a/tasks/ollama-sentiment-analysis-fn/.mypy.ini b/tasks/ollama-sentiment-analysis-fn/.mypy.ini new file mode 100644 index 00000000..9de5e299 --- /dev/null +++ b/tasks/ollama-sentiment-analysis-fn/.mypy.ini @@ -0,0 +1,27 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-ollama.*] +ignore_missing_imports = True + +[mypy-bs4.*] +ignore_missing_imports = True diff --git a/tasks/ollama-sentiment-analysis-fn/main.py b/tasks/ollama-sentiment-analysis-fn/main.py index 9872508c..4d3d9f17 100644 --- a/tasks/ollama-sentiment-analysis-fn/main.py +++ b/tasks/ollama-sentiment-analysis-fn/main.py @@ -1,6 +1,6 @@ from coretex import currentTaskRun, Model -def main(): +def main() -> None: taskRun = currentTaskRun() model = Model.createModel(taskRun.generateEntityName(), taskRun.id, 1.0, {}) diff --git a/tasks/qiime-alpha-beta-diversity/.mypy.ini b/tasks/qiime-alpha-beta-diversity/.mypy.ini new file mode 100644 index 00000000..9f2db0ac --- /dev/null +++ b/tasks/qiime-alpha-beta-diversity/.mypy.ini @@ -0,0 +1,24 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-py3nvml.*] +ignore_missing_imports = True diff --git a/tasks/qiime-alpha-beta-diversity/main.py b/tasks/qiime-alpha-beta-diversity/main.py index 797ed85a..edd6ee0b 100644 --- a/tasks/qiime-alpha-beta-diversity/main.py +++ b/tasks/qiime-alpha-beta-diversity/main.py @@ -1,3 +1,4 @@ +from typing import Optional from pathlib import Path from zipfile import ZipFile @@ -24,7 +25,7 @@ def diversityCoreMetricsPhylogeneticSample( samplingDepth: int, metadataPath: Path, outputDir: Path, - threads + threads: Optional[int] ) -> Path: phylogenyPath = sample.joinPath("rooted-tree.qza") @@ -54,7 +55,7 @@ def diversityAlphaGroupSignificance( outputDataset: CustomDataset, outputPath: Path, taskRun: TaskRun -): +) -> None: ctx_qiime2.diversityAlphaGroupSignificance( str(alphaDiversityPath), @@ -79,7 +80,7 @@ def diversityBetaGroupSignificance( outputDataset: CustomDataset, outputPath: Path, taskRun: TaskRun -): +) -> None: ctx_qiime2.diversityBetaGroupSignificance( str(distanceMatrixPath), @@ -105,7 +106,7 @@ def emperorPlot( outputDataset: CustomDataset, outputPath: Path, taskRun: TaskRun -): +) -> None: ctx_qiime2.emperorPlot( str(pcoaPath), @@ -132,7 +133,7 @@ def diversityAlphaRarefaction( outputDataset: 
CustomDataset, outputPath: Path, taskRun: TaskRun -): +) -> None: ctx_qiime2.diversityAlphaRarefaction( str(tablePath), str(phylogenyPath), @@ -158,7 +159,7 @@ def processSample( taskRun: TaskRun, outputDataset: CustomDataset, outputDir: Path -): +) -> None: sample.unzip() metadataSample.unzip() diff --git a/tasks/qiime-dada2/.mypy.ini b/tasks/qiime-dada2/.mypy.ini new file mode 100644 index 00000000..d3e1fa10 --- /dev/null +++ b/tasks/qiime-dada2/.mypy.ini @@ -0,0 +1,18 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False diff --git a/tasks/qiime-demultiplexing/.mypy.ini b/tasks/qiime-demultiplexing/.mypy.ini new file mode 100644 index 00000000..d3e1fa10 --- /dev/null +++ b/tasks/qiime-demultiplexing/.mypy.ini @@ -0,0 +1,18 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False diff --git a/tasks/qiime-import/mypy.ini b/tasks/qiime-import/mypy.ini index 21446fac..382d22bd 100644 --- a/tasks/qiime-import/mypy.ini +++ b/tasks/qiime-import/mypy.ini @@ -17,3 +17,12 @@ disallow_incomplete_defs = True no_implicit_optional = True strict_optional = True allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-cchardet.*] +ignore_missing_imports = True + +[mypy-pandas.*] +ignore_missing_imports = True diff --git a/tasks/qiime-taxonomic-analysis/.mypy.ini b/tasks/qiime-taxonomic-analysis/.mypy.ini new file mode 100644 index 00000000..d3e1fa10 --- /dev/null +++ b/tasks/qiime-taxonomic-analysis/.mypy.ini @@ -0,0 +1,18 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False diff --git a/tasks/qiime-taxonomic-analysis/main.py b/tasks/qiime-taxonomic-analysis/main.py index 9bc9109e..3dbad1d8 100644 --- a/tasks/qiime-taxonomic-analysis/main.py +++ b/tasks/qiime-taxonomic-analysis/main.py @@ -37,7 +37,7 @@ def processSample( outputDataset: CustomDataset, outputDir: Path, threads: Optional[int] -): +) -> None: sample.unzip() metadataSample.unzip() diff --git a/tasks/s3-model-upload/.mypy.ini b/tasks/s3-model-upload/.mypy.ini index 3682b4f5..ac48fe36 100644 --- a/tasks/s3-model-upload/.mypy.ini +++ b/tasks/s3-model-upload/.mypy.ini @@ -20,3 +20,8 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-boto3.*] +ignore_missing_imports = True + +[mypy-botocore.*] +ignore_missing_imports = True diff --git 
a/tasks/stable-diffusion-fn/.mypy.ini b/tasks/stable-diffusion-fn/.mypy.ini new file mode 100644 index 00000000..a98b776d --- /dev/null +++ b/tasks/stable-diffusion-fn/.mypy.ini @@ -0,0 +1,24 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-diffusers.*] +ignore_missing_imports = True diff --git a/tasks/stable-diffusion-fn/main.py b/tasks/stable-diffusion-fn/main.py index 8db84ddc..f20ab231 100644 --- a/tasks/stable-diffusion-fn/main.py +++ b/tasks/stable-diffusion-fn/main.py @@ -3,7 +3,7 @@ from coretex import currentTaskRun, Model -def main(): +def main() -> None: taskRun = currentTaskRun() logging.info(">> [StableDiffusion] Creating Coretex model") diff --git a/tasks/stable-diffusion/.mypy.ini b/tasks/stable-diffusion/.mypy.ini new file mode 100644 index 00000000..97880871 --- /dev/null +++ b/tasks/stable-diffusion/.mypy.ini @@ -0,0 +1,30 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True + +[mypy-diffusers.*] +ignore_missing_imports = True diff --git a/tasks/stable-diffusion/main.py b/tasks/stable-diffusion/main.py index 70decc4b..78fbeb64 100644 --- a/tasks/stable-diffusion/main.py +++ b/tasks/stable-diffusion/main.py @@ -60,7 +60,7 @@ def generateImages( width = width, height = height, seed = seed - ).images # type: ignore + ).images imagePaths: list[Path] = [] diff --git a/tasks/synthetic-image-generator/.mypy.ini b/tasks/synthetic-image-generator/.mypy.ini index 3fe41755..3a9ec2e5 100644 --- a/tasks/synthetic-image-generator/.mypy.ini +++ b/tasks/synthetic-image-generator/.mypy.ini @@ -31,3 +31,9 @@ ignore_missing_imports = True [mypy-transformers.*] ignore_missing_imports = True + +[mypy-PIL.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True diff --git a/tasks/tabular-data-diagnostics/.mypy.ini b/tasks/tabular-data-diagnostics/.mypy.ini new file mode 100644 index 00000000..84da1f14 --- /dev/null +++ b/tasks/tabular-data-diagnostics/.mypy.ini @@ -0,0 +1,33 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports 
+[mypy-pandas.*] +ignore_missing_imports = True + +[mypy-sklearn.*] +ignore_missing_imports = True + +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True diff --git a/tasks/tabular-data-diagnostics/main.py b/tasks/tabular-data-diagnostics/main.py index f1ea97cb..8d64e607 100644 --- a/tasks/tabular-data-diagnostics/main.py +++ b/tasks/tabular-data-diagnostics/main.py @@ -9,7 +9,7 @@ from src.dataset import extractTestTrainData, loadDataset -def saveModel(taskRun: TaskRun[CustomDataset], accuracy: float, trainColumnCount: int, labels: list[str]): +def saveModel(taskRun: TaskRun[CustomDataset], accuracy: float, trainColumnCount: int, labels: list[str]) -> None: model = Model.createModel(taskRun.generateEntityName(), taskRun.id, accuracy, {}) modelPath = folder_manager.temp / "model" From 9930eb7ea58c5fc9ab72e6c8317a34e63e1955e4 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Thu, 4 Jul 2024 15:28:47 +0200 Subject: [PATCH 03/32] CTX-5783: Additional typing improvements --- tasks/bio-bodysite-prediction/src/train.py | 2 +- tasks/bio-read-quality/main.py | 2 +- tasks/contextual-targeting/main.py | 2 +- tasks/image-orientation/src/dataset.py | 12 +++++++----- tasks/image-orientation/src/model.py | 6 ++++-- tasks/image-orientation/src/train.py | 10 +++++----- tasks/image-orientation/src/utils.py | 14 +++++++++----- tasks/image-orientation/src/validation.py | 2 +- tasks/image-quality-predictor/src/data.py | 6 +++++- tasks/image-quality-predictor/src/validation.py | 2 ++ tasks/llm-text-processing/main.py | 2 +- tasks/object-detection-yolov10/.mypy.ini | 3 +++ tasks/ollama-rag-index/.mypy.ini | 3 +++ tasks/stable-diffusion-fn/.mypy.ini | 3 +++ tasks/stable-diffusion/.mypy.ini | 3 +++ .../src/sample_generator.py | 2 ++ tasks/translation-ollama/model.py | 2 +- 17 files changed, 52 insertions(+), 24 deletions(-) diff --git a/tasks/bio-bodysite-prediction/src/train.py b/tasks/bio-bodysite-prediction/src/train.py index c06caf11..b68f7d87 100644 --- a/tasks/bio-bodysite-prediction/src/train.py +++ b/tasks/bio-bodysite-prediction/src/train.py @@ -18,7 +18,7 @@ from .utils import savePredictionFile -class Progress(TrainingCallback): # type: ignore[misc] +class Progress(TrainingCallback): """ Callback function for XGBoost showing the loss and accuracy of the model at the current epoch.\n diff --git a/tasks/bio-read-quality/main.py b/tasks/bio-read-quality/main.py index efec9786..f9a91494 100644 --- a/tasks/bio-read-quality/main.py +++ b/tasks/bio-read-quality/main.py @@ -23,7 +23,7 @@ def calculateAverageScores(qualityScores: list[list[int]]) -> list[int]: def analyzeFastq(sequencePath: Path) -> list[int]: qualityScores: list[list[int]] = [] with sequencePath.open("r") as file: - for record in SeqIO.parse(file, "fastq"): + for record in SeqIO.parse(file, "fastq"): # type: ignore[no-untyped-call] qualityScores.append(record.letter_annotations["phred_quality"]) return calculateAverageScores(qualityScores) diff --git a/tasks/contextual-targeting/main.py b/tasks/contextual-targeting/main.py index db40704c..ff76299a 100644 --- a/tasks/contextual-targeting/main.py +++ b/tasks/contextual-targeting/main.py @@ -4,7 +4,7 @@ import logging from coretex import CustomDataset, TaskRun, CustomSample, currentTaskRun, folder_manager -from coretex.nlp import AudioTranscriber, Transcription # type: ignore[attr--defined] +from coretex.nlp import AudioTranscriber, Transcription # type: ignore[attr-defined] import matplotlib.pyplot as plt diff --git 
a/tasks/image-orientation/src/dataset.py b/tasks/image-orientation/src/dataset.py index 41539606..7ba4e1ab 100644 --- a/tasks/image-orientation/src/dataset.py +++ b/tasks/image-orientation/src/dataset.py @@ -29,17 +29,19 @@ def __init__( def __len__(self) -> int: return len(self.sampleIds) - def __getitem__(self, idx) -> dict[str, Any]: + def __getitem__(self, idx: int) -> dict[str, Any]: imagePath = self.imagesDir / f"{self.sampleIds[idx]}.png" metadataPath = self.imagesDir / f"{self.sampleIds[idx]}.json" image = ImageOps.exif_transpose(Image.open(imagePath).convert("RGB")) + if image is None: + raise ValueError(f">> [ImageOrientation] Failed to open image {imagePath.name}") + with metadataPath.open("r") as file: meta = json.load(file) flipped = meta.get(self.labelColumn, False) - label = [1, 0] if flipped else [0, 1] - label = torch.tensor(label).type(torch.float) + label = torch.tensor([1, 0] if flipped else [0, 1]).type(torch.float) if self.transform is not None: image = self.transform(image) @@ -73,9 +75,9 @@ def prepareDataset(dataset: ImageDataset) -> tuple[Path, list[int]]: return imagesDir, sampleIds -def splitDataset(dataset: OrientedDataset, validSplit: float) -> tuple["OrientedDataset", "OrientedDataset"]: +def splitDataset(dataset: OrientedDataset, validSplit: float) -> tuple[OrientedDataset, OrientedDataset]: totalSize = len(dataset) trainSize = int((1.0 - validSplit) * totalSize) validationSize = totalSize - trainSize - return random_split(dataset, [trainSize, validationSize]) + return random_split(dataset, [trainSize, validationSize]) # type: ignore[return-value] diff --git a/tasks/image-orientation/src/model.py b/tasks/image-orientation/src/model.py index 5ac99c2a..820cd3ac 100644 --- a/tasks/image-orientation/src/model.py +++ b/tasks/image-orientation/src/model.py @@ -1,10 +1,12 @@ +from torch import Tensor + import torch.nn as nn import torch.nn.functional as F class OrientationClassifier(nn.Module): - def __init__(self): + def __init__(self) -> None: super(OrientationClassifier, self).__init__() # Convolutional layers self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 3, padding = 1) @@ -26,7 +28,7 @@ def __init__(self): # Dropout self.dropout = nn.Dropout(0.25) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.pool1(F.leaky_relu(self.conv1(x))) x = self.pool2(F.leaky_relu(self.conv2(x))) x = self.pool3(F.leaky_relu(self.conv3(x))) diff --git a/tasks/image-orientation/src/train.py b/tasks/image-orientation/src/train.py index ca9f15a6..0bd1146f 100644 --- a/tasks/image-orientation/src/train.py +++ b/tasks/image-orientation/src/train.py @@ -18,7 +18,7 @@ def trainEpoch( trainLoader: DataLoader, model: OrientationClassifier, optimizer: optim.Adam, - criterion: nn.MSELoss, + criterion: nn.CrossEntropyLoss, device: torch.device ) -> tuple[float, float]: @@ -90,7 +90,7 @@ def runTraining( scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = "min", factor = 0.3, patience = max(5, int(epochs * 0.05))) earlyStopping = EarlyStopping(max(10, int(epochs * 0.1))) - bestLoss: Optional[torch.Tensor] = None + bestLoss: Optional[float] = None exampleInput = torch.randn(1, 3, imageSize, imageSize) for epoch in range(epochs): @@ -117,13 +117,13 @@ def runTraining( bestLoss = validationLoss # Save the best model - tsModel = torch.jit.trace(model, exampleInput) + tsModel = torch.jit.trace(model, exampleInput) # type: ignore[no-untyped-call] tsModel.save(modelPath / "best.pt") # Save the latest model - tsModel = 
torch.jit.trace(model, exampleInput) + tsModel = torch.jit.trace(model, exampleInput) # type: ignore[no-untyped-call] tsModel.save(modelPath / "last.pt") if not modelPath.joinpath("best.pt").exists(): - tsModel = torch.jit.trace(model, exampleInput) + tsModel = torch.jit.trace(model, exampleInput) # type: ignore[no-untyped-call] tsModel.save(modelPath / "best.pt") diff --git a/tasks/image-orientation/src/utils.py b/tasks/image-orientation/src/utils.py index 1fa6c113..e2bae647 100644 --- a/tasks/image-orientation/src/utils.py +++ b/tasks/image-orientation/src/utils.py @@ -27,14 +27,18 @@ def __call__(self, bestLoss: Union[float, torch.Tensor], latestLoss: Union[float def getMeanAndStd(directory: Path) -> tuple[list[float], list[float]]: channelsSum, channelsSquaredSum, numImages = 0, 0, 0 - for image_file in directory.glob("*.png"): - image = np.array(ImageOps.exif_transpose(Image.open(image_file)).convert("RGB"), dtype = np.float32) / 255.0 # Normalize pixel values to [0, 1] + for imagePath in directory.glob("*.png"): + image = ImageOps.exif_transpose(Image.open(imagePath)) + if image is None: + raise ValueError(f"Failed to read image {imagePath.name}") - channelsSum += np.mean(image, axis = (0, 1)) - channelsSquaredSum += np.mean(np.square(image), axis = (0, 1)) + imageArray = np.array(image.convert("RGB"), dtype = np.float32) / 255.0 # Normalize pixel values to [0, 1] + + channelsSum += np.mean(imageArray, axis = (0, 1)) + channelsSquaredSum += np.mean(np.square(imageArray), axis = (0, 1)) numImages += 1 - mean = channelsSum / numImages + mean = np.array(channelsSum / numImages) std = np.sqrt(channelsSquaredSum / numImages - np.square(mean)) return mean.tolist(), std.tolist() diff --git a/tasks/image-orientation/src/validation.py b/tasks/image-orientation/src/validation.py index b950b369..8e2fb8d2 100644 --- a/tasks/image-orientation/src/validation.py +++ b/tasks/image-orientation/src/validation.py @@ -56,7 +56,7 @@ def runValidation( taskRun: TaskRun ) -> float: - model = torch.jit.load(modelPath) + model = torch.jit.load(modelPath) # type: ignore[no-untyped-call] model.to(device) model.eval() diff --git a/tasks/image-quality-predictor/src/data.py b/tasks/image-quality-predictor/src/data.py index c8532c99..410a5da1 100644 --- a/tasks/image-quality-predictor/src/data.py +++ b/tasks/image-quality-predictor/src/data.py @@ -87,7 +87,11 @@ def __len__(self) -> int: def __getitem__(self, idx: int) -> tuple[Any, float]: sample, quality = self.data[idx] - image = ImageOps.exif_transpose(Image.open(sample.imagePath)).convert("RGB") + image = ImageOps.exif_transpose(Image.open(sample.imagePath)) + if image is None: + raise ValueError(f">> [ImageQuality] Failed to open image {sample.name}") + + image = image.convert("RGB") if self.transform: image = self.transform(image) diff --git a/tasks/image-quality-predictor/src/validation.py b/tasks/image-quality-predictor/src/validation.py index 6d83aae4..f8a21cb1 100644 --- a/tasks/image-quality-predictor/src/validation.py +++ b/tasks/image-quality-predictor/src/validation.py @@ -41,6 +41,8 @@ def run(modelPath: Path, dataset: list[tuple[ImageSample, float]], transform: tr for sample, quality in dataset: logging.info(f">> [ImageQuality] Validating sample \"{sample.name}\"...") image = ImageOps.exif_transpose(Image.open(sample.imagePath).convert("RGB")) + if image is None: + raise ValueError(f">> [ImageQuality] Failed to open image {sample.name}") if quality == 0: logging.warning("\tSample has quality == 0") diff --git a/tasks/llm-text-processing/main.py 
b/tasks/llm-text-processing/main.py index df79ad5d..e78ab705 100644 --- a/tasks/llm-text-processing/main.py +++ b/tasks/llm-text-processing/main.py @@ -107,7 +107,7 @@ def main() -> None: logging.info(f">> [LLMTextProcessing] Prompt: {prompt}") - responseContent = response["message"]["content"] + responseContent = response["message"]["content"] # type: ignore[index] logging.info(f">> [LLMTextProcessing] Response: {responseContent}") responseTextPath = folder_manager.temp / "response.txt" diff --git a/tasks/object-detection-yolov10/.mypy.ini b/tasks/object-detection-yolov10/.mypy.ini index 537f682d..f2806080 100644 --- a/tasks/object-detection-yolov10/.mypy.ini +++ b/tasks/object-detection-yolov10/.mypy.ini @@ -28,3 +28,6 @@ ignore_missing_imports = True [mypy-matplotlib.*] ignore_missing_imports = True + +[mypy-requests.*] +ignore_missing_imports = True diff --git a/tasks/ollama-rag-index/.mypy.ini b/tasks/ollama-rag-index/.mypy.ini index 7b6c87aa..14ec2246 100644 --- a/tasks/ollama-rag-index/.mypy.ini +++ b/tasks/ollama-rag-index/.mypy.ini @@ -28,3 +28,6 @@ ignore_missing_imports = True [mypy-fitz.*] ignore_missing_imports = True + +[mypy-requests.*] +ignore_missing_imports = True diff --git a/tasks/stable-diffusion-fn/.mypy.ini b/tasks/stable-diffusion-fn/.mypy.ini index a98b776d..c5af1153 100644 --- a/tasks/stable-diffusion-fn/.mypy.ini +++ b/tasks/stable-diffusion-fn/.mypy.ini @@ -22,3 +22,6 @@ allow_redefinition = False # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports [mypy-diffusers.*] ignore_missing_imports = True + +[mypy-torch.*] +ignore_missing_imports = True diff --git a/tasks/stable-diffusion/.mypy.ini b/tasks/stable-diffusion/.mypy.ini index 97880871..324e81a8 100644 --- a/tasks/stable-diffusion/.mypy.ini +++ b/tasks/stable-diffusion/.mypy.ini @@ -28,3 +28,6 @@ ignore_missing_imports = True [mypy-diffusers.*] ignore_missing_imports = True + +[mypy-torch.*] +ignore_missing_imports = True diff --git a/tasks/synthetic-image-generator/src/sample_generator.py b/tasks/synthetic-image-generator/src/sample_generator.py index 916b0f92..0901a46f 100644 --- a/tasks/synthetic-image-generator/src/sample_generator.py +++ b/tasks/synthetic-image-generator/src/sample_generator.py @@ -51,6 +51,8 @@ def generateSample( image = Image.fromarray(data.image) backgroundImage = ImageOps.exif_transpose(Image.open(backgroundImagePath)) + if backgroundImage is None: + raise ValueError(f"Failed to open background image. 
ID: {backgroundImagePath.parent.name}") # Resize image parentAnnotationWidth = int(backgroundImage.width * random.uniform(minImageSize, maxImageSize)) diff --git a/tasks/translation-ollama/model.py b/tasks/translation-ollama/model.py index 0453e3cd..f8650779 100644 --- a/tasks/translation-ollama/model.py +++ b/tasks/translation-ollama/model.py @@ -33,7 +33,7 @@ def installOllama() -> None: def isOllamaServerActiveAndRunning() -> bool: try: response = requests.get(OLLAMA_SERVER_URL) - return response.ok + return response.ok # type: ignore[no-any-return] except requests.ConnectionError: return False From 8e2906d234f1a8d5872a2ec177433010b5a824f4 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Thu, 4 Jul 2024 16:17:40 +0200 Subject: [PATCH 04/32] CTX-5783: Added missing mypy.ini files --- tasks/image-orientation/.mypy.ini | 39 +++++++++++++++++++ .../.mypy.ini | 18 +++++++++ tasks/stable-diffusion/main.py | 2 +- tasks/translation-ollama/.mypy.ini | 30 ++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 tasks/image-orientation/.mypy.ini create mode 100644 tasks/qiime-tree-phylogenetic-diversity-analysis/.mypy.ini create mode 100644 tasks/translation-ollama/.mypy.ini diff --git a/tasks/image-orientation/.mypy.ini b/tasks/image-orientation/.mypy.ini new file mode 100644 index 00000000..902893ac --- /dev/null +++ b/tasks/image-orientation/.mypy.ini @@ -0,0 +1,39 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-torch.*] +ignore_missing_imports = True + +[mypy-torchvision.*] +ignore_missing_imports = True + +[mypy-sklearn.*] +ignore_missing_imports = True + +[mypy-seaborn.*] +ignore_missing_imports = True + +[mypy-PIL.*] +ignore_missing_imports = True diff --git a/tasks/qiime-tree-phylogenetic-diversity-analysis/.mypy.ini b/tasks/qiime-tree-phylogenetic-diversity-analysis/.mypy.ini new file mode 100644 index 00000000..d3e1fa10 --- /dev/null +++ b/tasks/qiime-tree-phylogenetic-diversity-analysis/.mypy.ini @@ -0,0 +1,18 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False diff --git a/tasks/stable-diffusion/main.py b/tasks/stable-diffusion/main.py index 78fbeb64..69bab6a5 100644 --- a/tasks/stable-diffusion/main.py +++ b/tasks/stable-diffusion/main.py @@ -18,7 +18,7 @@ def loadModel(device: str) -> StableDiffusionPipeline: dtype = torch.float16 if device == "cuda" else torch.float32 - pipe = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype = dtype) + pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype = dtype) pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) return 
pipe.to(device) diff --git a/tasks/translation-ollama/.mypy.ini b/tasks/translation-ollama/.mypy.ini new file mode 100644 index 00000000..e1191fd9 --- /dev/null +++ b/tasks/translation-ollama/.mypy.ini @@ -0,0 +1,30 @@ +# Global options: + +[mypy] +python_version = 3.9 +pretty = True +warn_return_any = True +warn_no_return = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_unreachable = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +strict_optional = True +allow_redefinition = False + + +# Per-module options: +# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports +[mypy-ollama.*] +ignore_missing_imports = True + +[mypy-requests.*] +ignore_missing_imports = True + +[mypy-fitz.*] +ignore_missing_imports = True From 3a272f5b3b07e998973231921128cac3579c48f9 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Thu, 4 Jul 2024 17:08:01 +0200 Subject: [PATCH 05/32] CTX-5783: Additional linter errors fixed --- tasks/stable-diffusion/main.py | 8 ++++---- tasks/translation-ollama/main.py | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tasks/stable-diffusion/main.py b/tasks/stable-diffusion/main.py index 69bab6a5..c3c45dc7 100644 --- a/tasks/stable-diffusion/main.py +++ b/tasks/stable-diffusion/main.py @@ -18,9 +18,9 @@ def loadModel(device: str) -> StableDiffusionPipeline: dtype = torch.float16 if device == "cuda" else torch.float32 - pipe: StableDiffusionPipeline = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype = dtype) - pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) - return pipe.to(device) + pipe = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype = dtype) # type: ignore[no-untyped-call] + pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) # type: ignore[no-untyped-call] + return pipe.to(device) # type: ignore[no-any-return] def getDefault(taskRun: TaskRun, name: str, default: Any) -> Any: @@ -53,7 +53,7 @@ def generateImages( # Create an array equal to number of input prompts negativePrompts = [negativePrompt] * len(prompts) - images = model( + images = model( # type: ignore[operator] prompts, negative_prompt = negativePrompts, num_inference_steps = steps, diff --git a/tasks/translation-ollama/main.py b/tasks/translation-ollama/main.py index 09b23f25..7476b179 100644 --- a/tasks/translation-ollama/main.py +++ b/tasks/translation-ollama/main.py @@ -13,7 +13,7 @@ def readPDF(filePath: Path) -> list[str]: pagesText: list[str] = [] - + with fitz.open(filePath) as doc: for page in doc: paragraphs = page.get_text().split("\n") @@ -24,14 +24,14 @@ def readPDF(filePath: Path) -> list[str]: def loadCorpus(dataset: CustomDataset) -> list[list[str]]: corpus: list[list[str]] = [] - + for sample in dataset.samples: sample.unzip() pdfPaths = list(sample.path.rglob("*.pdf")) if len(pdfPaths) == 0: raise ValueError(">> [LLM Translate] The provided dataset does not contain any .pdf documents") - + for pdfPath in pdfPaths: if not "__MACOSX" in str(pdfPath): corpus.append(readPDF(pdfPath)) @@ -45,13 +45,13 @@ def main() -> None: dataset.download() launchOllamaServer() - + logging.info(">> [OllamaRAG] Pulling model") pullModel(LLM) - + logging.info(">> [OllamaRAG] Loading text corpus") corpus = loadCorpus(taskRun.dataset) - + translatedDataset = CustomDataset.createDataset(f"{taskRun.id}-translated", 
taskRun.projectId) language = taskRun.parameters["language"] @@ -59,7 +59,7 @@ def main() -> None: for counter, document in enumerate(corpus, start = 1): document = [x.strip() for x in document] document = [line for line in document if line != ""] - + translatedText = "" for paragraph in document: logging.info(">> [OllamaRAG] Translating paragraph") @@ -70,10 +70,10 @@ def main() -> None: "role": "user", "content": query } - response = ollama.chat(model = LLM, messages = [msg]) - answer = response["message"]["content"] + response = ollama.chat(model = LLM, messages = [msg]) # type: ignore[list-item] + answer = response["message"]["content"] # type: ignore[index] translatedText += answer + "\n" - + txtFileName = f"file-{counter}.txt" txtFile = folder_manager.temp / txtFileName with open(txtFile, "w") as f: @@ -83,7 +83,7 @@ def main() -> None: zipFile = folder_manager.temp / zipFileName with zipfile.ZipFile(zipFile, "w") as zf: zf.write(txtFile, txtFileName) - + translatedDataset.add(zipFile) taskRun.submitOutput("translatedDataset", translatedDataset) From 0d76879adfd69475939f80c42edcae364ebdc3b0 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Fri, 5 Jul 2024 11:36:13 +0200 Subject: [PATCH 06/32] CTX-5783: Added type: ignores --- tasks/image-segmentation/src/detect.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tasks/image-segmentation/src/detect.py b/tasks/image-segmentation/src/detect.py index 9f91c96f..cf17ebea 100644 --- a/tasks/image-segmentation/src/detect.py +++ b/tasks/image-segmentation/src/detect.py @@ -38,14 +38,14 @@ def run(taskRun: TaskRun, model: KerasModel, dataset: ImageDataset) -> None: fig, axes = plt.subplots(1, 3) - axes[0].set_title("Input image") - axes[0].imshow(resized) + axes[0].set_title("Input image") # type: ignore[index] + axes[0].imshow(resized) # type: ignore[index] - axes[1].set_title("Groundtruth mask") - axes[1].imshow(groundtruth) + axes[1].set_title("Groundtruth mask") # type: ignore[index] + axes[1].imshow(groundtruth) # type: ignore[index] - axes[2].set_title("Predicted mask") - axes[2].imshow(prediction) + axes[2].set_title("Predicted mask") # type: ignore[index] + axes[2].imshow(prediction) # type: ignore[index] plotPath = folder_manager.temp / f"{sample.id}.png" From 9cd99b376ea4287e734b7bb415a63b591c71049a Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Fri, 5 Jul 2024 13:29:03 +0200 Subject: [PATCH 07/32] CTX-5783: Added github action --- .github/workflows/linter-code-check.yml | 72 +++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 .github/workflows/linter-code-check.yml diff --git a/.github/workflows/linter-code-check.yml b/.github/workflows/linter-code-check.yml new file mode 100644 index 00000000..466a8db7 --- /dev/null +++ b/.github/workflows/linter-code-check.yml @@ -0,0 +1,72 @@ +name: Linter code check + +on: + push: + branches: + - main + - stage + - develop + pull_request: + types: [opened, reopened, synchronize] + branches: + - main + - stage + - develop + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Install mypy globally + run: | + pip install mypy + - name: Analysing templates with mypy + run: | + eval "$(conda shell.bash hook)" + for dir in tasks/* ; do + echo "Checking directory: $dir" + + # Skip the directory if no .mypy.ini file is found + if [ ! -f "$dir/.mypy.ini" ]; then + echo "No .mypy.ini file found in $dir, skipping..." 
+ continue + fi + + if [ -f "$dir/environment.yml" ]; then + echo "Setting up conda environment for $dir" + conda env create -n $(basename "$dir") -f "$dir/environment.yml" + echo "Created conda environment" + conda activate $(basename "$dir") + pip install mypy + elif [ -f "$dir/requirements.txt" ]; then + echo "Setting up venv for $dir" + python -m venv "$dir/venv" + source "$dir/venv/bin/activate" + pip install -r "$dir/requirements.txt" + pip install mypy + fi + + echo "Running mypy in $dir" + set +e # Disable exit on error + mypy_output=$(mypy --config-file "$dir/.mypy.ini" "$dir" 2>&1) + set -e # Re-enable exit on error + echo "$mypy_output" + + if echo "$mypy_output" | grep -q 'error:'; then + echo "Running install-types in $dir" + mypy --install-types --non-interactive --config-file "$dir/.mypy.ini" "$dir" + fi + + if [ -f "$dir/environment.yml" ]; then + conda deactivate + conda remove -y -n $(basename "$dir") --all + elif [ -f "$dir/requirements.txt" ]; then + deactivate + rm -rf "$dir/venv" + fi + done From 861da8e528ed0ecd2fe4fe1d91e1d8a3117b9cf2 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Fri, 5 Jul 2024 13:39:27 +0200 Subject: [PATCH 08/32] CTX-5783: Removed .mypy.ini from audio-analytics --- tasks/audio-analytics/.mypy.ini | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 tasks/audio-analytics/.mypy.ini diff --git a/tasks/audio-analytics/.mypy.ini b/tasks/audio-analytics/.mypy.ini deleted file mode 100644 index a9714b0e..00000000 --- a/tasks/audio-analytics/.mypy.ini +++ /dev/null @@ -1,26 +0,0 @@ -# Global options: - -[mypy] -python_version = 3.9 -pretty = True -warn_return_any = True -warn_no_return = True -warn_redundant_casts = True -warn_unused_configs = True -warn_unused_ignores = True -warn_unreachable = True -disallow_subclassing_any = True -disallow_untyped_calls = True -disallow_untyped_defs = True -disallow_incomplete_defs = True -no_implicit_optional = True -strict_optional = True -allow_redefinition = False -exclude = venv - - -# Per-module options: -# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports - -[mypy-deepspeech.*] -ignore_missing_imports = True From 132183f1dbf6d20765512fea475ead657837e468 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Fri, 5 Jul 2024 16:56:24 +0200 Subject: [PATCH 09/32] CTX-5783: Fixed dependencies --- tasks/body-tracking/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/body-tracking/requirements.txt b/tasks/body-tracking/requirements.txt index cebefd48..5409dc6e 100644 --- a/tasks/body-tracking/requirements.txt +++ b/tasks/body-tracking/requirements.txt @@ -1,4 +1,5 @@ tensorflowjs>=3.9.0 +protobuf~=3.19.0 tensorflow==2.8 coremltools==6.3.0 coretex From abbce1515ddbadc36140110b6659d4216a8921a0 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Mon, 8 Jul 2024 11:04:22 +0200 Subject: [PATCH 10/32] CTX-5783: Removed hardcoded tensorflow versions in requirements.txts --- tasks/body-tracking/requirements.txt | 6 +++--- tasks/image-segmentation/requirements.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tasks/body-tracking/requirements.txt b/tasks/body-tracking/requirements.txt index 5409dc6e..2db665a9 100644 --- a/tasks/body-tracking/requirements.txt +++ b/tasks/body-tracking/requirements.txt @@ -1,5 +1,5 @@ -tensorflowjs>=3.9.0 -protobuf~=3.19.0 -tensorflow==2.8 +tensorflowjs +protobuf +tensorflow coremltools==6.3.0 coretex diff --git a/tasks/image-segmentation/requirements.txt 
b/tasks/image-segmentation/requirements.txt index 74d06ead..0b82c337 100644 --- a/tasks/image-segmentation/requirements.txt +++ b/tasks/image-segmentation/requirements.txt @@ -1,7 +1,7 @@ -tensorflow==2.8 +tensorflow numpy matplotlib -protobuf~=3.19.0 +protobuf opencv-python coremltools==6.3.0 coretex From 827d3d4ec1b52e5ce19190f014bf90017f1d6fae Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Tue, 9 Jul 2024 11:39:03 +0200 Subject: [PATCH 11/32] CTX-5783: Extracted code to bash script and explicit python version usage --- .github/workflows/linter-code-check.sh | 45 +++++++++++++++++++++++++ .github/workflows/linter-code-check.yml | 44 +----------------------- 2 files changed, 46 insertions(+), 43 deletions(-) create mode 100644 .github/workflows/linter-code-check.sh diff --git a/.github/workflows/linter-code-check.sh b/.github/workflows/linter-code-check.sh new file mode 100644 index 00000000..933d65ff --- /dev/null +++ b/.github/workflows/linter-code-check.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +eval "$(conda shell.bash hook)" +for dir in tasks/* ; do + echo "Checking directory: $dir" + + # Skip the directory if no .mypy.ini file is found + if [ ! -f "$dir/.mypy.ini" ]; then + echo "No .mypy.ini file found in $dir, skipping..." + continue + fi + + if [ -f "$dir/environment.yml" ]; then + echo "Setting up conda environment for $dir" + conda env create -n $(basename "$dir") -f "$dir/environment.yml" + echo "Created conda environment" + conda activate $(basename "$dir") + pip install mypy + elif [ -f "$dir/requirements.txt" ]; then + echo "Setting up venv for $dir" + python3.9 -m venv "$dir/venv" + source "$dir/venv/bin/activate" + pip install -r "$dir/requirements.txt" + pip install mypy + fi + + echo "Running mypy in $dir" + set +e # Disable exit on error + mypy_output=$(mypy --config-file "$dir/.mypy.ini" "$dir" 2>&1) + set -e # Re-enable exit on error + + echo "$mypy_output" + if echo "$mypy_output" | grep -q 'error:'; then + echo "Running install-types in $dir" + mypy --install-types --non-interactive --config-file "$dir/.mypy.ini" "$dir" + fi + + if [ -f "$dir/environment.yml" ]; then + conda deactivate + conda remove -y -n $(basename "$dir") --all + elif [ -f "$dir/requirements.txt" ]; then + deactivate + rm -rf "$dir/venv" + fi +done diff --git a/.github/workflows/linter-code-check.yml b/.github/workflows/linter-code-check.yml index 466a8db7..9b91abd7 100644 --- a/.github/workflows/linter-code-check.yml +++ b/.github/workflows/linter-code-check.yml @@ -27,46 +27,4 @@ jobs: pip install mypy - name: Analysing templates with mypy run: | - eval "$(conda shell.bash hook)" - for dir in tasks/* ; do - echo "Checking directory: $dir" - - # Skip the directory if no .mypy.ini file is found - if [ ! -f "$dir/.mypy.ini" ]; then - echo "No .mypy.ini file found in $dir, skipping..." 
- continue - fi - - if [ -f "$dir/environment.yml" ]; then - echo "Setting up conda environment for $dir" - conda env create -n $(basename "$dir") -f "$dir/environment.yml" - echo "Created conda environment" - conda activate $(basename "$dir") - pip install mypy - elif [ -f "$dir/requirements.txt" ]; then - echo "Setting up venv for $dir" - python -m venv "$dir/venv" - source "$dir/venv/bin/activate" - pip install -r "$dir/requirements.txt" - pip install mypy - fi - - echo "Running mypy in $dir" - set +e # Disable exit on error - mypy_output=$(mypy --config-file "$dir/.mypy.ini" "$dir" 2>&1) - set -e # Re-enable exit on error - echo "$mypy_output" - - if echo "$mypy_output" | grep -q 'error:'; then - echo "Running install-types in $dir" - mypy --install-types --non-interactive --config-file "$dir/.mypy.ini" "$dir" - fi - - if [ -f "$dir/environment.yml" ]; then - conda deactivate - conda remove -y -n $(basename "$dir") --all - elif [ -f "$dir/requirements.txt" ]; then - deactivate - rm -rf "$dir/venv" - fi - done + bash .github/workflows/linter-code-check.sh From ad826026167bb7d618a1ffeb0e7fe765556384c9 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Tue, 9 Jul 2024 14:52:24 +0200 Subject: [PATCH 12/32] CTX-5783: Fixed mypy errors for sql-connector --- tasks/sql-connector/main.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tasks/sql-connector/main.py b/tasks/sql-connector/main.py index 42fff26f..7917bb19 100644 --- a/tasks/sql-connector/main.py +++ b/tasks/sql-connector/main.py @@ -19,18 +19,18 @@ def connectMysqlDatabase(connectionConfig: dict[str, str]) -> CMySQLConnection: try: conn = mysql.connector.connect(**connectionConfig) except mysql.connector.errors.Error as e: - logging.error(f">> [SQL Connector] Error while connecting to database: {e}") + raise mysql.connector.errors.Error(f">> [SQL Connector] Error while connecting to database: {e}") - return conn + return conn # type: ignore[return-value] def connectPostgresqlDatabase(connectionConfig: dict[str, str]) -> connection: logging.info(f">> [SQL Connector] Connecting with PostgreSQL database \"{connectionConfig['database']}\"...") try: - conn = psycopg2.connect(**connectionConfig) + conn: connection = psycopg2.connect(**connectionConfig) # type: ignore[call-overload] except psycopg2._psycopg.Error as e: - logging.error(f">> [SQL Connector] Error while connecting to database: {e}") + raise psycopg2._psycopg.Error(f">> [SQL Connector] Error while connecting to database: {e}") return conn @@ -42,28 +42,26 @@ def fetchAllData(conn: Union[CMySQLConnection, connection], dataset: CustomDatas if len(tables) < 1: raise RuntimeError("There are no tables in the database") - tables = [table[0] for table in tables] - - for table in tables: + for table in [table[0] for table in tables]: tableData: list[dict[str, str]] = [] - cursor.execute(queryGetRows + f"'{table}'") + cursor.execute(queryGetRows + f"'{table}'") # type: ignore[str-bytes-safe] columnNames = list(cursor.fetchall()) columnNames = [name[0] for name in columnNames] - cursor.execute(f"SELECT * FROM {table}") + cursor.execute(f"SELECT * FROM {table}") # type: ignore[str-bytes-safe] rows = list(cursor.fetchall()) for row in rows: - tableData.append(dict(zip(columnNames, list(row)))) + tableData.append(dict(zip(columnNames, list(row)))) # type: ignore[arg-type] - sampleNameCsv = f"{table}.csv" + sampleNameCsv = f"{table}.csv" # type: ignore[str-bytes-safe] with open(sampleNameCsv, "w", newline = "") as file: writer = 
csv.DictWriter(file, fieldnames = columnNames) writer.writeheader() - writer.writerows(tableData) + writer.writerows(tableData) # type: ignore[arg-type] - sampleNameZip = f"{table}.zip" + sampleNameZip = f"{table}.zip" # type: ignore[str-bytes-safe] with zipfile.ZipFile(sampleNameZip, "w") as zipFile: zipFile.write(sampleNameCsv) @@ -92,23 +90,23 @@ def main() -> None: } if databaseType == "MySQL": - conn: Union[CMySQLConnection, connection] = connectMysqlDatabase(connectionConfig) + mySqlConn = connectMysqlDatabase(connectionConfig) - if conn.is_connected(): + if mySqlConn.is_connected(): dataset = CustomDataset.createDataset(f"{taskRun.id}-{database}", taskRun.projectId) queryGetTables = f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{database}'" queryGetRows = f"SELECT column_name FROM information_schema.columns WHERE table_schema = '{database}' AND table_name = " - fetchAllData(conn, dataset, queryGetTables, queryGetRows) + fetchAllData(mySqlConn, dataset, queryGetTables, queryGetRows) else: logging.warning(">> [SQL Connector] Problem with the database connection") elif databaseType == "PostgreSQL": - conn = connectPostgresqlDatabase(connectionConfig) + postgreSqlConn = connectPostgresqlDatabase(connectionConfig) - if conn: + if postgreSqlConn: dataset = CustomDataset.createDataset(f"{taskRun.id}-{database}", taskRun.projectId) queryGetTables = f"SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'" queryGetRows = f"SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = " - fetchAllData(conn, dataset, queryGetTables, queryGetRows) + fetchAllData(postgreSqlConn, dataset, queryGetTables, queryGetRows) else: logging.warning(">> [SQL Connector] Problem with the database connection") From da7722bda8a6ba34d80fffb1eb3658bb5af7d9a9 Mon Sep 17 00:00:00 2001 From: nemanja Date: Wed, 17 Jul 2024 09:31:48 +0200 Subject: [PATCH 13/32] first commit --- tt.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tt.txt diff --git a/tt.txt b/tt.txt new file mode 100644 index 00000000..e69de29b From 654cc30fbe4e08c2aec2d28b5f8395031abf9f6e Mon Sep 17 00:00:00 2001 From: nemanja Date: Wed, 17 Jul 2024 09:59:15 +0200 Subject: [PATCH 14/32] removed unnecessary file --- tt.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tt.txt diff --git a/tt.txt b/tt.txt deleted file mode 100644 index e69de29b..00000000 From 57fe81abf98ed2242c7feccd73d08a61322d79ef Mon Sep 17 00:00:00 2001 From: nemanja Date: Wed, 17 Jul 2024 10:09:53 +0200 Subject: [PATCH 15/32] parallelism implemented --- .github/workflows/linter-code-check.sh | 89 ++++++++++++++----------- .github/workflows/linter-code-check.yml | 15 ++++- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/.github/workflows/linter-code-check.sh b/.github/workflows/linter-code-check.sh index 933d65ff..803b1d43 100644 --- a/.github/workflows/linter-code-check.sh +++ b/.github/workflows/linter-code-check.sh @@ -1,45 +1,58 @@ #!/bin/bash eval "$(conda shell.bash hook)" -for dir in tasks/* ; do - echo "Checking directory: $dir" +dir=$1 +echo "DIR is = $dir" +# for dir in tasks/* ; do +echo "Checking directory: $dir" - # Skip the directory if no .mypy.ini file is found - if [ ! -f "$dir/.mypy.ini" ]; then - echo "No .mypy.ini file found in $dir, skipping..." 
- continue - fi +# Skip the tasks/dataset-split directory +if [ "$dir" == "tasks/dataset-split/" ]; then + exit 0; +fi +# Skip the directory if no .mypy.ini file is found +if [ ! -f "$dir/.mypy.ini" ]; then + echo "No .mypy.ini file found in $dir, skipping..." + # continue + exit 0 +fi +date1=$(date +"%s") +if [ -f "$dir/environment.yml" ]; then + echo "Setting up conda environment for $dir" + conda env create -n $(basename "$dir") -f "$dir/environment.yml" + echo "Created conda environment" + conda activate $(basename "$dir") + pip install mypy +elif [ -f "$dir/requirements.txt" ]; then + echo "Setting up venv for $dir" + python3.9 -m venv "$dir/venv" + echo "activate venv" + source "$dir/venv/bin/activate" + echo "install requirements" + pip install --upgrade pip + pip install -r "$dir/requirements.txt" + pip install mypy +fi - if [ -f "$dir/environment.yml" ]; then - echo "Setting up conda environment for $dir" - conda env create -n $(basename "$dir") -f "$dir/environment.yml" - echo "Created conda environment" - conda activate $(basename "$dir") - pip install mypy - elif [ -f "$dir/requirements.txt" ]; then - echo "Setting up venv for $dir" - python3.9 -m venv "$dir/venv" - source "$dir/venv/bin/activate" - pip install -r "$dir/requirements.txt" - pip install mypy - fi +echo "Running mypy in $dir" +set +e # Disable exit on error +mypy_output=$(mypy --config-file "$dir/.mypy.ini" "$dir" 2>&1) +set -e # Re-enable exit on error - echo "Running mypy in $dir" - set +e # Disable exit on error - mypy_output=$(mypy --config-file "$dir/.mypy.ini" "$dir" 2>&1) - set -e # Re-enable exit on error +echo "$mypy_output" +if echo "$mypy_output" | grep -q 'error:'; then + echo "Running install-types in $dir" + mypy --install-types --non-interactive --config-file "$dir/.mypy.ini" "$dir" +fi - echo "$mypy_output" - if echo "$mypy_output" | grep -q 'error:'; then - echo "Running install-types in $dir" - mypy --install-types --non-interactive --config-file "$dir/.mypy.ini" "$dir" - fi - - if [ -f "$dir/environment.yml" ]; then - conda deactivate - conda remove -y -n $(basename "$dir") --all - elif [ -f "$dir/requirements.txt" ]; then - deactivate - rm -rf "$dir/venv" - fi -done +if [ -f "$dir/environment.yml" ]; then + conda deactivate + conda remove -y -n $(basename "$dir") --all +elif [ -f "$dir/requirements.txt" ]; then + deactivate + rm -rf "$dir/venv" +fi +date2=$(date +"%s") +DIFF=$(($date2-$date1)) +echo "Duration in $dir: $(($DIFF / 3600 )) hours $((($DIFF % 3600) / 60)) minutes $(($DIFF % 60)) seconds" +# done diff --git a/.github/workflows/linter-code-check.yml b/.github/workflows/linter-code-check.yml index 9b91abd7..173c09d6 100644 --- a/.github/workflows/linter-code-check.yml +++ b/.github/workflows/linter-code-check.yml @@ -14,8 +14,21 @@ on: - develop jobs: + define-dirs: + runs-on: ubuntu-latest + outputs: + dirs: ${{ steps.dirs.outputs.dirs }} + steps: + - uses: actions/checkout@v3 + - name: Define Dirs + id: dirs + run: result=$(echo tasks/*/ | sed 's/\([^ ]*\)/"\1",/g') && result="${result%,}" && echo "dirs=[$result]" >> "$GITHUB_OUTPUT" build: runs-on: ubuntu-latest + needs: define-dirs + strategy: + matrix: + dirs: ${{ fromJSON(needs.define-dirs.outputs.dirs) }} steps: - uses: actions/checkout@v3 - name: Set up Python @@ -27,4 +40,4 @@ jobs: pip install mypy - name: Analysing templates with mypy run: | - bash .github/workflows/linter-code-check.sh + bash .github/workflows/linter-code-check.sh ${{matrix.dirs}} From 3f33f7e494b9375557a24505495921c91cdf98b8 Mon Sep 17 00:00:00 2001 From: 
nemanja Date: Wed, 17 Jul 2024 10:18:29 +0200 Subject: [PATCH 16/32] deleted debugging lines from scripts --- .github/workflows/linter-code-check.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linter-code-check.sh b/.github/workflows/linter-code-check.sh index 803b1d43..cda7a68a 100644 --- a/.github/workflows/linter-code-check.sh +++ b/.github/workflows/linter-code-check.sh @@ -16,7 +16,7 @@ if [ ! -f "$dir/.mypy.ini" ]; then # continue exit 0 fi -date1=$(date +"%s") + if [ -f "$dir/environment.yml" ]; then echo "Setting up conda environment for $dir" conda env create -n $(basename "$dir") -f "$dir/environment.yml" @@ -52,7 +52,5 @@ elif [ -f "$dir/requirements.txt" ]; then deactivate rm -rf "$dir/venv" fi -date2=$(date +"%s") -DIFF=$(($date2-$date1)) -echo "Duration in $dir: $(($DIFF / 3600 )) hours $((($DIFF % 3600) / 60)) minutes $(($DIFF % 60)) seconds" + # done From 5429deaca28468aff5eea33077321da1222c7e8c Mon Sep 17 00:00:00 2001 From: nemanja Date: Wed, 17 Jul 2024 10:44:28 +0200 Subject: [PATCH 17/32] CTX-5783: deleted unnecessary lines --- .github/workflows/linter-code-check.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/linter-code-check.sh b/.github/workflows/linter-code-check.sh index cda7a68a..0ecdadca 100644 --- a/.github/workflows/linter-code-check.sh +++ b/.github/workflows/linter-code-check.sh @@ -6,10 +6,6 @@ echo "DIR is = $dir" # for dir in tasks/* ; do echo "Checking directory: $dir" -# Skip the tasks/dataset-split directory -if [ "$dir" == "tasks/dataset-split/" ]; then - exit 0; -fi # Skip the directory if no .mypy.ini file is found if [ ! -f "$dir/.mypy.ini" ]; then echo "No .mypy.ini file found in $dir, skipping..." From c19c3b06681a082538ab17010e84c6116205a0d8 Mon Sep 17 00:00:00 2001 From: nemanja Date: Wed, 17 Jul 2024 10:50:14 +0200 Subject: [PATCH 18/32] CTX-5783: removed unnecessary lines --- .github/workflows/linter-code-check.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/linter-code-check.sh b/.github/workflows/linter-code-check.sh index 0ecdadca..3fd65e63 100644 --- a/.github/workflows/linter-code-check.sh +++ b/.github/workflows/linter-code-check.sh @@ -2,14 +2,11 @@ eval "$(conda shell.bash hook)" dir=$1 -echo "DIR is = $dir" -# for dir in tasks/* ; do echo "Checking directory: $dir" # Skip the directory if no .mypy.ini file is found if [ ! -f "$dir/.mypy.ini" ]; then echo "No .mypy.ini file found in $dir, skipping..." 
-    # continue
     exit 0
 fi

From e0e05fc1e32e4c2520c783ec83f277405a68ab26 Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Thu, 18 Jul 2024 10:09:19 +0200
Subject: [PATCH 19/32] CTX-5783: Fixed linter errors for model-transfer and dataset-split

---
 tasks/dataset-split/.mypy.ini | 6 ++++++
 tasks/dataset-split/main.py | 13 +++++++------
 tasks/dataset-split/src/__init__.py | 0
 tasks/dataset-split/src/utils.py | 3 +--
 tasks/model-transfer/main.py | 2 +-
 5 files changed, 15 insertions(+), 9 deletions(-)
 create mode 100644 tasks/dataset-split/src/__init__.py

diff --git a/tasks/dataset-split/.mypy.ini b/tasks/dataset-split/.mypy.ini
index d3e1fa10..138bfe20 100644
--- a/tasks/dataset-split/.mypy.ini
+++ b/tasks/dataset-split/.mypy.ini
@@ -16,3 +16,9 @@ disallow_incomplete_defs = True
 no_implicit_optional = True
 strict_optional = True
 allow_redefinition = False
+
+
+# Per-module options:
+# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
+[mypy-chardet.*]
+ignore_missing_imports = True
diff --git a/tasks/dataset-split/main.py b/tasks/dataset-split/main.py
index f056c37b..1451a9fb 100644
--- a/tasks/dataset-split/main.py
+++ b/tasks/dataset-split/main.py
@@ -1,11 +1,12 @@
+from typing import Sequence
+
 import logging
 
-from coretex import currentTaskRun, ImageDataset, CustomDataset, SequenceDataset
+from coretex import currentTaskRun, ImageDataset, CustomDataset, SequenceDataset, NetworkDataset
 
-from split_custom_dataset import splitCustomDataset
-from split_image_dataset import splitImageDataset
-from split_sequence_dataset import splitSequenceDataset
-from src.utils import DatasetType
+from src.split_custom_dataset import splitCustomDataset
+from src.split_image_dataset import splitImageDataset
+from src.split_sequence_dataset import splitSequenceDataset
 
 
 def main() -> None:
@@ -21,7 +22,7 @@ def main() -> None:
     if datasetParts < 2:
         raise ValueError("Dataset can be divided into at least two parts")
 
-    splitDatasets: list[DatasetType]
+    splitDatasets: Sequence[NetworkDataset]
 
     if isinstance(originalDataset, ImageDataset):
         logging.info(f">> [Dataset Split] Splitting ImageDataset {originalDataset.name}...")
diff --git a/tasks/dataset-split/src/__init__.py b/tasks/dataset-split/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tasks/dataset-split/src/utils.py b/tasks/dataset-split/src/utils.py
index cd7ce974..c43dcc49 100644
--- a/tasks/dataset-split/src/utils.py
+++ b/tasks/dataset-split/src/utils.py
@@ -1,10 +1,9 @@
 from typing import TypeVar
 
-from coretex import NetworkSample, NetworkDataset
+from coretex import NetworkSample
 
 
 SampleType = TypeVar("SampleType", bound = NetworkSample)
-DatasetType = TypeVar("DatasetType", bound = NetworkDataset)
 
 
 def splitOriginalSamples(originalSamples: list[SampleType], datasetCount: int) -> list[list[SampleType]]:
diff --git a/tasks/model-transfer/main.py b/tasks/model-transfer/main.py
index b74883b0..a1b89ee2 100644
--- a/tasks/model-transfer/main.py
+++ b/tasks/model-transfer/main.py
@@ -41,7 +41,7 @@ def main() -> None:
         modelName = model.name
 
     logging.info(">> [Coretex] Creating Model...")
-    destinationModel = Model.createProjectModel(
+    destinationModel = Model.createModel(
         modelName,
         taskRun.parameters["destinationProject"],
         model.accuracy,

From ed6cdb9c6fccdcc7b5fedd88bc8428f5537d4543 Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 6 Aug 2024 11:49:44 +0200
Subject: [PATCH 20/32] CTX-5783: Removed unnecessary packages from mypy ignore imports

---
 tasks/annotated-image-extractor/.mypy.ini | 18 
------------------ tasks/bio-bodysite-prediction-nn/.mypy.ini | 3 --- tasks/bio-bodysite-prediction/.mypy.ini | 8 +------- tasks/contextual-targeting/.mypy.ini | 3 --- tasks/image-extractor/.mypy.ini | 21 --------------------- tasks/image-orientation/.mypy.ini | 9 --------- tasks/image-quality-predictor/.mypy.ini | 6 ------ tasks/image-segmentation/.mypy.ini | 6 ------ tasks/llama2-lora/.mypy.ini | 6 +++--- tasks/object-detection-yolov10/.mypy.ini | 9 --------- tasks/object-detection-yolov8/.mypy.ini | 6 ------ tasks/ollama-rag-index/.mypy.ini | 3 --- tasks/stable-diffusion-fn/.mypy.ini | 3 --- tasks/stable-diffusion/.mypy.ini | 9 --------- tasks/synthetic-image-generator/.mypy.ini | 21 --------------------- tasks/tabular-data-diagnostics/.mypy.ini | 6 ------ tasks/translation-ollama/.mypy.ini | 3 --- 17 files changed, 4 insertions(+), 136 deletions(-) diff --git a/tasks/annotated-image-extractor/.mypy.ini b/tasks/annotated-image-extractor/.mypy.ini index b6edd0df..d3e1fa10 100644 --- a/tasks/annotated-image-extractor/.mypy.ini +++ b/tasks/annotated-image-extractor/.mypy.ini @@ -16,21 +16,3 @@ disallow_incomplete_defs = True no_implicit_optional = True strict_optional = True allow_redefinition = False - - -# Per-module options: -# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-ultralytics.*] -ignore_missing_imports = True - -[mypy-tensorflow.*] -ignore_missing_imports = True - -[mypy-scipy.*] -ignore_missing_imports = True - -[mypy-transformers.*] -ignore_missing_imports = True - -[mypy-coretex.*] -ignore_missing_imports = True diff --git a/tasks/bio-bodysite-prediction-nn/.mypy.ini b/tasks/bio-bodysite-prediction-nn/.mypy.ini index bc21b36c..4c38c077 100644 --- a/tasks/bio-bodysite-prediction-nn/.mypy.ini +++ b/tasks/bio-bodysite-prediction-nn/.mypy.ini @@ -26,8 +26,5 @@ ignore_missing_imports = True [mypy-scipy.*] ignore_missing_imports = True -[mypy-matplotlib.*] -ignore_missing_imports = True - [mypy-sklearn.*] ignore_missing_imports = True diff --git a/tasks/bio-bodysite-prediction/.mypy.ini b/tasks/bio-bodysite-prediction/.mypy.ini index 0bc37ed1..ebb9536e 100644 --- a/tasks/bio-bodysite-prediction/.mypy.ini +++ b/tasks/bio-bodysite-prediction/.mypy.ini @@ -20,14 +20,8 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-scipy.*] -ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True - [mypy-sklearn.*] ignore_missing_imports = True -[mypy-xgboost.*] +[mypy-scipy.*] ignore_missing_imports = True diff --git a/tasks/contextual-targeting/.mypy.ini b/tasks/contextual-targeting/.mypy.ini index dbf10734..1b535120 100644 --- a/tasks/contextual-targeting/.mypy.ini +++ b/tasks/contextual-targeting/.mypy.ini @@ -27,6 +27,3 @@ ignore_missing_imports = True [mypy-sentence_transformers.*] ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True diff --git a/tasks/image-extractor/.mypy.ini b/tasks/image-extractor/.mypy.ini index c77de26e..d3e1fa10 100644 --- a/tasks/image-extractor/.mypy.ini +++ b/tasks/image-extractor/.mypy.ini @@ -16,24 +16,3 @@ disallow_incomplete_defs = True no_implicit_optional = True strict_optional = True allow_redefinition = False - - -# Per-module options: -# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-shapely.*] -ignore_missing_imports = True - -[mypy-pytesseract.*] -ignore_missing_imports = True - -[mypy-easyocr.*] -ignore_missing_imports = True - -[mypy-transformers.*] 
-ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True - -[mypy-PIL.*] -ignore_missing_imports = True diff --git a/tasks/image-orientation/.mypy.ini b/tasks/image-orientation/.mypy.ini index 902893ac..e1a70fa8 100644 --- a/tasks/image-orientation/.mypy.ini +++ b/tasks/image-orientation/.mypy.ini @@ -20,12 +20,6 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-matplotlib.*] -ignore_missing_imports = True - -[mypy-torch.*] -ignore_missing_imports = True - [mypy-torchvision.*] ignore_missing_imports = True @@ -34,6 +28,3 @@ ignore_missing_imports = True [mypy-seaborn.*] ignore_missing_imports = True - -[mypy-PIL.*] -ignore_missing_imports = True diff --git a/tasks/image-quality-predictor/.mypy.ini b/tasks/image-quality-predictor/.mypy.ini index 44a7bc89..2496755a 100644 --- a/tasks/image-quality-predictor/.mypy.ini +++ b/tasks/image-quality-predictor/.mypy.ini @@ -20,11 +20,5 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-torch.*] -ignore_missing_imports = True - [mypy-torchvision.*] ignore_missing_imports = True - -[mypy-PIL.*] -ignore_missing_imports = True diff --git a/tasks/image-segmentation/.mypy.ini b/tasks/image-segmentation/.mypy.ini index aa3114d6..8f55c84a 100644 --- a/tasks/image-segmentation/.mypy.ini +++ b/tasks/image-segmentation/.mypy.ini @@ -31,9 +31,3 @@ ignore_missing_imports = True [mypy-tensorflowjs.*] ignore_missing_imports = True - -[mypy-PIL.*] -ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True diff --git a/tasks/llama2-lora/.mypy.ini b/tasks/llama2-lora/.mypy.ini index fdbd6aa0..a05e3f3b 100644 --- a/tasks/llama2-lora/.mypy.ini +++ b/tasks/llama2-lora/.mypy.ini @@ -20,9 +20,6 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-torch.*] -ignore_missing_imports = True - [mypy-transformers.*] ignore_missing_imports = True @@ -31,3 +28,6 @@ ignore_missing_imports = True [mypy-trl.*] ignore_missing_imports = True + +[mypy-peft.*] +ignore_missing_imports = True diff --git a/tasks/object-detection-yolov10/.mypy.ini b/tasks/object-detection-yolov10/.mypy.ini index f2806080..85078e4e 100644 --- a/tasks/object-detection-yolov10/.mypy.ini +++ b/tasks/object-detection-yolov10/.mypy.ini @@ -22,12 +22,3 @@ allow_redefinition = False # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports [mypy-ultralytics.*] ignore_missing_imports = True - -[mypy-yaml.*] -ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True - -[mypy-requests.*] -ignore_missing_imports = True diff --git a/tasks/object-detection-yolov8/.mypy.ini b/tasks/object-detection-yolov8/.mypy.ini index 05b98bf1..85078e4e 100644 --- a/tasks/object-detection-yolov8/.mypy.ini +++ b/tasks/object-detection-yolov8/.mypy.ini @@ -22,9 +22,3 @@ allow_redefinition = False # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports [mypy-ultralytics.*] ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True - -[mypy-yaml.*] -ignore_missing_imports = True diff --git a/tasks/ollama-rag-index/.mypy.ini b/tasks/ollama-rag-index/.mypy.ini index 14ec2246..7b6c87aa 100644 --- a/tasks/ollama-rag-index/.mypy.ini +++ b/tasks/ollama-rag-index/.mypy.ini @@ -28,6 +28,3 @@ ignore_missing_imports = True [mypy-fitz.*] ignore_missing_imports = True - 
-[mypy-requests.*] -ignore_missing_imports = True diff --git a/tasks/stable-diffusion-fn/.mypy.ini b/tasks/stable-diffusion-fn/.mypy.ini index c5af1153..a98b776d 100644 --- a/tasks/stable-diffusion-fn/.mypy.ini +++ b/tasks/stable-diffusion-fn/.mypy.ini @@ -22,6 +22,3 @@ allow_redefinition = False # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports [mypy-diffusers.*] ignore_missing_imports = True - -[mypy-torch.*] -ignore_missing_imports = True diff --git a/tasks/stable-diffusion/.mypy.ini b/tasks/stable-diffusion/.mypy.ini index 324e81a8..a98b776d 100644 --- a/tasks/stable-diffusion/.mypy.ini +++ b/tasks/stable-diffusion/.mypy.ini @@ -20,14 +20,5 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-scipy.*] -ignore_missing_imports = True - -[mypy-transformers.*] -ignore_missing_imports = True - [mypy-diffusers.*] ignore_missing_imports = True - -[mypy-torch.*] -ignore_missing_imports = True diff --git a/tasks/synthetic-image-generator/.mypy.ini b/tasks/synthetic-image-generator/.mypy.ini index 3a9ec2e5..d3e1fa10 100644 --- a/tasks/synthetic-image-generator/.mypy.ini +++ b/tasks/synthetic-image-generator/.mypy.ini @@ -16,24 +16,3 @@ disallow_incomplete_defs = True no_implicit_optional = True strict_optional = True allow_redefinition = False - - -# Per-module options: -# https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-shapely.*] -ignore_missing_imports = True - -[mypy-pytesseract.*] -ignore_missing_imports = True - -[mypy-easyocr.*] -ignore_missing_imports = True - -[mypy-transformers.*] -ignore_missing_imports = True - -[mypy-PIL.*] -ignore_missing_imports = True - -[mypy-matplotlib.*] -ignore_missing_imports = True diff --git a/tasks/tabular-data-diagnostics/.mypy.ini b/tasks/tabular-data-diagnostics/.mypy.ini index 84da1f14..ebb9536e 100644 --- a/tasks/tabular-data-diagnostics/.mypy.ini +++ b/tasks/tabular-data-diagnostics/.mypy.ini @@ -20,14 +20,8 @@ allow_redefinition = False # Per-module options: # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -[mypy-pandas.*] -ignore_missing_imports = True - [mypy-sklearn.*] ignore_missing_imports = True [mypy-scipy.*] ignore_missing_imports = True - -[mypy-transformers.*] -ignore_missing_imports = True diff --git a/tasks/translation-ollama/.mypy.ini b/tasks/translation-ollama/.mypy.ini index e1191fd9..b2b4f473 100644 --- a/tasks/translation-ollama/.mypy.ini +++ b/tasks/translation-ollama/.mypy.ini @@ -23,8 +23,5 @@ allow_redefinition = False [mypy-ollama.*] ignore_missing_imports = True -[mypy-requests.*] -ignore_missing_imports = True - [mypy-fitz.*] ignore_missing_imports = True From 46d27664adf6e594e2c888a670caa574248dcfb7 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Mon, 12 Aug 2024 10:00:01 +0200 Subject: [PATCH 21/32] CTX-5783: Fixed linter error --- tasks/llm-text-processing/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/llm-text-processing/main.py b/tasks/llm-text-processing/main.py index e78ab705..df79ad5d 100644 --- a/tasks/llm-text-processing/main.py +++ b/tasks/llm-text-processing/main.py @@ -107,7 +107,7 @@ def main() -> None: logging.info(f">> [LLMTextProcessing] Prompt: {prompt}") - responseContent = response["message"]["content"] # type: ignore[index] + responseContent = response["message"]["content"] logging.info(f">> [LLMTextProcessing] Response: {responseContent}") responseTextPath = folder_manager.temp / "response.txt" From 
00d37cb7df799f49fb344a2c5266f466920951fa Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 13 Aug 2024 15:49:31 +0200
Subject: [PATCH 22/32] CTX-5783: Fixed linter error

---
 tasks/stable-diffusion-fn/.mypy.ini | 3 +++
 tasks/stable-diffusion/.mypy.ini | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tasks/stable-diffusion-fn/.mypy.ini b/tasks/stable-diffusion-fn/.mypy.ini
index a98b776d..84fa56da 100644
--- a/tasks/stable-diffusion-fn/.mypy.ini
+++ b/tasks/stable-diffusion-fn/.mypy.ini
@@ -22,3 +22,6 @@ allow_redefinition = False
 # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
 [mypy-diffusers.*]
 ignore_missing_imports = True
+
+[mypy-torch.*]
+ignore_missing_imports = Truev
diff --git a/tasks/stable-diffusion/.mypy.ini b/tasks/stable-diffusion/.mypy.ini
index a98b776d..c5af1153 100644
--- a/tasks/stable-diffusion/.mypy.ini
+++ b/tasks/stable-diffusion/.mypy.ini
@@ -22,3 +22,6 @@ allow_redefinition = False
 # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
 [mypy-diffusers.*]
 ignore_missing_imports = True
+
+[mypy-torch.*]
+ignore_missing_imports = True

From 84dbea9b64d874a6aa79f434368bf653af28be75 Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 13 Aug 2024 16:20:32 +0200
Subject: [PATCH 23/32] CTX-5783: Fixed linter error (img-seg)

---
 tasks/image-segmentation/src/detect.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tasks/image-segmentation/src/detect.py b/tasks/image-segmentation/src/detect.py
index cf17ebea..9f91c96f 100644
--- a/tasks/image-segmentation/src/detect.py
+++ b/tasks/image-segmentation/src/detect.py
@@ -38,14 +38,14 @@ def run(taskRun: TaskRun, model: KerasModel, dataset: ImageDataset) -> None:
 
     fig, axes = plt.subplots(1, 3)
 
-    axes[0].set_title("Input image") # type: ignore[index]
-    axes[0].imshow(resized) # type: ignore[index]
+    axes[0].set_title("Input image")
+    axes[0].imshow(resized)
 
-    axes[1].set_title("Groundtruth mask") # type: ignore[index]
-    axes[1].imshow(groundtruth) # type: ignore[index]
+    axes[1].set_title("Groundtruth mask")
+    axes[1].imshow(groundtruth)
 
-    axes[2].set_title("Predicted mask") # type: ignore[index]
-    axes[2].imshow(prediction) # type: ignore[index]
+    axes[2].set_title("Predicted mask")
+    axes[2].imshow(prediction)
 
     plotPath = folder_manager.temp / f"{sample.id}.png"

From 2d2fe62c5bb8ffbd964114d422e13fbec908a970 Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 13 Aug 2024 16:31:52 +0200
Subject: [PATCH 24/32] CTX-5783: Fixed linter error (diffusion-fn)

---
 tasks/stable-diffusion-fn/.mypy.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/stable-diffusion-fn/.mypy.ini b/tasks/stable-diffusion-fn/.mypy.ini
index 84fa56da..c5af1153 100644
--- a/tasks/stable-diffusion-fn/.mypy.ini
+++ b/tasks/stable-diffusion-fn/.mypy.ini
@@ -24,4 +24,4 @@ allow_redefinition = False
 ignore_missing_imports = True
 
 [mypy-torch.*]
-ignore_missing_imports = Truev
+ignore_missing_imports = True

From 66761eb9d2ea60400a9d7ce3b917faf7210a67b4 Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 13 Aug 2024 17:00:48 +0200
Subject: [PATCH 25/32] CTX-5783: Fixed unpacking error (image-quality-predictor)

---
 tasks/image-quality-predictor/main.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tasks/image-quality-predictor/main.py b/tasks/image-quality-predictor/main.py
index 6a52a212..c70cd414 100644
--- a/tasks/image-quality-predictor/main.py
+++ b/tasks/image-quality-predictor/main.py
@@ -97,12 +97,15 @@ def 
train(taskRun: TaskRun, dataset: list[tuple[ImageSample, float]]) -> None: # Calculate model accuracy logging.info(">> [ImageQuality] Validating model...") - sampleResultsCsvPath, accuracy = validation.run(modelPath / "best.pt", trainData + validData, transform) + sampleResultsCsvPath, datasetResultsCsvvPath, accuracy = validation.run(modelPath / "best.pt", trainData + validData, transform) logging.info(f">> [ImageQuality] Model accuracy: {accuracy:.2f}%") if taskRun.createArtifact(sampleResultsCsvPath, sampleResultsCsvPath.name) is None: logging.error(f">> [ImageQuality] Failed to create artifact \"{sampleResultsCsvPath.name}\"") + if taskRun.createArtifact(datasetResultsCsvvPath, datasetResultsCsvvPath.name) is None: + logging.error(f">> [ImageQuality] Failed to create artifact \"{datasetResultsCsvvPath.name}\"") + logging.info(">> [ImageQuality] Uploading model...") ctxModel = Model.createModel(taskRun.generateEntityName(), taskRun.projectId, accuracy) ctxModel.upload(modelPath) From c86d52b3604779f7f752d01dcc2558de4dc312a1 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Wed, 14 Aug 2024 09:42:38 +0200 Subject: [PATCH 26/32] CTX-5783: Fixed linter error (translation-ollama) --- tasks/image-quality-predictor/main.py | 6 +++--- tasks/translation-ollama/main.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/image-quality-predictor/main.py b/tasks/image-quality-predictor/main.py index c70cd414..4e354d01 100644 --- a/tasks/image-quality-predictor/main.py +++ b/tasks/image-quality-predictor/main.py @@ -97,14 +97,14 @@ def train(taskRun: TaskRun, dataset: list[tuple[ImageSample, float]]) -> None: # Calculate model accuracy logging.info(">> [ImageQuality] Validating model...") - sampleResultsCsvPath, datasetResultsCsvvPath, accuracy = validation.run(modelPath / "best.pt", trainData + validData, transform) + sampleResultsCsvPath, datasetResultsCsvPath, accuracy = validation.run(modelPath / "best.pt", trainData + validData, transform) logging.info(f">> [ImageQuality] Model accuracy: {accuracy:.2f}%") if taskRun.createArtifact(sampleResultsCsvPath, sampleResultsCsvPath.name) is None: logging.error(f">> [ImageQuality] Failed to create artifact \"{sampleResultsCsvPath.name}\"") - if taskRun.createArtifact(datasetResultsCsvvPath, datasetResultsCsvvPath.name) is None: - logging.error(f">> [ImageQuality] Failed to create artifact \"{datasetResultsCsvvPath.name}\"") + if taskRun.createArtifact(datasetResultsCsvPath, datasetResultsCsvPath.name) is None: + logging.error(f">> [ImageQuality] Failed to create artifact \"{datasetResultsCsvPath.name}\"") logging.info(">> [ImageQuality] Uploading model...") ctxModel = Model.createModel(taskRun.generateEntityName(), taskRun.projectId, accuracy) diff --git a/tasks/translation-ollama/main.py b/tasks/translation-ollama/main.py index 7476b179..d57bbccf 100644 --- a/tasks/translation-ollama/main.py +++ b/tasks/translation-ollama/main.py @@ -71,7 +71,7 @@ def main() -> None: "content": query } response = ollama.chat(model = LLM, messages = [msg]) # type: ignore[list-item] - answer = response["message"]["content"] # type: ignore[index] + answer = response["message"]["content"] translatedText += answer + "\n" txtFileName = f"file-{counter}.txt" From 5148830f92ee093c7420646c0da22f12574e4040 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Wed, 14 Aug 2024 09:49:30 +0200 Subject: [PATCH 27/32] CTX-5783: Fixed linter error (tabular-data-diagnostics) --- tasks/tabular-data-diagnostics/src/dataset.py | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/tasks/tabular-data-diagnostics/src/dataset.py b/tasks/tabular-data-diagnostics/src/dataset.py index e42945d2..2bd3b0f4 100644 --- a/tasks/tabular-data-diagnostics/src/dataset.py +++ b/tasks/tabular-data-diagnostics/src/dataset.py @@ -5,7 +5,7 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import LabelEncoder -from pandas import DataFrame +from pandas import DataFrame, Series import pandas as pd @@ -68,14 +68,14 @@ def loadDataset(coretexDataset: CustomDataset, validationSplit: float, labelColu return train, test, labels -def extractXY(df: DataFrame, labelColumn: str) -> tuple[DataFrame, DataFrame]: +def extractXY(df: DataFrame, labelColumn: str) -> tuple[DataFrame, Series]: x = df.drop(labelColumn, axis = 1) y = df[labelColumn] return x, y -def extractTestTrainData(train: DataFrame, test: DataFrame, labelColumn: str) -> tuple[DataFrame, DataFrame, DataFrame, DataFrame]: +def extractTestTrainData(train: DataFrame, test: DataFrame, labelColumn: str) -> tuple[DataFrame, DataFrame, Series, Series]: logging.info(f">> [Tabular Data Diagnostics] Extracting test and train data...") xTrain, yTrain = extractXY(train, labelColumn) xTest, yTest = extractXY(test, labelColumn) From f19db18fe58daa05030da94d21d8a27bff1f1ea1 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Wed, 14 Aug 2024 09:59:17 +0200 Subject: [PATCH 28/32] CTX-5783: Fixed linter error (translation-ollama) --- tasks/translation-ollama/main.py | 2 +- tasks/translation-ollama/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/translation-ollama/main.py b/tasks/translation-ollama/main.py index d57bbccf..a0d28b40 100644 --- a/tasks/translation-ollama/main.py +++ b/tasks/translation-ollama/main.py @@ -70,7 +70,7 @@ def main() -> None: "role": "user", "content": query } - response = ollama.chat(model = LLM, messages = [msg]) # type: ignore[list-item] + response = ollama.chat(model = LLM, messages = [msg]) answer = response["message"]["content"] translatedText += answer + "\n" diff --git a/tasks/translation-ollama/model.py b/tasks/translation-ollama/model.py index f8650779..0453e3cd 100644 --- a/tasks/translation-ollama/model.py +++ b/tasks/translation-ollama/model.py @@ -33,7 +33,7 @@ def installOllama() -> None: def isOllamaServerActiveAndRunning() -> bool: try: response = requests.get(OLLAMA_SERVER_URL) - return response.ok # type: ignore[no-any-return] + return response.ok except requests.ConnectionError: return False From 5ab69a0fa65f56588b4de2eef3614bfafb3052d0 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Wed, 14 Aug 2024 10:12:22 +0200 Subject: [PATCH 29/32] CTX-5783: Fixed linter error (translation-ollama) --- tasks/translation-ollama/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/translation-ollama/main.py b/tasks/translation-ollama/main.py index a0d28b40..d57bbccf 100644 --- a/tasks/translation-ollama/main.py +++ b/tasks/translation-ollama/main.py @@ -70,7 +70,7 @@ def main() -> None: "role": "user", "content": query } - response = ollama.chat(model = LLM, messages = [msg]) + response = ollama.chat(model = LLM, messages = [msg]) # type: ignore[list-item] answer = response["message"]["content"] translatedText += answer + "\n" From a6f693290841e3f24c445de92d509a9df91d0355 Mon Sep 17 00:00:00 2001 From: Vuk Manojlovic Date: Tue, 20 Aug 2024 14:56:33 +0200 Subject: [PATCH 30/32] CTX-5783: Removed ignore_missing_imports --- 
tasks/bio-read-quality/.mypy.ini | 3 ---
 tasks/qiime-import/mypy.ini | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/tasks/bio-read-quality/.mypy.ini b/tasks/bio-read-quality/.mypy.ini
index 595a74e1..9b7a6fef 100644
--- a/tasks/bio-read-quality/.mypy.ini
+++ b/tasks/bio-read-quality/.mypy.ini
@@ -20,8 +20,5 @@ allow_redefinition = False
 
 # Per-module options:
 # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
-[mypy-matplotlib.*]
-ignore_missing_imports = True
-
 [mypy-Bio.*]
 ignore_missing_imports = True
diff --git a/tasks/qiime-import/mypy.ini b/tasks/qiime-import/mypy.ini
index 382d22bd..c9224ebb 100644
--- a/tasks/qiime-import/mypy.ini
+++ b/tasks/qiime-import/mypy.ini
@@ -23,6 +23,3 @@ allow_redefinition = False
 # https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports
 [mypy-cchardet.*]
 ignore_missing_imports = True
-
-[mypy-pandas.*]
-ignore_missing_imports = True

From 4048cd40e670211afdddda6b23ceaef334a626c1 Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 20 Aug 2024 15:42:21 +0200
Subject: [PATCH 31/32] CTX-5783: Fixed linter error (image-extractor)

---
 tasks/image-extractor/src/sample_generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/image-extractor/src/sample_generator.py b/tasks/image-extractor/src/sample_generator.py
index 7f0b24e1..be0fa084 100644
--- a/tasks/image-extractor/src/sample_generator.py
+++ b/tasks/image-extractor/src/sample_generator.py
@@ -42,7 +42,7 @@ def generateSample(sample: ImageSample, parentClass: Optional[ImageDatasetClass]
 
     try:
         sample.metadataPath.link_to(samplePath / "metadata.json")
     except AttributeError as e:
-        samplePath.joinpath("metadata.json").hardlink_to(sample.metadataPath)
+        samplePath.joinpath("metadata.json").hardlink_to(sample.metadataPath) # type: ignore[attr-defined]
 
     imagePaths.append(samplePath)

From eb35f642c1ad1bccf871396cccf6aa00ab6f61ed Mon Sep 17 00:00:00 2001
From: Vuk Manojlovic
Date: Tue, 20 Aug 2024 15:54:56 +0200
Subject: [PATCH 32/32] CTX-5783: Fixed linter error (synthetic-image-generator)

---
 tasks/synthetic-image-generator/main.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tasks/synthetic-image-generator/main.py b/tasks/synthetic-image-generator/main.py
index b6decbf0..62c56d1d 100644
--- a/tasks/synthetic-image-generator/main.py
+++ b/tasks/synthetic-image-generator/main.py
@@ -1,3 +1,4 @@
+from typing import Any, Optional
 from pathlib import Path
 from contextlib import ExitStack
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, Future
@@ -19,7 +20,11 @@ def getRandomSamples(dataset: ImageDataset, count: int) -> list[ImageSample]:
     return [dataset.samples[i] for i in indexes]
 
 
-def didGenerateSample(dataset: ImageDataset, future: Future[tuple[Path, CoretexImageAnnotation]]) -> None:
+def didGenerateSample(
+    dataset: ImageDataset,
+    future: Future[tuple[Path, CoretexImageAnnotation, Optional[dict[str, Any]]]]
+) -> None:
+
     try:
         imagePath, annotation, metadata = future.result()
         generatedSample = dataset.add(imagePath)