diff --git a/README.md b/README.md index 3caef81a..381a7dce 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ The JAR file will be placed in *adaa.analytics.rules/build/libs* subdirectory. O ``` java -jar rulekit--all.jar minimal-deals.xml ``` -Ignore the SLF4J warning reported on the console - it does not affect the procedure. The results of the analysis will be located in *./examples/results-minimal/deals/* folder. Note, that the repository already contains reference results - they will be overwritten. See [this Wiki section](../../wiki/1-Batch-interface) for detailed information on how to configure batch analyses in RuleKit. +Ignore the SLF4J warning reported on the console - it does not affect the procedure. The results of the analysis will be located in *./examples/results-minimal/deals/* folder. Note that the repository already contains reference results - they will be overwritten. See [this Wiki section](https://github.com/adaa-polsl/RuleKit/wiki/1-Batch-interface) for detailed information on how to configure batch analyses in RuleKit. ## RapidMiner plugin @@ -55,7 +55,7 @@ To perform the analysis under RapidMiner, import [./examples/preparation.rmp](/e As the next step, please import [./examples/regression.rmp](./examples/regression.rmp) process. After executing it, datasets are loaded from the RM repository with *Retrieve* operators. Then, the training set is provided as an input for *RuleKit Generator*. The model generated by *RuleKit Generator* is then applied on unseen data (*Apply Model* operator). The performance of the prediction is assesed using *RuleKit Evaluator* operator. Performance metrices as well as generated model are passed as process outputs. -See [this Wiki section](../../wiki/2-RapidMiner-plugin) for detailed information how to configure RuleKit RapidMiner plugin. +See [this Wiki section](https://github.com/adaa-polsl/RuleKit/wiki/2-RapidMiner-plugin) for detailed information on how to configure RuleKit RapidMiner plugin. 
## R package @@ -74,7 +74,7 @@ Then, build the package with *Install and Restart* button (the appropiate versio Below we present a survival analysis of *BMT-Ch* dataset with RuleKit R package. The set concerns the problem of analyzing factors contributing to the patients’ survival following bone marrow transplants. In order to perform the experiment, please run [./examples/survival.R](./examples/survival.R) script in R. As a result, a rule model is trained and survival function estimates for the entire dataset and for the rules are plotted. -[This Wiki section](../../wiki/3-R-package) contains detailed information on using RuleKit R package. +[This Wiki section](https://github.com/adaa-polsl/RuleKit/wiki/3-R-package) contains detailed information on using RuleKit R package. ## Python package @@ -82,36 +82,36 @@ Rulekit Python package can be found [here](https://github.com/adaa-polsl/RuleKit # Documentation -The detailed RuleKit documentation can be found on [Wiki pages](../../wiki) which cover the following topics: - -1. [Batch interface](../../wiki/1-Batch-interface) - 1. [General information](../../wiki/1-Batch-interface#11-general-information) - 2. [Parameter set definition](../../wiki/1-Batch-interface#12-parameter-set-definition) - 3. [Dataset definition](../../wiki/1-Batch-interface#13-dataset-definition) - 4. [Example](../../wiki/1-Batch-interface#14-example) -2. [RapidMiner plugin](../../wiki/2-RapidMiner-plugin) - 1. [Installation](../../wiki/2-RapidMiner-plugin#21-installation) - 2. [Usage](../../wiki/2-RapidMiner-plugin#22-usage) - 3. [Example](../../wiki/2-RapidMiner-plugin#23-example) -3. [R package](../../wiki/3-R-package) - 1. [Installation](../../wiki/3-R-package#31-installation) - 2. [Usage](../../wiki/3-R-package#32-usage) - 3. [Example](../../wiki/3-R-package#33-example) -4. [Quality and evaluation](../../wiki/4-Quality-and-evaluation) - 1. [Rule quality](../../wiki/4-Quality-and-evaluation#41-rule-quality) - 2. 
[Model characteristics](../../wiki/4-Quality-and-evaluation#42-model-characteristics) - 2. [Performance metrices](../../wiki/4-Quality-and-evaluation#43-performance-metrices) -5. [Output files](../../wiki/5-Output-files) - 1. [Training report](../../wiki/5-Output-files#51-training-report) - 2. [Prediction performance report](../../wiki/5-Output-files#52-prediction-performance-report) -6. [User-guided induction](../../wiki/6-User-guided-induction) - 1. [Defining user's knowledge](../../wiki/6-User-guided-induction#61-defining-users-knowledge) - 2. [Examples from GuideR paper](../../wiki/6-User-guided-induction#62-examples-from-guider-paper) -7. [Library API](../../wiki/7-Library-API) - 1. [Running an experiment](../../wiki/7-Library-API#71-running-an-experiment) - 2. [Developing a new algorithm](../../wiki/7-Library-API#72-developing-a-new-algorithm) -8. [Empirical results](../../wiki/8-Empirical-results) -9. [Contrast set mining](../../wiki/9-Contrast-set-mining) +The detailed RuleKit documentation can be found on [Wiki pages](https://github.com/adaa-polsl/RuleKit/wiki) which cover the following topics: + +1. [Batch interface](https://github.com/adaa-polsl/RuleKit/wiki/1-Batch-interface) + 1. [General information](https://github.com/adaa-polsl/RuleKit/wiki/1-Batch-interface#11-general-information) + 2. [Parameter set definition](https://github.com/adaa-polsl/RuleKit/wiki/1-Batch-interface#12-parameter-set-definition) + 3. [Dataset definition](https://github.com/adaa-polsl/RuleKit/wiki/1-Batch-interface#13-dataset-definition) + 4. [Example](https://github.com/adaa-polsl/RuleKit/wiki/1-Batch-interface#14-example) +2. [RapidMiner plugin](https://github.com/adaa-polsl/RuleKit/wiki/2-RapidMiner-plugin) + 1. [Installation](https://github.com/adaa-polsl/RuleKit/wiki/2-RapidMiner-plugin#21-installation) + 2. [Usage](https://github.com/adaa-polsl/RuleKit/wiki/2-RapidMiner-plugin#22-usage) + 3. 
[Example](https://github.com/adaa-polsl/RuleKit/wiki/2-RapidMiner-plugin#23-example) +3. [R package](https://github.com/adaa-polsl/RuleKit/wiki/3-R-package) + 1. [Installation](https://github.com/adaa-polsl/RuleKit/wiki/3-R-package#31-installation) + 2. [Usage](https://github.com/adaa-polsl/RuleKit/wiki/3-R-package#32-usage) + 3. [Example](https://github.com/adaa-polsl/RuleKit/wiki/3-R-package#33-example) +4. [Quality and evaluation](https://github.com/adaa-polsl/RuleKit/wiki/4-Quality-and-evaluation) + 1. [Rule quality](https://github.com/adaa-polsl/RuleKit/wiki/4-Quality-and-evaluation#41-rule-quality) + 2. [Model characteristics](https://github.com/adaa-polsl/RuleKit/wiki/4-Quality-and-evaluation#42-model-characteristics) + 3. [Performance metrics](https://github.com/adaa-polsl/RuleKit/wiki/4-Quality-and-evaluation#43-performance-metrices) +5. [Output files](https://github.com/adaa-polsl/RuleKit/wiki/5-Output-files) + 1. [Training report](https://github.com/adaa-polsl/RuleKit/wiki/5-Output-files#51-training-report) + 2. [Prediction performance report](https://github.com/adaa-polsl/RuleKit/wiki/5-Output-files#52-prediction-performance-report) +6. [User-guided induction](https://github.com/adaa-polsl/RuleKit/wiki/6-User-guided-induction) + 1. [Defining user's knowledge](https://github.com/adaa-polsl/RuleKit/wiki/6-User-guided-induction#61-defining-users-knowledge) + 2. [Examples from GuideR paper](https://github.com/adaa-polsl/RuleKit/wiki/6-User-guided-induction#62-examples-from-guider-paper) +7. [Library API](https://github.com/adaa-polsl/RuleKit/wiki/7-Library-API) + 1. [Running an experiment](https://github.com/adaa-polsl/RuleKit/wiki/7-Library-API#71-running-an-experiment) + 2. [Developing a new algorithm](https://github.com/adaa-polsl/RuleKit/wiki/7-Library-API#72-developing-a-new-algorithm) +8. [Empirical results](https://github.com/adaa-polsl/RuleKit/wiki/8-Empirical-results) +9. 
[Contrast set mining](https://github.com/adaa-polsl/RuleKit/wiki/9-Contrast-set-mining) JavaDoc for the project is available [here](https://adaa-polsl.github.io/RuleKit/). diff --git a/adaa.analytics.rules/build.gradle b/adaa.analytics.rules/build.gradle index ed6b3986..876557b1 100644 --- a/adaa.analytics.rules/build.gradle +++ b/adaa.analytics.rules/build.gradle @@ -27,7 +27,7 @@ codeQuality { } sourceCompatibility = 1.8 -version = '1.7.0' +version = '1.7.1' jar { diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/consoles/RSupportedConsole.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/consoles/RSupportedConsole.java deleted file mode 100644 index 3f4abd20..00000000 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/consoles/RSupportedConsole.java +++ /dev/null @@ -1,196 +0,0 @@ -/******************************************************************************* - * Copyright (C) 2019 RuleKit Development Team - * - * This program is free software: you can redistribute it and/or modify it under the terms of the - * GNU Affero General Public License as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License along with this program. - * If not, see http://www.gnu.org/licenses/. 
- ******************************************************************************/ -package adaa.analytics.rules.consoles; - -import adaa.analytics.rules.experiments.*; -import adaa.analytics.rules.logic.representation.Logger; -import adaa.analytics.rules.logic.representation.SurvivalRule; -import adaa.analytics.rules.operator.ExpertRuleGenerator; - -import com.rapidminer.RapidMiner; -import com.rapidminer.example.Attributes; - -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.logging.Level; - -public class RSupportedConsole { - - protected class ParamSetWrapper { - String name; - Map map = new TreeMap(); - } - - public static void main(String[] args) { - try { - if (args.length == 1) { - - RSupportedConsole console = new RSupportedConsole(); - console.execute(args[0]); - - } else { - throw new IllegalArgumentException("Please specify two arguments"); - } - - } catch (IOException | ParserConfigurationException | SAXException | InterruptedException | ExecutionException e) { - e.printStackTrace(); - } - } - - protected void execute(String configFile) throws ParserConfigurationException, SAXException, IOException, InterruptedException, ExecutionException { - RapidMiner.init(); - Logger.getInstance().addStream(System.out, Level.FINE); - //Logger.getInstance().addStream(new PrintStream("d:/bad.log"), Level.FINEST); - String lineSeparator = System.getProperty("line.separator"); - - int 
threadCount = 1;//Runtime.getRuntime().availableProcessors(); - - ExecutorService pool = Executors.newFixedThreadPool(threadCount); - List futures = new ArrayList(); - - List paramSets = new ArrayList(); - - DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); - Document doc = (Document) dBuilder.parse(configFile); - - NodeList paramSetNodes = ((Document) doc).getElementsByTagName("parameter_set"); - - for (int setId = 0; setId < paramSetNodes.getLength(); setId++) { - ParamSetWrapper wrapper = new ParamSetWrapper(); - Element setNode = (Element) paramSetNodes.item(setId); - wrapper.name = setNode.getAttribute("name"); - Logger.getInstance().log("Reading parameter set " + setNode.getAttribute("name") - + lineSeparator, Level.INFO); - NodeList paramNodes = setNode.getElementsByTagName("param"); - - for (int paramId = 0; paramId < paramNodes.getLength(); ++paramId) { - Element paramNode = (Element) paramNodes.item(paramId); - String name = paramNode.getAttribute("name"); - - String[] expertParamNames = new String[]{ - ExpertRuleGenerator.PARAMETER_EXPERT_RULES, - ExpertRuleGenerator.PARAMETER_EXPERT_PREFERRED_CONDITIONS, - ExpertRuleGenerator.PARAMETER_EXPERT_FORBIDDEN_CONDITIONS - }; - - // parse expert rules/conditions - boolean paramProcessed = false; - for (String expertParamName : expertParamNames) { - if (name.equals(expertParamName)) { - List expertRules = new ArrayList(); - NodeList ruleNodes = paramNode.getElementsByTagName("entry"); - - for (int ruleId = 0; ruleId < ruleNodes.getLength(); ++ruleId) { - Element ruleNode = (Element) ruleNodes.item(ruleId); - String ruleName = ruleNode.getAttribute("name"); - String ruleContent = ruleNode.getTextContent(); - expertRules.add(new String[]{ruleName, ruleContent}); - } - wrapper.map.put(expertParamName, expertRules); - paramProcessed = true; - } - } - - if (!paramProcessed) { - String value = paramNode.getTextContent(); - 
wrapper.map.put(name, value); - } - } - - paramSets.add(wrapper); - } - Logger.getInstance().log("Processing datasets" + lineSeparator, Level.INFO); - NodeList datasetNodes = ((Document) doc).getElementsByTagName("dataset"); - for (int datasetId = 0; datasetId < datasetNodes.getLength(); datasetId++) { - Logger.getInstance().log("Processing dataset" + datasetId + lineSeparator, Level.INFO); - Element node = (Element) datasetNodes.item(datasetId); - - String name = node.getAttribute("name"); - String path = node.getElementsByTagName("path").item(0).getTextContent(); - String label = node.getElementsByTagName("label").item(0).getTextContent(); - String reportPath = node.getElementsByTagName("report_path").item(0).getTextContent(); - - Map options = new HashMap(); - if (node.getElementsByTagName(SurvivalRule.SURVIVAL_TIME_ROLE).getLength() > 0) { - String val = node.getElementsByTagName(SurvivalRule.SURVIVAL_TIME_ROLE).item(0).getTextContent(); - options.put(SurvivalRule.SURVIVAL_TIME_ROLE, val); - } - - if (node.getElementsByTagName(Attributes.WEIGHT_NAME).getLength() > 0) { - String val = node.getElementsByTagName(Attributes.WEIGHT_NAME).item(0).getTextContent(); - options.put(Attributes.WEIGHT_NAME, val); - } - - Logger.log("Name " + name + lineSeparator + - "Path " + path + lineSeparator + - "Label " + label + lineSeparator + - "Report path " + reportPath + lineSeparator, Level.INFO); - - // create experiments for all params sets - for (ParamSetWrapper wrapper : paramSets) { - String paramString = ""; - - if (wrapper.name.length() > 0) { - paramString += ", " + wrapper.name; - - } else { - for (String key : wrapper.map.keySet()) { - Object o = wrapper.map.get(key); - if (o instanceof String) { - paramString += ", " + key + "=" + wrapper.map.get(key); - - } - } - } - File file = new File(path); - - String qualityReport = reportPath + "/" + name + paramString + ".csv"; - String modelReport = reportPath + "/" + name + paramString + ".res"; - - ExperimentBase exp = 
null; - exp = new GeneralExperiment(file, new SynchronizedReport(qualityReport), new SynchronizedReport(modelReport), label, options, wrapper.map); - Future f = pool.submit(exp); - futures.add(f); - } - } - Logger.getInstance().log("Finished processing datasets" + lineSeparator, Level.INFO); - - for (Future f : futures) { - f.get(); - } - - Logger.getInstance().log("Experiments finished", Level.INFO); - RapidMiner.quit(RapidMiner.ExitMode.NORMAL); - } -} diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/experiments/InternalXValidationExperiment.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/experiments/InternalXValidationExperiment.java deleted file mode 100644 index f140aedd..00000000 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/experiments/InternalXValidationExperiment.java +++ /dev/null @@ -1,251 +0,0 @@ -/******************************************************************************* - * Copyright (C) 2019 RuleKit Development Team - * - * This program is free software: you can redistribute it and/or modify it under the terms of the - * GNU Affero General Public License as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License along with this program. - * If not, see http://www.gnu.org/licenses/. 
- ******************************************************************************/ -package adaa.analytics.rules.experiments; - -import adaa.analytics.rules.logic.representation.Logger; -import adaa.analytics.rules.logic.representation.RuleSetBase; -import adaa.analytics.rules.logic.representation.SurvivalRule; -import adaa.analytics.rules.operator.RuleGenerator; -import adaa.analytics.rules.utils.RapidMiner5; - -import com.rapidminer.example.Attributes; -import com.rapidminer.example.set.SplittedExampleSet; -import com.rapidminer.operator.*; -import com.rapidminer.operator.performance.PerformanceVector; -import com.rapidminer.operator.preprocessing.filter.ChangeAttributeRole; -import com.rapidminer.operator.validation.XValidation; -import com.rapidminer.tools.OperatorService; -import com.rapidminer.tools.RandomGenerator; -import com.rapidminer5.operator.io.ArffExampleSource; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.security.InvalidParameterException; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; - -public class InternalXValidationExperiment extends ExperimentBase { - - protected String modelFile; - - protected File arffFile; - - protected List> paramsSets; - - - public InternalXValidationExperiment( - File arffFile, - SynchronizedReport qualityReport, - SynchronizedReport modelReport, - String labelParameter, - int foldCount, - Type experimentType, - List> paramsSets) { - - super(qualityReport, modelReport); - - try { - this.arffFile = arffFile; - this.paramsSets = paramsSets = paramsSets; - - ArffExampleSource arffSource = RapidMiner5.createOperator(ArffExampleSource.class); - ChangeAttributeRole roleSetter = (ChangeAttributeRole)OperatorService.createOperator(ChangeAttributeRole.class); - XValidation validation = 
(XValidation)OperatorService.createOperator(XValidation.class); - validation.setParameter(XValidation.PARAMETER_NUMBER_OF_VALIDATIONS, Integer.toString(foldCount)); - validation.setParameter(XValidation.PARAMETER_CREATE_COMPLETE_MODEL, Boolean.toString(true)); - - ModelApplier globalApplier = (ModelApplier)OperatorService.createOperator(ModelApplier.class); - - // configure main process - process.getRootOperator().getSubprocess(0).addOperator(arffSource); - process.getRootOperator().getSubprocess(0).addOperator(roleSetter); - process.getRootOperator().getSubprocess(0).addOperator(validation); - process.getRootOperator().getSubprocess(0).addOperator(globalApplier); - process.getRootOperator().getSubprocess(0).addOperator(globalEvaluator); - - arffSource.getOutputPorts().getPortByName("output").connectTo(roleSetter.getInputPorts().getPortByName("example set input")); - - // configure role setter - roleSetter.setParameter(roleSetter.PARAMETER_NAME, labelParameter); - roleSetter.setParameter(roleSetter.PARAMETER_TARGET_ROLE, Attributes.LABEL_NAME); - - if (experimentType == Type.SURVIVAL_BY_CLASSIFICATION || experimentType == Type.SURVIVAL_BY_REGRESSION) { - List roles = new ArrayList(); - roles.add(new String[]{"survival_time", SurvivalRule.SURVIVAL_TIME_ROLE}); - roleSetter.setListParameter(roleSetter.PARAMETER_CHANGE_ATTRIBUTES, roles); - } - - roleSetter.getOutputPorts().getPortByName("example set output").connectTo(validation.getInputPorts().getPortByName("training")); - - // use stratified CV in all cases beside regression - validation.setParameter(XValidation.PARAMETER_SAMPLING_TYPE, - experimentType == Type.REGRESSION || experimentType == Type.SURVIVAL_BY_REGRESSION - ? 
SplittedExampleSet.SHUFFLED_SAMPLING + "" - : SplittedExampleSet.STRATIFIED_SAMPLING + ""); - - validation.setParameter(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED, "true"); - validation.setParameter(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED, "1"); - - // configure training subprocess - ExecutionUnit trainer = validation.getSubprocess(0); - trainer.addOperator(ruleGenerator); - trainer.getInnerSources().getPortByName("training").connectTo(ruleGenerator.getInputPorts().getPortByName("training set")); - ruleGenerator.getOutputPorts().getPortByName("model").connectTo(trainer.getInnerSinks().getPortByName("model")); - ruleGenerator.getOutputPorts().getPortByName("estimated performance").connectTo(trainer.getInnerSinks().getPortByName("through 1")); - - // configure testing subprocess - ExecutionUnit tester = validation.getSubprocess(1); - - ModelApplier applier = (ModelApplier)OperatorService.createOperator(ModelApplier.class); - - tester.addOperator(applier); - tester.addOperator(validationEvaluator); - - tester.getInnerSources().getPortByName("model").connectTo(applier.getInputPorts().getPortByName("model")); - tester.getInnerSources().getPortByName("test set").connectTo(applier.getInputPorts().getPortByName("unlabelled data")); - applier.getOutputPorts().getPortByName("labelled data").connectTo(validationEvaluator.getInputPorts().getPortByName("labelled data")); - validationEvaluator.getOutputPorts().getPortByName("performance").connectTo(tester.getInnerSinks().getPortByName("averagable 1")); - - tester.getInnerSources().getPortByName("through 1").connectTo(validationEvaluator.getInputPorts().getPortByName("performance")); - - // connect performance vector directly to process output - validation.getOutputPorts().getPortByName("averagable 1").connectTo( - process.getRootOperator().getSubprocess(0).getInnerSinks().getPortByIndex(0)); - - - // connect entire training set and corresponding model from validation to globa applier - 
validation.getOutputPorts().getPortByName("training").connectTo(globalApplier.getInputPorts().getPortByName("unlabelled data")); - validation.getOutputPorts().getPortByName("model").connectTo(globalApplier.getInputPorts().getPortByName("model")); - - // get outputs of model applier to - globalApplier.getOutputPorts().getPortByName("model").connectTo(process.getRootOperator().getSubprocess(0).getInnerSinks().getPortByIndex(1)); - - globalApplier.getOutputPorts().getPortByName("labelled data").connectTo(globalEvaluator.getInputPorts().getPortByName("labelled data")); - globalEvaluator.getOutputPorts().getPortByName("performance").connectTo(process.getRootOperator().getSubprocess(0).getInnerSinks().getPortByIndex(2)); - - arffSource.setParameter(ArffExampleSource.PARAMETER_DATA_FILE, arffFile.getAbsolutePath()); - - } catch (Exception ex) { - ex.printStackTrace(); - } - } - - @Override - public void run() { - - try { - - DateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd_HH.mm.ss"); - - Logger.log("Processing: " + arffFile.getName() + "\n", Level.FINE); - - Date begin = new Date(); - String dateString = dateFormat.format(begin); - - Logger.log("started!\n", Level.FINE); - - // generate headers - String paramsHeader = ", ,"; - String performanceHeader = "Dataset, time started, "; - String row = arffFile + "," + dateString + ","; - - for (Map params : paramsSets) { - - // set parameters - for (String key: params.keySet()) { - Object o = params.get(key); - - if (o instanceof String) { - ruleGenerator.setParameter(key, (String)o); - } else if (o instanceof List) { - ruleGenerator.setListParameter(key, (List)o); - } else { - throw new InvalidParameterException(); - } - } - - - long t1 = System.nanoTime(); - IOContainer out; - out = process.run(); - long t2 = System.nanoTime(); - - if (modelFile.length() > 0) { - IOObject[] objs = out.getIOObjects(); - FileWriter fw = new FileWriter(modelFile); - BufferedWriter bw = new BufferedWriter(fw); - Model model = 
(Model)objs[1]; - bw.write(model.toString()); - - bw.write("\n"); - - // add model performance - RuleSetBase rs = (RuleSetBase)model; - PerformanceVector performance = RuleGenerator.recalculatePerformance(rs); - for (String name : performance.getCriteriaNames()) { - double avg = performance.getCriterion(name).getAverage(); - bw.write(name + ": " + avg + "\n"); - } - - // add evaluator performance - performance = (PerformanceVector)objs[2]; - for (String name : performance.getCriteriaNames()) { - double avg = performance.getCriterion(name).getAverage(); - bw.write(name + ": " + avg + "\n"); - } - - bw.close(); - } - - - double elapsedSec = (double)(t2 - t1) / 1e9; - - PerformanceVector performance = out.get(PerformanceVector.class, 0); - String[] columns = performance.getCriteriaNames(); - - Logger.log(performance + "\n", Level.FINE); - - // generate headers - paramsHeader += ruleGenerator.toString() + ","; - performanceHeader += "elapsed[s], "; - row += elapsedSec + ","; - - for (String name : columns) { - paramsHeader += ", "; - performanceHeader += name + ","; - } - - for (String name : performance.getCriteriaNames()) { - double avg = performance.getCriterion(name).getAverage(); - row += avg + ", "; - } - } - - qualityReport.add(new String[] {paramsHeader, performanceHeader}, row); - - } catch (OperatorException | IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - -} diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/experiments/SplittedXValidationExperiment.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/experiments/SplittedXValidationExperiment.java deleted file mode 100644 index 50902ea3..00000000 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/experiments/SplittedXValidationExperiment.java +++ /dev/null @@ -1,244 +0,0 @@ -/******************************************************************************* - * Copyright (C) 2019 RuleKit Development Team - * - * This program is free 
software: you can redistribute it and/or modify it under the terms of the - * GNU Affero General Public License as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without - * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License along with this program. - * If not, see http://www.gnu.org/licenses/. - ******************************************************************************/ -package adaa.analytics.rules.experiments; - -import java.io.File; -import java.io.IOException; -import java.security.InvalidParameterException; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; - -import adaa.analytics.rules.utils.RapidMiner5; - -import org.apache.commons.lang.StringUtils; - -import adaa.analytics.rules.logic.representation.Logger; -import adaa.analytics.rules.logic.representation.SurvivalRule; - -import com.rapidminer.example.Attributes; -import com.rapidminer.operator.IOContainer; -import com.rapidminer.operator.IOObject; -import com.rapidminer.operator.Model; -import com.rapidminer.operator.ModelApplier; -import com.rapidminer5.operator.io.ArffExampleSource; -import com.rapidminer.operator.performance.PerformanceVector; -import com.rapidminer.operator.preprocessing.filter.ChangeAttributeRole; -import com.rapidminer.tools.OperatorService; -import com.sun.tools.javac.util.Pair; - -public class SplittedXValidationExperiment extends ExperimentBase { - - protected File arffDir; - - protected ArffExampleSource trainArff; - - protected ArffExampleSource testArff; - - Pair> paramSet; - - public 
SplittedXValidationExperiment( - File arffDir, - SynchronizedReport qualityReport, - SynchronizedReport modelReport, - String labelAttribute, - Map options, - Pair> paramSet) { - - super(qualityReport, modelReport); - - try { - this.arffDir = arffDir; - this.paramSet = paramSet; - - - trainArff = RapidMiner5.createOperator(ArffExampleSource.class); - testArff = RapidMiner5.createOperator(ArffExampleSource.class); - ChangeAttributeRole trainRoleSetter = (ChangeAttributeRole)OperatorService.createOperator(ChangeAttributeRole.class); - ChangeAttributeRole testRoleSetter = (ChangeAttributeRole)OperatorService.createOperator(ChangeAttributeRole.class); - ModelApplier applier = (ModelApplier)OperatorService.createOperator(ModelApplier.class); - - - // configure main process - process = new com.rapidminer.Process(); - process.getRootOperator().getSubprocess(0).addOperator(trainArff); - process.getRootOperator().getSubprocess(0).addOperator(testArff); - process.getRootOperator().getSubprocess(0).addOperator(trainRoleSetter); - process.getRootOperator().getSubprocess(0).addOperator(testRoleSetter); - process.getRootOperator().getSubprocess(0).addOperator(ruleGenerator); - process.getRootOperator().getSubprocess(0).addOperator(applier); - process.getRootOperator().getSubprocess(0).addOperator(validationEvaluator); - - trainArff.getOutputPorts().getPortByName("output").connectTo(trainRoleSetter.getInputPorts().getPortByName("example set input")); - trainRoleSetter.getOutputPorts().getPortByName("example set output").connectTo(ruleGenerator.getInputPorts().getPortByName("training set")); - - testArff.getOutputPorts().getPortByName("output").connectTo(testRoleSetter.getInputPorts().getPortByName("example set input")); - testRoleSetter.getOutputPorts().getPortByName("example set output").connectTo(applier.getInputPorts().getPortByName("unlabelled data")); - - ruleGenerator.getOutputPorts().getPortByName("model").connectTo(applier.getInputPorts().getPortByName("model")); - - 
applier.getOutputPorts().getPortByName("labelled data").connectTo( - validationEvaluator.getInputPorts().getPortByName("labelled data")); - - // pass estimated performance to - ruleGenerator.getOutputPorts().getPortByName("estimated performance").connectTo( - validationEvaluator.getInputPorts().getPortByName("performance")); - - validationEvaluator.getOutputPorts().getPortByName("performance").connectTo( - process.getRootOperator().getSubprocess(0).getInnerSinks().getPortByIndex(0)); - applier.getOutputPorts().getPortByName("model").connectTo( - process.getRootOperator().getSubprocess(0).getInnerSinks().getPortByIndex(1)); - - - // configure role setter - trainRoleSetter.setParameter(trainRoleSetter.PARAMETER_NAME, labelAttribute); - trainRoleSetter.setParameter(trainRoleSetter.PARAMETER_TARGET_ROLE, Attributes.LABEL_NAME); - - testRoleSetter.setParameter(testRoleSetter.PARAMETER_NAME, labelAttribute); - testRoleSetter.setParameter(testRoleSetter.PARAMETER_TARGET_ROLE, Attributes.LABEL_NAME); - - // survival dataset - set proper role - List roles = new ArrayList(); - - if (options.containsKey(SurvivalRule.SURVIVAL_TIME_ROLE)) { - roles.add(new String[]{options.get(SurvivalRule.SURVIVAL_TIME_ROLE), SurvivalRule.SURVIVAL_TIME_ROLE}); - } - - if (options.containsKey(Attributes.WEIGHT_NAME)) { - roles.add(new String[]{options.get(Attributes.WEIGHT_NAME), Attributes.WEIGHT_NAME}); - } - - if (roles.size() > 0) { - trainRoleSetter.setListParameter(trainRoleSetter.PARAMETER_CHANGE_ATTRIBUTES, roles); - testRoleSetter.setListParameter(testRoleSetter.PARAMETER_CHANGE_ATTRIBUTES, roles); - } - - } catch (Exception ex) { - ex.printStackTrace(); - } - - } - - @Override - public void run() { - try { - - Map params = paramSet.snd; - - for (String key: params.keySet()) { - Object o = params.get(key); - - if (o instanceof String) { - ruleGenerator.setParameter(key, (String)o); - } else if (o instanceof List) { - ruleGenerator.setListParameter(key, (List)o); - } else { - throw new 
InvalidParameterException(); - } - } - - DateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd_HH.mm.ss"); - - Logger.log("Processing: " + arffDir.getName() + "\n", Level.FINE); - - File[] filesListing = arffDir.listFiles(); - if (filesListing == null) { - throw new IOException(); - } - - for (File child : filesListing) { - if (!child.isFile() | !child.getName().contains("train")) { - continue; - } - - String trainFile = child.getName(); - String testFile = trainFile.replace("train", "test"); - - File f = new File(arffDir.getAbsolutePath() + "/" + testFile); - if (!f.exists()) { - Logger.log("TRAIN: " + trainFile + ", TEST: " + testFile + " NOT FOUND!\n" , Level.FINE); - continue; - } - - Logger.log("TRAIN: " + trainFile + ", TEST: " + testFile + "\n" , Level.FINE); - - Date begin = new Date(); - String dateString = dateFormat.format(begin); - - - Logger.log("started!\n", Level.FINE); - trainArff.setParameter(ArffExampleSource.PARAMETER_DATA_FILE, arffDir.getAbsolutePath() + "/" + trainFile); - testArff.setParameter(ArffExampleSource.PARAMETER_DATA_FILE, arffDir.getAbsolutePath() + "/" + testFile); - - long t1 = System.nanoTime(); - IOContainer out = process.run(); - IOObject[] objs = out.getIOObjects(); - long t2 = System.nanoTime(); - double elapsedSec = (double)(t2 - t1) / 1e9; - - PerformanceVector performance = (PerformanceVector)objs[0]; - - if (modelReport != null) { - StringBuilder sb = new StringBuilder(); - sb.append(StringUtils.repeat("=", 80)); - sb.append("\n"); - sb.append(testFile); - sb.append("\n\n"); - Model model = (Model)objs[1]; - sb.append(model.toString()); - - sb.append("\n"); - - // add performance - for (String name : performance.getCriteriaNames()) { - double avg = performance.getCriterion(name).getAverage(); - sb.append(name + ": " + avg + "\n"); - } - - sb.append("\n\n"); - modelReport.append(sb.toString()); - } - - String[] columns = performance.getCriteriaNames(); - - Logger.log(performance + "\n", Level.FINE); - - // generate 
headers - String performanceHeader = "Dataset, time started, elapsed[s], "; - String row = testFile + "," + dateString + "," + elapsedSec + ","; - - for (String name : columns) { - performanceHeader += name + ","; - } - - for (String name : performance.getCriteriaNames()) { - double avg = performance.getCriterion(name).getAverage(); - row += avg + ", "; - } - - qualityReport.add(new String[] {ruleGenerator.toString(), performanceHeader}, row); - } - - } catch (Exception e) { - e.printStackTrace(); - } - } - - -} diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/ExpertRuleGenerator.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/ExpertRuleGenerator.java index bbbf8e6e..3caa4566 100644 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/ExpertRuleGenerator.java +++ b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/ExpertRuleGenerator.java @@ -267,33 +267,7 @@ public Model learn(ExampleSet exampleSet) throws OperatorException { knowledge.setPreferredConditionsPerRule(getParameterAsInt(PARAMETER_PREFERRED_CONDITIONS_PER_RULE)); knowledge.setPreferredAttributesPerRule(getParameterAsInt(PARAMETER_PREFERRED_ATTRIBUTES_PER_RULE)); - InductionParameters params = new InductionParameters(); - params.setInductionMeasure(createMeasure(MeasureDestination.INDUCTION, new ClassificationMeasure(ClassificationMeasure.Correlation))); - params.setPruningMeasure(createMeasure(MeasureDestination.PRUNING, params.getInductionMeasure())); - params.setVotingMeasure(createMeasure(MeasureDestination.VOTING, params.getInductionMeasure())); - - params.setMaximumUncoveredFraction(getParameterAsDouble(PARAMETER_MAX_UNCOVERED_FRACTION)); - - params.setMinimumCovered(getParameterAsDouble(PARAMETER_MINCOV_NEW)); - params.setMaxcovNegative(getParameterAsDouble(PARAMETER_MAXCOV_NEGATIVE)); - params.setMaxRuleCount(getParameterAsInt(PARAMETER_MAX_RULE_COUNT)); - - 
params.setEnablePruning(getParameterAsBoolean(PARAMETER_ENABLE_PRUNING)); - params.setIgnoreMissing(getParameterAsBoolean(PARAMETER_IGNORE_MISSING)); - params.setMaxGrowingConditions(getParameterAsDouble(PARAMETER_MAX_GROWING)); - params.setSelectBestCandidate(getParameterAsBoolean(PARAMETER_SELECT_BEST_CANDIDATE)); - params.setConditionComplementEnabled(getParameterAsBoolean(PARAMETER_COMPLEMENTARY_CONDITIONS)); - - String tmp = getParameterAsString(PARAMETER_MINCOV_ALL); - if (tmp.length() > 0) { - List mincovs = Arrays.stream(tmp.split(" +")).map(Double::parseDouble).collect(Collectors.toList()); - - if (mincovs.size() == 1) { - params.setMinimumCoveredAll(mincovs.get(0)); - } else { - params.setMinimumCoveredAll_list(mincovs); - } - } + InductionParameters params = fillParameters(); AbstractFinder finder = null; AbstractSeparateAndConquer snc = null; diff --git a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/RuleGenerator.java b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/RuleGenerator.java index 75053549..51b42481 100644 --- a/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/RuleGenerator.java +++ b/adaa.analytics.rules/src/main/java/adaa/analytics/rules/operator/RuleGenerator.java @@ -215,42 +215,7 @@ public Model learn(ExampleSet exampleSet) throws OperatorException { Model model = null; try { - InductionParameters params = new InductionParameters(); - params.setInductionMeasure(createMeasure(MeasureDestination.INDUCTION, new ClassificationMeasure(ClassificationMeasure.Correlation))); - params.setPruningMeasure(createMeasure(MeasureDestination.PRUNING, params.getInductionMeasure())); - params.setVotingMeasure(createMeasure(MeasureDestination.VOTING, params.getInductionMeasure())); - - params.setMaximumUncoveredFraction(getParameterAsDouble(PARAMETER_MAX_UNCOVERED_FRACTION)); - - params.setMinimumCovered(getParameterAsDouble(PARAMETER_MINCOV_NEW)); - 
params.setMaxcovNegative(getParameterAsDouble(PARAMETER_MAXCOV_NEGATIVE));
-            params.setMaxRuleCount(getParameterAsInt(PARAMETER_MAX_RULE_COUNT));
-
-            params.setEnablePruning(getParameterAsBoolean(PARAMETER_ENABLE_PRUNING));
-            params.setIgnoreMissing(getParameterAsBoolean(PARAMETER_IGNORE_MISSING));
-            params.setMaxGrowingConditions(getParameterAsDouble(PARAMETER_MAX_GROWING));
-            params.setSelectBestCandidate(getParameterAsBoolean(PARAMETER_SELECT_BEST_CANDIDATE));
-            params.setConditionComplementEnabled(getParameterAsBoolean(PARAMETER_COMPLEMENTARY_CONDITIONS));
-
-            params.setPenaltyStrength(getParameterAsDouble(PARAMETER_PENALTY_STRENGTH));
-            params.setPenaltySaturation(getParameterAsDouble(PARAMETER_PENALTY_SATURATION));
-            params.setMaxPassesCount(getParameterAsInt(PARAMETER_MAX_PASSES_COUNT));
-            params.setBinaryContrastIncluded(getParameterAsBoolean(PARAMETER_INCLUDE_BINARY_CONTRAST));
-            params.setMeanBasedRegression(getParameterAsBoolean(PARAMETER_MEAN_BASED_REGRESSION));
-            params.setControlAprioriPrecision(getParameterAsBoolean(PARAMETER_CONTROL_APRORI_PRECISION));
-            params.setApproximateInduction(getParameterAsBoolean(PARAMETER_APPROXIMATE_INDUCTION));
-            params.setApproximateBinsCount(getParameterAsInt(PARAMETER_APPROXIMATE_BINS_COUNT));
-
-            String tmp = getParameterAsString(PARAMETER_MINCOV_ALL);
-            if (tmp.length() > 0) {
-                List mincovs = Arrays.stream(tmp.split(" +")).map(Double::parseDouble).collect(Collectors.toList());
-
-                if (mincovs.size() == 1) {
-                    params.setMinimumCoveredAll(mincovs.get(0));
-                } else {
-                    params.setMinimumCoveredAll_list(mincovs);
-                }
-            }
+            InductionParameters params = fillParameters();
 
             AbstractSeparateAndConquer snc;
             AbstractFinder finder;
@@ -543,4 +508,64 @@ public static PerformanceVector recalculatePerformance(RuleSetBase rs) {
         }
         return pv;
     }
+
+    /**
+     * Creates an {@link InductionParameters} object and fills it from this operator's parameters.
+     * <p>
+     * The induction measure is created first; the pruning and voting measures are then created
+     * with the induction measure passed as the second argument of {@code createMeasure}.
+     * {@code PARAMETER_MINCOV_ALL} is read as a space-separated list of numbers: exactly one value
+     * goes to {@code setMinimumCoveredAll}, any other count goes to {@code setMinimumCoveredAll_list}.
+     *
+     * @return the newly created, filled parameter object
+     * @throws OperatorException declared in the signature; which of the called getters raise it is not visible here
+     * @throws IllegalAccessException declared in the signature; presumably raised by {@code createMeasure} - verify
+     * @throws NumberFormatException if a non-empty token of {@code PARAMETER_MINCOV_ALL} is not a valid double
+     */
+    protected InductionParameters fillParameters() throws OperatorException, IllegalAccessException {
+        InductionParameters params = new InductionParameters();
+        params.setInductionMeasure(createMeasure(MeasureDestination.INDUCTION, new ClassificationMeasure(ClassificationMeasure.Correlation)));
+        params.setPruningMeasure(createMeasure(MeasureDestination.PRUNING, params.getInductionMeasure()));
+        params.setVotingMeasure(createMeasure(MeasureDestination.VOTING, params.getInductionMeasure()));
+
+        params.setMaximumUncoveredFraction(getParameterAsDouble(PARAMETER_MAX_UNCOVERED_FRACTION));
+
+        params.setMinimumCovered(getParameterAsDouble(PARAMETER_MINCOV_NEW));
+        params.setMaxcovNegative(getParameterAsDouble(PARAMETER_MAXCOV_NEGATIVE));
+        params.setMaxRuleCount(getParameterAsInt(PARAMETER_MAX_RULE_COUNT));
+
+        params.setEnablePruning(getParameterAsBoolean(PARAMETER_ENABLE_PRUNING));
+        params.setIgnoreMissing(getParameterAsBoolean(PARAMETER_IGNORE_MISSING));
+        params.setMaxGrowingConditions(getParameterAsDouble(PARAMETER_MAX_GROWING));
+        params.setSelectBestCandidate(getParameterAsBoolean(PARAMETER_SELECT_BEST_CANDIDATE));
+        params.setConditionComplementEnabled(getParameterAsBoolean(PARAMETER_COMPLEMENTARY_CONDITIONS));
+
+        params.setPenaltyStrength(getParameterAsDouble(PARAMETER_PENALTY_STRENGTH));
+        params.setPenaltySaturation(getParameterAsDouble(PARAMETER_PENALTY_SATURATION));
+        params.setMaxPassesCount(getParameterAsInt(PARAMETER_MAX_PASSES_COUNT));
+        params.setBinaryContrastIncluded(getParameterAsBoolean(PARAMETER_INCLUDE_BINARY_CONTRAST));
+        params.setMeanBasedRegression(getParameterAsBoolean(PARAMETER_MEAN_BASED_REGRESSION));
+        params.setControlAprioriPrecision(getParameterAsBoolean(PARAMETER_CONTROL_APRORI_PRECISION));
+        params.setApproximateInduction(getParameterAsBoolean(PARAMETER_APPROXIMATE_INDUCTION));
+        params.setApproximateBinsCount(getParameterAsInt(PARAMETER_APPROXIMATE_BINS_COUNT));
+
+        String tmp = getParameterAsString(PARAMETER_MINCOV_ALL);
+        // NOTE(review): null guard assumes getParameterAsString may return null for an unset parameter - confirm
+        if (tmp != null && tmp.length() > 0) {
+            // split(" +") yields an empty first token when the value starts with spaces;
+            // skip empty tokens so Double.parseDouble is not called on "".
+            List<Double> mincovs = Arrays.stream(tmp.split(" +"))
+                    .filter(token -> !token.isEmpty())
+                    .map(Double::parseDouble)
+                    .collect(Collectors.toList());
+
+            if (mincovs.size() == 1) {
+                params.setMinimumCoveredAll(mincovs.get(0));
+            } else {
+                params.setMinimumCoveredAll_list(mincovs);
+            }
+        }
+
+        return params;
+    }
 }