From 2012d1fc6095cc1dbf51c29c41b1cf348a1aa2b7 Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Fri, 20 Jan 2023 16:23:53 +0100 Subject: [PATCH 01/15] Ignore VS Code specific files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3a06d2527..758bef454 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ # Created by https://www.toptal.com/developers/gitignore/api/intellij+all,maven # Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all,maven From 14575f894cffeb91c5af238a7c425024049ba6ff Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Fri, 20 Jan 2023 16:26:35 +0100 Subject: [PATCH 02/15] Move all datasets into the `docs/datasets` directory --- INSTALL.md | 2 +- README.md | 10 +++++----- STATUS.md | 6 +++--- docker/execute.sh | 6 +++--- docs/{datasets.md => datasets/all.md} | 0 .../datasets.md => datasets/esecfse22-replication.md} | 0 .../datasets.md => datasets/esecfse22-verification.md} | 0 .../diffdetective/datasets/DefaultDatasets.java | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) rename docs/{datasets.md => datasets/all.md} (100%) rename docs/{replication/datasets.md => datasets/esecfse22-replication.md} (100%) rename docs/{verification/datasets.md => datasets/esecfse22-verification.md} (100%) diff --git a/INSTALL.md b/INSTALL.md index 3e9f4609f..ebde581d0 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -133,7 +133,7 @@ You can also run DiffDetective on other datasets by providing the path to the da #### Linux/Mac (bash): `./execute.sh path/to/custom/dataset.md` -The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets.md) in the [docs](docs) folder. +The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). 
You can find [dataset files](docs/datasets/all.md) in the [docs/datasets](docs/datasets) folder. ## Troubleshooting diff --git a/README.md b/README.md index e23d4515f..2aae8bc9e 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This replication package consists of four parts: 1. **DiffDetective**: For our validation, we built _DiffDetective_, a java library and command-line tool to classify edits to variability in git histories of preprocessor-based software product lines. 2. **Appendix**: The appendix of our paper is given in PDF format in the file [appendix.pdf][appendix]. 3. **Haskell Formalization**: We provide an extended formalization in the Haskell programming language as described in our appendix. Its implementation can be found in the Haskell project in the [proofs](proofs) directory. -4. **Dataset Overview**: We provide an overview of the 44 inspected datasets with updated links to their repositories in the file [docs/datasets.md][dataset]. +4. **Dataset Overview**: We provide an overview of the 44 inspected datasets with updated links to their repositories in the file [docs/datasets/all.md][dataset]. ## 1. DiffDetective DiffDetective is a java library and command-line tool to parse and classify edits to variability in git histories of preprocessor-based software product lines by creating [variation diffs][difftree_class] and operating on them. @@ -93,13 +93,13 @@ How to build our library and how to run the example is described in the [proofs/ ## 4. Dataset Overview ### 4.1 Open-Source Repositories -We provide an overview of the used 44 open-source preprocessor-based software product lines in the [docs/datasets.md][dataset] file. +We provide an overview of the used 44 open-source preprocessor-based software product lines in the [docs/datasets/all.md][dataset] file. As described in our paper in Section 5.1, this list contains all systems that were studied by Liebig et al., extended by four new subject systems (Busybox, Marlin, LibSSH, Godot). 
We provide updated links for each system's repository. ### 4.2 Forked Repositories for Replication To guarantee the exact replication of our validation, we created forks of all 44 open-source repositories at the state we performed the validation for our paper. -The forked repositories are listed in the [replication datasets](docs/replication/datasets.md) and are located at the Github user profile [DiffDetective](https://github.com/DiffDetective?tab=repositories). +The forked repositories are listed in the [replication datasets](docs/datasets/esecfse22-replication.md) and are located at the Github user profile [DiffDetective](https://github.com/DiffDetective?tab=repositories). These repositories are used when running the replication as described under `1.2` and in the [INSTALL](INSTALL.md). ## 5. Running DiffDetective on Custom Datasets @@ -110,11 +110,11 @@ You can also run DiffDetective on other datasets by providing the path to the da #### Linux/Mac (bash): `./execute.sh path/to/custom/dataset.md` -The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets.md) in the [docs](docs) folder. +The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets/all.md) in the [docs/datasets](docs/datasets) folder.
[difftree_class]: https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/difftree/DiffTree.html [haskell]: https://www.haskell.org/ -[dataset]: docs/datasets.md +[dataset]: docs/datasets/all.md [appendix]: appendix.pdf [documentation]: https://variantsync.github.io/DiffDetective/docs/javadoc/ diff --git a/STATUS.md b/STATUS.md index 10c908018..df8decb61 100644 --- a/STATUS.md +++ b/STATUS.md @@ -8,7 +8,7 @@ The artifact for the paper _Classifying Edits to Variability in Source Code_ con Practitioners and researches are free to ignore the appendix as well as the haskell formalization and may use DiffDetective out-of-the-box. 2. **Appendix**: The appendix of our paper is given in PDF format in the file [`appendix.pdf`][ddappendix]. 3. **Haskell Formalization**: We provide an extended formalization in the Haskell programming language as described in our appendix. Its implementation can be found in the Haskell project in the [`proofs`][ddproofs] directory. -4. **Dataset Overview**: We provide an overview of the 44 inspected open-source software product lines with updated links to their repositories in the file [docs/datasets.md][dddatasets]. +4. **Dataset Overview**: We provide an overview of the 44 inspected open-source software product lines with updated links to their repositories in the file [docs/datasets/all.md][dddatasets]. 
## Purpose Our artifact has the following purposes: @@ -44,6 +44,6 @@ Furthermore, both DiffDetective and our Haskell formalization serve as reference [ddappendix]: https://github.com/VariantSync/DiffDetective/raw/esecfse22/appendix.pdf [ddproofs]: https://github.com/VariantSync/DiffDetective/tree/esecfse22/proofs [ddlicense]: https://github.com/VariantSync/DiffDetective/blob/main/LICENSE.LGPL3 -[dddatasets]: docs/datasets.md -[ddforks]: docs/replication/datasets.md +[dddatasets]: docs/datasets/all.md +[ddforks]: docs/datasets/esecfse22-replication.md [dddocumentation]: https://variantsync.github.io/DiffDetective/docs/javadoc/ diff --git a/docker/execute.sh b/docker/execute.sh index de77fb4a2..491ed4ecf 100644 --- a/docker/execute.sh +++ b/docker/execute.sh @@ -6,7 +6,7 @@ if [ $1 == '' ] || [ $1 == '--help' ] || [ $1 == '-help' ]; then echo "-- Examples --" echo "Run replication: './execute.sh replication'" echo "Validate the setup: './execute.sh verification'" - echo "# See ./docs/verification/datasets.md for format details" + echo "# See ./docs/datasets/esecfse22-verification.md for format details" echo "Custom dataset: './execute.sh path/to/my_dataset.md'" exit fi @@ -15,10 +15,10 @@ cd /home/sherlock || exit if [ "$1" == 'replication' ]; then echo "Running full replication. Depending on your system, this will require several hours or even a few days." - java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation + java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation docs/datasets/esecfse22-replication.md elif [ "$1" == 'verification' ]; then echo "Running a short verification." 
- java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation docs/verification/datasets.md + java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation docs/datasets/esecfse22-verification.md else echo "" echo "Running detection on a custom dataset with the input file $1" diff --git a/docs/datasets.md b/docs/datasets/all.md similarity index 100% rename from docs/datasets.md rename to docs/datasets/all.md diff --git a/docs/replication/datasets.md b/docs/datasets/esecfse22-replication.md similarity index 100% rename from docs/replication/datasets.md rename to docs/datasets/esecfse22-replication.md diff --git a/docs/verification/datasets.md b/docs/datasets/esecfse22-verification.md similarity index 100% rename from docs/verification/datasets.md rename to docs/datasets/esecfse22-verification.md diff --git a/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java b/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java index ba4a7a9ce..15bafe021 100644 --- a/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java +++ b/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java @@ -15,7 +15,7 @@ public class DefaultDatasets { /** * Path to the markdown file with the links and metadata for each default dataset. */ - public final static Path DEFAULT_DATASETS_FILE = Path.of("docs", "replication", "datasets.md"); + public final static Path DEFAULT_DATASETS_FILE = Path.of("docs", "datasets", "esecfse22-replication.md"); /** * Path to the markdown file with the links and metadata for Emacs only. 
From b60ec0e7dd7cedb085d8105143aad6098e345575 Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Thu, 19 Jan 2023 22:46:35 +0100 Subject: [PATCH 03/15] Escape all shell variable substitutions --- docker/execute.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/execute.sh b/docker/execute.sh index 491ed4ecf..2bc209e5b 100644 --- a/docker/execute.sh +++ b/docker/execute.sh @@ -1,6 +1,6 @@ #! /bin/bash -if [ $1 == '' ] || [ $1 == '--help' ] || [ $1 == '-help' ]; then +if [ "$1" == '' ] || [ "$1" == '--help' ] || [ "$1" == '-help' ]; then echo "Either fully run DiffDetective as presented in the paper (replication), do quick setup verification (verification), or run DiffDetective on a custom dataset by providing the path to the dataset file." echo "-- Examples --" @@ -23,7 +23,7 @@ else echo "" echo "Running detection on a custom dataset with the input file $1" echo "" - java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation $1 + java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation "$1" fi echo "Collecting results." cp -r results/* ../results/ From c89ab7242a5548b55e4473fde4cb50c874f2f8fd Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 19 Feb 2023 13:04:17 +0100 Subject: [PATCH 04/15] Improve log messages in `HistoryAnalysis.runAsync` A repository is either processed or it's skipped but not both. 
--- .../analysis/HistoryAnalysis.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java index 86e7fc33d..75780d697 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java @@ -192,20 +192,23 @@ public static void exportMetadataToFile(final Path outputFile, final Metadat */ public void runAsync() { for (final Repository repo : repositoriesToAnalyze) { - Logger.info(" === Begin Processing {} ===", repo.getRepositoryName()); - final Clock clock = new Clock(); - clock.start(); - final Path repoOutputDir = outputDir.resolve(repo.getRepositoryName()); /// Don't repeat work we already did: - if (!Files.exists(repoOutputDir.resolve(TOTAL_RESULTS_FILE_NAME))) { + if (Files.exists(repoOutputDir.resolve(TOTAL_RESULTS_FILE_NAME))) { + Logger.info(" Skipping repository {} because it has already been processed.", + repo.getRepositoryName()); + } else { + Logger.info(" === Begin Processing {} ===", repo.getRepositoryName()); + final Clock clock = new Clock(); + clock.start(); + analyzeAsync(repo, repoOutputDir, whatToDo, commitsToProcessPerThread); postProcessingOnRepositoryOutputDir.accept(repoOutputDir); - } else { - Logger.info(" Skipping repository {} because it has already been processed.", repo.getRepositoryName()); - } - Logger.info(" === End Processing {} after {} ===", repo.getRepositoryName(), clock.printPassedSeconds()); + Logger.info(" === End Processing {} after {} ===", + repo.getRepositoryName(), + clock.printPassedSeconds()); + } } } } From b16af31dcc1eebab2d8cf245128d61c0b41072ad Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 19 Feb 2023 18:53:02 +0100 Subject: [PATCH 05/15] Remove the deprecated method HistoryAnalysis.analyze --- 
.../analysis/HistoryAnalysis.java | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java index 75780d697..88830cbdc 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java @@ -55,50 +55,6 @@ public record HistoryAnalysis( */ public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000; - @Deprecated - public static void analyze( - final Repository repo, - final Path outputDir, - final ExplainedFilter treeFilter, - final List treePreProcessing, - final LineGraphExportOptions exportOptions, - final AnalysisStrategy strategy) - { - AnalysisResult totalResult; - final GitDiffer differ = new GitDiffer(repo); - final Clock clock = new Clock(); - - // prepare tasks - Logger.info(">>> Scheduling synchronous mining"); - clock.start(); - List commitsToProcess = differ.yieldRevCommits().toList(); - final CommitHistoryAnalysisTask task = new MiningTask(new CommitHistoryAnalysisTask.Options( - repo, - differ, - outputDir.resolve(repo.getRepositoryName() + ".lg"), - treeFilter, - treePreProcessing, - strategy, - commitsToProcess - ), exportOptions); - Logger.info("Scheduled {} commits.", commitsToProcess.size()); - commitsToProcess = null; // free reference to enable garbage collection - Logger.info("<<< done after {}", clock.printPassedSeconds()); - - Logger.info(">>> Run mining"); - clock.start(); - try { - totalResult = task.call(); - } catch (Exception e) { - Logger.error(e); - Logger.info("<<< aborted after {}", clock.printPassedSeconds()); - return; - } - Logger.info("<<< done after {}", clock.printPassedSeconds()); - - exportMetadata(outputDir, totalResult); - } - /** * Static analysis method that can be used without creating an HistoryAnalysis object first. 
* Analyzes the history of the given repository with the given parameters. From e58ba14ae2c159dc35a810f9117dc575ab2661ad Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Thu, 9 Feb 2023 13:16:41 +0100 Subject: [PATCH 06/15] Use hooks for modifying analysis behaviour --- .../analysis/AnalysisResult.java | 135 ++++---- .../analysis/CommitHistoryAnalysisTask.java | 108 ------- .../CommitHistoryAnalysisTaskFactory.java | 30 -- .../analysis/FilterAnalysis.java | 32 ++ .../analysis/HistoryAnalysis.java | 295 ++++++++++++++---- .../analysis/LineGraphExportAnalysis.java | 49 +++ .../diffdetective/analysis/PatchAnalysis.java | 60 ++++ .../analysis/PreprocessingAnalysis.java | 25 ++ .../analysis/StatisticsAnalysis.java | 86 +++++ .../analysis/strategies/AnalysisStrategy.java | 3 +- .../diffdetective/mining/DiffTreeMiner.java | 63 ++-- .../diffdetective/mining/MiningTask.java | 124 -------- .../validation/EditClassValidationTask.java | 121 ------- .../validation/FindMedianCommitTime.java | 4 +- .../diffdetective/validation/Validation.java | 78 ++--- src/test/java/MarlinDebug.java | 18 +- 16 files changed, 635 insertions(+), 596 deletions(-) delete mode 100644 src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java delete mode 100644 src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java create mode 100644 src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java create mode 100644 src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java create mode 100644 src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java create mode 100644 src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java create mode 100644 src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java delete mode 100644 src/main/java/org/variantsync/diffdetective/mining/MiningTask.java delete mode 100644 
src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java diff --git a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java index e9e576f09..77d09366d 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java @@ -69,24 +69,68 @@ public class AnalysisResult implements Metadata { ISEMIGROUP ); - public String repoName; - public int totalCommits; - public int exportedCommits; - public int emptyCommits; - public int failedCommits; - public int exportedTrees; - public double runtimeInSeconds; - public double runtimeWithMultithreadingInSeconds; - public final CommitProcessTime min, max; - public final DiffTreeSerializeDebugData debugData; - public ExplainedFilterSummary filterHits; - public EditClassCount editClassCounts; + /** + * The repo from which the results were collected. + */ + public String repoName = NO_REPO; + /** + * The total number of commits in the observed history of the given repository. + */ + public int totalCommits = 0; + /** + * The number of commits that were processed. + * {@code exportedCommits <= totalCommits} + */ + public int exportedCommits = 0; + /** + * Number of commits that were not processed because they had no DiffTrees. + * A commit is empty iff at least one of the following conditions is met for each of its patches: + *
<ul> + *
<li>the patch did not edit a C file, + *
<li>the DiffTree became empty after transformations (this can happen if there are only whitespace changes), + *
<li>or the patch had syntax errors in its annotations, so the DiffTree could not be parsed. + *
</ul>
+ */ + public int emptyCommits = 0; + /** + * Number of commits that could not be parsed at all because of exceptions when operating JGit. + * + * The number of commits that were filtered because they are a merge commit is thus given as + * {@code totalCommits - exportedCommits - emptyCommits - failedCommits} + */ + public int failedCommits = 0; + /** + * Number of DiffTrees that were processed. + */ + public int exportedTrees = 0; + /** + * The total runtime in seconds (irrespective of multithreading). + */ + public double runtimeInSeconds = 0; + /** + * The effective runtime in seconds that we have when using multithreading. + */ + public double runtimeWithMultithreadingInSeconds = 0; + /** + * The commit that was processed the fastest. + */ + public final CommitProcessTime min; + /** + * The commit that was processed the slowest. + */ + public final CommitProcessTime max; + /** + * Debug data for DiffTree serialization. + */ + public final DiffTreeSerializeDebugData debugData = new DiffTreeSerializeDebugData(); + /** + * Explanations for filter hits, when filtering DiffTrees (e.g., because a diff was empty). + */ + public ExplainedFilterSummary filterHits = new ExplainedFilterSummary(); + public EditClassCount editClassCounts = new EditClassCount(); private final LinkedHashMap customInfo = new LinkedHashMap<>(); private final MergeMap diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum); - /** - * Creates an empty analysis result. - */ public AnalysisResult() { this(NO_REPO); } @@ -96,65 +140,10 @@ public AnalysisResult() { * @param repoName The repo for which to collect results. */ public AnalysisResult(final String repoName) { - this( - repoName, - 0, 0, 0, 0, - 0, - 0, 0, - CommitProcessTime.Unknown(repoName, Long.MAX_VALUE), - CommitProcessTime.Unknown(repoName, Long.MIN_VALUE), - new DiffTreeSerializeDebugData(), - new ExplainedFilterSummary()); - } - - /** - * Creates am analysis result with the given inital values. 
- * @param repoName The repo from which the results where collected. - * @param totalCommits The total number of commits in the observed history of the given repository. - * @param exportedCommits The number of commits that were processed. exportedCommits <= totalCommits - * @param emptyCommits Number of commits that were not processed because they had no DiffTrees. - * A commit is empty iff at least of one of the following conditions is met for every of its patches: - * - the patch did not edit a C file, - * - the DiffTree became empty after transformations (this can happen if there are only whitespace changes), - * - or the patch had syntax errors in its annotations, so the DiffTree could not be parsed. - * @param failedCommits Number of commits that could not be parsed at all because of exceptions when operating JGit. - * The number of commits that were filtered because they are a merge commit is thus given as - * totalCommits - exportedCommits - emptyCommits - failedCommits - * @param exportedTrees Number of DiffTrees that were processed. - * @param runtimeInSeconds The total runtime in seconds (irrespective of multithreading). - * @param runtimeWithMultithreadingInSeconds The effective runtime in seconds that we have when using multithreading. - * @param min The commit that was processed the fastest. - * @param max The commit that was processed the slowest. - * @param debugData Debug data for DiffTree serialization. - * @param filterHits Explanations for filter hits, when filtering DiffTrees (e.g., because a diff was empty). 
- */ - public AnalysisResult( - final String repoName, - int totalCommits, - int exportedCommits, - int emptyCommits, - int failedCommits, - int exportedTrees, - double runtimeInSeconds, - double runtimeWithMultithreadingInSeconds, - final CommitProcessTime min, - final CommitProcessTime max, - final DiffTreeSerializeDebugData debugData, - final ExplainedFilterSummary filterHits) - { this.repoName = repoName; - this.totalCommits = totalCommits; - this.exportedCommits = exportedCommits; - this.emptyCommits = emptyCommits; - this.failedCommits = failedCommits; - this.exportedTrees = exportedTrees; - this.runtimeInSeconds = runtimeInSeconds; - this.runtimeWithMultithreadingInSeconds = runtimeWithMultithreadingInSeconds; - this.debugData = debugData; - this.filterHits = filterHits; - this.editClassCounts = new EditClassCount(); - this.min = min; - this.max = max; + + this.min = CommitProcessTime.Unknown(repoName, Long.MAX_VALUE); + this.max = CommitProcessTime.Unknown(repoName, Long.MIN_VALUE); } /** diff --git a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java b/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java deleted file mode 100644 index 27deb4c49..000000000 --- a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java +++ /dev/null @@ -1,108 +0,0 @@ -package org.variantsync.diffdetective.analysis; - -import org.eclipse.jgit.revwalk.RevCommit; -import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; -import org.variantsync.diffdetective.datasets.Repository; -import org.variantsync.diffdetective.diff.git.GitDiffer; -import org.variantsync.diffdetective.util.CSV; -import org.variantsync.diffdetective.util.IO; -import org.variantsync.diffdetective.util.StringUtils; -import org.variantsync.diffdetective.variation.diff.DiffTree; -import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; -import 
org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.List; -import java.util.concurrent.Callable; - -/** - * Abstract base class for tasks to run during a {@link HistoryAnalysis}. - * A CommitHistoryAnalysisTasks purpose is to process a given set of commits with a specific analysis. - * @author Paul Bittner - */ -public abstract class CommitHistoryAnalysisTask implements Callable { - public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt"; - public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv"; - - /** - * Options that may be specified for processing a set of commits. - * @param repository The repository that is analyzed. - * @param differ The differ that should be used to obtain diffs. - * @param outputDir The path to which any output should be written on disk. - * @param treeFilter filters commits before processing them - * @param treePreProcessing applies a processing function after filtering, but before processing - * @param analysisStrategy A callback that is invoked for each commit. - * @param commits The set of commits to process in this task. - */ - public record Options( - Repository repository, - GitDiffer differ, - Path outputDir, - ExplainedFilter treeFilter, - List treePreProcessing, - AnalysisStrategy analysisStrategy, - Iterable commits - ) {} - - protected final Options options; - - protected CommitHistoryAnalysisTask(final Options options) { - this.options = options; - } - - /** - * Returns the options for this task. - * @return the options for this task. 
- */ - public CommitHistoryAnalysisTask.Options getOptions() { - return options; - } - - @Override - public AnalysisResult call() throws Exception { - options.analysisStrategy().start(options.repository(), options.outputDir()); - - final AnalysisResult miningResult = new AnalysisResult(options.repository.getRepositoryName()); - miningResult.putCustomInfo(MetadataKeys.TASKNAME, this.getClass().getName()); - - return miningResult; - } - - /** - * Exports the given commit times to the given file. Overwrites existing files. - * @param commitTimes List of all CommitProcessTimes to write into a single file. - * @param pathToOutputFile Output file to write. - */ - public static void exportCommitTimes(final List commitTimes, final Path pathToOutputFile) { - final StringBuilder times = new StringBuilder(); - - for (final CommitProcessTime ct : commitTimes) { - times.append(ct.toString()).append(StringUtils.LINEBREAK); - } - - try { - IO.write(pathToOutputFile, times.toString()); - } catch (IOException e) { - Logger.error(e); - System.exit(1); - } - } - - /** - * Exports the given patch statistics to the given file. Overwrites existing files. - * @param commitTimes List of all PatchStatistics to write into a single file. - * @param pathToOutputFile Output file to write. 
- */ - public static void exportPatchStatistics(final List commitTimes, final Path pathToOutputFile) { - final String csv = CSV.toCSV(commitTimes); - - try { - IO.write(pathToOutputFile, csv); - } catch (IOException e) { - Logger.error(e); - System.exit(1); - } - } -} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java b/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java deleted file mode 100644 index 231376da9..000000000 --- a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.variantsync.diffdetective.analysis; - -import org.eclipse.jgit.revwalk.RevCommit; -import org.variantsync.diffdetective.datasets.Repository; -import org.variantsync.diffdetective.diff.git.GitDiffer; - -import java.nio.file.Path; - -/** - * Factory for tasks for {@link HistoryAnalysis}. - * This factory creates a task to run for a given repository and a given set of commits. - * @author Paul Bittner - */ -@FunctionalInterface -public interface CommitHistoryAnalysisTaskFactory { - /** - * Create a task for the given set of commits from the given repository. - * @param repository The repository for whose analysis a task should be created. - * @param differ The differ that should be used to create diffs from the given commits. - * @param outputPath The output path to which any results should be written on disk if necessary. - * @param commits The set of commits that should be processed by the produced task. - * @return A task that process the given set of commits. 
- */ - CommitHistoryAnalysisTask create( - final Repository repository, - final GitDiffer differ, - final Path outputPath, - Iterable commits - ); -} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java new file mode 100644 index 000000000..dad6f3dbe --- /dev/null +++ b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java @@ -0,0 +1,32 @@ +package org.variantsync.diffdetective.analysis; + +import java.util.Arrays; + +import org.variantsync.diffdetective.metadata.ExplainedFilterSummary; +import org.variantsync.diffdetective.variation.diff.DiffTree; +import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; +import org.variantsync.diffdetective.variation.diff.filter.TaggedPredicate; + +public class FilterAnalysis implements HistoryAnalysis.Hooks { + private ExplainedFilter treeFilter; + + public FilterAnalysis(ExplainedFilter treeFilter) { + this.treeFilter = treeFilter; + } + + @SafeVarargs + public FilterAnalysis(TaggedPredicate... 
treeFilter) { + this.treeFilter = new ExplainedFilter(Arrays.stream(treeFilter)); + } + + @Override + public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + return treeFilter.test(analysis.getCurrentDiffTree()); + } + + @Override + public void endCommit(HistoryAnalysis analysis) { + analysis.getResult().filterHits.append(new ExplainedFilterSummary(treeFilter)); + treeFilter.resetExplanations(); + } +} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java index 88830cbdc..ae03abfe2 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java @@ -1,96 +1,134 @@ package org.variantsync.diffdetective.analysis; +import org.apache.commons.lang3.function.FailableBiConsumer; +import org.apache.commons.lang3.function.FailableBiFunction; import org.eclipse.jgit.revwalk.RevCommit; import org.tinylog.Logger; import org.variantsync.diffdetective.analysis.monitoring.TaskCompletionMonitor; -import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; import org.variantsync.diffdetective.datasets.Repository; +import org.variantsync.diffdetective.diff.git.CommitDiff; import org.variantsync.diffdetective.diff.git.GitDiffer; +import org.variantsync.diffdetective.diff.git.PatchDiff; +import org.variantsync.diffdetective.diff.result.CommitDiffResult; import org.variantsync.diffdetective.metadata.Metadata; -import org.variantsync.diffdetective.mining.MiningTask; import org.variantsync.diffdetective.parallel.ScheduledTasksIterator; import org.variantsync.diffdetective.util.Clock; import org.variantsync.diffdetective.util.Diagnostics; import org.variantsync.diffdetective.util.InvocationCounter; import org.variantsync.diffdetective.variation.diff.DiffTree; -import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; -import 
org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; -import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; import org.variantsync.functjonal.iteration.ClusteredIterator; import org.variantsync.functjonal.iteration.MappedIterator; import java.nio.file.Files; import java.nio.file.Path; import java.util.Iterator; +import java.util.ListIterator; import java.util.List; -import java.util.function.Consumer; +import java.util.concurrent.Callable; +import java.util.function.BiConsumer; +import java.util.function.Supplier; /** - * An analyses that is performed for the entire commit histories of each given git repository. - * @param repositoriesToAnalyze The repositories whose commit history should be analyzed. - * @param outputDir The directory to which any produced results should be written. - * @param commitsToProcessPerThread Number of commits that should be processed by each single thread if multithreading is used. - * Each thread will be given this number of commits to process. - * A larger number means fewer threads and less scheduling. - * A smaller number means more threads but also more scheduling. - * @param whatToDo A factory for tasks that should be executed for the commits of a certain repository. - * @param postProcessingOnRepositoryOutputDir A callback that is invoked after all analyses are completed. - * The argument is the output directory on which postprocessing might occur. - * @author Paul Bittner + * @author Paul Bittner, Benjamin Moosherr */ -public record HistoryAnalysis( - List repositoriesToAnalyze, - Path outputDir, - int commitsToProcessPerThread, - CommitHistoryAnalysisTaskFactory whatToDo, - Consumer postProcessingOnRepositoryOutputDir -) { +public class HistoryAnalysis { /** * File name that is used to store the analysis results for each repository. 
*/ public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + AnalysisResult.EXTENSION; /** * Default value for commitsToProcessPerThread - * @see org.variantsync.diffdetective.analysis.HistoryAnalysis#HistoryAnalysis(List, Path, int, CommitHistoryAnalysisTaskFactory, Consumer) + * @see #forEachCommit(Supplier, int, int) */ public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000; - /** - * Static analysis method that can be used without creating an HistoryAnalysis object first. - * Analyzes the history of the given repository with the given parameters. - * @param repo The repository to analyze. - * @param outputDir The directory to which any produced results should be written. - * @param taskFactory A factory for tasks that should be executed for the commits of a certain repository. - * @param commitsToProcessPerThread Number of commits that should be processed by each single thread if multithreading is used. - */ - public static void analyzeAsync( - final Repository repo, - final Path outputDir, - final CommitHistoryAnalysisTaskFactory taskFactory, - int commitsToProcessPerThread) - { - final AnalysisResult totalResult = new AnalysisResult(repo.getRepositoryName()); - final GitDiffer differ = new GitDiffer(repo); + protected final List hooks; + protected final Repository repository; + + protected GitDiffer differ; + protected RevCommit currentCommit; + protected CommitDiff currentCommitDiff; + protected PatchDiff currentPatch; + protected DiffTree currentDiffTree; + + protected final Path outputDir; + protected Path outputFile; + protected final AnalysisResult result; + + public Repository getRepository() { + return repository; + } + + public RevCommit getCurrentCommit() { + return currentCommit; + } + + public CommitDiff getCurrentCommitDiff() { + return currentCommitDiff; + } + + public PatchDiff getCurrentPatch() { + return currentPatch; + } + + public DiffTree getCurrentDiffTree() { + return currentDiffTree; + } + + public Path
getOutputDir() { + return outputDir; + } + + public Path getOutputFile() { + return outputFile; + } + + public AnalysisResult getResult() { + return result; + } + + public interface Hooks { + default void beginBatch(HistoryAnalysis analysis) throws Exception {} + default boolean beginCommit(HistoryAnalysis analysis) throws Exception { return true; } + default boolean onParsedCommit(HistoryAnalysis analysis) throws Exception { return true; } + default boolean beginPatch(HistoryAnalysis analysis) throws Exception { return true; } + default boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { return true; } + default void endPatch(HistoryAnalysis analysis) throws Exception {} + default void endCommit(HistoryAnalysis analysis) throws Exception {} + default void endBatch(HistoryAnalysis analysis) throws Exception {} + } + + public static AnalysisResult forEachCommit(Supplier analysis) { + return forEachCommit( + analysis, + COMMITS_TO_PROCESS_PER_THREAD_DEFAULT, + Diagnostics.INSTANCE.run().getNumberOfAvailableProcessors() + ); + } + + public static AnalysisResult forEachCommit( + Supplier analysisFactory, + final int commitsToProcessPerThread, + final int nThreads + ) { + var analysis = analysisFactory.get(); + analysis.differ = new GitDiffer(analysis.getRepository()); + final Clock clock = new Clock(); // prepare tasks - final int nThreads = Diagnostics.INSTANCE.run().getNumberOfAvailableProcessors(); Logger.info(">>> Scheduling asynchronous analysis on {} threads.", nThreads); clock.start(); final InvocationCounter numberOfTotalCommits = InvocationCounter.justCount(); - final Iterator tasks = new MappedIterator<>( + final Iterator> tasks = new MappedIterator<>( /// 1.) Retrieve COMMITS_TO_PROCESS_PER_THREAD commits from the differ and cluster them into one list. new ClusteredIterator<>( - differ.yieldRevCommitsAfter(numberOfTotalCommits), + analysis.differ.yieldRevCommitsAfter(numberOfTotalCommits), commitsToProcessPerThread ), /// 2.) 
Create a MiningTask for the list of commits. This task will then be processed by one /// particular thread. - commitList -> taskFactory.create( - repo, - differ, - outputDir.resolve(commitList.get(0).getId().getName() + ".lg"), - commitList) + commitList -> () -> analysisFactory.get().processCommits(commitList, analysis.differ) ); Logger.info("<<< done in {}", clock.printPassedSeconds()); @@ -101,7 +139,7 @@ public static void analyzeAsync( try (final ScheduledTasksIterator threads = new ScheduledTasksIterator<>(tasks, nThreads)) { while (threads.hasNext()) { final AnalysisResult threadsResult = threads.next(); - totalResult.append(threadsResult); + analysis.getResult().append(threadsResult); commitSpeedMonitor.addFinishedTasks(threadsResult.exportedCommits); } } catch (Exception e) { @@ -112,10 +150,146 @@ public static void analyzeAsync( final double runtime = clock.getPassedSeconds(); Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime)); - totalResult.runtimeWithMultithreadingInSeconds = runtime; - totalResult.totalCommits = numberOfTotalCommits.invocationCount().get(); + analysis.getResult().runtimeWithMultithreadingInSeconds = runtime; + analysis.getResult().totalCommits = numberOfTotalCommits.invocationCount().get(); - exportMetadata(outputDir, totalResult); + exportMetadata(analysis.getOutputDir(), analysis.getResult()); + return analysis.getResult(); + } + + public HistoryAnalysis( + List hooks, + Repository repository, + Path outputDir + ) { + this.hooks = hooks; + this.repository = repository; + this.outputDir = outputDir; + this.result = new AnalysisResult(repository.getRepositoryName()); + } + + public AnalysisResult processCommits(List commits) throws Exception { + return processCommits(commits, new GitDiffer(getRepository())); + } + + public AnalysisResult processCommits(List commits, GitDiffer differ) throws Exception { + this.differ = differ; + processCommitBatch(commits); + return getResult(); + } + + protected AnalysisResult 
processCommitBatch(List commits) throws Exception { + outputFile = outputDir.resolve(commits.get(0).getId().getName() + ".lg"); + + ListIterator batchHook = hooks.listIterator(); + try { + result.putCustomInfo(MetadataKeys.TASKNAME, this.getClass().getName()); + runHook(batchHook, Hooks::beginBatch); + + // For each commit + for (final RevCommit finalCommit : commits) { + currentCommit = finalCommit; + + ListIterator commitHook = hooks.listIterator(); + try { + if (!runFilterHook(commitHook, Hooks::beginCommit)) { + continue; + } + + processCommit(); + } catch (Exception e) { + Logger.error(e, "An unexpected error occurred at {} in {}", currentCommit.getId().getName(), repository.getRepositoryName()); + throw e; + } finally { + runReverseHook(commitHook, Hooks::endCommit); + } + } + } finally { + runReverseHook(batchHook, Hooks::endBatch); + } + + return result; + } + + protected void processCommit() throws Exception { + // parse the commit + final CommitDiffResult commitDiffResult = differ.createCommitDiff(currentCommit); + + // report any errors that occurred and exit in case no DiffTree could be parsed. 
+ result.reportDiffErrors(commitDiffResult.errors()); + if (commitDiffResult.diff().isEmpty()) { + Logger.debug("found commit that failed entirely because:\n{}", commitDiffResult.errors()); + ++result.failedCommits; + return; + } + + // extract the produced commit diff and inform the hooks + currentCommitDiff = commitDiffResult.diff().get(); + if (!runFilterHook(hooks.listIterator(), Hooks::onParsedCommit)) { + return; + } + + // inspect every patch + for (final PatchDiff finalPatch : currentCommitDiff.getPatchDiffs()) { + currentPatch = finalPatch; + + ListIterator patchHook = hooks.listIterator(); + try { + if (!runFilterHook(patchHook, Hooks::beginPatch)) { + continue; + } + + processPatch(); + } finally { + runReverseHook(patchHook, Hooks::endPatch); + } + } + } + + protected void processPatch() throws Exception { + if (currentPatch.isValid()) { + // generate DiffTree + currentDiffTree = currentPatch.getDiffTree(); + currentDiffTree.assertConsistency(); + + runFilterHook(hooks.listIterator(), Hooks::analyzeDiffTree); + } + } + + protected void runHook(ListIterator hook, FailableBiConsumer callHook) throws Exception { + while (hook.hasNext()) { + callHook.accept(hook.next(), this); + } + } + + protected boolean runFilterHook(ListIterator hook, FailableBiFunction callHook) throws Exception { + while (hook.hasNext()) { + if (!callHook.apply(hook.next(), this)) { + return false; + } + } + + return true; + } + + protected void runReverseHook(ListIterator hook, FailableBiConsumer callHook) throws Exception { + Exception catchedException = null; + while (hook.hasPrevious()) { + try { + callHook.accept(hook.previous(), this); + } catch (Exception e) { + Logger.error(e, "An exception thrown in an end hooks of HistoryAnalysis will be rethrown later"); + if (catchedException == null) { + catchedException = e; + } else { + catchedException.addSuppressed(e); + } + } + } + + if (catchedException != null) { + throw catchedException; + } } /** @@ -140,13 +314,11 @@ public
static void exportMetadataToFile(final Path outputFile, final Metadat Logger.info("Metadata:\n{}", prettyMetadata); } - /** - * Runs this analysis asynchronously. - * Processes each repository sequentially and runs - * {@link org.variantsync.diffdetective.analysis.HistoryAnalysis#analyzeAsync(Repository, Path, CommitHistoryAnalysisTaskFactory, int)} - * on each of them. - */ - public void runAsync() { + public static void forEachRepository( + List repositoriesToAnalyze, + Path outputDir, + BiConsumer analyzeRepository + ) { for (final Repository repo : repositoriesToAnalyze) { final Path repoOutputDir = outputDir.resolve(repo.getRepositoryName()); /// Don't repeat work we already did: @@ -158,8 +330,7 @@ public void runAsync() { final Clock clock = new Clock(); clock.start(); - analyzeAsync(repo, repoOutputDir, whatToDo, commitsToProcessPerThread); - postProcessingOnRepositoryOutputDir.accept(repoOutputDir); + analyzeRepository.accept(repo, repoOutputDir); Logger.info(" === End Processing {} after {} ===", repo.getRepositoryName(), diff --git a/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java new file mode 100644 index 000000000..19bfac9f8 --- /dev/null +++ b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java @@ -0,0 +1,49 @@ +package org.variantsync.diffdetective.analysis; + +import java.io.OutputStream; + +import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; +import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExport; +import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; + +public class LineGraphExportAnalysis implements HistoryAnalysis.Hooks { + private final AnalysisStrategy analysisStrategy; + private final LineGraphExportOptions exportOptions; + private OutputStream lineGraphDestination; + + public LineGraphExportAnalysis(final 
AnalysisStrategy analysisStrategy, final LineGraphExportOptions exportOptions) { + this.analysisStrategy = analysisStrategy; + this.exportOptions = exportOptions; + } + + @Override + public void beginBatch(HistoryAnalysis analysis) { + analysis.getResult().putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName()); + analysis.getResult().putCustomInfo(MetadataKeys.NODEFORMAT, exportOptions.nodeFormat().getName()); + analysis.getResult().putCustomInfo(MetadataKeys.EDGEFORMAT, exportOptions.edgeFormat().getName()); + + analysisStrategy.start(analysis.getRepository(), analysis.getOutputFile()); + } + + @Override + public boolean onParsedCommit(HistoryAnalysis analysis) { + lineGraphDestination = analysisStrategy.onCommit(analysis.getCurrentCommitDiff()); + return true; + } + + @Override + public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + analysis.getResult().append(LineGraphExport.toLineGraphFormat(analysis.getResult().repoName, analysis.getCurrentPatch(), exportOptions, lineGraphDestination)); + return true; + } + + @Override + public void endCommit(HistoryAnalysis analysis) throws Exception { + if (lineGraphDestination != null) { lineGraphDestination.close(); } /* still null when no commit in this batch has reached onParsedCommit yet */ + } + + @Override + public void endBatch(HistoryAnalysis analysis) { + analysisStrategy.end(); + } +} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java new file mode 100644 index 000000000..3cbff3041 --- /dev/null +++ b/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java @@ -0,0 +1,60 @@ +package org.variantsync.diffdetective.analysis; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import org.variantsync.diffdetective.editclass.EditClass; +import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; +import org.variantsync.diffdetective.util.CSV; +import
org.variantsync.diffdetective.util.FileUtils; +import org.variantsync.diffdetective.util.IO; + +public class PatchAnalysis implements HistoryAnalysis.Hooks { + public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv"; + + private List patchStatistics; + private PatchStatistics thisPatchesStatistics; + + @Override + public void beginBatch(HistoryAnalysis analysis) { + patchStatistics = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); + } + + @Override + public boolean beginPatch(HistoryAnalysis analysis) { + thisPatchesStatistics = new PatchStatistics(analysis.getCurrentPatch(), ProposedEditClasses.Instance); + return true; + } + + @Override + public boolean analyzeDiffTree(HistoryAnalysis analysis) { + analysis.getCurrentDiffTree().forAll(node -> { + if (node.isArtifact()) { + final EditClass editClass = ProposedEditClasses.Instance.match(node); + analysis.getResult().editClassCounts.reportOccurrenceFor( + editClass, + analysis.getCurrentCommitDiff() + ); + thisPatchesStatistics.editClassCount().increment(editClass); + } + }); + + return true; + } + + @Override + public void endPatch(HistoryAnalysis analysis) { + patchStatistics.add(thisPatchesStatistics); + } + + @Override + public void endBatch(HistoryAnalysis analysis) throws IOException { + exportPatchStatistics(patchStatistics, FileUtils.addExtension(analysis.getOutputFile(), PATCH_STATISTICS_EXTENSION)); + } + + public static void exportPatchStatistics(final List commitTimes, final Path pathToOutputFile) throws IOException { + IO.write(pathToOutputFile, CSV.toCSV(commitTimes)); + } +} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java new file mode 100644 index 000000000..7a13c9d29 --- /dev/null +++ b/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java @@ -0,0 +1,25 @@ +package 
org.variantsync.diffdetective.analysis; + +import java.util.Arrays; +import java.util.List; + +import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; + +public class PreprocessingAnalysis implements HistoryAnalysis.Hooks { + private List preprocessors; + + public PreprocessingAnalysis(List preprocessors) { + this.preprocessors = preprocessors; + } + + public PreprocessingAnalysis(DiffTreeTransformer... preprocessors) { + this.preprocessors = Arrays.asList(preprocessors); + } + + @Override + public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + DiffTreeTransformer.apply(preprocessors, analysis.getCurrentDiffTree()); + analysis.getCurrentDiffTree().assertConsistency(); + return true; + } +} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java new file mode 100644 index 000000000..71c10b1e3 --- /dev/null +++ b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java @@ -0,0 +1,86 @@ +package org.variantsync.diffdetective.analysis; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import org.variantsync.diffdetective.util.Clock; +import org.variantsync.diffdetective.util.FileUtils; +import org.variantsync.diffdetective.util.IO; +import org.variantsync.diffdetective.util.StringUtils; + +public class StatisticsAnalysis implements HistoryAnalysis.Hooks { + public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt"; + + // List to store the process time of each commit. + private final List commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); + // Clock for runtime measurement. 
+ private final Clock totalTime = new Clock(); + private final Clock commitProcessTimer = new Clock(); + private int numDiffTrees = 0; + + @Override + public void beginBatch(HistoryAnalysis analysis) { + totalTime.start(); + } + + @Override + public boolean beginCommit(HistoryAnalysis analysis) { + commitProcessTimer.start(); + numDiffTrees = 0; + return true; + } + + @Override + public boolean analyzeDiffTree(HistoryAnalysis analysis) { + ++numDiffTrees; + return true; + } + + @Override + public void endCommit(HistoryAnalysis analysis) { + analysis.getResult().exportedTrees += numDiffTrees; + + // Report the commit process time if the commit is not empty. + if (numDiffTrees > 0) { + final long commitTimeMS = commitProcessTimer.getPassedMilliseconds(); + // find max commit time + if (commitTimeMS > analysis.getResult().max.milliseconds()) { + analysis.getResult().max.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS); + } + // find min commit time + if (commitTimeMS < analysis.getResult().min.milliseconds()) { + analysis.getResult().min.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS); + } + // report time + commitTimes.add(new CommitProcessTime(analysis.getCurrentCommitDiff().getCommitHash(), analysis.getRepository().getRepositoryName(), commitTimeMS)); + ++analysis.getResult().exportedCommits; + } else { + ++analysis.getResult().emptyCommits; + } + } + + @Override + public void endBatch(HistoryAnalysis analysis) throws IOException { + // shutdown; report total time; export results + analysis.getResult().runtimeInSeconds = totalTime.getPassedSeconds(); + analysis.getResult().exportTo(FileUtils.addExtension(analysis.getOutputFile(), AnalysisResult.EXTENSION)); + exportCommitTimes(commitTimes, FileUtils.addExtension(analysis.getOutputFile(), COMMIT_TIME_FILE_EXTENSION)); + } + + /** + * Exports the given commit times to the given file. Overwrites existing files. 
+ * @param commitTimes List of all CommitProcessTimes to write into a single file. + * @param pathToOutputFile Output file to write. + */ + public static void exportCommitTimes(final List commitTimes, final Path pathToOutputFile) throws IOException { + final StringBuilder times = new StringBuilder(); + + for (final CommitProcessTime ct : commitTimes) { + times.append(ct.toString()).append(StringUtils.LINEBREAK); + } + + IO.write(pathToOutputFile, times.toString()); + } +} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java b/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java index e2033b0a5..8ddc24550 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java @@ -1,5 +1,6 @@ package org.variantsync.diffdetective.analysis.strategies; +import org.variantsync.diffdetective.analysis.LineGraphExportAnalysis; // For Javadoc import org.variantsync.diffdetective.datasets.Repository; import org.variantsync.diffdetective.diff.git.CommitDiff; @@ -7,7 +8,7 @@ import java.nio.file.Path; /** - * Callbacks for {@link org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask}. + * Callbacks for {@link LineGraphExportAnalysis}. * A strategy may perform arbitrary additional tasks upon the execution of a task. * The strategy is notified about the start and end of a task as well after each processed commit. 
* @author Paul Bittner diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java index 5d1aff06c..c6158cc82 100644 --- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java +++ b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java @@ -1,13 +1,28 @@ package org.variantsync.diffdetective.mining; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.function.BiFunction; +import java.util.function.Consumer; + import org.apache.commons.io.FileUtils; import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask; -import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTaskFactory; +import org.variantsync.diffdetective.analysis.FilterAnalysis; import org.variantsync.diffdetective.analysis.HistoryAnalysis; +import org.variantsync.diffdetective.analysis.LineGraphExportAnalysis; +import org.variantsync.diffdetective.analysis.PatchAnalysis; +import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; +import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; import org.variantsync.diffdetective.analysis.strategies.AnalyzeAllThenExport; -import org.variantsync.diffdetective.datasets.*; +import org.variantsync.diffdetective.datasets.DatasetDescription; +import org.variantsync.diffdetective.datasets.DatasetFactory; +import org.variantsync.diffdetective.datasets.DefaultDatasets; +import org.variantsync.diffdetective.datasets.ParseOptions; +import org.variantsync.diffdetective.datasets.Repository; import org.variantsync.diffdetective.datasets.predefined.StanciulescuMarlin; import org.variantsync.diffdetective.feature.CPPAnnotationParser; import org.variantsync.diffdetective.metadata.ExplainedFilterSummary; @@ 
-15,7 +30,6 @@ import org.variantsync.diffdetective.mining.formats.MiningNodeFormat; import org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat; import org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter; -import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; import org.variantsync.diffdetective.variation.diff.serialize.GraphFormat; import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; import org.variantsync.diffdetective.variation.diff.serialize.edgeformat.EdgeLabelFormat; @@ -25,13 +39,6 @@ import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; import org.variantsync.diffdetective.variation.diff.transform.Starfold; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Consumer; - public class DiffTreeMiner { public static final Path DATASET_FILE = DefaultDatasets.EMACS; public static final boolean SEARCH_FOR_GOOD_RUNNING_EXAMPLES = false; @@ -94,12 +101,11 @@ public static AnalysisStrategy MiningStrategy() { // ); } - public static CommitHistoryAnalysisTaskFactory Mine() { - return (repo, differ, outputPath, commits) -> new MiningTask(new CommitHistoryAnalysisTask.Options( - repo, - differ, - outputPath, - new ExplainedFilter<>( + public static BiFunction AnalysisFactory = + (repo, repoOutputDir) -> new HistoryAnalysis( + List.of( + new PreprocessingAnalysis(Postprocessing(repo)), + new FilterAnalysis( DiffTreeFilter.notEmpty(), DiffTreeFilter.moreThanOneArtifactNode(), /// We want to exclude patches that do not edit variability. @@ -109,11 +115,13 @@ public static CommitHistoryAnalysisTaskFactory Mine() { /// We thus filter them. 
DiffTreeFilter.hasAtLeastOneEditToVariability() ), - Postprocessing(repo), - MiningStrategy(), - commits - ), MiningExportOptions(repo)); - } + new LineGraphExportAnalysis(MiningStrategy(), MiningExportOptions(repo)), + new PatchAnalysis(), + new StatisticsAnalysis() + ), + repo, + repoOutputDir + ); public static void main(String[] args) throws IOException { // setupLogger(Level.INFO); @@ -166,13 +174,10 @@ public static void main(String[] args) throws IOException { repoPostProcessing = p -> {}; } - final HistoryAnalysis analysis = new HistoryAnalysis( - repos, - outputDir, - HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT, - Mine(), - repoPostProcessing); - analysis.runAsync(); + HistoryAnalysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> { + HistoryAnalysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)); + repoPostProcessing.accept(repoOutputDir); + }); Logger.info("Done"); final String logFile = "log.txt"; diff --git a/src/main/java/org/variantsync/diffdetective/mining/MiningTask.java b/src/main/java/org/variantsync/diffdetective/mining/MiningTask.java deleted file mode 100644 index 2e1230f1a..000000000 --- a/src/main/java/org/variantsync/diffdetective/mining/MiningTask.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.variantsync.diffdetective.mining; - -import org.eclipse.jgit.revwalk.RevCommit; -import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.*; -import org.variantsync.diffdetective.diff.git.CommitDiff; -import org.variantsync.diffdetective.diff.git.PatchDiff; -import org.variantsync.diffdetective.diff.result.CommitDiffResult; -import org.variantsync.diffdetective.editclass.EditClass; -import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; -import org.variantsync.diffdetective.metadata.ExplainedFilterSummary; -import org.variantsync.diffdetective.util.Clock; -import org.variantsync.diffdetective.util.FileUtils; -import 
org.variantsync.diffdetective.variation.diff.DiffTree; -import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExport; -import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; -import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; - -import java.util.ArrayList; -import java.util.List; - -public class MiningTask extends CommitHistoryAnalysisTask { - private final LineGraphExportOptions exportOptions; - - public MiningTask(final Options options, final LineGraphExportOptions exportOptions) { - super(options); - - this.exportOptions = exportOptions; - } - - @Override - public AnalysisResult call() throws Exception { - final AnalysisResult miningResult = super.call(); - miningResult.putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName()); - miningResult.putCustomInfo(MetadataKeys.NODEFORMAT, exportOptions.nodeFormat().getName()); - miningResult.putCustomInfo(MetadataKeys.EDGEFORMAT, exportOptions.edgeFormat().getName()); - - final Clock totalTime = new Clock(); - - final List commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); - final List patchStatistics = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); - final Clock commitProcessTimer = new Clock(); - - totalTime.start(); - - for (final RevCommit commit : options.commits()) { - commitProcessTimer.start(); - final CommitDiffResult commitDiffResult = options.differ().createCommitDiff(commit); - - miningResult.reportDiffErrors(commitDiffResult.errors()); - if (commitDiffResult.diff().isEmpty()) { - Logger.debug("found commit that failed entirely and was not filtered because:\n{}", commitDiffResult.errors()); - continue; - } - - /* - * We count the edit classes of all difftrees that match our filter criteria - * (e.g., match more than one edit class) and export them to the destination - * determined by the AnalysisStrategy. 
- */ - int numDiffTrees = 0; - final CommitDiff commitDiff = commitDiffResult.diff().get(); - try (var lineGraphDestination = options.analysisStrategy().onCommit(commitDiff)) { - for (final PatchDiff patch : commitDiff.getPatchDiffs()) { - final PatchStatistics thisPatchesStatistics = new PatchStatistics(patch, ProposedEditClasses.Instance); - - if (patch.isValid()) { - final DiffTree t = patch.getDiffTree(); - DiffTreeTransformer.apply(options.treePreProcessing(), t); - t.assertConsistency(); - - if (!options.treeFilter().test(t)) { - continue; - } - - miningResult.append(LineGraphExport.toLineGraphFormat(miningResult.repoName, patch, exportOptions, lineGraphDestination)); - - t.forAll(node -> { - if (node.isArtifact()) { - final EditClass editClass = ProposedEditClasses.Instance.match(node); - miningResult.editClassCounts.reportOccurrenceFor( - editClass, - commitDiff - ); - thisPatchesStatistics.editClassCount().increment(editClass); - } - }); - - ++numDiffTrees; - } - - patchStatistics.add(thisPatchesStatistics); - } - } - - miningResult.exportedCommits += 1; - miningResult.exportedTrees += numDiffTrees; - miningResult.filterHits.append(new ExplainedFilterSummary(options.treeFilter())); - options.treeFilter().resetExplanations(); - - // Only consider non-empty commits - if (numDiffTrees > 0) { - final long commitTimeMS = commitProcessTimer.getPassedMilliseconds(); - if (commitTimeMS > miningResult.max.milliseconds()) { - miningResult.max.set(commitDiff.getCommitHash(), commitTimeMS); - } - if (commitTimeMS < miningResult.min.milliseconds()) { - miningResult.min.set(commitDiff.getCommitHash(), commitTimeMS); - } - commitTimes.add(new CommitProcessTime(commitDiff.getCommitHash(), options.repository().getRepositoryName(), commitTimeMS)); - ++miningResult.exportedCommits; - } else { - ++miningResult.emptyCommits; - } - } - - options.analysisStrategy().end(); - miningResult.runtimeInSeconds = totalTime.getPassedSeconds(); - 
miningResult.exportTo(FileUtils.addExtension(options.outputDir(), AnalysisResult.EXTENSION)); - exportCommitTimes(commitTimes, FileUtils.addExtension(options.outputDir(), COMMIT_TIME_FILE_EXTENSION)); - exportPatchStatistics(patchStatistics, FileUtils.addExtension(options.outputDir(), PATCH_STATISTICS_EXTENSION)); - return miningResult; - } -} diff --git a/src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java b/src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java deleted file mode 100644 index 5981e48a9..000000000 --- a/src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java +++ /dev/null @@ -1,121 +0,0 @@ -package org.variantsync.diffdetective.validation; - -import org.eclipse.jgit.revwalk.RevCommit; -import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.AnalysisResult; -import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask; -import org.variantsync.diffdetective.analysis.CommitProcessTime; -import org.variantsync.diffdetective.analysis.HistoryAnalysis; -import org.variantsync.diffdetective.diff.git.CommitDiff; -import org.variantsync.diffdetective.diff.git.PatchDiff; -import org.variantsync.diffdetective.diff.result.CommitDiffResult; -import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; -import org.variantsync.diffdetective.metadata.ExplainedFilterSummary; -import org.variantsync.diffdetective.util.Clock; -import org.variantsync.diffdetective.util.FileUtils; -import org.variantsync.diffdetective.variation.diff.DiffTree; -import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; - -import java.util.ArrayList; -import java.util.List; - -/** - * Task for performing the ESEC/FSE'22 validation on a set of commits from a given repository. 
- * @author Paul Bittner - */ -public class EditClassValidationTask extends CommitHistoryAnalysisTask { - public EditClassValidationTask(Options options) { - super(options); - } - - @Override - public AnalysisResult call() throws Exception { - // Setup. Obtain the result from the initial setup in the super class. - final AnalysisResult miningResult = super.call(); - // List to store the process time of each commit. - final List commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); - // Clock for runtime measurement. - final Clock totalTime = new Clock(); - totalTime.start(); - final Clock commitProcessTimer = new Clock(); - - // For each commit: - for (final RevCommit commit : options.commits()) { - try { - commitProcessTimer.start(); - - // parse the commit - final CommitDiffResult commitDiffResult = options.differ().createCommitDiff(commit); - - // report any errors that occurred and exit in case no DiffTree could be parsed. - miningResult.reportDiffErrors(commitDiffResult.errors()); - if (commitDiffResult.diff().isEmpty()) { - Logger.debug("[MiningTask::call] found commit that failed entirely and was not filtered because:\n{}", commitDiffResult.errors()); - ++miningResult.failedCommits; - continue; - } - - // extract the produced commit diff and inform the strategy - final CommitDiff commitDiff = commitDiffResult.diff().get(); - options.analysisStrategy().onCommit(commitDiff).close(); - - // Count edit class matches - int numDiffTrees = 0; - for (final PatchDiff patch : commitDiff.getPatchDiffs()) { - if (patch.isValid()) { - final DiffTree t = patch.getDiffTree(); - DiffTreeTransformer.apply(options.treePreProcessing(), t); - t.assertConsistency(); - - if (!options.treeFilter().test(t)) { - continue; - } - - t.forAll(node -> { - if (node.isArtifact()) { - miningResult.editClassCounts.reportOccurrenceFor( - ProposedEditClasses.Instance.match(node), - commitDiff - ); - } - }); - - ++numDiffTrees; - } - } - 
miningResult.exportedTrees += numDiffTrees; - miningResult.filterHits.append(new ExplainedFilterSummary(options.treeFilter())); - options.treeFilter().resetExplanations(); - - // Report the commit process time if the commit is not empty. - if (numDiffTrees > 0) { - final long commitTimeMS = commitProcessTimer.getPassedMilliseconds(); - // find max commit time - if (commitTimeMS > miningResult.max.milliseconds()) { - miningResult.max.set(commitDiff.getCommitHash(), commitTimeMS); - } - // find min commit time - if (commitTimeMS < miningResult.min.milliseconds()) { - miningResult.min.set(commitDiff.getCommitHash(), commitTimeMS); - } - // report time - commitTimes.add(new CommitProcessTime(commitDiff.getCommitHash(), options.repository().getRepositoryName(), commitTimeMS)); - ++miningResult.exportedCommits; - } else { - ++miningResult.emptyCommits; - } - - } catch (Exception e) { - Logger.error(e, "An unexpected error occurred at {} in {}", commit.getId().getName(), getOptions().repository().getRepositoryName()); - throw e; - } - } - - // shutdown; report total time; export results - options.analysisStrategy().end(); - miningResult.runtimeInSeconds = totalTime.getPassedSeconds(); - miningResult.exportTo(FileUtils.addExtension(options.outputDir(), AnalysisResult.EXTENSION)); - exportCommitTimes(commitTimes, FileUtils.addExtension(options.outputDir(), COMMIT_TIME_FILE_EXTENSION)); - return miningResult; - } -} diff --git a/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java b/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java index 194685e6b..bc584e03b 100644 --- a/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java +++ b/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java @@ -3,8 +3,8 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.tinylog.Logger; import org.variantsync.diffdetective.analysis.AutomationResult; -import 
org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask; import org.variantsync.diffdetective.analysis.CommitProcessTime; +import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import org.variantsync.diffdetective.util.FileUtils; import java.io.IOException; @@ -72,7 +72,7 @@ public static AutomationResult getResultOfDirectory(final Path directory) throws try (Stream paths = Files.walk(directory)) { result = paths .parallel() - .filter(p -> FileUtils.hasExtension(p, CommitHistoryAnalysisTask.COMMIT_TIME_FILE_EXTENSION)) + .filter(p -> FileUtils.hasExtension(p, StatisticsAnalysis.COMMIT_TIME_FILE_EXTENSION)) .filter(Files::isRegularFile) // .peek(path -> Logger.info("Processing file {}", path)) .flatMap(FindMedianCommitTime::parse) diff --git a/src/main/java/org/variantsync/diffdetective/validation/Validation.java b/src/main/java/org/variantsync/diffdetective/validation/Validation.java index 9da6f5f2f..1b8d7f96f 100644 --- a/src/main/java/org/variantsync/diffdetective/validation/Validation.java +++ b/src/main/java/org/variantsync/diffdetective/validation/Validation.java @@ -1,40 +1,42 @@ package org.variantsync.diffdetective.validation; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.function.BiFunction; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.eclipse.jgit.api.errors.GitAPIException; import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask; -import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTaskFactory; + +import org.variantsync.diffdetective.analysis.AnalysisResult; +import org.variantsync.diffdetective.analysis.FilterAnalysis; import org.variantsync.diffdetective.analysis.HistoryAnalysis; -import org.variantsync.diffdetective.analysis.strategies.NullStrategy; +import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; 
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import org.variantsync.diffdetective.datasets.*; +import org.variantsync.diffdetective.datasets.Repository; +import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; import org.variantsync.diffdetective.mining.formats.DirectedEdgeLabelFormat; import org.variantsync.diffdetective.mining.formats.MiningNodeFormat; import org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat; import org.variantsync.diffdetective.util.Assert; import org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter; -import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; import org.variantsync.diffdetective.variation.diff.serialize.GraphFormat; import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; import org.variantsync.diffdetective.variation.diff.serialize.edgeformat.EdgeLabelFormat; import org.variantsync.diffdetective.variation.diff.serialize.treeformat.CommitDiffDiffTreeLabelFormat; import org.variantsync.diffdetective.variation.diff.transform.CutNonEditedSubtrees; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.List; -import java.util.function.Consumer; -import java.util.stream.Collectors; - /** * This is the validation from our ESEC/FSE'22 paper. * It provides all configuration settings and facilities to setup the validation by * creating a {@link HistoryAnalysis} and run it. * @author Paul Bittner */ -public class Validation { +public class Validation implements HistoryAnalysis.Hooks { /** * Hardcoded configuration option that determines of all analyzed repositories should be updated * (i.e., git pull) before the validation. 
@@ -47,20 +49,17 @@ public class Validation { // public static final boolean PRINT_LATEX_TABLE = true; // public static final int PRINT_LARGEST_SUBJECTS = 3; - /** - * The {@link CommitHistoryAnalysisTaskFactory} for the {@link HistoryAnalysis} that will run our validation. - * This factory creates {@link EditClassValidationTask}s with the respective settings. - */ - public static final CommitHistoryAnalysisTaskFactory VALIDATION_TASK_FACTORY = - (repo, differ, outputPath, commits) -> new EditClassValidationTask(new CommitHistoryAnalysisTask.Options( - repo, - differ, - outputPath, - new ExplainedFilter<>(DiffTreeFilter.notEmpty()), - List.of(new CutNonEditedSubtrees()), - new NullStrategy(), - commits - )); + // This is only needed for the `MarlinDebug` test. + public static final BiFunction AnalysisFactory = (repo, repoOutputDir) -> new HistoryAnalysis( + List.of( + new PreprocessingAnalysis(new CutNonEditedSubtrees()), + new FilterAnalysis(DiffTreeFilter.notEmpty()), // filters unwanted trees + new Validation(), + new StatisticsAnalysis() + ), + repo, + repoOutputDir + ); /** * Returns the node format that should be used for DiffNode IO. 
@@ -186,17 +185,26 @@ public static void main(String[] args) throws IOException { | END OF ARGUMENTS | \* ************************ */ - final Consumer repoPostProcessing = p -> {}; - final HistoryAnalysis analysis = new HistoryAnalysis( - repos, - outputDir, - HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT, - VALIDATION_TASK_FACTORY, - repoPostProcessing); - analysis.runAsync(); + HistoryAnalysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> + HistoryAnalysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)) + ); Logger.info("Done"); final String logFile = "log.txt"; FileUtils.copyFile(Path.of(logFile).toFile(), outputDir.resolve(logFile).toFile()); } + + @Override + public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + analysis.getCurrentDiffTree().forAll(node -> { + if (node.isArtifact()) { + analysis.getResult().editClassCounts.reportOccurrenceFor( + ProposedEditClasses.Instance.match(node), + analysis.getCurrentCommitDiff() + ); + } + }); + + return true; + } } diff --git a/src/test/java/MarlinDebug.java b/src/test/java/MarlinDebug.java index ea4b8abc8..b99ded0e4 100644 --- a/src/test/java/MarlinDebug.java +++ b/src/test/java/MarlinDebug.java @@ -136,12 +136,10 @@ public static void asMiningTask(final RepoInspection repoInspection, final Strin final RevWalk revWalk = new RevWalk(git.getRepository()); final RevCommit childCommit = revWalk.parseCommit(ObjectId.fromString(commitHash)); - DiffTreeMiner.Mine().create( + DiffTreeMiner.AnalysisFactory.apply( repoInspection.repo, - new GitDiffer(repoInspection.repo), - repoInspection.outputPath, - List.of(childCommit) - ).call(); + repoInspection.outputPath + ).processCommits(List.of(childCommit)); } public static void asValidationTask(final RepoInspection repoInspection, final String commitHash) throws Exception { @@ -150,12 +148,10 @@ public static void asValidationTask(final RepoInspection repoInspection, final S final RevWalk revWalk = new 
RevWalk(git.getRepository()); final RevCommit childCommit = revWalk.parseCommit(ObjectId.fromString(commitHash)); - Validation.VALIDATION_TASK_FACTORY.create( - repoInspection.repo, - new GitDiffer(repoInspection.repo), - repoInspection.outputPath, - List.of(childCommit) - ).call(); + Validation.AnalysisFactory.apply( + repoInspection.repo, + repoInspection.outputPath + ).processCommits(List.of(childCommit)); } public static void test(final RepoInspection repoInspection) throws Exception { From 1bb19ef48c69a2e2efb648fe8ebcb646fd6298b1 Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 19 Feb 2023 21:13:38 +0100 Subject: [PATCH 07/15] Rename `HistoryAnalysis` to `Analysis` This makes it easier to distinguish between hooks and the actual analysis. The lost information, that this analysis is about commit history, could be recovered by creating a new package for history related analysis if necessary. --- .../{HistoryAnalysis.java => Analysis.java} | 34 +++++++++---------- .../analysis/AnalysisResult.java | 2 +- .../analysis/FilterAnalysis.java | 6 ++-- .../analysis/LineGraphExportAnalysis.java | 12 +++---- .../diffdetective/analysis/PatchAnalysis.java | 14 ++++---- .../analysis/PreprocessingAnalysis.java | 4 +-- .../analysis/StatisticsAnalysis.java | 14 ++++---- .../diffdetective/mining/DiffTreeMiner.java | 10 +++--- .../tablegen/MiningResultAccumulator.java | 8 ++--- .../diffdetective/validation/Validation.java | 14 ++++---- 10 files changed, 59 insertions(+), 59 deletions(-) rename src/main/java/org/variantsync/diffdetective/analysis/{HistoryAnalysis.java => Analysis.java} (90%) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java similarity index 90% rename from src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java rename to src/main/java/org/variantsync/diffdetective/analysis/Analysis.java index ae03abfe2..91a4b8471 100644 --- 
a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java @@ -31,7 +31,7 @@ /** * @author Paul Bittner, Benjamin Moosherr */ -public class HistoryAnalysis { +public class Analysis { /** * File name that is used to store the analysis results for each repository. */ @@ -88,17 +88,17 @@ public AnalysisResult getResult() { } public interface Hooks { - default void beginBatch(HistoryAnalysis analysis) throws Exception {} - default boolean beginCommit(HistoryAnalysis analysis) throws Exception { return true; } - default boolean onParsedCommit(HistoryAnalysis analysis) throws Exception { return true; } - default boolean beginPatch(HistoryAnalysis analysis) throws Exception { return true; } - default boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { return true; } - default void endPatch(HistoryAnalysis analysis) throws Exception {} - default void endCommit(HistoryAnalysis analysis) throws Exception {} - default void endBatch(HistoryAnalysis analysis) throws Exception {} + default void beginBatch(Analysis analysis) throws Exception {} + default boolean beginCommit(Analysis analysis) throws Exception { return true; } + default boolean onParsedCommit(Analysis analysis) throws Exception { return true; } + default boolean beginPatch(Analysis analysis) throws Exception { return true; } + default boolean analyzeDiffTree(Analysis analysis) throws Exception { return true; } + default void endPatch(Analysis analysis) throws Exception {} + default void endCommit(Analysis analysis) throws Exception {} + default void endBatch(Analysis analysis) throws Exception {} } - public static AnalysisResult forEachCommit(Supplier analysis) { + public static AnalysisResult forEachCommit(Supplier analysis) { return forEachCommit( analysis, COMMITS_TO_PROCESS_PER_THREAD_DEFAULT, @@ -107,7 +107,7 @@ public static AnalysisResult forEachCommit(Supplier AnalysisResult forEachCommit( - Supplier 
analysisFactory, + Supplier analysisFactory, final int commitsToProcessPerThread, final int nThreads ) { @@ -157,7 +157,7 @@ public static AnalysisResult forEachCommit( return analysis.getResult(); } - public HistoryAnalysis( + public Analysis( List hooks, Repository repository, Path outputDir @@ -256,13 +256,13 @@ protected void processPatch() throws Exception { } } - protected void runHook(ListIterator hook, FailableBiConsumer callHook) throws Exception { + protected void runHook(ListIterator hook, FailableBiConsumer callHook) throws Exception { while (hook.hasNext()) { callHook.accept(hook.next(), this); } } - protected boolean runFilterHook(ListIterator hook, FailableBiFunction callHook) throws Exception { + protected boolean runFilterHook(ListIterator hook, FailableBiFunction callHook) throws Exception { while (hook.hasNext()) { if (!callHook.apply(hook.next(), this)) { return false; @@ -272,13 +272,13 @@ protected boolean runFilterHook(ListIterator hook, FailableBiFuncti return true; } - protected void runReverseHook(ListIterator hook, FailableBiConsumer callHook) throws Exception { + protected void runReverseHook(ListIterator hook, FailableBiConsumer callHook) throws Exception { Exception catchedException = null; while (hook.hasPrevious()) { try { callHook.accept(hook.previous(), this); } catch (Exception e) { - Logger.error(e, "An exception thrown in an end hooks of HistoryAnalysis will be rethrown later"); + Logger.error(e, "An exception thrown in an end hooks of Analysis will be rethrown later"); if (catchedException == null) { catchedException = e; } else { @@ -294,7 +294,7 @@ protected void runReverseHook(ListIterator hook, FailableBiConsumer /** * Exports the given metadata object to a file named according - * {@link org.variantsync.diffdetective.analysis.HistoryAnalysis#TOTAL_RESULTS_FILE_NAME} in the given directory. + * {@link org.variantsync.diffdetective.analysis.Analysis#TOTAL_RESULTS_FILE_NAME} in the given directory. 
* @param outputDir The directory into which the metadata object file should be written. * @param metadata The metadata to serialize * @param Type of the metadata. diff --git a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java index 77d09366d..7e6323389 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java @@ -20,7 +20,7 @@ import java.util.function.BiConsumer; /** - * The result of a {@link HistoryAnalysis}. + * The result of a {@link Analysis}. * This result stores various metadata and statistics that we use for the validation of our ESEC/FSE paper. * An AnalysisResult also allows to store any custom metadata or information. * @author Paul Bittner diff --git a/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java index dad6f3dbe..6c6143bd9 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java @@ -7,7 +7,7 @@ import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; import org.variantsync.diffdetective.variation.diff.filter.TaggedPredicate; -public class FilterAnalysis implements HistoryAnalysis.Hooks { +public class FilterAnalysis implements Analysis.Hooks { private ExplainedFilter treeFilter; public FilterAnalysis(ExplainedFilter treeFilter) { @@ -20,12 +20,12 @@ public FilterAnalysis(TaggedPredicate... 
treeFilter) { } @Override - public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + public boolean analyzeDiffTree(Analysis analysis) throws Exception { return treeFilter.test(analysis.getCurrentDiffTree()); } @Override - public void endCommit(HistoryAnalysis analysis) { + public void endCommit(Analysis analysis) { analysis.getResult().filterHits.append(new ExplainedFilterSummary(treeFilter)); treeFilter.resetExplanations(); } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java index 19bfac9f8..a18917375 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java @@ -6,7 +6,7 @@ import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExport; import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; -public class LineGraphExportAnalysis implements HistoryAnalysis.Hooks { +public class LineGraphExportAnalysis implements Analysis.Hooks { private final AnalysisStrategy analysisStrategy; private final LineGraphExportOptions exportOptions; private OutputStream lineGraphDestination; @@ -17,7 +17,7 @@ public LineGraphExportAnalysis(final AnalysisStrategy analysisStrategy, final Li } @Override - public void beginBatch(HistoryAnalysis analysis) { + public void beginBatch(Analysis analysis) { analysis.getResult().putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName()); analysis.getResult().putCustomInfo(MetadataKeys.NODEFORMAT, exportOptions.nodeFormat().getName()); analysis.getResult().putCustomInfo(MetadataKeys.EDGEFORMAT, exportOptions.edgeFormat().getName()); @@ -26,24 +26,24 @@ public void beginBatch(HistoryAnalysis analysis) { } @Override - public boolean onParsedCommit(HistoryAnalysis analysis) { + public boolean onParsedCommit(Analysis 
analysis) { lineGraphDestination = analysisStrategy.onCommit(analysis.getCurrentCommitDiff()); return true; } @Override - public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + public boolean analyzeDiffTree(Analysis analysis) throws Exception { analysis.getResult().append(LineGraphExport.toLineGraphFormat(analysis.getResult().repoName, analysis.getCurrentPatch(), exportOptions, lineGraphDestination)); return true; } @Override - public void endCommit(HistoryAnalysis analysis) throws Exception { + public void endCommit(Analysis analysis) throws Exception { lineGraphDestination.close(); } @Override - public void endBatch(HistoryAnalysis analysis) { + public void endBatch(Analysis analysis) { analysisStrategy.end(); } } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java index 3cbff3041..c1e960984 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java @@ -11,25 +11,25 @@ import org.variantsync.diffdetective.util.FileUtils; import org.variantsync.diffdetective.util.IO; -public class PatchAnalysis implements HistoryAnalysis.Hooks { +public class PatchAnalysis implements Analysis.Hooks { public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv"; private List patchStatistics; private PatchStatistics thisPatchesStatistics; @Override - public void beginBatch(HistoryAnalysis analysis) { - patchStatistics = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); + public void beginBatch(Analysis analysis) { + patchStatistics = new ArrayList<>(Analysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); } @Override - public boolean beginPatch(HistoryAnalysis analysis) { + public boolean beginPatch(Analysis analysis) { thisPatchesStatistics = new PatchStatistics(analysis.getCurrentPatch(), ProposedEditClasses.Instance); 
return true; } @Override - public boolean analyzeDiffTree(HistoryAnalysis analysis) { + public boolean analyzeDiffTree(Analysis analysis) { analysis.getCurrentDiffTree().forAll(node -> { if (node.isArtifact()) { final EditClass editClass = ProposedEditClasses.Instance.match(node); @@ -45,12 +45,12 @@ public boolean analyzeDiffTree(HistoryAnalysis analysis) { } @Override - public void endPatch(HistoryAnalysis analysis) { + public void endPatch(Analysis analysis) { patchStatistics.add(thisPatchesStatistics); } @Override - public void endBatch(HistoryAnalysis analysis) throws IOException { + public void endBatch(Analysis analysis) throws IOException { exportPatchStatistics(patchStatistics, FileUtils.addExtension(analysis.getOutputFile(), PATCH_STATISTICS_EXTENSION)); } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java index 7a13c9d29..83ec3f861 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java @@ -5,7 +5,7 @@ import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer; -public class PreprocessingAnalysis implements HistoryAnalysis.Hooks { +public class PreprocessingAnalysis implements Analysis.Hooks { private List preprocessors; public PreprocessingAnalysis(List preprocessors) { @@ -17,7 +17,7 @@ public PreprocessingAnalysis(DiffTreeTransformer... 
preprocessors) { } @Override - public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + public boolean analyzeDiffTree(Analysis analysis) throws Exception { DiffTreeTransformer.apply(preprocessors, analysis.getCurrentDiffTree()); analysis.getCurrentDiffTree().assertConsistency(); return true; diff --git a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java index 71c10b1e3..6961fa111 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java @@ -10,36 +10,36 @@ import org.variantsync.diffdetective.util.IO; import org.variantsync.diffdetective.util.StringUtils; -public class StatisticsAnalysis implements HistoryAnalysis.Hooks { +public class StatisticsAnalysis implements Analysis.Hooks { public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt"; // List to store the process time of each commit. - private final List commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); + private final List commitTimes = new ArrayList<>(Analysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); // Clock for runtime measurement. 
private final Clock totalTime = new Clock(); private final Clock commitProcessTimer = new Clock(); private int numDiffTrees = 0; @Override - public void beginBatch(HistoryAnalysis analysis) { + public void beginBatch(Analysis analysis) { totalTime.start(); } @Override - public boolean beginCommit(HistoryAnalysis analysis) { + public boolean beginCommit(Analysis analysis) { commitProcessTimer.start(); numDiffTrees = 0; return true; } @Override - public boolean analyzeDiffTree(HistoryAnalysis analysis) { + public boolean analyzeDiffTree(Analysis analysis) { ++numDiffTrees; return true; } @Override - public void endCommit(HistoryAnalysis analysis) { + public void endCommit(Analysis analysis) { analysis.getResult().exportedTrees += numDiffTrees; // Report the commit process time if the commit is not empty. @@ -62,7 +62,7 @@ public void endCommit(HistoryAnalysis analysis) { } @Override - public void endBatch(HistoryAnalysis analysis) throws IOException { + public void endBatch(Analysis analysis) throws IOException { // shutdown; report total time; export results analysis.getResult().runtimeInSeconds = totalTime.getPassedSeconds(); analysis.getResult().exportTo(FileUtils.addExtension(analysis.getOutputFile(), AnalysisResult.EXTENSION)); diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java index c6158cc82..c7ae680da 100644 --- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java +++ b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java @@ -11,7 +11,7 @@ import org.apache.commons.io.FileUtils; import org.tinylog.Logger; import org.variantsync.diffdetective.analysis.FilterAnalysis; -import org.variantsync.diffdetective.analysis.HistoryAnalysis; +import org.variantsync.diffdetective.analysis.Analysis; import org.variantsync.diffdetective.analysis.LineGraphExportAnalysis; import org.variantsync.diffdetective.analysis.PatchAnalysis; import 
org.variantsync.diffdetective.analysis.PreprocessingAnalysis; @@ -101,8 +101,8 @@ public static AnalysisStrategy MiningStrategy() { // ); } - public static BiFunction AnalysisFactory = - (repo, repoOutputDir) -> new HistoryAnalysis( + public static BiFunction AnalysisFactory = + (repo, repoOutputDir) -> new Analysis( List.of( new PreprocessingAnalysis(Postprocessing(repo)), new FilterAnalysis( @@ -174,8 +174,8 @@ public static void main(String[] args) throws IOException { repoPostProcessing = p -> {}; } - HistoryAnalysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> { - HistoryAnalysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)); + Analysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> { + Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)); repoPostProcessing.accept(repoOutputDir); }); Logger.info("Done"); diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java b/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java index 015b3c6e7..2f0ee44ab 100644 --- a/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java +++ b/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java @@ -3,7 +3,7 @@ import org.tinylog.Logger; import org.variantsync.diffdetective.analysis.AnalysisResult; import org.variantsync.diffdetective.analysis.AutomationResult; -import org.variantsync.diffdetective.analysis.HistoryAnalysis; +import org.variantsync.diffdetective.analysis.Analysis; import org.variantsync.diffdetective.analysis.MetadataKeys; import org.variantsync.diffdetective.datasets.DatasetDescription; import org.variantsync.diffdetective.datasets.DefaultDatasets; @@ -39,7 +39,7 @@ public class MiningResultAccumulator { /** * Finds all {@code AnalysisResult}s in {@code folderPath} recursively. 
- * All files having a {@link HistoryAnalysis#TOTAL_RESULTS_FILE_NAME} filename ending are + * All files having a {@link Analysis#TOTAL_RESULTS_FILE_NAME} filename ending are * parsed and associated with their filename. * * @param folderPath the folder which is scanned for analysis results recursively @@ -49,7 +49,7 @@ public static Map getAllTotalResultsIn(final Path folder // get all files in the directory which are outputs of DiffTreeMiningResult final List paths = Files.walk(folderPath) .filter(Files::isRegularFile) - .filter(p -> p.toString().endsWith(HistoryAnalysis.TOTAL_RESULTS_FILE_NAME)) + .filter(p -> p.toString().endsWith(Analysis.TOTAL_RESULTS_FILE_NAME)) .peek(path -> Logger.info("Processing file {}", path)) .toList(); @@ -115,7 +115,7 @@ public static void main(final String[] args) throws IOException, ParseException final Map allResults = getAllTotalResultsIn(inputPath); final AnalysisResult ultimateResult = computeTotalMetadataResult(allResults.values()); - HistoryAnalysis.exportMetadataToFile(inputPath.resolve("ultimateresult" + AnalysisResult.EXTENSION), ultimateResult); + Analysis.exportMetadataToFile(inputPath.resolve("ultimateresult" + AnalysisResult.EXTENSION), ultimateResult); final Map datasetByName; try { diff --git a/src/main/java/org/variantsync/diffdetective/validation/Validation.java b/src/main/java/org/variantsync/diffdetective/validation/Validation.java index 1b8d7f96f..84f90bdeb 100644 --- a/src/main/java/org/variantsync/diffdetective/validation/Validation.java +++ b/src/main/java/org/variantsync/diffdetective/validation/Validation.java @@ -13,7 +13,7 @@ import org.variantsync.diffdetective.analysis.AnalysisResult; import org.variantsync.diffdetective.analysis.FilterAnalysis; -import org.variantsync.diffdetective.analysis.HistoryAnalysis; +import org.variantsync.diffdetective.analysis.Analysis; import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import 
org.variantsync.diffdetective.datasets.*; @@ -33,10 +33,10 @@ /** * This is the validation from our ESEC/FSE'22 paper. * It provides all configuration settings and facilities to setup the validation by - * creating a {@link HistoryAnalysis} and run it. + * creating a {@link Analysis} and run it. * @author Paul Bittner */ -public class Validation implements HistoryAnalysis.Hooks { +public class Validation implements Analysis.Hooks { /** * Hardcoded configuration option that determines of all analyzed repositories should be updated * (i.e., git pull) before the validation. @@ -50,7 +50,7 @@ public class Validation implements HistoryAnalysis.Hooks { // public static final int PRINT_LARGEST_SUBJECTS = 3; // This is only needed for the `MarlinDebug` test. - public static final BiFunction AnalysisFactory = (repo, repoOutputDir) -> new HistoryAnalysis( + public static final BiFunction AnalysisFactory = (repo, repoOutputDir) -> new Analysis( List.of( new PreprocessingAnalysis(new CutNonEditedSubtrees()), new FilterAnalysis(DiffTreeFilter.notEmpty()), // filters unwanted trees @@ -185,8 +185,8 @@ public static void main(String[] args) throws IOException { | END OF ARGUMENTS | \* ************************ */ - HistoryAnalysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> - HistoryAnalysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)) + Analysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> + Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)) ); Logger.info("Done"); @@ -195,7 +195,7 @@ public static void main(String[] args) throws IOException { } @Override - public boolean analyzeDiffTree(HistoryAnalysis analysis) throws Exception { + public boolean analyzeDiffTree(Analysis analysis) throws Exception { analysis.getCurrentDiffTree().forAll(node -> { if (node.isArtifact()) { analysis.getResult().editClassCounts.reportOccurrenceFor( From b370624989b648c7e336f5840a1e99fa2f3c8f77 Mon Sep 17 00:00:00 2001 From: 
Benjamin Moosherr Date: Sun, 19 Feb 2023 20:37:56 +0100 Subject: [PATCH 08/15] Refactor `AnalysisResult` to be composable There are some behavioral changes: - The > temporary fix for renaming from Unchanged to Untouched has been removed. - There are two more metadata keys: `exportedCommits` and `exportedTrees`. These were previously called `processedCommits` and `processedTrees` and used with different meanings in the `DiffTreeMiner` and the validations, which caused a bug increasing these counters twice, although that was probably my mistake, introduced during refactoring or merging. - The order of metadata snapshots has probably changed --- .../diffdetective/analysis/Analysis.java | 63 +++- .../analysis/AnalysisResult.java | 345 +++++++----------- .../analysis/AutomationResult.java | 8 +- .../analysis/FilterAnalysis.java | 7 +- .../analysis/LineGraphExportAnalysis.java | 53 ++- .../diffdetective/analysis/MetadataKeys.java | 3 + .../diffdetective/analysis/PatchAnalysis.java | 8 +- .../analysis/StatisticsAnalysis.java | 120 +++++- .../metadata/EditClassCount.java | 34 ++ .../metadata/ExplainedFilterSummary.java | 15 + .../diffdetective/metadata/Metadata.java | 30 ++ .../diffdetective/mining/DiffTreeMiner.java | 1 + .../tablegen/MiningResultAccumulator.java | 47 ++- .../tablegen/rows/ContentRow.java | 6 + .../tablegen/styles/ShortTable.java | 16 +- .../diffdetective/tablegen/styles/Table1.java | 10 +- .../tablegen/styles/VariabilityShare.java | 4 +- .../diffdetective/validation/Validation.java | 10 +- .../serialize/DiffTreeSerializeDebugData.java | 7 + .../diff/serialize/LineGraphExport.java | 83 +++-- 20 files changed, 547 insertions(+), 323 deletions(-) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java index 91a4b8471..3fd557467 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java +++ 
b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java @@ -1,9 +1,19 @@ package org.variantsync.diffdetective.analysis; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.concurrent.Callable; +import java.util.function.BiConsumer; +import java.util.function.Supplier; + import org.apache.commons.lang3.function.FailableBiConsumer; import org.apache.commons.lang3.function.FailableBiFunction; import org.eclipse.jgit.revwalk.RevCommit; import org.tinylog.Logger; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; import org.variantsync.diffdetective.analysis.monitoring.TaskCompletionMonitor; import org.variantsync.diffdetective.datasets.Repository; import org.variantsync.diffdetective.diff.git.CommitDiff; @@ -19,23 +29,18 @@ import org.variantsync.functjonal.iteration.ClusteredIterator; import org.variantsync.functjonal.iteration.MappedIterator; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Iterator; -import java.util.ListIterator; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.function.BiConsumer; -import java.util.function.Supplier; - /** * @author Paul Bittner, Benjamin Moosherr */ public class Analysis { + /** + * File extension that is used when writing AnalysisResults to disk. + */ + public static final String EXTENSION = ".metadata.txt"; /** * File name that is used to store the analysis results for each repository. 
*/ - public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + AnalysisResult.EXTENSION; + public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + EXTENSION; /** * Default value for commitsToProcessPerThread * @see forEachCommit(Supplier, int, int) @@ -87,9 +92,19 @@ public AnalysisResult getResult() { return result; } + public > T get(ResultKey resultKey) { + return result.get(resultKey); + } + + public > void append(ResultKey resultKey, T value) { + result.append(resultKey, value); + } + public interface Hooks { + default void initializeResults(Analysis analysis) {} default void beginBatch(Analysis analysis) throws Exception {} default boolean beginCommit(Analysis analysis) throws Exception { return true; } + default void onFailedCommit(Analysis analysis) throws Exception {} default boolean onParsedCommit(Analysis analysis) throws Exception { return true; } default boolean beginPatch(Analysis analysis) throws Exception { return true; } default boolean analyzeDiffTree(Analysis analysis) throws Exception { return true; } @@ -98,7 +113,7 @@ default void endCommit(Analysis analysis) throws Exception {} default void endBatch(Analysis analysis) throws Exception {} } - public static AnalysisResult forEachCommit(Supplier analysis) { + public static AnalysisResult forEachCommit(Supplier analysis) { return forEachCommit( analysis, COMMITS_TO_PROCESS_PER_THREAD_DEFAULT, @@ -106,7 +121,7 @@ public static AnalysisResult forEachCommit(Supplier AnalysisResult forEachCommit( + public static AnalysisResult forEachCommit( Supplier analysisFactory, final int commitsToProcessPerThread, final int nThreads @@ -140,7 +155,11 @@ public static AnalysisResult forEachCommit( while (threads.hasNext()) { final AnalysisResult threadsResult = threads.next(); analysis.getResult().append(threadsResult); - commitSpeedMonitor.addFinishedTasks(threadsResult.exportedCommits); + + var statistics = threadsResult.get(StatisticsAnalysis.RESULT); + if (statistics != null) { + 
commitSpeedMonitor.addFinishedTasks(statistics.processedCommits); + } } } catch (Exception e) { Logger.error(e, "Failed to run all mining task"); @@ -158,6 +177,7 @@ public static AnalysisResult forEachCommit( } public Analysis( + String taskName, List hooks, Repository repository, Path outputDir @@ -165,7 +185,13 @@ public Analysis( this.hooks = hooks; this.repository = repository; this.outputDir = outputDir; - this.result = new AnalysisResult(repository.getRepositoryName()); + this.result = new AnalysisResult(); + + this.result.repoName = repository.getRepositoryName(); + this.result.taskName = taskName; + for (var hook : hooks) { + hook.initializeResults(this); + } } public AnalysisResult processCommits(List commits) throws Exception { @@ -178,12 +204,11 @@ public AnalysisResult processCommits(List commits, GitDiffer differ) return getResult(); } - protected AnalysisResult processCommitBatch(List commits) throws Exception { + protected void processCommitBatch(List commits) throws Exception { outputFile = outputDir.resolve(commits.get(0).getId().getName() + ".lg"); ListIterator batchHook = hooks.listIterator(); try { - result.putCustomInfo(MetadataKeys.TASKNAME, this.getClass().getName()); runHook(batchHook, Hooks::beginBatch); // For each commit @@ -207,8 +232,6 @@ protected AnalysisResult processCommitBatch(List commits) throws Exce } finally { runReverseHook(batchHook, Hooks::endBatch); } - - return result; } protected void processCommit() throws Exception { @@ -216,10 +239,10 @@ protected void processCommit() throws Exception { final CommitDiffResult commitDiffResult = differ.createCommitDiff(currentCommit); // report any errors that occurred and exit in case no DiffTree could be parsed. 
- result.reportDiffErrors(commitDiffResult.errors()); + getResult().reportDiffErrors(commitDiffResult.errors()); if (commitDiffResult.diff().isEmpty()) { Logger.debug("found commit that failed entirely because:\n{}", commitDiffResult.errors()); - ++result.failedCommits; + runHook(hooks.listIterator(), Hooks::onFailedCommit); return; } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java index 7e6323389..d1984b74f 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java @@ -1,135 +1,119 @@ package org.variantsync.diffdetective.analysis; +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + import org.variantsync.diffdetective.diff.result.DiffError; -import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; -import org.variantsync.diffdetective.metadata.EditClassCount; -import org.variantsync.diffdetective.metadata.ExplainedFilterSummary; import org.variantsync.diffdetective.metadata.Metadata; -import org.variantsync.diffdetective.variation.diff.serialize.DiffTreeSerializeDebugData; +import org.variantsync.functjonal.Cast; import org.variantsync.functjonal.Functjonal; import org.variantsync.functjonal.category.InplaceMonoid; import org.variantsync.functjonal.category.InplaceSemigroup; -import org.variantsync.functjonal.category.Semigroup; import org.variantsync.functjonal.map.MergeMap; -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.*; -import java.util.function.BiConsumer; - /** * The result of a {@link Analysis}. 
* This result stores various metadata and statistics that we use for the validation of our ESEC/FSE paper. * An AnalysisResult also allows to store any custom metadata or information. * @author Paul Bittner */ -public class AnalysisResult implements Metadata { +public final class AnalysisResult implements Metadata { /** * Placeholder name for data that is not associated to a repository or where the repository is unknown. */ public final static String NO_REPO = ""; - /** - * File extension that is used when writing AnalysisResults to disk. - */ - public final static String EXTENSION = ".metadata.txt"; - private final static String ERROR_BEGIN = "#Error["; private final static String ERROR_END = "]"; - /** - * Inplace semigroup for AnalysisResult. - * Merges the second results values into the first result. - */ - public final static InplaceSemigroup ISEMIGROUP = (a, b) -> { - a.totalCommits += b.totalCommits; - a.exportedCommits += b.exportedCommits; - a.emptyCommits += b.emptyCommits; - a.failedCommits += b.failedCommits; - a.exportedTrees += b.exportedTrees; - a.runtimeInSeconds += b.runtimeInSeconds; - a.runtimeWithMultithreadingInSeconds += b.runtimeWithMultithreadingInSeconds; - a.min.set(CommitProcessTime.min(a.min, b.min)); - a.max.set(CommitProcessTime.max(a.max, b.max)); - a.debugData.append(b.debugData); - a.filterHits.append(b.filterHits); - a.editClassCounts.append(b.editClassCounts); - MergeMap.putAllValues(a.customInfo, b.customInfo, Semigroup.assertEquals()); - a.diffErrors.append(b.diffErrors); - }; - - /** - * Inplace monoid for AnalysisResult. - * @see AnalysisResult#ISEMIGROUP - */ - public static InplaceMonoid IMONOID= InplaceMonoid.From( - AnalysisResult::new, - ISEMIGROUP - ); - /** * The repo from which the results where collected. */ public String repoName = NO_REPO; + public String taskName; /** - * The total number of commits in the observed history of the given repository. 
- */ - public int totalCommits = 0; - /** - * The number of commits that were processed. - * {@code exportedCommits <= totalCommits} + * The effective runtime in seconds that we have when using multithreading. */ - public int exportedCommits = 0; + public double runtimeWithMultithreadingInSeconds = 0; /** - * Number of commits that were not processed because they had no DiffTrees. - * A commit is empty iff at least of one of the following conditions is met for every of its patches: - *
    - *
  • the patch did not edit a C file, - *
  • the DiffTree became empty after transformations (this can happen if there are only whitespace changes), - *
  • or the patch had syntax errors in its annotations, so the DiffTree could not be parsed. - *
+ * The total number of commits in the observed history of the given repository. */ - public int emptyCommits = 0; + public int totalCommits = 0; + public final MergeMap diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum); + + private final Map> results = new HashMap<>(); + /** - * Number of commits that could not be parsed at all because of exceptions when operating JGit. + * Type proxy and runtime key for the type of a {@code Metadata} subclass. + * There should be no two {@code ResultKey} instances with the same {@code key} but different + * types {@code T}, otherwise {@link get} or {@link append} may throw {@link + * ClassCastException}s. * - * The number of commits that were filtered because they are a merge commit is thus given as - * {@code totalCommits - exportedCommits - emptyCommits - failedCommits} - */ - public int failedCommits = 0; - /** - * Number of DiffTrees that were processed. - */ - public int exportedTrees = 0; - /** - * The total runtime in seconds (irrespective of multithreading). - */ - public double runtimeInSeconds = 0; - /** - * The effective runtime in seconds that we have when using multithreading. - */ - public double runtimeWithMultithreadingInSeconds = 0; - /** - * The commit that was processed the fastest. + * @param key the runtime key for looking up the requested type + * @param a subclass of {@code Metadata} */ - public final CommitProcessTime min; + public record ResultKey>(String key) { + } + /** - * The commit that was processed the slowest. + * Returns the value previously added using {@link append}. 
+ * + * @param resultKey the key which is used to identify the data and its type + * @param the type of the value which was previously stored */ - public final CommitProcessTime max; + public > T get(ResultKey resultKey) { + return Cast.unchecked(results.get(resultKey.key())); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + private void unsafeAppend(String key, Metadata value) { + results.merge(key, value, (first, second) -> { + // `first` and `second` should have the same type if there are no two + // `ResultKey` instances with the same `ResultKey.key` and `results` is only + // modified by `append`. + ((Metadata) first).append((Metadata) second); + return first; + }); + } + /** - * Debug data for DiffTree serialization. + * Adds a new value or {@link Metadata#append}s it to the old value which is indexed by {@code + * resultKey}. + * + * @param resultKey the key which is used to identify the data and its type + * @param the type of the value which is appended + * @see get */ - public final DiffTreeSerializeDebugData debugData = new DiffTreeSerializeDebugData(); + public > void append(ResultKey resultKey, T value) { + unsafeAppend(resultKey.key(), value); + } + /** - * Explanations for filter hits, when filtering DiffTrees (e.g., because a diff was empty). + * Inplace semigroup for AnalysisResult. + * Merges the second results values into the first result. 
*/ - public ExplainedFilterSummary filterHits = new ExplainedFilterSummary(); - public EditClassCount editClassCounts = new EditClassCount(); - private final LinkedHashMap customInfo = new LinkedHashMap<>(); - private final MergeMap diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum); + public static final InplaceSemigroup ISEMIGROUP = (a, b) -> { + a.repoName = Metadata.mergeEqual(a.repoName, b.repoName); + a.taskName = Metadata.mergeEqual(a.taskName, b.taskName); + a.runtimeWithMultithreadingInSeconds += b.runtimeWithMultithreadingInSeconds; + a.totalCommits += b.totalCommits; + a.diffErrors.append(b.diffErrors); + b.results.forEach((key, value) -> a.unsafeAppend(key, value)); + }; + + public static final InplaceMonoid IMONOID = + InplaceMonoid.From(AnalysisResult::new, ISEMIGROUP); + + @Override + public InplaceSemigroup semigroup() { + return ISEMIGROUP; + } public AnalysisResult() { this(NO_REPO); @@ -141,18 +125,6 @@ public AnalysisResult() { */ public AnalysisResult(final String repoName) { this.repoName = repoName; - - this.min = CommitProcessTime.Unknown(repoName, Long.MAX_VALUE); - this.max = CommitProcessTime.Unknown(repoName, Long.MIN_VALUE); - } - - /** - * Stores the given custom key value information in this analysis result. - * @param key The name of the given value that is used to associate the value. - * @param value The value to store. - */ - public void putCustomInfo(final String key, final String value) { - customInfo.put(key, value); } /** @@ -164,131 +136,66 @@ public void reportDiffErrors(final List errors) { diffErrors.put(e, 1); } } - - /** - * Imports a metadata file, which is an output of a {@link AnalysisResult}, and saves back to {@link AnalysisResult}. - * - * @param p {@link Path} to the metadata file - * @param customParsers A list of parsers to handle custom values that were stored with {@link AnalysisResult#putCustomInfo(String, String)}. 
- * Each parser parses the value (second argument) of a given key (first entry in the map) and stores it in the given AnalysisResult (first argument). - * @return The reconstructed {@link AnalysisResult} - * @throws IOException when the file could not be read. - */ - public static AnalysisResult importFrom(final Path p, final Map> customParsers) throws IOException { - AnalysisResult result = new AnalysisResult(); - - final List filterHitsLines = new ArrayList<>(); - final List editClassCountsLines = new ArrayList<>(); - try (BufferedReader input = Files.newBufferedReader(p)) { - // examine each line of the metadata file separately - String line; - while ((line = input.readLine()) != null) { - String[] keyValuePair = line.split(": "); - String key = keyValuePair[0]; - String value = keyValuePair[1]; + @Override + public LinkedHashMap snapshot() { + LinkedHashMap snap = new LinkedHashMap<>(); + snap.put(MetadataKeys.TASKNAME, taskName); + snap.put(MetadataKeys.RUNTIME_WITH_MULTITHREADING, runtimeWithMultithreadingInSeconds); + snap.put(MetadataKeys.TOTAL_COMMITS, totalCommits); + + var statistics = get(StatisticsAnalysis.RESULT); + if (statistics != null) { + snap.put(MetadataKeys.FILTERED_COMMITS, totalCommits - statistics.processedCommits - statistics.emptyCommits - statistics.failedCommits); + } + + snap.putAll(Functjonal.bimap(diffErrors, error -> ERROR_BEGIN + error + ERROR_END, Object::toString)); + snap.put(MetadataKeys.REPONAME, repoName); + for (var result : results.values()) { + snap.putAll(result.snapshot()); + } + return snap; + } + + @Override + public void setFromSnapshot(LinkedHashMap snap) { + repoName = snap.get(MetadataKeys.REPONAME); + taskName = snap.get(MetadataKeys.TASKNAME); - switch (key) { - case MetadataKeys.REPONAME -> result.repoName = value; - case MetadataKeys.TREES -> result.exportedTrees = Integer.parseInt(value); - case MetadataKeys.PROCESSED_COMMITS -> result.exportedCommits = Integer.parseInt(value); - case 
MetadataKeys.TOTAL_COMMITS -> result.totalCommits = Integer.parseInt(value); - case MetadataKeys.EMPTY_COMMITS -> result.emptyCommits = Integer.parseInt(value); - case MetadataKeys.FAILED_COMMITS -> result.failedCommits = Integer.parseInt(value); - case MetadataKeys.FILTERED_COMMITS -> { /* Do nothing because this value is derived. */ } - case MetadataKeys.NON_NODE_COUNT -> result.debugData.numExportedNonNodes = Integer.parseInt(value); - case MetadataKeys.ADD_NODE_COUNT -> result.debugData.numExportedAddNodes = Integer.parseInt(value); - case MetadataKeys.REM_NODE_COUNT -> result.debugData.numExportedRemNodes = Integer.parseInt(value); - case MetadataKeys.MINCOMMIT -> result.min.set(CommitProcessTime.fromString(value)); - case MetadataKeys.MAXCOMMIT -> result.max.set(CommitProcessTime.fromString(value)); - case MetadataKeys.RUNTIME -> { - if (value.endsWith("s")) { - value = value.substring(0, value.length() - 1); - } - result.runtimeInSeconds = Double.parseDouble(value); - } - case MetadataKeys.RUNTIME_WITH_MULTITHREADING -> { - if (value.endsWith("s")) { - value = value.substring(0, value.length() - 1); - } - result.runtimeWithMultithreadingInSeconds = Double.parseDouble(value); - } - default -> { + String runtime = snap.get(MetadataKeys.RUNTIME_WITH_MULTITHREADING); + if (runtime.endsWith("s")) { + runtime = runtime.substring(0, runtime.length() - 1); + } + runtimeWithMultithreadingInSeconds = Double.parseDouble(runtime); - // temporary fix for renaming from Unchanged to Untouched - final String unchanged = "Unchanged"; - if (key.startsWith(unchanged)) { - key = ProposedEditClasses.Untouched.getName(); - line = key + line.substring(unchanged.length()); - } + totalCommits = Integer.parseInt(snap.get(MetadataKeys.TOTAL_COMMITS)); - final String finalKey = key; - if (ProposedEditClasses.All.stream().anyMatch(editClass -> editClass.getName().equals(finalKey))) { - editClassCountsLines.add(line); - } else if 
(key.startsWith(ExplainedFilterSummary.FILTERED_MESSAGE_BEGIN)) { - filterHitsLines.add(line); - } else if (key.startsWith(ERROR_BEGIN)) { - var errorId = key.substring(ERROR_BEGIN.length(), key.length() - ERROR_END.length()); - var e = DiffError.fromMessage(errorId); - if (e.isEmpty()) { - throw new RuntimeException("Invalid error id " + errorId + " while importing " + p); - } - // add DiffError - result.diffErrors.put(e.get(), Integer.parseInt(value)); - } else { - final BiConsumer customParser = customParsers.get(key); - if (customParser == null) { - final String errorMessage = "Unknown entry \"" + line + "\"!"; - throw new IOException(errorMessage); - } else { - customParser.accept(result, value); - } - } - } + for (var entry : snap.entrySet()) { + String key = entry.getKey(); + if (entry.getKey().startsWith(ERROR_BEGIN)) { + var errorId = key.substring(ERROR_BEGIN.length(), key.length() - ERROR_END.length()); + var e = DiffError.fromMessage(errorId); + if (e.isEmpty()) { + throw new RuntimeException("Invalid error id " + errorId); } + // add DiffError + diffErrors.put(e.get(), Integer.parseInt(entry.getValue())); } } - - result.filterHits = ExplainedFilterSummary.parse(filterHitsLines); - result.editClassCounts = EditClassCount.parse(editClassCountsLines, p.toString()); - - return result; } - /** - * Helper method to construct custom parsers for {@link AnalysisResult#importFrom(Path, Map)}. - * This method creates a parser for custom values that just stores the parsed values as string values for the given key. - * @param key The key whose values should be stored as unparsed strings. - * @return A custom parser for {@link AnalysisResult#importFrom(Path, Map)}. 
- */ - public static Map.Entry> storeAsCustomInfo(String key) { - return Map.entry(key, (r, val) -> r.putCustomInfo(key, val)); - } + public void setFrom(final Path path) throws IOException { + var snapshot = new LinkedHashMap(); - @Override - public LinkedHashMap snapshot() { - LinkedHashMap snap = new LinkedHashMap<>(); - snap.put(MetadataKeys.REPONAME, repoName); - snap.put(MetadataKeys.TOTAL_COMMITS, totalCommits); - snap.put(MetadataKeys.FILTERED_COMMITS, totalCommits - exportedCommits - emptyCommits - failedCommits); - snap.put(MetadataKeys.FAILED_COMMITS, failedCommits); - snap.put(MetadataKeys.EMPTY_COMMITS, emptyCommits); - snap.put(MetadataKeys.PROCESSED_COMMITS, exportedCommits); - snap.put(MetadataKeys.TREES, exportedTrees); - snap.put(MetadataKeys.MINCOMMIT, min.toString()); - snap.put(MetadataKeys.MAXCOMMIT, max.toString()); - snap.put(MetadataKeys.RUNTIME, runtimeInSeconds); - snap.put(MetadataKeys.RUNTIME_WITH_MULTITHREADING, runtimeWithMultithreadingInSeconds); - snap.putAll(customInfo); - snap.putAll(debugData.snapshot()); - snap.putAll(filterHits.snapshot()); - snap.putAll(editClassCounts.snapshot()); - snap.putAll(Functjonal.bimap(diffErrors, error -> ERROR_BEGIN + error + ERROR_END, Object::toString)); - return snap; - } + try (BufferedReader input = Files.newBufferedReader(path)) { + // examine each line of the metadata file separately + String line; + while ((line = input.readLine()) != null) { + String[] keyValuePair = line.split(": "); + snapshot.put(keyValuePair[0], keyValuePair[1]); + } + } - @Override - public InplaceSemigroup semigroup() { - return ISEMIGROUP; + setFromSnapshot(snapshot); } } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java b/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java index 3f39951fd..dbb98377b 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java +++ 
b/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java @@ -1,5 +1,6 @@ package org.variantsync.diffdetective.analysis; +import org.apache.commons.lang3.NotImplementedException; import org.variantsync.diffdetective.metadata.Metadata; import org.variantsync.functjonal.category.InplaceSemigroup; @@ -52,8 +53,13 @@ public String toString() { return snap; } + @Override + public void setFromSnapshot(LinkedHashMap snap) { + throw new NotImplementedException(); + } + @Override public InplaceSemigroup semigroup() { - return null; + throw new NotImplementedException(); } } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java index 6c6143bd9..ad460484a 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java @@ -19,6 +19,11 @@ public FilterAnalysis(TaggedPredicate... 
treeFilter) { this.treeFilter = new ExplainedFilter(Arrays.stream(treeFilter)); } + @Override + public void initializeResults(Analysis analysis) { + analysis.append(ExplainedFilterSummary.KEY, new ExplainedFilterSummary()); + } + @Override public boolean analyzeDiffTree(Analysis analysis) throws Exception { return treeFilter.test(analysis.getCurrentDiffTree()); @@ -26,7 +31,7 @@ public boolean analyzeDiffTree(Analysis analysis) throws Exception { @Override public void endCommit(Analysis analysis) { - analysis.getResult().filterHits.append(new ExplainedFilterSummary(treeFilter)); + analysis.append(ExplainedFilterSummary.KEY, new ExplainedFilterSummary(treeFilter)); treeFilter.resetExplanations(); } } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java index a18917375..12cfc8ef1 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java @@ -1,12 +1,49 @@ package org.variantsync.diffdetective.analysis; import java.io.OutputStream; +import java.util.LinkedHashMap; +import org.apache.commons.lang3.NotImplementedException; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; +import org.variantsync.diffdetective.metadata.Metadata; import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExport; import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; +import org.variantsync.functjonal.category.InplaceSemigroup; public class LineGraphExportAnalysis implements Analysis.Hooks { + public static final ResultKey RESULT = new ResultKey<>("LineGraphExportAnalysis"); + public static final class Result implements Metadata { + public String treeFormat; + public String nodeFormat; + public String 
edgeFormat; + + public static final InplaceSemigroup ISEMIGROUP = (a, b) -> { + a.treeFormat = Metadata.mergeEqual(a.treeFormat, b.treeFormat); + a.nodeFormat = Metadata.mergeEqual(a.nodeFormat, b.nodeFormat); + a.edgeFormat = Metadata.mergeEqual(a.edgeFormat, b.edgeFormat); + }; + + @Override + public InplaceSemigroup semigroup() { + return ISEMIGROUP; + } + + @Override + public LinkedHashMap snapshot() { + var snap = new LinkedHashMap(); + snap.put(MetadataKeys.TREEFORMAT, treeFormat); + snap.put(MetadataKeys.NODEFORMAT, nodeFormat); + snap.put(MetadataKeys.EDGEFORMAT, edgeFormat); + return snap; + } + + @Override + public void setFromSnapshot(LinkedHashMap snap) { + throw new NotImplementedException(); + } + } + private final AnalysisStrategy analysisStrategy; private final LineGraphExportOptions exportOptions; private OutputStream lineGraphDestination; @@ -16,11 +53,16 @@ public LineGraphExportAnalysis(final AnalysisStrategy analysisStrategy, final Li this.exportOptions = exportOptions; } + @Override + public void initializeResults(Analysis analysis) { + analysis.append(RESULT, new Result()); + } + @Override public void beginBatch(Analysis analysis) { - analysis.getResult().putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName()); - analysis.getResult().putCustomInfo(MetadataKeys.NODEFORMAT, exportOptions.nodeFormat().getName()); - analysis.getResult().putCustomInfo(MetadataKeys.EDGEFORMAT, exportOptions.edgeFormat().getName()); + analysis.get(RESULT).treeFormat = exportOptions.treeFormat().getName(); + analysis.get(RESULT).nodeFormat = exportOptions.nodeFormat().getName(); + analysis.get(RESULT).edgeFormat = exportOptions.edgeFormat().getName(); analysisStrategy.start(analysis.getRepository(), analysis.getOutputFile()); } @@ -33,7 +75,10 @@ public boolean onParsedCommit(Analysis analysis) { @Override public boolean analyzeDiffTree(Analysis analysis) throws Exception { - 
analysis.getResult().append(LineGraphExport.toLineGraphFormat(analysis.getResult().repoName, analysis.getCurrentPatch(), exportOptions, lineGraphDestination)); + analysis.append( + LineGraphExport.STATISTIC, + LineGraphExport.toLineGraphFormat(analysis.getCurrentPatch(), exportOptions, lineGraphDestination) + ); return true; } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java b/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java index 1ba5002f2..86bdbebe5 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java @@ -27,4 +27,7 @@ public final class MetadataKeys { public static final String MINCOMMIT = "fastestCommit"; public static final String MAXCOMMIT = "slowestCommit"; public final static String TREES = "tree diffs"; + + public final static String EXPORTED_COMMITS = "exported commits"; + public final static String EXPORTED_TREES = "exported trees"; } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java index c1e960984..c005ccea4 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java @@ -7,6 +7,7 @@ import org.variantsync.diffdetective.editclass.EditClass; import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; +import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.util.CSV; import org.variantsync.diffdetective.util.FileUtils; import org.variantsync.diffdetective.util.IO; @@ -17,6 +18,11 @@ public class PatchAnalysis implements Analysis.Hooks { private List patchStatistics; private PatchStatistics thisPatchesStatistics; + @Override + public void initializeResults(Analysis analysis) { + analysis.append(EditClassCount.KEY, new 
EditClassCount()); + } + @Override public void beginBatch(Analysis analysis) { patchStatistics = new ArrayList<>(Analysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); @@ -33,7 +39,7 @@ public boolean analyzeDiffTree(Analysis analysis) { analysis.getCurrentDiffTree().forAll(node -> { if (node.isArtifact()) { final EditClass editClass = ProposedEditClasses.Instance.match(node); - analysis.getResult().editClassCounts.reportOccurrenceFor( + analysis.get(EditClassCount.KEY).reportOccurrenceFor( editClass, analysis.getCurrentCommitDiff() ); diff --git a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java index 6961fa111..449ab92cd 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java @@ -3,16 +3,108 @@ import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; +import org.variantsync.diffdetective.metadata.Metadata; import org.variantsync.diffdetective.util.Clock; import org.variantsync.diffdetective.util.FileUtils; import org.variantsync.diffdetective.util.IO; import org.variantsync.diffdetective.util.StringUtils; +import org.variantsync.functjonal.category.InplaceSemigroup; public class StatisticsAnalysis implements Analysis.Hooks { public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt"; + public static final ResultKey RESULT = new ResultKey<>("StatisticsAnalysis"); + public static final class Result implements Metadata { + /** + * Number of commits that were not processed because they had no DiffTrees. + * A commit is empty iff at least one of the following conditions is met for each of its patches: + *
    + *
  • the patch did not edit a C file, + *
  • the DiffTree became empty after transformations (this can happen if there are only whitespace changes), + *
  • or the patch had syntax errors in its annotations, so the DiffTree could not be parsed. + *
+ */ + public int emptyCommits = 0; + /** + * Number of commits that could not be parsed at all because of exceptions when operating JGit. + * + * The number of commits that were filtered because they are a merge commit is thus given as + * {@code totalCommits - processedCommits - emptyCommits - failedCommits} + */ + public int failedCommits = 0; + public int processedCommits = 0; + public int processedTrees = 0; + /** + * The total runtime in seconds (irrespective of multithreading). + */ + public double runtimeInSeconds = 0; + /** + * The commit that was processed the fastest. + */ + public final CommitProcessTime min; + /** + * The commit that was processed the slowest. + */ + public final CommitProcessTime max; + + public Result() { + this(AnalysisResult.NO_REPO); + } + + public Result(String repoName) { + this.min = CommitProcessTime.Unknown(repoName, Long.MAX_VALUE); + this.max = CommitProcessTime.Unknown(repoName, Long.MIN_VALUE); + } + + public static final InplaceSemigroup ISEMIGROUP = (a, b) -> { + a.emptyCommits += b.emptyCommits; + a.failedCommits += b.failedCommits; + a.processedCommits += b.processedCommits; + a.processedTrees += b.processedTrees; + a.runtimeInSeconds += b.runtimeInSeconds; + a.min.set(CommitProcessTime.min(a.min, b.min)); + a.max.set(CommitProcessTime.max(a.max, b.max)); + }; + + @Override + public InplaceSemigroup semigroup() { + return ISEMIGROUP; + } + + @Override + public LinkedHashMap snapshot() { + LinkedHashMap snap = new LinkedHashMap<>(); + snap.put(MetadataKeys.FAILED_COMMITS, failedCommits); + snap.put(MetadataKeys.EMPTY_COMMITS, emptyCommits); + snap.put(MetadataKeys.PROCESSED_COMMITS, processedCommits); + snap.put(MetadataKeys.TREES, processedTrees); + snap.put(MetadataKeys.MINCOMMIT, min.toString()); + snap.put(MetadataKeys.MAXCOMMIT, max.toString()); + snap.put(MetadataKeys.RUNTIME, runtimeInSeconds); + return snap; + } + + @Override + public void setFromSnapshot(LinkedHashMap snap) { + failedCommits = 
Integer.parseInt(snap.get(MetadataKeys.FAILED_COMMITS)); + emptyCommits = Integer.parseInt(snap.get(MetadataKeys.EMPTY_COMMITS)); + processedCommits = Integer.parseInt(snap.get(MetadataKeys.PROCESSED_COMMITS)); + min.set(CommitProcessTime.fromString(snap.get(MetadataKeys.MINCOMMIT))); + max.set(CommitProcessTime.fromString(snap.get(MetadataKeys.MAXCOMMIT))); + processedTrees = Integer.parseInt(snap.get(MetadataKeys.TREES)); + + String runtime = snap.get(MetadataKeys.RUNTIME); + if (runtime.endsWith("s")) { + runtime = runtime.substring(0, runtime.length() - 1); + } + runtimeInSeconds = Double.parseDouble(runtime); + } + } + // List to store the process time of each commit. private final List commitTimes = new ArrayList<>(Analysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); // Clock for runtime measurement. @@ -20,6 +112,11 @@ public class StatisticsAnalysis implements Analysis.Hooks { private final Clock commitProcessTimer = new Clock(); private int numDiffTrees = 0; + @Override + public void initializeResults(Analysis analysis) { + analysis.append(RESULT, new Result(analysis.getRepository().getRepositoryName())); + } + @Override public void beginBatch(Analysis analysis) { totalTime.start(); @@ -32,6 +129,11 @@ public boolean beginCommit(Analysis analysis) { return true; } + @Override + public void onFailedCommit(Analysis analysis) { + analysis.get(RESULT).failedCommits += 1; + } + @Override public boolean analyzeDiffTree(Analysis analysis) { ++numDiffTrees; @@ -40,32 +142,32 @@ public boolean analyzeDiffTree(Analysis analysis) { @Override public void endCommit(Analysis analysis) { - analysis.getResult().exportedTrees += numDiffTrees; + analysis.get(RESULT).processedTrees += numDiffTrees; // Report the commit process time if the commit is not empty. 
if (numDiffTrees > 0) { final long commitTimeMS = commitProcessTimer.getPassedMilliseconds(); // find max commit time - if (commitTimeMS > analysis.getResult().max.milliseconds()) { - analysis.getResult().max.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS); + if (commitTimeMS > analysis.get(RESULT).max.milliseconds()) { + analysis.get(RESULT).max.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS); } // find min commit time - if (commitTimeMS < analysis.getResult().min.milliseconds()) { - analysis.getResult().min.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS); + if (commitTimeMS < analysis.get(RESULT).min.milliseconds()) { + analysis.get(RESULT).min.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS); } // report time commitTimes.add(new CommitProcessTime(analysis.getCurrentCommitDiff().getCommitHash(), analysis.getRepository().getRepositoryName(), commitTimeMS)); - ++analysis.getResult().exportedCommits; + analysis.get(RESULT).processedCommits += 1; } else { - ++analysis.getResult().emptyCommits; + analysis.get(RESULT).emptyCommits += 1; } } @Override public void endBatch(Analysis analysis) throws IOException { // shutdown; report total time; export results - analysis.getResult().runtimeInSeconds = totalTime.getPassedSeconds(); - analysis.getResult().exportTo(FileUtils.addExtension(analysis.getOutputFile(), AnalysisResult.EXTENSION)); + analysis.get(RESULT).runtimeInSeconds = totalTime.getPassedSeconds(); + analysis.get(RESULT).exportTo(FileUtils.addExtension(analysis.getOutputFile(), Analysis.EXTENSION)); exportCommitTimes(commitTimes, FileUtils.addExtension(analysis.getOutputFile(), COMMIT_TIME_FILE_EXTENSION)); } diff --git a/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java b/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java index b526ad88c..0c6b1e81e 100644 --- a/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java +++ 
b/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java @@ -1,5 +1,6 @@ package org.variantsync.diffdetective.metadata; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; import org.variantsync.diffdetective.diff.git.CommitDiff; import org.variantsync.diffdetective.editclass.EditClass; import org.variantsync.diffdetective.editclass.EditClassCatalogue; @@ -19,6 +20,8 @@ * @author Paul Bittner */ public class EditClassCount implements Metadata { + public static final ResultKey KEY = new ResultKey<>("EditClassCount"); + /** * Counts the occurrences of a data point across commits. */ @@ -172,6 +175,37 @@ public LinkedHashMap snapshot() { ); } + @Override + public void setFromSnapshot(LinkedHashMap snap) { + for (var entry : snap.entrySet()) { + if (ProposedEditClasses.All.stream().anyMatch(editClass -> editClass.getName().equals(entry.getKey()))) { + var key = entry.getKey(); // edit class + var value = entry.getValue(); // key value content + value = value.replaceAll("[{} ]", ""); // remove unnecessary symbols + var innerKeyValuePair = value.split(";"); + var total = Integer.parseInt(innerKeyValuePair[0].split("=")[1]); // total count + var commits = Integer.parseInt(innerKeyValuePair[1].split("=")[1]); + + // get edit class from key + final String finalKey = key; + EditClass editClass = ProposedEditClasses.Instance.fromName(key).orElseThrow( + () -> new RuntimeException("Could not find EditClass with name " + finalKey) + ); + + Occurrences occurence = new Occurrences(); + occurence.totalAmount = total; + + // add fake commits + for (int i = 0; i < commits; ++i) { + occurence.uniqueCommits.add(String.valueOf(i)); + } + + // add occurrence + occurences.put(editClass, occurence); + } + } + } + /** * Mutates and returns first element. 
*/ diff --git a/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java b/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java index a086875ea..e4f2e008c 100644 --- a/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java +++ b/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java @@ -1,5 +1,6 @@ package org.variantsync.diffdetective.metadata; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter; import org.variantsync.functjonal.Functjonal; import org.variantsync.functjonal.category.InplaceSemigroup; @@ -14,6 +15,8 @@ * @author Paul Bittner */ public class ExplainedFilterSummary implements Metadata { + public static final ResultKey KEY = new ResultKey<>("ExplainedFilterSummary"); + /** * Prefix for exported filter reasons. */ @@ -95,6 +98,18 @@ public LinkedHashMap snapshot() { ); } + @Override + public void setFromSnapshot(LinkedHashMap snap) { + for (var entry : snap.entrySet()) { + final String key = entry.getKey(); + if (key.startsWith(FILTERED_MESSAGE_BEGIN)) { + final String name = key.substring(FILTERED_MESSAGE_BEGIN.length(), key.length() - FILTERED_MESSAGE_END.length()); + + explanations.put(name, new ExplainedFilter.Explanation(Integer.parseInt(entry.getValue()), name)); + } + } + } + @Override public InplaceSemigroup semigroup() { return ISEMIGROUP; diff --git a/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java b/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java index 5eb7cb199..5867f12c7 100644 --- a/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java +++ b/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java @@ -1,6 +1,7 @@ package org.variantsync.diffdetective.metadata; import org.tinylog.Logger; +import org.variantsync.diffdetective.util.Assert; import org.variantsync.diffdetective.util.IO; 
import org.variantsync.functjonal.Cast; import org.variantsync.functjonal.category.InplaceSemigroup; @@ -22,6 +23,8 @@ public interface Metadata { */ LinkedHashMap snapshot(); + void setFromSnapshot(LinkedHashMap snapshot); + /** * Metadata should be composable. * Composition should be inplace to optimize performance. @@ -37,6 +40,33 @@ default void append(T other) { semigroup().appendToFirst(Cast.unchecked(this), other); } + /** + * Composes two equal values by returning that value unmodified. + * This method is intended to be used to implement a semigroup for objects which can't be merged + * but should always be the same anyway. If {@code !a.equals(b)} then an {@code AssertionError} + * is thrown. + * + *

The value {@code null} is treated as the neutral element in the sense that no exception is + * thrown if an element is {@code null}. In this case the return value is defined by {@code + * mergeEqual(a, null) == a} and {@code mergeEqual(null, b) == b}. + * + * @param a the first element to merge + * @param b the second element to merge + * @param the type of the objects to be merged + * @return {@code a} or {@code b} + */ + static T mergeEqual(T a, T b) { + if (b == null) { + return a; + } + + if (a != null) { + Assert.assertTrue(a.equals(b)); + } + + return b; + } + /** * Prints all key-value pairs to a single string. * Falls back to {@link #show(String, Object)} on each entry. diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java index c7ae680da..6390e0895 100644 --- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java +++ b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java @@ -103,6 +103,7 @@ public static AnalysisStrategy MiningStrategy() { public static BiFunction AnalysisFactory = (repo, repoOutputDir) -> new Analysis( + "DiffTreeMiner", List.of( new PreprocessingAnalysis(Postprocessing(repo)), new FilterAnalysis( diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java b/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java index 2f0ee44ab..8149bdc79 100644 --- a/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java +++ b/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java @@ -1,18 +1,5 @@ package org.variantsync.diffdetective.tablegen; -import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.AnalysisResult; -import org.variantsync.diffdetective.analysis.AutomationResult; -import org.variantsync.diffdetective.analysis.Analysis; -import
org.variantsync.diffdetective.analysis.MetadataKeys; -import org.variantsync.diffdetective.datasets.DatasetDescription; -import org.variantsync.diffdetective.datasets.DefaultDatasets; -import org.variantsync.diffdetective.tablegen.rows.ContentRow; -import org.variantsync.diffdetective.tablegen.styles.ShortTable; -import org.variantsync.diffdetective.tablegen.styles.VariabilityShare; -import org.variantsync.diffdetective.util.IO; -import org.variantsync.diffdetective.validation.FindMedianCommitTime; - import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -21,22 +8,27 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.function.BiConsumer; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; +import org.tinylog.Logger; +import org.variantsync.diffdetective.analysis.Analysis; +import org.variantsync.diffdetective.analysis.AnalysisResult; +import org.variantsync.diffdetective.analysis.AutomationResult; +import org.variantsync.diffdetective.analysis.StatisticsAnalysis; +import org.variantsync.diffdetective.datasets.DatasetDescription; +import org.variantsync.diffdetective.datasets.DefaultDatasets; +import org.variantsync.diffdetective.metadata.EditClassCount; +import org.variantsync.diffdetective.metadata.ExplainedFilterSummary; +import org.variantsync.diffdetective.tablegen.rows.ContentRow; +import org.variantsync.diffdetective.tablegen.styles.ShortTable; +import org.variantsync.diffdetective.tablegen.styles.VariabilityShare; +import org.variantsync.diffdetective.util.IO; +import org.variantsync.diffdetective.validation.FindMedianCommitTime; + /** Accumulates multiple {@link AnalysisResult}s of several datasets. */ public class MiningResultAccumulator { - /** Specification of the information loaded by {@link getAllTotalResultsIn}. 
*/ - private final static Map> CustomEntryParsers = Map.ofEntries( - AnalysisResult.storeAsCustomInfo(MetadataKeys.TREEFORMAT), - AnalysisResult.storeAsCustomInfo(MetadataKeys.NODEFORMAT), - AnalysisResult.storeAsCustomInfo(MetadataKeys.EDGEFORMAT), - AnalysisResult.storeAsCustomInfo(MetadataKeys.TASKNAME), - Map.entry("org/variantsync/diffdetective/analysis", (r, val) -> r.putCustomInfo(MetadataKeys.TASKNAME, val)) - ); - /** * Finds all {@code AnalysisResult}s in {@code folderPath} recursively. * All files having a {@link Analysis#TOTAL_RESULTS_FILE_NAME} filename ending are @@ -55,7 +47,12 @@ public static Map getAllTotalResultsIn(final Path folder final Map results = new HashMap<>(); for (final Path p : paths) { - results.put(p.getParent().getFileName().toString(), AnalysisResult.importFrom(p, CustomEntryParsers)); + var result = new AnalysisResult(); + result.append(ExplainedFilterSummary.KEY, new ExplainedFilterSummary()); + result.append(EditClassCount.KEY, new EditClassCount()); + result.append(StatisticsAnalysis.RESULT, new StatisticsAnalysis.Result()); + result.setFrom(p); + results.put(p.getParent().getFileName().toString(), result); } return results; } @@ -115,7 +112,7 @@ public static void main(final String[] args) throws IOException, ParseException final Map allResults = getAllTotalResultsIn(inputPath); final AnalysisResult ultimateResult = computeTotalMetadataResult(allResults.values()); - Analysis.exportMetadataToFile(inputPath.resolve("ultimateresult" + AnalysisResult.EXTENSION), ultimateResult); + Analysis.exportMetadataToFile(inputPath.resolve("ultimateresult" + Analysis.EXTENSION), ultimateResult); final Map datasetByName; try { diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java b/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java index ab9d82309..d6c543b6a 100644 --- a/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java +++ 
b/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java @@ -2,7 +2,9 @@ import org.variantsync.diffdetective.analysis.AnalysisResult; import org.variantsync.diffdetective.analysis.AutomationResult; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; import org.variantsync.diffdetective.datasets.DatasetDescription; +import org.variantsync.diffdetective.metadata.Metadata; import org.variantsync.diffdetective.tablegen.ColumnDefinition; import org.variantsync.diffdetective.tablegen.Row; import org.variantsync.diffdetective.tablegen.TableGenerator; @@ -21,6 +23,10 @@ public record ContentRow( AnalysisResult results, AutomationResult automationResult ) implements Row { + public > T get(ResultKey resultKey) { + return results.get(resultKey); + } + @Override public String toLaTeXRow(final List columns) { final StringBuilder lineBuilder = new StringBuilder(); diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java b/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java index b235094e9..44a377925 100644 --- a/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java +++ b/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java @@ -1,9 +1,10 @@ package org.variantsync.diffdetective.tablegen.styles; import org.apache.commons.lang3.function.TriFunction; -import org.variantsync.diffdetective.metadata.EditClassCount; +import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import org.variantsync.diffdetective.editclass.EditClass; import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; +import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.tablegen.ColumnDefinition; import org.variantsync.diffdetective.tablegen.Row; import org.variantsync.diffdetective.tablegen.TableDefinition; @@ -77,11 +78,10 @@ private static List columns(final ShortTable t, final TriFunct col("Name", 
LEFT, row -> row.dataset().name().toLowerCase(Locale.US)), col("Domain", LEFT_DASH, row -> row.dataset().domain()), col("\\#total\\\\ commits", RIGHT, row -> t.makeReadable(row.results().totalCommits)), - col("\\#processed commits", RIGHT, row -> t.makeReadable(row.results().exportedCommits)), - col("\\#diffs", RIGHT, row -> t.makeReadable(row.results().exportedTrees)), + col("\\#processed commits", RIGHT, row -> t.makeReadable(row.get(StatisticsAnalysis.RESULT).processedCommits)), + col("\\#diffs", RIGHT, row -> t.makeReadable(row.get(StatisticsAnalysis.RESULT).processedTrees)), col("\\#artifact nodes", RIGHT_DASH, row -> t.makeReadable(row - .results() - .editClassCounts + .get(EditClassCount.KEY) .getOccurences() .values().stream() .map(EditClassCount.Occurrences::getTotalAmount) @@ -95,7 +95,7 @@ private static List columns(final ShortTable t, final TriFunct } } - cols.add(col("runtime", DASH_RIGHT, row -> t.makeReadable(row.results().runtimeInSeconds) + "s")); + cols.add(col("runtime", DASH_RIGHT, row -> t.makeReadable(row.get(StatisticsAnalysis.RESULT).runtimeInSeconds) + "s")); cols.add(col("avg. 
runtime~/\\\\ processed commit", RIGHT, row -> t.makeReadable(row.automationResult().avgTimeMS()) + "ms")); cols.add(col("median runtime~/\\\\ processed commit", RIGHT, row -> t.makeReadable(row.automationResult().median().milliseconds()) + "ms")); @@ -113,7 +113,7 @@ private static List columns(final ShortTable t, final TriFunct * @see column */ private static String absoluteCountOf(final ShortTable t, final EditClass editClass, final ContentRow row) { - return t.makeReadable(row.results().editClassCounts.getOccurences().get(editClass).getTotalAmount()); + return t.makeReadable(row.get(EditClassCount.KEY).getOccurences().get(editClass).getTotalAmount()); } /** @@ -128,7 +128,7 @@ private static String absoluteCountOf(final ShortTable t, final EditClass editCl */ private static String relativeCountOf(final ShortTable t, final EditClass editClass, final ContentRow row) { final LinkedHashMap editClassOccurrences = - row.results().editClassCounts.getOccurences(); + row.get(EditClassCount.KEY).getOccurences(); int numTotalMatches = 0; for (final Map.Entry occurrence : editClassOccurrences.entrySet()) { diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java b/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java index fa502418b..6e3201f25 100644 --- a/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java +++ b/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java @@ -1,7 +1,9 @@ package org.variantsync.diffdetective.tablegen.styles; +import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import org.variantsync.diffdetective.editclass.EditClass; import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; +import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.tablegen.Row; import org.variantsync.diffdetective.tablegen.TableDefinition; import org.variantsync.diffdetective.tablegen.TableGenerator; @@ -33,15 +35,15 @@ 
public Table1() { col("Name", LEFT, row -> row.dataset().name()), col("Domain", LEFT, row -> row.dataset().domain()), col("\\#total commits", RIGHT_DASH, row -> makeReadable(row.results().totalCommits)), - col("\\#processed commits", RIGHT, row -> makeReadable(row.results().exportedCommits)), - col("\\#diffs", RIGHT, row -> makeReadable(row.results().exportedTrees)) + col("\\#processed commits", RIGHT, row -> makeReadable(row.get(StatisticsAnalysis.RESULT).processedCommits)), + col("\\#diffs", RIGHT, row -> makeReadable(row.get(StatisticsAnalysis.RESULT).processedTrees)) )); for (final EditClass a : ProposedEditClasses.Instance.all()) { - this.columnDefinitions.add(col(a.getName(), RIGHT, row -> makeReadable(row.results().editClassCounts.getOccurences().get(a).getTotalAmount()))); + this.columnDefinitions.add(col(a.getName(), RIGHT, row -> makeReadable(row.get(EditClassCount.KEY).getOccurences().get(a).getTotalAmount()))); } - this.columnDefinitions.add(col("runtime (s)", RIGHT, row -> makeReadable(row.results().runtimeInSeconds))); + this.columnDefinitions.add(col("runtime (s)", RIGHT, row -> makeReadable(row.get(StatisticsAnalysis.RESULT).runtimeInSeconds))); } /** Sorts {@code rows} alphabetically and appends {@code ultimateResult} to the result. */ diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java b/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java index 4b0471401..f62215784 100644 --- a/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java +++ b/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java @@ -52,7 +52,7 @@ private static boolean isEditToVariability(final EditClass c) { /** Returns the number of occurrences of edit classes present in the table. 
*/ private static Stream> getVariationalEditClasses(final ContentRow row) { - return row.results().editClassCounts.getOccurences().entrySet().stream() + return row.get(EditClassCount.KEY).getOccurences().entrySet().stream() .filter(entry -> isEditToVariability(entry.getKey())); } @@ -69,7 +69,7 @@ private static int countEditsToVariability(final ContentRow row) { */ private String getRelativeShareOf(final EditClass editClass, final ContentRow row) { final int totalAmount = countEditsToVariability(row); - return makeReadable(100.0 * ((double)row.results().editClassCounts.getOccurences().get(editClass).getTotalAmount()) / ((double) totalAmount)) + "\\%"; + return makeReadable(100.0 * ((double)row.get(EditClassCount.KEY).getOccurences().get(editClass).getTotalAmount()) / ((double) totalAmount)) + "\\%"; } /** diff --git a/src/main/java/org/variantsync/diffdetective/validation/Validation.java b/src/main/java/org/variantsync/diffdetective/validation/Validation.java index 84f90bdeb..84e776dfe 100644 --- a/src/main/java/org/variantsync/diffdetective/validation/Validation.java +++ b/src/main/java/org/variantsync/diffdetective/validation/Validation.java @@ -11,7 +11,6 @@ import org.eclipse.jgit.api.errors.GitAPIException; import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.AnalysisResult; import org.variantsync.diffdetective.analysis.FilterAnalysis; import org.variantsync.diffdetective.analysis.Analysis; import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; @@ -19,6 +18,7 @@ import org.variantsync.diffdetective.datasets.*; import org.variantsync.diffdetective.datasets.Repository; import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; +import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.mining.formats.DirectedEdgeLabelFormat; import org.variantsync.diffdetective.mining.formats.MiningNodeFormat; import 
org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat; @@ -51,6 +51,7 @@ public class Validation implements Analysis.Hooks { // This is only needed for the `MarlinDebug` test. public static final BiFunction AnalysisFactory = (repo, repoOutputDir) -> new Analysis( + "EditClassValidation", List.of( new PreprocessingAnalysis(new CutNonEditedSubtrees()), new FilterAnalysis(DiffTreeFilter.notEmpty()), // filters unwanted trees @@ -194,11 +195,16 @@ public static void main(String[] args) throws IOException { FileUtils.copyFile(Path.of(logFile).toFile(), outputDir.resolve(logFile).toFile()); } + @Override + public void initializeResults(Analysis analysis) { + analysis.append(EditClassCount.KEY, new EditClassCount()); + } + @Override public boolean analyzeDiffTree(Analysis analysis) throws Exception { analysis.getCurrentDiffTree().forAll(node -> { if (node.isArtifact()) { - analysis.getResult().editClassCounts.reportOccurrenceFor( + analysis.get(EditClassCount.KEY).reportOccurrenceFor( ProposedEditClasses.Instance.match(node), analysis.getCurrentCommitDiff() ); diff --git a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java index 6d428a350..18cd64f54 100644 --- a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java +++ b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java @@ -45,6 +45,13 @@ public LinkedHashMap snapshot() { return map; } + @Override + public void setFromSnapshot(LinkedHashMap snap) { + numExportedNonNodes = Integer.parseInt(snap.get(MetadataKeys.NON_NODE_COUNT)); + numExportedAddNodes = Integer.parseInt(snap.get(MetadataKeys.ADD_NODE_COUNT)); + numExportedRemNodes = Integer.parseInt(snap.get(MetadataKeys.REM_NODE_COUNT)); + } + @Override public InplaceSemigroup semigroup() { return 
ISEMIGROUP; diff --git a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java index 01e8e78df..206e7228a 100644 --- a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java +++ b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java @@ -2,15 +2,18 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.LinkedHashMap; import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.AnalysisResult; +import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey; +import org.variantsync.diffdetective.analysis.MetadataKeys; import org.variantsync.diffdetective.diff.git.CommitDiff; import org.variantsync.diffdetective.diff.git.PatchDiff; -import org.variantsync.diffdetective.util.StringUtils; +import org.variantsync.diffdetective.metadata.Metadata; import org.variantsync.diffdetective.util.StringUtils; import org.variantsync.diffdetective.variation.diff.DiffTree; import org.variantsync.diffdetective.variation.diff.source.DiffTreeSource; +import org.variantsync.functjonal.category.InplaceSemigroup; /** * Class that contains functions for writing {@link CommitDiff}s and (sets of) {@link DiffTree}s to a linegraph file. @@ -19,6 +22,50 @@ public final class LineGraphExport { private LineGraphExport() {} + public static final ResultKey STATISTIC = new ResultKey<>("LineGraphExporter"); + public static final class Statistic implements Metadata { + /** + * The number of commits that were processed. + * {@code exportedCommits <= totalCommits} + */ + public int exportedCommits = 0; + /** + * Number of DiffTrees that were processed. + */ + public int exportedTrees = 0; + /** + * Debug data for DiffTree serialization. 
+ */ + public final DiffTreeSerializeDebugData debugData = new DiffTreeSerializeDebugData(); + + public static final InplaceSemigroup ISEMIGROUP = (a, b) -> { + a.exportedCommits += b.exportedCommits; + a.exportedTrees += b.exportedTrees; + a.debugData.append(b.debugData); + }; + + @Override + public InplaceSemigroup semigroup() { + return ISEMIGROUP; + } + + @Override + public LinkedHashMap snapshot() { + var snap = new LinkedHashMap(); + snap.put(MetadataKeys.EXPORTED_COMMITS, exportedCommits); + snap.put(MetadataKeys.EXPORTED_TREES, exportedTrees); + snap.putAll(debugData.snapshot()); + return snap; + } + + @Override + public void setFromSnapshot(LinkedHashMap snap) { + exportedCommits = Integer.parseInt(snap.get(MetadataKeys.EXPORTED_COMMITS)); + exportedTrees = Integer.parseInt(snap.get(MetadataKeys.EXPORTED_TREES)); + debugData.setFromSnapshot(snap); + } + } + /** * Exports the given DiffTree to a linegraph String. No file will be written. * @param diffTree The difftree to export to linegraph format. @@ -33,13 +80,12 @@ public static DiffTreeSerializeDebugData toLineGraphFormat(final DiffTree diffTr /** * Exports the given DiffTrees that originated from a repository with the given name. - * @param repoName The name of the repository, the given DiffTrees originated from. * @param trees The set of trees to export. * @param options Configuration options for the export, such as the format used for node and edge labels. * @return A pair of (1) metadata about the exported DiffTrees, and (2) the produced linegraph as String. 
*/ - public static AnalysisResult toLineGraphFormat(final String repoName, final Iterable trees, final LineGraphExportOptions options, OutputStream destination) throws IOException { - final AnalysisResult result = new AnalysisResult(repoName); + public static Statistic toLineGraphFormat(final Iterable trees, final LineGraphExportOptions options, OutputStream destination) throws IOException { + final var result = new Statistic(); for (final DiffTree t : trees) { destination.write(lineGraphHeader(t.getSource(), options).getBytes()); @@ -53,36 +99,19 @@ public static AnalysisResult toLineGraphFormat(final String repoName, final Iter return result; } - /** - * Same as {@link LineGraphExport#toLineGraphFormat(String, Iterable, LineGraphExportOptions, OutputStream)} but with an - * {@link AnalysisResult#NO_REPO unkown repository}. - */ - public static AnalysisResult toLineGraphFormat(final Iterable trees, final LineGraphExportOptions options, OutputStream destination) throws IOException { - return toLineGraphFormat(AnalysisResult.NO_REPO, trees, options, destination); - } - - /** - * Same as {@link LineGraphExport#toLineGraphFormat(String, CommitDiff, LineGraphExportOptions, OutputStream)} - * but with an {@link AnalysisResult#NO_REPO unkown repository}. - */ - public static AnalysisResult toLineGraphFormat(final CommitDiff commitDiff, final LineGraphExportOptions options, OutputStream destination) throws IOException { - return toLineGraphFormat(AnalysisResult.NO_REPO, commitDiff, options, destination); - } - /** * Writes the given commitDiff in linegraph format to the given StringBuilder. - * @param repoName The name of the repository from which the given CommitDiff originated. * @param commitDiff The diff to convert to line graph format. * @param options Configuration options for the export, such as the format used for node and edge labels. 
* @param destination where the resulting line graph is written * @return The number of the next diff tree to export (updated value of treeCounter). */ - public static AnalysisResult toLineGraphFormat(final String repoName, final CommitDiff commitDiff, LineGraphExportOptions options, OutputStream destination) throws IOException { - final AnalysisResult result = new AnalysisResult(repoName); + public static Statistic toLineGraphFormat(final CommitDiff commitDiff, LineGraphExportOptions options, OutputStream destination) throws IOException { + final var result = new Statistic(); for (final PatchDiff patchDiff : commitDiff.getPatchDiffs()) { try { - result.append(toLineGraphFormat(repoName, patchDiff, options, destination)); + result.append(toLineGraphFormat(patchDiff, options, destination)); } catch (Exception e) { options.onError().accept(patchDiff, e); break; @@ -101,8 +130,8 @@ public static AnalysisResult toLineGraphFormat(final String repoName, final Comm * @param destination where the resulting line graph is written * @return The number of the next diff tree to export (updated value of treeCounter). 
*/ - public static AnalysisResult toLineGraphFormat(final String repoName, final PatchDiff patch, final LineGraphExportOptions options, OutputStream destination) throws IOException { - final AnalysisResult result = new AnalysisResult(repoName); + public static Statistic toLineGraphFormat(final PatchDiff patch, final LineGraphExportOptions options, OutputStream destination) throws IOException { + final var result = new Statistic(); if (patch.isValid()) { //Logger.info(" Exporting DiffTree #{}", treeCounter); From 9da9ca72dce6aafbfefdaa38ae923c33c1edb07b Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 22 Jan 2023 19:15:44 +0100 Subject: [PATCH 09/15] Make repository handling of validations reusable --- README.md | 2 +- docker/execute.sh | 6 +- scripts/runValidation.sh | 2 +- .../diffdetective/mining/DiffTreeMiner.java | 2 +- .../validation/EditClassValidation.java | 70 +++++++++++++++++ .../validation/FindMedianCommitTime.java | 4 +- .../diffdetective/validation/Validation.java | 76 +++++-------------- src/test/java/MarlinDebug.java | 4 +- 8 files changed, 97 insertions(+), 69 deletions(-) create mode 100644 src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java diff --git a/README.md b/README.md index 2aae8bc9e..b8a27ad6f 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Moreover, the results comprise the (LaTeX) tables that are part of our paper and DiffDetective is documented with javadoc. The documentation can be accessed on this [website][documentation]. Notable classes of our library are: - [DiffTree](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/difftree/DiffTree.html) and [DiffNode](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/difftree/DiffNode.html) implement variation diffs from our paper. A variation diff is represented by an instance of the `DiffTree` class. 
It stores the root node of the diff and offers various methods to parse, traverse, and analyze variation diffs. `DiffNode`s represent individual nodes within a variation diff. -- [Validation](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/validation/Validation.html) contains the main method for our validation. +- [EditClassValidation](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/validation/EditClassValidation.html) contains the main method for our validation. - [ProposedEditClasses](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/editclass/proposed/ProposedEditClasses.html) holds the catalog of the nine edit classes we proposed in our paper. It implements the interface [EditClassCatalogue](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/editclass/EditClassCatalogue.html), which allows to define custom edit classifications. - [BooleanAbstraction](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/feature/BooleanAbstraction.html) contains data and methods for boolean abstraction of higher-order logic formulas. We use this for macro parsing. - [GitDiffer](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/GitDiffer.html) may parse the history of a git repository to variation diffs. diff --git a/docker/execute.sh b/docker/execute.sh index 2bc209e5b..d308e9bae 100644 --- a/docker/execute.sh +++ b/docker/execute.sh @@ -15,15 +15,15 @@ cd /home/sherlock || exit if [ "$1" == 'replication' ]; then echo "Running full replication. Depending on your system, this will require several hours or even a few days." 
- java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation docs/datasets/esecfse22-replication.md + java -cp DiffDetective.jar org.variantsync.diffdetective.validation.EditClassValidation docs/datasets/esecfse22-replication.md elif [ "$1" == 'verification' ]; then echo "Running a short verification." - java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation docs/datasets/esecfse22-verification.md + java -cp DiffDetective.jar org.variantsync.diffdetective.validation.EditClassValidation docs/datasets/esecfse22-verification.md else echo "" echo "Running detection on a custom dataset with the input file $1" echo "" - java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation "$1" + java -cp DiffDetective.jar org.variantsync.diffdetective.validation.EditClassValidation "$1" fi echo "Collecting results." cp -r results/* ../results/ diff --git a/scripts/runValidation.sh b/scripts/runValidation.sh index 6c8adff97..2b080dc2c 100755 --- a/scripts/runValidation.sh +++ b/scripts/runValidation.sh @@ -1,2 +1,2 @@ -java -cp "target/diffdetective-1.0.0-jar-with-dependencies.jar" org.variantsync.diffdetective.validation.Validation +java -cp "target/diffdetective-1.0.0-jar-with-dependencies.jar" org.variantsync.diffdetective.validation.EditClassValidation echo "runValidation.sh DONE" diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java index 6390e0895..3c0d388a7 100644 --- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java +++ b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java @@ -148,7 +148,7 @@ public static void main(String[] args) throws IOException { final List datasets = DefaultDatasets.loadDatasets(DATASET_FILE); // if (PRINT_LATEX_TABLE) { -// Validation.printLaTeXTableFor(datasets); +// EditClassValidation.printLaTeXTableFor(datasets); // } final DatasetFactory 
miningDatasetFactory = new DatasetFactory(inputDir); diff --git a/src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java b/src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java new file mode 100644 index 000000000..3cc9c8c7c --- /dev/null +++ b/src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java @@ -0,0 +1,70 @@ +package org.variantsync.diffdetective.validation; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.function.BiFunction; + +import org.variantsync.diffdetective.analysis.Analysis; +import org.variantsync.diffdetective.analysis.FilterAnalysis; +import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; +import org.variantsync.diffdetective.analysis.StatisticsAnalysis; +import org.variantsync.diffdetective.datasets.Repository; +import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; +import org.variantsync.diffdetective.metadata.EditClassCount; +import org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter; +import org.variantsync.diffdetective.variation.diff.transform.CutNonEditedSubtrees; + +/** + * This is the validation from our ESEC/FSE'22 paper. + * It provides all configuration settings and facilities to set up the validation by + * creating an {@link Analysis} and running it. + * @author Paul Bittner + */ +public class EditClassValidation implements Analysis.Hooks { + // This is only needed for the `MarlinDebug` test. + public static final BiFunction AnalysisFactory = (repo, repoOutputDir) -> new Analysis( + "EditClassValidation", + List.of( + new PreprocessingAnalysis(new CutNonEditedSubtrees()), + new FilterAnalysis(DiffTreeFilter.notEmpty()), // filters unwanted trees + new EditClassValidation(), + new StatisticsAnalysis() + ), + repo, + repoOutputDir + ); + + /** + * Main method to start the validation. + * @param args Command-line options.
+ * @throws IOException When copying the log file fails. + */ + public static void main(String[] args) throws IOException { +// setupLogger(Level.INFO); +// setupLogger(Level.DEBUG); + + Validation.run(args, (repo, repoOutputDir) -> + Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)) + ); + } + + @Override + public void initializeResults(Analysis analysis) { + analysis.append(EditClassCount.KEY, new EditClassCount()); + } + + @Override + public boolean analyzeDiffTree(Analysis analysis) throws Exception { + analysis.getCurrentDiffTree().forAll(node -> { + if (node.isArtifact()) { + analysis.get(EditClassCount.KEY).reportOccurrenceFor( + ProposedEditClasses.Instance.match(node), + analysis.getCurrentCommitDiff() + ); + } + }); + + return true; + } +} diff --git a/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java b/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java index bc584e03b..cdc9a193d 100644 --- a/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java +++ b/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java @@ -17,7 +17,7 @@ import java.util.stream.Stream; /** - * Program to find the median commit time after the {@link Validation} has been performed. + * Program to find the median commit time after the {@link EditClassValidation} has been performed. * This program will iterate through all commit times reported by the validation, load them, * and find average time, median time, the fastest, and the slowest commit. * @author Paul Bittner @@ -54,7 +54,7 @@ public static void main(final String[] args) throws IOException { /** * Summarizes the commit time results found in the given validation output directory. * The directory should point to the root of the directory in which the results of an execution - * of the {@link Validation} can be found. + * of the {@link EditClassValidation} can be found. 
* @param directory Validation output directory. * @return Summary of commit process times with various speed statistics. * @throws IOException when iterating the files in the given directory fails for some reason. diff --git a/src/main/java/org/variantsync/diffdetective/validation/Validation.java b/src/main/java/org/variantsync/diffdetective/validation/Validation.java index 84e776dfe..b14b981b8 100644 --- a/src/main/java/org/variantsync/diffdetective/validation/Validation.java +++ b/src/main/java/org/variantsync/diffdetective/validation/Validation.java @@ -5,38 +5,31 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; -import java.util.function.BiFunction; +import java.util.function.BiConsumer; import java.util.stream.Collectors; + import org.apache.commons.io.FileUtils; import org.eclipse.jgit.api.errors.GitAPIException; import org.tinylog.Logger; - -import org.variantsync.diffdetective.analysis.FilterAnalysis; import org.variantsync.diffdetective.analysis.Analysis; -import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; -import org.variantsync.diffdetective.analysis.StatisticsAnalysis; -import org.variantsync.diffdetective.datasets.*; +import org.variantsync.diffdetective.datasets.DatasetDescription; +import org.variantsync.diffdetective.datasets.DatasetFactory; +import org.variantsync.diffdetective.datasets.DefaultDatasets; +import org.variantsync.diffdetective.datasets.ParseOptions; import org.variantsync.diffdetective.datasets.Repository; -import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; -import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.mining.formats.DirectedEdgeLabelFormat; import org.variantsync.diffdetective.mining.formats.MiningNodeFormat; import org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat; import org.variantsync.diffdetective.util.Assert; -import 
org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter; import org.variantsync.diffdetective.variation.diff.serialize.GraphFormat; import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions; import org.variantsync.diffdetective.variation.diff.serialize.edgeformat.EdgeLabelFormat; import org.variantsync.diffdetective.variation.diff.serialize.treeformat.CommitDiffDiffTreeLabelFormat; -import org.variantsync.diffdetective.variation.diff.transform.CutNonEditedSubtrees; - -/** - * This is the validation from our ESEC/FSE'22 paper. - * It provides all configuration settings and facilities to setup the validation by - * creating a {@link Analysis} and run it. - * @author Paul Bittner - */ -public class Validation implements Analysis.Hooks { + +public class Validation { + private Validation() { + } + /** * Hardcoded configuration option that determines of all analyzed repositories should be updated * (i.e., git pull) before the validation. @@ -49,19 +42,6 @@ public class Validation implements Analysis.Hooks { // public static final boolean PRINT_LATEX_TABLE = true; // public static final int PRINT_LARGEST_SUBJECTS = 3; - // This is only needed for the `MarlinDebug` test. - public static final BiFunction AnalysisFactory = (repo, repoOutputDir) -> new Analysis( - "EditClassValidation", - List.of( - new PreprocessingAnalysis(new CutNonEditedSubtrees()), - new FilterAnalysis(DiffTreeFilter.notEmpty()), // filters unwanted trees - new Validation(), - new StatisticsAnalysis() - ), - repo, - repoOutputDir - ); - /** * Returns the node format that should be used for DiffNode IO. */ @@ -120,13 +100,10 @@ public static LineGraphExportOptions ValidationExportOptions(final Repository re /** * Main method to start the validation. - * @param args Command-line options. Currently ignored. + * @param args Command-line options. * @throws IOException When copying the log file fails. 
*/ - public static void main(String[] args) throws IOException { -// setupLogger(Level.INFO); -// setupLogger(Level.DEBUG); - + public static void run(String[] args, BiConsumer validation) throws IOException { final Path datasetsFile; if (args.length < 1) { datasetsFile = DefaultDatasets.DEFAULT_DATASETS_FILE; @@ -135,10 +112,10 @@ public static void main(String[] args) throws IOException { return; } else { datasetsFile = Path.of(args[0]); + } - if (!Files.exists(datasetsFile)) { - Logger.error("The given datasets file \"" + datasetsFile + "\" does not exist."); - } + if (!Files.exists(datasetsFile)) { + Logger.error("The given datasets file \"" + datasetsFile + "\" does not exist."); } final ParseOptions.DiffStoragePolicy diffStoragePolicy = ParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER; @@ -187,30 +164,11 @@ public static void main(String[] args) throws IOException { \* ************************ */ Analysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> - Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir)) + validation.accept(repo, repoOutputDir) ); Logger.info("Done"); final String logFile = "log.txt"; FileUtils.copyFile(Path.of(logFile).toFile(), outputDir.resolve(logFile).toFile()); } - - @Override - public void initializeResults(Analysis analysis) { - analysis.append(EditClassCount.KEY, new EditClassCount()); - } - - @Override - public boolean analyzeDiffTree(Analysis analysis) throws Exception { - analysis.getCurrentDiffTree().forAll(node -> { - if (node.isArtifact()) { - analysis.get(EditClassCount.KEY).reportOccurrenceFor( - ProposedEditClasses.Instance.match(node), - analysis.getCurrentCommitDiff() - ); - } - }); - - return true; - } } diff --git a/src/test/java/MarlinDebug.java b/src/test/java/MarlinDebug.java index b99ded0e4..647556411 100644 --- a/src/test/java/MarlinDebug.java +++ b/src/test/java/MarlinDebug.java @@ -22,7 +22,7 @@ import org.variantsync.diffdetective.mining.DiffTreeMiner; import 
org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; import org.variantsync.diffdetective.util.Clock; -import org.variantsync.diffdetective.validation.Validation; +import org.variantsync.diffdetective.validation.EditClassValidation; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -148,7 +148,7 @@ public static void asValidationTask(final RepoInspection repoInspection, final S final RevWalk revWalk = new RevWalk(git.getRepository()); final RevCommit childCommit = revWalk.parseCommit(ObjectId.fromString(commitHash)); - Validation.AnalysisFactory.apply( + EditClassValidation.AnalysisFactory.apply( repoInspection.repo, repoInspection.outputPath ).processCommits(List.of(childCommit)); From 3db3898daf82ef127cf317d5c97f16f9a9d657a0 Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 19 Feb 2023 21:27:17 +0100 Subject: [PATCH 10/15] Order functions in Analysis in suggested call order --- .../diffdetective/analysis/Analysis.java | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java index 3fd557467..11948f211 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java @@ -113,6 +113,31 @@ default void endCommit(Analysis analysis) throws Exception {} default void endBatch(Analysis analysis) throws Exception {} } + public static void forEachRepository( + List repositoriesToAnalyze, + Path outputDir, + BiConsumer analyzeRepository + ) { + for (final Repository repo : repositoriesToAnalyze) { + final Path repoOutputDir = outputDir.resolve(repo.getRepositoryName()); + // Don't repeat work we already did: + if (Files.exists(repoOutputDir.resolve(TOTAL_RESULTS_FILE_NAME))) { + Logger.info(" Skipping repository {} because it has already been processed.", + repo.getRepositoryName()); 
+ } else { + Logger.info(" === Begin Processing {} ===", repo.getRepositoryName()); + final Clock clock = new Clock(); + clock.start(); + + analyzeRepository.accept(repo, repoOutputDir); + + Logger.info(" === End Processing {} after {} ===", + repo.getRepositoryName(), + clock.printPassedSeconds()); + } + } + } + public static AnalysisResult forEachCommit(Supplier analysis) { return forEachCommit( analysis, @@ -336,29 +361,4 @@ public static void exportMetadataToFile(final Path outputFile, final Metadat final String prettyMetadata = metadata.exportTo(outputFile); Logger.info("Metadata:\n{}", prettyMetadata); } - - public static void forEachRepository( - List repositoriesToAnalyze, - Path outputDir, - BiConsumer analyzeRepository - ) { - for (final Repository repo : repositoriesToAnalyze) { - final Path repoOutputDir = outputDir.resolve(repo.getRepositoryName()); - /// Don't repeat work we already did: - if (Files.exists(repoOutputDir.resolve(TOTAL_RESULTS_FILE_NAME))) { - Logger.info(" Skipping repository {} because it has already been processed.", - repo.getRepositoryName()); - } else { - Logger.info(" === Begin Processing {} ===", repo.getRepositoryName()); - final Clock clock = new Clock(); - clock.start(); - - analyzeRepository.accept(repo, repoOutputDir); - - Logger.info(" === End Processing {} after {} ===", - repo.getRepositoryName(), - clock.printPassedSeconds()); - } - } - } } From 702fd22385bfe38023923a5d97747d3d33d48eec Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 19 Feb 2023 18:56:26 +0100 Subject: [PATCH 11/15] Document the public interface of Analysis --- .../diffdetective/analysis/Analysis.java | 170 +++++++++++++++++- 1 file changed, 166 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java index 11948f211..8c6c200db 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java +++ 
b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java @@ -30,6 +30,16 @@ import org.variantsync.functjonal.iteration.MappedIterator; /** + * Encapsulates the state and control flow during an analysis of the commit history of multiple + * repositories using {@link DiffTree}s. Each repository is processed sequentially but the commits + * of each repository can be processed in parallel. + * + *

For thread safety, each thread receives its own instance of {@code Analysis}. The getters + * provide access to the current state of the analysis in one thread. Depending on the current + * {@link Hooks phase} only a subset of the state accessible via getters may be valid. + * + * @see forEachRepository + * @see forEachCommit * @author Paul Bittner, Benjamin Moosherr */ public class Analysis { @@ -43,7 +53,7 @@ public class Analysis { public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + EXTENSION; /** * Default value for commitsToProcessPerThread - * @see forEachCommit(Supplier, int, int) + * @see forEachCommit(Supplier, int, int) */ public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000; @@ -60,59 +70,172 @@ public class Analysis { protected Path outputFile; protected final AnalysisResult result; + /** + * The repository this analysis is run on. + * Always valid. + */ public Repository getRepository() { return repository; } + /** + * The currently processed commit. + * Valid during the commit {@link Hooks phase}. + */ public RevCommit getCurrentCommit() { return currentCommit; } + /** + * The currently processed commit diff. + * Valid when {@link Hooks#onParsedCommit} is called until the end of the commit phase. + */ public CommitDiff getCurrentCommitDiff() { return currentCommitDiff; } + /** + * The currently processed patch. + * Valid during the patch {@link Hooks phase}. + */ public PatchDiff getCurrentPatch() { return currentPatch; } + /** + * The currently processed diff tree. + * Valid only during {@link Hooks#analyzeDiffTree}. + */ public DiffTree getCurrentDiffTree() { return currentDiffTree; } + /** + * The destination for results which are written to disk. + * Always valid. + */ public Path getOutputDir() { return outputDir; } + /** + * The destination for results which are written to disk and specific to the currently processed + * commit batch. + * Valid during the batch {@link Hooks phase}. 
+ */ public Path getOutputFile() { return outputFile; } + /** + * The results of the analysis. This may be modified by any hook and should be initialized in + * {@link Hooks#initializeResults} (e.g. by using {@link append}). + * Always valid. + */ public AnalysisResult getResult() { return result; } + /** + * Convenience getter for {@link AnalysisResult#get} on {@link getResult}. + * Always valid. + */ public > T get(ResultKey resultKey) { return result.get(resultKey); } + /** + * Convenience function for {@link AnalysisResult#append} on {@link getResult}. + * Always valid. + */ public > void append(ResultKey resultKey, T value) { result.append(resultKey, value); } + /** + * Hooks for analyzing commits using {@link DiffTree}s. + * + *

In general the hooks of different {@code Hook} instances are called in sequence according + * to the order specified in {@link Analysis#Analysis} (except end hooks). Hooks are separated + * into two categories: phases and events. + * + *

A phase consists of two hooks with the prefix {@code begin} and {@code end}. It is + * guaranteed that the end hook is called if and only if the begin hook was called, even in the + * presence of exceptions, so they are safe to use for resource management. For this purpose, + * end hooks are called in the reverse of the order specified in {@link Analysis#Analysis}. + * +

Phases can be called an arbitrary number of times but are nested in the following order + * (from outer to inner): + *

    + *
  • batch + *
  • commit + *
  • patch + *
+ * An inner phase is only executed while an outer phase runs (in between the phase's begin and + * end hooks). + * + *

An analysis implementing {@code Hooks} can perform various actions during each hook. This + * includes the {@link append creation} and {@link get modification} of {@link getResult + * analysis results}, modifying their internal state, performing IO operations and throwing + * exceptions. In contrast, the only analysis state hooks are allowed to modify is the {@link + * getResult result} of an {@link Analysis}. All other state (e.g. {@link getCurrentCommit}) + * must not be modified. Care must be taken to avoid the reliance of the internal state on a + * specific commit batch being processed as only the {@link getResult results} of each commit + * batch are merged and returned by {@link forEachCommit}. + * + *

Hooks that return a {@code boolean} are called filter hooks and can, in addition to the + * above, skip any further processing in the current phase (including following inner phases) by + * returning {@code false}. If a hook starts skipping, any invocations of the same filter hook + * of following {@code Hook} instances won't be executed. Processing continues (after calling + * missing end hooks of the current phase) in the next outer phase after the skipped phase. + * + *

Hooks without a {@code begin} or {@code end} prefix are events emitted during some + * specified conditions. See their respective documentation for details. + */ public interface Hooks { + /** + * Initialization hook for {@link getResult}. All result types should be appended with a + * neutral value using {@link append}. No other side effects should be performed during this + * method as it might be called an arbitrary number of times. + */ default void initializeResults(Analysis analysis) {} default void beginBatch(Analysis analysis) throws Exception {} default boolean beginCommit(Analysis analysis) throws Exception { return true; } + /** + * Signals a parsing failure of some patch in the current commit. + * Called at most once during the commit phase. If this hook is called, {@link + * onParsedCommit} and the following patch phase invocations are skipped. + */ default void onFailedCommit(Analysis analysis) throws Exception {} + /** + * Signals the completion of the commit diff extraction. + * Called exactly once during the commit phase before the patch phase begins. + */ default boolean onParsedCommit(Analysis analysis) throws Exception { return true; } default boolean beginPatch(Analysis analysis) throws Exception { return true; } + /** + * The main hook for analyzing non-empty diff trees. + * Called at most once during the patch phase. + */ default boolean analyzeDiffTree(Analysis analysis) throws Exception { return true; } default void endPatch(Analysis analysis) throws Exception {} default void endCommit(Analysis analysis) throws Exception {} default void endBatch(Analysis analysis) throws Exception {} } + /** + * Runs {@code analyzeRepository} on each repository, skipping repositories where an analysis + * was already run. This skipping mechanism doesn't distinguish between different analyses as it + * only checks for the existence of {@link TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun + * the analysis. 
+ * + * For each repository a directory in {@code outputDir} is passed to {@code analyzeRepository} + * where the results of the given repository should be written. + * + * @param repositoriesToAnalyze the repositories for which {@code analyzeRepository} is run + * @param outputDir the directory where all repositories will save their results + * @param analyzeRepository the callback which is invoked for each repository + */ public static void forEachRepository( List repositoriesToAnalyze, Path outputDir, @@ -138,6 +261,11 @@ public static void forEachRepository( } } + /** + * Same as {@link forEachCommit(Supplier, int, int)}. + * Defaults to {@link COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of + * threads ({@link Diagnostics#getNumberOfAvailableProcessors}). + */ public static AnalysisResult forEachCommit(Supplier analysis) { return forEachCommit( analysis, @@ -146,6 +274,18 @@ public static AnalysisResult forEachCommit(Supplier analysis) { ); } + /** + * Runs the analysis for the repository given in {@link Analysis#Analysis}. The repository + * history is processed in batches of {@code commitsToProcessPerThread} on {@code nThreads} in + * parallel. {@link Hooks} passed to {@link Analysis#Analysis} are the main customization point + * for executing different analyses. By default only the total number of commits and the total + * runtime with multithreading of the {@link DiffTree} parsing are recorded. + * + * @param analysisFactory creates independent (at least thread safe) instances of the analysis + * state + * @param commitsToProcessPerThread the commit batch size + * @param nThreads the number of parallel processed commit batches + */ public static AnalysisResult forEachCommit( Supplier analysisFactory, final int commitsToProcessPerThread, @@ -201,6 +341,14 @@ public static AnalysisResult forEachCommit( return analysis.getResult(); } + /** + * Constructs the state used during an analysis. 
+ * + * @param taskName the name of the overall analysis task + * @param hooks the hooks to be run for analysis + * @param repository the repository to analyze + * @param outputDir the directory where all results are saved + */ public Analysis( String taskName, List hooks, @@ -219,10 +367,24 @@ public Analysis( } } + /** + * Entry point into a sequential analysis of {@code commits} as one batch. + * Same as {@link processCommits(List, GitDiffer)} with a default {@link GitDiffer}. + * + * @param commits the commit batch to be processed + * @see forEachCommit + */ public AnalysisResult processCommits(List commits) throws Exception { return processCommits(commits, new GitDiffer(getRepository())); } + /** + * Entry point into a sequential analysis of {@code commits} as one batch. + * + * @param commits the commit batch to be processed + * @param differ the differ to use + * @see forEachCommit + */ public AnalysisResult processCommits(List commits, GitDiffer differ) throws Exception { this.differ = differ; processCommitBatch(commits); @@ -342,9 +504,9 @@ protected void runReverseHook(ListIterator hook, FailableBiConsumer /** * Exports the given metadata object to a file named according - * {@link org.variantsync.diffdetective.analysis.Analysis#TOTAL_RESULTS_FILE_NAME} in the given directory. + * {@link TOTAL_RESULTS_FILE_NAME} in the given directory. * @param outputDir The directory into which the metadata object file should be written. - * @param metadata The metadata to serialize + * @param metadata The metadata to serialize * @param Type of the metadata. */ public static void exportMetadata(final Path outputDir, final Metadata metadata) { @@ -354,7 +516,7 @@ public static void exportMetadata(final Path outputDir, final Metadata me /** * Exports the given metadata object to the given file. Overwrites existing files. * @param outputFile The file to write. 
- * @param metadata The metadata to serialize + * @param metadata The metadata to serialize * @param Type of the metadata. */ public static void exportMetadataToFile(final Path outputFile, final Metadata metadata) { From ac04ed0cbdd5fc31f0df9d9b166e8559a00b6d51 Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Mon, 20 Feb 2023 11:38:17 +0100 Subject: [PATCH 12/15] Rename `PatchAnalysis` to `EditClassOccurenceAnalysis` --- .../{PatchAnalysis.java => EditClassOccurenceAnalysis.java} | 2 +- .../org/variantsync/diffdetective/mining/DiffTreeMiner.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename src/main/java/org/variantsync/diffdetective/analysis/{PatchAnalysis.java => EditClassOccurenceAnalysis.java} (97%) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java similarity index 97% rename from src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java rename to src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java index c005ccea4..b18a4005c 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/PatchAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java @@ -12,7 +12,7 @@ import org.variantsync.diffdetective.util.FileUtils; import org.variantsync.diffdetective.util.IO; -public class PatchAnalysis implements Analysis.Hooks { +public class EditClassOccurenceAnalysis implements Analysis.Hooks { public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv"; private List patchStatistics; diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java index 3c0d388a7..32c3d5a00 100644 --- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java +++ 
b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java @@ -10,10 +10,10 @@ import org.apache.commons.io.FileUtils; import org.tinylog.Logger; -import org.variantsync.diffdetective.analysis.FilterAnalysis; import org.variantsync.diffdetective.analysis.Analysis; +import org.variantsync.diffdetective.analysis.EditClassOccurenceAnalysis; +import org.variantsync.diffdetective.analysis.FilterAnalysis; import org.variantsync.diffdetective.analysis.LineGraphExportAnalysis; -import org.variantsync.diffdetective.analysis.PatchAnalysis; import org.variantsync.diffdetective.analysis.PreprocessingAnalysis; import org.variantsync.diffdetective.analysis.StatisticsAnalysis; import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; @@ -117,7 +117,7 @@ public static AnalysisStrategy MiningStrategy() { DiffTreeFilter.hasAtLeastOneEditToVariability() ), new LineGraphExportAnalysis(MiningStrategy(), MiningExportOptions(repo)), - new PatchAnalysis(), + new EditClassOccurenceAnalysis(), new StatisticsAnalysis() ), repo, From 93690bf3327ba4da2b7a072239d4b5c0977103fa Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Mon, 20 Feb 2023 12:09:03 +0100 Subject: [PATCH 13/15] Merge `PatchStatistics` and `EditClassCount` --- .../analysis/EditClassCount.java | 46 --------------- .../analysis/EditClassOccurenceAnalysis.java | 56 +++++++++++++++++-- .../analysis/PatchStatistics.java | 29 ---------- 3 files changed, 51 insertions(+), 80 deletions(-) delete mode 100644 src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java delete mode 100644 src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java diff --git a/src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java b/src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java deleted file mode 100644 index a07a2f378..000000000 --- a/src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java +++ /dev/null @@ -1,46 +0,0 @@ -package 
org.variantsync.diffdetective.analysis; - -import org.variantsync.diffdetective.editclass.EditClass; -import org.variantsync.diffdetective.editclass.EditClassCatalogue; -import org.variantsync.diffdetective.util.CSV; - -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Collectors; - -/** - * Gathers statistics about matching edit classes. - * @author Paul Bittner - */ -public class EditClassCount implements CSV { - private final EditClassCatalogue catalogue; - private final Map editClassCounts; - - /** - * Creates a new counter object for the given catalogue of edit classes. - * @param catalogue The catalogue whose edit classes to match and count. - */ - public EditClassCount(final EditClassCatalogue catalogue) { - this.catalogue = catalogue; - this.editClassCounts = new HashMap<>(); - catalogue.all().forEach(e -> editClassCounts.put(e, 0)); - } - - /** - * Increment the count for the given edit class. - * The given edit class is assumed to be part of this counts catalog. - * @see EditClassCount#EditClassCount(EditClassCatalogue) - * @param editClass The edit class whose count to increase by one. 
- */ - public void increment(final EditClass editClass) { - editClassCounts.computeIfPresent(editClass, (p, i) -> i + 1); - } - - @Override - public String toCSV(final String delimiter) { - return catalogue.all().stream() - .map(editClassCounts::get) - .map(Object::toString) - .collect(Collectors.joining(delimiter)); - } -} diff --git a/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java index b18a4005c..054168ccc 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java @@ -3,9 +3,14 @@ import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.variantsync.diffdetective.diff.git.PatchDiff; import org.variantsync.diffdetective.editclass.EditClass; +import org.variantsync.diffdetective.editclass.EditClassCatalogue; import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.util.CSV; @@ -13,10 +18,51 @@ import org.variantsync.diffdetective.util.IO; public class EditClassOccurenceAnalysis implements Analysis.Hooks { + /** + * Statistics for processing a patch in a commit. + * @param patchDiff The diff of the processed patch. + * @param editClassCount Count statistics for the edit class matched to the edits in the patch. + * @author Paul Bittner, Benjamin Moosherr + */ + private static record Counts( + PatchDiff patchDiff, + Map editClassCounts + ) implements CSV { + /** + * Creates empty patch statistics for the given catalogue of edit classes. + * @param patch The patch to gather statistics for. 
+ * @param catalogue A catalogue of edit classes which should be used for classifying edits. + */ + public Counts(final PatchDiff patch, final EditClassCatalogue catalogue) { + this(patch, new LinkedHashMap<>()); + catalogue.all().forEach(e -> editClassCounts.put(e, 0)); + } + + /** + * Increment the count for the given edit class. + * The given edit class is assumed to be part of this counts catalog. + * @param editClass The edit class whose count to increase by one. + * @see Counts(PatchDiff, EditClassCatalogue) + */ + public void increment(final EditClass editClass) { + editClassCounts.computeIfPresent(editClass, (p, i) -> i + 1); + } + + @Override + public String toCSV(final String delimiter) { + var counts = editClassCounts + .values() + .stream() + .map(Object::toString) + .collect(Collectors.joining(delimiter)); + return patchDiff.getCommitHash() + delimiter + patchDiff.getFileName() + delimiter + counts; + } + } + public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv"; - private List patchStatistics; - private PatchStatistics thisPatchesStatistics; + private List patchStatistics; + private Counts thisPatchesStatistics; @Override public void initializeResults(Analysis analysis) { @@ -30,7 +76,7 @@ public void beginBatch(Analysis analysis) { @Override public boolean beginPatch(Analysis analysis) { - thisPatchesStatistics = new PatchStatistics(analysis.getCurrentPatch(), ProposedEditClasses.Instance); + thisPatchesStatistics = new Counts(analysis.getCurrentPatch(), ProposedEditClasses.Instance); return true; } @@ -43,7 +89,7 @@ public boolean analyzeDiffTree(Analysis analysis) { editClass, analysis.getCurrentCommitDiff() ); - thisPatchesStatistics.editClassCount().increment(editClass); + thisPatchesStatistics.increment(editClass); } }); @@ -60,7 +106,7 @@ public void endBatch(Analysis analysis) throws IOException { exportPatchStatistics(patchStatistics, FileUtils.addExtension(analysis.getOutputFile(), PATCH_STATISTICS_EXTENSION)); } - 
public static void exportPatchStatistics(final List commitTimes, final Path pathToOutputFile) throws IOException { + public static void exportPatchStatistics(final List commitTimes, final Path pathToOutputFile) throws IOException { IO.write(pathToOutputFile, CSV.toCSV(commitTimes)); } } diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java b/src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java deleted file mode 100644 index 9d207eba3..000000000 --- a/src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.variantsync.diffdetective.analysis; - -import org.variantsync.diffdetective.diff.git.PatchDiff; -import org.variantsync.diffdetective.editclass.EditClassCatalogue; -import org.variantsync.diffdetective.util.CSV; - -/** - * Statistics for processing a patch in a commit. - * @param patchDiff The diff of the processed patch. - * @param editClassCount Count statistics for the edit class matched to the edits in the patch. - * @author Paul Bittner - */ -public record PatchStatistics( - PatchDiff patchDiff, - EditClassCount editClassCount) implements CSV { - /** - * Creates empty patch statistics for the given catalogue of edit classes. - * @param patch The patch to gather statistics for. - * @param catalogue A catalogue of edit classes which should be used for classifying edits. 
- */ - public PatchStatistics(final PatchDiff patch, final EditClassCatalogue catalogue) { - this(patch, new EditClassCount(catalogue)); - } - - @Override - public String toCSV(final String delimiter) { - return patchDiff.getCommitHash() + delimiter + patchDiff.getFileName() + delimiter + editClassCount.toCSV(delimiter); - } -} From cc3f5896b7c367617bbd9a21ace1a90f5ef9ae0f Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Mon, 20 Feb 2023 12:43:18 +0100 Subject: [PATCH 14/15] Use `AnalysisStrategy` in `EditClassOccurenceAnalysis` --- .../analysis/EditClassOccurenceAnalysis.java | 101 +++++++----------- .../diffdetective/mining/DiffTreeMiner.java | 2 +- 2 files changed, 40 insertions(+), 63 deletions(-) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java index 054168ccc..b0bbb14ee 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java @@ -1,68 +1,29 @@ package org.variantsync.diffdetective.analysis; import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; +import java.io.OutputStreamWriter; +import java.io.Writer; import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; import java.util.stream.Collectors; +import java.util.stream.Stream; -import org.variantsync.diffdetective.diff.git.PatchDiff; +import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy; import org.variantsync.diffdetective.editclass.EditClass; -import org.variantsync.diffdetective.editclass.EditClassCatalogue; import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses; import org.variantsync.diffdetective.metadata.EditClassCount; import org.variantsync.diffdetective.util.CSV; import org.variantsync.diffdetective.util.FileUtils; -import 
org.variantsync.diffdetective.util.IO; +import org.variantsync.diffdetective.util.StringUtils; public class EditClassOccurenceAnalysis implements Analysis.Hooks { - /** - * Statistics for processing a patch in a commit. - * @param patchDiff The diff of the processed patch. - * @param editClassCount Count statistics for the edit class matched to the edits in the patch. - * @author Paul Bittner, Benjamin Moosherr - */ - private static record Counts( - PatchDiff patchDiff, - Map editClassCounts - ) implements CSV { - /** - * Creates empty patch statistics for the given catalogue of edit classes. - * @param patch The patch to gather statistics for. - * @param catalogue A catalogue of edit classes which should be used for classifying edits. - */ - public Counts(final PatchDiff patch, final EditClassCatalogue catalogue) { - this(patch, new LinkedHashMap<>()); - catalogue.all().forEach(e -> editClassCounts.put(e, 0)); - } - - /** - * Increment the count for the given edit class. - * The given edit class is assumed to be part of this counts catalog. - * @param editClass The edit class whose count to increase by one. 
- * @see Counts(PatchDiff, EditClassCatalogue) - */ - public void increment(final EditClass editClass) { - editClassCounts.computeIfPresent(editClass, (p, i) -> i + 1); - } - - @Override - public String toCSV(final String delimiter) { - var counts = editClassCounts - .values() - .stream() - .map(Object::toString) - .collect(Collectors.joining(delimiter)); - return patchDiff.getCommitHash() + delimiter + patchDiff.getFileName() + delimiter + counts; - } - } - public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv"; - private List patchStatistics; - private Counts thisPatchesStatistics; + private final AnalysisStrategy exportStrategy; + private Writer output; + + public EditClassOccurenceAnalysis(AnalysisStrategy exportStrategy) { + this.exportStrategy = exportStrategy; + } @Override public void initializeResults(Analysis analysis) { @@ -71,42 +32,58 @@ public void initializeResults(Analysis analysis) { @Override public void beginBatch(Analysis analysis) { - patchStatistics = new ArrayList<>(Analysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT); + exportStrategy.start( + analysis.getRepository(), + FileUtils.addExtension(analysis.getOutputFile(), PATCH_STATISTICS_EXTENSION) + ); } @Override - public boolean beginPatch(Analysis analysis) { - thisPatchesStatistics = new Counts(analysis.getCurrentPatch(), ProposedEditClasses.Instance); + public boolean beginCommit(Analysis analysis) { + output = new OutputStreamWriter(exportStrategy.onCommit(analysis.getCurrentCommitDiff())); return true; } @Override - public boolean analyzeDiffTree(Analysis analysis) { + public boolean analyzeDiffTree(Analysis analysis) throws IOException { + var editClassCounts = new LinkedHashMap(); + ProposedEditClasses.Instance.all().forEach(e -> editClassCounts.put(e, 0)); + analysis.getCurrentDiffTree().forAll(node -> { if (node.isArtifact()) { final EditClass editClass = ProposedEditClasses.Instance.match(node); + analysis.get(EditClassCount.KEY).reportOccurrenceFor( editClass, 
analysis.getCurrentCommitDiff() ); - thisPatchesStatistics.increment(editClass); + + editClassCounts.computeIfPresent(editClass, (p, i) -> i + 1); } }); + output.write( + Stream.concat( + Stream.of( + analysis.getCurrentPatch().getCommitHash(), + analysis.getCurrentPatch().getFileName() + ), + editClassCounts.values().stream()) + .map(Object::toString) + .collect(Collectors.joining(CSV.DEFAULT_CSV_DELIMITER)) + ); + output.write(StringUtils.LINEBREAK); + return true; } @Override - public void endPatch(Analysis analysis) { - patchStatistics.add(thisPatchesStatistics); + public void endCommit(Analysis analysis) throws IOException { + output.close(); } @Override public void endBatch(Analysis analysis) throws IOException { - exportPatchStatistics(patchStatistics, FileUtils.addExtension(analysis.getOutputFile(), PATCH_STATISTICS_EXTENSION)); - } - - public static void exportPatchStatistics(final List commitTimes, final Path pathToOutputFile) throws IOException { - IO.write(pathToOutputFile, CSV.toCSV(commitTimes)); + exportStrategy.end(); } } diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java index 32c3d5a00..0a724efb5 100644 --- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java +++ b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java @@ -117,7 +117,7 @@ public static AnalysisStrategy MiningStrategy() { DiffTreeFilter.hasAtLeastOneEditToVariability() ), new LineGraphExportAnalysis(MiningStrategy(), MiningExportOptions(repo)), - new EditClassOccurenceAnalysis(), + new EditClassOccurenceAnalysis(MiningStrategy()), new StatisticsAnalysis() ), repo, From 7f124ac243dca69f98562071a6856e5639c082bf Mon Sep 17 00:00:00 2001 From: Benjamin Moosherr Date: Sun, 19 Feb 2023 20:39:41 +0100 Subject: [PATCH 15/15] Count the total number of patches --- .../diffdetective/analysis/MetadataKeys.java | 2 ++ 
.../diffdetective/analysis/StatisticsAnalysis.java | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java b/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java index 86bdbebe5..b599894db 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java @@ -26,6 +26,8 @@ public final class MetadataKeys { public final static String RUNTIME_WITH_MULTITHREADING = "runtime with multithreading in seconds"; public static final String MINCOMMIT = "fastestCommit"; public static final String MAXCOMMIT = "slowestCommit"; + + public final static String TOTAL_PATCHES = "total patches"; public final static String TREES = "tree diffs"; public final static String EXPORTED_COMMITS = "exported commits"; diff --git a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java index 449ab92cd..ff0ac7787 100644 --- a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java +++ b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java @@ -37,6 +37,7 @@ public static final class Result implements Metadata { */ public int failedCommits = 0; public int processedCommits = 0; + public int totalTrees = 0; public int processedTrees = 0; /** * The total runtime in seconds (irrespective of multithreading). 
@@ -64,6 +65,7 @@ public Result(String repoName) { a.emptyCommits += b.emptyCommits; a.failedCommits += b.failedCommits; a.processedCommits += b.processedCommits; + a.totalTrees += b.totalTrees; a.processedTrees += b.processedTrees; a.runtimeInSeconds += b.runtimeInSeconds; a.min.set(CommitProcessTime.min(a.min, b.min)); @@ -81,6 +83,7 @@ public LinkedHashMap snapshot() { snap.put(MetadataKeys.FAILED_COMMITS, failedCommits); snap.put(MetadataKeys.EMPTY_COMMITS, emptyCommits); snap.put(MetadataKeys.PROCESSED_COMMITS, processedCommits); + snap.put(MetadataKeys.TOTAL_PATCHES, totalTrees); snap.put(MetadataKeys.TREES, processedTrees); snap.put(MetadataKeys.MINCOMMIT, min.toString()); snap.put(MetadataKeys.MAXCOMMIT, max.toString()); @@ -93,6 +96,7 @@ public void setFromSnapshot(LinkedHashMap snap) { failedCommits = Integer.parseInt(snap.get(MetadataKeys.FAILED_COMMITS)); emptyCommits = Integer.parseInt(snap.get(MetadataKeys.EMPTY_COMMITS)); processedCommits = Integer.parseInt(snap.get(MetadataKeys.PROCESSED_COMMITS)); + totalTrees = Integer.parseInt(snap.get(MetadataKeys.TOTAL_PATCHES)); min.set(CommitProcessTime.fromString(snap.get(MetadataKeys.MINCOMMIT))); max.set(CommitProcessTime.fromString(snap.get(MetadataKeys.MAXCOMMIT))); processedTrees = Integer.parseInt(snap.get(MetadataKeys.TREES)); @@ -129,6 +133,12 @@ public boolean beginCommit(Analysis analysis) { return true; } + @Override + public boolean onParsedCommit(Analysis analysis) { + analysis.get(RESULT).totalTrees += analysis.getCurrentCommitDiff().getPatchAmount(); + return true; + } + @Override public void onFailedCommit(Analysis analysis) { analysis.get(RESULT).failedCommits += 1;