diff --git a/.gitignore b/.gitignore
index 3a06d2527..758bef454 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.vscode/
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all,maven
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all,maven
diff --git a/INSTALL.md b/INSTALL.md
index 3e9f4609f..ebde581d0 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -133,7 +133,7 @@ You can also run DiffDetective on other datasets by providing the path to the da
#### Linux/Mac (bash):
`./execute.sh path/to/custom/dataset.md`
-The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets.md) in the [docs](docs) folder.
+The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets/all.md) in the [docs/datasets](docs/datasets) folder.
## Troubleshooting
diff --git a/README.md b/README.md
index e23d4515f..b8a27ad6f 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ This replication package consists of four parts:
1. **DiffDetective**: For our validation, we built _DiffDetective_, a java library and command-line tool to classify edits to variability in git histories of preprocessor-based software product lines.
2. **Appendix**: The appendix of our paper is given in PDF format in the file [appendix.pdf][appendix].
3. **Haskell Formalization**: We provide an extended formalization in the Haskell programming language as described in our appendix. Its implementation can be found in the Haskell project in the [proofs](proofs) directory.
-4. **Dataset Overview**: We provide an overview of the 44 inspected datasets with updated links to their repositories in the file [docs/datasets.md][dataset].
+4. **Dataset Overview**: We provide an overview of the 44 inspected datasets with updated links to their repositories in the file [docs/datasets/all.md][dataset].
## 1. DiffDetective
DiffDetective is a java library and command-line tool to parse and classify edits to variability in git histories of preprocessor-based software product lines by creating [variation diffs][difftree_class] and operating on them.
@@ -70,7 +70,7 @@ Moreover, the results comprise the (LaTeX) tables that are part of our paper and
DiffDetective is documented with javadoc. The documentation can be accessed on this [website][documentation]. Notable classes of our library are:
- [DiffTree](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/difftree/DiffTree.html) and [DiffNode](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/difftree/DiffNode.html) implement variation diffs from our paper. A variation diff is represented by an instance of the `DiffTree` class. It stores the root node of the diff and offers various methods to parse, traverse, and analyze variation diffs. `DiffNode`s represent individual nodes within a variation diff.
-- [Validation](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/validation/Validation.html) contains the main method for our validation.
+- [EditClassValidation](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/validation/EditClassValidation.html) contains the main method for our validation.
- [ProposedEditClasses](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/editclass/proposed/ProposedEditClasses.html) holds the catalog of the nine edit classes we proposed in our paper. It implements the interface [EditClassCatalogue](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/editclass/EditClassCatalogue.html), which allows to define custom edit classifications.
- [BooleanAbstraction](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/feature/BooleanAbstraction.html) contains data and methods for boolean abstraction of higher-order logic formulas. We use this for macro parsing.
- [GitDiffer](https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/GitDiffer.html) may parse the history of a git repository to variation diffs.
@@ -93,13 +93,13 @@ How to build our library and how to run the example is described in the [proofs/
## 4. Dataset Overview
### 4.1 Open-Source Repositories
-We provide an overview of the used 44 open-source preprocessor-based software product lines in the [docs/datasets.md][dataset] file.
+We provide an overview of the used 44 open-source preprocessor-based software product lines in the [docs/datasets/all.md][dataset] file.
As described in our paper in Section 5.1, this list contains all systems that were studied by Liebig et al., extended by four new subject systems (Busybox, Marlin, LibSSH, Godot).
We provide updated links for each system's repository.
### 4.2 Forked Repositories for Replication
To guarantee the exact replication of our validation, we created forks of all 44 open-source repositories at the state we performed the validation for our paper.
-The forked repositories are listed in the [replication datasets](docs/replication/datasets.md) and are located at the Github user profile [DiffDetective](https://github.com/DiffDetective?tab=repositories).
+The forked repositories are listed in the [replication datasets](docs/datasets/esecfse22-replication.md) and are located at the Github user profile [DiffDetective](https://github.com/DiffDetective?tab=repositories).
These repositories are used when running the replication as described under `1.2` and in the [INSTALL](INSTALL.md).
## 5. Running DiffDetective on Custom Datasets
@@ -110,11 +110,11 @@ You can also run DiffDetective on other datasets by providing the path to the da
#### Linux/Mac (bash):
`./execute.sh path/to/custom/dataset.md`
-The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets.md) in the [docs](docs) folder.
+The input file must have the same format as the other dataset files (i.e., repositories are listed in a Markdown table). You can find [dataset files](docs/datasets/all.md) in the [docs/datasets](docs/datasets) folder.
[difftree_class]: https://variantsync.github.io/DiffDetective/docs/javadoc/org/variantsync/diffdetective/diff/difftree/DiffTree.html
[haskell]: https://www.haskell.org/
-[dataset]: docs/datasets.md
+[dataset]: docs/datasets/all.md
[appendix]: appendix.pdf
[documentation]: https://variantsync.github.io/DiffDetective/docs/javadoc/
diff --git a/STATUS.md b/STATUS.md
index 10c908018..df8decb61 100644
--- a/STATUS.md
+++ b/STATUS.md
@@ -8,7 +8,7 @@ The artifact for the paper _Classifying Edits to Variability in Source Code_ con
Practitioners and researches are free to ignore the appendix as well as the haskell formalization and may use DiffDetective out-of-the-box.
2. **Appendix**: The appendix of our paper is given in PDF format in the file [`appendix.pdf`][ddappendix].
3. **Haskell Formalization**: We provide an extended formalization in the Haskell programming language as described in our appendix. Its implementation can be found in the Haskell project in the [`proofs`][ddproofs] directory.
-4. **Dataset Overview**: We provide an overview of the 44 inspected open-source software product lines with updated links to their repositories in the file [docs/datasets.md][dddatasets].
+4. **Dataset Overview**: We provide an overview of the 44 inspected open-source software product lines with updated links to their repositories in the file [docs/datasets/all.md][dddatasets].
## Purpose
Our artifact has the following purposes:
@@ -44,6 +44,6 @@ Furthermore, both DiffDetective and our Haskell formalization serve as reference
[ddappendix]: https://github.com/VariantSync/DiffDetective/raw/esecfse22/appendix.pdf
[ddproofs]: https://github.com/VariantSync/DiffDetective/tree/esecfse22/proofs
[ddlicense]: https://github.com/VariantSync/DiffDetective/blob/main/LICENSE.LGPL3
-[dddatasets]: docs/datasets.md
-[ddforks]: docs/replication/datasets.md
+[dddatasets]: docs/datasets/all.md
+[ddforks]: docs/datasets/esecfse22-replication.md
[dddocumentation]: https://variantsync.github.io/DiffDetective/docs/javadoc/
diff --git a/docker/execute.sh b/docker/execute.sh
index de77fb4a2..d308e9bae 100644
--- a/docker/execute.sh
+++ b/docker/execute.sh
@@ -1,12 +1,12 @@
#! /bin/bash
-if [ $1 == '' ] || [ $1 == '--help' ] || [ $1 == '-help' ]; then
+if [ "$1" == '' ] || [ "$1" == '--help' ] || [ "$1" == '-help' ]; then
echo "Either fully run DiffDetective as presented in the paper (replication), do quick setup verification (verification),
or run DiffDetective on a custom dataset by providing the path to the dataset file."
echo "-- Examples --"
echo "Run replication: './execute.sh replication'"
echo "Validate the setup: './execute.sh verification'"
- echo "# See ./docs/verification/datasets.md for format details"
+ echo "# See ./docs/datasets/esecfse22-verification.md for format details"
echo "Custom dataset: './execute.sh path/to/my_dataset.md'"
exit
fi
@@ -15,15 +15,15 @@ cd /home/sherlock || exit
if [ "$1" == 'replication' ]; then
echo "Running full replication. Depending on your system, this will require several hours or even a few days."
- java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation
+ java -cp DiffDetective.jar org.variantsync.diffdetective.validation.EditClassValidation docs/datasets/esecfse22-replication.md
elif [ "$1" == 'verification' ]; then
echo "Running a short verification."
- java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation docs/verification/datasets.md
+ java -cp DiffDetective.jar org.variantsync.diffdetective.validation.EditClassValidation docs/datasets/esecfse22-verification.md
else
echo ""
echo "Running detection on a custom dataset with the input file $1"
echo ""
- java -cp DiffDetective.jar org.variantsync.diffdetective.validation.Validation $1
+ java -cp DiffDetective.jar org.variantsync.diffdetective.validation.EditClassValidation "$1"
fi
echo "Collecting results."
cp -r results/* ../results/
diff --git a/docs/datasets.md b/docs/datasets/all.md
similarity index 100%
rename from docs/datasets.md
rename to docs/datasets/all.md
diff --git a/docs/replication/datasets.md b/docs/datasets/esecfse22-replication.md
similarity index 100%
rename from docs/replication/datasets.md
rename to docs/datasets/esecfse22-replication.md
diff --git a/docs/verification/datasets.md b/docs/datasets/esecfse22-verification.md
similarity index 100%
rename from docs/verification/datasets.md
rename to docs/datasets/esecfse22-verification.md
diff --git a/scripts/runValidation.sh b/scripts/runValidation.sh
index 6c8adff97..2b080dc2c 100755
--- a/scripts/runValidation.sh
+++ b/scripts/runValidation.sh
@@ -1,2 +1,2 @@
-java -cp "target/diffdetective-1.0.0-jar-with-dependencies.jar" org.variantsync.diffdetective.validation.Validation
+java -cp "target/diffdetective-1.0.0-jar-with-dependencies.jar" org.variantsync.diffdetective.validation.EditClassValidation
echo "runValidation.sh DONE"
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java
new file mode 100644
index 000000000..8c6c200db
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/analysis/Analysis.java
@@ -0,0 +1,526 @@
+package org.variantsync.diffdetective.analysis;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.concurrent.Callable;
+import java.util.function.BiConsumer;
+import java.util.function.Supplier;
+
+import org.apache.commons.lang3.function.FailableBiConsumer;
+import org.apache.commons.lang3.function.FailableBiFunction;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.tinylog.Logger;
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
+import org.variantsync.diffdetective.analysis.monitoring.TaskCompletionMonitor;
+import org.variantsync.diffdetective.datasets.Repository;
+import org.variantsync.diffdetective.diff.git.CommitDiff;
+import org.variantsync.diffdetective.diff.git.GitDiffer;
+import org.variantsync.diffdetective.diff.git.PatchDiff;
+import org.variantsync.diffdetective.diff.result.CommitDiffResult;
+import org.variantsync.diffdetective.metadata.Metadata;
+import org.variantsync.diffdetective.parallel.ScheduledTasksIterator;
+import org.variantsync.diffdetective.util.Clock;
+import org.variantsync.diffdetective.util.Diagnostics;
+import org.variantsync.diffdetective.util.InvocationCounter;
+import org.variantsync.diffdetective.variation.diff.DiffTree;
+import org.variantsync.functjonal.iteration.ClusteredIterator;
+import org.variantsync.functjonal.iteration.MappedIterator;
+
+/**
+ * Encapsulates the state and control flow during an analysis of the commit history of multiple
+ * repositories using {@link DiffTree}s. Each repository is processed sequentially but the commits
+ * of each repository can be processed in parallel.
+ *
+ * <p>For thread safety, each thread receives its own instance of {@code Analysis}. The getters
+ * provide access to the current state of the analysis in one thread. Depending on the current
+ * {@link Hooks phase} only a subset of the state accessible via getters may be valid.
+ *
+ * @see forEachRepository
+ * @see forEachCommit
+ * @author Paul Bittner, Benjamin Moosherr
+ */
+public class Analysis {
+ /**
+ * File extension that is used when writing AnalysisResults to disk.
+ */
+ public static final String EXTENSION = ".metadata.txt";
+ /**
+ * File name that is used to store the analysis results for each repository.
+ */
+ public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + EXTENSION;
+ /**
+ * Default value for commitsToProcessPerThread
+ * @see forEachCommit(Supplier, int, int)
+ */
+ public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000;
+
+ protected final List hooks;
+ protected final Repository repository;
+
+ protected GitDiffer differ;
+ protected RevCommit currentCommit;
+ protected CommitDiff currentCommitDiff;
+ protected PatchDiff currentPatch;
+ protected DiffTree currentDiffTree;
+
+ protected final Path outputDir;
+ protected Path outputFile;
+ protected final AnalysisResult result;
+
+ /**
+ * The repository this analysis is run on.
+ * Always valid.
+ *
+ * @return the repository that was passed to the constructor of this analysis
+ */
+ public Repository getRepository() {
+ return repository;
+ }
+
+ /**
+ * The currently processed commit.
+ * Valid during the commit {@link Hooks phase}.
+ *
+ * @return the commit most recently selected by the batch loop; {@code null} before the first
+ * commit of this instance is processed, and not reset once the commit phase has ended
+ */
+ public RevCommit getCurrentCommit() {
+ return currentCommit;
+ }
+
+ /**
+ * The currently processed commit diff.
+ * Valid when {@link Hooks#onParsedCommit} is called until the end of the commit phase.
+ *
+ * @return the most recently extracted commit diff; outside the window described above this is
+ * {@code null} or the diff of an earlier commit because the field is never reset
+ */
+ public CommitDiff getCurrentCommitDiff() {
+ return currentCommitDiff;
+ }
+
+ /**
+ * The currently processed patch.
+ * Valid during the patch {@link Hooks phase}.
+ *
+ * @return the patch most recently selected from the current commit diff; {@code null} before
+ * the first patch of this instance is processed, and not reset once the patch phase has ended
+ */
+ public PatchDiff getCurrentPatch() {
+ return currentPatch;
+ }
+
+ /**
+ * The diff tree of the currently processed patch.
+ * Valid only during {@link Hooks#analyzeDiffTree}.
+ *
+ * @return the diff tree obtained from the most recent valid patch; {@code null} until the
+ * first valid patch of this instance is processed, and not reset afterwards
+ */
+ public DiffTree getCurrentDiffTree() {
+ return currentDiffTree;
+ }
+
+ /**
+ * The destination for results which are written to disk.
+ * Always valid.
+ *
+ * @return the output directory that was passed to the constructor of this analysis
+ */
+ public Path getOutputDir() {
+ return outputDir;
+ }
+
+ /**
+ * The destination for results which are written to disk and specific to the currently processed
+ * commit batch.
+ * Valid during the batch {@link Hooks phase}.
+ *
+ * @return a path inside {@link getOutputDir} that is named after the id of the first commit of
+ * the current batch with the suffix {@code .lg}; {@code null} before the first batch is started
+ */
+ public Path getOutputFile() {
+ return outputFile;
+ }
+
+ /**
+ * The results of the analysis. This may be modified by any hook and should be initialized in
+ * {@link Hooks#initializeResults} (e.g. by using {@link append}).
+ * Always valid.
+ *
+ * @return the result object created together with this analysis; the same instance is returned
+ * on every call
+ */
+ public AnalysisResult getResult() {
+ return result;
+ }
+
+ /**
+ * Convenience getter for {@link AnalysisResult#get} on {@link getResult}.
+ * Always valid.
+ *
+ * @param resultKey the key identifying the requested part of the result and its type
+ * @return whatever {@link AnalysisResult#get} returns for {@code resultKey}
+ */
+ public > T get(ResultKey resultKey) {
+ return result.get(resultKey);
+ }
+
+ /**
+ * Convenience function for {@link AnalysisResult#append} on {@link getResult}.
+ * Always valid.
+ *
+ * @param resultKey the key identifying the part of the result that is extended
+ * @param value the value that is appended to the result stored under {@code resultKey}
+ */
+ public > void append(ResultKey resultKey, T value) {
+ result.append(resultKey, value);
+ }
+
+ /**
+ * Hooks for analyzing commits using {@link DiffTree}s.
+ *
+ * In general the hooks of different {@code Hook} instances are called in sequence according
+ * to the order specified in {@link Analysis#Analysis} (except end hooks). Hooks are separated
+ * into two categories: phases and events.
+ *
+ *
A phase consists of two hooks with the prefix {@code begin} and {@code end}. It is
+ * guaranteed that the end hook is called if and only if the begin hook was called, even in the
+ * presence of exceptions, so they are safe to use for resource management. For this purpose,
+ * end hooks are called in reverse order as specified in {@link Analysis#Analysis}.
+ *
+ *
Phases can be called an arbitrary number of times but are nested in the following order
+ * (from outer to inner):
+ *
+ * - batch
+ *
- commit
+ *
- patch
+ *
+ * An inner phase is only executed while an outer phase runs (in between the phase's begin and
+ * end hooks).
+ *
+ * An analysis implementing {@code Hooks} can perform various actions during each hook. This
+ * includes the {@link append creation} and {@link get modification} of {@link getResult
+ * analysis results}, modifying their internal state, performing IO operations and throwing
+ * exceptions. In contrast, the only analysis state hooks are allowed to modify is the {@link
+ * getResult result} of an {@link Analysis}. All other state (e.g. {@link getCurrentCommit})
+ * must not be modified. Care must be taken to avoid the reliance of the internal state on a
+ * specific commit batch being processed as only the {@link getResult results} of each commit
+ * batch are merged and returned by {@link forEachCommit}.
+ *
+ *
Hooks that return a {@code boolean} are called filter hooks and can, in addition to the
+ * above, skip any further processing in the current phase (including following inner phases) by
+ * returning {@code false}. If a hook starts skipping, any invocations of the same filter hook
+ * of following {@code Hook} instances won't be executed. Processing continues (after calling
+ * missing end hooks of the current phase) in the next outer phase after the skipped phase.
+ *
+ *
Hooks without a {@code begin} or {@code end} prefix are events emitted during some
+ * specified conditions. See their respective documentation for details.
+ */
+ public interface Hooks {
+ /**
+ * Initialization hook for {@link getResult}. All result types should be appended with a
+ * neutral value using {@link append}. No other side effects should be performed during this
+ * methods as it might be called an arbitrary amount of times.
+ */
+ default void initializeResults(Analysis analysis) {}
+ default void beginBatch(Analysis analysis) throws Exception {}
+ default boolean beginCommit(Analysis analysis) throws Exception { return true; }
+ /**
+ * Signals a parsing failure of some patch in the current commit.
+ * Called at most once during the commit phase. If this hook is called {@link
+ * onParsedCommit} and the following patch phase invocations are skipped.
+ */
+ default void onFailedCommit(Analysis analysis) throws Exception {}
+ /**
+ * Signals the completion of the commit diff extraction.
+ * Called exactly once during the commit phase before the patch phase begins.
+ */
+ default boolean onParsedCommit(Analysis analysis) throws Exception { return true; }
+ default boolean beginPatch(Analysis analysis) throws Exception { return true; }
+ /**
+ * The main hook for analyzing non-empty diff trees.
+ * Called at most once during the patch phase.
+ */
+ default boolean analyzeDiffTree(Analysis analysis) throws Exception { return true; }
+ default void endPatch(Analysis analysis) throws Exception {}
+ default void endCommit(Analysis analysis) throws Exception {}
+ default void endBatch(Analysis analysis) throws Exception {}
+ }
+
+ /**
+ * Invokes {@code analyzeRepository} once for every repository that has not been analyzed yet.
+ * A repository counts as analyzed as soon as {@link TOTAL_RESULTS_FILE_NAME} exists in its
+ * output directory. This check does not distinguish between different analyses, so delete
+ * that file to rerun an analysis.
+ *
+ * <p>Each repository gets its own directory below {@code outputDir}, named after the
+ * repository. This directory is passed to {@code analyzeRepository} as the place where the
+ * results of that repository should be written.
+ *
+ * @param repositoriesToAnalyze the repositories for which {@code analyzeRepository} is run
+ * @param outputDir the directory where all repositories will save their results
+ * @param analyzeRepository the callback which is invoked for each repository
+ */
+ public static void forEachRepository(
+         List repositoriesToAnalyze,
+         Path outputDir,
+         BiConsumer analyzeRepository
+ ) {
+     for (final Repository repository : repositoriesToAnalyze) {
+         final String repositoryName = repository.getRepositoryName();
+         final Path repositoryOutputDir = outputDir.resolve(repositoryName);
+
+         // Don't repeat work we already did:
+         final boolean alreadyProcessed =
+                 Files.exists(repositoryOutputDir.resolve(TOTAL_RESULTS_FILE_NAME));
+         if (alreadyProcessed) {
+             Logger.info(" Skipping repository {} because it has already been processed.",
+                     repositoryName);
+             continue;
+         }
+
+         Logger.info(" === Begin Processing {} ===", repositoryName);
+         final Clock stopwatch = new Clock();
+         stopwatch.start();
+
+         analyzeRepository.accept(repository, repositoryOutputDir);
+
+         Logger.info(" === End Processing {} after {} ===",
+                 repository.getRepositoryName(),
+                 stopwatch.printPassedSeconds());
+     }
+ }
+
+ /**
+ * Same as {@link forEachCommit(Supplier, int, int)}.
+ * Uses {@link COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} as the batch size and the machine
+ * dependent value of {@link Diagnostics#getNumberOfAvailableProcessors} as the number of
+ * threads.
+ *
+ * @param analysis the factory for the analysis state, passed on unchanged
+ * @return the result returned by {@link forEachCommit(Supplier, int, int)}
+ */
+ public static AnalysisResult forEachCommit(Supplier analysis) {
+ return forEachCommit(
+ analysis,
+ COMMITS_TO_PROCESS_PER_THREAD_DEFAULT,
+ Diagnostics.INSTANCE.run().getNumberOfAvailableProcessors()
+ );
+ }
+
+ /**
+ * Runs the analysis for the repository given in {@link Analysis#Analysis}. The repository
+ * history is processed in batches of {@code commitsToProcessPerThread} on {@code nThreads} in
+ * parallel. {@link Hooks} passed to {@link Analysis#Analysis} are the main customization point
+ * for executing different analyses. By default only the total number of commits and the total
+ * runtime with multithreading of the {@link DiffTree} parsing is recorded.
+ *
+ * The first instance obtained from {@code analysisFactory} collects the results of all batches
+ * and provides the {@link GitDiffer} that is shared by all batches. Every batch is processed by
+ * a further instance obtained from {@code analysisFactory}. After all batches are processed,
+ * the collected result is exported with {@link exportMetadata} to the output directory of the
+ * first instance.
+ *
+ * If processing the batches throws an exception, the error is logged and the JVM is terminated
+ * via {@code System.exit(1)}.
+ *
+ * @param analysisFactory creates independent (at least thread safe) instances of the analysis
+ * state
+ * @param commitsToProcessPerThread the commit batch size
+ * @param nThreads the number of parallel processed commit batches
+ * @return the merged result of all commit batches
+ */
+ public static AnalysisResult forEachCommit(
+ Supplier analysisFactory,
+ final int commitsToProcessPerThread,
+ final int nThreads
+ ) {
+ var analysis = analysisFactory.get();
+ analysis.differ = new GitDiffer(analysis.getRepository());
+
+ final Clock clock = new Clock();
+
+ // prepare tasks (they are only created here and run later by the ScheduledTasksIterator)
+ Logger.info(">>> Scheduling asynchronous analysis on {} threads.", nThreads);
+ clock.start();
+ final InvocationCounter numberOfTotalCommits = InvocationCounter.justCount();
+ final Iterator> tasks = new MappedIterator<>(
+ /// 1.) Retrieve commitsToProcessPerThread commits from the differ and cluster them into one list.
+ new ClusteredIterator<>(
+ analysis.differ.yieldRevCommitsAfter(numberOfTotalCommits),
+ commitsToProcessPerThread
+ ),
+ /// 2.) Create a task for the list of commits. Each task runs on a fresh analysis instance
+ /// from the factory but shares the differ of the first instance.
+ commitList -> () -> analysisFactory.get().processCommits(commitList, analysis.differ)
+ );
+ Logger.info("<<< done in {}", clock.printPassedSeconds());
+
+ final TaskCompletionMonitor commitSpeedMonitor = new TaskCompletionMonitor(0, TaskCompletionMonitor.LogProgress("commits"));
+ Logger.info(">>> Run Analysis");
+ clock.start();
+ commitSpeedMonitor.start();
+ try (final ScheduledTasksIterator threads = new ScheduledTasksIterator<>(tasks, nThreads)) {
+ while (threads.hasNext()) {
+ final AnalysisResult threadsResult = threads.next();
+ analysis.getResult().append(threadsResult);
+
+ var statistics = threadsResult.get(StatisticsAnalysis.RESULT);
+ if (statistics != null) {
+ commitSpeedMonitor.addFinishedTasks(statistics.processedCommits);
+ }
+ }
+ } catch (Exception e) {
+ Logger.error(e, "Failed to run all mining task");
+ System.exit(1);
+ }
+
+ final double runtime = clock.getPassedSeconds();
+ Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime));
+
+ analysis.getResult().runtimeWithMultithreadingInSeconds = runtime;
+ analysis.getResult().totalCommits = numberOfTotalCommits.invocationCount().get();
+
+ exportMetadata(analysis.getOutputDir(), analysis.getResult());
+ return analysis.getResult();
+ }
+
+ /**
+ * Constructs the state used during an analysis.
+ * A new {@link AnalysisResult} is created and labelled with the name of {@code repository} and
+ * {@code taskName}. Afterwards {@link Hooks#initializeResults} is called on every hook in the
+ * order given by {@code hooks}.
+ *
+ * @param taskName the name of the overall analysis task
+ * @param hooks the hooks to be run for analysis; the list is stored as is and not copied
+ * @param repository the repository to analyze
+ * @param outputDir the directory where all results are saved
+ */
+ public Analysis(
+ String taskName,
+ List hooks,
+ Repository repository,
+ Path outputDir
+ ) {
+ this.hooks = hooks;
+ this.repository = repository;
+ this.outputDir = outputDir;
+ this.result = new AnalysisResult();
+
+ this.result.repoName = repository.getRepositoryName();
+ this.result.taskName = taskName;
+ for (var hook : hooks) {
+ hook.initializeResults(this);
+ }
+ }
+
+ /**
+ * Entry point into a sequential analysis of {@code commits} as one batch.
+ * Same as {@link processCommits(List, GitDiffer)} with a default {@link GitDiffer}, which is
+ * newly created for the repository of this analysis.
+ *
+ * @param commits the commit batch to be processed
+ * @return the {@link getResult result} of this analysis after the batch was processed
+ * @throws Exception if processing the batch fails, see {@link processCommits(List, GitDiffer)}
+ * @see forEachCommit
+ */
+ public AnalysisResult processCommits(List commits) throws Exception {
+ return processCommits(commits, new GitDiffer(getRepository()));
+ }
+
+ /**
+ * Entry point into a sequential analysis of {@code commits} as one batch.
+ * The given {@code differ} replaces any differ previously used by this analysis.
+ *
+ * @param commits the commit batch to be processed
+ * @param differ the differ to use
+ * @return the {@link getResult result} of this analysis after the batch was processed
+ * @throws Exception any exception raised while processing the batch, including exceptions
+ * thrown by hooks
+ * @see forEachCommit
+ */
+ public AnalysisResult processCommits(List commits, GitDiffer differ) throws Exception {
+ this.differ = differ;
+ processCommitBatch(commits);
+ return getResult();
+ }
+
+ /**
+ * Runs the batch phase for {@code commits}: calls the {@link Hooks#beginBatch} hooks, runs the
+ * commit phase for each commit in order and finally calls the {@link Hooks#endBatch} hooks in
+ * reverse order, even if an exception occurred in between.
+ *
+ * <p>The {@link getOutputFile output file} of the batch is named after the id of the first
+ * commit. An empty batch therefore has no output file and nothing to analyze, so it is
+ * skipped without calling any hook.
+ *
+ * @param commits the commit batch to be processed
+ * @throws Exception any exception raised by a hook or while processing a commit; exceptions
+ * raised during the commit phase are logged before they are rethrown
+ */
+ protected void processCommitBatch(List commits) throws Exception {
+     // Guard against commits.get(0) failing with an IndexOutOfBoundsException below.
+     if (commits.isEmpty()) {
+         return;
+     }
+
+     outputFile = outputDir.resolve(commits.get(0).getId().getName() + ".lg");
+
+     // The iterator remembers which begin hooks ran so that exactly these are ended again.
+     ListIterator batchHook = hooks.listIterator();
+     try {
+         runHook(batchHook, Hooks::beginBatch);
+
+         // For each commit
+         for (final RevCommit finalCommit : commits) {
+             currentCommit = finalCommit;
+
+             ListIterator commitHook = hooks.listIterator();
+             try {
+                 if (!runFilterHook(commitHook, Hooks::beginCommit)) {
+                     // The finally block below still ends the hooks that have begun.
+                     continue;
+                 }
+
+                 processCommit();
+             } catch (Exception e) {
+                 Logger.error(e, "An unexpected error occurred at {} in {}", currentCommit.getId().getName(), repository.getRepositoryName());
+                 throw e;
+             } finally {
+                 runReverseHook(commitHook, Hooks::endCommit);
+             }
+         }
+     } finally {
+         runReverseHook(batchHook, Hooks::endBatch);
+     }
+ }
+
+ /**
+ * Runs the inner part of the commit phase for the {@link getCurrentCommit current commit}:
+ * extracts its commit diff, reports diff errors to the result and runs the patch phase for
+ * every patch of the commit diff.
+ *
+ * <p>If no commit diff could be produced, only {@link Hooks#onFailedCommit} is emitted. If
+ * {@link Hooks#onParsedCommit} returns {@code false}, all patches of the commit are skipped.
+ *
+ * @throws Exception any exception raised by the differ, by a hook or while processing a patch
+ */
+ protected void processCommit() throws Exception {
+     // parse the commit
+     final CommitDiffResult parsedCommit = differ.createCommitDiff(currentCommit);
+
+     // report any errors that occurred and exit in case no DiffTree could be parsed.
+     getResult().reportDiffErrors(parsedCommit.errors());
+     if (parsedCommit.diff().isEmpty()) {
+         Logger.debug("found commit that failed entirely because:\n{}", parsedCommit.errors());
+         runHook(hooks.listIterator(), Hooks::onFailedCommit);
+         return;
+     }
+
+     // extract the produced commit diff and inform the strategy
+     currentCommitDiff = parsedCommit.diff().get();
+     final boolean inspectPatches = runFilterHook(hooks.listIterator(), Hooks::onParsedCommit);
+     if (!inspectPatches) {
+         return;
+     }
+
+     // inspect every patch
+     for (final PatchDiff patch : currentCommitDiff.getPatchDiffs()) {
+         currentPatch = patch;
+
+         // The iterator remembers which begin hooks ran so that exactly these are ended again.
+         ListIterator patchHooks = hooks.listIterator();
+         try {
+             if (runFilterHook(patchHooks, Hooks::beginPatch)) {
+                 processPatch();
+             }
+         } finally {
+             runReverseHook(patchHooks, Hooks::endPatch);
+         }
+     }
+ }
+
+ protected void processPatch() throws Exception {
+ if (currentPatch.isValid()) {
+ // generate TreeDiff
+ currentDiffTree = currentPatch.getDiffTree();
+ currentDiffTree.assertConsistency();
+
+ runFilterHook(hooks.listIterator(), Hooks::analyzeDiffTree);
+ }
+ }
+
+ /**
+ * Calls {@code callHook} on all remaining elements of {@code hook} in forward order, passing
+ * this analysis as the second argument. If a call throws, the iteration stops and {@code hook}
+ * stays positioned behind the element that failed.
+ *
+ * @param hook the position in the hook list from which on hooks are called
+ * @param callHook the hook method to invoke
+ * @throws Exception the exception thrown by {@code callHook}, if any
+ */
+ protected void runHook(ListIterator hook, FailableBiConsumer callHook) throws Exception {
+     for (; hook.hasNext(); ) {
+         callHook.accept(hook.next(), this);
+     }
+ }
+
+ /**
+ * Calls the filter hook {@code callHook} on the remaining elements of {@code hook} in forward
+ * order until one of them returns {@code false}. {@code hook} stays positioned behind the last
+ * element that was called.
+ *
+ * @param hook the position in the hook list from which on hooks are called
+ * @param callHook the filter hook method to invoke
+ * @return {@code false} if some hook returned {@code false}, {@code true} if all remaining
+ * hooks were called and returned {@code true}
+ * @throws Exception the exception thrown by {@code callHook}, if any
+ */
+ protected boolean runFilterHook(ListIterator hook, FailableBiFunction callHook) throws Exception {
+     boolean keepProcessing = true;
+     while (keepProcessing && hook.hasNext()) {
+         keepProcessing = callHook.apply(hook.next(), this);
+     }
+     return keepProcessing;
+ }
+
+ /**
+ * Calls {@code callHook} on all elements in front of the current position of {@code hook},
+ * walking backwards to the start of the list. A failing hook does not prevent the remaining
+ * hooks from being called: each exception is logged, the first one is rethrown once all hooks
+ * were called, and later ones are attached to it as suppressed exceptions.
+ *
+ * @param hook the position in the hook list up to which begin hooks were called
+ * @param callHook the end hook method to invoke
+ * @throws Exception the first exception thrown by {@code callHook}, if any
+ */
+ protected void runReverseHook(ListIterator hook, FailableBiConsumer callHook) throws Exception {
+     Exception firstFailure = null;
+
+     while (hook.hasPrevious()) {
+         try {
+             callHook.accept(hook.previous(), this);
+         } catch (Exception failure) {
+             Logger.error(failure, "An exception thrown in an end hooks of Analysis will be rethrown later");
+             if (firstFailure != null) {
+                 firstFailure.addSuppressed(failure);
+             } else {
+                 firstFailure = failure;
+             }
+         }
+     }
+
+     if (firstFailure == null) {
+         return;
+     }
+     throw firstFailure;
+ }
+
+ /**
+ * Exports the given metadata object to a file named according to
+ * {@link TOTAL_RESULTS_FILE_NAME} in the given directory.
+ * Delegates to {@link exportMetadataToFile}.
+ * @param outputDir The directory into which the metadata object file should be written.
+ * @param metadata The metadata to serialize
+ * @param <T> Type of the metadata.
+ */
+ public static void exportMetadata(final Path outputDir, final Metadata metadata) {
+ exportMetadataToFile(outputDir.resolve(TOTAL_RESULTS_FILE_NAME), metadata);
+ }
+
+ /**
+ * Exports the given metadata object to the given file. Overwrites existing files.
+ * The text returned by the export is additionally logged at info level.
+ * @param outputFile The file to write.
+ * @param metadata The metadata to serialize
+ * @param <T> Type of the metadata.
+ */
+ public static void exportMetadataToFile(final Path outputFile, final Metadata metadata) {
+ final String prettyMetadata = metadata.exportTo(outputFile);
+ Logger.info("Metadata:\n{}", prettyMetadata);
+ }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java
index e9e576f09..d1984b74f 100644
--- a/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java
+++ b/src/main/java/org/variantsync/diffdetective/analysis/AnalysisResult.java
@@ -1,92 +1,120 @@
package org.variantsync.diffdetective.analysis;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
import org.variantsync.diffdetective.diff.result.DiffError;
-import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
-import org.variantsync.diffdetective.metadata.EditClassCount;
-import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
import org.variantsync.diffdetective.metadata.Metadata;
-import org.variantsync.diffdetective.variation.diff.serialize.DiffTreeSerializeDebugData;
+import org.variantsync.functjonal.Cast;
import org.variantsync.functjonal.Functjonal;
import org.variantsync.functjonal.category.InplaceMonoid;
import org.variantsync.functjonal.category.InplaceSemigroup;
-import org.variantsync.functjonal.category.Semigroup;
import org.variantsync.functjonal.map.MergeMap;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.*;
-import java.util.function.BiConsumer;
-
/**
- * The result of a {@link HistoryAnalysis}.
+ * The result of an {@link Analysis}.
* This result stores various metadata and statistics that we use for the validation of our ESEC/FSE paper.
* An AnalysisResult also allows to store any custom metadata or information.
* @author Paul Bittner
*/
-public class AnalysisResult implements Metadata {
+public final class AnalysisResult implements Metadata {
/**
* Placeholder name for data that is not associated to a repository or where the repository is unknown.
*/
public final static String NO_REPO = "";
+ private final static String ERROR_BEGIN = "#Error[";
+ private final static String ERROR_END = "]";
+
+ /**
+ * The repo from which the results were collected.
+ */
+ public String repoName = NO_REPO;
+ public String taskName;
/**
- * File extension that is used when writing AnalysisResults to disk.
+ * The effective runtime in seconds that we have when using multithreading.
*/
- public final static String EXTENSION = ".metadata.txt";
+ public double runtimeWithMultithreadingInSeconds = 0;
+ /**
+ * The total number of commits in the observed history of the given repository.
+ */
+ public int totalCommits = 0;
+ public final MergeMap diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum);
- private final static String ERROR_BEGIN = "#Error[";
- private final static String ERROR_END = "]";
+ private final Map> results = new HashMap<>();
+
+ /**
+ * Type proxy and runtime key for the type of a {@code Metadata} subclass.
+ * There should be no two {@code ResultKey} instances with the same {@code key} but different
+ * types {@code T}, otherwise {@link #get} or {@link #append} may throw {@link
+ * ClassCastException}s.
+ *
+ * @param key the runtime key for looking up the requested type
+ * @param <T> a subclass of {@code Metadata}
+ */
+ public record ResultKey>(String key) {
+ }
+
+ /**
+ * Returns the value previously added using {@link #append}.
+ *
+ * @param resultKey the key which is used to identify the data and its type
+ * @param <T> the type of the value which was previously stored
+ */
+ public > T get(ResultKey resultKey) {
+ return Cast.unchecked(results.get(resultKey.key()));
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private void unsafeAppend(String key, Metadata> value) {
+ results.merge(key, value, (first, second) -> {
+ // `first` and `second` should have the same type if there are no two
+ // `ResultKey` instances with the same `ResultKey.key` and `results` is only
+ // modified by `append`.
+ ((Metadata) first).append((Metadata) second);
+ return first;
+ });
+ }
+
+ /**
+ * Adds a new value or {@link Metadata#append}s it to the old value which is indexed by {@code
+ * resultKey}.
+ *
+ * @param resultKey the key which is used to identify the data and its type
+ * @param <T> the type of the value which is appended
+ * @see #get
+ */
+ public > void append(ResultKey resultKey, T value) {
+ unsafeAppend(resultKey.key(), value);
+ }
/**
* Inplace semigroup for AnalysisResult.
* Merges the second results values into the first result.
*/
- public final static InplaceSemigroup ISEMIGROUP = (a, b) -> {
- a.totalCommits += b.totalCommits;
- a.exportedCommits += b.exportedCommits;
- a.emptyCommits += b.emptyCommits;
- a.failedCommits += b.failedCommits;
- a.exportedTrees += b.exportedTrees;
- a.runtimeInSeconds += b.runtimeInSeconds;
+ public static final InplaceSemigroup ISEMIGROUP = (a, b) -> {
+ a.repoName = Metadata.mergeEqual(a.repoName, b.repoName);
+ a.taskName = Metadata.mergeEqual(a.taskName, b.taskName);
a.runtimeWithMultithreadingInSeconds += b.runtimeWithMultithreadingInSeconds;
- a.min.set(CommitProcessTime.min(a.min, b.min));
- a.max.set(CommitProcessTime.max(a.max, b.max));
- a.debugData.append(b.debugData);
- a.filterHits.append(b.filterHits);
- a.editClassCounts.append(b.editClassCounts);
- MergeMap.putAllValues(a.customInfo, b.customInfo, Semigroup.assertEquals());
+ a.totalCommits += b.totalCommits;
a.diffErrors.append(b.diffErrors);
+ b.results.forEach((key, value) -> a.unsafeAppend(key, value));
};
- /**
- * Inplace monoid for AnalysisResult.
- * @see AnalysisResult#ISEMIGROUP
- */
- public static InplaceMonoid IMONOID= InplaceMonoid.From(
- AnalysisResult::new,
- ISEMIGROUP
- );
+ public static final InplaceMonoid IMONOID =
+ InplaceMonoid.From(AnalysisResult::new, ISEMIGROUP);
- public String repoName;
- public int totalCommits;
- public int exportedCommits;
- public int emptyCommits;
- public int failedCommits;
- public int exportedTrees;
- public double runtimeInSeconds;
- public double runtimeWithMultithreadingInSeconds;
- public final CommitProcessTime min, max;
- public final DiffTreeSerializeDebugData debugData;
- public ExplainedFilterSummary filterHits;
- public EditClassCount editClassCounts;
- private final LinkedHashMap customInfo = new LinkedHashMap<>();
- private final MergeMap diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum);
+ @Override
+ public InplaceSemigroup semigroup() {
+ return ISEMIGROUP;
+ }
- /**
- * Creates an empty analysis result.
- */
public AnalysisResult() {
this(NO_REPO);
}
@@ -96,74 +124,7 @@ public AnalysisResult() {
* @param repoName The repo for which to collect results.
*/
public AnalysisResult(final String repoName) {
- this(
- repoName,
- 0, 0, 0, 0,
- 0,
- 0, 0,
- CommitProcessTime.Unknown(repoName, Long.MAX_VALUE),
- CommitProcessTime.Unknown(repoName, Long.MIN_VALUE),
- new DiffTreeSerializeDebugData(),
- new ExplainedFilterSummary());
- }
-
- /**
- * Creates am analysis result with the given inital values.
- * @param repoName The repo from which the results where collected.
- * @param totalCommits The total number of commits in the observed history of the given repository.
- * @param exportedCommits The number of commits that were processed. exportedCommits <= totalCommits
- * @param emptyCommits Number of commits that were not processed because they had no DiffTrees.
- * A commit is empty iff at least of one of the following conditions is met for every of its patches:
- * - the patch did not edit a C file,
- * - the DiffTree became empty after transformations (this can happen if there are only whitespace changes),
- * - or the patch had syntax errors in its annotations, so the DiffTree could not be parsed.
- * @param failedCommits Number of commits that could not be parsed at all because of exceptions when operating JGit.
- * The number of commits that were filtered because they are a merge commit is thus given as
- * totalCommits - exportedCommits - emptyCommits - failedCommits
- * @param exportedTrees Number of DiffTrees that were processed.
- * @param runtimeInSeconds The total runtime in seconds (irrespective of multithreading).
- * @param runtimeWithMultithreadingInSeconds The effective runtime in seconds that we have when using multithreading.
- * @param min The commit that was processed the fastest.
- * @param max The commit that was processed the slowest.
- * @param debugData Debug data for DiffTree serialization.
- * @param filterHits Explanations for filter hits, when filtering DiffTrees (e.g., because a diff was empty).
- */
- public AnalysisResult(
- final String repoName,
- int totalCommits,
- int exportedCommits,
- int emptyCommits,
- int failedCommits,
- int exportedTrees,
- double runtimeInSeconds,
- double runtimeWithMultithreadingInSeconds,
- final CommitProcessTime min,
- final CommitProcessTime max,
- final DiffTreeSerializeDebugData debugData,
- final ExplainedFilterSummary filterHits)
- {
this.repoName = repoName;
- this.totalCommits = totalCommits;
- this.exportedCommits = exportedCommits;
- this.emptyCommits = emptyCommits;
- this.failedCommits = failedCommits;
- this.exportedTrees = exportedTrees;
- this.runtimeInSeconds = runtimeInSeconds;
- this.runtimeWithMultithreadingInSeconds = runtimeWithMultithreadingInSeconds;
- this.debugData = debugData;
- this.filterHits = filterHits;
- this.editClassCounts = new EditClassCount();
- this.min = min;
- this.max = max;
- }
-
- /**
- * Stores the given custom key value information in this analysis result.
- * @param key The name of the given value that is used to associate the value.
- * @param value The value to store.
- */
- public void putCustomInfo(final String key, final String value) {
- customInfo.put(key, value);
}
/**
@@ -175,131 +136,66 @@ public void reportDiffErrors(final List errors) {
diffErrors.put(e, 1);
}
}
-
- /**
- * Imports a metadata file, which is an output of a {@link AnalysisResult}, and saves back to {@link AnalysisResult}.
- *
- * @param p {@link Path} to the metadata file
- * @param customParsers A list of parsers to handle custom values that were stored with {@link AnalysisResult#putCustomInfo(String, String)}.
- * Each parser parses the value (second argument) of a given key (first entry in the map) and stores it in the given AnalysisResult (first argument).
- * @return The reconstructed {@link AnalysisResult}
- * @throws IOException when the file could not be read.
- */
- public static AnalysisResult importFrom(final Path p, final Map> customParsers) throws IOException {
- AnalysisResult result = new AnalysisResult();
-
- final List filterHitsLines = new ArrayList<>();
- final List editClassCountsLines = new ArrayList<>();
- try (BufferedReader input = Files.newBufferedReader(p)) {
- // examine each line of the metadata file separately
- String line;
- while ((line = input.readLine()) != null) {
- String[] keyValuePair = line.split(": ");
- String key = keyValuePair[0];
- String value = keyValuePair[1];
+ @Override
+ public LinkedHashMap snapshot() {
+ LinkedHashMap snap = new LinkedHashMap<>();
+ snap.put(MetadataKeys.TASKNAME, taskName);
+ snap.put(MetadataKeys.RUNTIME_WITH_MULTITHREADING, runtimeWithMultithreadingInSeconds);
+ snap.put(MetadataKeys.TOTAL_COMMITS, totalCommits);
+
+ var statistics = get(StatisticsAnalysis.RESULT);
+ if (statistics != null) {
+ snap.put(MetadataKeys.FILTERED_COMMITS, totalCommits - statistics.processedCommits - statistics.emptyCommits - statistics.failedCommits);
+ }
- switch (key) {
- case MetadataKeys.REPONAME -> result.repoName = value;
- case MetadataKeys.TREES -> result.exportedTrees = Integer.parseInt(value);
- case MetadataKeys.PROCESSED_COMMITS -> result.exportedCommits = Integer.parseInt(value);
- case MetadataKeys.TOTAL_COMMITS -> result.totalCommits = Integer.parseInt(value);
- case MetadataKeys.EMPTY_COMMITS -> result.emptyCommits = Integer.parseInt(value);
- case MetadataKeys.FAILED_COMMITS -> result.failedCommits = Integer.parseInt(value);
- case MetadataKeys.FILTERED_COMMITS -> { /* Do nothing because this value is derived. */ }
- case MetadataKeys.NON_NODE_COUNT -> result.debugData.numExportedNonNodes = Integer.parseInt(value);
- case MetadataKeys.ADD_NODE_COUNT -> result.debugData.numExportedAddNodes = Integer.parseInt(value);
- case MetadataKeys.REM_NODE_COUNT -> result.debugData.numExportedRemNodes = Integer.parseInt(value);
- case MetadataKeys.MINCOMMIT -> result.min.set(CommitProcessTime.fromString(value));
- case MetadataKeys.MAXCOMMIT -> result.max.set(CommitProcessTime.fromString(value));
- case MetadataKeys.RUNTIME -> {
- if (value.endsWith("s")) {
- value = value.substring(0, value.length() - 1);
- }
- result.runtimeInSeconds = Double.parseDouble(value);
- }
- case MetadataKeys.RUNTIME_WITH_MULTITHREADING -> {
- if (value.endsWith("s")) {
- value = value.substring(0, value.length() - 1);
- }
- result.runtimeWithMultithreadingInSeconds = Double.parseDouble(value);
- }
- default -> {
+ snap.putAll(Functjonal.bimap(diffErrors, error -> ERROR_BEGIN + error + ERROR_END, Object::toString));
+ snap.put(MetadataKeys.REPONAME, repoName);
+ for (var result : results.values()) {
+ snap.putAll(result.snapshot());
+ }
+ return snap;
+ }
- // temporary fix for renaming from Unchanged to Untouched
- final String unchanged = "Unchanged";
- if (key.startsWith(unchanged)) {
- key = ProposedEditClasses.Untouched.getName();
- line = key + line.substring(unchanged.length());
- }
+ @Override
+ public void setFromSnapshot(LinkedHashMap snap) {
+ repoName = snap.get(MetadataKeys.REPONAME);
+ taskName = snap.get(MetadataKeys.TASKNAME);
- final String finalKey = key;
- if (ProposedEditClasses.All.stream().anyMatch(editClass -> editClass.getName().equals(finalKey))) {
- editClassCountsLines.add(line);
- } else if (key.startsWith(ExplainedFilterSummary.FILTERED_MESSAGE_BEGIN)) {
- filterHitsLines.add(line);
- } else if (key.startsWith(ERROR_BEGIN)) {
- var errorId = key.substring(ERROR_BEGIN.length(), key.length() - ERROR_END.length());
- var e = DiffError.fromMessage(errorId);
- if (e.isEmpty()) {
- throw new RuntimeException("Invalid error id " + errorId + " while importing " + p);
- }
- // add DiffError
- result.diffErrors.put(e.get(), Integer.parseInt(value));
- } else {
- final BiConsumer customParser = customParsers.get(key);
- if (customParser == null) {
- final String errorMessage = "Unknown entry \"" + line + "\"!";
- throw new IOException(errorMessage);
- } else {
- customParser.accept(result, value);
- }
- }
- }
- }
- }
+ String runtime = snap.get(MetadataKeys.RUNTIME_WITH_MULTITHREADING);
+ if (runtime.endsWith("s")) {
+ runtime = runtime.substring(0, runtime.length() - 1);
}
+ runtimeWithMultithreadingInSeconds = Double.parseDouble(runtime);
- result.filterHits = ExplainedFilterSummary.parse(filterHitsLines);
- result.editClassCounts = EditClassCount.parse(editClassCountsLines, p.toString());
+ totalCommits = Integer.parseInt(snap.get(MetadataKeys.TOTAL_COMMITS));
- return result;
+ for (var entry : snap.entrySet()) {
+ String key = entry.getKey();
+ if (entry.getKey().startsWith(ERROR_BEGIN)) {
+ var errorId = key.substring(ERROR_BEGIN.length(), key.length() - ERROR_END.length());
+ var e = DiffError.fromMessage(errorId);
+ if (e.isEmpty()) {
+ throw new RuntimeException("Invalid error id " + errorId);
+ }
+ // add DiffError
+ diffErrors.put(e.get(), Integer.parseInt(entry.getValue()));
+ }
+ }
}
- /**
- * Helper method to construct custom parsers for {@link AnalysisResult#importFrom(Path, Map)}.
- * This method creates a parser for custom values that just stores the parsed values as string values for the given key.
- * @param key The key whose values should be stored as unparsed strings.
- * @return A custom parser for {@link AnalysisResult#importFrom(Path, Map)}.
- */
- public static Map.Entry> storeAsCustomInfo(String key) {
- return Map.entry(key, (r, val) -> r.putCustomInfo(key, val));
- }
+ public void setFrom(final Path path) throws IOException {
+ var snapshot = new LinkedHashMap();
- @Override
- public LinkedHashMap snapshot() {
- LinkedHashMap snap = new LinkedHashMap<>();
- snap.put(MetadataKeys.REPONAME, repoName);
- snap.put(MetadataKeys.TOTAL_COMMITS, totalCommits);
- snap.put(MetadataKeys.FILTERED_COMMITS, totalCommits - exportedCommits - emptyCommits - failedCommits);
- snap.put(MetadataKeys.FAILED_COMMITS, failedCommits);
- snap.put(MetadataKeys.EMPTY_COMMITS, emptyCommits);
- snap.put(MetadataKeys.PROCESSED_COMMITS, exportedCommits);
- snap.put(MetadataKeys.TREES, exportedTrees);
- snap.put(MetadataKeys.MINCOMMIT, min.toString());
- snap.put(MetadataKeys.MAXCOMMIT, max.toString());
- snap.put(MetadataKeys.RUNTIME, runtimeInSeconds);
- snap.put(MetadataKeys.RUNTIME_WITH_MULTITHREADING, runtimeWithMultithreadingInSeconds);
- snap.putAll(customInfo);
- snap.putAll(debugData.snapshot());
- snap.putAll(filterHits.snapshot());
- snap.putAll(editClassCounts.snapshot());
- snap.putAll(Functjonal.bimap(diffErrors, error -> ERROR_BEGIN + error + ERROR_END, Object::toString));
- return snap;
- }
+ try (BufferedReader input = Files.newBufferedReader(path)) {
+ // examine each line of the metadata file separately
+ String line;
+ while ((line = input.readLine()) != null) {
+ String[] keyValuePair = line.split(": ");
+ snapshot.put(keyValuePair[0], keyValuePair[1]);
+ }
+ }
- @Override
- public InplaceSemigroup semigroup() {
- return ISEMIGROUP;
+ setFromSnapshot(snapshot);
}
}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java b/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java
index 3f39951fd..dbb98377b 100644
--- a/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java
+++ b/src/main/java/org/variantsync/diffdetective/analysis/AutomationResult.java
@@ -1,5 +1,6 @@
package org.variantsync.diffdetective.analysis;
+import org.apache.commons.lang3.NotImplementedException;
import org.variantsync.diffdetective.metadata.Metadata;
import org.variantsync.functjonal.category.InplaceSemigroup;
@@ -52,8 +53,13 @@ public String toString() {
return snap;
}
+ @Override
+ public void setFromSnapshot(LinkedHashMap snap) {
+ throw new NotImplementedException();
+ }
+
@Override
public InplaceSemigroup semigroup() {
- return null;
+ throw new NotImplementedException();
}
}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java b/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java
deleted file mode 100644
index 27deb4c49..000000000
--- a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTask.java
+++ /dev/null
@@ -1,108 +0,0 @@
-package org.variantsync.diffdetective.analysis;
-
-import org.eclipse.jgit.revwalk.RevCommit;
-import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
-import org.variantsync.diffdetective.datasets.Repository;
-import org.variantsync.diffdetective.diff.git.GitDiffer;
-import org.variantsync.diffdetective.util.CSV;
-import org.variantsync.diffdetective.util.IO;
-import org.variantsync.diffdetective.util.StringUtils;
-import org.variantsync.diffdetective.variation.diff.DiffTree;
-import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter;
-import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer;
-
-import java.io.IOException;
-import java.nio.file.Path;
-import java.util.List;
-import java.util.concurrent.Callable;
-
-/**
- * Abstract base class for tasks to run during a {@link HistoryAnalysis}.
- * A CommitHistoryAnalysisTasks purpose is to process a given set of commits with a specific analysis.
- * @author Paul Bittner
- */
-public abstract class CommitHistoryAnalysisTask implements Callable {
- public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt";
- public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv";
-
- /**
- * Options that may be specified for processing a set of commits.
- * @param repository The repository that is analyzed.
- * @param differ The differ that should be used to obtain diffs.
- * @param outputDir The path to which any output should be written on disk.
- * @param treeFilter filters commits before processing them
- * @param treePreProcessing applies a processing function after filtering, but before processing
- * @param analysisStrategy A callback that is invoked for each commit.
- * @param commits The set of commits to process in this task.
- */
- public record Options(
- Repository repository,
- GitDiffer differ,
- Path outputDir,
- ExplainedFilter treeFilter,
- List treePreProcessing,
- AnalysisStrategy analysisStrategy,
- Iterable commits
- ) {}
-
- protected final Options options;
-
- protected CommitHistoryAnalysisTask(final Options options) {
- this.options = options;
- }
-
- /**
- * Returns the options for this task.
- * @return the options for this task.
- */
- public CommitHistoryAnalysisTask.Options getOptions() {
- return options;
- }
-
- @Override
- public AnalysisResult call() throws Exception {
- options.analysisStrategy().start(options.repository(), options.outputDir());
-
- final AnalysisResult miningResult = new AnalysisResult(options.repository.getRepositoryName());
- miningResult.putCustomInfo(MetadataKeys.TASKNAME, this.getClass().getName());
-
- return miningResult;
- }
-
- /**
- * Exports the given commit times to the given file. Overwrites existing files.
- * @param commitTimes List of all CommitProcessTimes to write into a single file.
- * @param pathToOutputFile Output file to write.
- */
- public static void exportCommitTimes(final List commitTimes, final Path pathToOutputFile) {
- final StringBuilder times = new StringBuilder();
-
- for (final CommitProcessTime ct : commitTimes) {
- times.append(ct.toString()).append(StringUtils.LINEBREAK);
- }
-
- try {
- IO.write(pathToOutputFile, times.toString());
- } catch (IOException e) {
- Logger.error(e);
- System.exit(1);
- }
- }
-
- /**
- * Exports the given patch statistics to the given file. Overwrites existing files.
- * @param commitTimes List of all PatchStatistics to write into a single file.
- * @param pathToOutputFile Output file to write.
- */
- public static void exportPatchStatistics(final List commitTimes, final Path pathToOutputFile) {
- final String csv = CSV.toCSV(commitTimes);
-
- try {
- IO.write(pathToOutputFile, csv);
- } catch (IOException e) {
- Logger.error(e);
- System.exit(1);
- }
- }
-}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java b/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java
deleted file mode 100644
index 231376da9..000000000
--- a/src/main/java/org/variantsync/diffdetective/analysis/CommitHistoryAnalysisTaskFactory.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package org.variantsync.diffdetective.analysis;
-
-import org.eclipse.jgit.revwalk.RevCommit;
-import org.variantsync.diffdetective.datasets.Repository;
-import org.variantsync.diffdetective.diff.git.GitDiffer;
-
-import java.nio.file.Path;
-
-/**
- * Factory for tasks for {@link HistoryAnalysis}.
- * This factory creates a task to run for a given repository and a given set of commits.
- * @author Paul Bittner
- */
-@FunctionalInterface
-public interface CommitHistoryAnalysisTaskFactory {
- /**
- * Create a task for the given set of commits from the given repository.
- * @param repository The repository for whose analysis a task should be created.
- * @param differ The differ that should be used to create diffs from the given commits.
- * @param outputPath The output path to which any results should be written on disk if necessary.
- * @param commits The set of commits that should be processed by the produced task.
- * @return A task that process the given set of commits.
- */
- CommitHistoryAnalysisTask create(
- final Repository repository,
- final GitDiffer differ,
- final Path outputPath,
- Iterable commits
- );
-}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java b/src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java
deleted file mode 100644
index a07a2f378..000000000
--- a/src/main/java/org/variantsync/diffdetective/analysis/EditClassCount.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package org.variantsync.diffdetective.analysis;
-
-import org.variantsync.diffdetective.editclass.EditClass;
-import org.variantsync.diffdetective.editclass.EditClassCatalogue;
-import org.variantsync.diffdetective.util.CSV;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-/**
- * Gathers statistics about matching edit classes.
- * @author Paul Bittner
- */
-public class EditClassCount implements CSV {
- private final EditClassCatalogue catalogue;
- private final Map editClassCounts;
-
- /**
- * Creates a new counter object for the given catalogue of edit classes.
- * @param catalogue The catalogue whose edit classes to match and count.
- */
- public EditClassCount(final EditClassCatalogue catalogue) {
- this.catalogue = catalogue;
- this.editClassCounts = new HashMap<>();
- catalogue.all().forEach(e -> editClassCounts.put(e, 0));
- }
-
- /**
- * Increment the count for the given edit class.
- * The given edit class is assumed to be part of this counts catalog.
- * @see EditClassCount#EditClassCount(EditClassCatalogue)
- * @param editClass The edit class whose count to increase by one.
- */
- public void increment(final EditClass editClass) {
- editClassCounts.computeIfPresent(editClass, (p, i) -> i + 1);
- }
-
- @Override
- public String toCSV(final String delimiter) {
- return catalogue.all().stream()
- .map(editClassCounts::get)
- .map(Object::toString)
- .collect(Collectors.joining(delimiter));
- }
-}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java
new file mode 100644
index 000000000..b0bbb14ee
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/analysis/EditClassOccurenceAnalysis.java
@@ -0,0 +1,89 @@
+package org.variantsync.diffdetective.analysis;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.LinkedHashMap;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
+import org.variantsync.diffdetective.editclass.EditClass;
+import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
+import org.variantsync.diffdetective.metadata.EditClassCount;
+import org.variantsync.diffdetective.util.CSV;
+import org.variantsync.diffdetective.util.FileUtils;
+import org.variantsync.diffdetective.util.StringUtils;
+
+public class EditClassOccurenceAnalysis implements Analysis.Hooks {
+ public static final String PATCH_STATISTICS_EXTENSION = ".patchStatistics.csv";
+
+ private final AnalysisStrategy exportStrategy;
+ private Writer output;
+
+ public EditClassOccurenceAnalysis(AnalysisStrategy exportStrategy) {
+ this.exportStrategy = exportStrategy;
+ }
+
+ @Override
+ public void initializeResults(Analysis analysis) {
+ analysis.append(EditClassCount.KEY, new EditClassCount());
+ }
+
+ @Override
+ public void beginBatch(Analysis analysis) {
+ exportStrategy.start(
+ analysis.getRepository(),
+ FileUtils.addExtension(analysis.getOutputFile(), PATCH_STATISTICS_EXTENSION)
+ );
+ }
+
+ @Override
+ public boolean beginCommit(Analysis analysis) {
+ output = new OutputStreamWriter(exportStrategy.onCommit(analysis.getCurrentCommitDiff()));
+ return true;
+ }
+
+ @Override
+ public boolean analyzeDiffTree(Analysis analysis) throws IOException {
+ var editClassCounts = new LinkedHashMap();
+ ProposedEditClasses.Instance.all().forEach(e -> editClassCounts.put(e, 0));
+
+ analysis.getCurrentDiffTree().forAll(node -> {
+ if (node.isArtifact()) {
+ final EditClass editClass = ProposedEditClasses.Instance.match(node);
+
+ analysis.get(EditClassCount.KEY).reportOccurrenceFor(
+ editClass,
+ analysis.getCurrentCommitDiff()
+ );
+
+ editClassCounts.computeIfPresent(editClass, (p, i) -> i + 1);
+ }
+ });
+
+ output.write(
+ Stream.concat(
+ Stream.of(
+ analysis.getCurrentPatch().getCommitHash(),
+ analysis.getCurrentPatch().getFileName()
+ ),
+ editClassCounts.values().stream())
+ .map(Object::toString)
+ .collect(Collectors.joining(CSV.DEFAULT_CSV_DELIMITER))
+ );
+ output.write(StringUtils.LINEBREAK);
+
+ return true;
+ }
+
+ @Override
+ public void endCommit(Analysis analysis) throws IOException {
+ output.close();
+ }
+
+ @Override
+ public void endBatch(Analysis analysis) throws IOException {
+ exportStrategy.end();
+ }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java
new file mode 100644
index 000000000..ad460484a
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/analysis/FilterAnalysis.java
@@ -0,0 +1,37 @@
+package org.variantsync.diffdetective.analysis;
+
+import java.util.Arrays;
+
+import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
+import org.variantsync.diffdetective.variation.diff.DiffTree;
+import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter;
+import org.variantsync.diffdetective.variation.diff.filter.TaggedPredicate;
+
+public class FilterAnalysis implements Analysis.Hooks {
+ private ExplainedFilter treeFilter;
+
+ public FilterAnalysis(ExplainedFilter treeFilter) {
+ this.treeFilter = treeFilter;
+ }
+
+ @SafeVarargs
+ public FilterAnalysis(TaggedPredicate... treeFilter) {
+ this.treeFilter = new ExplainedFilter(Arrays.stream(treeFilter));
+ }
+
+ @Override
+ public void initializeResults(Analysis analysis) {
+ analysis.append(ExplainedFilterSummary.KEY, new ExplainedFilterSummary());
+ }
+
+ @Override
+ public boolean analyzeDiffTree(Analysis analysis) throws Exception {
+ return treeFilter.test(analysis.getCurrentDiffTree());
+ }
+
+ @Override
+ public void endCommit(Analysis analysis) {
+ analysis.append(ExplainedFilterSummary.KEY, new ExplainedFilterSummary(treeFilter));
+ treeFilter.resetExplanations();
+ }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java
deleted file mode 100644
index 86e7fc33d..000000000
--- a/src/main/java/org/variantsync/diffdetective/analysis/HistoryAnalysis.java
+++ /dev/null
@@ -1,211 +0,0 @@
-package org.variantsync.diffdetective.analysis;
-
-import org.eclipse.jgit.revwalk.RevCommit;
-import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.monitoring.TaskCompletionMonitor;
-import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
-import org.variantsync.diffdetective.datasets.Repository;
-import org.variantsync.diffdetective.diff.git.GitDiffer;
-import org.variantsync.diffdetective.metadata.Metadata;
-import org.variantsync.diffdetective.mining.MiningTask;
-import org.variantsync.diffdetective.parallel.ScheduledTasksIterator;
-import org.variantsync.diffdetective.util.Clock;
-import org.variantsync.diffdetective.util.Diagnostics;
-import org.variantsync.diffdetective.util.InvocationCounter;
-import org.variantsync.diffdetective.variation.diff.DiffTree;
-import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter;
-import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions;
-import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer;
-import org.variantsync.functjonal.iteration.ClusteredIterator;
-import org.variantsync.functjonal.iteration.MappedIterator;
-
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.Iterator;
-import java.util.List;
-import java.util.function.Consumer;
-
-/**
- * An analyses that is performed for the entire commit histories of each given git repository.
- * @param repositoriesToAnalyze The repositories whose commit history should be analyzed.
- * @param outputDir The directory to which any produced results should be written.
- * @param commitsToProcessPerThread Number of commits that should be processed by each single thread if multithreading is used.
- * Each thread will be given this number of commits to process.
- * A larger number means fewer threads and less scheduling.
- * A smaller number means more threads but also more scheduling.
- * @param whatToDo A factory for tasks that should be executed for the commits of a certain repository.
- * @param postProcessingOnRepositoryOutputDir A callback that is invoked after all analyses are completed.
- * The argument is the output directory on which postprocessing might occur.
- * @author Paul Bittner
- */
-public record HistoryAnalysis(
- List repositoriesToAnalyze,
- Path outputDir,
- int commitsToProcessPerThread,
- CommitHistoryAnalysisTaskFactory whatToDo,
- Consumer postProcessingOnRepositoryOutputDir
-) {
- /**
- * File name that is used to store the analysis results for each repository.
- */
- public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + AnalysisResult.EXTENSION;
- /**
- * Default value for commitsToProcessPerThread
- * @see org.variantsync.diffdetective.analysis.HistoryAnalysis#HistoryAnalysis(List, Path, int, CommitHistoryAnalysisTaskFactory, Consumer)
- */
- public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000;
-
- @Deprecated
- public static void analyze(
- final Repository repo,
- final Path outputDir,
- final ExplainedFilter treeFilter,
- final List treePreProcessing,
- final LineGraphExportOptions exportOptions,
- final AnalysisStrategy strategy)
- {
- AnalysisResult totalResult;
- final GitDiffer differ = new GitDiffer(repo);
- final Clock clock = new Clock();
-
- // prepare tasks
- Logger.info(">>> Scheduling synchronous mining");
- clock.start();
- List commitsToProcess = differ.yieldRevCommits().toList();
- final CommitHistoryAnalysisTask task = new MiningTask(new CommitHistoryAnalysisTask.Options(
- repo,
- differ,
- outputDir.resolve(repo.getRepositoryName() + ".lg"),
- treeFilter,
- treePreProcessing,
- strategy,
- commitsToProcess
- ), exportOptions);
- Logger.info("Scheduled {} commits.", commitsToProcess.size());
- commitsToProcess = null; // free reference to enable garbage collection
- Logger.info("<<< done after {}", clock.printPassedSeconds());
-
- Logger.info(">>> Run mining");
- clock.start();
- try {
- totalResult = task.call();
- } catch (Exception e) {
- Logger.error(e);
- Logger.info("<<< aborted after {}", clock.printPassedSeconds());
- return;
- }
- Logger.info("<<< done after {}", clock.printPassedSeconds());
-
- exportMetadata(outputDir, totalResult);
- }
-
- /**
- * Static analysis method that can be used without creating an HistoryAnalysis object first.
- * Analyzes the history of the given repository with the given parameters.
- * @param repo The repository to analyze.
- * @param outputDir The directory to which any produced results should be written.
- * @param taskFactory A factory for tasks that should be executed for the commits of a certain repository.
- * @param commitsToProcessPerThread Number of commits that should be processed by each single thread if multithreading is used.
- */
- public static void analyzeAsync(
- final Repository repo,
- final Path outputDir,
- final CommitHistoryAnalysisTaskFactory taskFactory,
- int commitsToProcessPerThread)
- {
- final AnalysisResult totalResult = new AnalysisResult(repo.getRepositoryName());
- final GitDiffer differ = new GitDiffer(repo);
- final Clock clock = new Clock();
-
- // prepare tasks
- final int nThreads = Diagnostics.INSTANCE.run().getNumberOfAvailableProcessors();
- Logger.info(">>> Scheduling asynchronous analysis on {} threads.", nThreads);
- clock.start();
- final InvocationCounter numberOfTotalCommits = InvocationCounter.justCount();
- final Iterator tasks = new MappedIterator<>(
- /// 1.) Retrieve COMMITS_TO_PROCESS_PER_THREAD commits from the differ and cluster them into one list.
- new ClusteredIterator<>(
- differ.yieldRevCommitsAfter(numberOfTotalCommits),
- commitsToProcessPerThread
- ),
- /// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
- /// particular thread.
- commitList -> taskFactory.create(
- repo,
- differ,
- outputDir.resolve(commitList.get(0).getId().getName() + ".lg"),
- commitList)
- );
- Logger.info("<<< done in {}", clock.printPassedSeconds());
-
- final TaskCompletionMonitor commitSpeedMonitor = new TaskCompletionMonitor(0, TaskCompletionMonitor.LogProgress("commits"));
- Logger.info(">>> Run Analysis");
- clock.start();
- commitSpeedMonitor.start();
- try (final ScheduledTasksIterator threads = new ScheduledTasksIterator<>(tasks, nThreads)) {
- while (threads.hasNext()) {
- final AnalysisResult threadsResult = threads.next();
- totalResult.append(threadsResult);
- commitSpeedMonitor.addFinishedTasks(threadsResult.exportedCommits);
- }
- } catch (Exception e) {
- Logger.error(e, "Failed to run all mining task");
- System.exit(1);
- }
-
- final double runtime = clock.getPassedSeconds();
- Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime));
-
- totalResult.runtimeWithMultithreadingInSeconds = runtime;
- totalResult.totalCommits = numberOfTotalCommits.invocationCount().get();
-
- exportMetadata(outputDir, totalResult);
- }
-
- /**
- * Exports the given metadata object to a file named according
- * {@link org.variantsync.diffdetective.analysis.HistoryAnalysis#TOTAL_RESULTS_FILE_NAME} in the given directory.
- * @param outputDir The directory into which the metadata object file should be written.
- * @param metadata The metadata to serialize
- * @param Type of the metadata.
- */
- public static void exportMetadata(final Path outputDir, final Metadata metadata) {
- exportMetadataToFile(outputDir.resolve(TOTAL_RESULTS_FILE_NAME), metadata);
- }
-
- /**
- * Exports the given metadata object to the given file. Overwrites existing files.
- * @param outputFile The file to write.
- * @param metadata The metadata to serialize
- * @param Type of the metadata.
- */
- public static void exportMetadataToFile(final Path outputFile, final Metadata metadata) {
- final String prettyMetadata = metadata.exportTo(outputFile);
- Logger.info("Metadata:\n{}", prettyMetadata);
- }
-
- /**
- * Runs this analysis asynchronously.
- * Processes each repository sequentially and runs
- * {@link org.variantsync.diffdetective.analysis.HistoryAnalysis#analyzeAsync(Repository, Path, CommitHistoryAnalysisTaskFactory, int)}
- * on each of them.
- */
- public void runAsync() {
- for (final Repository repo : repositoriesToAnalyze) {
- Logger.info(" === Begin Processing {} ===", repo.getRepositoryName());
- final Clock clock = new Clock();
- clock.start();
-
- final Path repoOutputDir = outputDir.resolve(repo.getRepositoryName());
- /// Don't repeat work we already did:
- if (!Files.exists(repoOutputDir.resolve(TOTAL_RESULTS_FILE_NAME))) {
- analyzeAsync(repo, repoOutputDir, whatToDo, commitsToProcessPerThread);
- postProcessingOnRepositoryOutputDir.accept(repoOutputDir);
- } else {
- Logger.info(" Skipping repository {} because it has already been processed.", repo.getRepositoryName());
- }
-
- Logger.info(" === End Processing {} after {} ===", repo.getRepositoryName(), clock.printPassedSeconds());
- }
- }
-}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java
new file mode 100644
index 000000000..12cfc8ef1
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/analysis/LineGraphExportAnalysis.java
@@ -0,0 +1,136 @@
+package org.variantsync.diffdetective.analysis;
+
+import java.io.OutputStream;
+import java.util.LinkedHashMap;
+
+import org.apache.commons.lang3.NotImplementedException;
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
+import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
+import org.variantsync.diffdetective.metadata.Metadata;
+import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExport;
+import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions;
+import org.variantsync.functjonal.category.InplaceSemigroup;
+
+/**
+ * Analysis hook that exports every analyzed patch with {@link LineGraphExport}.
+ * The stream that is passed to the export is obtained per commit from an {@link AnalysisStrategy}.
+ */
+public class LineGraphExportAnalysis implements Analysis.Hooks {
+    /** Key of the {@link Result} this hook appends to the analysis result. */
+    public static final ResultKey<Result> RESULT = new ResultKey<>("LineGraphExportAnalysis");
+
+    /** Names of the tree, node, and edge format of the export options. */
+    public static final class Result implements Metadata<Result> {
+        public String treeFormat;
+        public String nodeFormat;
+        public String edgeFormat;
+
+        /** Merges each format name of {@code b} into {@code a} via {@link Metadata#mergeEqual}. */
+        public static final InplaceSemigroup<Result> ISEMIGROUP = (a, b) -> {
+            a.treeFormat = Metadata.mergeEqual(a.treeFormat, b.treeFormat);
+            a.nodeFormat = Metadata.mergeEqual(a.nodeFormat, b.nodeFormat);
+            a.edgeFormat = Metadata.mergeEqual(a.edgeFormat, b.edgeFormat);
+        };
+
+        @Override
+        public InplaceSemigroup<Result> semigroup() {
+            return ISEMIGROUP;
+        }
+
+        @Override
+        public LinkedHashMap<String, Object> snapshot() {
+            var snap = new LinkedHashMap<String, Object>();
+            snap.put(MetadataKeys.TREEFORMAT, treeFormat);
+            snap.put(MetadataKeys.NODEFORMAT, nodeFormat);
+            snap.put(MetadataKeys.EDGEFORMAT, edgeFormat);
+            return snap;
+        }
+
+        /**
+         * Not supported by this result.
+         * @throws NotImplementedException always
+         */
+        @Override
+        public void setFromSnapshot(LinkedHashMap<String, String> snap) {
+            throw new NotImplementedException();
+        }
+    }
+
+    private final AnalysisStrategy analysisStrategy;
+    private final LineGraphExportOptions exportOptions;
+    /** Stream returned by the strategy for the current commit; {@code null} once it was closed. */
+    private OutputStream lineGraphDestination;
+
+    /**
+     * Creates a hook that exports with the given options and asks the given strategy for streams.
+     * @param analysisStrategy The strategy that is started, asked for a stream per commit, and ended.
+     * @param exportOptions The options passed to the line graph export.
+     */
+    public LineGraphExportAnalysis(final AnalysisStrategy analysisStrategy, final LineGraphExportOptions exportOptions) {
+        this.analysisStrategy = analysisStrategy;
+        this.exportOptions = exportOptions;
+    }
+
+    /** Appends a default-constructed {@link Result} under {@link #RESULT}. */
+    @Override
+    public void initializeResults(Analysis analysis) {
+        analysis.append(RESULT, new Result());
+    }
+
+    /** Stores the names of the formats of the export options in the result and starts the strategy. */
+    @Override
+    public void beginBatch(Analysis analysis) {
+        analysis.get(RESULT).treeFormat = exportOptions.treeFormat().getName();
+        analysis.get(RESULT).nodeFormat = exportOptions.nodeFormat().getName();
+        analysis.get(RESULT).edgeFormat = exportOptions.edgeFormat().getName();
+
+        analysisStrategy.start(analysis.getRepository(), analysis.getOutputFile());
+    }
+
+    /**
+     * Stores the stream the strategy returns for the current commit diff.
+     * @return Always {@code true}.
+     */
+    @Override
+    public boolean onParsedCommit(Analysis analysis) {
+        lineGraphDestination = analysisStrategy.onCommit(analysis.getCurrentCommitDiff());
+        return true;
+    }
+
+    /**
+     * Calls {@link LineGraphExport#toLineGraphFormat} for the current patch, passing the current
+     * commit's stream, and appends the returned value under {@link LineGraphExport#STATISTIC}.
+     * @return Always {@code true}.
+     */
+    @Override
+    public boolean analyzeDiffTree(Analysis analysis) throws Exception {
+        analysis.append(
+                LineGraphExport.STATISTIC,
+                LineGraphExport.toLineGraphFormat(analysis.getCurrentPatch(), exportOptions, lineGraphDestination)
+        );
+        return true;
+    }
+
+    /** Closes the stream of the current commit, if there is one. */
+    @Override
+    public void endCommit(Analysis analysis) throws Exception {
+        // NOTE(review): guards against endCommit running without a preceding onParsedCommit
+        // (which would be a NullPointerException) -- confirm the hook order in Analysis.
+        if (lineGraphDestination == null) {
+            return;
+        }
+
+        try {
+            lineGraphDestination.close();
+        } finally {
+            // Drop the reference so that a later commit can never reuse the closed stream.
+            lineGraphDestination = null;
+        }
+    }
+
+    /** Ends the strategy. */
+    @Override
+    public void endBatch(Analysis analysis) {
+        analysisStrategy.end();
+    }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java b/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java
index 1ba5002f2..b599894db 100644
--- a/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java
+++ b/src/main/java/org/variantsync/diffdetective/analysis/MetadataKeys.java
@@ -26,5 +26,10 @@ public final class MetadataKeys {
public final static String RUNTIME_WITH_MULTITHREADING = "runtime with multithreading in seconds";
public static final String MINCOMMIT = "fastestCommit";
public static final String MAXCOMMIT = "slowestCommit";
+
+ public final static String TOTAL_PATCHES = "total patches";
public final static String TREES = "tree diffs";
+
+ public final static String EXPORTED_COMMITS = "exported commits";
+ public final static String EXPORTED_TREES = "exported trees";
}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java b/src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java
deleted file mode 100644
index 9d207eba3..000000000
--- a/src/main/java/org/variantsync/diffdetective/analysis/PatchStatistics.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package org.variantsync.diffdetective.analysis;
-
-import org.variantsync.diffdetective.diff.git.PatchDiff;
-import org.variantsync.diffdetective.editclass.EditClassCatalogue;
-import org.variantsync.diffdetective.util.CSV;
-
-/**
- * Statistics for processing a patch in a commit.
- * @param patchDiff The diff of the processed patch.
- * @param editClassCount Count statistics for the edit class matched to the edits in the patch.
- * @author Paul Bittner
- */
-public record PatchStatistics(
- PatchDiff patchDiff,
- EditClassCount editClassCount) implements CSV {
- /**
- * Creates empty patch statistics for the given catalogue of edit classes.
- * @param patch The patch to gather statistics for.
- * @param catalogue A catalogue of edit classes which should be used for classifying edits.
- */
- public PatchStatistics(final PatchDiff patch, final EditClassCatalogue catalogue) {
- this(patch, new EditClassCount(catalogue));
- }
-
- @Override
- public String toCSV(final String delimiter) {
- return patchDiff.getCommitHash() + delimiter + patchDiff.getFileName() + delimiter + editClassCount.toCSV(delimiter);
- }
-}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java
new file mode 100644
index 000000000..83ec3f861
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/analysis/PreprocessingAnalysis.java
@@ -0,0 +1,43 @@
+package org.variantsync.diffdetective.analysis;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer;
+
+/**
+ * Analysis hook that applies a list of {@link DiffTreeTransformer}s to every DiffTree
+ * and asserts the consistency of the DiffTree afterwards.
+ */
+public class PreprocessingAnalysis implements Analysis.Hooks {
+    /** The transformers that are passed to {@code DiffTreeTransformer.apply} for each DiffTree. */
+    private final List<DiffTreeTransformer> preprocessors;
+
+    /**
+     * Creates a hook that uses the given list of transformers.
+     * The list is stored as is (it is not copied).
+     * @param preprocessors The transformers to apply to each DiffTree.
+     */
+    public PreprocessingAnalysis(List<DiffTreeTransformer> preprocessors) {
+        this.preprocessors = preprocessors;
+    }
+
+    /**
+     * Creates a hook that uses the given transformers.
+     * @param preprocessors The transformers to apply to each DiffTree.
+     */
+    public PreprocessingAnalysis(DiffTreeTransformer... preprocessors) {
+        this.preprocessors = Arrays.asList(preprocessors);
+    }
+
+    /**
+     * Applies the transformers to the current DiffTree and asserts its consistency.
+     * @return Always {@code true}.
+     */
+    @Override
+    public boolean analyzeDiffTree(Analysis analysis) throws Exception {
+        DiffTreeTransformer.apply(preprocessors, analysis.getCurrentDiffTree());
+        analysis.getCurrentDiffTree().assertConsistency();
+        return true;
+    }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java
new file mode 100644
index 000000000..ff0ac7787
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/analysis/StatisticsAnalysis.java
@@ -0,0 +1,247 @@
+package org.variantsync.diffdetective.analysis;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
+import org.variantsync.diffdetective.metadata.Metadata;
+import org.variantsync.diffdetective.util.Clock;
+import org.variantsync.diffdetective.util.FileUtils;
+import org.variantsync.diffdetective.util.IO;
+import org.variantsync.diffdetective.util.StringUtils;
+import org.variantsync.functjonal.category.InplaceSemigroup;
+
+/**
+ * Analysis hook that counts commits and DiffTrees and measures process times.
+ * At the end of a batch, the {@link Result} and the process time of each non-empty commit
+ * are exported to files that are named after the output file of the analysis.
+ */
+public class StatisticsAnalysis implements Analysis.Hooks {
+    /** Extension of the file the commit process times are exported to. */
+    public static final String COMMIT_TIME_FILE_EXTENSION = ".committimes.txt";
+
+    /** Key of the {@link Result} this hook appends to the analysis result. */
+    public static final ResultKey<Result> RESULT = new ResultKey<>("StatisticsAnalysis");
+
+    /** Counters and runtimes gathered by {@link StatisticsAnalysis}. */
+    public static final class Result implements Metadata<Result> {
+        /**
+         * Number of commits that were not processed because they had no DiffTrees.
+         * A commit is empty iff at least one of the following conditions is met for each of its patches:
+         * <ul>
+         * <li>the patch did not edit a C file,
+         * <li>the DiffTree became empty after transformations (this can happen if there are only whitespace changes),
+         * <li>or the patch had syntax errors in its annotations, so the DiffTree could not be parsed.
+         * </ul>
+         */
+        public int emptyCommits = 0;
+        /**
+         * Number of commits that could not be parsed at all because of exceptions when operating JGit.
+         *
+         * The number of commits that were filtered because they are a merge commit is thus given as
+         * {@code totalCommits - processedCommits - emptyCommits - failedCommits}
+         */
+        public int failedCommits = 0;
+        /** Number of commits for which this hook counted at least one DiffTree. */
+        public int processedCommits = 0;
+        /** Sum of the patch amounts of all parsed commits. */
+        public int totalTrees = 0;
+        /** Number of DiffTrees counted by this hook. */
+        public int processedTrees = 0;
+        /**
+         * The total runtime in seconds (irrespective of multithreading).
+         */
+        public double runtimeInSeconds = 0;
+        /**
+         * The commit that was processed the fastest.
+         */
+        public final CommitProcessTime min;
+        /**
+         * The commit that was processed the slowest.
+         */
+        public final CommitProcessTime max;
+
+        /** Creates an empty result for {@link AnalysisResult#NO_REPO}. */
+        public Result() {
+            this(AnalysisResult.NO_REPO);
+        }
+
+        /**
+         * Creates an empty result for the repository with the given name.
+         * The fastest commit starts as unknown with {@code Long.MAX_VALUE}, the slowest with {@code Long.MIN_VALUE}.
+         * @param repoName Name of the repository that is passed to the unknown commit process times.
+         */
+        public Result(String repoName) {
+            this.min = CommitProcessTime.Unknown(repoName, Long.MAX_VALUE);
+            this.max = CommitProcessTime.Unknown(repoName, Long.MIN_VALUE);
+        }
+
+        /** Adds all counters and the runtime of {@code b} to {@code a} and merges the fastest and slowest commit. */
+        public static final InplaceSemigroup<Result> ISEMIGROUP = (a, b) -> {
+            a.emptyCommits += b.emptyCommits;
+            a.failedCommits += b.failedCommits;
+            a.processedCommits += b.processedCommits;
+            a.totalTrees += b.totalTrees;
+            a.processedTrees += b.processedTrees;
+            a.runtimeInSeconds += b.runtimeInSeconds;
+            a.min.set(CommitProcessTime.min(a.min, b.min));
+            a.max.set(CommitProcessTime.max(a.max, b.max));
+        };
+
+        @Override
+        public InplaceSemigroup<Result> semigroup() {
+            return ISEMIGROUP;
+        }
+
+        @Override
+        public LinkedHashMap<String, Object> snapshot() {
+            LinkedHashMap<String, Object> snap = new LinkedHashMap<>();
+            snap.put(MetadataKeys.FAILED_COMMITS, failedCommits);
+            snap.put(MetadataKeys.EMPTY_COMMITS, emptyCommits);
+            snap.put(MetadataKeys.PROCESSED_COMMITS, processedCommits);
+            snap.put(MetadataKeys.TOTAL_PATCHES, totalTrees);
+            snap.put(MetadataKeys.TREES, processedTrees);
+            snap.put(MetadataKeys.MINCOMMIT, min.toString());
+            snap.put(MetadataKeys.MAXCOMMIT, max.toString());
+            snap.put(MetadataKeys.RUNTIME, runtimeInSeconds);
+            return snap;
+        }
+
+        /**
+         * Parses all values of this result from the given snapshot.
+         * A trailing {@code s} of the runtime is removed before the runtime is parsed.
+         */
+        @Override
+        public void setFromSnapshot(LinkedHashMap<String, String> snap) {
+            failedCommits = Integer.parseInt(snap.get(MetadataKeys.FAILED_COMMITS));
+            emptyCommits = Integer.parseInt(snap.get(MetadataKeys.EMPTY_COMMITS));
+            processedCommits = Integer.parseInt(snap.get(MetadataKeys.PROCESSED_COMMITS));
+            totalTrees = Integer.parseInt(snap.get(MetadataKeys.TOTAL_PATCHES));
+            min.set(CommitProcessTime.fromString(snap.get(MetadataKeys.MINCOMMIT)));
+            max.set(CommitProcessTime.fromString(snap.get(MetadataKeys.MAXCOMMIT)));
+            processedTrees = Integer.parseInt(snap.get(MetadataKeys.TREES));
+
+            String runtime = snap.get(MetadataKeys.RUNTIME);
+            if (runtime.endsWith("s")) {
+                runtime = runtime.substring(0, runtime.length() - 1);
+            }
+            runtimeInSeconds = Double.parseDouble(runtime);
+        }
+    }
+
+    // List to store the process time of each commit.
+    private final List<CommitProcessTime> commitTimes = new ArrayList<>(Analysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT);
+    // Clock for runtime measurement.
+    private final Clock totalTime = new Clock();
+    private final Clock commitProcessTimer = new Clock();
+    private int numDiffTrees = 0;
+
+    /** Appends a {@link Result} for the name of the analyzed repository under {@link #RESULT}. */
+    @Override
+    public void initializeResults(Analysis analysis) {
+        analysis.append(RESULT, new Result(analysis.getRepository().getRepositoryName()));
+    }
+
+    /** Starts the clock for the total runtime. */
+    @Override
+    public void beginBatch(Analysis analysis) {
+        totalTime.start();
+    }
+
+    /**
+     * Starts the clock for the current commit and resets the DiffTree counter of the commit.
+     * @return Always {@code true}.
+     */
+    @Override
+    public boolean beginCommit(Analysis analysis) {
+        commitProcessTimer.start();
+        numDiffTrees = 0;
+        return true;
+    }
+
+    /**
+     * Adds the patch amount of the current commit diff to {@link Result#totalTrees}.
+     * @return Always {@code true}.
+     */
+    @Override
+    public boolean onParsedCommit(Analysis analysis) {
+        analysis.get(RESULT).totalTrees += analysis.getCurrentCommitDiff().getPatchAmount();
+        return true;
+    }
+
+    /** Counts the current commit as failed. */
+    @Override
+    public void onFailedCommit(Analysis analysis) {
+        analysis.get(RESULT).failedCommits += 1;
+    }
+
+    /**
+     * Counts the current DiffTree.
+     * @return Always {@code true}.
+     */
+    @Override
+    public boolean analyzeDiffTree(Analysis analysis) {
+        ++numDiffTrees;
+        return true;
+    }
+
+    /**
+     * Adds the DiffTrees counted for the current commit to the result.
+     * A commit with at least one counted DiffTree counts as processed and its process time is
+     * recorded; any other commit counts as empty.
+     */
+    @Override
+    public void endCommit(Analysis analysis) {
+        final Result result = analysis.get(RESULT);
+        result.processedTrees += numDiffTrees;
+
+        // Report the commit process time if the commit is not empty.
+        if (numDiffTrees > 0) {
+            final long commitTimeMS = commitProcessTimer.getPassedMilliseconds();
+            // find max commit time
+            if (commitTimeMS > result.max.milliseconds()) {
+                result.max.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS);
+            }
+            // find min commit time
+            if (commitTimeMS < result.min.milliseconds()) {
+                result.min.set(analysis.getCurrentCommitDiff().getCommitHash(), commitTimeMS);
+            }
+            // report time
+            commitTimes.add(new CommitProcessTime(analysis.getCurrentCommitDiff().getCommitHash(), analysis.getRepository().getRepositoryName(), commitTimeMS));
+            result.processedCommits += 1;
+        } else {
+            result.emptyCommits += 1;
+        }
+    }
+
+    /**
+     * Stores the total runtime in the result and exports the result as well as the commit times.
+     * Both files are named after the output file of the analysis, extended by
+     * {@link Analysis#EXTENSION} and {@link #COMMIT_TIME_FILE_EXTENSION}, respectively.
+     */
+    @Override
+    public void endBatch(Analysis analysis) throws IOException {
+        // shutdown; report total time; export results
+        analysis.get(RESULT).runtimeInSeconds = totalTime.getPassedSeconds();
+        analysis.get(RESULT).exportTo(FileUtils.addExtension(analysis.getOutputFile(), Analysis.EXTENSION));
+        exportCommitTimes(commitTimes, FileUtils.addExtension(analysis.getOutputFile(), COMMIT_TIME_FILE_EXTENSION));
+    }
+
+    /**
+     * Exports the given commit times to the given file. Overwrites existing files.
+     * @param commitTimes List of all CommitProcessTimes to write into a single file.
+     * @param pathToOutputFile Output file to write.
+     */
+    public static void exportCommitTimes(final List<CommitProcessTime> commitTimes, final Path pathToOutputFile) throws IOException {
+        final StringBuilder times = new StringBuilder();
+
+        for (final CommitProcessTime ct : commitTimes) {
+            times.append(ct.toString()).append(StringUtils.LINEBREAK);
+        }
+
+        IO.write(pathToOutputFile, times.toString());
+    }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java b/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java
index e2033b0a5..8ddc24550 100644
--- a/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java
+++ b/src/main/java/org/variantsync/diffdetective/analysis/strategies/AnalysisStrategy.java
@@ -1,5 +1,6 @@
package org.variantsync.diffdetective.analysis.strategies;
+import org.variantsync.diffdetective.analysis.LineGraphExportAnalysis; // For Javadoc
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.git.CommitDiff;
@@ -7,7 +8,7 @@
import java.nio.file.Path;
/**
- * Callbacks for {@link org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask}.
+ * Callbacks for {@link LineGraphExportAnalysis}.
* A strategy may perform arbitrary additional tasks upon the execution of a task.
* The strategy is notified about the start and end of a task as well after each processed commit.
* @author Paul Bittner
diff --git a/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java b/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java
index ba4a7a9ce..15bafe021 100644
--- a/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java
+++ b/src/main/java/org/variantsync/diffdetective/datasets/DefaultDatasets.java
@@ -15,7 +15,7 @@ public class DefaultDatasets {
/**
* Path to the markdown file with the links and metadata for each default dataset.
*/
- public final static Path DEFAULT_DATASETS_FILE = Path.of("docs", "replication", "datasets.md");
+ public final static Path DEFAULT_DATASETS_FILE = Path.of("docs", "datasets", "esecfse22-replication.md");
/**
* Path to the markdown file with the links and metadata for Emacs only.
diff --git a/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java b/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java
index b526ad88c..0c6b1e81e 100644
--- a/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java
+++ b/src/main/java/org/variantsync/diffdetective/metadata/EditClassCount.java
@@ -1,5 +1,6 @@
package org.variantsync.diffdetective.metadata;
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
import org.variantsync.diffdetective.diff.git.CommitDiff;
import org.variantsync.diffdetective.editclass.EditClass;
import org.variantsync.diffdetective.editclass.EditClassCatalogue;
@@ -19,6 +20,8 @@
* @author Paul Bittner
*/
public class EditClassCount implements Metadata {
+ public static final ResultKey KEY = new ResultKey<>("EditClassCount");
+
/**
* Counts the occurrences of a data point across commits.
*/
@@ -172,6 +175,44 @@ public LinkedHashMap snapshot() {
);
}
+    /**
+     * Restores the occurrences of edit classes from a snapshot.
+     * Only entries whose key equals the name of an edit class in {@code ProposedEditClasses.All} are read.
+     * The first number of such an entry is used as the total amount and the second one as the
+     * number of unique commits, for which that many placeholder commit names are inserted.
+     * @param snap The snapshot to read the occurrences from.
+     */
+    @Override
+    public void setFromSnapshot(LinkedHashMap<String, String> snap) {
+        for (var entry : snap.entrySet()) {
+            final String key = entry.getKey(); // edit class
+            if (ProposedEditClasses.All.stream().noneMatch(candidate -> candidate.getName().equals(key))) {
+                continue;
+            }
+
+            // remove unnecessary symbols, then split the key value content
+            final String[] innerKeyValuePair = entry.getValue().replaceAll("[{} ]", "").split(";");
+            final int total = Integer.parseInt(innerKeyValuePair[0].split("=")[1]); // total count
+            final int commits = Integer.parseInt(innerKeyValuePair[1].split("=")[1]);
+
+            // get edit class from key
+            final EditClass editClass = ProposedEditClasses.Instance.fromName(key).orElseThrow(
+                    () -> new RuntimeException("Could not find EditClass with name " + key)
+            );
+
+            final Occurrences occurrence = new Occurrences();
+            occurrence.totalAmount = total;
+
+            // add fake commits
+            for (int i = 0; i < commits; ++i) {
+                occurrence.uniqueCommits.add(String.valueOf(i));
+            }
+
+            // add occurrence
+            occurences.put(editClass, occurrence);
+        }
+    }
+
/**
* Mutates and returns first element.
*/
diff --git a/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java b/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java
index a086875ea..e4f2e008c 100644
--- a/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java
+++ b/src/main/java/org/variantsync/diffdetective/metadata/ExplainedFilterSummary.java
@@ -1,5 +1,6 @@
package org.variantsync.diffdetective.metadata;
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter;
import org.variantsync.functjonal.Functjonal;
import org.variantsync.functjonal.category.InplaceSemigroup;
@@ -14,6 +15,8 @@
* @author Paul Bittner
*/
public class ExplainedFilterSummary implements Metadata {
+ public static final ResultKey KEY = new ResultKey<>("ExplainedFilterSummary");
+
/**
* Prefix for exported filter reasons.
*/
@@ -95,6 +98,28 @@ public LinkedHashMap snapshot() {
);
}
+    /**
+     * Restores filter explanations from a snapshot.
+     * Only entries whose key starts with {@code FILTERED_MESSAGE_BEGIN} are read.
+     * The name of an explanation is its key without that prefix and without the last
+     * {@code FILTERED_MESSAGE_END.length()} characters; its value is parsed as an integer.
+     * @param snap The snapshot to read the explanations from.
+     */
+    @Override
+    public void setFromSnapshot(LinkedHashMap<String, String> snap) {
+        for (var entry : snap.entrySet()) {
+            final String key = entry.getKey();
+            if (!key.startsWith(FILTERED_MESSAGE_BEGIN)) {
+                continue;
+            }
+
+            // Cut the prefix and the suffix off the key to obtain the name.
+            final int nameEnd = key.length() - FILTERED_MESSAGE_END.length();
+            final String name = key.substring(FILTERED_MESSAGE_BEGIN.length(), nameEnd);
+            explanations.put(name, new ExplainedFilter.Explanation(Integer.parseInt(entry.getValue()), name));
+        }
+    }
+
@Override
public InplaceSemigroup semigroup() {
return ISEMIGROUP;
diff --git a/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java b/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java
index 5eb7cb199..5867f12c7 100644
--- a/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java
+++ b/src/main/java/org/variantsync/diffdetective/metadata/Metadata.java
@@ -1,6 +1,7 @@
package org.variantsync.diffdetective.metadata;
import org.tinylog.Logger;
+import org.variantsync.diffdetective.util.Assert;
import org.variantsync.diffdetective.util.IO;
import org.variantsync.functjonal.Cast;
import org.variantsync.functjonal.category.InplaceSemigroup;
@@ -22,6 +23,8 @@ public interface Metadata {
*/
LinkedHashMap snapshot();
+ void setFromSnapshot(LinkedHashMap snapshot);
+
/**
* Metadata should be composable.
* Composition should be inplace to optimize performance.
@@ -37,6 +40,33 @@ default void append(T other) {
semigroup().appendToFirst(Cast.unchecked(this), other);
}
+    /**
+     * Composes two equal values by returning that value unmodified.
+     * This method is intended to be used to implement a semigroup for objects which can't be merged
+     * but should always be the same anyway. If {@code !a.equals(b)} then an {@code AssertionError}
+     * is thrown.
+     *
+     * The value {@code null} is treated as the neutral element in the sense that no exception is
+     * thrown if an element is {@code null}. In this case the return value is defined by {@code
+     * mergeEqual(a, null) == a} and {@code mergeEqual(null, b) == b}.
+     *
+     * @param a the first element to merge
+     * @param b the second element to merge
+     * @param <T> the type of the objects to be merged
+     * @return {@code a} or {@code b}
+     */
+    static <T> T mergeEqual(T a, T b) {
+        if (b == null) {
+            return a;
+        }
+
+        if (a != null) {
+            Assert.assertTrue(a.equals(b));
+        }
+
+        return b;
+    }
+
/**
* Prints all key-value pairs to a single string.
* Falls back to {@link #show(String, Object)} on each entry.
diff --git a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java
index 5d1aff06c..0a724efb5 100644
--- a/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java
+++ b/src/main/java/org/variantsync/diffdetective/mining/DiffTreeMiner.java
@@ -1,13 +1,28 @@
package org.variantsync.diffdetective.mining;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.BiFunction;
+import java.util.function.Consumer;
+
import org.apache.commons.io.FileUtils;
import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask;
-import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTaskFactory;
-import org.variantsync.diffdetective.analysis.HistoryAnalysis;
+import org.variantsync.diffdetective.analysis.Analysis;
+import org.variantsync.diffdetective.analysis.EditClassOccurenceAnalysis;
+import org.variantsync.diffdetective.analysis.FilterAnalysis;
+import org.variantsync.diffdetective.analysis.LineGraphExportAnalysis;
+import org.variantsync.diffdetective.analysis.PreprocessingAnalysis;
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis;
import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
import org.variantsync.diffdetective.analysis.strategies.AnalyzeAllThenExport;
-import org.variantsync.diffdetective.datasets.*;
+import org.variantsync.diffdetective.datasets.DatasetDescription;
+import org.variantsync.diffdetective.datasets.DatasetFactory;
+import org.variantsync.diffdetective.datasets.DefaultDatasets;
+import org.variantsync.diffdetective.datasets.ParseOptions;
+import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.datasets.predefined.StanciulescuMarlin;
import org.variantsync.diffdetective.feature.CPPAnnotationParser;
import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
@@ -15,7 +30,6 @@
import org.variantsync.diffdetective.mining.formats.MiningNodeFormat;
import org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat;
import org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter;
-import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter;
import org.variantsync.diffdetective.variation.diff.serialize.GraphFormat;
import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions;
import org.variantsync.diffdetective.variation.diff.serialize.edgeformat.EdgeLabelFormat;
@@ -25,13 +39,6 @@
import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer;
import org.variantsync.diffdetective.variation.diff.transform.Starfold;
-import java.io.IOException;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.function.Consumer;
-
public class DiffTreeMiner {
public static final Path DATASET_FILE = DefaultDatasets.EMACS;
public static final boolean SEARCH_FOR_GOOD_RUNNING_EXAMPLES = false;
@@ -94,12 +101,12 @@ public static AnalysisStrategy MiningStrategy() {
// );
}
- public static CommitHistoryAnalysisTaskFactory Mine() {
- return (repo, differ, outputPath, commits) -> new MiningTask(new CommitHistoryAnalysisTask.Options(
- repo,
- differ,
- outputPath,
- new ExplainedFilter<>(
+ public static BiFunction AnalysisFactory =
+ (repo, repoOutputDir) -> new Analysis(
+ "DiffTreeMiner",
+ List.of(
+ new PreprocessingAnalysis(Postprocessing(repo)),
+ new FilterAnalysis(
DiffTreeFilter.notEmpty(),
DiffTreeFilter.moreThanOneArtifactNode(),
/// We want to exclude patches that do not edit variability.
@@ -109,11 +116,13 @@ public static CommitHistoryAnalysisTaskFactory Mine() {
/// We thus filter them.
DiffTreeFilter.hasAtLeastOneEditToVariability()
),
- Postprocessing(repo),
- MiningStrategy(),
- commits
- ), MiningExportOptions(repo));
- }
+ new LineGraphExportAnalysis(MiningStrategy(), MiningExportOptions(repo)),
+ new EditClassOccurenceAnalysis(MiningStrategy()),
+ new StatisticsAnalysis()
+ ),
+ repo,
+ repoOutputDir
+ );
public static void main(String[] args) throws IOException {
// setupLogger(Level.INFO);
@@ -139,7 +148,7 @@ public static void main(String[] args) throws IOException {
final List datasets = DefaultDatasets.loadDatasets(DATASET_FILE);
// if (PRINT_LATEX_TABLE) {
-// Validation.printLaTeXTableFor(datasets);
+// EditClassValidation.printLaTeXTableFor(datasets);
// }
final DatasetFactory miningDatasetFactory = new DatasetFactory(inputDir);
@@ -166,13 +175,10 @@ public static void main(String[] args) throws IOException {
repoPostProcessing = p -> {};
}
- final HistoryAnalysis analysis = new HistoryAnalysis(
- repos,
- outputDir,
- HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT,
- Mine(),
- repoPostProcessing);
- analysis.runAsync();
+ Analysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) -> {
+ Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir));
+ repoPostProcessing.accept(repoOutputDir);
+ });
Logger.info("Done");
final String logFile = "log.txt";
diff --git a/src/main/java/org/variantsync/diffdetective/mining/MiningTask.java b/src/main/java/org/variantsync/diffdetective/mining/MiningTask.java
deleted file mode 100644
index 2e1230f1a..000000000
--- a/src/main/java/org/variantsync/diffdetective/mining/MiningTask.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package org.variantsync.diffdetective.mining;
-
-import org.eclipse.jgit.revwalk.RevCommit;
-import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.*;
-import org.variantsync.diffdetective.diff.git.CommitDiff;
-import org.variantsync.diffdetective.diff.git.PatchDiff;
-import org.variantsync.diffdetective.diff.result.CommitDiffResult;
-import org.variantsync.diffdetective.editclass.EditClass;
-import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
-import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
-import org.variantsync.diffdetective.util.Clock;
-import org.variantsync.diffdetective.util.FileUtils;
-import org.variantsync.diffdetective.variation.diff.DiffTree;
-import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExport;
-import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions;
-import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer;
-
-import java.util.ArrayList;
-import java.util.List;
-
-public class MiningTask extends CommitHistoryAnalysisTask {
- private final LineGraphExportOptions exportOptions;
-
- public MiningTask(final Options options, final LineGraphExportOptions exportOptions) {
- super(options);
-
- this.exportOptions = exportOptions;
- }
-
- @Override
- public AnalysisResult call() throws Exception {
- final AnalysisResult miningResult = super.call();
- miningResult.putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName());
- miningResult.putCustomInfo(MetadataKeys.NODEFORMAT, exportOptions.nodeFormat().getName());
- miningResult.putCustomInfo(MetadataKeys.EDGEFORMAT, exportOptions.edgeFormat().getName());
-
- final Clock totalTime = new Clock();
-
- final List commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT);
- final List patchStatistics = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT);
- final Clock commitProcessTimer = new Clock();
-
- totalTime.start();
-
- for (final RevCommit commit : options.commits()) {
- commitProcessTimer.start();
- final CommitDiffResult commitDiffResult = options.differ().createCommitDiff(commit);
-
- miningResult.reportDiffErrors(commitDiffResult.errors());
- if (commitDiffResult.diff().isEmpty()) {
- Logger.debug("found commit that failed entirely and was not filtered because:\n{}", commitDiffResult.errors());
- continue;
- }
-
- /*
- * We count the edit classes of all difftrees that match our filter criteria
- * (e.g., match more than one edit class) and export them to the destination
- * determined by the AnalysisStrategy.
- */
- int numDiffTrees = 0;
- final CommitDiff commitDiff = commitDiffResult.diff().get();
- try (var lineGraphDestination = options.analysisStrategy().onCommit(commitDiff)) {
- for (final PatchDiff patch : commitDiff.getPatchDiffs()) {
- final PatchStatistics thisPatchesStatistics = new PatchStatistics(patch, ProposedEditClasses.Instance);
-
- if (patch.isValid()) {
- final DiffTree t = patch.getDiffTree();
- DiffTreeTransformer.apply(options.treePreProcessing(), t);
- t.assertConsistency();
-
- if (!options.treeFilter().test(t)) {
- continue;
- }
-
- miningResult.append(LineGraphExport.toLineGraphFormat(miningResult.repoName, patch, exportOptions, lineGraphDestination));
-
- t.forAll(node -> {
- if (node.isArtifact()) {
- final EditClass editClass = ProposedEditClasses.Instance.match(node);
- miningResult.editClassCounts.reportOccurrenceFor(
- editClass,
- commitDiff
- );
- thisPatchesStatistics.editClassCount().increment(editClass);
- }
- });
-
- ++numDiffTrees;
- }
-
- patchStatistics.add(thisPatchesStatistics);
- }
- }
-
- miningResult.exportedCommits += 1;
- miningResult.exportedTrees += numDiffTrees;
- miningResult.filterHits.append(new ExplainedFilterSummary(options.treeFilter()));
- options.treeFilter().resetExplanations();
-
- // Only consider non-empty commits
- if (numDiffTrees > 0) {
- final long commitTimeMS = commitProcessTimer.getPassedMilliseconds();
- if (commitTimeMS > miningResult.max.milliseconds()) {
- miningResult.max.set(commitDiff.getCommitHash(), commitTimeMS);
- }
- if (commitTimeMS < miningResult.min.milliseconds()) {
- miningResult.min.set(commitDiff.getCommitHash(), commitTimeMS);
- }
- commitTimes.add(new CommitProcessTime(commitDiff.getCommitHash(), options.repository().getRepositoryName(), commitTimeMS));
- ++miningResult.exportedCommits;
- } else {
- ++miningResult.emptyCommits;
- }
- }
-
- options.analysisStrategy().end();
- miningResult.runtimeInSeconds = totalTime.getPassedSeconds();
- miningResult.exportTo(FileUtils.addExtension(options.outputDir(), AnalysisResult.EXTENSION));
- exportCommitTimes(commitTimes, FileUtils.addExtension(options.outputDir(), COMMIT_TIME_FILE_EXTENSION));
- exportPatchStatistics(patchStatistics, FileUtils.addExtension(options.outputDir(), PATCH_STATISTICS_EXTENSION));
- return miningResult;
- }
-}
diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java b/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java
index 015b3c6e7..8149bdc79 100644
--- a/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java
+++ b/src/main/java/org/variantsync/diffdetective/tablegen/MiningResultAccumulator.java
@@ -1,18 +1,5 @@
package org.variantsync.diffdetective.tablegen;
-import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.AnalysisResult;
-import org.variantsync.diffdetective.analysis.AutomationResult;
-import org.variantsync.diffdetective.analysis.HistoryAnalysis;
-import org.variantsync.diffdetective.analysis.MetadataKeys;
-import org.variantsync.diffdetective.datasets.DatasetDescription;
-import org.variantsync.diffdetective.datasets.DefaultDatasets;
-import org.variantsync.diffdetective.tablegen.rows.ContentRow;
-import org.variantsync.diffdetective.tablegen.styles.ShortTable;
-import org.variantsync.diffdetective.tablegen.styles.VariabilityShare;
-import org.variantsync.diffdetective.util.IO;
-import org.variantsync.diffdetective.validation.FindMedianCommitTime;
-
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -21,25 +8,30 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
+import org.tinylog.Logger;
+import org.variantsync.diffdetective.analysis.Analysis;
+import org.variantsync.diffdetective.analysis.AnalysisResult;
+import org.variantsync.diffdetective.analysis.AutomationResult;
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis;
+import org.variantsync.diffdetective.datasets.DatasetDescription;
+import org.variantsync.diffdetective.datasets.DefaultDatasets;
+import org.variantsync.diffdetective.metadata.EditClassCount;
+import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
+import org.variantsync.diffdetective.tablegen.rows.ContentRow;
+import org.variantsync.diffdetective.tablegen.styles.ShortTable;
+import org.variantsync.diffdetective.tablegen.styles.VariabilityShare;
+import org.variantsync.diffdetective.util.IO;
+import org.variantsync.diffdetective.validation.FindMedianCommitTime;
+
/** Accumulates multiple {@link AnalysisResult}s of several datasets. */
public class MiningResultAccumulator {
- /** Specification of the information loaded by {@link getAllTotalResultsIn}. */
- private final static Map> CustomEntryParsers = Map.ofEntries(
- AnalysisResult.storeAsCustomInfo(MetadataKeys.TREEFORMAT),
- AnalysisResult.storeAsCustomInfo(MetadataKeys.NODEFORMAT),
- AnalysisResult.storeAsCustomInfo(MetadataKeys.EDGEFORMAT),
- AnalysisResult.storeAsCustomInfo(MetadataKeys.TASKNAME),
- Map.entry("org/variantsync/diffdetective/analysis", (r, val) -> r.putCustomInfo(MetadataKeys.TASKNAME, val))
- );
-
/**
* Finds all {@code AnalysisResult}s in {@code folderPath} recursively.
- * All files having a {@link HistoryAnalysis#TOTAL_RESULTS_FILE_NAME} filename ending are
+ * All files having a {@link Analysis#TOTAL_RESULTS_FILE_NAME} filename ending are
* parsed and associated with their filename.
*
* @param folderPath the folder which is scanned for analysis results recursively
@@ -49,13 +41,18 @@ public static Map getAllTotalResultsIn(final Path folder
// get all files in the directory which are outputs of DiffTreeMiningResult
final List paths = Files.walk(folderPath)
.filter(Files::isRegularFile)
- .filter(p -> p.toString().endsWith(HistoryAnalysis.TOTAL_RESULTS_FILE_NAME))
+ .filter(p -> p.toString().endsWith(Analysis.TOTAL_RESULTS_FILE_NAME))
.peek(path -> Logger.info("Processing file {}", path))
.toList();
final Map results = new HashMap<>();
for (final Path p : paths) {
- results.put(p.getParent().getFileName().toString(), AnalysisResult.importFrom(p, CustomEntryParsers));
+ var result = new AnalysisResult();
+ result.append(ExplainedFilterSummary.KEY, new ExplainedFilterSummary());
+ result.append(EditClassCount.KEY, new EditClassCount());
+ result.append(StatisticsAnalysis.RESULT, new StatisticsAnalysis.Result());
+ result.setFrom(p);
+ results.put(p.getParent().getFileName().toString(), result);
}
return results;
}
@@ -115,7 +112,7 @@ public static void main(final String[] args) throws IOException, ParseException
final Map allResults = getAllTotalResultsIn(inputPath);
final AnalysisResult ultimateResult = computeTotalMetadataResult(allResults.values());
- HistoryAnalysis.exportMetadataToFile(inputPath.resolve("ultimateresult" + AnalysisResult.EXTENSION), ultimateResult);
+ Analysis.exportMetadataToFile(inputPath.resolve("ultimateresult" + Analysis.EXTENSION), ultimateResult);
final Map datasetByName;
try {
diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java b/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java
index ab9d82309..d6c543b6a 100644
--- a/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java
+++ b/src/main/java/org/variantsync/diffdetective/tablegen/rows/ContentRow.java
@@ -2,7 +2,9 @@
import org.variantsync.diffdetective.analysis.AnalysisResult;
import org.variantsync.diffdetective.analysis.AutomationResult;
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
import org.variantsync.diffdetective.datasets.DatasetDescription;
+import org.variantsync.diffdetective.metadata.Metadata;
import org.variantsync.diffdetective.tablegen.ColumnDefinition;
import org.variantsync.diffdetective.tablegen.Row;
import org.variantsync.diffdetective.tablegen.TableGenerator;
@@ -21,6 +23,10 @@ public record ContentRow(
AnalysisResult results,
AutomationResult automationResult
) implements Row {
+ public <T extends Metadata<T>> T get(ResultKey<T> resultKey) { // typed shortcut for results().get(resultKey)
+     return results.get(resultKey);
+ }
+
@Override
public String toLaTeXRow(final List columns) {
final StringBuilder lineBuilder = new StringBuilder();
diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java b/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java
index b235094e9..44a377925 100644
--- a/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java
+++ b/src/main/java/org/variantsync/diffdetective/tablegen/styles/ShortTable.java
@@ -1,9 +1,10 @@
package org.variantsync.diffdetective.tablegen.styles;
import org.apache.commons.lang3.function.TriFunction;
-import org.variantsync.diffdetective.metadata.EditClassCount;
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis;
import org.variantsync.diffdetective.editclass.EditClass;
import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
+import org.variantsync.diffdetective.metadata.EditClassCount;
import org.variantsync.diffdetective.tablegen.ColumnDefinition;
import org.variantsync.diffdetective.tablegen.Row;
import org.variantsync.diffdetective.tablegen.TableDefinition;
@@ -77,11 +78,10 @@ private static List columns(final ShortTable t, final TriFunct
col("Name", LEFT, row -> row.dataset().name().toLowerCase(Locale.US)),
col("Domain", LEFT_DASH, row -> row.dataset().domain()),
col("\\#total\\\\ commits", RIGHT, row -> t.makeReadable(row.results().totalCommits)),
- col("\\#processed commits", RIGHT, row -> t.makeReadable(row.results().exportedCommits)),
- col("\\#diffs", RIGHT, row -> t.makeReadable(row.results().exportedTrees)),
+ col("\\#processed commits", RIGHT, row -> t.makeReadable(row.get(StatisticsAnalysis.RESULT).processedCommits)),
+ col("\\#diffs", RIGHT, row -> t.makeReadable(row.get(StatisticsAnalysis.RESULT).processedTrees)),
col("\\#artifact nodes", RIGHT_DASH, row -> t.makeReadable(row
- .results()
- .editClassCounts
+ .get(EditClassCount.KEY)
.getOccurences()
.values().stream()
.map(EditClassCount.Occurrences::getTotalAmount)
@@ -95,7 +95,7 @@ private static List columns(final ShortTable t, final TriFunct
}
}
- cols.add(col("runtime", DASH_RIGHT, row -> t.makeReadable(row.results().runtimeInSeconds) + "s"));
+ cols.add(col("runtime", DASH_RIGHT, row -> t.makeReadable(row.get(StatisticsAnalysis.RESULT).runtimeInSeconds) + "s"));
cols.add(col("avg. runtime~/\\\\ processed commit", RIGHT, row -> t.makeReadable(row.automationResult().avgTimeMS()) + "ms"));
cols.add(col("median runtime~/\\\\ processed commit", RIGHT, row -> t.makeReadable(row.automationResult().median().milliseconds()) + "ms"));
@@ -113,7 +113,7 @@ private static List columns(final ShortTable t, final TriFunct
* @see column
*/
private static String absoluteCountOf(final ShortTable t, final EditClass editClass, final ContentRow row) {
- return t.makeReadable(row.results().editClassCounts.getOccurences().get(editClass).getTotalAmount());
+ return t.makeReadable(row.get(EditClassCount.KEY).getOccurences().get(editClass).getTotalAmount());
}
/**
@@ -128,7 +128,7 @@ private static String absoluteCountOf(final ShortTable t, final EditClass editCl
*/
private static String relativeCountOf(final ShortTable t, final EditClass editClass, final ContentRow row) {
final LinkedHashMap editClassOccurrences =
- row.results().editClassCounts.getOccurences();
+ row.get(EditClassCount.KEY).getOccurences();
int numTotalMatches = 0;
for (final Map.Entry occurrence : editClassOccurrences.entrySet()) {
diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java b/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java
index fa502418b..6e3201f25 100644
--- a/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java
+++ b/src/main/java/org/variantsync/diffdetective/tablegen/styles/Table1.java
@@ -1,7 +1,9 @@
package org.variantsync.diffdetective.tablegen.styles;
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis;
import org.variantsync.diffdetective.editclass.EditClass;
import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
+import org.variantsync.diffdetective.metadata.EditClassCount;
import org.variantsync.diffdetective.tablegen.Row;
import org.variantsync.diffdetective.tablegen.TableDefinition;
import org.variantsync.diffdetective.tablegen.TableGenerator;
@@ -33,15 +35,15 @@ public Table1() {
col("Name", LEFT, row -> row.dataset().name()),
col("Domain", LEFT, row -> row.dataset().domain()),
col("\\#total commits", RIGHT_DASH, row -> makeReadable(row.results().totalCommits)),
- col("\\#processed commits", RIGHT, row -> makeReadable(row.results().exportedCommits)),
- col("\\#diffs", RIGHT, row -> makeReadable(row.results().exportedTrees))
+ col("\\#processed commits", RIGHT, row -> makeReadable(row.get(StatisticsAnalysis.RESULT).processedCommits)),
+ col("\\#diffs", RIGHT, row -> makeReadable(row.get(StatisticsAnalysis.RESULT).processedTrees))
));
for (final EditClass a : ProposedEditClasses.Instance.all()) {
- this.columnDefinitions.add(col(a.getName(), RIGHT, row -> makeReadable(row.results().editClassCounts.getOccurences().get(a).getTotalAmount())));
+ this.columnDefinitions.add(col(a.getName(), RIGHT, row -> makeReadable(row.get(EditClassCount.KEY).getOccurences().get(a).getTotalAmount())));
}
- this.columnDefinitions.add(col("runtime (s)", RIGHT, row -> makeReadable(row.results().runtimeInSeconds)));
+ this.columnDefinitions.add(col("runtime (s)", RIGHT, row -> makeReadable(row.get(StatisticsAnalysis.RESULT).runtimeInSeconds)));
}
/** Sorts {@code rows} alphabetically and appends {@code ultimateResult} to the result. */
diff --git a/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java b/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java
index 4b0471401..f62215784 100644
--- a/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java
+++ b/src/main/java/org/variantsync/diffdetective/tablegen/styles/VariabilityShare.java
@@ -52,7 +52,7 @@ private static boolean isEditToVariability(final EditClass c) {
/** Returns the number of occurrences of edit classes present in the table. */
private static Stream> getVariationalEditClasses(final ContentRow row) {
- return row.results().editClassCounts.getOccurences().entrySet().stream()
+ return row.get(EditClassCount.KEY).getOccurences().entrySet().stream()
.filter(entry -> isEditToVariability(entry.getKey()));
}
@@ -69,7 +69,7 @@ private static int countEditsToVariability(final ContentRow row) {
*/
private String getRelativeShareOf(final EditClass editClass, final ContentRow row) {
final int totalAmount = countEditsToVariability(row);
- return makeReadable(100.0 * ((double)row.results().editClassCounts.getOccurences().get(editClass).getTotalAmount()) / ((double) totalAmount)) + "\\%";
+ return makeReadable(100.0 * ((double)row.get(EditClassCount.KEY).getOccurences().get(editClass).getTotalAmount()) / ((double) totalAmount)) + "\\%";
}
/**
diff --git a/src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java b/src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java
new file mode 100644
index 000000000..3cc9c8c7c
--- /dev/null
+++ b/src/main/java/org/variantsync/diffdetective/validation/EditClassValidation.java
@@ -0,0 +1,70 @@
+package org.variantsync.diffdetective.validation;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.function.BiFunction;
+
+import org.variantsync.diffdetective.analysis.Analysis;
+import org.variantsync.diffdetective.analysis.FilterAnalysis;
+import org.variantsync.diffdetective.analysis.PreprocessingAnalysis;
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis;
+import org.variantsync.diffdetective.datasets.Repository;
+import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
+import org.variantsync.diffdetective.metadata.EditClassCount;
+import org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter;
+import org.variantsync.diffdetective.variation.diff.transform.CutNonEditedSubtrees;
+
+/**
+ * This is the validation from our ESEC/FSE'22 paper.
+ * It provides all configuration settings and facilities to set up the validation by
+ * creating an {@link Analysis} and running it.
+ * @author Paul Bittner
+ */
+public class EditClassValidation implements Analysis.Hooks {
+    // This is only needed for the `MarlinDebug` test.
+    public static final BiFunction<Repository, Path, Analysis> AnalysisFactory = (repo, repoOutputDir) -> new Analysis(
+        "EditClassValidation",
+        List.of(
+            new PreprocessingAnalysis(new CutNonEditedSubtrees()),
+            new FilterAnalysis(DiffTreeFilter.notEmpty()), // filters unwanted trees
+            new EditClassValidation(), // this class is itself one of the hooks: it counts edit classes
+            new StatisticsAnalysis()
+        ),
+        repo,
+        repoOutputDir
+    );
+
+    /**
+     * Main method to start the validation.
+     * @param args Command-line options.
+     * @throws IOException When copying the log file fails.
+     */
+    public static void main(String[] args) throws IOException {
+// setupLogger(Level.INFO);
+// setupLogger(Level.DEBUG);
+
+        Validation.run(args, (repo, repoOutputDir) ->
+            Analysis.forEachCommit(() -> AnalysisFactory.apply(repo, repoOutputDir))
+        );
+    }
+
+    @Override
+    public void initializeResults(Analysis analysis) { // registers a fresh EditClassCount under EditClassCount.KEY
+        analysis.append(EditClassCount.KEY, new EditClassCount());
+    }
+
+    @Override
+    public boolean analyzeDiffTree(Analysis analysis) throws Exception { // reports the matched edit class of every artifact node
+        analysis.getCurrentDiffTree().forAll(node -> {
+            if (node.isArtifact()) {
+                analysis.get(EditClassCount.KEY).reportOccurrenceFor(
+                    ProposedEditClasses.Instance.match(node),
+                    analysis.getCurrentCommitDiff()
+                );
+            }
+        });
+
+        return true; // NOTE(review): presumably lets the Analysis continue with this DiffTree - confirm against Analysis.Hooks
+    }
+}
diff --git a/src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java b/src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java
deleted file mode 100644
index 5981e48a9..000000000
--- a/src/main/java/org/variantsync/diffdetective/validation/EditClassValidationTask.java
+++ /dev/null
@@ -1,121 +0,0 @@
-package org.variantsync.diffdetective.validation;
-
-import org.eclipse.jgit.revwalk.RevCommit;
-import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.AnalysisResult;
-import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask;
-import org.variantsync.diffdetective.analysis.CommitProcessTime;
-import org.variantsync.diffdetective.analysis.HistoryAnalysis;
-import org.variantsync.diffdetective.diff.git.CommitDiff;
-import org.variantsync.diffdetective.diff.git.PatchDiff;
-import org.variantsync.diffdetective.diff.result.CommitDiffResult;
-import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
-import org.variantsync.diffdetective.metadata.ExplainedFilterSummary;
-import org.variantsync.diffdetective.util.Clock;
-import org.variantsync.diffdetective.util.FileUtils;
-import org.variantsync.diffdetective.variation.diff.DiffTree;
-import org.variantsync.diffdetective.variation.diff.transform.DiffTreeTransformer;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Task for performing the ESEC/FSE'22 validation on a set of commits from a given repository.
- * @author Paul Bittner
- */
-public class EditClassValidationTask extends CommitHistoryAnalysisTask {
- public EditClassValidationTask(Options options) {
- super(options);
- }
-
- @Override
- public AnalysisResult call() throws Exception {
- // Setup. Obtain the result from the initial setup in the super class.
- final AnalysisResult miningResult = super.call();
- // List to store the process time of each commit.
- final List commitTimes = new ArrayList<>(HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT);
- // Clock for runtime measurement.
- final Clock totalTime = new Clock();
- totalTime.start();
- final Clock commitProcessTimer = new Clock();
-
- // For each commit:
- for (final RevCommit commit : options.commits()) {
- try {
- commitProcessTimer.start();
-
- // parse the commit
- final CommitDiffResult commitDiffResult = options.differ().createCommitDiff(commit);
-
- // report any errors that occurred and exit in case no DiffTree could be parsed.
- miningResult.reportDiffErrors(commitDiffResult.errors());
- if (commitDiffResult.diff().isEmpty()) {
- Logger.debug("[MiningTask::call] found commit that failed entirely and was not filtered because:\n{}", commitDiffResult.errors());
- ++miningResult.failedCommits;
- continue;
- }
-
- // extract the produced commit diff and inform the strategy
- final CommitDiff commitDiff = commitDiffResult.diff().get();
- options.analysisStrategy().onCommit(commitDiff).close();
-
- // Count edit class matches
- int numDiffTrees = 0;
- for (final PatchDiff patch : commitDiff.getPatchDiffs()) {
- if (patch.isValid()) {
- final DiffTree t = patch.getDiffTree();
- DiffTreeTransformer.apply(options.treePreProcessing(), t);
- t.assertConsistency();
-
- if (!options.treeFilter().test(t)) {
- continue;
- }
-
- t.forAll(node -> {
- if (node.isArtifact()) {
- miningResult.editClassCounts.reportOccurrenceFor(
- ProposedEditClasses.Instance.match(node),
- commitDiff
- );
- }
- });
-
- ++numDiffTrees;
- }
- }
- miningResult.exportedTrees += numDiffTrees;
- miningResult.filterHits.append(new ExplainedFilterSummary(options.treeFilter()));
- options.treeFilter().resetExplanations();
-
- // Report the commit process time if the commit is not empty.
- if (numDiffTrees > 0) {
- final long commitTimeMS = commitProcessTimer.getPassedMilliseconds();
- // find max commit time
- if (commitTimeMS > miningResult.max.milliseconds()) {
- miningResult.max.set(commitDiff.getCommitHash(), commitTimeMS);
- }
- // find min commit time
- if (commitTimeMS < miningResult.min.milliseconds()) {
- miningResult.min.set(commitDiff.getCommitHash(), commitTimeMS);
- }
- // report time
- commitTimes.add(new CommitProcessTime(commitDiff.getCommitHash(), options.repository().getRepositoryName(), commitTimeMS));
- ++miningResult.exportedCommits;
- } else {
- ++miningResult.emptyCommits;
- }
-
- } catch (Exception e) {
- Logger.error(e, "An unexpected error occurred at {} in {}", commit.getId().getName(), getOptions().repository().getRepositoryName());
- throw e;
- }
- }
-
- // shutdown; report total time; export results
- options.analysisStrategy().end();
- miningResult.runtimeInSeconds = totalTime.getPassedSeconds();
- miningResult.exportTo(FileUtils.addExtension(options.outputDir(), AnalysisResult.EXTENSION));
- exportCommitTimes(commitTimes, FileUtils.addExtension(options.outputDir(), COMMIT_TIME_FILE_EXTENSION));
- return miningResult;
- }
-}
diff --git a/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java b/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java
index 194685e6b..cdc9a193d 100644
--- a/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java
+++ b/src/main/java/org/variantsync/diffdetective/validation/FindMedianCommitTime.java
@@ -3,8 +3,8 @@
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.tinylog.Logger;
import org.variantsync.diffdetective.analysis.AutomationResult;
-import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask;
import org.variantsync.diffdetective.analysis.CommitProcessTime;
+import org.variantsync.diffdetective.analysis.StatisticsAnalysis;
import org.variantsync.diffdetective.util.FileUtils;
import java.io.IOException;
@@ -17,7 +17,7 @@
import java.util.stream.Stream;
/**
- * Program to find the median commit time after the {@link Validation} has been performed.
+ * Program to find the median commit time after the {@link EditClassValidation} has been performed.
* This program will iterate through all commit times reported by the validation, load them,
* and find average time, median time, the fastest, and the slowest commit.
* @author Paul Bittner
@@ -54,7 +54,7 @@ public static void main(final String[] args) throws IOException {
/**
* Summarizes the commit time results found in the given validation output directory.
* The directory should point to the root of the directory in which the results of an execution
- * of the {@link Validation} can be found.
+ * of the {@link EditClassValidation} can be found.
* @param directory Validation output directory.
* @return Summary of commit process times with various speed statistics.
* @throws IOException when iterating the files in the given directory fails for some reason.
@@ -72,7 +72,7 @@ public static AutomationResult getResultOfDirectory(final Path directory) throws
try (Stream paths = Files.walk(directory)) {
result = paths
.parallel()
- .filter(p -> FileUtils.hasExtension(p, CommitHistoryAnalysisTask.COMMIT_TIME_FILE_EXTENSION))
+ .filter(p -> FileUtils.hasExtension(p, StatisticsAnalysis.COMMIT_TIME_FILE_EXTENSION))
.filter(Files::isRegularFile)
// .peek(path -> Logger.info("Processing file {}", path))
.flatMap(FindMedianCommitTime::parse)
diff --git a/src/main/java/org/variantsync/diffdetective/validation/Validation.java b/src/main/java/org/variantsync/diffdetective/validation/Validation.java
index 9da6f5f2f..b14b981b8 100644
--- a/src/main/java/org/variantsync/diffdetective/validation/Validation.java
+++ b/src/main/java/org/variantsync/diffdetective/validation/Validation.java
@@ -1,40 +1,35 @@
package org.variantsync.diffdetective.validation;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.function.BiConsumer;
+import java.util.stream.Collectors;
+
import org.apache.commons.io.FileUtils;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTask;
-import org.variantsync.diffdetective.analysis.CommitHistoryAnalysisTaskFactory;
-import org.variantsync.diffdetective.analysis.HistoryAnalysis;
-import org.variantsync.diffdetective.analysis.strategies.NullStrategy;
-import org.variantsync.diffdetective.datasets.*;
+import org.variantsync.diffdetective.analysis.Analysis;
+import org.variantsync.diffdetective.datasets.DatasetDescription;
+import org.variantsync.diffdetective.datasets.DatasetFactory;
+import org.variantsync.diffdetective.datasets.DefaultDatasets;
+import org.variantsync.diffdetective.datasets.ParseOptions;
+import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.mining.formats.DirectedEdgeLabelFormat;
import org.variantsync.diffdetective.mining.formats.MiningNodeFormat;
import org.variantsync.diffdetective.mining.formats.ReleaseMiningDiffNodeFormat;
import org.variantsync.diffdetective.util.Assert;
-import org.variantsync.diffdetective.variation.diff.filter.DiffTreeFilter;
-import org.variantsync.diffdetective.variation.diff.filter.ExplainedFilter;
import org.variantsync.diffdetective.variation.diff.serialize.GraphFormat;
import org.variantsync.diffdetective.variation.diff.serialize.LineGraphExportOptions;
import org.variantsync.diffdetective.variation.diff.serialize.edgeformat.EdgeLabelFormat;
import org.variantsync.diffdetective.variation.diff.serialize.treeformat.CommitDiffDiffTreeLabelFormat;
-import org.variantsync.diffdetective.variation.diff.transform.CutNonEditedSubtrees;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.List;
-import java.util.function.Consumer;
-import java.util.stream.Collectors;
-
-/**
- * This is the validation from our ESEC/FSE'22 paper.
- * It provides all configuration settings and facilities to setup the validation by
- * creating a {@link HistoryAnalysis} and run it.
- * @author Paul Bittner
- */
public class Validation {
+ private Validation() { // private and empty: the class cannot be instantiated from outside
+ }
+
/**
* Hardcoded configuration option that determines of all analyzed repositories should be updated
* (i.e., git pull) before the validation.
@@ -47,21 +42,6 @@ public class Validation {
// public static final boolean PRINT_LATEX_TABLE = true;
// public static final int PRINT_LARGEST_SUBJECTS = 3;
- /**
- * The {@link CommitHistoryAnalysisTaskFactory} for the {@link HistoryAnalysis} that will run our validation.
- * This factory creates {@link EditClassValidationTask}s with the respective settings.
- */
- public static final CommitHistoryAnalysisTaskFactory VALIDATION_TASK_FACTORY =
- (repo, differ, outputPath, commits) -> new EditClassValidationTask(new CommitHistoryAnalysisTask.Options(
- repo,
- differ,
- outputPath,
- new ExplainedFilter<>(DiffTreeFilter.notEmpty()),
- List.of(new CutNonEditedSubtrees()),
- new NullStrategy(),
- commits
- ));
-
/**
* Returns the node format that should be used for DiffNode IO.
*/
@@ -120,13 +100,10 @@ public static LineGraphExportOptions ValidationExportOptions(final Repository re
/**
* Main method to start the validation.
- * @param args Command-line options. Currently ignored.
+ * @param args Command-line options; {@code args[0]}, if given, is the path to the datasets file.
* @throws IOException When copying the log file fails.
*/
- public static void main(String[] args) throws IOException {
-// setupLogger(Level.INFO);
-// setupLogger(Level.DEBUG);
-
+ public static void run(String[] args, BiConsumer validation) throws IOException {
final Path datasetsFile;
if (args.length < 1) {
datasetsFile = DefaultDatasets.DEFAULT_DATASETS_FILE;
@@ -135,10 +112,10 @@ public static void main(String[] args) throws IOException {
return;
} else {
datasetsFile = Path.of(args[0]);
+ }
- if (!Files.exists(datasetsFile)) {
- Logger.error("The given datasets file \"" + datasetsFile + "\" does not exist.");
- }
+ if (!Files.exists(datasetsFile)) {
+ Logger.error("The given datasets file \"" + datasetsFile + "\" does not exist.");
}
final ParseOptions.DiffStoragePolicy diffStoragePolicy = ParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER;
@@ -186,14 +163,9 @@ public static void main(String[] args) throws IOException {
| END OF ARGUMENTS |
\* ************************ */
- final Consumer repoPostProcessing = p -> {};
- final HistoryAnalysis analysis = new HistoryAnalysis(
- repos,
- outputDir,
- HistoryAnalysis.COMMITS_TO_PROCESS_PER_THREAD_DEFAULT,
- VALIDATION_TASK_FACTORY,
- repoPostProcessing);
- analysis.runAsync();
+ Analysis.forEachRepository(repos, outputDir, (repo, repoOutputDir) ->
+ validation.accept(repo, repoOutputDir)
+ );
Logger.info("Done");
final String logFile = "log.txt";
diff --git a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java
index 6d428a350..18cd64f54 100644
--- a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java
+++ b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/DiffTreeSerializeDebugData.java
@@ -45,6 +45,13 @@ public LinkedHashMap snapshot() {
return map;
}
+ @Override
+ public void setFromSnapshot(LinkedHashMap snap) { // overwrites the three node counters with the values stored in snap
+ numExportedNonNodes = Integer.parseInt(snap.get(MetadataKeys.NON_NODE_COUNT)); // parseInt throws NumberFormatException if the key is absent (null) or not an integer
+ numExportedAddNodes = Integer.parseInt(snap.get(MetadataKeys.ADD_NODE_COUNT));
+ numExportedRemNodes = Integer.parseInt(snap.get(MetadataKeys.REM_NODE_COUNT));
+ }
+
@Override
public InplaceSemigroup semigroup() {
return ISEMIGROUP;
diff --git a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java
index 01e8e78df..206e7228a 100644
--- a/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java
+++ b/src/main/java/org/variantsync/diffdetective/variation/diff/serialize/LineGraphExport.java
@@ -2,15 +2,18 @@
import java.io.IOException;
import java.io.OutputStream;
+import java.util.LinkedHashMap;
import org.tinylog.Logger;
-import org.variantsync.diffdetective.analysis.AnalysisResult;
+import org.variantsync.diffdetective.analysis.AnalysisResult.ResultKey;
+import org.variantsync.diffdetective.analysis.MetadataKeys;
import org.variantsync.diffdetective.diff.git.CommitDiff;
import org.variantsync.diffdetective.diff.git.PatchDiff;
-import org.variantsync.diffdetective.util.StringUtils;
+import org.variantsync.diffdetective.metadata.Metadata;
import org.variantsync.diffdetective.util.StringUtils;
import org.variantsync.diffdetective.variation.diff.DiffTree;
import org.variantsync.diffdetective.variation.diff.source.DiffTreeSource;
+import org.variantsync.functjonal.category.InplaceSemigroup;
/**
* Class that contains functions for writing {@link CommitDiff}s and (sets of) {@link DiffTree}s to a linegraph file.
@@ -19,6 +22,50 @@
public final class LineGraphExport {
private LineGraphExport() {}
+ public static final ResultKey STATISTIC = new ResultKey<>("LineGraphExporter");
+ public static final class Statistic implements Metadata { // counters returned by the toLineGraphFormat methods of LineGraphExport
+ /**
+ * The number of commits that were processed.
+ * {@code exportedCommits <= totalCommits}
+ */
+ public int exportedCommits = 0;
+ /**
+ * Number of DiffTrees that were processed.
+ */
+ public int exportedTrees = 0;
+ /**
+ * Debug data for DiffTree serialization.
+ */
+ public final DiffTreeSerializeDebugData debugData = new DiffTreeSerializeDebugData();
+
+ public static final InplaceSemigroup ISEMIGROUP = (a, b) -> { // in-place merge: accumulates b into a
+ a.exportedCommits += b.exportedCommits;
+ a.exportedTrees += b.exportedTrees;
+ a.debugData.append(b.debugData); // the nested debug data is merged through its own append
+ };
+
+ @Override
+ public InplaceSemigroup semigroup() { // every instance shares the single static ISEMIGROUP
+ return ISEMIGROUP;
+ }
+
+ @Override
+ public LinkedHashMap snapshot() { // flat key/value view: both counters first, then the entries of debugData
+ var snap = new LinkedHashMap();
+ snap.put(MetadataKeys.EXPORTED_COMMITS, exportedCommits);
+ snap.put(MetadataKeys.EXPORTED_TREES, exportedTrees);
+ snap.putAll(debugData.snapshot()); // debugData's entries share this map, so its keys must not collide with the two above
+ return snap;
+ }
+
+ @Override
+ public void setFromSnapshot(LinkedHashMap snap) { // restores the fields from the same keys that snapshot() writes
+ exportedCommits = Integer.parseInt(snap.get(MetadataKeys.EXPORTED_COMMITS)); // values are parsed from their string form
+ exportedTrees = Integer.parseInt(snap.get(MetadataKeys.EXPORTED_TREES));
+ debugData.setFromSnapshot(snap); // debugData picks its own keys out of the same map
+ }
+ }
+
/**
* Exports the given DiffTree to a linegraph String. No file will be written.
* @param diffTree The difftree to export to linegraph format.
@@ -33,13 +80,12 @@ public static DiffTreeSerializeDebugData toLineGraphFormat(final DiffTree diffTr
/**
* Exports the given DiffTrees that originated from a repository with the given name.
- * @param repoName The name of the repository, the given DiffTrees originated from.
* @param trees The set of trees to export.
* @param options Configuration options for the export, such as the format used for node and edge labels.
* @return A pair of (1) metadata about the exported DiffTrees, and (2) the produced linegraph as String.
*/
- public static AnalysisResult toLineGraphFormat(final String repoName, final Iterable trees, final LineGraphExportOptions options, OutputStream destination) throws IOException {
- final AnalysisResult result = new AnalysisResult(repoName);
+ public static Statistic toLineGraphFormat(final Iterable trees, final LineGraphExportOptions options, OutputStream destination) throws IOException {
+ final var result = new Statistic();
for (final DiffTree t : trees) {
destination.write(lineGraphHeader(t.getSource(), options).getBytes());
@@ -53,36 +99,19 @@ public static AnalysisResult toLineGraphFormat(final String repoName, final Iter
return result;
}
- /**
- * Same as {@link LineGraphExport#toLineGraphFormat(String, Iterable, LineGraphExportOptions, OutputStream)} but with an
- * {@link AnalysisResult#NO_REPO unkown repository}.
- */
- public static AnalysisResult toLineGraphFormat(final Iterable trees, final LineGraphExportOptions options, OutputStream destination) throws IOException {
- return toLineGraphFormat(AnalysisResult.NO_REPO, trees, options, destination);
- }
-
- /**
- * Same as {@link LineGraphExport#toLineGraphFormat(String, CommitDiff, LineGraphExportOptions, OutputStream)}
- * but with an {@link AnalysisResult#NO_REPO unkown repository}.
- */
- public static AnalysisResult toLineGraphFormat(final CommitDiff commitDiff, final LineGraphExportOptions options, OutputStream destination) throws IOException {
- return toLineGraphFormat(AnalysisResult.NO_REPO, commitDiff, options, destination);
- }
-
/**
* Writes the given commitDiff in linegraph format to the given StringBuilder.
- * @param repoName The name of the repository from which the given CommitDiff originated.
* @param commitDiff The diff to convert to line graph format.
* @param options Configuration options for the export, such as the format used for node and edge labels.
* @param destination where the resulting line graph is written
* @return The number of the next diff tree to export (updated value of treeCounter).
*/
- public static AnalysisResult toLineGraphFormat(final String repoName, final CommitDiff commitDiff, LineGraphExportOptions options, OutputStream destination) throws IOException {
- final AnalysisResult result = new AnalysisResult(repoName);
+ public static Statistic toLineGraphFormat(final CommitDiff commitDiff, LineGraphExportOptions options, OutputStream destination) throws IOException {
+ final var result = new Statistic();
for (final PatchDiff patchDiff : commitDiff.getPatchDiffs()) {
try {
- result.append(toLineGraphFormat(repoName, patchDiff, options, destination));
+ result.append(toLineGraphFormat(patchDiff, options, destination));
} catch (Exception e) {
options.onError().accept(patchDiff, e);
break;
@@ -101,8 +130,8 @@ public static AnalysisResult toLineGraphFormat(final String repoName, final Comm
* @param destination where the resulting line graph is written
* @return The number of the next diff tree to export (updated value of treeCounter).
*/
- public static AnalysisResult toLineGraphFormat(final String repoName, final PatchDiff patch, final LineGraphExportOptions options, OutputStream destination) throws IOException {
- final AnalysisResult result = new AnalysisResult(repoName);
+ public static Statistic toLineGraphFormat(final PatchDiff patch, final LineGraphExportOptions options, OutputStream destination) throws IOException {
+ final var result = new Statistic();
if (patch.isValid()) {
//Logger.info(" Exporting DiffTree #{}", treeCounter);
diff --git a/src/test/java/MarlinDebug.java b/src/test/java/MarlinDebug.java
index ea4b8abc8..647556411 100644
--- a/src/test/java/MarlinDebug.java
+++ b/src/test/java/MarlinDebug.java
@@ -22,7 +22,7 @@
import org.variantsync.diffdetective.mining.DiffTreeMiner;
import org.variantsync.diffdetective.editclass.proposed.ProposedEditClasses;
import org.variantsync.diffdetective.util.Clock;
-import org.variantsync.diffdetective.validation.Validation;
+import org.variantsync.diffdetective.validation.EditClassValidation;
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -136,12 +136,10 @@ public static void asMiningTask(final RepoInspection repoInspection, final Strin
final RevWalk revWalk = new RevWalk(git.getRepository());
final RevCommit childCommit = revWalk.parseCommit(ObjectId.fromString(commitHash));
- DiffTreeMiner.Mine().create(
+ DiffTreeMiner.AnalysisFactory.apply(
repoInspection.repo,
- new GitDiffer(repoInspection.repo),
- repoInspection.outputPath,
- List.of(childCommit)
- ).call();
+ repoInspection.outputPath
+ ).processCommits(List.of(childCommit));
}
public static void asValidationTask(final RepoInspection repoInspection, final String commitHash) throws Exception {
@@ -150,12 +148,10 @@ public static void asValidationTask(final RepoInspection repoInspection, final S
final RevWalk revWalk = new RevWalk(git.getRepository());
final RevCommit childCommit = revWalk.parseCommit(ObjectId.fromString(commitHash));
- Validation.VALIDATION_TASK_FACTORY.create(
- repoInspection.repo,
- new GitDiffer(repoInspection.repo),
- repoInspection.outputPath,
- List.of(childCommit)
- ).call();
+ EditClassValidation.AnalysisFactory.apply(
+ repoInspection.repo,
+ repoInspection.outputPath
+ ).processCommits(List.of(childCommit));
}
public static void test(final RepoInspection repoInspection) throws Exception {