Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.GitDiffer;
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;
import org.variantsync.diffdetective.diff.difftree.DiffTree;
import org.variantsync.diffdetective.diff.difftree.filter.ExplainedFilter;
import org.variantsync.diffdetective.diff.difftree.transform.DiffTreeTransformer;
import org.variantsync.diffdetective.util.CSV;
import org.variantsync.diffdetective.util.IO;
import org.variantsync.diffdetective.util.StringUtils;
Expand All @@ -29,15 +31,17 @@ public abstract class CommitHistoryAnalysisTask implements Callable<AnalysisResu
* @param repository The repository that is analyzed.
* @param differ The differ that should be used to obtain diffs.
* @param outputDir The path to which any output should be written on disk.
* @param exportOptions Options for exporting DiffTrees.
* @param treeFilter filters commits before processing them
* @param treePreProcessing applies a processing function after filtering, but before processing
* @param analysisStrategy A callback that is invoked for each commit.
* @param commits The set of commits to process in this task.
*/
public record Options(
Repository repository,
GitDiffer differ,
Path outputDir,
DiffTreeLineGraphExportOptions exportOptions,
ExplainedFilter<DiffTree> treeFilter,
List<DiffTreeTransformer> treePreProcessing,
AnalysisStrategy analysisStrategy,
Iterable<RevCommit> commits
) {}
Expand All @@ -58,14 +62,9 @@ public CommitHistoryAnalysisTask.Options getOptions() {

@Override
public AnalysisResult call() throws Exception {
options.analysisStrategy().start(options.repository(), options.outputDir(), options.exportOptions());
options.analysisStrategy().start(options.repository(), options.outputDir());

final AnalysisResult miningResult = new AnalysisResult(options.repository.getRepositoryName());
final DiffTreeLineGraphExportOptions exportOptions = options.exportOptions();

miningResult.putCustomInfo(MetadataKeys.TREEFORMAT, exportOptions.treeFormat().getName());
miningResult.putCustomInfo(MetadataKeys.NODEFORMAT, exportOptions.nodeFormat().getName());
miningResult.putCustomInfo(MetadataKeys.EDGEFORMAT, exportOptions.edgeFormat().getName());
miningResult.putCustomInfo(MetadataKeys.TASKNAME, this.getClass().getName());

return miningResult;
Expand All @@ -87,7 +86,7 @@ public static void exportCommitTimes(final List<CommitProcessTime> commitTimes,
IO.write(pathToOutputFile, times.toString());
} catch (IOException e) {
Logger.error(e);
System.exit(0);
System.exit(1);
}
}

Expand All @@ -103,7 +102,7 @@ public static void exportPatchStatistics(final List<PatchStatistics> commitTimes
IO.write(pathToOutputFile, csv);
} catch (IOException e) {
Logger.error(e);
System.exit(0);
System.exit(1);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import org.variantsync.diffdetective.analysis.strategies.AnalysisStrategy;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.GitDiffer;
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;
import org.variantsync.diffdetective.diff.difftree.DiffTree;
import org.variantsync.diffdetective.diff.difftree.filter.ExplainedFilter;
import org.variantsync.diffdetective.diff.difftree.serialize.LineGraphExportOptions;
import org.variantsync.diffdetective.diff.difftree.transform.DiffTreeTransformer;
import org.variantsync.diffdetective.metadata.Metadata;
import org.variantsync.diffdetective.mining.MiningTask;
import org.variantsync.diffdetective.parallel.ScheduledTasksIterator;
Expand Down Expand Up @@ -56,7 +59,9 @@ public record HistoryAnalysis(
public static void analyze(
final Repository repo,
final Path outputDir,
final DiffTreeLineGraphExportOptions exportOptions,
final ExplainedFilter<DiffTree> treeFilter,
final List<DiffTreeTransformer> treePreProcessing,
final LineGraphExportOptions exportOptions,
final AnalysisStrategy strategy)
{
AnalysisResult totalResult;
Expand All @@ -71,10 +76,11 @@ public static void analyze(
repo,
differ,
outputDir.resolve(repo.getRepositoryName() + ".lg"),
exportOptions,
treeFilter,
treePreProcessing,
strategy,
commitsToProcess
));
), exportOptions);
Logger.info("Scheduled {} commits.", commitsToProcess.size());
commitsToProcess = null; // free reference to enable garbage collection
Logger.info("<<< done after {}", clock.printPassedSeconds());
Expand Down Expand Up @@ -144,7 +150,7 @@ public static void analyzeAsync(
}
} catch (Exception e) {
Logger.error(e, "Failed to run all mining task");
System.exit(0);
System.exit(1);
}

final double runtime = clock.getPassedSeconds();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import org.variantsync.diffdetective.analysis.monitoring.TaskCompletionMonitor;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.CommitDiff;
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;

import java.io.OutputStream;
import java.nio.file.Path;

/**
Expand All @@ -24,14 +24,17 @@ public AnalysisMonitor(int seconds) {
}

@Override
public void start(Repository repo, Path outputPath, DiffTreeLineGraphExportOptions options) {
super.start(repo, outputPath, options);
public void start(Repository repo, Path outputPath) {
super.start(repo, outputPath);
monitor.start();
}

@Override
public void onCommit(CommitDiff commit, String lineGraph) {
public OutputStream onCommit(CommitDiff commit) {
// FIXME This function is called before processing the commit.
monitor.addFinishedTasks(1);

return OutputStream.nullOutputStream();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.CommitDiff;
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;

import java.io.OutputStream;
import java.nio.file.Path;

/**
Expand All @@ -15,27 +15,28 @@
public abstract class AnalysisStrategy {
protected Repository repo;
protected Path outputPath;
protected DiffTreeLineGraphExportOptions exportOptions;

/**
* Invoked when the analysis starts.
*
* @param repo The repository on which an analysis is performed.
* @param outputPath A directory to which output should be written.
* @param options Options for data export.
*/
public void start(Repository repo, Path outputPath, DiffTreeLineGraphExportOptions options) {
public void start(Repository repo, Path outputPath) {
this.repo = repo;
this.outputPath = outputPath;
this.exportOptions = options;
}

/**
* Invoked whenever the analysis processed a commit and converted it to linegraph format.
* Invoked before a commit is analyzed.
*
* The returned line graph export destination is closed after processing the commit given by
* {@code commit}.
*
* @param commit The commit that is about to be processed.
* @param lineGraph The linegraph representation of the processed commit. Might be empty if no export to linegraph is desired.
* @return the line graph export destination
*/
public abstract void onCommit(CommitDiff commit, String lineGraph);
public abstract OutputStream onCommit(CommitDiff commit);

/**
* Invoked when the analysis is done for the current repository.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.variantsync.diffdetective.analysis.strategies;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Path;

import org.apache.commons.io.output.CloseShieldOutputStream;
import org.tinylog.Logger;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.CommitDiff;
import org.variantsync.diffdetective.util.IO;

/**
* Exports all linegraph representations generated by an analysis directly to a file.
* The file operations are buffered by Java, which flushes them whenever it decides that enough
* data has accumulated. In contrast to {@link AnalyzeAndExportIncrementally}, this relies on the
* sanity of the Java defaults.
*
* @author Benjamin Moosherr
*/
Comment thread
pmbittner marked this conversation as resolved.
public class AnalyzeAllAndExport extends AnalysisStrategy {
private OutputStream lineGraphDestination;

@Override
public void start(Repository repo, Path outputPath) {
try {
lineGraphDestination = IO.newBufferedOutputStream(outputPath);
} catch (IOException e) {
Logger.error(e);
}
}

@Override
public OutputStream onCommit(CommitDiff commit) {
// lineGraphDestination is reused for all commits. CloseShieldOutputStream ensures that it
// isn't closed after processing this commit.
return new CloseShieldOutputStream(lineGraphDestination);
}

@Override
public void end() {
try {
lineGraphDestination.close();
} catch (IOException e) {
Logger.error(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,34 +1,11 @@
package org.variantsync.diffdetective.analysis.strategies;

import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.CommitDiff;
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;
import org.variantsync.diffdetective.util.IO;

import java.nio.file.Path;

/**
* Collects all linegraph representations generated by an analysis and exports them at the end.
* @author Paul Bittner
*/
public class AnalyzeAllThenExport extends AnalysisStrategy {
private StringBuilder waitForAll;

@Override
public void start(Repository repo, Path outputPath, DiffTreeLineGraphExportOptions options) {
super.start(repo, outputPath, options);
waitForAll = new StringBuilder();
}

@Override
public void onCommit(CommitDiff commit, String lineGraph) {
waitForAll.append(lineGraph);
}

@Override
public void end() {
final String lineGraph = waitForAll.toString();
// Logger.info("Writing file {}", outputPath);
IO.tryWrite(outputPath, lineGraph);
public class AnalyzeAllThenExport extends AnalyzeAndExportIncrementally {
public AnalyzeAllThenExport() {
super(Integer.MAX_VALUE);
}
Comment thread
ibbem marked this conversation as resolved.
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package org.variantsync.diffdetective.analysis.strategies;

import org.apache.commons.io.output.CloseShieldOutputStream;
import org.tinylog.Logger;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.CommitDiff;
import org.variantsync.diffdetective.diff.difftree.serialize.DiffTreeLineGraphExportOptions;
import org.variantsync.diffdetective.util.IO;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

/**
* Collects the linegraph representations generated by an analysis and exports them once a certain threshold of
Expand All @@ -22,7 +25,7 @@ public class AnalyzeAndExportIncrementally extends AnalysisStrategy {
public static final int DEFAULT_NUMBER_OF_COMMITS_TO_EXPORT_AT_ONCE = 100;
private final int commitsToExportAtOnce;

private StringBuilder nextChunkToExport;
private ByteArrayOutputStream lineGraphDestination;
private int collectedCommits;

/**
Expand All @@ -44,42 +47,42 @@ public AnalyzeAndExportIncrementally() {
}

@Override
public void start(Repository repo, Path outputPath, DiffTreeLineGraphExportOptions options) {
super.start(repo, outputPath, options);
public void start(Repository repo, Path outputPath) {
super.start(repo, outputPath);

IO.tryDeleteFile(outputPath);
nextChunkToExport = new StringBuilder();

lineGraphDestination = new ByteArrayOutputStream();
collectedCommits = 0;
}

@Override
public void onCommit(CommitDiff commit, String lineGraph) {
public OutputStream onCommit(CommitDiff commit) {
++collectedCommits;
nextChunkToExport.append(lineGraph);

if (collectedCommits >= commitsToExportAtOnce) {
exportAppend(outputPath, nextChunkToExport.toString());
nextChunkToExport = new StringBuilder();
if (collectedCommits > commitsToExportAtOnce) {
flush();
collectedCommits = 0;
}

// lineGraphDestination is reused for all commits. CloseShieldOutputStream ensures that it
// isn't closed after processing this commit.
return new CloseShieldOutputStream(lineGraphDestination);
Comment thread
pmbittner marked this conversation as resolved.
}

@Override
public void end() {
if (!nextChunkToExport.isEmpty()) {
exportAppend(outputPath, nextChunkToExport.toString());
}
flush();
}

/**
* Appends the given linegraph string at the end of the given file.
* @param outputPath File to which the linegraph string should be appended.
* @param linegraph String to append to the given file.
*/
public static void exportAppend(final Path outputPath, final String linegraph) {
try {
// Logger.info("Writing file {}", outputPath);
IO.append(outputPath, linegraph);
private void flush() {
try (var output = IO.newBufferedOutputStream(outputPath, StandardOpenOption.CREATE, StandardOpenOption.APPEND)) {
lineGraphDestination.writeTo(output);
lineGraphDestination.reset();
} catch (IOException exception) {
Logger.error(exception);
}
Expand Down
Loading