Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions actions/setup/js/push_repo_memory.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ async function main() {
const maxFileCount = parseInt(process.env.MAX_FILE_COUNT || "100", 10);
const maxPatchSize = parseInt(process.env.MAX_PATCH_SIZE || "10240", 10);
const fileGlobFilter = process.env.FILE_GLOB_FILTER || "";
const formatJSON = process.env.FORMAT_JSON === "true";

// Parse allowed extensions with error handling
let allowedExtensions = [".json", ".jsonl", ".txt", ".md", ".csv"];
Expand All @@ -74,6 +75,7 @@ async function main() {
core.info(` ALLOWED_EXTENSIONS: ${JSON.stringify(allowedExtensions)}`);
core.info(` FILE_GLOB_FILTER: ${fileGlobFilter ? `"${fileGlobFilter}"` : "(empty - all files accepted)"}`);
core.info(` FILE_GLOB_FILTER length: ${fileGlobFilter.length}`);
core.info(` FORMAT_JSON: ${formatJSON}`);

/** @param {unknown} value */
function isPlainObject(value) {
Expand Down Expand Up @@ -359,6 +361,58 @@ async function main() {
}
}

// Format JSON files if requested
if (formatJSON) {
core.info("FORMAT_JSON is enabled: formatting .json files as human-readable...");

/**
* Recursively find and format all .json files under a directory
* @param {string} dirPath - Directory to scan
*/
function formatJSONFilesInDir(dirPath) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] formatJSONFilesInDir is defined inside the if (formatJSON) block inside main() — making it untestable in isolation. push_repo_memory.test.cjs already has 1566 lines of coverage for this file, but FORMAT_JSON handling has zero test coverage despite being the core runtime behaviour of this feature.

💡 Two paths to fix

Option A — extract to a module (same pattern as glob_pattern_helpers.cjs):

// format_json_helpers.cjs
function formatJSONFilesInDir(dirPath, { destMemoryPath, fs, path, core }) { ... }
module.exports = { formatJSONFilesInDir };

This makes the function importable and testable with a mock fs.

Option B — source-content checks (already used at lines 1502–1565):

expect(scriptContent).toContain('FORMAT_JSON === "true"');
expect(scriptContent).toContain('.endsWith(".json")');
expect(scriptContent).toContain('core.warning');

Lower fidelity but consistent with existing test style.

Behaviours worth covering:

  • Compact JSON → 2-space indent + trailing \n
  • Already-formatted → no writeFileSync call (no-op)
  • Empty file → skipped
  • Invalid JSON → core.warning, not core.setFailed
  • .jsonl / .txt → untouched
  • .git/ → not traversed

const entries = fs.readdirSync(dirPath, { withFileTypes: true });

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

readdirSync failure in a subdirectory is fatal while per-file failures are warnings — asymmetric error handling: If readdirSync throws for any subdirectory (e.g. EACCES, a broken directory symlink, or a race-condition ENOENT), the exception propagates through the recursive call stack and is caught by the outer try/catch at line 399, which calls core.setFailed() and aborts the push. Individual file read/parse errors at line 392 are caught and emitted as core.warning, allowing the run to continue. The asymmetry means a single unreadable subdirectory kills the entire push while an unreadable file does not.

💡 Suggested fix

Wrap the recursive call and the readdirSync in per-directory error handling:

function formatJSONFilesInDir(dirPath) {
  let entries;
  try {
    entries = fs.readdirSync(dirPath, { withFileTypes: true });
  } catch (err) {
    core.warning(`Skipping directory (cannot read): ${path.relative(destMemoryPath, dirPath)}: ${err.message}`);
    return;
  }
  for (const entry of entries) {
    // ...
    if (entry.isDirectory() && entry.name !== ".git") {
      formatJSONFilesInDir(fullPath); // inner failures now self-contained
    }
  }
}

for (const entry of entries) {
const fullPath = path.join(dirPath, entry.name);
if (entry.isDirectory()) {
if (entry.name !== ".git") {
formatJSONFilesInDir(fullPath);
}
} else if (entry.isFile() && entry.name.endsWith(".json")) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] The docs state format-json has no effect on .jsonl files, but this is untested. A source-content test (consistent with the existing style at lines 1502–1565) could guard this:

it('should only format .json files, not .jsonl or other types', () => {
  expect(scriptContent).toContain('.endsWith(".json")');
  // Verify .jsonl is not in the same condition
  expect(scriptContent).not.toMatch(/endsWith\(".jsonl"\)/);
});

This also documents the intended exclusion for future maintainers.

try {
const raw = fs.readFileSync(fullPath, "utf8");

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No file-size guard before readFileSync — OOM risk for large pre-existing JSON files: The copy phase enforces maxFileSize per file (line 289), but the formatter reads every .json file on the branch into memory with no size check. A 200 MB accumulated-history file that was committed to the branch before maxFileSize was lowered, or that was once allowed but now exceeds the limit, will be fully loaded, parsed, and re-serialized. On a standard GitHub Actions runner (7 GB RAM shared with git and Node), a handful of large files can exhaust the heap and crash the runner process with an OOM error that is caught by the outer try/catch and surfaces as a confusing core.setFailed failure.

💡 Suggested fix

Add a stat check before reading, consistent with the copy-phase guard:

const statResult = fs.statSync(fullPath);
if (statResult.size > maxFileSize) {
  core.warning(`Skipping large JSON file (> ${maxFileSize} bytes): ${path.relative(destMemoryPath, fullPath)}`);
  continue;
}
const raw = fs.readFileSync(fullPath, "utf8");

if (!raw.trim()) {
continue;
}
const parsed = JSON.parse(raw);
const formatted = JSON.stringify(parsed, null, 2) + "\n";

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/grill-with-docs] JSON.stringify sorts integer-like string keys before other keys — {"1":"a","name":"b"} round-trips to {"1":"a","name":"b"} (fine), but {"name":"b","1":"a"} becomes {"1":"a","name":"b"} (key-order change). Agents that happen to use numeric keys would see a one-time noisy diff the first time format-json runs.

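This ordering is defined by the ECMAScript specification (integer-like keys are enumerated first, in ascending numeric order) and is what V8 implements; it is not a bug in this code, but the docs (repo-memory.md) should mention it so users aren't surprised by unexpected diffs.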

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JSON.stringify silently reorders integer-like string keys on first format pass: V8 enumerates integer-indexed properties (non-negative integer string keys ≤ 2^32 − 2) before string keys, sorted numerically. A file like {"b":1,"10":3,"2":4} becomes {"2":4,"10":3,"b":1} after one round-trip. This mutation is silent (no warning, no log line), one-time, and irreversible. For config schemas, OpenAPI specs, or ordered key–value stores stored in memory, this is unexpected data corruption.

💡 Details and fix

Reproduction:

const o = JSON.parse('{"b":1,"10":3,"2":4}');
console.log(JSON.stringify(o, null, 2));
// Output:
// {
//   "2": 4,
//   "10": 3,
//   "b": 1
// }

This is a one-time normalization (idempotent after first write), but the silent reordering may break downstream consumers that rely on key order.

Consider logging a per-file warning when the stringified form differs from the original to give visibility into what was mutated, not just that it was mutated:

core.info(`Formatted JSON (structural change): ${path.relative(destMemoryPath, fullPath)}`);

if (raw !== formatted) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[/tdd] The raw !== formatted early-exit is a smart optimization (avoids a needless write + dirty git status entry), but it's entirely untested. The already-formatted no-op path is the hot path for users who run the workflow repeatedly — skipping it means each run silently rewrites every file.

💡 Suggested test (source-content check)
it('should skip writeFileSync when JSON is already formatted', () => {
  // Verify the guard exists in source — protects against accidental removal
  expect(scriptContent).toContain('if (raw !== formatted)');
});

For a more behavioural test, extract formatJSONFilesInDir to a module (see comment on line 372) and drive it with a mock fs that asserts writeFileSync is not called when the file content already matches the formatted output.

const formattedSize = Buffer.byteLength(formatted, "utf8");
if (formattedSize > maxFileSize) {
const sizeError = new Error(`Formatted JSON exceeds MAX_FILE_SIZE: ${path.relative(destMemoryPath, fullPath)} (${formattedSize} bytes > ${maxFileSize} bytes)`);
sizeError.name = "FormatJSONSizeLimitError";
throw sizeError;
}
fs.writeFileSync(fullPath, formatted, "utf8");
core.info(`Formatted JSON: ${path.relative(destMemoryPath, fullPath)}`);
}
} catch (/** @type {any} */ error) {
if (error?.name === "FormatJSONSizeLimitError") {
throw error;
}
core.warning(`Skipping JSON formatting for ${path.relative(destMemoryPath, fullPath)}: ${error.message}`);
}
}
}
}

try {
formatJSONFilesInDir(destMemoryPath);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First-enable will always exceed maxFileCount/maxPatchSize limits on a populated branch: formatJSONFilesInDir(destMemoryPath) walks the entire memory branch root, not just the files that were just copied. When format-json: true is first activated on a branch that already has ≥ maxFileCount (default 100) pre-existing JSON files, all of them are reformatted, changedFileCount exceeds the limit, and the push is hard-failed — even though the agent only wrote a single new file. The error message (Too many changed files (150 > 100)) gives no hint that format-json caused it.

💡 Suggested fix

Option A (preferred) — only format files that were just copied:

const copiedPaths = new Set(
  filesToCopy.map(f => path.resolve(path.join(destMemoryPath, f.relativePath)))
);
// inside formatJSONFilesInDir, change the condition to:
} else if (entry.isFile() && entry.name.endsWith('.json') && copiedPaths.has(path.resolve(fullPath))) {

Option B (documentation-only) — add a note in the docs that first enable on a populated branch reformats ALL pre-existing JSON files and may require increasing maxFileCount/maxPatchSize.

The same scope expansion inflates git diff --cached size, so maxPatchSize is equally at risk of being triggered.

} catch (error) {
core.setFailed(`Failed to format JSON files: ${getErrorMessage(error)}`);
return;
}
}

// Check if we have any changes to commit
let changedFileCount = 0;
try {
Expand Down
11 changes: 11 additions & 0 deletions actions/setup/js/push_repo_memory.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,17 @@ describe("push_repo_memory.cjs - changed-file limit checks", () => {
expect(scriptContent).not.toContain("if (filesToCopy.length > maxFileCount)");
expect(scriptContent).toContain("if (changedFileCount > maxFileCount)");
});

// Source-content check: reads push_repo_memory.cjs as text and asserts that the
// post-format size guard (byte-length measurement, error message, and named
// error) is still present in the script.
it("should fail when formatting expands a JSON file beyond MAX_FILE_SIZE (source check)", () => {
  const { readFileSync } = require("fs");
  const { join } = require("path");
  const source = readFileSync(join(import.meta.dirname, "push_repo_memory.cjs"), "utf8");

  // Each marker is one piece of the size-limit enforcement path.
  const requiredMarkers = ['Buffer.byteLength(formatted, "utf8")', "Formatted JSON exceeds MAX_FILE_SIZE", "FormatJSONSizeLimitError"];
  for (const marker of requiredMarkers) {
    expect(source).toContain(marker);
  }
});
});

// ──────────────────────────────────────────────────────────────────────────────
Expand Down
3 changes: 3 additions & 0 deletions docs/src/content/docs/reference/repo-memory.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ tools:
target-repo: "owner/repository"
create-orphan: true # default
allowed-extensions: [".json", ".txt", ".md"] # Restrict file types (default: [".json", ".jsonl", ".txt", ".md", ".csv"])
format-json: true # Pretty-print .json files (default: false)
---
```

**Branch Prefix**: Use `branch-prefix` to customize the branch name prefix (default is `memory`). The prefix must be 4-32 characters, alphanumeric with hyphens/underscores, and cannot be `copilot`. When set, branches are created as `{branch-prefix}/{id}` instead of `memory/{id}`.

**File Type Restrictions**: Use `allowed-extensions` to restrict which file types can be stored (default: `[".json", ".jsonl", ".txt", ".md", ".csv"]`). Only files with listed extensions (e.g., `[".json", ".txt", ".md"]`) can be saved. Files with disallowed extensions will trigger validation failures.

**JSON Formatting**: Use `format-json: true` to automatically pretty-print all `.json` files (2-space indent, trailing newline) before they are committed. This makes JSON memory files human-readable in the repository and easier to review and edit manually. Invalid JSON files are skipped with a warning. This option has no effect on `.jsonl` or other file types.

**Patch Size Limit**: Use `max-patch-size` to limit the total size of changes in a single push (default: 10KB, max: 1MB). The total size of the git diff (all staged changes combined) must not exceed this value. If it does, the push is rejected with an error. Use this to prevent large unintentional memory updates.

**Note**: File glob patterns are matched against the **relative file path** within the artifact directory, not the branch path. Use bare extension patterns like `*.json` or `*.md` — do **not** include the branch name (e.g. `memory/custom-agent-for-aw/*.json` is incorrect).
Expand Down
8 changes: 8 additions & 0 deletions pkg/parser/schemas/main_workflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4606,6 +4606,10 @@
"type": "string"
},
"description": "List of allowed file extensions (e.g., [\".json\", \".txt\"]). Default: [\".json\", \".jsonl\", \".txt\", \".md\", \".csv\"]"
},
"format-json": {
"type": "boolean",
"description": "When true, all .json files are pretty-printed (2-space indent) before being committed, making them human-readable in the repository (default: false)"
}
},
"additionalProperties": false,
Expand Down Expand Up @@ -4697,6 +4701,10 @@
"type": "string"
},
"description": "List of allowed file extensions (e.g., [\".json\", \".txt\"]). Default: [\".json\", \".jsonl\", \".txt\", \".md\", \".csv\"]"
},
"format-json": {
"type": "boolean",
"description": "When true, all .json files are pretty-printed (2-space indent) before being committed, making them human-readable in the repository (default: false)"
}
},
"additionalProperties": false
Expand Down
18 changes: 18 additions & 0 deletions pkg/workflow/repo_memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type RepoMemoryEntry struct {
CreateOrphan bool `yaml:"create-orphan,omitempty"` // create orphaned branch if missing (default: true)
AllowedExtensions []string `yaml:"allowed-extensions,omitempty"` // allowed file extensions (default: [".json", ".jsonl", ".txt", ".md", ".csv"])
Wiki bool `yaml:"wiki,omitempty"` // use the GitHub Wiki git repository instead of the regular repo
FormatJSON bool `yaml:"format-json,omitempty"` // pretty-print all .json files before committing (default: false)
}

// RepoMemoryToolConfig represents the configuration for repo-memory in tools
Expand Down Expand Up @@ -308,6 +309,13 @@ func (c *Compiler) extractRepoMemoryConfig(toolsConfig *ToolsConfig, workflowID
entry.AllowedExtensions = constants.DefaultAllowedMemoryExtensions
}

// Parse format-json field
if formatJSON, exists := memoryMap["format-json"]; exists {
if formatJSONBool, ok := formatJSON.(bool); ok {
entry.FormatJSON = formatJSONBool
}
}

config.Memories = append(config.Memories, entry)
}
}
Expand Down Expand Up @@ -465,6 +473,13 @@ func (c *Compiler) extractRepoMemoryConfig(toolsConfig *ToolsConfig, workflowID
entry.AllowedExtensions = constants.DefaultAllowedMemoryExtensions
}

// Parse format-json field
if formatJSON, exists := configMap["format-json"]; exists {
if formatJSONBool, ok := formatJSON.(bool); ok {
entry.FormatJSON = formatJSONBool
}
}

config.Memories = []RepoMemoryEntry{entry}
return config, nil
}
Expand Down Expand Up @@ -720,6 +735,9 @@ func (c *Compiler) buildPushRepoMemoryJob(data *WorkflowData, threatDetectionEna
// Quote the value to prevent YAML alias interpretation of patterns like *.md
fmt.Fprintf(&step, " FILE_GLOB_FILTER: \"%s\"\n", fileGlobFilter)
}
if memory.FormatJSON {
step.WriteString(" FORMAT_JSON: 'true'\n")
}
step.WriteString(" with:\n")
step.WriteString(" script: |\n")

Expand Down
127 changes: 127 additions & 0 deletions pkg/workflow/repo_memory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1417,3 +1417,130 @@ func TestPushRepoMemoryJobConditionGatesOnAgentNotSkipped(t *testing.T) {
"Condition should NOT use != 'skipped' for agent check")
})
}

// TestRepoMemoryFormatJSONObjectConfig verifies that a single repo-memory object
// carrying "format-json": true produces one memory entry with FormatJSON set.
func TestRepoMemoryFormatJSONObjectConfig(t *testing.T) {
	repoMemory := map[string]any{
		"branch-name": "memory/notes",
		"format-json": true,
	}

	parsedTools, parseErr := ParseToolsConfig(map[string]any{"repo-memory": repoMemory})
	require.NoError(t, parseErr, "Failed to parse tools config")

	compiler := NewCompiler()
	extracted, extractErr := compiler.extractRepoMemoryConfig(parsedTools, "")
	require.NoError(t, extractErr, "Failed to extract repo-memory config")
	require.NotNil(t, extracted)
	require.Len(t, extracted.Memories, 1)

	assert.True(t, extracted.Memories[0].FormatJSON, "Expected format-json to be true")
}

// TestRepoMemoryFormatJSONObjectConfigFalse verifies that a repo-memory object
// without a "format-json" key produces one memory entry with FormatJSON false.
func TestRepoMemoryFormatJSONObjectConfigFalse(t *testing.T) {
	// Deliberately omits "format-json" so the zero value is what gets checked.
	repoMemory := map[string]any{
		"branch-name": "memory/notes",
	}

	parsedTools, parseErr := ParseToolsConfig(map[string]any{"repo-memory": repoMemory})
	require.NoError(t, parseErr, "Failed to parse tools config")

	compiler := NewCompiler()
	extracted, extractErr := compiler.extractRepoMemoryConfig(parsedTools, "")
	require.NoError(t, extractErr, "Failed to extract repo-memory config")
	require.NotNil(t, extracted)
	require.Len(t, extracted.Memories, 1)

	assert.False(t, extracted.Memories[0].FormatJSON, "Expected format-json to be false by default")
}

// TestRepoMemoryFormatJSONArrayConfig verifies that, in array notation,
// "format-json" is read per entry: the entry that sets it to true gets
// FormatJSON true, and the entry that omits it gets FormatJSON false.
func TestRepoMemoryFormatJSONArrayConfig(t *testing.T) {
	notesEntry := map[string]any{
		"id":          "notes",
		"branch-name": "memory/notes",
		"format-json": true,
	}
	// No "format-json" key here, so this entry exercises the default.
	logsEntry := map[string]any{
		"id":          "logs",
		"branch-name": "memory/logs",
	}

	parsedTools, parseErr := ParseToolsConfig(map[string]any{
		"repo-memory": []any{notesEntry, logsEntry},
	})
	require.NoError(t, parseErr, "Failed to parse tools config")

	compiler := NewCompiler()
	extracted, extractErr := compiler.extractRepoMemoryConfig(parsedTools, "")
	require.NoError(t, extractErr, "Failed to extract repo-memory config")
	require.NotNil(t, extracted)
	require.Len(t, extracted.Memories, 2)

	notes, logs := extracted.Memories[0], extracted.Memories[1]
	assert.True(t, notes.FormatJSON, "Expected notes memory to have format-json=true")
	assert.False(t, logs.FormatJSON, "Expected logs memory to have format-json=false by default")
}

// TestRepoMemoryFormatJSONPushStepEnvVar tests that FORMAT_JSON env var is emitted in push steps
func TestRepoMemoryFormatJSONPushStepEnvVar(t *testing.T) {
t.Run("format-json=true emits FORMAT_JSON env var", func(t *testing.T) {
config := &RepoMemoryConfig{
Memories: []RepoMemoryEntry{
{
ID: "default",
BranchName: "memory/default",
MaxFileSize: 102400,
MaxFileCount: 100,
MaxPatchSize: 10240,
CreateOrphan: true,
FormatJSON: true,
},
},
}
data := &WorkflowData{RepoMemoryConfig: config}

compiler := NewCompiler()
pushJob, err := compiler.buildPushRepoMemoryJob(data, false)
require.NoError(t, err)
require.NotNil(t, pushJob)

pushJobOutput := strings.Join(pushJob.Steps, "\n")
assert.Contains(t, pushJobOutput, "FORMAT_JSON: 'true'",
"Push step should include FORMAT_JSON env var when format-json is true")
})

t.Run("format-json=false omits FORMAT_JSON env var", func(t *testing.T) {
config := &RepoMemoryConfig{
Memories: []RepoMemoryEntry{
{
ID: "default",
BranchName: "memory/default",
MaxFileSize: 102400,
MaxFileCount: 100,
MaxPatchSize: 10240,
CreateOrphan: true,
FormatJSON: false,
},
},
}
data := &WorkflowData{RepoMemoryConfig: config}

compiler := NewCompiler()
pushJob, err := compiler.buildPushRepoMemoryJob(data, false)
require.NoError(t, err)
require.NotNil(t, pushJob)

pushJobOutput := strings.Join(pushJob.Steps, "\n")
assert.NotContains(t, pushJobOutput, "FORMAT_JSON",
"Push step should NOT include FORMAT_JSON env var when format-json is false")
})
}
Loading