Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- Hide version upgrade toast for askgithub deployment (`EXPERIMENT_ASK_GH_ENABLED`). [#931](https://github.com/sourcebot-dev/sourcebot/pull/931)
- Optimized web package and Docker builds: restructured Docker layers to separate dependency installation from source copying for better cache reuse, consolidated yarn installs, added BuildKit cache mount for Next.js compilation cache, enabled `experimental.optimizePackageImports` for barrel-export packages, and made Sentry build-time features conditional. [#944](https://github.com/sourcebot-dev/sourcebot/pull/944)
- Hide security notice on login and signup pages when `EXPERIMENT_ASK_GH_ENABLED` is enabled. [#943](https://github.com/sourcebot-dev/sourcebot/pull/943)

### Fixed
Expand Down
93 changes: 65 additions & 28 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,33 @@ RUN CGO_ENABLED=0 GOOS=linux go build -o /cmd/ ./cmd/...
FROM node-alpine AS shared-libs-builder
WORKDIR /app

# Step 1: Copy only package manifests for dependency installation.
# This layer is cached as long as no package.json or lock file changes.
COPY package.json yarn.lock* .yarnrc.yml ./
COPY .yarn ./.yarn
COPY ./packages/db/package.json ./packages/db/package.json
COPY ./packages/db/prisma ./packages/db/prisma
COPY ./packages/schemas/package.json ./packages/schemas/package.json
COPY ./packages/shared/package.json ./packages/shared/package.json
COPY ./packages/queryLanguage/package.json ./packages/queryLanguage/package.json
# All workspace package.json files are needed for Yarn resolution
COPY ./packages/web/package.json ./packages/web/package.json
COPY ./packages/backend/package.json ./packages/backend/package.json
COPY ./packages/mcp/package.json ./packages/mcp/package.json

RUN yarn install --immutable --mode=skip-build

# Step 2: Copy source files and build explicitly in topological order.
COPY ./packages/db ./packages/db
COPY ./schemas ./schemas
COPY ./packages/schemas ./packages/schemas
COPY ./packages/shared ./packages/shared
COPY ./packages/queryLanguage ./packages/queryLanguage
COPY ./packages/shared ./packages/shared

RUN yarn workspace @sourcebot/db install
RUN yarn workspace @sourcebot/schemas install
RUN yarn workspace @sourcebot/shared install
RUN yarn workspace @sourcebot/query-language install
RUN yarn workspace @sourcebot/db build && \
yarn workspace @sourcebot/schemas build && \
yarn workspace @sourcebot/query-language build && \
yarn workspace @sourcebot/shared build
# ------------------------------------

# ------ Build Web ------
Expand Down Expand Up @@ -79,20 +95,33 @@ ENV SENTRY_SMUAT=$SENTRY_SMUAT
RUN apk add --no-cache libc6-compat
WORKDIR /app

# Step 1: Install dependencies (cached unless package.json/lock changes).
COPY package.json yarn.lock* .yarnrc.yml ./
COPY .yarn ./.yarn
COPY ./packages/web ./packages/web
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY ./packages/web/package.json ./packages/web/package.json
COPY ./packages/db/package.json ./packages/db/package.json
COPY ./packages/db/prisma ./packages/db/prisma
COPY ./packages/schemas/package.json ./packages/schemas/package.json
COPY ./packages/shared/package.json ./packages/shared/package.json
COPY ./packages/queryLanguage/package.json ./packages/queryLanguage/package.json
COPY ./packages/backend/package.json ./packages/backend/package.json
COPY ./packages/mcp/package.json ./packages/mcp/package.json

RUN yarn install --immutable --mode=skip-build && \
yarn workspace @sourcebot/db prisma:generate

# Step 2: Copy pre-built shared libraries.
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage

# Fixes arm64 timeouts
RUN yarn workspace @sourcebot/web install
# Step 3: Copy web source and build.
COPY ./packages/web ./packages/web

ENV NEXT_TELEMETRY_DISABLED=1
RUN yarn workspace @sourcebot/web build
RUN --mount=type=cache,target=/app/packages/web/.next/cache \
yarn workspace @sourcebot/web build
ENV SKIP_ENV_VALIDATION=0
# ------------------------------

Expand All @@ -117,16 +146,29 @@ ENV SENTRY_RELEASE=$SENTRY_RELEASE

WORKDIR /app

# Step 1: Install dependencies (cached unless package.json/lock changes).
COPY package.json yarn.lock* .yarnrc.yml ./
COPY .yarn ./.yarn
COPY ./schemas ./schemas
COPY ./packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY ./packages/backend/package.json ./packages/backend/package.json
COPY ./packages/db/package.json ./packages/db/package.json
COPY ./packages/db/prisma ./packages/db/prisma
COPY ./packages/schemas/package.json ./packages/schemas/package.json
COPY ./packages/shared/package.json ./packages/shared/package.json
COPY ./packages/queryLanguage/package.json ./packages/queryLanguage/package.json
COPY ./packages/web/package.json ./packages/web/package.json
COPY ./packages/mcp/package.json ./packages/mcp/package.json

RUN yarn install --immutable --mode=skip-build && \
yarn workspace @sourcebot/db prisma:generate

# Step 2: Copy pre-built shared libraries and backend source.
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
RUN yarn workspace @sourcebot/backend install
COPY ./schemas ./schemas
COPY ./packages/backend ./packages/backend

RUN yarn workspace @sourcebot/backend build

# Upload source maps to Sentry if we have the necessary build-time args.
Expand Down Expand Up @@ -201,8 +243,8 @@ COPY .yarn ./.yarn

# Configure zoekt
COPY vendor/zoekt/install-ctags-alpine.sh .
RUN ./install-ctags-alpine.sh && rm install-ctags-alpine.sh
RUN mkdir -p ${DATA_CACHE_DIR}
RUN ./install-ctags-alpine.sh && rm install-ctags-alpine.sh && \
mkdir -p ${DATA_CACHE_DIR}
COPY --from=zoekt-builder \
/cmd/zoekt-git-index \
/cmd/zoekt-indexserver \
Expand Down Expand Up @@ -234,19 +276,14 @@ COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/shared
COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage

# Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container.
RUN git config --global safe.directory "*"

# Configure the database
RUN mkdir -p /run/postgresql && \
RUN git config --global safe.directory "*" && \
mkdir -p /run/postgresql && \
chown -R postgres:postgres /run/postgresql && \
chmod 775 /run/postgresql

# Make app directory accessible to both root and sourcebot user
RUN chown -R sourcebot /app \
&& chgrp -R 0 /app \
&& chmod -R g=u /app
# Make data directory accessible to both root and sourcebot user
RUN chown -R sourcebot /data
chmod 775 /run/postgresql && \
chown -R sourcebot /app && \
chgrp -R 0 /app && \
chmod -R g=u /app && \
chown -R sourcebot /data
Comment on lines +279 to +286
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

# First, find relevant files
echo "=== Finding relevant files ==="
fd -t f -E node_modules -E .git 'Dockerfile|entrypoint|supervisord|prefix-output' . 2>/dev/null | head -20 || true

echo -e "\n=== Searching for safe.directory and git invocations ==="
rg -n 'safe\.directory|git\s+(config|clone|pull|fetch)' --max-count=20 || true

echo -e "\n=== Searching for user context changes (RUN USER, su-exec, gosu, sudo) ==="
rg -n '^\s*USER\b|su-exec|gosu|sudo\s+' --max-count=20 || true

echo -e "\n=== Checking Dockerfile structure around line 279 ==="
fd -t f Dockerfile | head -1 | xargs -I {} sed -n '270,295p' {} || true

Repository: sourcebot-dev/sourcebot

Length of output: 6516


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== entrypoint.sh content ==="
cat -n ./entrypoint.sh || true

echo -e "\n=== supervisord.conf relevant sections ==="
rg -n 'user|command|git' supervisord.conf -A 3 || true

echo -e "\n=== git.ts to understand git operation directories ==="
head -100 ./packages/backend/src/git.ts || true

echo -e "\n=== Find all git clone/fetch operations in code ==="
rg -n 'git.*clone|git.*fetch' packages/ -A 2 -B 2 | head -60 || true

Repository: sourcebot-dev/sourcebot

Length of output: 20108


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Full supervisord.conf ==="
cat supervisord.conf || true

echo -e "\n=== Check if user is specified in supervisord.conf ==="
rg -n '^\s*(user|group)\s*=' supervisord.conf || echo "No user/group found"

echo -e "\n=== Check git invocations - which directories are accessed ==="
rg -n 'cloneRepository|createGitClientForPath|git.*cwd' packages/backend/src/ -B 3 -A 3 | head -80 || true

echo -e "\n=== Check what DATA_CACHE_DIR is set to ==="
rg -n 'DATA_CACHE_DIR' entrypoint.sh | head -10 || true

echo -e "\n=== Find where repositories are stored ==="
rg -n 'repoPath\s*=' packages/backend/src/ -A 1 | head -30 || true

Repository: sourcebot-dev/sourcebot

Length of output: 8589


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Where is repoPath defined/constructed ==="
rg -n 'repoPath\s*=' packages/backend/src/repoIndexManager.ts -B 3 -A 1 | head -50 || true

echo -e "\n=== Check INDEX_CACHE_DIR constant ==="
rg -n 'INDEX_CACHE_DIR' packages/backend/src/constants.ts -A 2 -B 2 || true

echo -e "\n=== Check if /data is referenced anywhere ==="
rg -n '/data|DATA_CACHE_DIR' packages/backend/src/ --type ts | head -20 || true

echo -e "\n=== Search for default values of DATA_CACHE_DIR ==="
rg -n 'DATA_CACHE_DIR|INDEX_CACHE_DIR' . --type sh --type ts | grep -E '\s?=|default|export' | head -15 || true

Repository: sourcebot-dev/sourcebot

Length of output: 2122


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Search for DATA_CACHE_DIR default value in env setup ==="
rg -n 'DATA_CACHE_DIR' . --type ts --type js -B 2 -A 2 | grep -E 'default|DATA_CACHE_DIR\s*=' | head -20 || true

echo -e "\n=== Check .env or environment files ==="
fd -t f '.env*|environment' . | head -10 || true

echo -e "\n=== Check Dockerfile for DATA_CACHE_DIR or /data setup ==="
sed -n '260,310p' Dockerfile || true

echo -e "\n=== Look for env variable declarations ==="
rg -n 'DATA_CACHE_DIR|/data|/app' Dockerfile | head -20 || true

echo -e "\n=== Check shared env setup ==="
rg -n 'DATA_CACHE_DIR' packages/shared/ -A 2 -B 2 || true

Repository: sourcebot-dev/sourcebot

Length of output: 4319


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Confirm git operations are only in /app and /data/.sourcebot paths ==="
echo "Git clone/fetch operations in code:"
rg -n 'cloneUrl|repoPath|createGitClientForPath' packages/backend/src/git.ts packages/backend/src/repoIndexManager.ts | head -40 || true

echo -e "\n=== Check if git is invoked anywhere else at runtime ==="
rg -n 'git\s+(clone|fetch|config)' packages/ --type ts --type js | grep -v node_modules | grep -v dist | head -30 || true

echo -e "\n=== Verify safe.directory is only set once (global scope) ==="
rg -n 'safe\.directory' . --type ts --type js --type sh | grep -v node_modules || true

echo -e "\n=== Check if there's documentation on the volume mounting issue ==="
rg -n 'dubious ownership|safe\.directory|volume' docs/ || true

Repository: sourcebot-dev/sourcebot

Length of output: 8435


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Verify repoPath construction - where exactly are repos stored ==="
rg -n 'getRepoPath' packages/backend/src/ -A 5 -B 1 | head -50 || true

echo -e "\n=== Check if git might run against /app or other locations ==="
rg -n 'cwd|path.*=|working.*dir' packages/backend/src/git.ts | head -20 || true

echo -e "\n=== Confirm all git.* functions use createGitClientForPath ==="
rg -n 'export.*const.*\(' packages/backend/src/git.ts | head -20 || true

Repository: sourcebot-dev/sourcebot

Length of output: 5486


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Find getRepoPath implementation ==="
rg -n 'export.*getRepoPath|function getRepoPath' packages/shared/src/ -A 15 || true

echo -e "\n=== Check if getRepoPath can return paths outside /data ==="
rg -n 'getRepoPath' packages/shared/src/utils.ts -A 20 -B 5 || true

Repository: sourcebot-dev/sourcebot

Length of output: 2155


Consider narrowing safe.directory to specific paths rather than wildcard.

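The global wildcard disables Git's ownership safety check for every repository on the filesystem. Instead, explicitly trust only the paths where Git actually operates: /data/.sourcebot/repos and /data/.sourcebot/index (derived from DATA_CACHE_DIR). Note that a safe.directory entry matches a repository's own path, not its parent directory; if the individual repositories live in subdirectories of these paths (which appears to be the case here, but should be confirmed), use the `<path>/*` form, which requires a Git version that supports it. For local repositories mounted via file:// URLs, consider using per-repository configuration or documenting the ownership requirements for mounted volumes.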

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@Dockerfile` around lines 279 - 286, replace the broad git safe.directory
wildcard with explicit trusted paths: change the git config invocation that sets
safe.directory "*" so that it instead registers only the specific repo and index
directories (e.g. /data/.sourcebot/repos and /data/.sourcebot/index, derived from
DATA_CACHE_DIR), using a separate git config --global --add safe.directory <path>
entry for each path. Because safe.directory matches a repository's own path rather
than its parent directory, use the <path>/* form if the repositories live in
subdirectories of those paths, and confirm that the Git version in the image
supports that form. Ensure any documentation or startup notes mention ownership
requirements for file:// mounted repos so per-repository configs can be used
when appropriate.


COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY prefix-output.sh ./prefix-output.sh
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"packages/*"
],
"scripts": {
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach --all --topological run build",
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach --all --topological --parallel run build",
"test": "yarn workspaces foreach --all --topological run test",
"dev": "concurrently --kill-others --names \"zoekt,worker,web,mcp,schemas\" 'yarn dev:zoekt' 'yarn dev:backend' 'yarn dev:web' 'yarn watch:mcp' 'yarn watch:schemas'",
"with-env": "cross-env PATH=\"$PWD/bin:$PATH\" dotenv -e .env.development -c --",
Expand All @@ -18,7 +18,8 @@
"dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio",
"dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset",
"dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push",
"build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build",
"build:web": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach --recursive --topological --parallel --from @sourcebot/web run build",
"build:deps": "yarn workspaces foreach --recursive --topological --parallel --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build",
"tool:decrypt-jwe": "yarn with-env yarn workspace @sourcebot/web tool:decrypt-jwe"
},
"devDependencies": {
Expand Down
35 changes: 28 additions & 7 deletions packages/web/next.config.mjs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { withSentryConfig } from "@sentry/nextjs";

const hasSentryConfig = !!(process.env.SENTRY_ORG && process.env.SENTRY_SMUAT);

/** @type {import('next').NextConfig} */
const nextConfig = {
Expand Down Expand Up @@ -40,16 +41,36 @@ const nextConfig = {

turbopack: {},

// @see: https://github.com/vercel/next.js/issues/58019#issuecomment-1910531929
...(process.env.NODE_ENV === 'development' ? {
experimental: {
experimental: {
optimizePackageImports: [
"lucide-react",
"@radix-ui/react-icons",
"react-icons",
"recharts",
"date-fns",
"@ai-sdk/react",
"@ai-sdk/openai",
"@ai-sdk/anthropic",
"@ai-sdk/google",
"@ai-sdk/amazon-bedrock",
"@ai-sdk/azure",
"@ai-sdk/deepseek",
"@ai-sdk/google-vertex",
"@ai-sdk/mistral",
"@ai-sdk/xai",
"@ai-sdk/openai-compatible",
"@codemirror/language-data",
"posthog-js",
],
// @see: https://github.com/vercel/next.js/issues/58019#issuecomment-1910531929
...(process.env.NODE_ENV === 'development' ? {
serverActions: {
allowedOrigins: [
'localhost:3000'
]
}
}
} : {}),
} : {}),
},
};

export default withSentryConfig(nextConfig, {
Expand All @@ -68,11 +89,11 @@ export default withSentryConfig(nextConfig, {
// https://docs.sentry.io/platforms/javascript/guides/nextjs/manual-setup/

// Upload a larger set of source maps for prettier stack traces (increases build time)
widenClientFileUpload: true,
widenClientFileUpload: hasSentryConfig,

// Automatically annotate React components to show their full name in breadcrumbs and session replay
reactComponentAnnotation: {
enabled: true,
enabled: hasSentryConfig,
},

// Route browser requests to Sentry through a Next.js rewrite to circumvent ad-blockers.
Expand Down