Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 40 additions & 20 deletions images/chromium-headful/client/src/components/video.vue
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,19 @@
})
}

wheelThrottle = false
_scrollAccX = 0
_scrollAccY = 0
_scrollLastSendTime = 0
_scrollApiUrl: string | null = null

_getScrollApiUrl(): string {
  // Build the scroll endpoint URL on first use and memoize it in
  // _scrollApiUrl; later calls return the cached value.
  if (!this._scrollApiUrl) {
    // The kernel-images API is exposed on port 444 (maps to 10001 inside the
    // container) in both Docker and unikernel deployments.
    this._scrollApiUrl = `${location.protocol}//${location.hostname}:444/live-view/scroll`
  }
  return this._scrollApiUrl
}

onWheel(e: WheelEvent) {
if (!this.hosting || this.locked) {
return
Expand All @@ -717,8 +729,6 @@
let x = e.deltaX
let y = e.deltaY

// Normalize to pixel units. deltaMode 1 = lines, 2 = pages; convert
// both to approximate pixel values so the divisor below works uniformly.
if (e.deltaMode !== 0) {
x *= WHEEL_LINE_HEIGHT
y *= WHEEL_LINE_HEIGHT
Expand All @@ -729,26 +739,36 @@
y = y * -1
}

// The server sends one XTestFakeButtonEvent per unit we pass here,
// and each event scrolls Chromium by ~120 px. Raw pixel deltas from
// trackpads are already in pixels (~120 per notch), so dividing by
// PIXELS_PER_TICK converts them to discrete scroll "ticks". The
// result is clamped to [-scroll, scroll] (the user-facing sensitivity
// setting) so fast swipes don't over-scroll.
const PIXELS_PER_TICK = 120
x = x === 0 ? 0 : Math.min(Math.max(Math.round(x / PIXELS_PER_TICK) || Math.sign(x), -this.scroll), this.scroll)
y = y === 0 ? 0 : Math.min(Math.max(Math.round(y / PIXELS_PER_TICK) || Math.sign(y), -this.scroll), this.scroll)
this._scrollAccX += x
this._scrollAccY += y

this.sendMousePos(e)

if (!this.wheelThrottle) {
this.wheelThrottle = true
this.$client.sendData('wheel', { x, y })
if (this._scrollAccX === 0 && this._scrollAccY === 0) {
return
}

window.setTimeout(() => {
this.wheelThrottle = false
}, 100)
const now = Date.now()
if (now - this._scrollLastSendTime < 50) {
return
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Throttled scroll events silently lost at gesture end

Medium Severity

The scroll throttle accumulates deltas in _scrollAccX/_scrollAccY and only sends when 50ms have passed since the last send. When the user stops scrolling, any deltas accumulated during the final throttle window are never flushed — there's no trailing-edge timer to dispatch them. This silently drops the tail end of every scroll gesture, causing noticeable under-scrolling.

Additional Locations (1)
Fix in Cursor Fix in Web

}
this._scrollLastSendTime = now

const { w, h } = this.$accessor.video.resolution
const rect = this._overlay.getBoundingClientRect()
const sx = Math.round((w / rect.width) * (e.clientX - rect.left))
const sy = Math.round((h / rect.height) * (e.clientY - rect.top))

const dx = this._scrollAccX
const dy = this._scrollAccY
this._scrollAccX = 0
this._scrollAccY = 0

const url = this._getScrollApiUrl()
fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ x: sx, y: sy, delta_x: -dx, delta_y: -dy }),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Scroll delta negation inverts scroll_invert setting semantics

Medium Severity

The fetch body sends delta_x: -dx, delta_y: -dy, negating the accumulated deltas. CDP's Input.dispatchMouseEvent with mouseWheel uses the same sign convention as the browser's WheelEvent (positive deltaY = scroll down). This extra negation reverses the effective meaning of the scroll_invert toggle: when scroll_invert is false, scrolling is actually inverted, and when true (the default), it produces natural scrolling — the opposite of pre-existing behavior and what the setting name implies.

Fix in Cursor Fix in Web

keepalive: true,
}).catch(() => {})
}

onTouchHandler(e: TouchEvent) {
Expand Down
Binary file modified server/api
Binary file not shown.
121 changes: 90 additions & 31 deletions server/cmd/api/api/computer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,22 @@ package api
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"math"
"math/rand"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"syscall"
"time"

"github.com/onkernel/kernel-images/server/lib/cdpclient"
"github.com/onkernel/kernel-images/server/lib/logger"
"github.com/onkernel/kernel-images/server/lib/mousetrajectory"
oapi "github.com/onkernel/kernel-images/server/lib/oapi"
Expand Down Expand Up @@ -748,6 +751,8 @@ func (s *ApiService) PressKey(ctx context.Context, request oapi.PressKeyRequestO
return oapi.PressKey200Response{}, nil
}

// pixelsPerScrollTick is the factor doScroll multiplies a delta_x/delta_y
// scroll tick count by to obtain the pixel delta it passes to CDP.
const pixelsPerScrollTick = 120

func (s *ApiService) doScroll(ctx context.Context, body oapi.ScrollRequest) error {
log := logger.FromContext(ctx)

Expand All @@ -769,50 +774,104 @@ func (s *ApiService) doScroll(ctx context.Context, body oapi.ScrollRequest) erro
return &validationError{msg: fmt.Sprintf("coordinates exceed screen bounds (max: %dx%d)", screenWidth-1, screenHeight-1)}
}

args := []string{}
if body.HoldKeys != nil {
// Hold keys via xdotool (CDP doesn't have a direct modifier-hold mechanism
// that persists across separate commands).
if body.HoldKeys != nil && len(*body.HoldKeys) > 0 {
var keydownArgs []string
for _, key := range *body.HoldKeys {
args = append(args, "keydown", key)
keydownArgs = append(keydownArgs, "keydown", key)
}
if _, err := defaultXdoTool.Run(ctx, keydownArgs...); err != nil {
log.Error("xdotool keydown failed", "err", err)
}
defer func() {
var keyupArgs []string
for _, key := range *body.HoldKeys {
keyupArgs = append(keyupArgs, "keyup", key)
}
if _, err := defaultXdoTool.Run(ctx, keyupArgs...); err != nil {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deferred keyup uses request context, may leave keys stuck

High Severity

The deferred keyup cleanup in doScroll calls defaultXdoTool.Run(ctx, keyupArgs...) using the request context. Every other cleanup path in this file (lines 174, 696, 969, 984) uses context.Background() specifically so that keys are released even when the context is cancelled. If the CDP operation times out or the request is cancelled, the modifier keys will remain stuck in the "down" state.

Additional Locations (1)
Fix in Cursor Fix in Web

log.Error("xdotool keyup failed", "err", err)
}
}()
}
args = append(args, "mousemove", strconv.Itoa(body.X), strconv.Itoa(body.Y))

// Apply vertical ticks first (sequential as specified)
if body.DeltaY != nil && *body.DeltaY != 0 {
count := *body.DeltaY
btn := "5" // down
if count < 0 {
btn = "4" // up
count = -count
}
args = append(args, "click", "--repeat", strconv.Itoa(count), "--delay", "0", btn)
}
// Then horizontal ticks
if body.DeltaX != nil && *body.DeltaX != 0 {
count := *body.DeltaX
btn := "7" // right
if count < 0 {
btn = "6" // left
count = -count
}
args = append(args, "click", "--repeat", strconv.Itoa(count), "--delay", "0", btn)
// Convert tick counts to CSS pixel deltas for CDP. The API contract
// specifies delta_x/delta_y as discrete scroll ticks (matching the old
// xdotool button-click model). Each tick ≈ 120 CSS pixels.
var deltaXPx, deltaYPx float64
if body.DeltaX != nil {
deltaXPx = float64(*body.DeltaX) * pixelsPerScrollTick
}
if body.DeltaY != nil {
deltaYPx = float64(*body.DeltaY) * pixelsPerScrollTick
}

if body.HoldKeys != nil {
for _, key := range *body.HoldKeys {
args = append(args, "keyup", key)
}
upstreamURL := s.upstreamMgr.Current()
if upstreamURL == "" {
return &executionError{msg: "devtools upstream not available"}
}

log.Info("executing xdotool", "args", args)
output, err := defaultXdoTool.Run(ctx, args...)
cdpCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

client, err := cdpclient.Dial(cdpCtx, upstreamURL)
if err != nil {
log.Error("xdotool scroll failed", "err", err, "output", string(output))
return &executionError{msg: fmt.Sprintf("failed to perform scroll: %s", string(output))}
return &executionError{msg: fmt.Sprintf("failed to connect to devtools for scroll: %s", err)}
}
defer client.Close()

log.Info("dispatching CDP mouseWheel", "x", body.X, "y", body.Y, "deltaX", deltaXPx, "deltaY", deltaYPx)
if err := client.DispatchMouseWheelEvent(cdpCtx, body.X, body.Y, deltaXPx, deltaYPx); err != nil {
return &executionError{msg: fmt.Sprintf("CDP mouseWheel failed: %s", err)}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hold keys ignored in CDP scroll dispatch

High Severity

The doScroll function sends hold_keys as xdotool keydown/keyup (X11 events) but dispatches the scroll via CDP Input.dispatchMouseEvent. CDP synthetic events use their own explicit modifiers parameter (defaulting to 0) and do not inherit X11 platform keyboard state. Since modifiers is never set in the CDP call, modifier+scroll combinations like Ctrl+scroll for zoom are broken.

Fix in Cursor Fix in Web

}

return nil
}

// HandlePixelScroll handles POST /live-view/scroll — a lightweight endpoint
// for the live view client that accepts pixel-precise deltas and forwards
// them directly to Chromium via CDP, bypassing X11 entirely.
func (s *ApiService) HandlePixelScroll(w http.ResponseWriter, r *http.Request) {
var body struct {
X int `json:"x"`
Y int `json:"y"`
DeltaX float64 `json:"delta_x"`
DeltaY float64 `json:"delta_y"`
}
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
http.Error(w, "bad request", http.StatusBadRequest)
return
}

if body.DeltaX == 0 && body.DeltaY == 0 {
w.WriteHeader(http.StatusOK)
return
}

upstreamURL := s.upstreamMgr.Current()
if upstreamURL == "" {
http.Error(w, "devtools not available", http.StatusServiceUnavailable)
return
}

cdpCtx, cancel := context.WithTimeout(r.Context(), 3*time.Second)
defer cancel()

client, err := cdpclient.Dial(cdpCtx, upstreamURL)
if err != nil {
http.Error(w, "cdp dial failed", http.StatusInternalServerError)
return
}
defer client.Close()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per-scroll WebSocket connection causes high overhead

Medium Severity

Both HandlePixelScroll and doScroll open a new CDP WebSocket connection for every scroll event — involving a full handshake, target discovery, session attach, event dispatch, detach, and close. The client sends scroll events at up to 20Hz, meaning ~20 WebSocket lifecycles per second. This adds substantial latency and resource pressure to every scroll gesture.

Additional Locations (1)
Fix in Cursor Fix in Web


if err := client.DispatchMouseWheelEvent(cdpCtx, body.X, body.Y, body.DeltaX, body.DeltaY); err != nil {
http.Error(w, "cdp scroll failed", http.StatusInternalServerError)
return
}

w.WriteHeader(http.StatusOK)
}

func (s *ApiService) Scroll(ctx context.Context, request oapi.ScrollRequestObject) (oapi.ScrollResponseObject, error) {
s.inputMu.Lock()
defer s.inputMu.Unlock()
Expand Down
15 changes: 15 additions & 0 deletions server/cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@ func main() {
r.Use(
chiMiddleware.Logger,
chiMiddleware.Recoverer,
func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS")
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
if r.Method == http.MethodOptions {
w.WriteHeader(http.StatusNoContent)
return
}
next.ServeHTTP(w, r)
})
},
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CORS wildcard exposes all API endpoints to cross-origin

Medium Severity

The CORS middleware sets Access-Control-Allow-Origin: * on every route of the API server, not just /live-view/scroll. This exposes all sensitive endpoints (click_mouse, type_text, press_key, scroll, process management, etc.) to cross-origin requests from any website. A malicious page could remotely control the browser session.

Fix in Cursor Fix in Web

func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctxWithLogger := logger.AddToContext(r.Context(), slogger)
Expand Down Expand Up @@ -120,6 +132,9 @@ func main() {
w.Header().Set("Content-Type", "application/json")
w.Write(jsonData)
})
// Pixel-precise scroll for the live view client (bypasses X11 via CDP)
r.Post("/live-view/scroll", apiService.HandlePixelScroll)

// PTY attach endpoint (WebSocket) - not part of OpenAPI spec
// Uses WebSocket for bidirectional streaming, which works well through proxies.
r.Get("/process/{process_id}/attach", func(w http.ResponseWriter, r *http.Request) {
Expand Down
63 changes: 63 additions & 0 deletions server/lib/cdpclient/cdpclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,66 @@ func (c *Client) SetDeviceMetricsOverride(ctx context.Context, width, height int

return nil
}

// DispatchMouseWheelEvent sends a mouseWheel event to the first page target
// via CDP Input.dispatchMouseEvent. deltaX/deltaY are in CSS pixels, allowing
// sub-notch precision that X11 button events cannot express.
//
// The call performs four CDP round trips on c: Target.getTargets to find the
// first target whose type is "page", Target.attachToTarget (flattened) to
// obtain a session, Input.dispatchMouseEvent on that session, and finally
// Target.detachFromTarget. x and y are forwarded unchanged as the event
// position.
//
// It returns an error if any of the first three steps fails or if no page
// target exists. The detach is best effort: it runs on every exit path after a
// successful attach and its error is ignored.
func (c *Client) DispatchMouseWheelEvent(ctx context.Context, x, y int, deltaX, deltaY float64) error {
	targetsResult, err := c.send(ctx, "Target.getTargets", nil, "")
	if err != nil {
		return fmt.Errorf("Target.getTargets: %w", err)
	}

	var targets struct {
		TargetInfos []struct {
			TargetID string `json:"targetId"`
			Type     string `json:"type"`
		} `json:"targetInfos"`
	}
	if err := json.Unmarshal(targetsResult, &targets); err != nil {
		return fmt.Errorf("unmarshal targets: %w", err)
	}

	// Pick the first page target in the order the browser reported them.
	var pageTargetID string
	for _, t := range targets.TargetInfos {
		if t.Type == "page" {
			pageTargetID = t.TargetID
			break
		}
	}
	if pageTargetID == "" {
		return fmt.Errorf("no page target found")
	}

	attachResult, err := c.send(ctx, "Target.attachToTarget", map[string]any{
		"targetId": pageTargetID,
		"flatten":  true,
	}, "")
	if err != nil {
		return fmt.Errorf("Target.attachToTarget: %w", err)
	}

	var attach struct {
		SessionID string `json:"sessionId"`
	}
	if err := json.Unmarshal(attachResult, &attach); err != nil {
		return fmt.Errorf("unmarshal attach: %w", err)
	}

	// Detach on every exit path from here on, so a failed dispatch does not
	// return with the session still attached. The error is deliberately
	// ignored: detaching is cleanup and must not mask the dispatch result.
	defer func() {
		_, _ = c.send(ctx, "Target.detachFromTarget", map[string]any{
			"sessionId": attach.SessionID,
		}, "")
	}()

	if _, err := c.send(ctx, "Input.dispatchMouseEvent", map[string]any{
		"type":   "mouseWheel",
		"x":      x,
		"y":      y,
		"deltaX": deltaX,
		"deltaY": deltaY,
	}, attach.SessionID); err != nil {
		return fmt.Errorf("Input.dispatchMouseEvent mouseWheel: %w", err)
	}

	return nil
}
Loading