feat(mac): host PeekabooBridge for ui

This commit is contained in:
Peter Steinberger
2025-12-13 16:55:41 +00:00
parent fd566bda14
commit c17440f5b4
21 changed files with 1197 additions and 422 deletions

View File

@@ -155,6 +155,15 @@ final class AppState: ObservableObject {
didSet { self.ifNotPreview { UserDefaults.standard.set(self.canvasEnabled, forKey: canvasEnabledKey) } }
}
/// Whether the PeekabooBridge host should be running.
///
/// Inside `ifNotPreview`, every change is written to `UserDefaults` under
/// `peekabooBridgeEnabledKey` and forwarded to
/// `PeekabooBridgeHostCoordinator.shared.setEnabled(_:)` from a new unstructured `Task`.
@Published var peekabooBridgeEnabled: Bool {
didSet {
self.ifNotPreview {
UserDefaults.standard.set(self.peekabooBridgeEnabled, forKey: peekabooBridgeEnabledKey)
// The task body reads the property when it executes, not when `didSet` fired.
Task { await PeekabooBridgeHostCoordinator.shared.setEnabled(self.peekabooBridgeEnabled) }
}
}
}
@Published var attachExistingGatewayOnly: Bool {
didSet {
self.ifNotPreview {
@@ -231,6 +240,8 @@ final class AppState: ObservableObject {
let storedPort = UserDefaults.standard.integer(forKey: webChatPortKey)
self.webChatPort = storedPort > 0 ? storedPort : 18788
self.canvasEnabled = UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true
self.peekabooBridgeEnabled = UserDefaults.standard
.object(forKey: peekabooBridgeEnabledKey) as? Bool ?? true
self.attachExistingGatewayOnly = UserDefaults.standard.bool(forKey: attachExistingGatewayOnlyKey)
if !self.isPreview {

View File

@@ -24,6 +24,7 @@ let webChatEnabledKey = "clawdis.webChatEnabled"
let webChatSwiftUIEnabledKey = "clawdis.webChatSwiftUIEnabled"
// Read with `UserDefaults.standard.integer(forKey:)`; AppState falls back to 18788 when the stored value is not positive.
let webChatPortKey = "clawdis.webChatPort"
// AppState treats a missing value for this key as `true`.
let canvasEnabledKey = "clawdis.canvasEnabled"
// Backs `AppState.peekabooBridgeEnabled`; a missing value is treated as `true`.
let peekabooBridgeEnabledKey = "clawdis.peekabooBridgeEnabled"
let deepLinkAgentEnabledKey = "clawdis.deepLinkAgentEnabled"
let deepLinkKeyKey = "clawdis.deepLinkKey"
let modelCatalogPathKey = "clawdis.modelCatalogPath"

View File

@@ -58,53 +58,6 @@ enum ControlRequestHandler {
let result = await AgentRPC.shared.status()
return Response(ok: result.ok, message: result.error)
case .uiListScreens:
let screens = await MainActor.run { UIScreenService.listScreens() }
let payload = try JSONEncoder().encode(screens)
return Response(ok: true, payload: payload)
case let .uiScreenshot(screenIndex, windowID):
let authorized = await PermissionManager
.ensure([.screenRecording], interactive: false)[.screenRecording] ?? false
guard authorized else { return Response(ok: false, message: "screen recording permission missing") }
let resolution: (screenIndex: Int?, displayID: UInt32?) = await Task { @MainActor in
if let screenIndex,
let match = UIScreenService.listScreens().first(where: { $0.index == screenIndex })
{
return (screenIndex, match.displayID)
}
return (nil, nil)
}.value
let data = await Task { @MainActor in
await Screenshotter.capture(displayID: resolution.displayID, windowID: windowID)
}.value
guard let data else {
return Response(ok: false, message: "screenshot failed")
}
let dir = FileManager.default.temporaryDirectory.appendingPathComponent("clawdis-ui", isDirectory: true)
try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
let outURL = dir.appendingPathComponent("screenshot-\(Int(Date().timeIntervalSince1970 * 1000)).png")
do {
try data.write(to: outURL)
} catch {
return Response(ok: false, message: "failed to write screenshot: \(error.localizedDescription)")
}
let size = ScreenshotSize.readPNGSize(data: data)
let result = UIScreenshotResult(
path: outURL.path,
width: size?.width ?? 0,
height: size?.height ?? 0,
screenIndex: resolution.screenIndex,
displayID: resolution.displayID,
windowID: windowID)
let payload = try JSONEncoder().encode(result)
return Response(ok: true, payload: payload)
case let .runShell(command, cwd, env, timeoutSec, needsSR):
if needsSR {
let authorized = await PermissionManager

View File

@@ -57,6 +57,11 @@ struct GeneralSettings: View {
subtitle: "Allow the agent to show and control the Canvas panel.",
binding: self.$state.canvasEnabled)
SettingsToggleRow(
title: "Enable Peekaboo Bridge",
subtitle: "Allow signed tools to drive UI automation via `clawdis-mac ui …`.",
binding: self.$state.peekabooBridgeEnabled)
SettingsToggleRow(
title: "Enable debug tools",
subtitle: "Show the Debug tab with development utilities.",

View File

@@ -183,6 +183,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
Task { await HealthStore.shared.refresh(onDemand: true) }
Task { await PortGuardian.shared.sweep(mode: AppStateStore.shared.connectionMode) }
Task { await self.socketServer.start() }
Task { await PeekabooBridgeHostCoordinator.shared.setEnabled(AppStateStore.shared.peekabooBridgeEnabled) }
self.scheduleFirstRunOnboardingIfNeeded()
// Developer/testing helper: auto-open WebChat when launched with --webchat
@@ -202,6 +203,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
Task { await AgentRPC.shared.shutdown() }
Task { await GatewayConnection.shared.shutdown() }
Task { await self.socketServer.stop() }
Task { await PeekabooBridgeHostCoordinator.shared.stop() }
}
@MainActor

View File

@@ -0,0 +1,254 @@
import Foundation
import os
import PeekabooAutomationKit
import PeekabooBridge
import PeekabooFoundation
import PeekabooVisualizer
/// Owns the lifetime of the in-app PeekabooBridge host.
///
/// All state is confined to the main actor. `setEnabled(_:)` starts or stops the host;
/// `stop()` may also be called directly.
@MainActor
final class PeekabooBridgeHostCoordinator {
    static let shared = PeekabooBridgeHostCoordinator()

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "PeekabooBridge")

    /// The current host, or `nil` while the bridge is stopped.
    private var host: PeekabooBridgeHost?

    /// Service bundle handed to the server; stored and released together with `host`.
    private var services: ClawdisPeekabooBridgeServices?

    /// Starts the host when `enabled` is true (a no-op if one already exists), otherwise stops it.
    func setEnabled(_ enabled: Bool) async {
        guard enabled else {
            await self.stop()
            return
        }
        await self.startIfNeeded()
    }

    /// Stops the current host, if any, then drops the references to it and to its services.
    func stop() async {
        guard let running = self.host else { return }
        // NOTE(review): `host` stays non-nil across this await, so a `startIfNeeded()` that
        // interleaves here would return early — confirm whether that ordering can occur.
        await running.stop()
        self.host = nil
        self.services = nil
        self.logger.info("PeekabooBridge host stopped")
    }

    /// Builds the services, server and host, records them, and starts the host.
    /// Does nothing when a host is already recorded.
    private func startIfNeeded() async {
        if self.host != nil { return }

        let trustedTeams: Set<String> = ["Y5PE65HELJ"]
        let trustedBundles: Set<String> = []

        let bridgeServices = ClawdisPeekabooBridgeServices()
        let bridgeServer = PeekabooBridgeServer(
            services: bridgeServices,
            hostKind: .gui,
            allowlistedTeams: trustedTeams,
            allowlistedBundles: trustedBundles)
        let bridgeHost = PeekabooBridgeHost(
            socketPath: PeekabooBridgeConstants.clawdisSocketPath,
            server: bridgeServer,
            allowedTeamIDs: trustedTeams,
            requestTimeoutSec: 10)

        // Record the host before awaiting `start()` so a second call sees it and bails out.
        self.services = bridgeServices
        self.host = bridgeHost

        await bridgeHost.start()
        self.logger.info("PeekabooBridge host started at \(PeekabooBridgeConstants.clawdisSocketPath, privacy: .public)")
    }
}
/// Concrete `PeekabooBridgeServiceProviding` bundle used by the in-app bridge host.
///
/// Every service that takes a feedback client receives the same
/// `PeekabooVisualizerFeedbackClient`, and the automation service uses the same snapshot
/// manager that is exposed as `snapshots`.
@MainActor
private final class ClawdisPeekabooBridgeServices: PeekabooBridgeServiceProviding {
    let permissions: PermissionsService
    let screenCapture: any ScreenCaptureServiceProtocol
    let automation: any UIAutomationServiceProtocol
    let windows: any WindowManagementServiceProtocol
    let applications: any ApplicationServiceProtocol
    let menu: any MenuServiceProtocol
    let dock: any DockServiceProtocol
    let dialogs: any DialogServiceProtocol
    let snapshots: any SnapshotManagerProtocol

    init() {
        // Shared collaborators, created once and passed to each service below.
        let log = LoggingService(subsystem: "com.steipete.clawdis.peekaboo")
        let feedback = PeekabooVisualizerFeedbackClient(client: .shared)
        let snapshotStore = InMemorySnapshotManager(options: .init(
            snapshotValidityWindow: 600,
            maxSnapshots: 50,
            deleteArtifactsOnCleanup: false))
        let appService = ApplicationService(feedbackClient: feedback)

        // Screen capture is wrapped so that successful captures also trigger visual feedback.
        let plainCapture = ScreenCaptureService(loggingService: log)
        self.screenCapture = FeedbackScreenCaptureService(base: plainCapture, feedbackClient: feedback)

        self.permissions = PermissionsService()
        self.snapshots = snapshotStore
        self.applications = appService
        self.automation = UIAutomationService(
            snapshotManager: snapshotStore,
            loggingService: log,
            searchPolicy: .balanced,
            feedbackClient: feedback)
        self.windows = WindowManagementService(applicationService: appService, feedbackClient: feedback)
        self.menu = MenuService(applicationService: appService, feedbackClient: feedback)
        self.dock = DockService(feedbackClient: feedback)
        self.dialogs = DialogService(feedbackClient: feedback)
    }
}
/// Adapts a `VisualizationClient` to the `AutomationFeedbackClient` protocol.
///
/// Most methods forward their arguments unchanged. `showWindowOperation` and
/// `showSpaceSwitch` first translate the automation-side enum into the visualizer's enum.
@MainActor
private final class PeekabooVisualizerFeedbackClient: AutomationFeedbackClient {
    private let visualizer: VisualizationClient

    init(client: VisualizationClient) {
        self.visualizer = client
    }

    func connect() {
        self.visualizer.connect()
    }

    func showClickFeedback(at point: CGPoint, type: ClickType) async -> Bool {
        await self.visualizer.showClickFeedback(at: point, type: type)
    }

    func showTypingFeedback(keys: [String], duration: TimeInterval, cadence: TypingCadence) async -> Bool {
        await self.visualizer.showTypingFeedback(keys: keys, duration: duration, cadence: cadence)
    }

    func showScrollFeedback(at point: CGPoint, direction: ScrollDirection, amount: Int) async -> Bool {
        await self.visualizer.showScrollFeedback(at: point, direction: direction, amount: amount)
    }

    func showHotkeyDisplay(keys: [String], duration: TimeInterval) async -> Bool {
        await self.visualizer.showHotkeyDisplay(keys: keys, duration: duration)
    }

    func showSwipeGesture(from: CGPoint, to: CGPoint, duration: TimeInterval) async -> Bool {
        await self.visualizer.showSwipeGesture(from: from, to: to, duration: duration)
    }

    func showMouseMovement(from: CGPoint, to: CGPoint, duration: TimeInterval) async -> Bool {
        await self.visualizer.showMouseMovement(from: from, to: to, duration: duration)
    }

    /// Maps each `WindowOperationKind` case to the same-named `WindowOperation` before forwarding.
    func showWindowOperation(_ kind: WindowOperationKind, windowRect: CGRect, duration: TimeInterval) async -> Bool {
        let operation: WindowOperation
        switch kind {
        case .close:
            operation = .close
        case .minimize:
            operation = .minimize
        case .maximize:
            operation = .maximize
        case .move:
            operation = .move
        case .resize:
            operation = .resize
        case .setBounds:
            operation = .setBounds
        case .focus:
            operation = .focus
        }
        return await self.visualizer.showWindowOperation(operation, windowRect: windowRect, duration: duration)
    }

    func showDialogInteraction(
        element: DialogElementType,
        elementRect: CGRect,
        action: DialogActionType) async -> Bool
    {
        await self.visualizer.showDialogInteraction(element: element, elementRect: elementRect, action: action)
    }

    func showMenuNavigation(menuPath: [String]) async -> Bool {
        await self.visualizer.showMenuNavigation(menuPath: menuPath)
    }

    /// Forwards a space switch; any direction other than `.left` is sent as `.right`.
    func showSpaceSwitch(from: Int, to: Int, direction: SpaceSwitchDirection) async -> Bool {
        let target: SpaceDirection
        if direction == .left {
            target = .left
        } else {
            target = .right
        }
        return await self.visualizer.showSpaceSwitch(from: from, to: to, direction: target)
    }

    func showAppLaunch(appName: String, iconPath: String?) async -> Bool {
        await self.visualizer.showAppLaunch(appName: appName, iconPath: iconPath)
    }

    func showAppQuit(appName: String, iconPath: String?) async -> Bool {
        await self.visualizer.showAppQuit(appName: appName, iconPath: iconPath)
    }

    func showScreenshotFlash(in rect: CGRect) async -> Bool {
        await self.visualizer.showScreenshotFlash(in: rect)
    }

    func showWatchCapture(in rect: CGRect) async -> Bool {
        await self.visualizer.showWatchCapture(in: rect)
    }
}
/// Decorates another `ScreenCaptureServiceProtocol` so each successful capture is followed
/// by visual feedback on the captured region.
///
/// The capture itself is delegated to `base`. Feedback is shown only after the capture has
/// returned without throwing, and only when a rectangle for the captured region is available.
@MainActor
private final class FeedbackScreenCaptureService: ScreenCaptureServiceProtocol {
    private let base: any ScreenCaptureServiceProtocol
    private let feedbackClient: any AutomationFeedbackClient

    init(base: any ScreenCaptureServiceProtocol, feedbackClient: any AutomationFeedbackClient) {
        self.base = base
        self.feedbackClient = feedbackClient
    }

    /// Captures a display; feedback uses the bounds from the result's `displayInfo`.
    func captureScreen(
        displayIndex: Int?,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.base.captureScreen(
            displayIndex: displayIndex,
            visualizerMode: visualizerMode,
            scale: scale)
        let region = capture.metadata.displayInfo?.bounds
        await self.showCaptureFeedback(mode: visualizerMode, rect: region)
        return capture
    }

    /// Captures an application window; feedback uses the bounds from the result's `windowInfo`.
    func captureWindow(
        appIdentifier: String,
        windowIndex: Int?,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.base.captureWindow(
            appIdentifier: appIdentifier,
            windowIndex: windowIndex,
            visualizerMode: visualizerMode,
            scale: scale)
        let region = capture.metadata.windowInfo?.bounds
        await self.showCaptureFeedback(mode: visualizerMode, rect: region)
        return capture
    }

    /// Captures the frontmost target; feedback uses the bounds from the result's `windowInfo`.
    func captureFrontmost(
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.base.captureFrontmost(visualizerMode: visualizerMode, scale: scale)
        let region = capture.metadata.windowInfo?.bounds
        await self.showCaptureFeedback(mode: visualizerMode, rect: region)
        return capture
    }

    /// Captures an explicit rectangle; feedback is shown on that same rectangle.
    func captureArea(
        _ rect: CGRect,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let capture = try await self.base.captureArea(rect, visualizerMode: visualizerMode, scale: scale)
        await self.showCaptureFeedback(mode: visualizerMode, rect: rect)
        return capture
    }

    func hasScreenRecordingPermission() async -> Bool {
        await self.base.hasScreenRecordingPermission()
    }

    /// Shows the feedback matching `mode` on `rect`; does nothing when `rect` is `nil`.
    /// The Boolean returned by the feedback client is intentionally ignored.
    private func showCaptureFeedback(mode: CaptureVisualizerMode, rect: CGRect?) async {
        guard let region = rect else { return }
        switch mode {
        case .screenshotFlash:
            _ = await self.feedbackClient.showScreenshotFlash(in: region)
        case .watchCapture:
            _ = await self.feedbackClient.showWatchCapture(in: region)
        }
    }
}

View File

@@ -1,80 +0,0 @@
import AppKit
import CoreGraphics
import Foundation
@preconcurrency import ScreenCaptureKit
import VideoToolbox
/// One-shot screenshot helper built on ScreenCaptureKit.
enum Screenshotter {
    /// Captures a single frame of a window or a display and returns it as PNG data.
    ///
    /// - Parameters:
    ///   - displayID: Display to capture. When `nil`, the first display reported by
    ///     `SCShareableContent` is used.
    ///   - windowID: When it matches a shareable window, that window is captured instead of
    ///     the display.
    /// - Returns: PNG data, or `nil` when shareable content is unavailable, no capture target
    ///   can be resolved, or the stream output cannot be registered or the capture started.
    @MainActor
    static func capture(displayID: UInt32?, windowID: UInt32?) async -> Data? {
        guard let content = try? await SCShareableContent.current else { return nil }

        let targetDisplay: SCDisplay? = if let displayID {
            content.displays.first(where: { $0.displayID == displayID })
        } else {
            content.displays.first
        }

        // A matching window takes precedence over the display.
        let filter: SCContentFilter
        if let windowID, let win = content.windows.first(where: { $0.windowID == windowID }) {
            filter = SCContentFilter(desktopIndependentWindow: win)
        } else if let display = targetDisplay {
            filter = SCContentFilter(display: display, excludingWindows: [])
        } else {
            return nil
        }

        let config = SCStreamConfiguration()
        if let display = targetDisplay {
            config.width = display.width
            config.height = display.height
        }
        config.scalesToFit = true
        config.colorSpaceName = CGColorSpace.displayP3

        let stream = SCStream(filter: filter, configuration: config, delegate: nil)
        let grabber = FrameGrabber()
        do {
            // Registration must succeed: `awaitPNG()` is only ever resumed from the stream
            // output callback, so ignoring a failure here would suspend this call forever.
            try stream.addStreamOutput(
                grabber,
                type: .screen,
                sampleHandlerQueue: DispatchQueue(label: "com.steipete.clawdis.sshot"))
            try await stream.startCapture()
            let data = await grabber.awaitPNG()
            // Best effort: the frame is already in hand, so a failing stop is not reported.
            try? await stream.stopCapture()
            return data
        } catch {
            return nil
        }
    }
}
/// Receives frames from an `SCStream` and hands the first one that can be encoded to a
/// single awaiting caller as PNG data.
///
/// The stream callback runs on the sample-handler queue while `awaitPNG()` is called from
/// the capturing task, so all mutable state is guarded by `lock`. A frame that arrives
/// before anyone is waiting is buffered, so the later `awaitPNG()` returns immediately
/// instead of suspending forever.
final class FrameGrabber: NSObject, SCStreamOutput {
    private let lock = NSLock()
    /// The caller currently suspended in `awaitPNG()`, if any.
    private var continuation: CheckedContinuation<Data?, Never>?
    /// PNG produced before any caller was waiting; consumed by the next `awaitPNG()`.
    private var bufferedPNG: Data?
    /// Set once the first PNG has been produced; later frames are ignored.
    private var delivered = false

    /// Suspends until the first PNG frame is available and returns it.
    ///
    /// Intended for a single awaiter. When the frame was already produced, it is returned
    /// immediately; once it has been handed out, further calls return `nil` immediately.
    /// NOTE(review): if the stream never produces a usable frame this still never resumes.
    func awaitPNG() async -> Data? {
        await withCheckedContinuation { cont in
            self.lock.lock()
            guard self.delivered else {
                // No frame yet: park the continuation for the stream callback to resume.
                self.continuation = cont
                self.lock.unlock()
                return
            }
            let early = self.bufferedPNG
            self.bufferedPNG = nil
            self.lock.unlock()
            cont.resume(returning: early)
        }
    }

    nonisolated func stream(
        _ stream: SCStream,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
        of outputType: SCStreamOutputType)
    {
        guard outputType == .screen else { return }
        // Cheap early exit so frames after the first are not encoded needlessly.
        if self.hasDelivered() { return }
        guard let imageBuffer = sampleBuffer.imageBuffer else { return }
        var cgImage: CGImage?
        let result = VTCreateCGImageFromCVPixelBuffer(imageBuffer, options: nil, imageOut: &cgImage)
        guard result == noErr, let cgImage else { return }
        let rep = NSBitmapImageRep(cgImage: cgImage)
        guard let data = rep.representation(using: .png, properties: [:]) else { return }
        self.deliver(data)
    }

    /// Returns whether a PNG has already been produced.
    private func hasDelivered() -> Bool {
        self.lock.lock()
        defer { self.lock.unlock() }
        return self.delivered
    }

    /// Publishes `data` exactly once: resumes the waiting caller, or buffers it when nobody waits yet.
    private func deliver(_ data: Data) {
        self.lock.lock()
        if self.delivered {
            self.lock.unlock()
            return
        }
        self.delivered = true
        let waiter = self.continuation
        self.continuation = nil
        if waiter == nil {
            self.bufferedPNG = data
        }
        self.lock.unlock()
        // Resume outside the lock so the resumed task never runs while it is held.
        waiter?.resume(returning: data)
    }
}

View File

@@ -1,44 +0,0 @@
import AppKit
import ClawdisIPC
import CoreGraphics
/// Enumerates the attached screens for the UI automation IPC layer.
enum UIScreenService {
    /// Builds one `UIScreenInfo` per entry of `NSScreen.screens`, in array order.
    ///
    /// `index` is the position in that array, and `isPrimary` is true for the screen that
    /// compares equal to `NSScreen.main`.
    static func listScreens() -> [UIScreenInfo] {
        let attached = NSScreen.screens
        let mainScreen = NSScreen.main

        var infos: [UIScreenInfo] = []
        for (position, candidate) in attached.enumerated() {
            let info = UIScreenInfo(
                index: position,
                name: candidate.peekabooName,
                frame: candidate.frame,
                visibleFrame: candidate.visibleFrame,
                isPrimary: candidate == mainScreen,
                scaleFactor: candidate.backingScaleFactor,
                displayID: candidate.displayID)
            infos.append(info)
        }
        return infos
    }
}
private extension NSScreen {
    /// The value stored under `"NSScreenNumber"` in `deviceDescription`, or `0` when that
    /// entry is missing or is not an `NSNumber`.
    var displayID: UInt32 {
        let key = NSDeviceDescriptionKey("NSScreenNumber")
        guard let number = self.deviceDescription[key] as? NSNumber else { return 0 }
        return number.uint32Value
    }

    /// Match Peekaboo's `ScreenService` naming (built-in vs. resolution fallback).
    ///
    /// Yields `"Display"` when no display ID is available, `"Built-in Display"` for a
    /// built-in panel, `"<width>×<height> Display"` when a display mode can be read, and
    /// `"External Display"` otherwise.
    var peekabooName: String {
        let id = self.displayID
        if id == 0 { return "Display" }
        guard CGDisplayIsBuiltin(id) == 0 else { return "Built-in Display" }
        guard let mode = CGDisplayCopyDisplayMode(id) else { return "External Display" }
        return "\(mode.pixelWidth)×\(mode.pixelHeight) Display"
    }
}