diff --git a/package.json b/package.json index 2b1bcb83..5870e5ae 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "build:main": "tsc -p tsconfig.main.json && cp src/main/emoji-data.json dist/main/emoji-data.json", "build:renderer": "vite build", "check:i18n": "node scripts/check-i18n.mjs", - "build:native": "mkdir -p dist/native && swiftc -O -o dist/native/get-selected-text src/native/get-selected-text.swift -framework Foundation -framework ApplicationServices && swiftc -O -o dist/native/color-picker src/native/color-picker.swift -framework AppKit && swiftc -O -o dist/native/keyboard-lock src/native/keyboard-lock.swift -framework CoreGraphics -framework Foundation && swiftc -O -o dist/native/screen-ocr src/native/screen-ocr.swift -framework AppKit -framework CoreGraphics -framework Foundation -framework Vision && swiftc -O -o dist/native/snippet-expander src/native/snippet-expander.swift -framework AppKit && swiftc -O -o dist/native/emoji-trigger-monitor src/native/emoji-trigger-monitor.swift src/native/ax-caret-query.swift -framework AppKit -framework ApplicationServices && swiftc -O -o dist/native/hotkey-hold-monitor src/native/hotkey-hold-monitor.swift -framework CoreGraphics -framework AppKit -framework Carbon && swiftc -O -o dist/native/speech-recognizer src/native/speech-recognizer.swift -framework Speech -framework AVFoundation && swiftc -O -o dist/native/microphone-access src/native/microphone-access.swift -framework AVFoundation && swiftc -O -o dist/native/input-monitoring-request src/native/input-monitoring-request.swift -framework CoreGraphics && swiftc -O -o dist/native/window-adjust src/native/window-adjust.swift -framework ApplicationServices -framework AppKit && swiftc -O -o dist/native/calendar-events src/native/calendar-events.swift -framework EventKit && swiftc -O -o dist/native/settings-coordinator src/native/settings-coordinator.swift -framework Foundation && cd src/native/native-helpers-addon && HOME=~/.electron-gyp npx node-gyp rebuild --target=$(node -e \"console.log(require('../../../node_modules/electron/package.json').version)\") --arch=$(node -e \"console.log(process.arch)\") --dist-url=https://electronjs.org/headers && cp build/Release/native_helpers.node ../../../dist/native/native_helpers.node && cd ../../.. && node scripts/build-whispercpp.mjs && node scripts/build-parakeet.mjs && node scripts/build-soulver-calculator.mjs", + "build:native": "mkdir -p dist/native && swiftc -O -o dist/native/get-selected-text src/native/get-selected-text.swift -framework Foundation -framework ApplicationServices -framework AppKit && swiftc -O -o dist/native/color-picker src/native/color-picker.swift -framework AppKit && swiftc -O -o dist/native/keyboard-lock src/native/keyboard-lock.swift -framework CoreGraphics -framework Foundation && swiftc -O -o dist/native/screen-ocr src/native/screen-ocr.swift -framework AppKit -framework CoreGraphics -framework Foundation -framework Vision && swiftc -O -o dist/native/snippet-expander src/native/snippet-expander.swift -framework AppKit && swiftc -O -o dist/native/emoji-trigger-monitor src/native/emoji-trigger-monitor.swift src/native/ax-caret-query.swift -framework AppKit -framework ApplicationServices && swiftc -O -o dist/native/hotkey-hold-monitor src/native/hotkey-hold-monitor.swift -framework CoreGraphics -framework AppKit -framework Carbon && swiftc -O -o dist/native/speech-recognizer src/native/speech-recognizer.swift -framework Speech -framework AVFoundation && swiftc -O -o dist/native/microphone-access src/native/microphone-access.swift -framework AVFoundation && swiftc -O -o dist/native/input-monitoring-request src/native/input-monitoring-request.swift -framework CoreGraphics && swiftc -O -o dist/native/window-adjust src/native/window-adjust.swift -framework ApplicationServices -framework AppKit && swiftc -O -o dist/native/calendar-events src/native/calendar-events.swift -framework EventKit && swiftc -O -o dist/native/settings-coordinator src/native/settings-coordinator.swift -framework Foundation && cd src/native/native-helpers-addon && HOME=~/.electron-gyp npx node-gyp rebuild --target=$(node -e \"console.log(require('../../../node_modules/electron/package.json').version)\") --arch=$(node -e \"console.log(process.arch)\") --dist-url=https://electronjs.org/headers && cp build/Release/native_helpers.node ../../../dist/native/native_helpers.node && cd ../../.. && node scripts/build-whispercpp.mjs && node scripts/build-parakeet.mjs && node scripts/build-soulver-calculator.mjs", "postinstall": "electron-builder install-app-deps", "start": "electron .", "package": "cross-env NODE_ENV=production npm run build && electron-builder", diff --git a/src/main/clipboard-manager.ts b/src/main/clipboard-manager.ts index 1c54587f..c4affb35 100644 --- a/src/main/clipboard-manager.ts +++ b/src/main/clipboard-manager.ts @@ -654,6 +654,7 @@ function getClipboardImageFingerprint(): { }; } + } catch {} return { fingerprint: '' }; } diff --git a/src/main/main.ts b/src/main/main.ts index ac2f625e..115ee416 100644 --- a/src/main/main.ts +++ b/src/main/main.ts @@ -2076,6 +2076,8 @@ function computeDetachedPopupPosition( let mainWindow: InstanceType | null = null; let promptWindow: InstanceType | null = null; let promptWindowPrewarmScheduled = false; +let promptRendererReady = false; +let pendingPromptWindowShown: { mode: string; selectedTextSnapshot: string } | null = null; let memoryStatusWindow: InstanceType | null = null; let memoryStatusHideTimer: NodeJS.Timeout | null = null; let memoryStatusRenderSeq = 0; @@ -5611,13 +5613,13 @@ async function captureSelectionSnapshotBeforeShow(options?: { allowClipboardFall rememberSelectionSnapshot(''); return ''; } - // Skip System Events during window-show if permission hasn't been confirmed - // yet, to avoid triggering the macOS Automation dialog unexpectedly. - if (!systemEventsPermissionConfirmed) { + const allowClipboardFallback = options?.allowClipboardFallback === true; + // Skip only the System Events fallback during window-show if permission + // has not been confirmed. AX selection reads do not require Automation. + if (allowClipboardFallback && !systemEventsPermissionConfirmed) { rememberSelectionSnapshot(''); return ''; } - const allowClipboardFallback = options?.allowClipboardFallback === true; try { const selected = String( await getSelectedTextForSpeak({ allowClipboardFallback, clipboardWaitMs: 90 }) || '' @@ -7703,6 +7705,7 @@ function getDefaultPromptWindowBounds(): { x: number; y: number; width: number; function createPromptWindow(initialBounds?: { x: number; y: number; width: number; height: number }): void { if (promptWindow && !promptWindow.isDestroyed()) return; + promptRendererReady = false; const useNativeLiquidGlass = shouldUseNativeLiquidGlass(); const bounds = initialBounds || getDefaultPromptWindowBounds(); promptWindow = new BrowserWindow({ @@ -7740,7 +7743,27 @@ function createPromptWindow(initialBounds?: { x: number; y: number; width: numbe loadWindowUrl(promptWindow, '/prompt'); promptWindow.on('closed', () => { promptWindow = null; - }); + promptRendererReady = false; + pendingPromptWindowShown = null; + }); + + // Defer any queued window-shown until the React app has mounted. + // 'did-finish-load' fires before React mounts (dynamic import chunks), so we + // wait for the explicit 'renderer-ready' signal from PromptApp instead. + const capturedWindow = promptWindow; + const onPromptRendererReady = (event: Electron.IpcMainEvent) => { + if (!capturedWindow || capturedWindow.isDestroyed()) return; + if (event.sender !== capturedWindow.webContents) { + ipcMain.once('renderer-ready', onPromptRendererReady); + return; + } + promptRendererReady = true; + if (pendingPromptWindowShown) { + capturedWindow.webContents.send('window-shown', pendingPromptWindowShown); + pendingPromptWindowShown = null; + } + }; + ipcMain.once('renderer-ready', onPromptRendererReady); } function schedulePromptWindowPrewarm(): void { @@ -7768,10 +7791,14 @@ function showPromptWindow( promptWindow.moveTop(); promptWindow.webContents.focus(); const selectedTextSnapshot = String(getRecentSelectionSnapshot() || lastCursorPromptSelection || '').trim(); - promptWindow.webContents.send('window-shown', { - mode: 'prompt', - selectedTextSnapshot, - }); + const payload = { mode: 'prompt', selectedTextSnapshot }; + if (promptRendererReady) { + promptWindow.webContents.send('window-shown', payload); + } else { + // Renderer hasn't mounted yet (first open) — the createPromptWindow + // ipcMain.once('renderer-ready') handler will deliver this once PromptApp mounts. + pendingPromptWindowShown = payload; + } } function hidePromptWindow(): void { @@ -9758,7 +9785,7 @@ async function runCommandById(commandId: string, source: 'launcher' | 'hotkey' | const isLauncherPath = source === 'launcher'; const selectionPromise = isLauncherPath ? Promise.resolve('') - : getSelectedTextForSpeak({ allowClipboardFallback: true }); + : getSelectedTextForSpeak({ allowClipboardFallback: false }); // Caret/input captures must happen synchronously before focus shifts. const earlyCaretRect = isLauncherPath ? null : getTypingCaretRect(); @@ -9774,10 +9801,9 @@ async function runCommandById(commandId: string, source: 'launcher' | 'hotkey' | return true; } - // Await the selection. For the hotkey path the AX query has typically - // already resolved during the ~150 ms caret capture above, so this adds - // no measurable delay. Cmd+C clipboard fallback also reaches the right - // target because the original app is still frontmost here. + // Await the selection. For the hotkey path the native AX query has + // typically resolved during the caret capture above, so this adds no + // measurable delay and does not touch the user's clipboard. const selectedBeforeOpenRaw = String( (await selectionPromise) || getRecentSelectionSnapshot() || lastCursorPromptSelection || '' ); diff --git a/src/native/get-selected-text.swift b/src/native/get-selected-text.swift index 0351b521..7e4f4112 100644 --- a/src/native/get-selected-text.swift +++ b/src/native/get-selected-text.swift @@ -1,54 +1,230 @@ import Foundation import ApplicationServices +import AppKit -// Use AXUIElementCreateSystemWide to get the focused application directly — -// no NSWorkspace / AppKit needed, keeps startup overhead minimal (~10 ms). -let systemElement = AXUIElementCreateSystemWide() +private let debugEnabled = ProcessInfo.processInfo.environment["GET_SELECTED_TEXT_DEBUG"] == "1" -var focusedAppRaw: AnyObject? -guard AXUIElementCopyAttributeValue(systemElement, kAXFocusedApplicationAttribute as CFString, &focusedAppRaw) == .success, - let focusedApp = focusedAppRaw else { - exit(0) +private func dbg(_ message: @autoclosure () -> String) { + if debugEnabled { + FileHandle.standardError.write(Data(("[get-selected-text] " + message() + "\n").utf8)) + } } -let appElement = focusedApp as! AXUIElement -var focusedRaw: AnyObject? -guard AXUIElementCopyAttributeValue(appElement, kAXFocusedUIElementAttribute as CFString, &focusedRaw) == .success, - let focused = focusedRaw else { - exit(0) +private func writeAndExit(_ text: String) -> Never { + if !text.isEmpty { + FileHandle.standardOutput.write(Data(text.utf8)) + } + exit(0) } -let focusedElement = focused as! AXUIElement -// 1. kAXSelectedTextAttribute — supported by most native text controls. -var selectedRaw: AnyObject? -if AXUIElementCopyAttributeValue(focusedElement, kAXSelectedTextAttribute as CFString, &selectedRaw) == .success, - let text = selectedRaw as? String, !text.isEmpty { - FileHandle.standardOutput.write(text.data(using: .utf8)!) - exit(0) +private func copyAttribute(_ element: AXUIElement, _ attribute: CFString) -> AnyObject? { + var raw: AnyObject? + let err = AXUIElementCopyAttributeValue(element, attribute, &raw) + if err != .success { + dbg("attribute \(attribute) err=\(err.rawValue)") + return nil + } + return raw } -// 2. Fall back: derive selection from kAXSelectedTextRangeAttribute + kAXValueAttribute. -// Works for controls that expose a range but not the text slice directly. -var rangeRaw: AnyObject? -var valueRaw: AnyObject? -guard AXUIElementCopyAttributeValue(focusedElement, kAXSelectedTextRangeAttribute as CFString, &rangeRaw) == .success, - let rangeVal = rangeRaw, - AXUIElementCopyAttributeValue(focusedElement, kAXValueAttribute as CFString, &valueRaw) == .success, - let fullText = valueRaw as? String else { - exit(0) +private func copyParameterizedAttribute(_ element: AXUIElement, _ attribute: CFString, _ parameter: AnyObject) -> AnyObject? { + var raw: AnyObject? + let err = AXUIElementCopyParameterizedAttributeValue(element, attribute, parameter, &raw) + if err != .success { + dbg("parameterized \(attribute) err=\(err.rawValue)") + return nil + } + return raw } -var cfRange = CFRange(location: 0, length: 0) -AXValueGetValue(rangeVal as! AXValue, .cfRange, &cfRange) -guard cfRange.length > 0 else { exit(0) } +private func stringFromAXResult(_ raw: AnyObject?) -> String? { + guard let raw else { return nil } + if let text = raw as? String { return text.isEmpty ? nil : text } + if let attributed = raw as? NSAttributedString { + let text = attributed.string + return text.isEmpty ? nil : text + } + if CFGetTypeID(raw) == CFAttributedStringGetTypeID() { + let attributed = raw as! NSAttributedString + let text = attributed.string + return text.isEmpty ? nil : text + } + return nil +} + +private func selectedRangeValue(_ element: AXUIElement) -> AnyObject? { + guard let rangeValue = copyAttribute(element, kAXSelectedTextRangeAttribute as CFString) else { + return nil + } + var range = CFRange(location: 0, length: 0) + guard AXValueGetValue(rangeValue as! AXValue, .cfRange, &range), range.length > 0 else { + return nil + } + return rangeValue +} + +private func selectedTextViaValueRange(_ element: AXUIElement, _ rangeValue: AnyObject) -> String? { + guard let fullText = copyAttribute(element, kAXValueAttribute as CFString) as? String else { + return nil + } + var range = CFRange(location: 0, length: 0) + guard AXValueGetValue(rangeValue as! AXValue, .cfRange, &range), range.length > 0 else { + return nil + } + + // CFRange from AX text controls is expressed in UTF-16 offsets. + let utf16 = fullText.utf16 + guard let startIdx = utf16.index(utf16.startIndex, offsetBy: range.location, limitedBy: utf16.endIndex), + let endIdx = utf16.index(startIdx, offsetBy: range.length, limitedBy: utf16.endIndex), + let slice = String(utf16[startIdx.. String? { + let attributes: [CFString] = [ + kAXStringForRangeParameterizedAttribute as CFString, + kAXAttributedStringForRangeParameterizedAttribute as CFString, + "AXStringForRange" as CFString, + "AXAttributedStringForRange" as CFString, + ] + for attribute in attributes { + if let text = stringFromAXResult(copyParameterizedAttribute(element, attribute, rangeValue)) { + return text + } + } + return nil +} + +private func selectedTextViaTextMarkerRange(_ element: AXUIElement) -> String? { + guard let markerRange = copyAttribute(element, "AXSelectedTextMarkerRange" as CFString) else { + return nil + } + + let attributes: [CFString] = [ + "AXStringForTextMarkerRange" as CFString, + "AXAttributedStringForTextMarkerRange" as CFString, + ] + for attribute in attributes { + if let text = stringFromAXResult(copyParameterizedAttribute(element, attribute, markerRange)) { + return text + } + } + return nil +} + +private func selectedTextFromElement(_ element: AXUIElement) -> String? { + let role = copyAttribute(element, kAXRoleAttribute as CFString) as? String ?? "" + let subrole = copyAttribute(element, kAXSubroleAttribute as CFString) as? String ?? "" + if role == "AXSecureTextField" || subrole == (kAXSecureTextFieldSubrole as String) { + return nil + } + + if let text = stringFromAXResult(copyAttribute(element, kAXSelectedTextAttribute as CFString)) { + return text + } + if let text = selectedTextViaTextMarkerRange(element) { + return text + } + if let rangeValue = selectedRangeValue(element) { + if let text = selectedTextViaRangeParameterizedAttribute(element, rangeValue) { + return text + } + if let text = selectedTextViaValueRange(element, rangeValue) { + return text + } + } + return nil +} + +private func axElementFromRaw(_ raw: AnyObject?) -> AXUIElement? { + guard let raw, CFGetTypeID(raw) == AXUIElementGetTypeID() else { + return nil + } + return (raw as! AXUIElement) +} + +private func enqueueFocusedChild(of element: AXUIElement, depth: Int, into queue: inout [(AXUIElement, Int)]) { + if let focused = axElementFromRaw(copyAttribute(element, kAXFocusedUIElementAttribute as CFString)) { + queue.append((focused, depth + 1)) + } +} -// CFRange uses UTF-16 offsets. -let utf16 = fullText.utf16 -guard let startIdx = utf16.index(utf16.startIndex, offsetBy: cfRange.location, limitedBy: utf16.endIndex), - let endIdx = utf16.index(startIdx, offsetBy: cfRange.length, limitedBy: utf16.endIndex) else { - exit(0) +private func enqueueChildren(of element: AXUIElement, depth: Int, into queue: inout [(AXUIElement, Int)]) { + guard let children = copyAttribute(element, kAXChildrenAttribute as CFString) as? [AXUIElement] else { + return + } + for child in children { + queue.append((child, depth + 1)) + } } -if let slice = String(utf16[startIdx.. String? { + var queue = roots.map { ($0, 0) } + var inspected = 0 + let maxDepth = 8 + let maxElements = 240 + + while let (element, depth) = queue.first { + queue.removeFirst() + inspected += 1 + if inspected > maxElements { break } + + if let text = selectedTextFromElement(element) { + dbg("selected text found at depth \(depth)") + return text + } + if depth >= maxDepth { continue } + + enqueueFocusedChild(of: element, depth: depth, into: &queue) + enqueueChildren(of: element, depth: depth, into: &queue) + } + return nil +} + +private func frontmostApplicationElement() -> AXUIElement? { + guard let frontApp = NSWorkspace.shared.frontmostApplication else { + return nil + } + let appElement = AXUIElementCreateApplication(frontApp.processIdentifier) + + // Chromium/Electron apps often expose richer text-marker attributes only + // after these AX opt-in flags have been set. They are idempotent. + AXUIElementSetAttributeValue(appElement, "AXEnhancedUserInterface" as CFString, kCFBooleanTrue) + AXUIElementSetAttributeValue(appElement, "AXManualAccessibility" as CFString, kCFBooleanTrue) + + return appElement } -exit(0) + +private func focusedElementRoots() -> [AXUIElement] { + var roots: [AXUIElement] = [] + + if let appElement = frontmostApplicationElement() { + var focused = axElementFromRaw(copyAttribute(appElement, kAXFocusedUIElementAttribute as CFString)) + if focused == nil { + Thread.sleep(forTimeInterval: 0.06) + focused = axElementFromRaw(copyAttribute(appElement, kAXFocusedUIElementAttribute as CFString)) + } + if let focused { + roots.append(focused) + } + if let focusedWindow = axElementFromRaw(copyAttribute(appElement, kAXFocusedWindowAttribute as CFString)) { + roots.append(focusedWindow) + } + } + + let systemElement = AXUIElementCreateSystemWide() + if let focused = axElementFromRaw(copyAttribute(systemElement, kAXFocusedUIElementAttribute as CFString)) { + roots.append(focused) + } + + return roots +} + +if let text = findSelectedText(from: focusedElementRoots()) { + writeAndExit(text) +} + +writeAndExit("") diff --git a/src/renderer/src/PromptApp.tsx b/src/renderer/src/PromptApp.tsx index 85a950d5..6c998f66 100644 --- a/src/renderer/src/PromptApp.tsx +++ b/src/renderer/src/PromptApp.tsx @@ -137,6 +137,10 @@ const PromptApp: React.FC = () => { return () => clearTimeout(timer); }, []); + useEffect(() => { + window.electron.rendererReady(); + }, []); + useEffect(() => { const cleanupWindowShown = window.electron.onWindowShown((payload) => { if (payload?.mode !== 'prompt') return;