"""Thin wrappers around ydotool, kdotool and spectacle for desktop automation."""

import math
import re
import subprocess
import tempfile
import time
from pathlib import Path

import cv2
from loguru import logger

__all__ = ["sendKey", "getWindowGeo", "focusWindow", "moveMouse", "capActiveWindow"]

# Coordinates may be negative (e.g. a window partly off the left/top edge), so
# the position pattern accepts an optional leading minus sign.
_re_window_position = re.compile(r"^\s*Position:\s*(-?[\d\.]+)\s*,\s*(-?[\d\.]+)")
_re_window_geometry = re.compile(r"^\s*Geometry:\s*([\d\.]+)\s*x\s*([\d\.]+)")

### Begin LLM generated
# ydotool / uinput keycode mapping
KEY_TO_CODE = {
    # Letters
    "a": 30,
    "b": 48,
    "c": 46,
    "d": 32,
    "e": 18,
    "f": 33,
    "g": 34,
    "h": 35,
    "i": 23,
    "j": 36,
    "k": 37,
    "l": 38,
    "m": 50,
    "n": 49,
    "o": 24,
    "p": 25,
    "q": 16,
    "r": 19,
    "s": 31,
    "t": 20,
    "u": 22,
    "v": 47,
    "w": 17,
    "x": 45,
    "y": 21,
    "z": 44,
    # Numbers (Row)
    "1": 2,
    "2": 3,
    "3": 4,
    "4": 5,
    "5": 6,
    "6": 7,
    "7": 8,
    "8": 9,
    "9": 10,
    "0": 11,
    # Control Keys
    "esc": 1,
    "backspace": 14,
    "tab": 15,
    "enter": 28,
    "space": 57,
    "capslock": 58,
    "menu": 139,
    "delete": 111,
    "insert": 110,
    # Modifiers
    "l_ctrl": 29,
    "r_ctrl": 97,
    "l_shift": 42,
    "r_shift": 54,
    "l_alt": 56,
    "r_alt": 100,
    "super": 125,  # Windows/Meta key
    "meta": 125,
    # Navigation
    "up": 103,
    "down": 108,
    "left": 105,
    "right": 106,
    "pageup": 104,
    "pagedown": 109,
    "home": 102,
    "end": 107,
    # Punctuation
    "minus": 12,
    "equal": 13,
    "leftbrace": 26,
    "rightbrace": 27,
    "semicolon": 39,
    "apostrophe": 40,
    "grave": 41,  # `
    "backslash": 43,
    "comma": 51,
    "dot": 52,
    "slash": 53,
}

# Reverse mapping. "super" and "meta" share code 125; the later entry ("meta")
# is the one that survives here.
CODE_TO_KEY = {v: k for k, v in KEY_TO_CODE.items()}
### End LLM generated


def _resolve_keycode(key: str | int, lookup: dict[str, int], what: str) -> int:
    """Translate a key name or raw keycode into an integer keycode.

    Args:
        key: Key name (looked up case-insensitively in ``lookup``) or a raw
            integer keycode, which is returned unchanged.
        lookup: Mapping of lowercase key names to keycodes.
        what: Name of the ``sendKey`` parameter being resolved; used only in
            the error message.

    Raises:
        KeyError: ``key`` is a string that is not present in ``lookup``.
        ValueError: ``key`` is neither ``str`` nor ``int``.
    """
    if isinstance(key, str):
        return lookup[key.lower()]
    if isinstance(key, int):
        return key
    raise ValueError(f"sendKey's '{what}' must be str or int")


def sendKey(
    thekey: str | int,
    count: int = 1,
    modifier: str | int | list[str | int] | None = None,
    cycle_delay: float = 0.1,
    sub_cycle_delay: float = 0.05,
    custom_lookup: dict[str, int] | None = None,
):
    """Press and release a key ``count`` times via ``ydotool key``.

    Args:
        thekey: Key name from ``KEY_TO_CODE`` (case-insensitive) or a raw
            integer keycode.
        count: Number of press/release cycles to send.
        modifier: One modifier, or a list/tuple of modifiers, held down for
            the duration of all cycles. Each may be a key name or a keycode.
        cycle_delay: Seconds to sleep after each press/release cycle.
        sub_cycle_delay: Seconds to sleep after each modifier press/release.
        custom_lookup: Extra name -> keycode entries merged over
            ``KEY_TO_CODE``. Names are looked up after ``.lower()``, so keys
            in this dict must be lowercase to be reachable.

    Raises:
        KeyError: A key or modifier name is not in the lookup table.
        ValueError: A key or modifier is neither ``str`` nor ``int``.
        subprocess.CalledProcessError: ``ydotool`` exited with an error while
            pressing a modifier or sending the key.
    """
    # Include a custom lookup if provided
    if isinstance(custom_lookup, dict):
        lookup = KEY_TO_CODE | custom_lookup
    else:
        lookup = KEY_TO_CODE

    keycode = _resolve_keycode(thekey, lookup, "thekey")

    # Normalize the modifier argument to a flat list of keycodes. This is done
    # before anything is pressed, so a bad name cannot leave a key held down.
    if modifier is None:
        raw_mods = []
    elif isinstance(modifier, (list, tuple)):
        raw_mods = list(modifier)
    else:
        raw_mods = [modifier]
    modlist = [_resolve_keycode(mod, lookup, "modifier") for mod in raw_mods]

    # We MUST 'release' any modifiers we 'press'
    usedmods: list[int] = []
    try:
        for mod in modlist:
            subprocess.run(["ydotool", "key", f"{mod}:1"], check=True)
            usedmods.append(mod)
            time.sleep(sub_cycle_delay)

        for _ in range(count):
            subprocess.run(["ydotool", "key", f"{keycode}:1", f"{keycode}:0"], check=True)
            time.sleep(cycle_delay)
    finally:
        # Release everything we pressed in reverse order
        for mod in reversed(usedmods):
            try:
                subprocess.run(["ydotool", "key", f"{mod}:0"], check=True)
                time.sleep(sub_cycle_delay)
            except Exception:
                # Best effort: keep releasing the remaining modifiers even if
                # one release fails.
                logger.critical(f"Modifier {mod} is stuck down!")


def getWindowGeo(query: str, exception_on_missing: bool = True) -> tuple[tuple[int, int], tuple[int, int]]:
    """Look up a window's position and size with ``kdotool``.

    Runs ``kdotool search <query> getwindowgeometry`` and parses the
    ``Position:`` and ``Geometry:`` lines of its output. Values are floored to
    integers. If the output contains several such lines, the last one of each
    kind wins.

    Args:
        query: Search pattern handed to ``kdotool search``.
        exception_on_missing: When true, raise if either the position or the
            geometry line is absent; otherwise log a warning and return zeros
            for the missing part.

    Returns:
        ``((pos_x, pos_y), (geo_x, geo_y))``. The geometry pair holds the two
        numbers of the ``Geometry: AxB`` line in order (presumably width and
        height). If ``kdotool`` exits with an error, the sentinel
        ``((-999, -999), (-999, -999))`` is returned instead.

    Raises:
        ValueError: The output was incomplete and ``exception_on_missing`` is
            true.
    """
    try:
        result = subprocess.run(
            ["kdotool", "search", query, "getwindowgeometry"], check=True, capture_output=True, text=True
        )
    except subprocess.CalledProcessError as e:
        # The exception is interpolated into the message itself; passing it as
        # an extra positional argument would make loguru treat it as a format
        # argument and drop it.
        logger.error(f"Error: Could not find window '{query}'. {e}")
        return ((-999, -999), (-999, -999))

    geo_x = 0
    geo_y = 0
    pos_x = 0
    pos_y = 0
    got_pos = False
    got_geo = False

    for line in result.stdout.splitlines():
        if m := _re_window_geometry.match(line):
            geo_x = math.floor(float(m.group(1)))
            geo_y = math.floor(float(m.group(2)))
            got_geo = True
        elif m := _re_window_position.match(line):
            pos_x = math.floor(float(m.group(1)))
            pos_y = math.floor(float(m.group(2)))
            got_pos = True

    if not (got_geo and got_pos):
        if exception_on_missing:
            raise ValueError("Incomplete window information", query, got_pos, got_geo)
        logger.warning(f"getwindowgeometry returned partial information. {got_geo=} {got_pos=}")

    return ((pos_x, pos_y), (geo_x, geo_y))


def focusWindow(query: str) -> bool:
    """Activate the window(s) matching ``query`` via ``kdotool``.

    Returns:
        ``True`` if ``kdotool search <query> windowactivate`` succeeded,
        ``False`` (after logging an error) if it exited with an error.
    """
    try:
        subprocess.run(["kdotool", "search", query, "windowactivate"], check=True)
        return True
    except subprocess.CalledProcessError:
        logger.error(f"Could not find window '{query}'")
        return False


def moveMouse(x: int, y: int) -> bool:
    """Move the mouse with ``ydotool mousemove -x <x> -y <y>``.

    Returns:
        ``True`` on success, ``False`` (after logging an error) if ``ydotool``
        exited with an error.
    """
    try:
        subprocess.run(["ydotool", "mousemove", "-x", str(x), "-y", str(y)], check=True)
        return True
    except subprocess.CalledProcessError as e:
        logger.error(f"Error while using ydotool to move mouse ({x},{y}). {e}")
        return False


# TODO: Add getMonitorLayoutInfo


def capActiveWindow_cv2(trim_transparent_border: bool = False):
    """Capture a screenshot with ``spectacle`` and load it as an OpenCV image.

    The capture is written to a temporary PNG which is read back with
    ``cv2.IMREAD_UNCHANGED`` (so an alpha channel, if present, is preserved)
    and removed when the temporary directory is cleaned up.

    Args:
        trim_transparent_border: When true, crop the image to the bounding
            box of pixels whose alpha value exceeds an internal threshold. If
            the image has no alpha channel, or no pixel passes the threshold,
            the untrimmed image is returned and a warning is logged.

    Returns:
        The image array as returned by ``cv2.imread`` (optionally cropped).

    Raises:
        subprocess.CalledProcessError: ``spectacle`` exited with an error.
        FileNotFoundError: ``spectacle`` did not produce the output file.
        Exception: The output file could not be decoded by OpenCV.
    """
    _alpha_threshold = 230

    with tempfile.TemporaryDirectory() as tdir:
        img_file = Path(tdir).joinpath("cap_image.png")

        # TODO: Handle different capture software
        # NOTE(review): flags presumably mean background / no notification /
        # active window / output file - confirm against `spectacle --help`.
        capture_cmd = ["spectacle", "-b", "-n", "-a", "-o", str(img_file.resolve())]
        subprocess.run(capture_cmd, check=True)

        if not img_file.exists():
            raise FileNotFoundError("Temporary file of captured image does not exist", img_file)

        img = cv2.imread(str(img_file.resolve()), cv2.IMREAD_UNCHANGED)
        if img is None:
            raise Exception("Unable to load image", img_file)

        if not trim_transparent_border:
            return img

        # Trimming needs a fourth (alpha) channel; without one there is no
        # transparent border to remove.
        if img.ndim < 3 or img.shape[2] < 4:
            logger.warning("Captured image has no alpha channel; returning it untrimmed")
            return img

        alpha = img[:, :, 3]
        _, thresh = cv2.threshold(alpha, _alpha_threshold, 255, cv2.THRESH_BINARY)

        # Guard against an image where no pixel passes the threshold, in which
        # case there is nothing to compute a bounding box from.
        opaque_points = cv2.findNonZero(thresh)
        if opaque_points is None:
            logger.warning("Captured image has no opaque pixels; returning it untrimmed")
            return img

        x, y, w, h = cv2.boundingRect(opaque_points)
        logger.trace(f"Bounding box: (x={x}, y={y}), size: {w}x{h}")
        return img[y : y + h, x : x + w]


def capActiveWindow(use_opencv: bool = True, trim_transparent_border: bool = False):
    """Capture the active window using the selected graphics backend.

    Args:
        use_opencv: Must be true; OpenCV is the only backend implemented.
        trim_transparent_border: Forwarded to ``capActiveWindow_cv2``.

    Raises:
        NotImplementedError: ``use_opencv`` is false.
    """
    if use_opencv:
        return capActiveWindow_cv2(trim_transparent_border=trim_transparent_border)

    raise NotImplementedError("opencv is the only graphics backend implemented currently")