diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index d625d101..1a12e4dd 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -50,7 +50,7 @@ For image shape and channels, please use `utils.ImageShape` and `utils.ColorChan ### Image color format and channels -To avoid image shape mismatch issues, and to keep code simpler, we standardize the image color format to BGRA. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (ie: `cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)` or `cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)`). +To avoid image shape mismatch issues, reduce memory usage, and keep code simpler, we standardize the image color format to BGR. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (e.g., `cv2.cvtColor(image, cv2.COLOR_RGB2BGR)` or `cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)`). Split Images' transparency is handled by first extracting a binary mask into `AutoSplitImage.mask`. ### Split-specific setting overrides diff --git a/src/AutoSplit.py b/src/AutoSplit.py index bb65efa8..0c6d3732 100755 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -87,8 +87,8 @@ def do_nothing(*_): ... 
) from user_profile import DEFAULT_PROFILE from utils import ( + ALPHA_CHANNEL_COUNT, AUTOSPLIT_VERSION, - BGRA_CHANNEL_COUNT, FROZEN, ONE_SECOND, RUNNING_WAYLAND, @@ -1087,7 +1087,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None): text = "\nor\n".join(self.split_image.texts) self.current_split_image.setText(f"Looking for OCR text:\n{text}") elif is_valid_image(self.split_image.byte_array): - set_preview_image(self.current_split_image, self.split_image.byte_array) + set_preview_image(self.current_split_image, self.split_image.get_preview_image()) self.current_image_file_label.setText(self.split_image.filename) self.table_current_image_threshold_label.setText( @@ -1160,7 +1160,7 @@ def set_preview_image(qlabel: QLabel, image: MatLike | None): else: height, width, channels = image.shape - if channels == BGRA_CHANNEL_COUNT: + if channels == ALPHA_CHANNEL_COUNT: image_format = QtGui.QImage.Format.Format_RGBA8888 capture = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA) else: diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 32078e6c..db8337f8 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -11,7 +11,7 @@ import error_messages from compare import extract_and_compare_text, get_comparison_method_by_index -from utils import MAXBYTE, TESSERACT_PATH, imread, is_valid_image +from utils import MAXBYTE, TESSERACT_PATH, ColorChannel, imread, is_valid_image if TYPE_CHECKING: from cv2.typing import MatLike @@ -175,16 +175,31 @@ def __read_image_bytes(self, path: str): interpolation=cv2.INTER_NEAREST, ) - # Mask based on adaptively resized, nearest neighbor interpolated split image + # Mask based on adaptively resized, nearest neighbor interpolated split image. + # This must happen before dropping the alpha channel below. 
self.mask = cv2.inRange(image, MASK_LOWER_BOUND, MASK_UPPER_BOUND) + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) else: image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST) - if transparency == ImageTransparency.NO_MASK_NO_ALPHA_CHANNEL: - # Add Alpha channel if missing - image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA) + # Captures are standardized to BGR, so drop the alpha channel if present + if transparency == ImageTransparency.NO_MASK_FULLY_SOLID: + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) self.byte_array = image + def get_preview_image(self): + """ + The comparison `byte_array` is stored as BGR, but the preview should + show the user the transparency. Re-attach the mask as the alpha channel. + """ + if self.byte_array is None: + return None + if self.mask is None: + return self.byte_array + preview = cv2.cvtColor(self.byte_array, cv2.COLOR_BGR2BGRA) + preview[:, :, ColorChannel.Alpha] = self.mask + return preview + def check_flag(self, flag: int): return self.flags & flag == flag diff --git a/src/capture_method/BitBltCaptureMethod.py b/src/capture_method/BitBltCaptureMethod.py index 31e6b180..e5ab48cb 100644 --- a/src/capture_method/BitBltCaptureMethod.py +++ b/src/capture_method/BitBltCaptureMethod.py @@ -5,6 +5,7 @@ import ctypes from typing import TYPE_CHECKING, override +import cv2 import numpy as np import pywintypes import win32con @@ -12,7 +13,7 @@ import win32ui from capture_method.CaptureMethodBase import CaptureMethodBase -from utils import BGRA_CHANNEL_COUNT, get_window_bounds, is_valid_hwnd, try_delete_dc +from utils import ALPHA_CHANNEL_COUNT, get_window_bounds, is_valid_hwnd, try_delete_dc if TYPE_CHECKING: from cv2.typing import MatLike @@ -77,7 +78,11 @@ def get_frame(self) -> MatLike | None: # Invalid handle or the window was closed while it was being manipulated return None - image = None if is_blank(image) else image.reshape((height, width, BGRA_CHANNEL_COUNT)) + if is_blank(image): + image = None + else: + 
image = image.reshape((height, width, ALPHA_CHANNEL_COUNT)) + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) # Cleanup DC and handle try_delete_dc(dc_object) diff --git a/src/capture_method/CaptureMethodBase.py b/src/capture_method/CaptureMethodBase.py index 3a937e7e..3331b589 100644 --- a/src/capture_method/CaptureMethodBase.py +++ b/src/capture_method/CaptureMethodBase.py @@ -33,7 +33,7 @@ def get_frame(self) -> MatLike | None: # noqa: PLR6301 Captures an image of the region for a window matching the given parameters of the bounding box. - @return: The image of the region in the window in BGRA format + @return: The image of the region in the window in BGR format """ return None diff --git a/src/capture_method/DesktopDuplicationCaptureMethod.py b/src/capture_method/DesktopDuplicationCaptureMethod.py index fc03206b..874b5a16 100644 --- a/src/capture_method/DesktopDuplicationCaptureMethod.py +++ b/src/capture_method/DesktopDuplicationCaptureMethod.py @@ -71,4 +71,4 @@ def get_frame(self): screenshot = self._desktop_duplication.screenshot((left, top, right, bottom)) if screenshot is None: return None - return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGRA) + return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGR) diff --git a/src/capture_method/ScrotCaptureMethod.py b/src/capture_method/ScrotCaptureMethod.py index 834a9686..19420196 100644 --- a/src/capture_method/ScrotCaptureMethod.py +++ b/src/capture_method/ScrotCaptureMethod.py @@ -34,7 +34,7 @@ def _scrot_screenshot(x: int, y: int, width: int, height: int): "-z", screenshot_file, )) - return imread(screenshot_file, cv2.IMREAD_COLOR_RGB) + return imread(screenshot_file, cv2.IMREAD_COLOR_BGR) except subprocess.CalledProcessError: # This can happen when trying to capture a region OOB # scrot is rude and prints directly to TTY, no stderr :/ @@ -73,9 +73,7 @@ def get_frame(self): selection["width"], selection["height"], ) - if not is_valid_image(image): - return None - return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA) + return 
image if is_valid_image(image) else None @override def recover_window(self, captured_window_title: str): diff --git a/src/capture_method/VideoCaptureDeviceCaptureMethod.py b/src/capture_method/VideoCaptureDeviceCaptureMethod.py index d824802d..c53d6ce6 100644 --- a/src/capture_method/VideoCaptureDeviceCaptureMethod.py +++ b/src/capture_method/VideoCaptureDeviceCaptureMethod.py @@ -143,7 +143,8 @@ def get_frame(self): y : y + selection["height"], x : x + selection["width"], ] - self.last_converted_frame = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA) + # `cv2.VideoCapture` frames are already BGR + self.last_converted_frame = image return self.last_converted_frame @override diff --git a/src/capture_method/WindowsGraphicsCaptureMethod.py b/src/capture_method/WindowsGraphicsCaptureMethod.py index 72e2a175..d65cafc3 100644 --- a/src/capture_method/WindowsGraphicsCaptureMethod.py +++ b/src/capture_method/WindowsGraphicsCaptureMethod.py @@ -7,6 +7,7 @@ import asyncio from typing import TYPE_CHECKING, cast, override +import cv2 import numpy as np import win32api import win32gui @@ -21,7 +22,7 @@ from capture_method.CaptureMethodBase import CaptureMethodBase from d3d11 import D3D11_CREATE_DEVICE_FLAG, D3D_DRIVER_TYPE, D3D11CreateDevice -from utils import BGRA_CHANNEL_COUNT, WGC_MIN_BUILD, WINDOWS_BUILD_NUMBER, is_valid_hwnd +from utils import ALPHA_CHANNEL_COUNT, WGC_MIN_BUILD, WINDOWS_BUILD_NUMBER, is_valid_hwnd if TYPE_CHECKING: from cv2.typing import MatLike @@ -156,11 +157,13 @@ def get_frame(self) -> MatLike | None: raise ValueError("Unable to obtain the BitmapBuffer from SoftwareBitmap.") reference = bitmap_buffer.create_reference() image = np.frombuffer(cast("bytes", reference), dtype=np.uint8) - image = image.reshape((self.size.height, self.size.width, BGRA_CHANNEL_COUNT)) + image = image.reshape((self.size.height, self.size.width, ALPHA_CHANNEL_COUNT)) image = image[ selection["y"] : selection["y"] + selection["height"], selection["x"] : selection["x"] + 
selection["width"], ] + # The OS hands us a native BGRA buffer; drop the unused alpha + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) self.last_converted_frame = image return image diff --git a/src/capture_method/XcbCaptureMethod.py b/src/capture_method/XcbCaptureMethod.py index 901df671..a5cfbfb5 100644 --- a/src/capture_method/XcbCaptureMethod.py +++ b/src/capture_method/XcbCaptureMethod.py @@ -68,7 +68,7 @@ def get_frame(self): image = np.array(image) if not is_valid_image(image): return None - return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA) + return cv2.cvtColor(image, cv2.COLOR_RGB2BGR) @override def recover_window(self, captured_window_title: str): diff --git a/src/compare.py b/src/compare.py index f9eecfdc..615239eb 100644 --- a/src/compare.py +++ b/src/compare.py @@ -6,7 +6,7 @@ import Levenshtein import numpy as np -from utils import MAXBYTE, ColorChannel, is_valid_image, run_tesseract +from utils import MAXBYTE, NO_ALPHA_CHANNEL_COUNT, ColorChannel, is_valid_image, run_tesseract if TYPE_CHECKING: from cv2.typing import MatLike @@ -15,7 +15,7 @@ CHANNELS = (ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value) HISTOGRAM_SIZE = (8, 8, 8) RANGES = (0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE) -MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE +MASK_SIZE_MULTIPLIER = NO_ALPHA_CHANNEL_COUNT * MAXBYTE * MAXBYTE MAX_VALUE = 1.0 CV2_PHASH_SIZE = 8 diff --git a/src/region_selection.py b/src/region_selection.py index 121ffafc..335602b2 100644 --- a/src/region_selection.py +++ b/src/region_selection.py @@ -13,7 +13,7 @@ import error_messages from capture_method import Region from utils import ( - BGR_CHANNEL_COUNT, + ALPHA_CHANNEL_COUNT, MAXBYTE, ImageShape, auto_split_directory, @@ -219,9 +219,9 @@ def align_region(autosplit: AutoSplit): return template = imread(template_filename, cv2.IMREAD_UNCHANGED) - # Add alpha channel to template if it's missing. 
- if template.shape[ImageShape.Channels] == BGR_CHANNEL_COUNT: - template = cv2.cvtColor(template, cv2.COLOR_BGR2BGRA) + # Captures are standardized to BGR, so drop the template's alpha channel if present + if template.shape[ImageShape.Channels] == ALPHA_CHANNEL_COUNT: + template = cv2.cvtColor(template, cv2.COLOR_BGRA2BGR) # Validate template is a valid image file if not is_valid_image(template): diff --git a/src/split_parser.py b/src/split_parser.py index 500e931b..b84f76cc 100644 --- a/src/split_parser.py +++ b/src/split_parser.py @@ -13,7 +13,7 @@ import error_messages from AutoSplitImage import RESET_KEYWORD, START_KEYWORD, AutoSplitImage, ImageType -from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image +from utils import ALPHA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image if sys.platform == "win32": from stat import FILE_ATTRIBUTE_HIDDEN, FILE_ATTRIBUTE_SYSTEM @@ -66,7 +66,7 @@ def get_image_transparency(image: MatLike): cheap, allocation-free reductions. The `ERROR_*` outcomes are rare and lead to a user-facing error, so they're allowed to be slow. """ - if image.shape[ImageShape.Channels] != BGRA_CHANNEL_COUNT: + if image.shape[ImageShape.Channels] != ALPHA_CHANNEL_COUNT: return ImageTransparency.NO_MASK_NO_ALPHA_CHANNEL, 0 alpha = image[:, :, ColorChannel.Alpha] # Fully opaque is the most common case; a single reduction rules it in. diff --git a/src/utils.py b/src/utils.py index 2b2b20a5..5bbd64f6 100644 --- a/src/utils.py +++ b/src/utils.py @@ -79,10 +79,10 @@ def find_tesseract_path(): MAXBYTE = 255 ONE_SECOND = 1000 """1000 milliseconds in 1 second""" -BGR_CHANNEL_COUNT = 3 -"""How many channels in a BGR image""" -BGRA_CHANNEL_COUNT = 4 -"""How many channels in a BGRA image""" +NO_ALPHA_CHANNEL_COUNT = 3 +"""How many channels in a BGR/RGB image""" +ALPHA_CHANNEL_COUNT = 4 +"""How many channels in a BGRA/RGBA image""" class ImageShape(IntEnum):