Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ For image shape and channels, please use `utils.ImageShape` and `utils.ColorChan

### Image color format and channels

To avoid image shape mismatch issues, and to keep code simpler, we standardize the image color format to BGRA. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (ie: `cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)` or `cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)`).
To avoid image shape mismatch issues, reduce memory usage, and keep code simpler, we standardize the image color format to BGR. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (e.g., `cv2.cvtColor(image, cv2.COLOR_RGB2BGR)` or `cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)`). Split images' transparency is handled by extracting a binary mask into `AutoSplitImage.mask` before the alpha channel is dropped.

### Split-specific setting overrides

Expand Down
6 changes: 3 additions & 3 deletions src/AutoSplit.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ def do_nothing(*_): ...
)
from user_profile import DEFAULT_PROFILE
from utils import (
ALPHA_CHANNEL_COUNT,
AUTOSPLIT_VERSION,
BGRA_CHANNEL_COUNT,
FROZEN,
ONE_SECOND,
RUNNING_WAYLAND,
Expand Down Expand Up @@ -1087,7 +1087,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None):
text = "\nor\n".join(self.split_image.texts)
self.current_split_image.setText(f"Looking for OCR text:\n{text}")
elif is_valid_image(self.split_image.byte_array):
set_preview_image(self.current_split_image, self.split_image.byte_array)
set_preview_image(self.current_split_image, self.split_image.get_preview_image())

self.current_image_file_label.setText(self.split_image.filename)
self.table_current_image_threshold_label.setText(
Expand Down Expand Up @@ -1160,7 +1160,7 @@ def set_preview_image(qlabel: QLabel, image: MatLike | None):
else:
height, width, channels = image.shape

if channels == BGRA_CHANNEL_COUNT:
if channels == ALPHA_CHANNEL_COUNT:
image_format = QtGui.QImage.Format.Format_RGBA8888
capture = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
else:
Expand Down
25 changes: 20 additions & 5 deletions src/AutoSplitImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import error_messages
from compare import extract_and_compare_text, get_comparison_method_by_index
from utils import MAXBYTE, TESSERACT_PATH, imread, is_valid_image
from utils import MAXBYTE, TESSERACT_PATH, ColorChannel, imread, is_valid_image

if TYPE_CHECKING:
from cv2.typing import MatLike
Expand Down Expand Up @@ -175,16 +175,31 @@ def __read_image_bytes(self, path: str):
interpolation=cv2.INTER_NEAREST,
)

# Mask based on adaptively resized, nearest neighbor interpolated split image
# Mask based on adaptively resized, nearest neighbor interpolated split image.
# This must happen before dropping the alpha channel below.
self.mask = cv2.inRange(image, MASK_LOWER_BOUND, MASK_UPPER_BOUND)
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
else:
image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
if transparency == ImageTransparency.NO_MASK_NO_ALPHA_CHANNEL:
# Add Alpha channel if missing
image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
# Captures are standardized to BGR, so drop the alpha channel if present
if transparency == ImageTransparency.NO_MASK_FULLY_SOLID:
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

self.byte_array = image

def get_preview_image(self):
    """
    Build the image to display in the split image preview.

    `byte_array` is kept as BGR for comparisons, so it carries no
    transparency of its own. When a `mask` is available, it is put back as
    the alpha channel of a BGRA copy so the user still sees the transparency.

    @return: `None` when no image is loaded, `byte_array` itself when there
    is no mask, otherwise a BGRA copy whose alpha channel is the mask.
    """
    bgr_image = self.byte_array
    if bgr_image is None:
        return None

    alpha_mask = self.mask
    if alpha_mask is None:
        # Nothing to re-attach: the comparison image is shown as-is
        return bgr_image

    bgra_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2BGRA)
    bgra_image[:, :, ColorChannel.Alpha] = alpha_mask
    return bgra_image

def check_flag(self, flag: int):
    """
    Tell whether every bit of `flag` is set in this image's `flags`.

    @param flag: The bit, or combination of bits, to look for.
    @return: `True` only when all bits of `flag` are present in `self.flags`.
    """
    matched_bits = self.flags & flag
    return matched_bits == flag

Expand Down
9 changes: 7 additions & 2 deletions src/capture_method/BitBltCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
import ctypes
from typing import TYPE_CHECKING, override

import cv2
import numpy as np
import pywintypes
import win32con
import win32gui
import win32ui

from capture_method.CaptureMethodBase import CaptureMethodBase
from utils import BGRA_CHANNEL_COUNT, get_window_bounds, is_valid_hwnd, try_delete_dc
from utils import ALPHA_CHANNEL_COUNT, get_window_bounds, is_valid_hwnd, try_delete_dc

if TYPE_CHECKING:
from cv2.typing import MatLike
Expand Down Expand Up @@ -77,7 +78,11 @@ def get_frame(self) -> MatLike | None:
# Invalid handle or the window was closed while it was being manipulated
return None

image = None if is_blank(image) else image.reshape((height, width, BGRA_CHANNEL_COUNT))
if is_blank(image):
image = None
else:
image = image.reshape((height, width, ALPHA_CHANNEL_COUNT))
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

# Cleanup DC and handle
try_delete_dc(dc_object)
Expand Down
2 changes: 1 addition & 1 deletion src/capture_method/CaptureMethodBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_frame(self) -> MatLike | None: # noqa: PLR6301
Captures an image of the region for a window matching the given
parameters of the bounding box.

@return: The image of the region in the window in BGRA format
@return: The image of the region in the window in BGR format
"""
return None

Expand Down
2 changes: 1 addition & 1 deletion src/capture_method/DesktopDuplicationCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ def get_frame(self):
screenshot = self._desktop_duplication.screenshot((left, top, right, bottom))
if screenshot is None:
return None
return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGRA)
return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGR)
6 changes: 2 additions & 4 deletions src/capture_method/ScrotCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _scrot_screenshot(x: int, y: int, width: int, height: int):
"-z",
screenshot_file,
))
return imread(screenshot_file, cv2.IMREAD_COLOR_RGB)
return imread(screenshot_file, cv2.IMREAD_COLOR_BGR)
except subprocess.CalledProcessError:
# This can happen when trying to capture a region OOB
# scrot is rude and prints directly to TTY, no stderr :/
Expand Down Expand Up @@ -73,9 +73,7 @@ def get_frame(self):
selection["width"],
selection["height"],
)
if not is_valid_image(image):
return None
return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA)
return image if is_valid_image(image) else None

@override
def recover_window(self, captured_window_title: str):
Expand Down
3 changes: 2 additions & 1 deletion src/capture_method/VideoCaptureDeviceCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def get_frame(self):
y : y + selection["height"],
x : x + selection["width"],
]
self.last_converted_frame = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
# `cv2.VideoCapture` frames are already BGR
self.last_converted_frame = image
return self.last_converted_frame

@override
Expand Down
7 changes: 5 additions & 2 deletions src/capture_method/WindowsGraphicsCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import asyncio
from typing import TYPE_CHECKING, cast, override

import cv2
import numpy as np
import win32api
import win32gui
Expand All @@ -21,7 +22,7 @@

from capture_method.CaptureMethodBase import CaptureMethodBase
from d3d11 import D3D11_CREATE_DEVICE_FLAG, D3D_DRIVER_TYPE, D3D11CreateDevice
from utils import BGRA_CHANNEL_COUNT, WGC_MIN_BUILD, WINDOWS_BUILD_NUMBER, is_valid_hwnd
from utils import ALPHA_CHANNEL_COUNT, WGC_MIN_BUILD, WINDOWS_BUILD_NUMBER, is_valid_hwnd

if TYPE_CHECKING:
from cv2.typing import MatLike
Expand Down Expand Up @@ -156,11 +157,13 @@ def get_frame(self) -> MatLike | None:
raise ValueError("Unable to obtain the BitmapBuffer from SoftwareBitmap.")
reference = bitmap_buffer.create_reference()
image = np.frombuffer(cast("bytes", reference), dtype=np.uint8)
image = image.reshape((self.size.height, self.size.width, BGRA_CHANNEL_COUNT))
image = image.reshape((self.size.height, self.size.width, ALPHA_CHANNEL_COUNT))
image = image[
selection["y"] : selection["y"] + selection["height"],
selection["x"] : selection["x"] + selection["width"],
]
# The OS hands us a native BGRA buffer; drop the unused alpha
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
self.last_converted_frame = image
return image

Expand Down
2 changes: 1 addition & 1 deletion src/capture_method/XcbCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_frame(self):
image = np.array(image)
if not is_valid_image(image):
return None
return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA)
return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

@override
def recover_window(self, captured_window_title: str):
Expand Down
4 changes: 2 additions & 2 deletions src/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import Levenshtein
import numpy as np

from utils import MAXBYTE, ColorChannel, is_valid_image, run_tesseract
from utils import MAXBYTE, NO_ALPHA_CHANNEL_COUNT, ColorChannel, is_valid_image, run_tesseract

if TYPE_CHECKING:
from cv2.typing import MatLike
Expand All @@ -15,7 +15,7 @@
CHANNELS = (ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value)
HISTOGRAM_SIZE = (8, 8, 8)
RANGES = (0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE)
MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE
MASK_SIZE_MULTIPLIER = NO_ALPHA_CHANNEL_COUNT * MAXBYTE * MAXBYTE
MAX_VALUE = 1.0
CV2_PHASH_SIZE = 8

Expand Down
8 changes: 4 additions & 4 deletions src/region_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import error_messages
from capture_method import Region
from utils import (
BGR_CHANNEL_COUNT,
ALPHA_CHANNEL_COUNT,
MAXBYTE,
ImageShape,
auto_split_directory,
Expand Down Expand Up @@ -219,9 +219,9 @@ def align_region(autosplit: AutoSplit):
return

template = imread(template_filename, cv2.IMREAD_UNCHANGED)
# Add alpha channel to template if it's missing.
if template.shape[ImageShape.Channels] == BGR_CHANNEL_COUNT:
template = cv2.cvtColor(template, cv2.COLOR_BGR2BGRA)
# Captures are standardized to BGR, so drop the template's alpha channel if present
if template.shape[ImageShape.Channels] == ALPHA_CHANNEL_COUNT:
template = cv2.cvtColor(template, cv2.COLOR_BGRA2BGR)

# Validate template is a valid image file
if not is_valid_image(template):
Expand Down
4 changes: 2 additions & 2 deletions src/split_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import error_messages
from AutoSplitImage import RESET_KEYWORD, START_KEYWORD, AutoSplitImage, ImageType
from utils import BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image
from utils import ALPHA_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image

if sys.platform == "win32":
from stat import FILE_ATTRIBUTE_HIDDEN, FILE_ATTRIBUTE_SYSTEM
Expand Down Expand Up @@ -66,7 +66,7 @@ def get_image_transparency(image: MatLike):
cheap, allocation-free reductions. The `ERROR_*` outcomes are rare and lead
to a user-facing error, so they're allowed to be slow.
"""
if image.shape[ImageShape.Channels] != BGRA_CHANNEL_COUNT:
if image.shape[ImageShape.Channels] != ALPHA_CHANNEL_COUNT:
return ImageTransparency.NO_MASK_NO_ALPHA_CHANNEL, 0
alpha = image[:, :, ColorChannel.Alpha]
# Fully opaque is the most common case; a single reduction rules it in.
Expand Down
8 changes: 4 additions & 4 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ def find_tesseract_path():
MAXBYTE = 255
ONE_SECOND = 1000
"""1000 milliseconds in 1 second"""
BGR_CHANNEL_COUNT = 3
"""How many channels in a BGR image"""
BGRA_CHANNEL_COUNT = 4
"""How many channels in a BGRA image"""
NO_ALPHA_CHANNEL_COUNT = 3
"""How many channels in a BGR/RGB image"""
ALPHA_CHANNEL_COUNT = 4
"""How many channels in a BGRA/RGBA image"""


class ImageShape(IntEnum):
Expand Down
Loading