Skip to content

Commit 890eb6c

Browse files
zalgo3 authored and Avasam committed
Adaptive resizing for masked images
Especially at 1080p or higher resolutions, if the non-transparent area of the mask image used for the split is small, resizing the entire image to 320x240 will lose the information inside the mask. One way to prevent this is to adaptively determine the target size according to the number of nonzero elements in the alpha channel of the split image. At least for the L2 norm and histogram methods, such a change should not affect performance, since they only use information from the mask's interior. Masks are not recommended with pHash anyway.
1 parent 4b06065 commit 890eb6c

File tree

4 files changed

+32
-20
lines changed

4 files changed

+32
-20
lines changed

src/AutoSplit.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import error_messages
2020
import user_profile
2121
from AutoControlledWorker import AutoControlledWorker
22-
from AutoSplitImage import COMPARISON_RESIZE, START_KEYWORD, AutoSplitImage, ImageType
22+
from AutoSplitImage import START_KEYWORD, AutoSplitImage, ImageType
2323
from capture_method import CaptureMethodBase, CaptureMethodEnum
2424
from gen import about, design, settings, update_checker
2525
from hotkeys import HOTKEYS, after_setting_hotkey, send_command
@@ -771,12 +771,7 @@ def __get_capture_for_comparison(self):
771771
if recovered:
772772
capture, _ = self.capture_method.get_frame(self)
773773

774-
return (
775-
None
776-
if not is_valid_image(capture)
777-
else cv2.resize(capture, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST),
778-
is_old_image,
779-
)
774+
return capture, is_old_image
780775

781776
def __reset_if_should(self, capture: cv2.Mat | None):
782777
"""

src/AutoSplitImage.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
COMPARISON_RESIZE_WIDTH = 320
2020
COMPARISON_RESIZE_HEIGHT = 240
2121
COMPARISON_RESIZE = (COMPARISON_RESIZE_WIDTH, COMPARISON_RESIZE_HEIGHT)
22-
LOWER_BOUND = np.array([0, 0, 0, 1], dtype="uint8")
23-
UPPER_BOUND = np.array([MAXBYTE, MAXBYTE, MAXBYTE, MAXBYTE], dtype="uint8")
22+
COMPARISON_RESIZE_AREA = COMPARISON_RESIZE_WIDTH * COMPARISON_RESIZE_HEIGHT
23+
MASK_LOWER_BOUND = np.array([1], dtype="uint8")
24+
MASK_UPPER_BOUND = np.array([MAXBYTE], dtype="uint8")
2425
START_KEYWORD = "start_auto_splitter"
2526
RESET_KEYWORD = "reset"
2627

@@ -108,15 +109,31 @@ def __read_image_bytes(self, path: str):
108109
error_messages.image_type(path)
109110
return
110111

111-
image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
112112
self._has_transparency = check_if_image_has_transparency(image)
113113
# If image has transparency, create a mask
114114
if self._has_transparency:
115-
# Create mask based on resized, nearest neighbor interpolated split image
116-
self.mask = cv2.inRange(image, LOWER_BOUND, UPPER_BOUND)
117-
# Add Alpha channel if missing
118-
elif image.shape[2] == 3:
119-
image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
115+
# Adaptively determine the target size according to
116+
# the number of nonzero elements in the alpha channel of the split image.
117+
# This may result in images bigger than COMPARISON_RESIZE if there's plenty of transparency.
118+
# Which wouldn't incur any performance loss in methods where masked regions are ignored.
119+
alpha_channel = image[:, :, 3]
120+
scale = min(1, (COMPARISON_RESIZE_AREA / cv2.countNonZero(alpha_channel)) ** 0.5)
121+
122+
image = cv2.resize(
123+
image,
124+
dsize=None,
125+
fx=scale,
126+
fy=scale,
127+
interpolation=cv2.INTER_NEAREST,
128+
)
129+
130+
# Mask based on adaptively resized, nearest neighbor interpolated split image
131+
self.mask = cv2.inRange(alpha_channel, MASK_LOWER_BOUND, MASK_UPPER_BOUND)
132+
else:
133+
image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
134+
# Add Alpha channel if missing
135+
if image.shape[2] == 3:
136+
image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
120137

121138
self.byte_array = image
122139

@@ -134,6 +151,7 @@ def compare_with_capture(
134151

135152
if not is_valid_image(self.byte_array) or not is_valid_image(capture):
136153
return 0.0
154+
capture = cv2.resize(capture, self.byte_array.shape[1::-1])
137155
comparison_method = self.__get_comparison_method(default)
138156
if comparison_method == 0:
139157
return compare_l2_norm(self.byte_array, capture, self.mask)

src/compare.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import cv2
44
import imagehash
5-
import numpy as np
65
from PIL import Image
76
from win32con import MAXBYTE
87

@@ -49,7 +48,7 @@ def compare_l2_norm(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = No
4948
# The L2 Error is summed across all pixels, so this normalizes
5049
max_error = (source.size ** 0.5) * MAXBYTE \
5150
if not is_valid_image(mask)\
52-
else (3 * np.count_nonzero(mask) * MAXBYTE * MAXBYTE) ** 0.5
51+
else (3 * cv2.countNonZero(mask) * MAXBYTE * MAXBYTE) ** 0.5
5352

5453
if not max_error:
5554
return 0.0
@@ -75,7 +74,7 @@ def compare_template(source: cv2.Mat, capture: cv2.Mat, mask: cv2.Mat | None = N
7574
# that the value can be. Used for normalizing from 0 to 1.
7675
max_error = source.size * MAXBYTE * MAXBYTE \
7776
if not is_valid_image(mask) \
78-
else np.count_nonzero(mask)
77+
else cv2.countNonZero(mask)
7978

8079
return 1 - (min_val / max_error)
8180

typings/cv2/cv2.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4391,7 +4391,7 @@ def cornerHarris(src: Mat, blockSize, ksize, k, dst: Mat = ..., borderType=...)
43914391
def cornerMinEigenVal(src: Mat, blockSize, dst: Mat = ..., ksize=..., borderType=...) -> _dst: ...
43924392
def cornerSubPix(image: Mat, corners, winSize, zeroZone, criteria) -> _corners: ...
43934393
def correctMatches(F, points1, points2, newPoints1=..., newPoints2=...) -> tuple[_newPoints1, _newPoints2]: ...
4394-
def countNonZero(src): ...
4394+
def countNonZero(src: Mat | _NumericScalar) -> int: ...
43954395
def createAlignMTB(max_bits=..., exclude_range=..., cut=...): ...
43964396
def createBackgroundSubtractorKNN(history=..., dist2Threshold=..., detectShadows=...): ...
43974397
def createBackgroundSubtractorMOG2(history=..., varThreshold=..., detectShadows=...): ...
@@ -4948,7 +4948,7 @@ def reprojectImageTo3D(disparity, Q, _3dImage=..., handleMissingValues=..., ddep
49484948

49494949

49504950
def resize(
4951-
src: Mat, dsize: _Size, dst: Mat = ..., fx: float = ...,
4951+
src: Mat | int | bool, dsize: _Size | None, dst: Mat | _NumericScalar = ..., fx: float = ...,
49524952
fy: float = ..., interpolation: int = ...,
49534953
) -> Mat: ...
49544954

0 commit comments

Comments
 (0)