diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index dbb0887..84b66b8 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -46,7 +46,7 @@ jobs: uses: astral-sh/setup-uv@v7 - name: Run pytest run: uv run --frozen pytest - coverage_and_badge: + coverage: runs-on: ubuntu-24.04 steps: - name: Checkout @@ -57,14 +57,10 @@ jobs: python-version: "3.13" - name: Install the latest version of uv uses: astral-sh/setup-uv@v7 - - name: Install just - run: sudo apt-get update && sudo apt-get install -y just - - name: Run coverage check and update badge - run: just coverage-check - - name: Verify coverage badge is up to date - run: | - git diff --exit-code README.md || { - echo "README coverage badge is out of date."; - echo "Run 'just coverage-check' and commit changes."; - exit 1; - } + - name: Run pytest with coverage + run: uv run --frozen pytest --cov=zedprofiler --cov-report=xml + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: coverage.xml diff --git a/README.md b/README.md index 47a5ff8..eecdb50 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ZEDprofiler [![Documentation](https://img.shields.io/badge/documentation-available-brightgreen)](https://zedprofiler.readthedocs.io/en/latest/) ![License](https://img.shields.io/badge/license-BSD%203--Clause-blue)[![Coverage](https://img.shields.io/badge/coverage-92%25-brightgreen)](#quality-gates) +# ZEDprofiler [![Documentation](https://img.shields.io/badge/documentation-available-brightgreen)](https://zedprofiler.readthedocs.io/en/latest/) ![License](https://img.shields.io/badge/license-BSD%203--Clause-blue)[![Coverage](https://codecov.io/gh/WayScience/ZedProfiler/branch/main/graph/badge.svg)](https://codecov.io/gh/WayScience/ZedProfiler) [![ZEDprofiler](https://github.com/WayScience/ZEDprofiler/raw/main/logo/with-text-for-dark-bg.png)](https://github.com/WayScience/ZEDprofiler) diff --git a/scripts/update_coverage_badge.py b/scripts/update_coverage_badge.py deleted file mode 100644 index 4057310..0000000 --- a/scripts/update_coverage_badge.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Update the README coverage badge from a coverage.py XML report.""" - -from __future__ import annotations - -import argparse -import re -import xml.etree.ElementTree as ET -from pathlib import Path - -BADGE_PATTERN = re.compile( - r"\[!\[Coverage\]\(https://img\.shields\.io/badge/coverage-[^)]+\)\]\(#quality-gates\)" -) -BRIGHTGREEN_THRESHOLD = 90 -GREEN_THRESHOLD = 80 -YELLOWGREEN_THRESHOLD = 70 -YELLOW_THRESHOLD = 60 -ORANGE_THRESHOLD = 50 - - -def choose_color(percent: int) -> str: - """Pick a simple badge color based on rounded percentage coverage.""" - if percent >= BRIGHTGREEN_THRESHOLD: - return "brightgreen" - if percent >= GREEN_THRESHOLD: - return "green" - if percent >= YELLOWGREEN_THRESHOLD: - return "yellowgreen" - if percent >= YELLOW_THRESHOLD: - return "yellow" - if percent >= ORANGE_THRESHOLD: - return "orange" - return "red" - - -def read_percent_from_coverage_xml(coverage_file: Path) -> int: - """Parse overall line-rate from coverage XML and return rounded percent.""" - root = ET.parse(coverage_file).getroot() - line_rate = root.attrib.get("line-rate") - if line_rate is None: - msg = f"Missing 'line-rate' attribute in {coverage_file}" - raise ValueError(msg) - - return round(float(line_rate) * 100) - - -def update_readme_badge(readme_file: Path, percent: int) -> bool: - """Update badge in README and return True when file content changed.""" - color = choose_color(percent) - badge = ( - f"[![Coverage](https://img.shields.io/badge/coverage-{percent}%25-{color})]" - "(#quality-gates)" - ) - - original = readme_file.read_text(encoding="utf-8") - - if BADGE_PATTERN.search(original): - updated = BADGE_PATTERN.sub(badge, original, count=1) - else: - lines = original.splitlines() - if not lines: - msg = f"README file is empty: {readme_file}" - raise ValueError(msg) - lines.insert(1, "") - lines.insert(2, badge) - updated = "\n".join(lines) - if original.endswith("\n"): - updated += "\n" - - if updated == original: - return False - - readme_file.write_text(updated, encoding="utf-8") - return True - - -def parse_args() -> argparse.Namespace: - """Parse command line arguments.""" - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--coverage-file", - default="coverage.xml", - type=Path, - help="Path to coverage.py XML report (default: coverage.xml)", - ) - parser.add_argument( - "--readme", - default="README.md", - type=Path, - help="Path to README file (default: README.md)", - ) - return parser.parse_args() - - -def main() -> int: - """Update README badge and print a small status message.""" - args = parse_args() - percent = read_percent_from_coverage_xml(args.coverage_file) - changed = update_readme_badge(args.readme, percent) - - if changed: - print(f"Updated coverage badge to {percent}% in {args.readme}") - else: - print(f"Coverage badge already up to date at {percent}%") - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/src/zedprofiler/featurization/colocalization.py b/src/zedprofiler/featurization/colocalization.py index 8d421bf..59dced5 100644 --- a/src/zedprofiler/featurization/colocalization.py +++ b/src/zedprofiler/featurization/colocalization.py @@ -117,8 +117,6 @@ def linear_costes_threshold_calculation( first_image_max = first_image.max() second_image_max = second_image.max() - # Initialize without a threshold - costReg, _ = scipy.stats.pearsonr(first_image, second_image) thr_first_image_c = i thr_second_image_c = (a * i) + b while i > first_image_max and (a * i) + b > second_image_max: @@ -209,7 +207,11 @@ def bisection_costes_threshold_calculation( valid = 1 while lastmid != mid: - thr_first_image_c = mid / scale_max + # Use raw pixel units (not normalised) so the threshold is comparable + # with linear_costes_threshold_calculation and with the outer dispatch's + # `image > thr` comparison. CellProfiler's library has the same + # mid/scale_max normalisation bug; this is an intentional divergence. + thr_first_image_c = float(mid) thr_second_image_c = (a * thr_first_image_c) + b combt = (first_image < thr_first_image_c) | (second_image < thr_second_image_c) if numpy.count_nonzero(combt) <= MIN_PEARSON_POINTS: @@ -234,7 +236,7 @@ def bisection_costes_threshold_calculation( else: mid = ((right - left) // 2) + left - thr_first_image_c = (valid - 1) / scale_max + thr_first_image_c = float(valid - 1) thr_second_image_c = (a * thr_first_image_c) + b return thr_first_image_c, thr_second_image_c @@ -304,7 +306,7 @@ def prepare_two_images_for_colocalization( # noqa: PLR0913 return cropped_image_1, cropped_image_2 -def calculate_colocalization( # noqa: PLR0912, PLR0915 +def calculate_colocalization( # noqa: PLR0912, PLR0915, C901 cropped_image_1: numpy.ndarray, cropped_image_2: numpy.ndarray, thr: int = 15, @@ -325,9 +327,13 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 The threshold for the Manders' coefficients, by default 15 fast_costes : str, optional The mode for Costes' threshold calculation, by default "Accurate". - Options are "Accurate" or "Fast". - "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm. - The "Fast" mode is faster but less accurate. + Options are "Accurate", "Fast", or "Faster" (matching CellProfiler's + three Costes methods). "Accurate" tests every threshold value using a + linear scan (slowest, most precise). "Fast" uses the same linear scan + but skips candidate thresholds when the Pearson R is far from the + crossing point (faster, slightly less precise). "Faster" uses a + bisection algorithm and is substantially faster for 16-bit images + (least precise). Returns ------- @@ -361,6 +367,9 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 ################################################################################################ # Threshold as percentage of maximum intensity of objects in each channel + # Initialise before the try block so combined_thresh is always bound even + # when the except branch fires (numpy.max raises ValueError on empty arrays). + combined_thresh = numpy.zeros_like(cropped_image_1, dtype=bool) try: tff = (thr / 100) * numpy.max(cropped_image_1) tss = (thr / 100) * numpy.max(cropped_image_2) @@ -394,28 +403,30 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 # Calculate the overlap coefficient ################################################################################################ - fpsq = scipy.ndimage.sum( - cropped_image_1[combined_thresh] ** 2, - ) - spsq = scipy.ndimage.sum( - cropped_image_2[combined_thresh] ** 2, - ) - pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq)) - overlap = ( - scipy.ndimage.sum( - cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + if numpy.any(combined_thresh): + fpsq = scipy.ndimage.sum( + cropped_image_1[combined_thresh] ** 2, ) - / pdt - ) - # leave in for now given they are not exported but still calculated - K1 = scipy.ndimage.sum( - cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], - ) / (numpy.array(fpsq)) - K2 = scipy.ndimage.sum( - cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], - ) / (numpy.array(spsq)) - if K1 == K2: - pass + spsq = scipy.ndimage.sum( + cropped_image_2[combined_thresh] ** 2, + ) + pdt = numpy.sqrt(numpy.array(fpsq) * numpy.array(spsq)) + overlap = ( + scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) + / pdt + ) + K1 = scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) / (numpy.array(fpsq)) + K2 = scipy.ndimage.sum( + cropped_image_1[combined_thresh] * cropped_image_2[combined_thresh], + ) / (numpy.array(spsq)) + if K1 == K2: + pass + else: + overlap, K1, K2 = 0.0, 0.0, 0.0 # first_pixels, second_pixels = flattened image arrays # combined_thresh = boolean mask of pixels above threshold in both channels @@ -475,12 +486,24 @@ def calculate_colocalization( # noqa: PLR0912, PLR0915 scale = UINT8_MAX if fast_costes == "Accurate": - thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( - cropped_image_1, cropped_image_2, scale + thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( + first_image=cropped_image_1, + second_image=cropped_image_2, + scale_max=scale, + fast_costes="Accurate", ) - else: + elif fast_costes == "Fast": thr_first_image_c, thr_second_image_c = linear_costes_threshold_calculation( - cropped_image_1, cropped_image_2, scale, fast_costes + first_image=cropped_image_1, + second_image=cropped_image_2, + scale_max=scale, + fast_costes="Fast", + ) + else: # "Faster" + thr_first_image_c, thr_second_image_c = bisection_costes_threshold_calculation( + first_image=cropped_image_1, + second_image=cropped_image_2, + scale_max=scale, ) # Costes' thershold for entire image is applied to each object @@ -537,9 +560,13 @@ def compute_colocalization( # noqa: C901, PLR0912 The threshold for the Manders' coefficients, by default 15 fast_costes : str, optional The mode for Costes' threshold calculation, by default "Accurate". - Options are "Accurate" or "Fast". - "Accurate" uses a linear algorithm, while "Fast" uses a bisection algorithm. - The "Fast" mode is faster but less accurate. + Options are "Accurate", "Fast", or "Faster" (matching CellProfiler's + three Costes methods). "Accurate" tests every threshold value using a + linear scan (slowest, most precise). "Fast" uses the same linear scan + but skips candidate thresholds when the Pearson R is far from the + crossing point (faster, slightly less precise). "Faster" uses a + bisection algorithm and is substantially faster for 16-bit images + (least precise). channel1 : str | None, optional The name of the first channel, used for feature naming, by default None channel2 : str | None, optional @@ -566,8 +593,8 @@ def compute_colocalization( # noqa: C901, PLR0912 colocalization_features = calculate_colocalization( cropped_image_1=cropped_image1, cropped_image_2=cropped_image2, - thr=15, - fast_costes="Accurate", + thr=thr, + fast_costes=fast_costes, ) # Build a simple dict row (avoid pandas dependency) diff --git a/src/zedprofiler/featurization/granularity.py b/src/zedprofiler/featurization/granularity.py index 31c9717..f53681e 100644 --- a/src/zedprofiler/featurization/granularity.py +++ b/src/zedprofiler/featurization/granularity.py @@ -104,9 +104,12 @@ def _upsample_3d( k, i, j = numpy.mgrid[ 0 : original_shape[0], 0 : original_shape[1], 0 : original_shape[2] ].astype(float) - k *= float(subsampled_shape[0] - 1) / float(original_shape[0] - 1) - i *= float(subsampled_shape[1] - 1) / float(original_shape[1] - 1) - j *= float(subsampled_shape[2] - 1) / float(original_shape[2] - 1) + if original_shape[0] > 1: + k *= float(subsampled_shape[0] - 1) / float(original_shape[0] - 1) + if original_shape[1] > 1: + i *= float(subsampled_shape[1] - 1) / float(original_shape[1] - 1) + if original_shape[2] > 1: + j *= float(subsampled_shape[2] - 1) / float(original_shape[2] - 1) return scipy.ndimage.map_coordinates(data, (k, i, j), order=1) @@ -283,9 +286,12 @@ def compute_granularity( # noqa: C901, PLR0912, PLR0913, PLR0915 k, i, j = numpy.mgrid[ 0 : new_shape[0], 0 : new_shape[1], 0 : new_shape[2] ].astype(float) - k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) - i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) - j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) + if new_shape[0] > 1: + k *= float(back_shape[0] - 1) / float(new_shape[0] - 1) + if new_shape[1] > 1: + i *= float(back_shape[1] - 1) / float(new_shape[1] - 1) + if new_shape[2] > 1: + j *= float(back_shape[2] - 1) / float(new_shape[2] - 1) back_pixels = scipy.ndimage.map_coordinates(back_pixels, (k, i, j), order=1) # Subtract background diff --git a/src/zedprofiler/featurization/intensity.py b/src/zedprofiler/featurization/intensity.py index a7feac9..29b8140 100644 --- a/src/zedprofiler/featurization/intensity.py +++ b/src/zedprofiler/featurization/intensity.py @@ -31,7 +31,7 @@ def get_outline(mask: numpy.ndarray) -> numpy.ndarray: """ outline = numpy.zeros_like(mask) for z in range(mask.shape[0]): - outline[z] = skimage.segmentation.find_boundaries(mask[z]) + outline[z] = skimage.segmentation.find_boundaries(mask[z], mode="inner") return outline @@ -80,29 +80,34 @@ def compute_intensity( # noqa: PLR0915 # Extract only coordinates where object exists z_indices, y_indices, x_indices = numpy.where(selected_label_object > 0) - min_z, max_z = numpy.min(z_indices), numpy.max(z_indices) - min_y, max_y = numpy.min(y_indices), numpy.max(y_indices) - min_x, max_x = numpy.min(x_indices), numpy.max(x_indices) + bbox_min_z, bbox_max_z = numpy.min(z_indices), numpy.max(z_indices) + bbox_min_y, bbox_max_y = numpy.min(y_indices), numpy.max(y_indices) + bbox_min_x, bbox_max_x = numpy.min(x_indices), numpy.max(x_indices) # Crop to bounding box for efficiency cropped_label = selected_label_object[ - min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1 + bbox_min_z : bbox_max_z + 1, + bbox_min_y : bbox_max_y + 1, + bbox_min_x : bbox_max_x + 1, ] cropped_image = selected_image_object[ - min_z : max_z + 1, min_y : max_y + 1, min_x : max_x + 1 + bbox_min_z : bbox_max_z + 1, + bbox_min_y : bbox_max_y + 1, + bbox_min_x : bbox_max_x + 1, ] # Create coordinate grids for the bounding box mesh_z, mesh_y, mesh_x = numpy.mgrid[ - min_z : max_z + 1, # + 1 to include the max index - min_y : max_y + 1, - min_x : max_x + 1, + bbox_min_z : bbox_max_z + 1, # + 1 to include the max index + bbox_min_y : bbox_max_y + 1, + bbox_min_x : bbox_max_x + 1, ] # calculate the integrated intensity integrated_intensity = scipy.ndimage.sum( selected_image_object, selected_label_object, + index=1, ) # calculate the volume volume = numpy.sum(selected_label_object) @@ -125,10 +130,10 @@ def compute_intensity( # noqa: PLR0915 upper_quartile_intensity = numpy.percentile(non_zero_pixels_object, 75) # median intensity median_intensity = numpy.median(non_zero_pixels_object) - # max intensity location - max_z, max_y, max_x = scipy.ndimage.maximum_position( - selected_image_object, - ) # z, y, x + # location of maximum intensity pixel (z, y, x) + max_intensity_z, max_intensity_y, max_intensity_x = ( + scipy.ndimage.maximum_position(selected_image_object) + ) # Calculate center of mass (geometric center) using cropped arrays object_mask = cropped_label > 0 @@ -177,9 +182,9 @@ def compute_intensity( # noqa: PLR0915 "StdIntensityEdge": std_intensity_edge, "MinIntensityEdge": min_intensity_edge, "MaxIntensityEdge": max_intensity_edge, - "MaxZ": max_z, - "MaxY": max_y, - "MaxX": max_x, + "MaxZ": max_intensity_z, + "MaxY": max_intensity_y, + "MaxX": max_intensity_x, "CMI.X": cmi_x, "CMI.Y": cmi_y, "CMI.Z": cmi_z, diff --git a/src/zedprofiler/featurization/neighbors.py b/src/zedprofiler/featurization/neighbors.py index 3c2e17a..cdfeafb 100644 --- a/src/zedprofiler/featurization/neighbors.py +++ b/src/zedprofiler/featurization/neighbors.py @@ -6,6 +6,7 @@ import numpy import pandas import skimage.measure +import skimage.morphology from zedprofiler.contracts import validate_column_name_schema from zedprofiler.IO.feature_writing_utils import format_morphology_feature_name @@ -138,8 +139,6 @@ def compute_neighbors( props_label["bbox-4"][0], props_label["bbox-5"][0], ) - original_bbox = (z_min, y_min, x_min, z_max, y_max, x_max) - new_z_min, new_z_max = neighbors_expand_box( min_coor=image_global_min_coord_z, max_coord=image_global_max_coord_z, @@ -163,18 +162,12 @@ def compute_neighbors( ) bbox = (new_z_min, new_y_min, new_x_min, new_z_max, new_y_max, new_x_max) croppped_neighbor_image = crop_3D_image(image=label_object, bbox=bbox) - self_cropped_neighbor_image = crop_3D_image( - image=label_object, bbox=original_bbox - ) - # find all the unique values in the cropped image of the object of interest - # this is the number of neighbors in the cropped image - n_neighbors_adjacent = ( - len( - numpy.unique( - self_cropped_neighbor_image[self_cropped_neighbor_image > 0] - ) - ) - - 1 + binary_mask = label_object == label + dilated_mask = skimage.morphology.dilation(binary_mask) + labels_in_dilation = label_object[dilated_mask] + adjacent_labels = numpy.unique(labels_in_dilation) + n_neighbors_adjacent = int( + numpy.sum((adjacent_labels != 0) & (adjacent_labels != label)) ) # find all the unique values in the expanded cropped image of the diff --git a/src/zedprofiler/featurization/texture.py b/src/zedprofiler/featurization/texture.py index 32a9b4d..780d333 100644 --- a/src/zedprofiler/featurization/texture.py +++ b/src/zedprofiler/featurization/texture.py @@ -151,6 +151,7 @@ def compute_texture( # noqa: C901 ) # loop through each label and get the bounding box # to compute features for the object + features = numpy.empty((n_directions, 13, max(labels))) for _, label in enumerate(labels): if int(label) == 0: continue @@ -167,7 +168,6 @@ def compute_texture( # noqa: C901 if not numpy.any(object_mask): continue image_object[~object_mask] = 0 - features = numpy.empty((n_directions, 13, max(labels))) image_object = scale_image(image_object, num_gray_levels=grayscale) try: # calculates 13 Haralick features for each direction (13) diff --git a/tests/featurization/test_colocalization.py b/tests/featurization/test_colocalization.py index 8c5e26d..deda149 100644 --- a/tests/featurization/test_colocalization.py +++ b/tests/featurization/test_colocalization.py @@ -1,5 +1,7 @@ from __future__ import annotations +import unittest.mock + import numpy as np import pandas as pd import pytest @@ -102,6 +104,216 @@ def test_prepare_two_images_for_colocalization_crops() -> None: assert cropped2.max() >= expected_peak_im2 +def test_combined_thresh_does_not_raise_unbound_local_error() -> None: + """combined_thresh must be bound even when images are empty (Bug 2). + + Before the fix, combined_thresh was only assigned inside the try-else block, + so when numpy.max raised ValueError on an empty crop, the subsequent + ``if numpy.any(combined_thresh)`` reference raised UnboundLocalError. + """ + empty = np.zeros((0,), dtype=float) + try: + calculate_colocalization(empty, empty, thr=15, fast_costes="Accurate") + except UnboundLocalError as e: + pytest.fail(f"UnboundLocalError raised — combined_thresh not initialised: {e}") + except Exception: + pass # any other exception (ValueError, ZeroDivisionError) is acceptable + + +def test_accurate_mode_calls_linear_not_bisection() -> None: + """fast_costes='Accurate' must route to linear_costes, not bisection (Bug 3). + + The two algorithms can converge to the same threshold for some inputs, so + the test uses unittest.mock.patch to spy on which function is actually called + rather than comparing numeric results. + """ + rng = np.random.default_rng(42) + img = rng.uniform(0, 255, (8, 8, 8)).astype(float) + + with ( + unittest.mock.patch( + "zedprofiler.featurization.colocalization.linear_costes_threshold_calculation", + wraps=linear_costes_threshold_calculation, + ) as mock_linear, + unittest.mock.patch( + "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", + wraps=bisection_costes_threshold_calculation, + ) as mock_bisection, + ): + calculate_colocalization(img, img, thr=15, fast_costes="Accurate") + + assert mock_linear.called, ( + "linear_costes_threshold_calculation was not called for fast_costes='Accurate'" + ) + assert not mock_bisection.called, ( + "bisection_costes_threshold_calculation was called for fast_costes='Accurate'" + ) + + +def test_compute_colocalization_respects_fast_costes_parameter() -> None: + """compute_colocalization must forward fast_costes to calculate_colocalization. + + Bug B (ZedProfiler-specific): + + Before the fix, the inner call hard-coded fast_costes='Accurate', so the + caller's value was silently ignored. The test passes fast_costes='Faster' and + checks via mock that bisection is invoked (the only mode that reaches bisection). + """ + imgset = ImageSetLoaderModel() + shape = (7, 7, 7) + center = (3, 3, 3) + label, im1, im2 = make_pair(shape, center) + loader = TwoObjectLoaderModel( + image_set_loader=imgset, + compartment="Cell", + image1=im1, + image2=im2, + label_image=label, + object_ids=[1], + ) + + with unittest.mock.patch( + "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", + wraps=bisection_costes_threshold_calculation, + ) as mock_bisection: + compute_colocalization(loader, channel1="A", channel2="B", fast_costes="Faster") + + assert mock_bisection.called, ( + "bisection_costes_threshold_calculation was not called for " + "fast_costes='Faster' — compute_colocalization may still be " + "hard-coding fast_costes='Accurate'" + ) + + +@pytest.fixture() +def high_contrast_images() -> tuple[np.ndarray, np.ndarray]: + """Two 1-D images with a bright correlated signal well above background.""" + rng = np.random.default_rng(0) + background = rng.uniform(0, 50, 300) + signal_mask = np.zeros(300, dtype=bool) + signal_mask[100:150] = True + img1 = background.copy() + img1[signal_mask] = 200.0 + img2 = background.copy() + img2[signal_mask] = 180.0 + return img1, img2 + + +def test_all_costes_modes_converge_on_same_threshold( + high_contrast_images: tuple[np.ndarray, np.ndarray], +) -> None: + """All three modes must agree to within 15 pixel units on realistic images.""" + img1, img2 = high_contrast_images + thr_accurate, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Accurate" + ) + thr_fast, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Fast" + ) + thr_faster, _ = bisection_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255 + ) + tolerance = 15 + assert abs(thr_accurate - thr_fast) <= tolerance, ( + f"Accurate ({thr_accurate:.1f}) and Fast ({thr_fast:.1f}) diverged " + f"by more than {tolerance}" + ) + assert abs(thr_accurate - thr_faster) <= tolerance, ( + f"Accurate ({thr_accurate:.1f}) and Faster ({thr_faster:.1f}) diverged " + f"by more than {tolerance}" + ) + + +def test_costes_threshold_low_for_perfectly_correlated_images() -> None: + """All three modes must return a low threshold for perfectly correlated images.""" + img = np.linspace(10, 200, 300) + thr_accurate, _ = linear_costes_threshold_calculation( + first_image=img, second_image=img, scale_max=255, fast_costes="Accurate" + ) + thr_fast, _ = linear_costes_threshold_calculation( + first_image=img, second_image=img, scale_max=255, fast_costes="Fast" + ) + thr_faster, _ = bisection_costes_threshold_calculation( + first_image=img, second_image=img, scale_max=255 + ) + max_expected = 15 # near zero in 0-255 space + assert thr_accurate < max_expected, ( + f"Expected threshold near 0 for fully correlated images, got {thr_accurate:.3f}" + ) + assert thr_fast < max_expected, ( + f"Expected threshold near 0 for fully correlated images, got {thr_fast:.3f}" + ) + assert thr_faster < max_expected, ( + f"Expected threshold near 0 for fully correlated images, got {thr_faster:.3f}" + ) + + +def test_costes_threshold_high_for_anticorrelated_images_linear() -> None: + """Linear modes must return a threshold near max for anti-correlated images. + + Note: the bisection algorithm's degenerate behaviour for purely anti-correlated + inputs (returns 0 rather than scale_max) is documented separately in + test_bisection_degenerate_anticorrelation_returns_zero. + """ + img1 = np.linspace(0, 255, 300) + img2 = np.linspace(255, 0, 300) + thr_accurate, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Accurate" + ) + thr_fast, _ = linear_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255, fast_costes="Fast" + ) + min_expected = 200 + assert thr_accurate > min_expected, ( + "Expected threshold near max for anti-correlated images, " + f"got {thr_accurate:.3f}" + ) + assert thr_fast > min_expected, ( + f"Expected threshold near max for anti-correlated images, got {thr_fast:.3f}" + ) + + +def test_bisection_degenerate_anticorrelation_returns_zero() -> None: + """Document bisection's edge-case behaviour for purely anti-correlated images. + + When Pearson R is negative for every candidate threshold, valid is never updated + from its initial value of 1, so the return is valid - 1 = 0. This matches + CellProfiler's library behaviour and is a known limitation for this degenerate case. + """ + img1 = np.linspace(0, 255, 300) + img2 = np.linspace(255, 0, 300) + thr, _ = bisection_costes_threshold_calculation( + first_image=img1, second_image=img2, scale_max=255 + ) + assert thr == 0.0, ( + f"Expected bisection to return 0 for fully anti-correlated images " + f"(degenerate case), got {thr}" + ) + + +def test_faster_mode_calls_bisection_not_linear() -> None: + """fast_costes='Faster' must route to bisection, not linear.""" + rng = np.random.default_rng(42) + img = rng.uniform(0, 255, (8, 8, 8)).astype(float) + with ( + unittest.mock.patch( + "zedprofiler.featurization.colocalization.linear_costes_threshold_calculation", + wraps=linear_costes_threshold_calculation, + ) as mock_linear, + unittest.mock.patch( + "zedprofiler.featurization.colocalization.bisection_costes_threshold_calculation", + wraps=bisection_costes_threshold_calculation, + ) as mock_bisection, + ): + calculate_colocalization(img, img, thr=15, fast_costes="Faster") + assert mock_bisection.called, ( + "bisection_costes_threshold_calculation was not called for fast_costes='Faster'" + ) + assert not mock_linear.called, ( + "linear_costes_threshold_calculation was called for fast_costes='Faster'" + ) + + def test_calculate_colocalization_identical_images() -> None: # identical images should give high correlation and Manders near 1 rng = np.random.default_rng(0) diff --git a/tests/featurization/test_granularity.py b/tests/featurization/test_granularity.py index b1ac407..465a2a4 100644 --- a/tests/featurization/test_granularity.py +++ b/tests/featurization/test_granularity.py @@ -159,6 +159,50 @@ class Dummy: assert isinstance(df, pd.DataFrame) +def test_upsample_3d_no_division_by_zero_when_dim_is_one() -> None: + """_upsample_3d must not raise ZeroDivisionError when any dim has size 1 (Bug 7). + + Before the fix, the per-axis scale factor was always computed as + ``(subsampled_shape[k] - 1) / (original_shape[k] - 1)`` without guarding + against ``original_shape[k] == 1``, which produces a zero denominator. + """ + data = np.ones((1, 4, 4), dtype=float) + try: + result = _upsample_3d(data, data.shape, (1, 8, 8)) + except ZeroDivisionError as e: + pytest.fail(f"ZeroDivisionError in _upsample_3d with dim-1 axis: {e}") + assert result.shape == (1, 8, 8) + + +def test_granularity_no_crash_on_single_z_slice() -> None: + """compute_granularity must not crash when the input has only one Z slice (Bug 7). + + The division-by-zero guard must also protect the background upsampling block + inside compute_granularity, not just _upsample_3d. + """ + shape = (1, 12, 12) + img = np.zeros(shape, dtype=float) + lab = np.zeros(shape, dtype=int) + img[0, 5, 5] = 10.0 + lab[0, 5, 5] = 1 + + class Dummy: + image = img + label_image = lab + object_ids: ClassVar[list[int]] = [1] + image_set_loader = type("ISL", (), {"image_set_name": "s"})() + compartment = "Cell" + channel = "Ch1" + + try: + df = compute_granularity(Dummy(), radius=1, granular_spectrum_length=3) + except ZeroDivisionError as e: + pytest.fail( + f"ZeroDivisionError in compute_granularity with single-Z image: {e}" + ) + assert isinstance(df, pd.DataFrame) + + def test_compute_granularity_preserves_sparse_label_ids() -> None: # Sparse labels should not be renumbered to 1..n internally. shape = (8, 8, 8) diff --git a/tests/featurization/test_intensity.py b/tests/featurization/test_intensity.py index 2f01168..0cc9035 100644 --- a/tests/featurization/test_intensity.py +++ b/tests/featurization/test_intensity.py @@ -41,6 +41,99 @@ def make_label_and_image( return image, label +def test_min_intensity_edge_not_zero_for_bright_cell() -> None: + """MinIntensityEdge must reflect actual boundary intensities, not 0. + + Regression test for a bug where get_outline used find_boundaries with the + default mode='thick', which returns both inner (object-side) and outer + (background-side) boundary pixels. Because the image is zeroed outside the + cell before the outline is computed, outer boundary pixels always have + intensity 0, making numpy.min always return 0. + + The fix is to use mode='inner' so only pixels inside the object boundary + are included in the edge mask. + """ + shape = (10, 10, 10) + image = np.zeros(shape, dtype=np.float32) + label = np.zeros(shape, dtype=np.int32) + + image[3:7, 3:7, 3:7] = 100.0 + label[3:7, 3:7, 3:7] = 1 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[1], + image_set_loader=imgset, + ) + + df = compute_intensity(loader) + + row = df[(df["Metadata_Object_ObjectID"] == 1)] + min_edge_col = [c for c in df.columns if "MinIntensityEdge" in c] + assert min_edge_col, "MinIntensityEdge column not found in output" + + min_edge = row[min_edge_col[0]].values[0] + + assert min_edge != 0, ( + "MinIntensityEdge is 0 for a cell with uniform intensity 100. " + "Likely caused by find_boundaries(mode='thick') including outer " + "(background) boundary pixels that have been zeroed." + ) + assert min_edge == pytest.approx(100.0), ( + f"MinIntensityEdge should be 100 (cell intensity) but got {min_edge}" + ) + + +def test_integrated_intensity_is_per_object_not_global() -> None: + """IntegratedIntensity must reflect only the target object's voxels (Bug 10). + + Before the fix, scipy.ndimage.sum was called without an explicit ``index`` + argument. With a binarised label array (values 0/1), omitting ``index`` + causes ndimage.sum to return the total over all labelled pixels, which can + include voxels from other objects if the label array was not fully isolated + beforehand. Passing ``index=1`` constrains the sum to only the pixels + belonging to label 1. + + The test constructs an image where one object (label 1, intensity 1.0) is + isolated from a second brighter region that is masked out. Without the fix, + the sum bleeds into the second region and the integrated intensity is too + large. + """ + shape = (10, 10, 10) + image = np.zeros(shape, dtype=np.float32) + label = np.zeros(shape, dtype=np.int32) + + # Object 1: single bright voxel, intensity 1.0 + image[2, 2, 2] = 1.0 + label[2, 2, 2] = 1 + + # A second region that is NOT labelled (background) but has high intensity. + # If scipy.ndimage.sum leaks into it, IntegratedIntensity will be inflated. + image[7, 7, 7] = 100.0 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[1], + image_set_loader=imgset, + ) + + df = compute_intensity(loader) + ii_col = [c for c in df.columns if "IntegratedIntensity" in c and "Edge" not in c] + assert ii_col, "IntegratedIntensity column not found in output" + + row = df[df["Metadata_Object_ObjectID"] == 1] + ii = float(row[ii_col[0]].values[0]) + expected = 1.0 + assert np.isclose(ii, expected, atol=1e-3), ( + f"IntegratedIntensity = {ii}, expected {expected}. " + "Likely caused by scipy.ndimage.sum without index=1 summing all pixels." + ) + + @pytest.mark.parametrize("shape,center", [((6, 6, 6), (3, 3, 3))]) def test_compute_intensity_basic( shape: tuple[int, int, int], center: tuple[int, int, int] diff --git a/tests/featurization/test_neighbors.py b/tests/featurization/test_neighbors.py index cb5b3f3..3d0ee18 100644 --- a/tests/featurization/test_neighbors.py +++ b/tests/featurization/test_neighbors.py @@ -128,6 +128,44 @@ def test_mahalanobis_small_and_regularized_and_singular() -> None: assert np.allclose(md_sing, 0.0) +def test_neighbors_count_adjacent_detects_touching_cells() -> None: + """NeighborsCountAdjacent must be > 0 for two directly touching objects (Bug 5). + + Before the fix, adjacency was detected by cropping to the regionprops bbox + of each object and counting unique labels inside. Because regionprops uses + exclusive-max indices (Python slice convention), the boundary voxel of the + touching neighbour was excluded from the crop, making the count always 0. + + The fix replaces the bbox-crop approach with a 1-voxel morphological dilation + of the object mask; any label in the dilated region that is not the object + itself is a true neighbour. + """ + shape = (10, 10, 10) + lab = np.zeros(shape, dtype=int) + # Object 1 occupies [2:5, 2:5, 2:5]; Object 2 occupies [5:8, 2:5, 2:5]. + # They share the plane at z=5, so they are face-adjacent (touching). + lab[2:5, 2:5, 2:5] = 1 + lab[5:8, 2:5, 2:5] = 2 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + label_image=lab, + object_ids=[1, 2], + image_set_loader=imgset, + ) + + df = compute_neighbors(loader, distance_threshold=5, anisotropy_factor=1) + obj1_row = df[df["Metadata_Object_ObjectID"] == 1] + adjacent_col = [c for c in df.columns if "NeighborsCountAdjacent" in c] + assert adjacent_col, "NeighborsCountAdjacent column not found in output" + + n_adj = int(obj1_row[adjacent_col[0]].values[0]) + assert n_adj > 0, ( + "NeighborsCountAdjacent is 0 for two touching cells — " + "likely caused by bbox-crop excluding the shared boundary voxel" + ) + + def test_create_results_dataframe_and_errors_and_plots() -> None: # create a simple classification results dict results = { diff --git a/tests/featurization/test_texture.py b/tests/featurization/test_texture.py index aaee06a..7de2f3c 100644 --- a/tests/featurization/test_texture.py +++ b/tests/featurization/test_texture.py @@ -3,12 +3,16 @@ import numpy as np import pandas as pd import pytest +import skimage.measure from beartype import beartype from pydantic import BaseModel, ConfigDict, field_validator mahotas = pytest.importorskip("mahotas") -from zedprofiler.featurization.texture import compute_texture # noqa: E402 +from zedprofiler.featurization.texture import compute_texture, scale_image # noqa: E402 + +LABEL_OBJ_1 = 1 +LABEL_OBJ_2 = 2 class ImageSetLoaderModel(BaseModel): @@ -56,3 +60,97 @@ def test_compute_texture_basic( df = compute_texture(loader, distance=1, grayscale=256) assert isinstance(df, pd.DataFrame) assert "Metadata_Object_ObjectID" in df.columns + + +def test_texture_values_correct_per_object() -> None: + """Each object must receive its own Haralick values, not another object's. + + Regression test for a bug where features = numpy.empty(...) was called + inside the per-object loop, resetting the array on every iteration so only + the last object's values survived. All prior objects received uninitialized + memory from numpy.empty. + + Two objects with distinct pixel intensities are profiled. The test + independently computes expected Haralick values per object using mahotas + directly and asserts the pipeline output matches. It fails on the buggy code + because object 1 gets garbage values instead of its own. + """ + # Object 1: bright uniform region (intensity 200) + # Object 2: dim uniform region (intensity 50) + # AngularSecondMoment for a uniform region is 1.0; if the array is reset + # inside the loop, object 1 gets uninitialized values instead. + shape = (20, 20, 20) + image = np.zeros(shape, dtype=np.uint8) + label = np.zeros(shape, dtype=np.int32) + + image[1:5, 1:5, 1:5] = 200 + label[1:5, 1:5, 1:5] = LABEL_OBJ_1 + + image[14:18, 14:18, 14:18] = 50 + label[14:18, 14:18, 14:18] = LABEL_OBJ_2 + + imgset = ImageSetLoaderModel() + loader = ObjectLoaderModel( + image=image, + label_image=label, + object_ids=[LABEL_OBJ_1, LABEL_OBJ_2], + image_set_loader=imgset, + ) + + df = compute_texture(loader, distance=1, grayscale=256) + + # Build per-object lookup: {object_id: {feature_col: value}} + obj_textures: dict[int, dict[str, float]] = {} + for _, row in df.iterrows(): + obj_id = int(row["Metadata_Object_ObjectID"]) + obj_textures.setdefault(obj_id, {}) + for col in df.columns: + if col != "Metadata_Object_ObjectID": + obj_textures[obj_id][col] = row[col] + + assert LABEL_OBJ_1 in obj_textures, "Object 1 missing from texture output" + assert LABEL_OBJ_2 in obj_textures, "Object 2 missing from texture output" + + for obj_id in [LABEL_OBJ_1, LABEL_OBJ_2]: + mask = label == obj_id + crop_img = image.copy() + crop_img[~mask] = 0 + + props = skimage.measure.regionprops_table( + mask.astype(np.uint8), properties=["bbox"] + ) + z0, y0, x0 = ( + int(props["bbox-0"][0]), + int(props["bbox-1"][0]), + int(props["bbox-2"][0]), + ) + z1, y1, x1 = ( + int(props["bbox-3"][0]), + int(props["bbox-4"][0]), + int(props["bbox-5"][0]), + ) + crop_scaled = scale_image(crop_img[z0:z1, y0:y1, x0:x1], num_gray_levels=256) + + expected = mahotas.features.haralick( + ignore_zeros=True, + f=crop_scaled, + distance=1, + compute_14th_feature=False, + ) + expected_asm = expected[0, 0] + + asm_cols = [ + c + for c in obj_textures[obj_id] + if "AngularSecondMoment" in c and "-00-" in c + ] + assert asm_cols, ( + f"No AngularSecondMoment-direction-00 column for object {obj_id}" + ) + actual_asm = obj_textures[obj_id][asm_cols[0]] + + assert np.isclose(actual_asm, expected_asm, rtol=1e-5), ( + f"Object {obj_id} AngularSecondMoment: " + f"got {actual_asm}, expected {expected_asm}. " + f"Likely caused by numpy.empty reset inside the per-object loop." + ) diff --git a/tests/test_update_coverage_badge.py b/tests/test_update_coverage_badge.py deleted file mode 100644 index 8cb3c8b..0000000 --- a/tests/test_update_coverage_badge.py +++ /dev/null @@ -1,78 +0,0 @@ -import importlib.util -from pathlib import Path - -import pytest - -# Load the module relative to this test file -_spec = importlib.util.spec_from_file_location( - "update_coverage_badge", - Path(__file__).parent.parent / "scripts" / "update_coverage_badge.py", -) -mod = importlib.util.module_from_spec(_spec) -_spec.loader.exec_module(mod) - - -def test_replaces_existing_badge(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - original = ( - "# Project\n\n" - "[![Coverage](https://img.shields.io/badge/coverage-42%25-red)](#quality-gates)\n\n" - "Some text\n" - ) - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 85) - assert changed is True - - updated = readme.read_text(encoding="utf-8") - assert "coverage-85%25-green" in updated - assert updated.count("[![Coverage]") == 1 - assert "coverage-42%25-red" not in updated - - -def test_inserts_badge_when_missing(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - original = "# Project\nDescription line\n" - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 60) - assert changed is True - - updated = readme.read_text(encoding="utf-8") - lines = updated.splitlines() - # After insertion: title at 0, blank line at 1, badge at 2 - assert lines[0] == "# Project" - assert lines[1] == "" - assert lines[2].startswith("[![Coverage]") - assert "coverage-60%25-yellow" in lines[2] - - -def test_preserves_trailing_newline_on_insert(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - original = "# Project\nDescription line\n" # ends with newline - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 70) - assert changed is True - - updated = readme.read_text(encoding="utf-8") - assert updated.endswith("\n") - - -def test_no_change_returns_false_when_badge_already_exact(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - # percent 75 -> color "yellowgreen" (per thresholds) - badge = "[![Coverage](https://img.shields.io/badge/coverage-75%25-yellowgreen)](#quality-gates)" - original = f"# Project\n\n{badge}\n\nMore\n" - readme.write_text(original, encoding="utf-8") - - changed = mod.update_readme_badge(readme, 75) - assert changed is False - assert readme.read_text(encoding="utf-8") == original - - -def test_empty_readme_raises_value_error(tmp_path: Path) -> None: - readme = tmp_path / "README.md" - readme.write_text("", encoding="utf-8") - with pytest.raises(ValueError): - mod.update_readme_badge(readme, 50)