diff --git a/openkb/images.py b/openkb/images.py index 76284148..750530d9 100644 --- a/openkb/images.py +++ b/openkb/images.py @@ -223,6 +223,18 @@ def copy_relative_images( - Missing source file: log a warning and leave the original text unchanged. """ result = markdown + # Track the destination chosen for each already-copied source so the same + # image referenced twice isn't duplicated, plus the set of taken names so + # two *different* sources that share a basename (e.g. ``a/logo.png`` and + # ``b/logo.png``) don't overwrite each other and collapse both links onto a + # single image. + assigned: dict[Path, str] = {} + # Seed taken names from any files already in images_dir so a re-convert into + # an existing per-doc directory disambiguates against them instead of + # overwriting a previously-copied image with a same-basename source. + taken: set[str] = ( + {p.name for p in images_dir.iterdir()} if images_dir.is_dir() else set() + ) for match in _RELATIVE_RE.finditer(markdown): alt, rel_path = match.group(1), match.group(2) @@ -236,10 +248,17 @@ def copy_relative_images( ) continue - filename = src.name - dest = images_dir / filename - images_dir.mkdir(parents=True, exist_ok=True) - shutil.copy2(src, dest) + filename = assigned.get(src) + if filename is None: + filename = src.name + n = 1 + while filename in taken: + filename = f"{src.stem}_{n}{src.suffix}" + n += 1 + assigned[src] = filename + taken.add(filename) + images_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, images_dir / filename) new_ref = f"![{alt}](sources/images/{doc_name}/{filename})" result = result.replace(match.group(0), new_ref, 1) diff --git a/tests/test_images.py b/tests/test_images.py index 9abb3ec2..14b38aec 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -164,3 +164,60 @@ def test_multiple_relative_images_all_copied(self, tmp_path): assert "![b](sources/images/doc/b.jpg)" in result assert (images_dir / "a.png").exists() assert (images_dir / "b.jpg").exists() + + def test_same_basename_different_dirs_no_overwrite(self, tmp_path): + # Two distinct images sharing a basename must not overwrite each other + # (which would lose one image and point both links at the survivor). + source_dir = tmp_path / "source" + (source_dir / "a").mkdir(parents=True) + (source_dir / "b").mkdir(parents=True) + (source_dir / "a" / "logo.png").write_bytes(FAKE_PNG) + (source_dir / "b" / "logo.png").write_bytes(FAKE_JPG) + + images_dir = tmp_path / "images" / "doc" + images_dir.mkdir(parents=True) + + md = "![a](a/logo.png)\n![b](b/logo.png)" + result = copy_relative_images(md, source_dir, "doc", images_dir) + + saved = sorted(p.name for p in images_dir.iterdir()) + assert len(saved) == 2 # both copied, neither overwritten + assert {(images_dir / n).read_bytes() for n in saved} == {FAKE_PNG, FAKE_JPG} + links = sorted( + line.split("](")[1].rstrip(")") for line in result.strip().splitlines() + ) + assert links[0] != links[1] # links point at different files + + def test_same_image_referenced_twice_is_copied_once(self, tmp_path): + # Identical source referenced twice: copy once, both links agree. + source_dir = tmp_path / "source" + source_dir.mkdir() + (source_dir / "logo.png").write_bytes(FAKE_PNG) + images_dir = tmp_path / "images" / "doc" + images_dir.mkdir(parents=True) + + md = "![x](logo.png)\n![y](logo.png)" + result = copy_relative_images(md, source_dir, "doc", images_dir) + + assert [p.name for p in images_dir.iterdir()] == ["logo.png"] + assert result.count("sources/images/doc/logo.png") == 2 + + def test_does_not_overwrite_preexisting_file_in_images_dir(self, tmp_path): + # A file already in images_dir (e.g. from a prior conversion) with a + # colliding basename must not be silently overwritten. + source_dir = tmp_path / "source" + source_dir.mkdir() + (source_dir / "logo.png").write_bytes(FAKE_JPG) # new image + images_dir = tmp_path / "images" / "doc" + images_dir.mkdir(parents=True) + (images_dir / "logo.png").write_bytes(FAKE_PNG) # pre-existing image + + result = copy_relative_images("![a](logo.png)", source_dir, "doc", images_dir) + + # Pre-existing file kept intact; the new image lands under a fresh name. + assert (images_dir / "logo.png").read_bytes() == FAKE_PNG + names = sorted(p.name for p in images_dir.iterdir()) + assert len(names) == 2 + new_name = next(n for n in names if n != "logo.png") + assert (images_dir / new_name).read_bytes() == FAKE_JPG + assert f"sources/images/doc/{new_name}" in result