Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions openkb/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,18 @@ def copy_relative_images(
- Missing source file: log a warning and leave the original text unchanged.
"""
result = markdown
# Track the destination chosen for each already-copied source so the same
# image referenced twice isn't duplicated, plus the set of taken names so
# two *different* sources that share a basename (e.g. ``a/logo.png`` and
# ``b/logo.png``) don't overwrite each other and collapse both links onto a
# single image.
assigned: dict[Path, str] = {}
# Seed taken names from any files already in images_dir so a re-convert into
# an existing per-doc directory disambiguates against them instead of
# overwriting a previously-copied image with a same-basename source.
taken: set[str] = (
{p.name for p in images_dir.iterdir()} if images_dir.is_dir() else set()
)

for match in _RELATIVE_RE.finditer(markdown):
alt, rel_path = match.group(1), match.group(2)
Expand All @@ -236,10 +248,17 @@ def copy_relative_images(
)
continue

filename = src.name
dest = images_dir / filename
images_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dest)
filename = assigned.get(src)
if filename is None:
filename = src.name
n = 1
while filename in taken:
filename = f"{src.stem}_{n}{src.suffix}"
n += 1
assigned[src] = filename
taken.add(filename)
images_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, images_dir / filename)

new_ref = f"![{alt}](sources/images/{doc_name}/{filename})"
result = result.replace(match.group(0), new_ref, 1)
Expand Down
57 changes: 57 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,60 @@ def test_multiple_relative_images_all_copied(self, tmp_path):
assert "![b](sources/images/doc/b.jpg)" in result
assert (images_dir / "a.png").exists()
assert (images_dir / "b.jpg").exists()

def test_same_basename_different_dirs_no_overwrite(self, tmp_path):
# Two distinct images sharing a basename must not overwrite each other
# (which would lose one image and point both links at the survivor).
source_dir = tmp_path / "source"
(source_dir / "a").mkdir(parents=True)
(source_dir / "b").mkdir(parents=True)
(source_dir / "a" / "logo.png").write_bytes(FAKE_PNG)
(source_dir / "b" / "logo.png").write_bytes(FAKE_JPG)

images_dir = tmp_path / "images" / "doc"
images_dir.mkdir(parents=True)

md = "![a](a/logo.png)\n![b](b/logo.png)"
result = copy_relative_images(md, source_dir, "doc", images_dir)

saved = sorted(p.name for p in images_dir.iterdir())
assert len(saved) == 2 # both copied, neither overwritten
assert {(images_dir / n).read_bytes() for n in saved} == {FAKE_PNG, FAKE_JPG}
links = sorted(
line.split("](")[1].rstrip(")") for line in result.strip().splitlines()
)
assert links[0] != links[1] # links point at different files

def test_same_image_referenced_twice_is_copied_once(self, tmp_path):
# Identical source referenced twice: copy once, both links agree.
source_dir = tmp_path / "source"
source_dir.mkdir()
(source_dir / "logo.png").write_bytes(FAKE_PNG)
images_dir = tmp_path / "images" / "doc"
images_dir.mkdir(parents=True)

md = "![x](logo.png)\n![y](logo.png)"
result = copy_relative_images(md, source_dir, "doc", images_dir)

assert [p.name for p in images_dir.iterdir()] == ["logo.png"]
assert result.count("sources/images/doc/logo.png") == 2

def test_does_not_overwrite_preexisting_file_in_images_dir(self, tmp_path):
# A file already in images_dir (e.g. from a prior conversion) with a
# colliding basename must not be silently overwritten.
source_dir = tmp_path / "source"
source_dir.mkdir()
(source_dir / "logo.png").write_bytes(FAKE_JPG) # new image
images_dir = tmp_path / "images" / "doc"
images_dir.mkdir(parents=True)
(images_dir / "logo.png").write_bytes(FAKE_PNG) # pre-existing image

result = copy_relative_images("![a](logo.png)", source_dir, "doc", images_dir)

# Pre-existing file kept intact; the new image lands under a fresh name.
assert (images_dir / "logo.png").read_bytes() == FAKE_PNG
names = sorted(p.name for p in images_dir.iterdir())
assert len(names) == 2
new_name = next(n for n in names if n != "logo.png")
assert (images_dir / new_name).read_bytes() == FAKE_JPG
assert f"sources/images/doc/{new_name}" in result