def create_searchable_pdf(
    images: List[Image.Image],
    docs: List[Any],
    output_path: str,
    font_path: Optional[str] = None,
    image_quality: str = "high",
):
    """
    Create a searchable PDF from page images and their OCR results.

    Each page image is embedded as a JPEG background, and the recognised
    words are drawn over it with a fully transparent fill so the text can be
    selected and searched without changing how the page looks.

    Pages are built by pairing ``images`` and ``docs`` with ``zip``; if the
    two lists differ in length, the surplus entries are ignored.

    Only words that lie (by at least half, per ``is_contained(..., 0.5)``)
    inside a paragraph, table cell, table caption, figure paragraph or
    figure caption are written. A word that matches several overlapping
    containers is written once, for the first container in reading order.

    Args:
        images (List[Image.Image]): One image per page. A pillow image is
            used as-is (converted to RGB); anything else is treated as a
            channel-last BGR array and converted to an RGB pillow image.
        docs (List[DocumentAnalyzerSchema]): A list of OCR results, one per
            page.
        output_path (str): Path to the output PDF file.
        font_path (str, optional): Path to the font file. Defaults to None,
            in which case ``FONT_PATH`` is used.
        image_quality (str, optional): Image quality preset
            ("high", "middle", "low"). Defaults to "high".

    Raises:
        KeyError: If ``image_quality`` is not a key of
            ``IMAGE_QUALITY_PRESETS``.
    """
    # Deferred import: ImageReader lets the page image be handed to ReportLab
    # from memory, so no temporary file has to be written to the working
    # directory (which could collide between runs or leak on an exception).
    from reportlab.lib.utils import ImageReader

    if font_path is None:
        font_path = FONT_PATH

    preset = IMAGE_QUALITY_PRESETS[image_quality]
    jpeg_quality = preset["jpeg_quality"]
    max_long_side = preset["max_long_side"]

    pdfmetrics.registerFont(TTFont("MPLUS1p-Medium", font_path))

    packet = BytesIO()
    c = canvas.Canvas(packet)
    # Alpha 0 makes the text layer invisible while keeping it in the PDF.
    text_color = Color(1, 1, 1, alpha=0)

    for image, doc in zip(images, docs):
        if isinstance(image, Image.Image):
            image = image.convert("RGB")
        else:
            image = Image.fromarray(image[:, :, ::-1])  # Convert BGR to RGB

        orig_w, orig_h = image.size

        # Page size is based on original image dimensions to preserve text
        # coordinates; a downscaled background is stretched back to fit.
        c.setPageSize((orig_w, orig_h))
        background = _pdf_encode_page_jpeg(image, max_long_side, jpeg_quality)
        c.drawImage(ImageReader(background), 0, 0, width=orig_w, height=orig_h)

        containers = _pdf_collect_containers(doc)
        placed_words = _pdf_words_in_reading_order(doc.words, containers)

        # The fill colour is set on every page, as the original code did.
        c.setFillColor(text_color)
        for word, rect in placed_words:
            _pdf_draw_hidden_word(c, word, rect, orig_h)

        c.showPage()

    c.save()

    with open(output_path, "wb") as f:
        f.write(packet.getvalue())


def _pdf_encode_page_jpeg(image, max_long_side, jpeg_quality):
    """Return ``image`` as an in-memory JPEG, downscaled to ``max_long_side``."""
    width, height = image.size
    long_side = max(width, height)

    if max_long_side is not None and long_side > max_long_side:
        scale = max_long_side / long_side
        # Clamp to 1 px so an extreme aspect ratio cannot produce a
        # zero-sized image.
        new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        image = image.resize(new_size, Image.LANCZOS)

    buffer = BytesIO()
    image.save(buffer, format="JPEG", quality=jpeg_quality)
    buffer.seek(0)
    return buffer


def _pdf_collect_containers(doc):
    """Return the page's text containers sorted by reading order."""
    # "sub_order" is always a 2-tuple so containers of different kinds that
    # happen to share an "order" can still be compared by the sort below.
    containers = [
        {
            "box": p.box,
            "order": p.order,
            "sub_order": (0, 0),
            "direction": p.direction,
            "type": "paragraph",
        }
        for p in doc.paragraphs
    ]

    for t in doc.tables:
        for cell in t.cells:
            containers.append(
                {
                    "box": cell.box,
                    "order": t.order,
                    "sub_order": (cell.row, cell.col),
                    "direction": "horizontal",  # Assuming table text is horizontal
                    "type": "table_cell",
                },
            )
        if t.caption is not None:
            containers.append(
                {
                    "box": t.caption.box,
                    "order": t.order,
                    # (-1, -1) sorts the caption ahead of every cell.
                    "sub_order": (-1, -1),
                    "direction": t.caption.direction,
                    "type": "table_caption",
                },
            )

    for fig in doc.figures:
        for para_idx, p in enumerate(fig.paragraphs):
            containers.append(
                {
                    "box": p.box,
                    "order": fig.order,
                    "sub_order": (para_idx, 0),
                    "direction": p.direction,
                    "type": "figure_paragraph",
                },
            )
        if fig.caption is not None:
            containers.append(
                {
                    "box": fig.caption.box,
                    "order": fig.order,
                    "sub_order": (0, 0),
                    "direction": fig.caption.direction,
                    "type": "figure_caption",
                },
            )

    # sorted() is stable, so entries with equal keys keep insertion order.
    return sorted(
        containers,
        key=lambda entry: (entry["order"], entry["sub_order"]),
    )


def _pdf_words_in_reading_order(words, containers):
    """Return ``(word, rect)`` pairs grouped by container, each word once."""
    words = list(words)
    # Computed once per page rather than once per container/word pair.
    # NOTE(review): assumes _poly2rect is a pure function - confirm.
    rects = [_poly2rect(word.points) for word in words]

    claimed = set()
    ordered = []
    for container in containers:
        members = [
            idx
            for idx, rect in enumerate(rects)
            if idx not in claimed and is_contained(container["box"], rect, 0.5)
        ]

        if container["direction"] == "vertical":
            # Right-to-left column, then top-to-bottom
            members.sort(key=lambda idx: (-rects[idx][0], rects[idx][1]))
        else:
            # Top-to-bottom, then left-to-right
            members.sort(key=lambda idx: (rects[idx][1], rects[idx][0]))

        claimed.update(members)
        ordered.extend((words[idx], rects[idx]) for idx in members)

    return ordered


def _pdf_draw_hidden_word(c, word, rect, page_height):
    """Draw one word on canvas ``c`` at the position given by ``rect``."""
    text = word.content
    x1, y1, x2, y2 = rect
    bbox_height = y2 - y1
    bbox_width = x2 - x1
    is_vertical = word.direction == "vertical"

    if is_vertical:
        text = to_full_width(text)
        font_size = _calc_font_size(text, bbox_width, bbox_height)
    else:
        font_size = _calc_font_size(text, bbox_height, bbox_width)

    if not font_size:
        return

    c.setFont("MPLUS1p-Medium", font_size)

    # Box coordinates are measured from the top of the image while the PDF
    # canvas measures from the bottom, hence the "page_height - y" flips.
    if not is_vertical:
        base_y = page_height - y2 + (bbox_height - font_size) * 0.5
        c.drawString(x1, base_y, text)
        return

    # Vertical text: spread the characters evenly down the box and draw each
    # one rotated by -90 degrees.
    base_y = page_height - y1
    char_height = bbox_height / len(text) if text else 0
    char_x = x1 + (bbox_width - font_size) / 2
    for j, ch in enumerate(text):
        char_y = base_y - (j * char_height) - char_height / 2
        c.saveState()
        c.translate(char_x, char_y + font_size / 2)
        c.rotate(-90)
        c.drawString(0, 0, ch)
        c.restoreState()