Skip to content

Commit

Permalink
Add antialias parameter to Page.to_image(...)
Browse files Browse the repository at this point in the history
... and associated methods. Thanks to @cmdlineluser for flagging.

For details, see:
#899 (reply in thread)
  • Loading branch information
jsvine committed Jul 19, 2023
1 parent f3c628a commit 7e28931
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 5 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,10 @@ If you pass the `pdfminer.six`-handling `laparams` parameter to `pdfplumber.open

To turn any page (including cropped pages) into a `PageImage` object, call `my_page.to_image()`. You can optionally pass *one* of the following keyword arguments:

- `resolution`: The desired number pixels per inch. Defaults to 72.
- `width`: The desired image width in pixels.
- `height`: The desired image width in pixels.
- `resolution`: The desired number of pixels per inch. Default: `72`. Type: `int`.
- `width`: The desired image width in pixels. Default: unset, determined by `resolution`. Type: `int`.
- `height`: The desired image height in pixels. Default: unset, determined by `resolution`. Type: `int`.
- `antialias`: Whether to use antialiasing when creating the image. Setting to `True` creates images with less-jagged text and graphics, but with larger file sizes. Default: `False`. Type: `bool`.

For instance:

Expand Down
7 changes: 6 additions & 1 deletion pdfplumber/display.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def get_page_image(
page_ix: int,
resolution: Union[int, float],
password: Optional[str],
antialias: bool = False,
) -> PIL.Image.Image:
# If we are working with a file object saved to disk
if hasattr(stream, "name"):
Expand All @@ -54,7 +55,9 @@ def get_page_image(
input_data=src,
password=password,
scale=resolution / 72,
no_smoothtext=True,
no_smoothtext=not antialias,
no_smoothpath=not antialias,
no_smoothimage=not antialias,
# Non-modifiable arguments
renderer=pypdfium2._helpers.page.PdfPage.render,
converter=pypdfium2.PdfBitmap.to_pil,
Expand All @@ -73,13 +76,15 @@ def __init__(
page: "Page",
original: Optional[PIL.Image.Image] = None,
resolution: Union[int, float] = DEFAULT_RESOLUTION,
antialias: bool = False,
):
self.page = page
if original is None:
self.original = get_page_image(
stream=page.pdf.stream,
page_ix=page.page_number - 1,
resolution=resolution,
antialias=antialias,
password=page.pdf.password,
)
else:
Expand Down
5 changes: 4 additions & 1 deletion pdfplumber/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ def to_image(
resolution: Optional[Union[int, float]] = None,
width: Optional[Union[int, float]] = None,
height: Optional[Union[int, float]] = None,
antialias: bool = False,
) -> "PageImage":
"""
You can pass a maximum of 1 of the following:
Expand All @@ -479,7 +480,9 @@ def to_image(
elif height is not None:
resolution = 72 * height / self.height

return PageImage(self, resolution=resolution or DEFAULT_RESOLUTION)
return PageImage(
self, resolution=resolution or DEFAULT_RESOLUTION, antialias=antialias
)

def to_dict(self, object_types: Optional[List[str]] = None) -> Dict[str, Any]:
if object_types is None:
Expand Down
4 changes: 4 additions & 0 deletions tests/test_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def test_no_quantize(self):
self.im.save(b, "PNG", quantize=False)
assert len(b.getvalue()) > len(self.im._repr_png_())

def test_antialias(self):
aa = self.pdf.pages[0].to_image(antialias=True)
assert len(aa._repr_png_()) > len(self.im._repr_png_())

def test_decompression_bomb(self):
original_max = PIL.Image.MAX_IMAGE_PIXELS
PIL.Image.MAX_IMAGE_PIXELS = 10
Expand Down

0 comments on commit 7e28931

Please sign in to comment.