Skip to content

Question / Comment: Getting png images without colors #493

@moka1309

Description

@moka1309

Hi,
I am extracting images from a PDF in PNG format, but the resulting images are colorless (they contain no RGB colors). When I run the same code on other PDFs, I get proper images with colors.
Code I am using:

import fitz
import csv


def extract_images(file_name):
    """Save every image of a PDF as PNG and record where it sits on its page.

    For each page, every entry of ``page.getImageList()`` is written to
    ``./images2/page-<page number>-<xref>.png``. A CSV file named
    ``<file_name without extension>-images-with-coordinates.csv`` receives
    one row per image: the PNG file name and the bounding box of the page
    image block whose bytes equal the extracted image (an empty tuple when
    no block matches).

    Args:
        file_name: Path of the PDF document to process.
    """
    import os  # local import: only this function needs it

    output_dir = "./images2"
    # splitext removes whatever extension is present; slicing off a fixed
    # four characters is only right for three-letter extensions.
    csv_file_name = os.path.splitext(file_name)[0] + "-images-with-coordinates.csv"

    doc = fitz.open(file_name)
    try:
        # The PNGs cannot be written into a directory that does not exist.
        os.makedirs(output_dir, exist_ok=True)
        with open(csv_file_name, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Image name", " Block_Coordinates"])
            for page in doc:
                # Blocks of type 1 are the image blocks of the page's text dict.
                image_blocks = [
                    b for b in page.getText("dict")["blocks"] if b["type"] == 1
                ]
                for item in page.getImageList():
                    xref = item[0]  # xref number
                    save_image_name = "page-%s-%s.png" % (page.number, xref)
                    path = f"{output_dir}/{save_image_name}"

                    pix = fitz.Pixmap(doc, xref)  # make pixmap from image
                    if pix.n - pix.alpha < 4:  # can be saved as PNG
                        pix.writePNG(path)
                    else:  # CMYK: must convert first
                        rgb_pix = fitz.Pixmap(fitz.csRGB, pix)
                        rgb_pix.writePNG(path)
                        rgb_pix = None  # free Pixmap resources
                    pix = None

                    # Find the page block carrying the same image bytes so its
                    # bounding box can be reported; () means "not found".
                    image_bytes = doc.extractImage(xref)["image"]
                    bbox = next(
                        (b["bbox"] for b in image_blocks if image_bytes == b["image"]),
                        (),
                    )
                    writer.writerow([save_image_name, bbox])
    finally:
        # Release the document even if extraction fails part-way through.
        doc.close()


if __name__ == "__main__":
    # Script entry point: process the sample PDF next to this script.
    pdf_path = "./test.pdf"
    extract_images(pdf_path)

`
test.pdf

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions