-
Notifications
You must be signed in to change notification settings - Fork 664
Closed
Labels
Description
Hi,
I am extracting images from a PDF and saving them in PNG format, but the resulting images come out colorless (the RGB colors are missing). When I run the same code on other PDFs, I get proper images with colors.
Code I am using:
import csv
import os

import fitz
def extract_images(file_name):
    """Save every image of a PDF as PNG and record its position in a CSV.

    For each image referenced by each page, a file named
    ``page-<page number>-<xref>.png`` is written to ``./images2/``.
    A CSV file named ``<file_name without extension>-images-with-coordinates.csv``
    receives a header row plus one row per image: the PNG file name and the
    bounding box of the page's image block whose bytes equal the extracted
    image (an empty tuple when no block matches).

    Args:
        file_name: Path of the PDF document to process.

    Returns:
        None. All results are written to disk.
    """
    output_dir = "./images2"
    # Make sure the target directory exists before any PNG is written to it.
    os.makedirs(output_dir, exist_ok=True)

    # splitext strips an extension of any length; the previous [:-4] slice
    # was only correct for three-character extensions such as ".pdf".
    csv_file_name = os.path.splitext(file_name)[0] + "-images-with-coordinates.csv"

    doc = fitz.open(file_name)
    try:
        with open(csv_file_name, "w", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(["Image name", " Block_Coordinates"])

            for page in doc:
                # Blocks of type 1 in the "dict" text output are image blocks;
                # they carry both the image bytes and the bbox on the page.
                image_blocks = [
                    b for b in page.getText("dict")["blocks"] if b["type"] == 1
                ]

                for item in page.getImageList():
                    xref = item[0]  # xref number of the image object
                    pix = fitz.Pixmap(doc, xref)  # make pixmap from image
                    image_bytes = doc.extractImage(xref)["image"]

                    save_image_name = "page-%s-%s.png" % (page.number, xref)
                    path = f"{output_dir}/{save_image_name}"

                    if pix.n - pix.alpha >= 4:
                        # CMYK: must be converted to RGB before saving as PNG.
                        pix = fitz.Pixmap(fitz.csRGB, pix)
                    pix.writePNG(path)
                    pix = None  # free Pixmap resources

                    # Locate the image on the page by comparing raw bytes;
                    # fall back to an empty tuple when nothing matches.
                    bbox = next(
                        (
                            b["bbox"]
                            for b in image_blocks
                            if b["image"] == image_bytes
                        ),
                        (),
                    )
                    writer.writerow([save_image_name, bbox])
    finally:
        # Always release the document, even if extraction fails part-way.
        doc.close()
if __name__ == "__main__":
    # Script entry point: run the extraction on the sample PDF that sits
    # in the current working directory.
    sample_pdf = "./test.pdf"
    extract_images(sample_pdf)
`
test.pdf