PDF转图片的2种方案

方案一: 使用pdf2image模块

from pdf2image import convert_from_path
import os
# NOTE(review): pwd_file is never read by the script shown here - confirm it is unused before removing it.
pwd_file = ""
# Placeholder only: img_file is reassigned to the "img" subfolder of file_path further down, before conv_pdf is defined.
img_file = ""
# Directory that is scanned for PDFs. The value is a placeholder (Chinese for
# "directory holding the PDF files") and must be replaced with a real path.
file_path = "存放PDF的文件目录"

# Collect the names of all PDF files that sit directly inside file_path.
# The directory is listed once and the same listing is used for both the
# diagnostic print and the filter.
entries = os.listdir(file_path)
print(entries)

# os.path.splitext is used instead of split(".")[-1] so that an
# extension-less entry literally named "pdf" is not mistaken for a PDF.
# The comparison is case-insensitive, so "A.PDF" is accepted as well.
pdf_files = [
    name
    for name in entries
    if os.path.isfile(os.path.join(file_path, name))
    and os.path.splitext(name)[1].lower() == ".pdf"
]
print(pdf_files)

# PDF conversion: rendered pages are written to the "img" subfolder of file_path.
img_file = os.path.join(file_path, "img")


def conv_pdf(file):
    """Render every page of one PDF to a JPEG file inside ``img_file``.

    Args:
        file: Name of the PDF (not a full path), relative to ``file_path``.

    Each page is saved as ``<pdf name without extension>_<page index>.jpg``,
    where the page index starts at 0.
    """
    pdf_file = os.path.join(file_path, file)
    print(pdf_file)

    # splitext only strips the final extension, so "a.b.pdf" becomes "a.b".
    # split(".")[0] would reduce it to "a" and make differently named PDFs
    # overwrite each other's images.
    filename = os.path.splitext(file)[0]

    # Saving a page does not create missing directories, so make sure the
    # output folder exists before the first save.
    os.makedirs(img_file, exist_ok=True)

    # Render at 500 DPI; convert_from_path returns the pages as a list of images.
    pages = convert_from_path(pdf_file, dpi=500)
    for i, page in enumerate(pages):
        img_path = os.path.join(img_file, f"{filename}_{i}.jpg")
        page.save(img_path, "JPEG")

# Convert every PDF found in file_path, one file at a time.
for file in pdf_files:
    conv_pdf(file)

方案二: 使用PyMuPDF模块

# PDF to Images
# pip install PyMuPDF
import fitz
def pdf_to_images(pdf_file):
    """Render each page of ``pdf_file`` to a PNG in the current directory.

    Args:
        pdf_file: Path of the PDF document to open.

    Pages are written as ``page<N>.png``, where ``<N>`` is the page's
    ``number`` attribute. Files from an earlier run with the same names
    are overwritten.
    """
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(pdf_file) as doc:
        for p in doc:
            pix = p.get_pixmap()
            output = f"page{p.number}.png"
            # Pixmap.save replaces the deprecated camelCase writePNG alias;
            # the image format is taken from the ".png" suffix.
            pix.save(output)


pdf_to_images("test.pdf")