from docx2pdf import convert
from pdf2image import convert_from_path
import os, sys, zipfile
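# Convert a document (PDF, DOC, DOCX) into page images.
# Requirements: pip install docx2pdf pdf2image
# Poppler (Windows builds): https://github.com/oschwartz10612/poppler-windows/releases/
# Usage: py doc2img.py "<document path>" "<poppler bin directory>", for example:
# py doc2img.py "C:\Users\User\Desktop\Public_folder\Doc2.pdf" "C:\Users\User\Desktop\Public_folder\poppler\bin"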
def base_dir():
    """Return the directory containing the running program, with a trailing separator.

    When ``sys.frozen`` is truthy the location of ``sys.executable`` is used;
    otherwise the absolute path of this source file is used.

    Returns:
        str: The directory path, always ending in exactly one ``os.sep``.
    """
    if getattr(sys, 'frozen', False):
        program_path = sys.executable
    else:
        program_path = os.path.abspath(__file__)
    # Joining with '' appends a separator only when one is missing, so a
    # program located in a filesystem root does not yield a doubled separator.
    return os.path.join(os.path.dirname(program_path), '')
_ALLOWED_EXTENSIONS = ['pdf', 'doc', 'docx']
_WORD_EXTENSIONS = ('doc', 'docx')


def _render_pages(pdf_path, output_dir, poppler_path):
    """Save every page of ``pdf_path`` as ``page_<n>.jpg`` in ``output_dir``.

    Returns the list of written JPEG paths in page order (empty when the
    PDF produced no pages).
    """
    pages = convert_from_path(pdf_path, poppler_path=poppler_path)
    jpg_files = []
    for number, page in enumerate(pages, start=1):
        jpg_path = os.path.join(output_dir, f"page_{number}.jpg")
        page.save(jpg_path, "JPEG")
        jpg_files.append(jpg_path)
    return jpg_files


def _archive_images(jpg_files, zip_path):
    """Pack ``jpg_files`` into ``zip_path`` and then delete the loose JPEGs."""
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for jpg in jpg_files:
            zipf.write(jpg, os.path.basename(jpg))
    # Delete the originals only after the archive has been closed, so a
    # failure while zipping cannot lose pages that were not archived yet.
    for jpg in jpg_files:
        os.remove(jpg)


def main(argv):
    """Convert a PDF/DOC/DOCX document into JPEG page images.

    The images are written to a directory named after the document (without
    its extension) next to the document. A single page is left as
    ``page_1.jpg``; several pages are packed into ``images.zip`` and the
    loose JPEGs are removed. The path of the resulting file is printed.

    Args:
        argv: Command-line arguments without the program name:
            ``argv[0]`` is the document path and the optional ``argv[1]``
            is the directory containing the Poppler binaries.

    Returns:
        int: 0 on success, 1 when the input is missing, has an unsupported
        extension, or yields no pages.
    """
    file = argv[0] if len(argv) > 0 else ''
    # An absent/empty Poppler directory is passed as None (pdf2image's
    # default) instead of an empty string.
    poppler_path = (argv[1] if len(argv) > 1 else '') or None

    # Messages stay on stdout, as before; only the exit status now signals
    # failure (a bare sys.exit() reported success).
    if not os.path.isfile(file):
        print(f'The file "{file}" does not exist.')
        return 1

    stem, dotted_ext = os.path.splitext(os.path.basename(file))
    # Compare case-insensitively so that e.g. "Doc2.PDF" is accepted.
    # NOTE(review): confirm that docx2pdf's convert() accepts .doc files and
    # upper-case extensions; this script only forwards the path to it.
    ext = dotted_ext[1:].lower()
    if ext not in _ALLOWED_EXTENSIONS:
        print('Only these: ' + ", ".join(_ALLOWED_EXTENSIONS))
        return 1

    # os.path.join keeps the result relative when the document was given as
    # a bare file name; plain concatenation produced os.sep + stem, i.e. a
    # directory in the filesystem root.
    output_dir = os.path.join(os.path.dirname(file), stem)
    os.makedirs(output_dir, mode=0o755, exist_ok=True)

    needs_pdf = ext in _WORD_EXTENSIONS
    pdf_path = os.path.join(output_dir, "output.pdf") if needs_pdf else file
    try:
        if needs_pdf:
            convert(file, pdf_path)
        jpg_files = _render_pages(pdf_path, output_dir, poppler_path)
    finally:
        # The intermediate PDF is removed even when rendering fails.
        if needs_pdf and os.path.isfile(pdf_path):
            os.remove(pdf_path)

    if not jpg_files:
        # Previously this case crashed with IndexError on jpg_files[0].
        print(f'No pages were produced from "{file}".')
        return 1

    if len(jpg_files) > 1:
        zip_path = os.path.join(output_dir, 'images.zip')
        _archive_images(jpg_files, zip_path)
        print(zip_path)
    else:
        print(jpg_files[0])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
# *text* - bold,
# ~text~ - italic,
# -text- or _text_ - underlined