feat: Removed docx and added PDF support
This commit is contained in:
3
.dockerignore
Normal file
3
.dockerignore
Normal file
@@ -0,0 +1,3 @@
|
||||
__pycache__/
|
||||
Dockerfile
|
||||
templated-image.tar.gz
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -104,3 +104,4 @@ venv.bak/
|
||||
|
||||
# End of https://www.gitignore.io/api/python
|
||||
.idea/
|
||||
templated-image.tar.gz
|
||||
|
||||
22
Dockerfile
Normal file
22
Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Image for the templating service; the container starts test.py.
FROM python:3

WORKDIR /usr/src/app

# Install the `locales` package and generate the en_US / nl_NL UTF-8 locales
# in a single layer, so the apt package lists never persist in the image.
# --no-install-recommends keeps optional packages out of the image.
RUN apt-get -qq update && \
    apt-get -q -y upgrade && \
    apt-get install -y --no-install-recommends locales && \
    sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
    sed -i -e 's/# nl_NL.UTF-8 UTF-8/nl_NL.UTF-8 UTF-8/' /etc/locale.gen && \
    locale-gen && \
    rm -rf /var/lib/apt/lists/*

# Copy the dependency list on its own first so the pip layer is reused
# when only application code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 1212

CMD ["python", "./test.py"]
|
||||
@@ -1,22 +0,0 @@
|
||||
from docx import Document
|
||||
from xlsx_replacement import XlsxReplacement
|
||||
|
||||
|
||||
def docx_replace_regex(doc_obj: "Document", replacer: "XlsxReplacement") -> None:
    """Replace ``[[`` placeholders in *doc_obj* in place.

    Walks every paragraph of *doc_obj* and then recurses into every cell of
    every table, so *doc_obj* may be anything exposing ``paragraphs`` and
    ``tables`` (the function calls itself with table cells).

    :param doc_obj: object whose paragraphs and tables are rewritten.
    :param replacer: object whose ``find_replace_text(text)`` returns the
        text with placeholders substituted.

    Replacement happens one run at a time, so a placeholder whose text is
    split over several runs is left untouched.
    """
    for paragraph in doc_obj.paragraphs:
        if '[[' not in paragraph.text:
            continue
        # Rewrite run by run (a run is a string with a single style) instead
        # of assigning the paragraph text as a whole.
        for run in paragraph.runs:
            if '[[' in run.text:
                run.text = replacer.find_replace_text(run.text)

    for table in doc_obj.tables:
        for row in table.rows:
            for cell in row.cells:
                docx_replace_regex(cell, replacer)
|
||||
|
||||
|
||||
def load_document(file):
    """Build a ``Document`` from *file* and return it."""
    document = Document(file)
    return document
|
||||
42
pdf_merger.py
Normal file
42
pdf_merger.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from PyPDF2 import PdfFileReader, PdfFileWriter
|
||||
import io
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from typing import IO, Union
|
||||
|
||||
from xlsx_replacement import XlsxReplacement
|
||||
|
||||
|
||||
class PdfMerger:
    """Overlay replacement strings on the pages of an existing PDF.

    The strings come from ``replacer.get_replacements()``, which is iterated
    as ``(x, y, text)`` triples; they are drawn on an A4 canvas that is then
    merged onto the input pages.
    """

    def __init__(self, input_file: Union[str, IO], replacer: "XlsxReplacement"):
        """Open *input_file* for reading and remember *replacer*.

        :param input_file: path or binary stream handed to ``PdfFileReader``.
        :param replacer: source of the ``(x, y, text)`` triples to draw.
        """
        self.in_file = input_file
        self.in_pdf = PdfFileReader(input_file)
        self.replacer = replacer

    def get_new_document(self) -> io.BytesIO:
        """Return the merged PDF as a ``BytesIO`` rewound to position 0.

        Input pages that have no overlay page with the same index are copied
        unchanged.
        """
        overlay = PdfFileReader(self._build_canvas())
        generated = PdfFileWriter()

        for i in range(self.in_pdf.numPages):
            page = self.in_pdf.getPage(i)
            # Page indexes are zero-based, so the last overlay index is
            # numPages - 1; `<=` would request a page that does not exist.
            if i < overlay.numPages:
                page.mergePage(overlay.getPage(i))
            generated.addPage(page)

        out = io.BytesIO()
        generated.write(out)
        out.seek(0)
        return out

    def _build_canvas(self) -> io.BytesIO:
        """Draw every replacement on an A4 canvas and return it as a PDF stream."""
        packet = io.BytesIO()

        can = canvas.Canvas(packet, pagesize=A4)
        for x, y, data in self.replacer.get_replacements():
            can.drawString(x, y, data)
        can.save()

        packet.seek(0)
        return packet
|
||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
# pdf_merger.py uses PdfFileReader/PdfFileWriter, which PyPDF2 3.0 removed.
PyPDF2<3
openpyxl
reportlab
Flask
|
||||
29
test.py
29
test.py
@@ -1,41 +1,32 @@
|
||||
#!/usr/bin/env python3
|
||||
from flask_api import FlaskAPI
|
||||
from flask import Flask, request, send_file
|
||||
from docx_replacement import docx_replace_regex, load_document
|
||||
from xlsx_replacement import XlsxReplacement
|
||||
from io import BytesIO
|
||||
from pdf_merger import PdfMerger
|
||||
|
||||
# app = FlaskAPI(__name__)
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route('/pdf-test/pdf_replace', methods=['POST'])
def test_upload():
    """Merge the values of an uploaded xlsx into an uploaded PDF.

    Expects a multipart POST with the file fields ``xlsx`` and ``pdf`` and
    answers with the merged PDF, sent under the uploaded PDF's file name.

    :raises ValueError: when no files were sent, or when either the ``xlsx``
        or the ``pdf`` field is missing.
    """
    if not request.files:
        raise ValueError("Files required!")

    if 'xlsx' not in request.files or 'pdf' not in request.files:
        # The message names the fields that are actually checked.
        raise ValueError('Missing xlsx or pdf')

    xlsx = XlsxReplacement(request.files['xlsx'])
    merger = PdfMerger(request.files['pdf'], xlsx)
    out = merger.get_new_document()
    return send_file(out, mimetype='application/pdf',
                     attachment_filename=request.files['pdf'].filename)
|
||||
|
||||
|
||||
@app.route('/docx-test/testpage')
|
||||
@app.route('/pdf-test/testpage')
|
||||
def test_page():
|
||||
return """
|
||||
<html><body>
|
||||
<form action="/docx-test/docx_replace" method="post" enctype="multipart/form-data">
|
||||
<label>Select DOCX
|
||||
<input type="file" name="docx" id="docx"></label>
|
||||
<form action="/pdf-test/pdf_replace" method="post" enctype="multipart/form-data">
|
||||
<label>Select PDF
|
||||
<input type="file" name="pdf" id="pdf"></label>
|
||||
<label>Select XLSX
|
||||
<input type="file" name="xlsx" id="xlsx"></label>
|
||||
<input type="submit" value="Upload" name="submit">
|
||||
|
||||
@@ -1,34 +1,50 @@
|
||||
import re
|
||||
import openpyxl
|
||||
from typing import Union, IO
|
||||
from openpyxl.cell import Cell
|
||||
from typing import Union, Tuple, Iterator, IO
|
||||
import locale
|
||||
|
||||
|
||||
class XlsxReplacement:
    """Supply replacement values read from an xlsx workbook.

    Two access styles are offered: ``find_replace_text`` substitutes
    ``[[A1]]`` / ``[[sheet:A1]]`` placeholders inside a string, and
    ``get_replacements`` yields positioned strings from the ``replacements``
    sheet, honouring the ``settings`` sheet.
    """

    def __init__(self, file: Union[str, IO]):
        """Load *file* (a path or stream) as a read-only, values-only workbook."""
        self.file = file
        self.xlsx = openpyxl.load_workbook(file, read_only=True, data_only=True)
        # Group 1: optional sheet name, group 2: cell reference such as "B12".
        self.re_replace = re.compile(r'\[\[(?:(\w+):)?([a-zA-Z]+\d+)\]\]')
        # Human-readable messages collected by _regex_replacement.
        self.errors = []
        # Values saved by _get_settings so _reset_settings can restore them.
        self.reset_settings = {}

    def find_replace_text(self, text: str) -> str:
        """Return *text* with every placeholder replaced by its cell value."""
        if '[[' not in text:
            return text
        return self.re_replace.sub(self._regex_replacement, text)

    def get_replacements(self) -> Iterator[Tuple[int, int, str]]:
        """Yield ``(x, y, text)`` for each row of the ``replacements`` sheet.

        Columns are read as: x, y, value, optional %-style format (applied
        locale-aware), optional ``str.format`` template. Rows in which every
        cell is empty are skipped. Settings from the ``settings`` sheet are
        applied before the first row is produced and restored once the
        generator finishes or is closed.
        """
        try:
            self._get_settings()
            sheet = self.xlsx['replacements']
            for row in sheet.iter_rows(min_row=1, max_col=5):
                content = [c.value for c in row]
                if all(item is None for item in content):
                    continue
                x = int(content[0])
                y = int(content[1])
                value = content[2]
                if len(content) == 5:
                    value_format, str_format = content[3], content[4]
                    if isinstance(value_format, str) and value_format:
                        # locale.format() was removed in Python 3.12;
                        # format_string() is its replacement.
                        value = locale.format_string(value_format, value)
                    if isinstance(str_format, str) and str_format:
                        value = str_format.format(value)
                if not isinstance(value, str):
                    value = str(value)
                yield x, y, value
        finally:
            # The locale is process-wide state; never leave it modified.
            self._reset_settings()

    def _get_settings(self) -> None:
        """Apply the key/value rows of the ``settings`` sheet.

        Only the ``locale`` key is acted upon: the current locale is saved in
        ``reset_settings`` and the requested one is activated.
        """
        sheet = self.xlsx['settings']
        for row in sheet.iter_rows(max_col=2):
            values = [cell.value for cell in row]
            if len(values) < 2:
                continue
            key, value = values
            if key == 'locale':
                print("Using locale %s" % value)
                # setlocale() without a new value returns the current setting
                # as a string that can be handed back later; setdefault keeps
                # the first saved value if the key occurs more than once.
                self.reset_settings.setdefault('locale', locale.setlocale(locale.LC_ALL))
                locale.setlocale(locale.LC_ALL, value)

    def _regex_replacement(self, match) -> str:
        """Resolve one placeholder match to the string value of its cell.

        Failures are recorded in ``self.errors`` and reported inline as an
        ``ERR ...`` marker instead of raising.
        """
        sheet_name, cell = match.groups()
        if sheet_name is None:
            sheet = self.xlsx.active
        else:
            try:
                sheet = self.xlsx[sheet_name]
            except (KeyError, IndexError):
                # A workbook lookup by unknown name raises KeyError.
                self.errors.append('Unknown sheet {sheet}'.format(sheet=sheet_name))
                return 'ERR Unknown sheet'
        try:
            return str(sheet[cell].value)
        except (KeyError, IndexError, ValueError):
            label = sheet_name if sheet_name is not None else getattr(sheet, 'title', None)
            self.errors.append('Unknown cell {sheet}:{cell}'.format(sheet=label, cell=cell))
            return 'ERR Unknown cell'

    def _reset_settings(self) -> None:
        """Restore everything ``_get_settings`` changed, then forget it."""
        for key, value in self.reset_settings.items():
            if key == 'locale':
                locale.setlocale(locale.LC_ALL, value)
        self.reset_settings.clear()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user