feat: Removed docx and added PDF support
This commit is contained in:
3
.dockerignore
Normal file
3
.dockerignore
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
__pycache__/
|
||||||
|
Dockerfile
|
||||||
|
templated-image.tar.gz
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -104,3 +104,4 @@ venv.bak/
|
|||||||
|
|
||||||
# End of https://www.gitignore.io/api/python
|
# End of https://www.gitignore.io/api/python
|
||||||
.idea/
|
.idea/
|
||||||
|
templated-image.tar.gz
|
||||||
|
|||||||
22
Dockerfile
Normal file
22
Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Container image for the PDF templating service started by test.py.
FROM python:3

WORKDIR /usr/src/app

# Install the `locales` package, which provides /etc/locale.gen and locale-gen
# used below. --no-install-recommends keeps optional extras out of the image,
# and the apt lists are removed in the same layer so they never get committed.
RUN apt-get -qq update && \
    apt-get -q -y upgrade && \
    apt-get install -y --no-install-recommends locales && \
    rm -rf /var/lib/apt/lists/*

# Un-comment the en_US and nl_NL UTF-8 entries and generate them, so the
# application can switch to those locales at runtime.
RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
    sed -i -e 's/# nl_NL.UTF-8 UTF-8/nl_NL.UTF-8 UTF-8/' /etc/locale.gen && \
    locale-gen

# Copy the requirements on their own first: the dependency layer is then only
# rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# NOTE(review): presumably the port the app in test.py listens on - confirm.
EXPOSE 1212

CMD ["python", "./test.py"]
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
from docx import Document
|
|
||||||
from xlsx_replacement import XlsxReplacement
|
|
||||||
|
|
||||||
|
|
||||||
def docx_replace_regex(doc_obj: Document, replacer: XlsxReplacement):
    """Substitute ``[[...]]`` placeholders throughout *doc_obj* in place.

    Every paragraph of *doc_obj* whose text contains ``[[`` is processed run
    by run: each run whose own text contains ``[[`` has its text replaced by
    ``replacer.find_replace_text(run.text)``. The function then recurses into
    every cell of every table of *doc_obj*.

    Because the check is done per run, a placeholder whose ``[[`` marker is
    split across two runs is not substituted.

    :param doc_obj: object exposing ``paragraphs`` and ``tables`` (the
        document itself, or a table cell when recursing).
    :param replacer: object whose ``find_replace_text`` performs the
        substitution on a single string.
    """
    for paragraph in doc_obj.paragraphs:
        if '[[' not in paragraph.text:
            continue
        # Edit individual runs (strings sharing one style) rather than the
        # paragraph text as a whole.
        for run in paragraph.runs:
            if '[[' in run.text:
                run.text = replacer.find_replace_text(run.text)

    for table in doc_obj.tables:
        for row in table.rows:
            for cell in row.cells:
                docx_replace_regex(cell, replacer)
|
|
||||||
|
|
||||||
|
|
||||||
def load_document(file):
    """Open *file* as a ``Document`` and return it.

    :param file: passed unchanged to the ``Document`` constructor.
    :return: the resulting ``Document`` instance.
    """
    document = Document(file)
    return document
|
|
||||||
42
pdf_merger.py
Normal file
42
pdf_merger.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from PyPDF2 import PdfFileReader, PdfFileWriter
|
||||||
|
import io
|
||||||
|
from reportlab.pdfgen import canvas
|
||||||
|
from reportlab.lib.pagesizes import A4
|
||||||
|
from typing import IO, Union
|
||||||
|
|
||||||
|
from xlsx_replacement import XlsxReplacement
|
||||||
|
|
||||||
|
|
||||||
|
class PdfMerger:
    """Overlay spreadsheet-driven text on top of an existing PDF.

    The text to draw comes from ``replacer.get_replacements()``, which yields
    ``(x, y, text)`` triples. They are rendered onto an in-memory overlay PDF
    which is then merged page by page onto the input document.
    """

    def __init__(self, input_file: Union[str, IO], replacer: XlsxReplacement):
        """Open *input_file* for reading and remember the *replacer*.

        :param input_file: path or file object of the PDF to stamp.
        :param replacer: source of the ``(x, y, text)`` replacements.
        """
        self.in_file = input_file
        # NOTE(review): PdfFileReader/PdfFileWriter are the legacy PyPDF2
        # names - confirm the installed PyPDF2 version still provides them.
        self.in_pdf = PdfFileReader(input_file)
        self.replacer = replacer

    def get_new_document(self) -> io.BytesIO:
        """Return the input PDF with the overlay merged in.

        Page ``i`` of the input is merged with page ``i`` of the overlay when
        the overlay has such a page; remaining input pages are copied as-is.

        :return: a ``BytesIO`` positioned at offset 0 holding the new PDF.
        """
        overlay = PdfFileReader(self._build_canvas())
        overlay_pages = overlay.numPages

        generated = PdfFileWriter()

        for i in range(self.in_pdf.numPages):
            page = self.in_pdf.getPage(i)
            # Valid overlay indices are 0 .. overlay_pages - 1. The previous
            # `<=` comparison asked for one page too many and failed on any
            # input with more pages than the overlay.
            if i < overlay_pages:
                page.mergePage(overlay.getPage(i))
            generated.addPage(page)

        out = io.BytesIO()
        generated.write(out)
        out.seek(0)
        return out

    def _build_canvas(self) -> io.BytesIO:
        """Render every replacement onto an A4 canvas and return it as a PDF.

        All strings are drawn without starting a new canvas page, so the
        overlay presumably consists of a single page.

        :return: a ``BytesIO`` positioned at offset 0 holding the overlay PDF.
        """
        packet = io.BytesIO()

        can = canvas.Canvas(packet, pagesize=A4)
        for x, y, data in self.replacer.get_replacements():
            can.drawString(x, y, data)
        can.save()

        packet.seek(0)
        return packet
|
||||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
PyPDF2
|
||||||
|
openpyxl
|
||||||
|
reportlab
|
||||||
|
Flask
|
||||||
29
test.py
29
test.py
@@ -1,41 +1,32 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from flask_api import FlaskAPI
|
|
||||||
from flask import Flask, request, send_file
|
from flask import Flask, request, send_file
|
||||||
from docx_replacement import docx_replace_regex, load_document
|
|
||||||
from xlsx_replacement import XlsxReplacement
|
from xlsx_replacement import XlsxReplacement
|
||||||
from io import BytesIO
|
from pdf_merger import PdfMerger
|
||||||
|
|
||||||
# app = FlaskAPI(__name__)
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/pdf-test/pdf_replace', methods=['POST'])
def test_upload():
    """Stamp the uploaded PDF with the data from the uploaded spreadsheet.

    Expects a multipart POST carrying two files: ``pdf`` (the template) and
    ``xlsx`` (the workbook describing what to draw). Responds with the merged
    PDF, offered under the uploaded PDF's filename.

    :raises ValueError: when no files were uploaded, or when either the
        ``xlsx`` or the ``pdf`` part is missing.
    """
    if not request.files:
        raise ValueError("Files required!")

    if 'xlsx' not in request.files or 'pdf' not in request.files:
        # The message used to mention "docx", which this endpoint no longer
        # accepts.
        raise ValueError('Missing xlsx or pdf')

    pdf_upload = request.files['pdf']
    xlsx = XlsxReplacement(request.files['xlsx'])
    merger = PdfMerger(pdf_upload, xlsx)
    out = merger.get_new_document()
    # NOTE(review): newer Flask releases rename `attachment_filename` to
    # `download_name` - confirm against the Flask version actually installed.
    return send_file(out, mimetype='application/pdf', attachment_filename=pdf_upload.filename)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/docx-test/testpage')
|
@app.route('/pdf-test/testpage')
|
||||||
def test_page():
|
def test_page():
|
||||||
return """
|
return """
|
||||||
<html><body>
|
<html><body>
|
||||||
<form action="/docx-test/docx_replace" method="post" enctype="multipart/form-data">
|
<form action="/pdf-test/pdf_replace" method="post" enctype="multipart/form-data">
|
||||||
<label>Select DOCX
|
<label>Select PDF
|
||||||
<input type="file" name="docx" id="docx"></label>
|
<input type="file" name="pdf" id="pdf"></label>
|
||||||
<label>Select XLSX
|
<label>Select XLSX
|
||||||
<input type="file" name="xlsx" id="xlsx"></label>
|
<input type="file" name="xlsx" id="xlsx"></label>
|
||||||
<input type="submit" value="Upload" name="submit">
|
<input type="submit" value="Upload" name="submit">
|
||||||
|
|||||||
@@ -1,34 +1,50 @@
|
|||||||
import re
|
import re
|
||||||
import openpyxl
|
import openpyxl
|
||||||
from typing import Union, IO
|
from openpyxl.cell import Cell
|
||||||
|
from typing import Union, Tuple, Iterator, IO
|
||||||
|
import locale
|
||||||
|
|
||||||
|
|
||||||
class XlsxReplacement:
    """Read text replacements and settings from an XLSX workbook.

    The workbook is expected to contain two sheets:

    * ``replacements`` - one row per string to draw, with the columns
      ``x``, ``y``, ``value``, ``value_format`` and ``str_format``.
    * ``settings`` - ``key``/``value`` rows; the only key handled is
      ``locale``.
    """

    def __init__(self, file: Union[str, IO]):
        """Load *file* read-only, using cached cell values (``data_only``).

        :param file: path or file object of the workbook.
        """
        self.file = file
        self.xlsx = openpyxl.load_workbook(file, read_only=True, data_only=True)
        self.errors = []
        # Settings changed by _get_settings(), mapped to the value to restore.
        self.reset_settings = {}

    def get_replacements(self) -> Iterator[Tuple[int, int, str]]:
        """Yield ``(x, y, text)`` for every row of the ``replacements`` sheet.

        ``x`` and ``y`` are converted with ``int()``. When the row has five
        columns, a non-empty string in ``value_format`` is applied with
        ``locale.format_string`` and a non-empty string in ``str_format`` is
        applied with ``str.format``; the result is finally converted to
        ``str``.

        The workbook settings are applied before the first row is read and
        are reverted once the generator finishes or is closed.
        """
        self._get_settings()
        try:
            sheet = self.xlsx['replacements']
            for row in sheet.iter_rows(min_row=1, max_col=5):
                content = [c.value for c in row]
                x = int(content[0])
                y = int(content[1])
                value = content[2]
                if len(content) == 5:
                    value_format, str_format = content[3], content[4]
                    if isinstance(value_format, str) and value_format:
                        # locale.format() is deprecated and no longer exists
                        # in Python 3.12; format_string() is its replacement.
                        value = locale.format_string(value_format, value)
                    if isinstance(str_format, str) and str_format:
                        value = str_format.format(value)
                if not isinstance(value, str):
                    value = str(value)
                yield x, y, value
        finally:
            # Always give the process its previous locale back, even when a
            # row fails to convert or the consumer stops early.
            self._reset_settings()

    def _get_settings(self):
        """Apply the ``settings`` sheet, recording how to undo each change.

        NOTE(review): ``locale.setlocale`` changes process-wide state, so
        concurrent requests using different locales may interfere - confirm
        how the server is run.
        """
        sheet = self.xlsx['settings']
        for row in sheet.iter_rows(max_col=2):
            cells = [i.value for i in row]
            if len(cells) < 2:
                # Nothing to unpack from a row without both key and value.
                continue
            key, value = cells
            if key == 'locale':
                print("Using locale %s" % value)
                # Querying setlocale() returns a string that can be handed
                # back to it later, also when no locale has been set yet
                # (where joining locale.getlocale() fails on None).
                old_locale = locale.setlocale(locale.LC_ALL)
                locale.setlocale(locale.LC_ALL, value)
                # Keep the first recorded value: that is the one to restore.
                self.reset_settings.setdefault('locale', old_locale)

    def _reset_settings(self):
        """Revert every setting recorded in ``self.reset_settings``."""
        # .items() is required: iterating the dict itself yields only keys.
        for key, value in self.reset_settings.items():
            if key == 'locale':
                locale.setlocale(locale.LC_ALL, value)
        self.reset_settings.clear()
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user