feat: Removed docx and added PDF support

This commit is contained in:
2018-03-11 22:00:16 +01:00
parent 655a0461f5
commit 9e1126b621
8 changed files with 119 additions and 62 deletions

3
.dockerignore Normal file
View File

@@ -0,0 +1,3 @@
__pycache__/
Dockerfile
templated-image.tar.gz

1
.gitignore vendored
View File

@@ -104,3 +104,4 @@ venv.bak/
# End of https://www.gitignore.io/api/python
.idea/
templated-image.tar.gz

22
Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
# Image that runs ./test.py on top of the official Python 3 base image.
FROM python:3
# Relative paths in the COPY and CMD instructions below resolve against this directory.
WORKDIR /usr/src/app
# Refresh the package index, upgrade installed packages and install `locales`;
# the apt package lists are deleted in the same RUN instruction.
RUN apt-get -qq update && \
apt-get -q -y upgrade && \
apt-get install -y locales && \
rm -rf /var/lib/apt/lists/*
# Un-comment the en_US.UTF-8 and nl_NL.UTF-8 entries in /etc/locale.gen and
# generate those locales.
# NOTE(review): presumably required so the application can switch locale when
# formatting values - confirm.
RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
sed -i -e 's/# nl_NL.UTF-8 UTF-8/nl_NL.UTF-8 UTF-8/' /etc/locale.gen && \
locale-gen
# requirements.txt is copied and installed before the rest of the sources.
# NOTE(review): presumably so the dependency layer stays cached when only
# application code changes - confirm.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
# Copy the remaining build context into the working directory.
COPY . .
# NOTE(review): presumably the port test.py listens on - confirm.
EXPOSE 1212
CMD ["python", "./test.py"]

View File

@@ -1,22 +0,0 @@
from docx import Document
from xlsx_replacement import XlsxReplacement
def docx_replace_regex(doc_obj: Document, replacer: XlsxReplacement):
    """Replace ``[[...]]`` placeholders in a document, in place.

    Every paragraph of ``doc_obj`` whose text contains ``[[`` is processed run
    by run, so the styling attached to each run is kept.  Tables are handled
    by recursing into each cell, which exposes ``paragraphs`` and ``tables``
    just like the document itself.

    Args:
        doc_obj: Object exposing ``paragraphs`` and ``tables`` (a document or
            a table cell).
        replacer: Object whose ``find_replace_text(text)`` returns the text
            with placeholders substituted.
    """
    for paragraph in doc_obj.paragraphs:
        if '[[' not in paragraph.text:
            continue
        # Only runs that themselves contain the opening marker are rewritten;
        # a placeholder split across several runs is left untouched.
        for run in paragraph.runs:
            if '[[' in run.text:
                run.text = replacer.find_replace_text(run.text)
    for table in doc_obj.tables:
        for row in table.rows:
            for cell in row.cells:
                docx_replace_regex(cell, replacer)
def load_document(file):
    """Open ``file`` with ``Document`` and return the resulting object."""
    document = Document(file)
    return document

42
pdf_merger.py Normal file
View File

@@ -0,0 +1,42 @@
from PyPDF2 import PdfFileReader, PdfFileWriter
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from typing import IO, Union
from xlsx_replacement import XlsxReplacement
class PdfMerger:
    """Stamp replacement text on top of the pages of an existing PDF.

    The text comes from ``replacer.get_replacements()``, which yields
    ``(x, y, text)`` triples.  They are drawn onto an in-memory overlay PDF
    that is then merged page by page onto the input document.
    """

    def __init__(self, input_file: Union[str, IO], replacer: XlsxReplacement):
        """Open the input PDF.

        Args:
            input_file: Path or file-like object handed to ``PdfFileReader``.
            replacer: Source of the ``(x, y, text)`` replacements.
        """
        self.in_file = input_file
        self.in_pdf = PdfFileReader(input_file)
        self.replacer = replacer

    def get_new_document(self) -> io.BytesIO:
        """Return the merged PDF as a ``BytesIO`` positioned at offset 0."""
        overlay = PdfFileReader(self._build_canvas())
        overlay_pages = overlay.numPages
        generated = PdfFileWriter()
        for index in range(self.in_pdf.numPages):
            page = self.in_pdf.getPage(index)
            # Page indices are zero-based, so the last overlay page is
            # overlay_pages - 1; input pages beyond it are copied unchanged.
            if index < overlay_pages:
                page.mergePage(overlay.getPage(index))
            generated.addPage(page)
        out = io.BytesIO()
        generated.write(out)
        out.seek(0)
        return out

    def _build_canvas(self) -> io.BytesIO:
        """Draw every replacement on an A4 canvas and return it as a PDF stream."""
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=A4)
        for x, y, data in self.replacer.get_replacements():
            can.drawString(x, y, data)
        can.save()
        packet.seek(0)
        return packet

4
requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
PyPDF2
openpyxl
reportlab
Flask

29
test.py
View File

@@ -1,41 +1,32 @@
#!/usr/bin/env python3
from flask_api import FlaskAPI
from flask import Flask, request, send_file
from docx_replacement import docx_replace_regex, load_document
from xlsx_replacement import XlsxReplacement
from io import BytesIO
from pdf_merger import PdfMerger
# Application object; its `route` decorator is used to register the views.
# The FlaskAPI-based constructor is left disabled in favour of plain Flask.
# app = FlaskAPI(__name__)
app = Flask(__name__)
@app.route('/pdf-test/pdf_replace', methods=['POST'])
def test_upload():
    """Merge the uploaded spreadsheet's replacements onto the uploaded PDF.

    Expects a multipart POST carrying two files: ``xlsx`` (the workbook that
    drives the replacements) and ``pdf`` (the document to stamp).

    Returns:
        The generated PDF, sent back under the uploaded PDF's file name.

    Raises:
        ValueError: If no files were uploaded, or either ``xlsx`` or ``pdf``
            is missing.
    """
    uploads = request.files
    if not uploads:
        raise ValueError("Files required!")
    if 'xlsx' not in uploads or 'pdf' not in uploads:
        # The message names the fields that are actually checked above.
        raise ValueError('Missing xlsx or pdf')
    pdf_upload = uploads['pdf']
    xlsx = XlsxReplacement(uploads['xlsx'])
    merger = PdfMerger(pdf_upload, xlsx)
    out = merger.get_new_document()
    return send_file(out, mimetype='application/pdf', attachment_filename=pdf_upload.filename)
@app.route('/docx-test/testpage')
@app.route('/pdf-test/testpage')
def test_page():
return """
<html><body>
<form action="/docx-test/docx_replace" method="post" enctype="multipart/form-data">
<label>Select DOCX
<input type="file" name="docx" id="docx"></label>
<form action="/pdf-test/pdf_replace" method="post" enctype="multipart/form-data">
<label>Select PDF
<input type="file" name="pdf" id="pdf"></label>
<label>Select XLSX
<input type="file" name="xlsx" id="xlsx"></label>
<input type="submit" value="Upload" name="submit">

View File

@@ -1,34 +1,50 @@
import re
import openpyxl
from typing import Union, IO
from openpyxl.cell import Cell
from typing import Union, Tuple, Iterator, IO
import locale
class XlsxReplacement:
    """Read replacement data and settings from an xlsx workbook.

    Two ways of using the workbook are offered:

    * ``find_replace_text`` substitutes ``[[A1]]`` / ``[[sheet:A1]]``
      placeholders in a string with the referenced cell values.
    * ``get_replacements`` yields ``(x, y, text)`` triples read from the
      ``replacements`` sheet, after applying the ``settings`` sheet.

    Attributes set by ``__init__``: ``file``, ``xlsx`` (the loaded workbook),
    ``re_replace`` (compiled placeholder pattern), ``errors`` (messages
    collected while replacing placeholders) and ``reset_settings`` (values to
    restore once the replacements have been produced).
    """

    def __init__(self, file: Union[str, IO]):
        """Load ``file`` as a read-only workbook with computed cell values."""
        self.file = file
        self.xlsx = openpyxl.load_workbook(file, read_only=True, data_only=True)
        # Group 1: optional sheet name, group 2: cell coordinate such as "B12".
        self.re_replace = re.compile(r'\[\[(?:(\w+):)?([a-zA-Z]+\d+)\]\]')
        self.errors = []
        self.reset_settings = {}

    def find_replace_text(self, text: str):
        """Return ``text`` with every placeholder replaced by its cell value."""
        if '[[' not in text:
            return text
        return self.re_replace.sub(self._regex_replacement, text)

    def get_replacements(self) -> Iterator[Tuple[int, int, str]]:
        """Yield ``(x, y, text)`` for every row of the ``replacements`` sheet.

        Columns 1-3 hold x, y and the value.  Column 4 may hold a ``%``-style
        format applied through ``locale`` and column 5 a ``str.format``
        template applied afterwards.  Settings from the ``settings`` sheet are
        applied before the first row is read and undone when the generator
        finishes or is closed.
        """
        self._get_settings()
        try:
            sheet = self.xlsx['replacements']
            for row in sheet.iter_rows(min_row=1, max_col=5):
                content = [c.value for c in row]
                x = int(content[0])
                y = int(content[1])
                value = content[2]
                if len(content) == 5:
                    value_format, str_format = content[3], content[4]
                    if isinstance(value_format, str) and value_format:
                        # locale.format() was removed in Python 3.12.
                        value = locale.format_string(value_format, value)
                    if isinstance(str_format, str) and str_format:
                        value = str_format.format(value)
                if not isinstance(value, str):
                    value = str(value)
                yield x, y, value
        finally:
            # The locale is process-wide state; never leave it changed.
            self._reset_settings()

    def _get_settings(self):
        """Apply the key/value rows of the ``settings`` sheet.

        Only the ``locale`` key is recognised; the previously active locale is
        remembered in ``reset_settings`` so it can be restored afterwards.
        """
        sheet = self.xlsx['settings']
        for row in sheet.iter_rows(max_col=2):
            key, value = [cell.value for cell in row]
            if key == 'locale':
                print("Using locale %s" % value)
                # Calling setlocale() without a value only queries the current
                # setting, and works even when getlocale() is (None, None).
                previous = locale.setlocale(locale.LC_ALL)
                locale.setlocale(locale.LC_ALL, value)
                # Keep the earliest saved value if the key occurs repeatedly.
                self.reset_settings.setdefault('locale', previous)

    def _regex_replacement(self, match):
        """Return the cell value for one placeholder match, or an ``ERR`` text."""
        sheet_name, cell = match.groups()
        if sheet_name is None:
            sheet = self.xlsx.active
        else:
            try:
                sheet = self.xlsx[sheet_name]
            except (KeyError, IndexError):
                # openpyxl raises KeyError for a worksheet that does not exist.
                self.errors.append('Unknown sheet {sheet}'.format(sheet=sheet_name))
                return 'ERR Unknown sheet'
        try:
            return str(sheet[cell].value)
        except (IndexError, ValueError):
            label = sheet_name if sheet_name is not None else getattr(sheet, 'title', sheet)
            self.errors.append('Unknown cell {sheet}:{cell}'.format(sheet=label, cell=cell))
            return 'ERR Unknown cell'

    def _reset_settings(self):
        """Restore everything recorded in ``reset_settings`` and empty it."""
        for key, value in self.reset_settings.items():
            if key == 'locale':
                locale.setlocale(locale.LC_ALL, value)
        self.reset_settings.clear()