Initial version (with docx)

This commit is contained in:
2018-03-11 18:21:26 +01:00
commit 655a0461f5
4 changed files with 209 additions and 0 deletions

106
.gitignore vendored Normal file
View File

@@ -0,0 +1,106 @@
# Created by https://www.gitignore.io/api/python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
.pytest_cache/
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule.*
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# End of https://www.gitignore.io/api/python
.idea/

22
docx_replacement.py Normal file
View File

@@ -0,0 +1,22 @@
from docx import Document
from xlsx_replacement import XlsxReplacement
def docx_replace_regex(doc_obj: Document, replacer: XlsxReplacement):
    """Replace ``[[...]]`` placeholders inside ``doc_obj``, in place.

    Every paragraph of ``doc_obj`` is scanned, and each run whose own text
    contains ``[[`` is rewritten with ``replacer.find_replace_text``. The
    function then recurses into every cell of every table, so paragraphs
    inside cells (and tables nested in cells) receive the same treatment.

    Args:
        doc_obj: Object exposing ``paragraphs`` and ``tables``. The
            recursion passes table cells here as well as the document.
        replacer: Object whose ``find_replace_text(text)`` returns ``text``
            with its placeholders substituted.

    Returns:
        None. ``doc_obj`` is modified in place.
    """
    for paragraph in doc_obj.paragraphs:
        # Cheap pre-check: skip paragraphs that cannot hold a placeholder.
        if '[[' not in paragraph.text:
            continue
        # Work run by run (a run is a string with one style) rather than on
        # the paragraph text as a whole.
        # NOTE(review): each run is matched on its own, so a placeholder
        # split across two runs is never seen whole and stays untouched.
        for run in paragraph.runs:
            if '[[' in run.text:
                run.text = replacer.find_replace_text(run.text)

    for table in doc_obj.tables:
        for row in table.rows:
            for cell in row.cells:
                # Cells are handed back to this function for the same scan.
                docx_replace_regex(cell, replacer)
def load_document(file):
    """Build a document object from ``file`` and return it.

    ``file`` is handed to ``Document`` unchanged; this wrapper adds no
    validation or conversion of its own.
    """
    document = Document(file)
    return document

47
test.py Executable file
View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
from flask_api import FlaskAPI
from flask import Flask, request, send_file
from docx_replacement import docx_replace_regex, load_document
from xlsx_replacement import XlsxReplacement
from io import BytesIO
# Application object on which the routes below are registered.
# NOTE(review): the FlaskAPI variant is left disabled; plain Flask is used.
# app = FlaskAPI(__name__)
app = Flask(__name__)
@app.route('/docx-test/docx_replace', methods=['POST'])
def test_upload():
    """Fill an uploaded DOCX template with values from an uploaded XLSX.

    Expects a multipart POST carrying two file fields, ``docx`` and
    ``xlsx``. The workbook is wrapped in ``XlsxReplacement``, the document
    is loaded, its placeholders are replaced in place, and the result is
    sent back under the uploaded document's file name.

    Returns:
        The processed document as a response, or a ``(message, 400)`` pair
        when one or both uploads are missing. A missing upload is a client
        error, so it is reported as 400 instead of raising and surfacing as
        a server error.
    """
    uploads = request.files
    if not uploads:
        return 'Files required!', 400
    if 'xlsx' not in uploads or 'docx' not in uploads:
        return 'Missing xlsx or docx', 400

    template = uploads['docx']
    xlsx = XlsxReplacement(uploads['xlsx'])
    docx = load_document(template)
    docx_replace_regex(docx, xlsx)

    # Serialise into memory and rewind so the response is read from the start.
    bio = BytesIO()
    docx.save(bio)
    bio.seek(0)
    # NOTE(review): newer Flask releases renamed ``attachment_filename`` to
    # ``download_name`` - confirm against the Flask version in use.
    return send_file(bio, mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                     attachment_filename=template.filename)
@app.route('/docx-test/testpage')
def test_page():
    """Serve a minimal HTML form for trying the upload endpoint by hand.

    The form posts two file inputs, ``docx`` and ``xlsx``, as multipart
    form data to ``/docx-test/docx_replace``.
    """
    upload_form = """
<html><body>
<form action="/docx-test/docx_replace" method="post" enctype="multipart/form-data">
<label>Select DOCX
<input type="file" name="docx" id="docx"></label>
<label>Select XLSX
<input type="file" name="xlsx" id="xlsx"></label>
<input type="submit" value="Upload" name="submit">
</form>
</body></html>"""
    return upload_form
if __name__ == '__main__':
    # Script entry point: start the server on host 0.0.0.0, port 1212.
    app.run(host='0.0.0.0', port=1212)

34
xlsx_replacement.py Normal file
View File

@@ -0,0 +1,34 @@
import re
import openpyxl
from typing import Union, IO
class XlsxReplacement:
    """Resolve ``[[cell]]`` / ``[[sheet:cell]]`` placeholders from a workbook.

    The workbook is opened once in the constructor. ``find_replace_text``
    then substitutes every placeholder in a string with the string form of
    the referenced cell's value. Lookup failures do not raise: an ``ERR ...``
    marker is substituted instead and a message is appended to ``errors``.
    """

    # Matches ``[[B2]]`` or ``[[Sheet:B2]]``. Group 1 is the optional sheet
    # name, group 2 the cell coordinate (letters followed by digits).
    # Raw string: ``\[``, ``\w`` and ``\d`` are invalid escapes otherwise.
    re_replace = re.compile(r'\[\[(?:(\w+):)?([a-zA-Z]+\d+)\]\]')

    def __init__(self, file: Union[str, IO]):
        """Open the workbook found at / in ``file``.

        Args:
            file: Path or file object, passed to ``openpyxl.load_workbook``
                together with ``read_only=True`` and ``data_only=True``.
        """
        self.file = file
        self.xlsx = openpyxl.load_workbook(file, read_only=True, data_only=True)
        # Human-readable messages for every placeholder that failed to resolve.
        self.errors = []

    def find_replace_text(self, text: str):
        """Return ``text`` with every placeholder replaced.

        Text without ``[[`` is returned unchanged without running the regex.
        """
        if '[[' not in text:
            return text
        return self.re_replace.sub(self._regex_replacement, text)

    def _regex_replacement(self, match):
        """Return the replacement string for one placeholder match.

        Args:
            match: Match produced by ``re_replace``.

        Returns:
            ``str()`` of the cell value, ``'ERR Unknown sheet'`` or
            ``'ERR Unknown cell'``. Both error cases also append a message
            to ``self.errors``.
        """
        sheet_name, cell = match.groups()
        if sheet_name is None:
            # No sheet prefix: fall back to the workbook's active sheet.
            worksheet = self.xlsx.active
        else:
            try:
                worksheet = self.xlsx[sheet_name]
            except (KeyError, IndexError):
                # A workbook lookup by name signals a missing sheet with
                # KeyError; IndexError is kept for backward compatibility.
                self.errors.append('Unknown sheet {sheet}'.format(sheet=sheet_name))
                return 'ERR Unknown sheet'
        try:
            # NOTE(review): an empty cell presumably yields the text "None"
            # here - confirm that this is intended.
            return str(worksheet[cell].value)
        except (IndexError, ValueError):
            # Report the sheet by name, not by the worksheet object's repr.
            if sheet_name is None:
                sheet_name = getattr(worksheet, 'title', None)
            self.errors.append('Unknown cell {sheet}:{cell}'.format(sheet=sheet_name, cell=cell))
            return 'ERR Unknown cell'