From f396ae8879475d6c4aca29003f77965945da24fd Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Fri, 3 Jun 2022 14:40:49 +0200 Subject: [PATCH] feat(engines): add basic excel validation --- .../api/data/xlsx-not-valid.xlsx | 1 + .../api/data/xlsx-syntax.xlsx | Bin 0 -> 6432 bytes document_merge_service/api/engines.py | 37 +++++++++++++++++- .../{test_structure.py => test_excel.py} | 22 +++++++++++ document_merge_service/settings.py | 2 +- 5 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 document_merge_service/api/data/xlsx-not-valid.xlsx create mode 100644 document_merge_service/api/data/xlsx-syntax.xlsx rename document_merge_service/api/tests/{test_structure.py => test_excel.py} (52%) diff --git a/document_merge_service/api/data/xlsx-not-valid.xlsx b/document_merge_service/api/data/xlsx-not-valid.xlsx new file mode 100644 index 00000000..8bd6648e --- /dev/null +++ b/document_merge_service/api/data/xlsx-not-valid.xlsx @@ -0,0 +1 @@ +asdf diff --git a/document_merge_service/api/data/xlsx-syntax.xlsx b/document_merge_service/api/data/xlsx-syntax.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..465bfb55f61de1630ace825b621e09b9f8238665 GIT binary patch literal 6432 zcmbVQ1yq!4*Ciwcqy&MXyGIa2x;q901nC%Hs3Av(zI20xG?xwqDd`T8?j8hbX;5i_ zfAHS*%jfrf_h0Kjv)0VKYrW5T*1Mnm?sLutsi5B>MZw0#MtLtiq>XYb?jrx2xPa|l zd3kQWOXJ$rJArt?`<~$(Cjy@&v9k*rRTUfQAJMciIZ{=J<#q*pJ(eXQ#*3=#jCS_% zy)w^VP<7;50J5kUhw12tp*6Euco%l{Upd2h1qX9{x`~7ow4gE^E-@O)-v^eY`tm&8 zVwG{fd{a{yMcuI2u|yG*3~@2idBJyO8M+>*zY|I9Y^|F!<9nwmP|d+kz)^`7;33Yx z{EQ)<9@e9*n(I|4)I^LA4#tHqfW7Pc7xxngDDZ%aYD7u&>w7(9~n}`E})^46^jNXc>dw<@i<~LD}c%J#Af1L!Rf`uKF`Y3e|3BnK>3JU1I z!gLRL$4h(O7f=^Fb12k~=cR*v)Kf(C9FWj^|Cy{$h+!Qmp_v@I&9g1Fe%Q&Z5=+;I z3Pw*o)3;ZitOT=K4mwAuqQ?X2Ppqx@TA%2Qd16`w={~%Bf%;7nEKF`oUIo3Ewh#y> ztM99yz%_?q_B)d8uPZnl7f*S{d`*nf57Qg=TC`%;oS9O`a0CF3^<>8JU(V}h5TcnrK>dLn?9wO?10C%LX{6-XCX$RA!g(#pZ zTp$(=sh3Lym*qHDUD7DH!2=@zp2<+Q@EvHT!*~%^=G@-lcP8z3F%NTzGRk&x`TvxS zX2y3CO~z}iH6Uyxwh+WAB>EOxc%qhTE{MxkO4yylxeO9whhx5(o>z?Z&F>j35fNLX z+}g>?R^BjDPyCcgSDzVlznH1pilxVF?`x5iS3I>bSmC;dMo63*$5vN##NY*u~@zS#{zIDMG<=r;Zr1Pw-ip;$EMs2(3KKxn0+Yo~8hz zhZl!FfT;Lhp)I+7r;)pzM@1)QK){jU_F(W_W=Y)UIxBa+OvWfLWYgf$_n=QQ6=LJ& z>E<=!V7jE}cn!1N*4*_McdJh^Za6eUm2-}TP3#pOdBD86A3q)s6Rx?-P z?F^g&8D$s>C)!j6tALnj>Bu5g?LJHKkB|F5Ts)18WaeWwR1*GpU*J^Khn&K|*~18A zOT?zg|1qd~ErbrJB!%%%;Y}WKrls;-bNUImsj`}qEkNnO6cAh`Nyy?1=uOnawgctp zj`>HCNw=KEp$Vm#udeav;OdG$RT5Yd3eC+T3e8pREgd5Dq>CP4Y+wb*N+Sp}^n%0<tu?k9-QcIl9Cy`$Po+Fl6PUoX=UFKVw?xfOaD0$Fz@Hw1+kLS zt7>v8)d7E&Ezv;}ri!MT4xqhriVH*r;scP97J$ z8l>K}b>G1+#$EYd!)LA}e{rRzvA(gcDXOyIl~8+P&Fn{>cOZ}dfLD!t60hcRj`cN( z*0Cywc20CxvFoV$^rkmQR8&Be6EHHL zdXttXchN0UmQ{27^-r-`yd?gYI_(`QTs{6f1tljpxGxa}wgm>E}TCG?OuRv3!YWrtLL~ z1-(2-%Q!S59;X79))+=@fYkwazNOANzup)< z6T1ggEx0pj3ny)sbtc5ET72Ve@Rio*2X@(hCAvw~hXf~g6KIvN8}z+D`bvfiU~<<` zJZuV{sEK{;FhxgS(RQ(+o1aK(d6^6a)t~^hbd|T+KtdBy-U^Y{*)A^1{#_kGdVUE! zgFwak1&zRK7%`?s=$WutoGc}NNb^yYgpZB8i*c0#S2I`jv{*3yasdC6`E1U(;f`Jd z;E9thnOR#;LqO`_`_^~SnCqT8v5;rHSo7Bl309j>yu=1wf9+G2y1~$C8=#noL|S^f zxkC_W=Y9ca~)s-VXR#n+;S z99P^VH5vSo3@FmImJQw3B=$+)oOavGILl)y)8`$F)=QF_L7 zDM_lN`PkI?kk4NI#!e&izHcJK%tHsE9=H%iRHt7r9$OLuetKkeb}ob$$s9+F8aLP} zV%2f78DV+ve2O%376mmgB_ybN>ruF>Zh2fX5gW^Bd;m|_hx`S3d5CJw*RjnZ*G=Cxwn|GtHlKJm)fJxFa`p=i%h*VZUYg?&bl@IC@i@ zH6`9EEJ11(Br@=oNf;h$ogx;Zr}W(+-=CmY@)4jV+tgGfQ9m-Y`p{HGgd<^t5|+0j~wC!0(dy zOvgW&Y=}z2R-L!b(x$q>HgdY$#P2jv`tu;Aun$nXM8KwnJ9Y`9-vJBKYc+VMpDXuX zM+pZfuwtgk%k`=#0$=YLTt;~?IO?E<(gop}L={nI)I#*8#3%}ZNU>z3Q zGzC%7n58I93Ge0MN8Q2=jQW7S?Xj1i>}F8rh2uM^8#&G%rMHCFC5@M3qx(-$+s$-! zx_dtYN^FOBdA9=&4m=Ig@>}`zw&iiOmqZF{B#q-ocTQJdp0C?XZ3cqRy3)UViB>qR z+^+zX<`YrbX7kQO>^B0>!VKBu@i(XMMwdS#1RY-i=T#M2nPIjcKlZ znLcXjV!(S@E~j#!n2m%GHTN(9wdN;>R>J1B2YRQ1f;80BjpT*f0|;hu9|b1b7= zjqeCC31Io&_PCz#r@(q?jB=0EToj+NAsLK0+m4DfhdaHF!%UaPP6+%|l#Q*dV(S?M z8K~ivgQ3A|o40rd;#SiOZ{Cq@gb#N<=83PtZe~XRkm7s5CeJ-R7C{Qi+|Xl#c>85O zwKLW{H=8z@dW5RHt)JM!l<#q)09Pe#EsgE$5Py|)ziANh*;cB9^PR1^%@eidHs4HU z&6pKBJ3(KZP0M2EGEqnL*Rq|e&a;=audgs~6kwtsMt7qC{r{){lz;UAe@&9Nr=*`# zWpmuHEpn<1+4siHy;x?VvWOIFPLL`yI6>*wTpsBXj5{ubO2kBQ2Wi3BL@0WXPc9^F zr}n?)8NYfN*)FM*kOi!pi82EoG+^7aR7IVx}5UbWSRbB$qF zApPR^S5}<$c~Y#~$1q8wq_xrlX7oA6f;kwb4MHai782-d_OXSQ+A+s7$J^a|rnG>hFnL6glcB zq&gB~>p)TN9H9UwrxOhkl`$_siw=mtiZw9mp?&k|3sDb4GD?ZKY~Vtyu%V|0dj#lx$;^-UBEXL zB}mIiBLF!zoji+ekzxOAPT5T?;OP?93#44Mt+&u&Q$s}UE*hAY7pr(RZe)(Twx;qB zxUcO>QH}OIVUP+HjcsBN!uI)4T6d zWNd&{Nqt^4kY&|8Z#1i{#EiG0HR)Aenefs(hpLOyJB}eKET7Fe?J;CL@UFdjcBpRN zekK9`MH^HUl=*)=Cc@jtWai{_b4nSp8XCxV{=>dkILBPInn9vt>X<5B7lasqyQ~}3 z^~pa~LV3RU;`mN%O-B8!&(!d6%q|%}m=#-71o7$(n-Zq3aQ<#KBlOa>gZ1ZlPyjNJDb*-Z7y;3qQV~o<<-R^xUA1pxDDAJz zFQ?PP;5Z?tJ7aXzI5jD9`TEhsYkye$P-wgTREf?>z6`t%Qf(G&d+R>Az`Dt2dT&AF zIj(B)LGu;f!B@}V8o8$>5gygkFyr7DHIesfuqt5Q;}%H`64u1&$c2sv%`6{V ze{yWo)A!n&uoOXA0gF5_$*PvqnkT4Lbk#_st!J&e5G6|9{d-Q|U z6!CORw6j#twU0h+vpKl#J1A70!>qg3Wi%{x#UrP*V_5*9d}GJCPo5pXL`MW-{W3Ni zdnt9=bI|O!SP!>KpVBDYS7b^CwPTDXMqIhY=^Kv_wjq+v67p2{*zQ# zuERzfNE9IdCr$=3P)9egqnn8~+zISzbc53e9T6n22p29m8OqpT2vqun_vkT19GX^^ zJ1XXs5|)kmmnSng+V=w`C$U*LTP}>JHsc0sE7b)}Ge$TRNJQmv*b3@F;Aonp+D(kq zVfk4X>o(@>XY~M`g)|juz*|YJkZRQX96>d4=LZlKb2fuf;vP3qaaL8%dwqrCE?N38 zd<>&$@%q`|ZQaLhnl6*aa>W2m3)}Xhdtl}jIQWoarYrMWk#^ko z>uB_+CD9+F*bU*&Ki^vj7UMr<0=S%-N>RQOLlYhKZh0Yl>h($ literal 0 HcmV?d00001 diff --git a/document_merge_service/api/engines.py b/document_merge_service/api/engines.py index 798a7106..3c821f91 100644 --- a/document_merge_service/api/engines.py +++ b/document_merge_service/api/engines.py @@ -186,9 +186,13 @@ def merge(self, data, buf): return buf +_placeholder_match = re.compile(r"^\s*{{\s*([^{}]+)\s*}}\s*$") + + class XlsxTemplateEngine: def __init__(self, template): self.template = template + self.writer = None def validate_is_xlsx(self): try: @@ -201,10 +205,39 @@ def validate(self, available_placeholders=None, sample_data=None): self.validate_template_syntax(available_placeholders, sample_data) def validate_template_syntax(self, available_placeholders=None, sample_data=None): - pass + # We cannot use jinja to validate because xltpl uses jinja's lexer directly + if not sample_data: + sample_data = {} + buf = io.BytesIO() + try: + self.merge(sample_data, buf) + except TemplateSyntaxError as exc: + arg_str = ";".join(exc.args) + raise exceptions.ValidationError(f"Syntax error in template: {arg_str}") + if available_placeholders: + placeholders = [] + for sheet in self.writer.sheet_resource_map.sheet_state_list: + if not sheet.sheet_resource: + continue + tree = sheet.sheet_resource.sheet_tree + self.collect_placeholders(tree._children, placeholders) + missing = set(available_placeholders) - set(placeholders) + if missing: + raise exceptions.ValidationError( + f"Template uses unavailable placeholders: {str(missing)}" + ) + + def collect_placeholders(self, children, placeholders): + for child in children: + if hasattr(child, "value"): + value = str(child.value) + re_match = _placeholder_match.match(value) + if re_match: + placeholders.append(re_match.group(1)) + self.collect_placeholders(child._children, placeholders) def merge(self, data, buf): - writer = BookWriter(self.template) + self.writer = writer = BookWriter(self.template) writer.jinja_env.filters.update(get_jinja_filters()) writer.jinja_env.globals.update(dir=dir, getattr=getattr) diff --git a/document_merge_service/api/tests/test_structure.py b/document_merge_service/api/tests/test_excel.py similarity index 52% rename from document_merge_service/api/tests/test_structure.py rename to document_merge_service/api/tests/test_excel.py index dbf91cce..f140de23 100644 --- a/document_merge_service/api/tests/test_structure.py +++ b/document_merge_service/api/tests/test_excel.py @@ -1,6 +1,8 @@ import io import openpyxl +import pytest +from rest_framework import exceptions from ..data import django_file from ..engines import XlsxTemplateEngine @@ -17,6 +19,8 @@ ], } +_available = ["key0", "key1.subkey1"] + def test_structure(): tmpl = django_file("xlsx-structure.xlsx") @@ -31,3 +35,21 @@ def test_structure(): assert ws["A5"].value == "Item: mixed" assert ws["A6"].value == "Item: list" assert ws["A7"].value == "Subitem: xdata2" + engine.validate(_available, _structure) + _available.append("huhu") + with pytest.raises(exceptions.ValidationError): + engine.validate(_available, _structure) + + +def test_syntax_error(): + tmpl = django_file("xlsx-syntax.xlsx") + engine = XlsxTemplateEngine(tmpl) + with pytest.raises(exceptions.ValidationError): + engine.validate(_available, _structure) + + +def test_valid_error(): + tmpl = django_file("xlsx-not-valid.xlsx") + engine = XlsxTemplateEngine(tmpl) + with pytest.raises(exceptions.ParseError): + engine.validate(_available, _structure) diff --git a/document_merge_service/settings.py b/document_merge_service/settings.py index 470a9aef..6fee9ea8 100644 --- a/document_merge_service/settings.py +++ b/document_merge_service/settings.py @@ -10,7 +10,7 @@ django_root = environ.Path(__file__) - 2 ENV_FILE = env.str("ENV_FILE", default=django_root(".env")) -if os.path.exists(ENV_FILE): +if os.path.exists(ENV_FILE): # pragma: no cover environ.Env.read_env(ENV_FILE) # per default production is enabled for security reasons