From a6962716ca9c66c24dd6b85d01fadd18c305cc5d Mon Sep 17 00:00:00 2001 From: zhanghongji <1029550448@qq.com> Date: Thu, 29 Oct 2020 10:36:14 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E6=A0=B9=E6=8D=AE=E6=9C=80=E6=96=B0?= =?UTF-8?q?=E5=8F=8D=E9=A6=88=E5=BB=BA=E8=AE=AE=E6=96=B0=E5=A2=9E=E4=BA=86?= =?UTF-8?q?=E4=BB=A5=E4=B8=8B=E4=BF=AE=E6=94=B9=EF=BC=9A=201.=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E6=95=B0=E6=8D=AE=E9=9B=86=E5=8E=8B=E7=BC=A9=E5=8C=85?= =?UTF-8?q?=E4=B8=8B=E8=BD=BD=E9=93=BE=E6=8E=A5=E5=92=8C=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E6=96=B9=E6=B3=95=202.=E6=B5=8B=E8=AF=95pillow=E5=9C=A8paddlep?= =?UTF-8?q?addle=E5=AE=89=E8=A3=85=E5=90=8E=E6=98=AF=E5=90=A6=E9=9C=80?= =?UTF-8?q?=E8=A6=81=E5=8D=95=E7=8B=AC=E5=AE=89=E8=A3=85=203.=E5=9C=A8?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E4=B8=8B=E6=B7=BB=E5=8A=A0=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=204.=E6=9B=B4=E6=8D=A2=E7=AC=AC=E4=B8=89?= =?UTF-8?q?=E6=96=B9=E8=A7=A3=E7=A0=81=E5=99=A8=E5=B9=B6=E5=AE=9E=E7=8E=B0?= =?UTF-8?q?=E9=9D=92=E6=98=A5=E7=89=88=EF=BC=8C=E5=BE=852.0=E6=9B=B4?= =?UTF-8?q?=E6=96=B0ctc-decode=E5=90=8E=E5=86=8D=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E8=AF=A5=E5=A4=84=E4=BB=A3=E7=A0=81=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle2.0_docs/image_ocr/OCR.ipynb | 44 +++++++++++++------ paddle2.0_docs/image_ocr/sample_img/9450.jpg | Bin 0 -> 1640 bytes paddle2.0_docs/image_ocr/sample_img/9451.jpg | Bin 0 -> 1623 bytes paddle2.0_docs/image_ocr/sample_img/9452.jpg | Bin 0 -> 1683 bytes 4 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 paddle2.0_docs/image_ocr/sample_img/9450.jpg create mode 100644 paddle2.0_docs/image_ocr/sample_img/9451.jpg create mode 100644 paddle2.0_docs/image_ocr/sample_img/9452.jpg diff --git a/paddle2.0_docs/image_ocr/OCR.ipynb b/paddle2.0_docs/image_ocr/OCR.ipynb index 370ce582..d46a2db4 100644 --- a/paddle2.0_docs/image_ocr/OCR.ipynb +++ b/paddle2.0_docs/image_ocr/OCR.ipynb @@ -22,7 +22,9 @@ "**数据展示**\n", "

\n", "
\n", - "

" + "

\n", + "\n", + "点此[快速获取本节数据集](https://aistudio.baidu.com/aistudio/datasetdetail/57285),待数据集下载完毕后可使用`!unzip OCR_Dataset.zip -d data/`命令或熟悉的解压软件进行解压,待数据准备工作完成后修改本文“训练准备”中的`DATA_PATH = 解压后数据集路径`。" ], "cell_type": "markdown", "metadata": {} @@ -126,16 +128,13 @@ "\n", "CTC相关论文:[Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neu](http://people.idsia.ch/~santiago/papers/icml2006.pdf) \n", "

\n", - "
\n", + "
\n", "

\n", "\n", - "网络部分,因本篇采用数据集较为简单且图像尺寸较小并不适合较深层次网络。若在对尺寸较大的图像进行模型构建,可以考虑使用更深层次网络/注意力机制来完成。当然也可以通过目标检测形式先检出文本位置,然后进行OCR部分模型构建。\n", + "网络部分,因本篇采用数据集较为简单且图像尺寸较小并不适合较深层次网络。若在对尺寸较大的图像进行模型构建,可以考虑使用更深层次网络/注意力机制来完成。当然也可以通过目标检测形式先检出文本位置,然后进行OCR部分模型构建。(下方样例来源[PaddleOCR](v))\n", "\n", "

\n", - "
\n", - " \n", - " \n", - "PaddleOCR效果图\n", + "
\n", "

" ] }, @@ -215,6 +214,8 @@ " if self.is_infer:\n", " # 输出层 - Shape = (Batch Size, Max label len, Prob) \n", " x = paddle.nn.functional.softmax(x)\n", + " # 转换为标签\n", + " x = paddle.tensor.argmax(x, axis=-1)\n", " return x" ] }, @@ -452,7 +453,7 @@ }, "outputs": [], "source": [ - "# 待预测目录\n", + "# 待预测目录 - 可在测试数据集中挑出\b3张图像放在该目录中进行推理\n", "INFER_DATA_PATH = \"./sample_img\"\n", "# 训练后存档点路径 - 10代表使用第10个存档点\n", "CHECKPOINT_PATH = \"./output/10\"\n", @@ -505,7 +506,7 @@ { "source": [ "## 开始预测\n", - "> 飞桨2.0 CTC Decoder 相关API正在迁移中,本节暂时使用[第三方解码器](https://github.com/awni/speech/blob/072bcf9ff510d814fbfcaad43b2883ecf8f60806/speech/models/ctc_decoder.py)进行解码。" + "> 飞桨2.0 CTC Decoder 相关API正在迁移中,本节暂时使用简易版解码器。" ], "cell_type": "markdown", "metadata": { @@ -533,7 +534,22 @@ } ], "source": [ - "from ctc import decode\n", + "# 编写简易版解码器\n", + "def ctc_decode(text, blank=10):\n", + " \"\"\"\n", + " 简易CTC解码器\n", + " :param text: 待解码数据\n", + " :param blank: 分隔符索引值\n", + " :return: 解码后数据\n", + " \"\"\"\n", + " result = []\n", + " cache_idx = -1\n", + " for char in text:\n", + " if char != blank and char != cache_idx:\n", + " result.append(char)\n", + " cache_idx = char\n", + " return result\n", + "\n", "\n", "# 实例化预测模型\n", "model = paddle.Model(Net(is_infer=True), inputs=input_define)\n", @@ -547,10 +563,10 @@ "img_names = infer_reader.get_names()\n", "results = model.predict(infer_reader, batch_size=BATCH_SIZE)\n", "index = 0\n", - "for result in results[0]:\n", - " for prob in result:\n", - " out, _ = decode(prob, blank=10)\n", - " print(f\"文件名:{img_names[index]},预测结果为:{out}\")\n", + "for text_batch in results[0]:\n", + " for prob in text_batch:\n", + " out = ctc_decode(prob, blank=10)\n", + " print(f\"文件名:{img_names[index]},推理结果为:{out}\")\n", " index += 1" ] } diff --git a/paddle2.0_docs/image_ocr/sample_img/9450.jpg b/paddle2.0_docs/image_ocr/sample_img/9450.jpg new file mode 100644 index 0000000000000000000000000000000000000000..028273bef0376d69a087790cfe3ca30b8fa45fa2 GIT binary patch literal 1640 zcmbW$doInETAj$8kfcP`uuON^pv;qdJsHBXO7u2c&Bm#v(qEQM8XtcaKNxl!z>IxeA z=G!rv9zocR(OQ%gPN^c9QT1r8Cx6Dk;$TdwlJdIs1Z|y7hDMu>w@@vutZitvPR=en zm^)dnUfw>t_xSGJcPRMqk&w`^@Yv%g;^Gt7C)0jRKa-JpHY+dxLIL;U&xJ*0<-c65 zxK?@nMs>}-+PeA%-u>3L$4{QNKkMiey!`D|Z(skwplDb;GAbE+H$E{tH~(>AQ3`!p z`s_jg)EAa~{Q~>Sr7m|N(P$JJ``Lv+#>*#49j%~mj?viefenh*+(=1L)M9W-s~#zl zEj(w|9*p5DuQQ;EHqCyf{VDr*u+;x0`y2L;YY^a32>Iio)Bzp5TQL^7uknYrIhQ1% zT)AOXNOvUBW}q-ToMn*1vY+&ZLRx!0+qW)a@^QvP)#nH;3|n2{X|Iw3KgM#%iiwK# zGF?Vbt}0umyDyaN6!zsbOg>>JHI*K&Oo)lJO=Q1(8j(myl^lt3*n6tw3OnP%mS(eF zJv@>&UV48I^QI88Xq}8A$XfWL;1~=JW=`woM0d2GUNBnGf3qG2rBO^6v=Ex1q5`V0 zPB2x*6?Zj-zS!^Y_{7P|AzRkq{tDZzRD09E8DexT?-X{`dh> zNZk=u$a#i=S85?Tj}Dl1v4htqkg6(BhLfA^>!7Unm%_wODSDmhMt63a?mAggD5>>m zxa#3j8+jWl(txbnE-~{(&oz(H+WMKP_BO;M_L6tN%$au@3p0}ZqvNZ>&HG97{$kmu zN$BOnLzb=Yh6mpWrpKmKtG#RSq^?Is5@pA+&=F!PvtevSHB8j8u$EVy$ggzKjxrRb z;LalgeH`hD#MNkD-tthz(9Kxva8Cx+&7!rgCMoY}`-usoOV`FLt4W-0)ozCa7JZdk zJ}}6E!5n!Yxt~S<=zq^E!b|)1u4|4F6;Mz`F*S|7noFqHySv0!#P#JiPPX4>*FNDE z+(^I>I;kaQSrX?255j9#38UM<|7f6QaeJwMNTt_y@nG@%^}_U707fk&02W z_08!p7;E!|L9==CY-f>B9F<`<8Y=qrRlI4G{_n)VNEg%B#QAg^Y5T5Pira&svlS!7 zi*vTQI^(euO{Czu-R!{g-;+J^Aha4C>E2}ire=CL?Z7N1>v&f|QSXaOh012mj6=Dmy-iyGHvVL9 zbJ&&D!n@?loyCjns*2I zPfYM6bWtJ{AUkE5yF4%d1jO>MrO$6Ry7A8UB84%uaYsc!UK)tKq#(&@wB_P_&w06l}`^Z)<= literal 0 HcmV?d00001 diff --git a/paddle2.0_docs/image_ocr/sample_img/9451.jpg b/paddle2.0_docs/image_ocr/sample_img/9451.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1fbea8ae9dcd92d901b2cd6f0773d620fb42e22a GIT binary patch literal 1623 zcmbW#c~Fx_6aerIUm%2pBOG!CL%5YYfLtO545y&n3Wx%7rXZID0p(H@1QhT<8Y`fJ zT9KQemcz(Ztx!P_gOYM9RY(LSSmbNaPCMgYZQsuB?9A@GeZP4NkHEhH2`4*8JAgm{ zfZz`Rj{;jDEF^>y5)?+EP-wKU2v%AYi@{*!C8flrl@tgYl@#!JWi>rbWfdJ&JYLIm zla7I*v9U2h)7;98v{lc@n6#b*fkvaT7_6MAs2quiCzAeiz#RZ53^oEg2%!Q5a0m#8 zfI9(!U#B4AOMt%uApju-g;2t15e)xAlLQbzK#%|u5)?!t`L~bq&jAuAC`mLV3rV>J zqEup~Ng0LZ!m2iP4`ke*a@351;xf@98)W6=71T8}wKi=wHrZloW^Q4-gJNgz;OOMx z>E-RSXRmMY!I03f@QBFxgu{unq$9~$$4_LRJe8AMRDACIg^MMZN-M5jyIxs!pAZfyNHi3ZB)g#kW2IC`8N$*wh2?b*(5go69GRfF zry?8FjK|fv>$ESje+SF_U$Vbp|F~EH3nBQ;gK)qayiV00-={FiR8V^|o-al#-o>C@ z2$xE!%XidV4x`V~hs!vX#~GA`RFjI5>CL(9Q2P;^chEUGe=U%F<(LkS?94-vDTP-< z2i|oY?)F@9?(J2z-c8LNT6$vY#GTKlhN%BS(|Htq@NSs4d2qKMvIqvGI_Qw~f=F({ z=c|!3^-ncu1WtC6-`a_40t`S93?{;1aNLN=Uee`nc#td%q

OAmLLDmsEklS&6Y1 zAKpBRaWJc~=0^W`IHiWSljt=VIt1;wA2%R3MUjkNd-F0eh;OL#rtqt@-Kq-%o8dDsNH8T(rDO-p!Eh*YbMBNLJcT?8*Jv)1~t>+Pa<;X#vOc z6=&2IJdwv1Yx?_Aw3aI9D<;kX(~kqH%6{J>Nwv@!zQD+QRVm^flS%0~;IP?M-T^6* zq)eZR4WLen8u>SKWwTXVx#FW5r3+C%v4b4e`bH;LYQ+?b#Kf16&h@YAw`0=!v+H1R zjQ?jQ&!sW{;4oHsRKoI%{+hW5+DSu*bO^S{{1&bgT6vY() literal 0 HcmV?d00001 diff --git a/paddle2.0_docs/image_ocr/sample_img/9452.jpg b/paddle2.0_docs/image_ocr/sample_img/9452.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ff4fdd93f12ae33541b035eaa19515527d209b17 GIT binary patch literal 1683 zcmbW#c{tQ*902h5XT}^%7{=KcVa8ENV;q$s*PxS9>7augwQWT^BRR85(zfHO5T%-= z?KVS6QZmHMqBRL^7`J3lOlz9oj@oDUdG?<@zVGwCf4uMeyzlq(zF)W$FC2H;NM0Wc^O8l{XuqtRF_Mg>Qhhf`I>>1%4K6ATTENQMSP;sUab`2rIwQzFsA z(bCFx35`ZGGG9t}qB`5y(WrBWKv*mmr;5{?H&2ghOf;tc=YXF8JO+>e0|A);B|L<{ zLvSrHQsk)&eF^YKKuQRtG762ss;DY9a5aDugg_`E5z5L)q+)lxVjUpy%9_SY+)!Hn zyU-?w2-IZuC5);2-5TxnEfTWb?xk*? zz58WcUU|iXhfkl?KCk1~H@s+l`AeJVRr~9Xo?dZZ|G?nT@W{mE)Q6ALGty7L&AA|e z_{vg@Utxc9;T0|=Bocu{&$%EamZBr@NM++CC`~ti^sYl%Ce&mM!JU2SZVlGdZoNc% zcT|gt4w=?NnV6$}k^MW^vHvCe6ZV&@1K<#l;`0!A;0oS7GVW2wbx9YwRa8jcv#nZ( zMdRemEsUmjA?tl9QNHxiaJfd2N@DZfVi&n{AwO5fndjXc9Bq zp}M1oZ>^I@o|RFGrbturmT4G#imVh)P87X=_lX(b#Vkp6v_2quZdfieb|JvvQivA} zo?xn_d9FQJ$+gd_28-oDF;&}E5yTx=b6eO*XIJrj9ao6sc4cd7r;Q5jGG_Gb2RrO@ zvp8|VvpP8xL4p5@7tKDzlcAlN7YmtU_1t<&O!xI-!l!I*+q^{={^=$WtM)?FsD~u0 zA#98|L%sR%#p&Z28i8#7c1y!FpF!mvc+P*DtWYf06vehQf4ps_3rz|kM zo5bvP@QALl_Uh;_3EDpJ*h$LrY>m?nKg1dOEk0XNQcTMgHO$wZ>~o^5aH=OB_20Uq zOjnk+$#q)m3=F(hd2M&F3lHj<4Xu_2T^=5zK0QE-*tyBnxeRf!UWi&tzmWrjygdp; zk&OQc2KPUw$hUqw^{7caTj{bN1}wu-T~BFS&nhF8$XL4k{B@y-cd*7{(j+v@)6b`= zAR{9qy~?o;2I|9`C%1X4ajg!m49Q$te0n0~n154#)jC0?GsG((i)XrQSh1XIUta4& zuc-B{$Ns*KrU}_qKO5(!sAWmEKK)68=yJ`@n8D}4q{pobit6u$*uUxQ98(Fn@srmN z)F;!(SG>2;&BV9os-^u#lIFRBx|qO)LBehrh_e~jYHQ9Gcr}A8*}^FRAfTX8(zV{MghtaXXFa=u(X_>jb5zyDQddBo;@A@h%u zd#d>MVK2pxlXzJII_HL!zYqJyshAvLH-CT>=9*Gc7d^vR_%LGqg`ATv^dq*?WiJMN zkbNU~{ykqZ#+79$Y};u!?sX6bganJBLI;U}m(S@K!LQNo6$+va7LH*?lerDTKxT2N Ku8*Jye*HTbSoTB! literal 0 HcmV?d00001 From 8e1b29d471ff20b2f62b2098d17e276fdc38058c Mon Sep 17 00:00:00 2001 From: zhanghongji <1029550448@qq.com> Date: Thu, 29 Oct 2020 11:35:28 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E9=80=82=E9=85=8D2.0RC0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle2.0_docs/image_ocr/OCR.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle2.0_docs/image_ocr/OCR.ipynb b/paddle2.0_docs/image_ocr/OCR.ipynb index d46a2db4..db68b55e 100644 --- a/paddle2.0_docs/image_ocr/OCR.ipynb +++ b/paddle2.0_docs/image_ocr/OCR.ipynb @@ -163,16 +163,16 @@ " self.is_infer = is_infer\n", "\n", " # 定义一层3x3卷积+BatchNorm\n", - " self.conv1 = paddle.nn.Conv2d(in_channels=IMAGE_SHAPE_C,\n", + " self.conv1 = paddle.nn.Conv2D(in_channels=IMAGE_SHAPE_C,\n", " out_channels=32,\n", " kernel_size=3)\n", - " self.bn1 = paddle.nn.BatchNorm2d(32)\n", + " self.bn1 = paddle.nn.BatchNorm2D(32)\n", " # 定义一层步长为2的3x3卷积进行下采样+BatchNorm\n", - " self.conv2 = paddle.nn.Conv2d(in_channels=32,\n", + " self.conv2 = paddle.nn.Conv2D(in_channels=32,\n", " out_channels=64,\n", " kernel_size=3,\n", " stride=2)\n", - " self.bn2 = paddle.nn.BatchNorm2d(64)\n", + " self.bn2 = paddle.nn.BatchNorm2D(64)\n", " # 定义一层1x1卷积压缩通道数,输出通道数设置为比LABEL_MAX_LEN稍大的定值可获取更优效果,当然也可设置为LABEL_MAX_LEN\n", " self.conv3 = paddle.nn.Conv2d(in_channels=64,\n", " out_channels=LABEL_MAX_LEN + 4,\n", @@ -287,8 +287,8 @@ " super().__init__()\n", "\n", " def forward(self, ipt, label):\n", - " input_lengths = paddle.tensor.fill_constant([BATCH_SIZE, 1], \"int64\", LABEL_MAX_LEN + 4)\n", - " label_lengths = paddle.tensor.fill_constant([BATCH_SIZE, 1], \"int64\", LABEL_MAX_LEN)\n", + " input_lengths = paddle.tensor.creation.fill_constant([BATCH_SIZE, 1], \"int64\", LABEL_MAX_LEN + 4)\n", + " label_lengths = paddle.tensor.creation.fill_constant([BATCH_SIZE, 1], \"int64\", LABEL_MAX_LEN)\n", " # 按文档要求进行转换dim顺序\n", " ipt = paddle.tensor.transpose(ipt, [1, 0, 2])\n", " # 计算loss\n",