From 0d635644ba732ee70baa4d8dd0ee7d4a8101889a Mon Sep 17 00:00:00 2001 From: Jiaju Zhuang Date: Wed, 16 Nov 2022 10:44:11 +0800 Subject: [PATCH] =?UTF-8?q?*=20=E6=8F=90=E4=BE=9Bxlsx=E5=85=BC=E5=AE=B9?= =?UTF-8?q?=E6=80=A7=EF=BC=9A=E5=85=BC=E5=AE=B9`sharedStrings.xml`=20?= =?UTF-8?q?=E5=AD=98=E5=82=A8=E5=9C=A8=20`x:t`=E6=A0=87=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- .../sax/SharedStringsTableHandler.java | 73 +++++++++++------- .../excel/constant/ExcelXmlConstants.java | 24 ++++++ .../core/compatibility/CompatibilityTest.java | 12 +++ .../src/test/resources/compatibility/t02.xlsx | Bin 0 -> 10366 bytes pom.xml | 2 +- 6 files changed, 84 insertions(+), 29 deletions(-) create mode 100644 easyexcel-test/src/test/resources/compatibility/t02.xlsx diff --git a/README.md b/README.md index 58018547..a9ac8647 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Java解析、生成Excel比较有名的框架有Apache poi、jxl。但他们都 com.alibaba easyexcel - 3.1.2 + 3.1.3 ``` diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java index f4514e43..83e92df3 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/analysis/v07/handlers/sax/SharedStringsTableHandler.java @@ -4,6 +4,7 @@ import org.xml.sax.Attributes; import org.xml.sax.helpers.DefaultHandler; import com.alibaba.excel.cache.ReadCache; +import com.alibaba.excel.constant.ExcelXmlConstants; /** * Sax read sharedStringsTable.xml @@ -11,12 +12,6 @@ import com.alibaba.excel.cache.ReadCache; * @author Jiaju Zhuang */ public class SharedStringsTableHandler extends DefaultHandler { - private static final String T_TAG = "t"; - private static final String SI_TAG = "si"; - /** - * Mac 2016 2017 will have this extra field to ignore - */ - private static final String RPH_TAG = "rPh"; /** * The final piece of data @@ -43,34 +38,58 @@ public class SharedStringsTableHandler extends DefaultHandler { @Override public void startElement(String uri, String localName, String name, Attributes attributes) { - if (T_TAG.equals(name)) { - currentElementData = null; - isTagt = true; - } else if (SI_TAG.equals(name)) { - currentData = null; - } else if (RPH_TAG.equals(name)) { - ignoreTagt = true; + if (name == null) { + return; + } + switch (name) { + case ExcelXmlConstants.SHAREDSTRINGS_T_TAG: + case ExcelXmlConstants.SHAREDSTRINGS_X_T_TAG: + currentElementData = null; + isTagt = true; + break; + case ExcelXmlConstants.SHAREDSTRINGS_SI_TAG: + case ExcelXmlConstants.SHAREDSTRINGS_X_SI_TAG: + currentData = null; + break; + case ExcelXmlConstants.SHAREDSTRINGS_RPH_TAG: + case ExcelXmlConstants.SHAREDSTRINGS_X_RPH_TAG: + ignoreTagt = true; + break; + default: + // ignore } } @Override public void endElement(String uri, String localName, String name) { - if (T_TAG.equals(name)) { - if (currentElementData != null) { + if (name == null) { + return; + } + switch (name) { + case ExcelXmlConstants.SHAREDSTRINGS_T_TAG: + case ExcelXmlConstants.SHAREDSTRINGS_X_T_TAG: + if (currentElementData != null) { + if (currentData == null) { + currentData = new StringBuilder(); + } + currentData.append(currentElementData); + } + isTagt = false; + break; + case ExcelXmlConstants.SHAREDSTRINGS_SI_TAG: + case ExcelXmlConstants.SHAREDSTRINGS_X_SI_TAG: if (currentData == null) { - currentData = new StringBuilder(); + readCache.put(null); + } else { + readCache.put(currentData.toString()); } - currentData.append(currentElementData); - } - isTagt = false; - } else if (SI_TAG.equals(name)) { - if (currentData == null) { - readCache.put(null); - } else { - readCache.put(currentData.toString()); - } - } else if (RPH_TAG.equals(name)) { - ignoreTagt = false; + break; + case ExcelXmlConstants.SHAREDSTRINGS_RPH_TAG: + case ExcelXmlConstants.SHAREDSTRINGS_X_RPH_TAG: + ignoreTagt = false; + break; + default: + // ignore } } diff --git a/easyexcel-core/src/main/java/com/alibaba/excel/constant/ExcelXmlConstants.java b/easyexcel-core/src/main/java/com/alibaba/excel/constant/ExcelXmlConstants.java index bfd319a2..4f5c64dc 100644 --- a/easyexcel-core/src/main/java/com/alibaba/excel/constant/ExcelXmlConstants.java +++ b/easyexcel-core/src/main/java/com/alibaba/excel/constant/ExcelXmlConstants.java @@ -59,4 +59,28 @@ public class ExcelXmlConstants { */ public static final String CELL_RANGE_SPLIT = ":"; + // The following is a constant read the `SharedStrings.xml` + + /** + * text + * https://learn.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet + * .text?redirectedfrom=MSDN&view=openxml-2.8.1 + */ + public static final String SHAREDSTRINGS_T_TAG = "t"; + public static final String SHAREDSTRINGS_X_T_TAG = "x:t"; + + /** + * SharedStringItem + * https://learn.microsoft.com/en-us/dotnet/api/documentformat.openxml.spreadsheet + * .sharedstringitem?redirectedfrom=MSDN&view=openxml-2.8.1 + */ + public static final String SHAREDSTRINGS_SI_TAG = "si"; + public static final String SHAREDSTRINGS_X_SI_TAG = "x:si"; + + /** + * Mac 2016 2017 will have this extra field to ignore + */ + public static final String SHAREDSTRINGS_RPH_TAG = "rPh"; + public static final String SHAREDSTRINGS_X_RPH_TAG = "x:rPh"; + } diff --git a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/compatibility/CompatibilityTest.java b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/compatibility/CompatibilityTest.java index 027aa54f..3844978f 100644 --- a/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/compatibility/CompatibilityTest.java +++ b/easyexcel-test/src/test/java/com/alibaba/easyexcel/test/core/compatibility/CompatibilityTest.java @@ -5,6 +5,7 @@ import java.util.Map; import com.alibaba.easyexcel.test.util.TestFileUtil; import com.alibaba.excel.EasyExcel; +import com.alibaba.fastjson.JSON; import lombok.extern.slf4j.Slf4j; import org.junit.Assert; @@ -30,4 +31,15 @@ public class CompatibilityTest { Map row1 = list.get(1); Assert.assertEquals("Q235(碳钢)", row1.get(0)); } + + @Test + public void t02() { + // Exist in `sharedStrings.xml` `x:t` start tag, need to be compatible + List> list = EasyExcel.read(TestFileUtil.getPath() + "compatibility/t02.xlsx").sheet() + .headRowNumber(0).doReadSync(); + log.info("data:{}", JSON.toJSONString(list)); + Assert.assertEquals(3, list.size()); + Map row2 = list.get(2); + Assert.assertEquals("1,2-戊二醇", row2.get(2)); + } } diff --git a/easyexcel-test/src/test/resources/compatibility/t02.xlsx b/easyexcel-test/src/test/resources/compatibility/t02.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b8d755de85bec3cc84e39ae6c831e48f1a3ec485 GIT binary patch literal 10366 zcma)i1z45c(l(8Nba%7qZt0SiPH8sX4I&-VDbgvM?(Xg`De3M|8sUf6bB?~A`2M-B zy>`vLVrI>ndDe`A3?vjh*fRyouM0iD|C8XJJ{a2?DLB~LIWj3cv0*&@@B-|MkA-TS z>+XX;I2f1#Bp4XV?`#Hkc8qS;R%vk^^7t$$L3+S5LKk$h{A?ZaX(A6l%#hU&Feg@q z6_(nU#^Uvkg&tO8-@o7kkUrk>p*BP71_xQ2(RNsGU*(+%wjF+7EiBE3DT&hp7T3j< zzxX&Fs1g@GlK=Igo$Pfce13Ij_{c;BWV#>jwisu7q^RHC`Ucv z8)|P23vI}oFY<;u2}-K~1``h2&^xJ0Y;3LSsjqu(Huv{C?)uBGkjq}E`u5anx*K^g$l@OI3Oe~09*EZ#LlrJtwUQN15QnFz#+64n`j<4g>^~IzYuN)2I#&fgA z7S<|+VMal{H)hLpLAQ~HtbtuOBW|t5yZ7T{ z&4GG&?>|bK*43ysuMs^wX(;2Gf=Dm zIy+rW!QxXx$rjk~qnDHC{7~|_dg+$e{60A$NCXj!0A>(feqdMpl=B`JCw`&0Ajp z)wmlqf(H78P#wG37ceTTh)qQ&^}Bc=!N zEYkmMDoKdg07C-KzOcFv8QKFW-jxkRgrT@r87I{p}TJLcqJKS^TL-wD5)of7g}+UE9EpKTkF ze0<9`tVlg6>*pIt{6yH%s$dU(WMm4sbG#8rTC3NZXm%9?m(Gog;-vQGZnBbt2QE1i z1#D=rZwdGX#WZcwt9~qaQLiLG0~F2CunW$^WAuv(9S|BetF%6PN@1`jPZ5>{vh#d$ zNy+YCD7!k4LdgY!DDEW2M`GCCNsyNqi+nwuz*P3FseS)o`nXLlQH%?ZdV zX~*k8Ygsal&z`K-7@NT8c>LP5dMns>zU*RQL~V>yW?_6PAoWd{&Oz;%C6ak+hBv-c zeBlM9?{0$GeSsv9!&0XRc*T3(=@He#LAgv&h?IeBq7#QH)wn~kWErbAa$I+0ais3cMPTdbC zRcHZ|zo<7F*9@^AF{w_`OgXhq7dV_3AAc&OF&%5fZ+*knoGNg@>-9rRPi^WkgZ| zjq&O+#l2!i(Zltdq4|zDw)p`Wp^mXK_TE03c4L1QV`WJg!5-PG#(sO$`L?jbeKJ5} ze>$T>;VTb4Y@Vrf_oH0i*&vIi>hU0p#p>xGi@WN{l9fr$!KomNS6h|{c{FeoVqkHQ@gwk+#ga~(pv|<4D z|0Rdq1MBGv&O=@d*iZ!E3m_EmB@hA#4}<|C0l|TAKxd6nGB3#5GySQo-j z$euvf09HTNAXZ=2KvsX&;7Xs$fJ(o0;j^sbEUQALJr8W>7KA3G9^Z8lToGJhT+t+< zB#|WHBvDNvO%YAuv&?ISFAAKh1x_u&jm!Qz!RtcySueA~GsClFGi9@eGl#S2GwHL6 zGu;$CsV%8f3RN1HU+bK7m5SPD)++=lNGS9wP$_^EOcb`C<)EOZz(*|*&k--DN~`mU zSlB=60k8ZdkwBIaL<&L$v4G$}G$33M4+t7W3Bsg8j*BvnFwe=_Cs`fyVc>fa{eLsg z*`Zkde=s_r?0fT(NxxVu+8Js_!Tlmv|e}rOmDjQqIi6)p-c%S!lf~8`Fo18<31@NUAmH4z?5v$W1xdPI>SKdi#UphvF4n3Jx)ewesr(laQgli?;^e8*4L6f zo0%TxLI;82eJTkI{JGx5lKUYuDM8#UhG+XtB#>-5;Z-E-*~#25^t?YWW(0pgueoGE znAVab$H<$@3b`*0;AWta3o%FOrAF6Yq#j#eO|u$FaXdDO+4b1i4$!;G$HYJ&DENp8 zdxNil;!_AP_pJ(t!ZpCUFiI$Ritv=|LT8<_W%=@i})$wOkrvhQEJzV?q|yi1R!MnnbD9A7)q z-xoCzd)YxB_2U5MAn#NTGb4#*l8#hp$$Shb7%N`pomzFe`B8XoDN=6kLfunUSE`n9 z(!r4xoa9o2*VR#>qv?CQuGPNjaHS2zdqGW1o99M|FK-D?x}7c4;2Lh$7mO?7!rF

%M!U-<^AWzOlO)weXpQL;#|1zW@CTWxDX$aoOzdEr$R!3z&P9fjMyp?j7Hd0|dCF zNM@j}V7ExUm5sedSKcqwyHJs8p$ z?sZ8{9Z`ubqSLPZkkVYBJE9J?#xX3vrp#2TT%d10fbF<{i(|O>mT+_r%Akam2+({WY@4>WIFi z`+$BooSc{PCmeb_A!2L1Xu7>nU-2%=r|LSVG#$D-@4sjk72umN5 znZ|5C&FOyI1yY(yNV6}a{fJP23U@kE0iZ2L?skJInQnU>MlI7Dcu0!613sG_S0pS# zu}wrN)u%qhG$o{5LYk~?jR3WlMmYZ|8kN)ybIk{*7P3956!aLg4Tx=^)xgu;WZn@sahpQCGbZHJ4nQ3WY(`S=-ghc z3I~(02jd$-X>Rl9oVJfF%&nM~K|Xk-GRp%g2-2?^I9G;PLke)+|Go28p zIgPR5jH{Nc6TZ-5RMF0spOzOUhaIQp+;iVkbjx?e9kFLNQ z1W0^Au(REcrmwgk$*^A_F8qM~`_rQ#XS93a(}PW`1sE9OPha5ZCQbLxmmX!3N1=zcc@VJcH4<3_#7AeC(!i2-5PL*G; zSEeD}g@EDchlt*9LcUO2AfXz7BLt`YCN5p&HJw^#wU_ogn|>fvFhD>O?w4&F+11u^P{Ec zi0Gv92=ROp$HtnblXVbGimh}l_9eqilivf=6>V^C4Jdk~xFqYv9oKqc+uR<$3ouEp zqq}Aw-nL=~OoVpiq`8RN)KnW6rKpmXr$|qZ&}(-J?|fWVjeAX`WGez7dod4Ljs@h9ja2&`rxhGvG%0cBUdPq^mGWH6j| z7Ex_S|9ICpx{DHLSMa3M1#F6bR}=^DRkq$y3Kdev86)mUqaP|OFvb@5rTsx@ZV z!3a4n_igcFtu^UXCB61+G?FjN1)Gp)tXh@1KYOawraMu10IYVEm9pHfLU^cKK35&x zwy=y@K7J68M0;k!LpuGLit}5lIcZ~#r zu-AU>%~ex+6*^30Mj+$cVeS+Gpl$z|&1HD=cVTIUmiiA(#WUac2{tMv25MBS6KbTH z1l?zLGH~fN@07YIUr^U*2l*>?DT54@dlp&D^btSh`POLr!hErF#>2WOq<336^2iau zOi$8P$KV;0(i6R|(zYdX$od$8sksA1 zx*UvVW?GV_f$@wZP-~_CLaI)qn^jg88_xAziCr?vdCunKx|ySgvpwH?lxbIUf36&6 zmse*_J-Relg-90Q;&lG!7m9615+!>|<>)@`D5>{k&FRdSyw`h503N3S|I!vB;t>O`LcO7c^C=m`~HH*&gsJ=8)j6%ca;jQbRaSVG48GS%U zqKB;XqnRYPl0ZusQ6!f~)L1EI^)ne>v9v!2U1M5KM-obX;W3A~-88{VmaD^y{SHMb zqCG#EXtm5;*Svm+*l0>EG!ccyYH`s*r79tcO@2Ydgyd_|iIeI*dVRRakE0OTD|U*_ zh1p}tX2scWB+QEUc{*utxh_>!l*T5kgfQQ83RwzW`-VOJSTAlV^xZyeTlJ|~S4DsS zHM=4`$6n_7gFEI^WAid0n$FP1VBByuABsB}f;gHjme^&vlWsFM%(ca4mJ-<%YsD9Q zcY`~Y6qmY6i(={3AL zYrSX1jTKjTvn!erZw@98BN8mYc6;WmZ2anB%<`LmKki%CtMz*hIKaS`)ySx%VjjB% z;|x52SYXJqW&_41UbTE>17YvrX;FufNw9yL#U(R5Xa#OCdQIM>o_nDPoS zqET6$_Y7{YdJD<>h-o{nbHBRMU=@HmIa+0#9)dksie7b-sBz(S)&c6tmN( zD>No%YK`=GV|KsUx1!kkDTTDR7%8z}@Y82R^ZWU{VDBdWo1>+(4cOPj@9nO?y`f;Z z%3<|lM+kj9e7GL%`YLG@t|GAeRnj!vsoli$T0tL zip#JE=tJ?|!R_>B!rveJr0o@P3MQ7=_f2ANfyO%`7Q?{6%w)g#f^PI8PDG{Q&90`` z#6RRGLkQ{Eq1FUE@AJ;emSqgz2wWpn0DDxMql?(7Dh#26NNSOZ-={KjG+dZbQPZ(P zDru@I#_pri(N_DAh!hW6zzhEXw<7Zs0;+-vJfXyU4L=5wX!^g9G%70o>O>7#+jPBB zHtdm>2~55@U5#QwB0`~4)1cK@jIxz`IJNUU;o5jr2O##67}m5dP+;?iTcJqTXu(w= z-nd|Nrm7M)n~Z1H!&Q4n+kLlr;tHsH5QQOBbEi#AAQGUYD(+U1+5~xK~tiEP$q5$y5*Zpdw-&ZkpsdJe?j;c3% z#6vFFDi4qEQ1RINeQZ+IVJ2mHgn6`-_OfhUaFGE7b$w!;2VQl>%{~xfmM7QJDx;V< z8!?K-YCDp~w5b8=(I>s2y{Jqi5F!|wPC!R@YRX+Z%FWN~DKga>7Pb@)#2TAi*96rH z88|lD8~P<&%W;sv)s~op21wJJ#T}6p;<`%A*B>*rk+iJ(LPBzg@>J_t*Wj(Pg1DtM zNSKk}-YPmuuGYdcLN)ZbY+^QkV_oa+c1fk$$d7WjKCvRjqB1Km*FYesP7UZ_sJ50T zP2k9Pn+n)tJA~K;Y}ygxwKCBdFY{dhV~*8& ztejXW<_;dKbF{qpc_U^2w=r`ai|@#9S}q0u9@JU~n&5Cer9(2FE;*hvTTk~Oj%Ef9 zKx1Vm2XhkkOJvXvxw;y_q@k>H=3pH35{HI?O@+nf(H(?ohk~=U3EDP5XzQ1QMixsO(!9{Z)q&Y?x=zZ-%yFv?>TqA~Tp{z;piL*` z0>xRjKbw4Facx>d ziMZ!7T%samQPo=Kar?-z`B&1)s47-{v>DS?I;$~*~N>roUB@9aQ|q4(q>hAm(fh3Wf=M4 z=u?r}F{6_220gc|k!!*y`tS$;oE%VTtuJ3UAW^A&ZhAoyA4dI4pS!x*H&+S zq%50yt-swz*}VKfxM3PG*Ego3)n*|V>^|4u5frMGl%(dms>7Y%p^LsHkVy6)-;pe>DeQ1rVWHJ^!CODig z6f6n^lquITI~~ERNqg{oH@E&lE$J^Z|HzHBnj z$qHxE6qB(HWg%|$BE2B~tf-{z&%>7)?EjMB$lEzzdXTf ziqouKT}yqaeS8E>J3(bNhG79uPO_4HNuIz0*R8%R|Eydk^kqcbn|g+cDR4`bXZ1jhJ>(q+$1j{63{n9y-NVJxRx(j|Egsz{eF}BQXt~ z3hQe_DKPhlfSrsJ@564_LnD!=t?uU-&hZ2>^PwS%`}uJv<8@9H3-{9jTZ!Lj83Qrs z3TRdbgQp4W``j*BKA$M_Foq28ZhPdc%=)wCu$g(eC@q^x&v?`B(Xh^kiJUj*QmO#e zQhKCK`Wo)Q{?AM0|0G0#Bp-#fC*hSo2@&t<{prm8$@aTR{6nUH*@gI%Ow+k(UXZHP zrPwqE>b5&j*-v-Znt+7Hdi}`lp=FK3=r(KMD>#cs^>_u7DINLqrRuS6JoAg;}r>$fH~ zp~7f5iL6+?d_<-(siG_wj?2Ld`(rf$RvJ9$%j#P2pLz8(FM?wqP+0e^I4Y(EmZNsYMTqw*w3$dq2i~TlL(WjwY<*L7B!3Ob{Zf$x3*cEC#Q#KY#{d zaV_^AgHgw4Xm62)W9bw5!H3askBo+IJlJieZuH_{7k<8s28R(*0zL{AP(sSUcgq0( zRghm1z)bEdWm4{F9Cln(4C6`(xrO$;!CU!^Zb&W=vD)~wGuAj!r7qt1Me7XF=bV}>lAP62W6 z9y6wcHR_OvG?XF6ct$O=f#rS&!w*D~4hssmC#ur7{h%-xp5e4E1Dggf#o$fqG~$R- zFew(v&sZShi&6+>oK>`9_1uace>Sy7X6cM>^rJAN{B{8-ZIaC%-NMm4maGj2W_f4Q zNSokc=5P^C30diVAL?GmX?%Oo&C%3=G%_Qf(c>U`9I}M)9q@%fIMX*8Tos%<_-4*< zF2t^vlbk!z`bu4VL)a)xEFg|8t{c#M$PoTu8cC--V`!Tj&h^MF?m1K9ue2NCCe5V9 zNh791&_$-WVfw9e)eL2hy4fdIAzo(pD>QdbjqjSHkPiW<`{sP#wf9OfXiPBBVtYB2 zMY1=T(f9E{|t1w6g}1^Y++ z(a%ADq5mv@d4l`{J@=&Xf1&?b4D-L{{9Nkslb+{H_w@Cz!jJ#H$mi0JpY#ln{R#M6 zA;{lX`e!M~Gxq07-Tn>xt1#q$P5JL)-gx@@Th8y-;QuM>_iJ$YCl&q+_A3YYr=B;(ucPa%cZ*u)nK^o8+&V{?mK@eY`*Y=dbbfeuw?O z%-@FpWeL^E`is-^}svhvhT;pL9p|yFvXrIu&G~|B@=|Q~lWsC@?U6 Iif6?C18ms(`v3p{ literal 0 HcmV?d00001 diff --git a/pom.xml b/pom.xml index 319a85e0..39ca4669 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ - 3.1.2 + 3.1.3 UTF-8 1.8 true