From cd1394695244f71885d6e55f6825315290e68a19 Mon Sep 17 00:00:00 2001 From: Eric Jolibois Date: Fri, 23 Aug 2024 18:40:54 +0200 Subject: [PATCH] fix: null column should be nullable (#279) --- .gitignore | 1 + python/tests/fixtures/null-column.xlsx | Bin 0 -> 5787 bytes python/tests/test_fastexcel.py | 5 +++++ src/types/python/excelsheet/mod.rs | 13 +++++++++---- 4 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 python/tests/fixtures/null-column.xlsx diff --git a/.gitignore b/.gitignore index 3a99b8d..5659737 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__ *.pyc *.so *.dat +.DS_Store .python-version .venv diff --git a/python/tests/fixtures/null-column.xlsx b/python/tests/fixtures/null-column.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..44dafc26e21c9e37756b8b931165f6ce2b95a734 GIT binary patch literal 5787 zcmaJ_2RIz<+FoV#E`sRoi(Z4sN=O7j)YW@iy)QymZz1~X1krmBqIXsgqQoLfL`(D% z(axSP-*x_QPX5_zX0Lf??)#eidEa@SXKob*49r^q000NTrA(odo2F5UM4f@q0RVi! zEr5{=#16{G`|l$ffCP2n`c?+-u<7C_2-)$*TY=@g3lxxvGwgj^FW88^YsV%K#=}yg zcYG`qSOt`)wv&~+y4s_Aha5Fgt3Z4L+a2Z?*LGUJUx8hI&tP+3+wEE7WSp1 zpj7_uR~cgF4SGf-T3|l~Tg2)(DK-n_3q8ClXM?oP%^UwbE(-|+)9L4&E{f=;FOXkG z5~B6oXTjnYC?`)63GdTi41vYVfU5hdM>4~TV}$i@&t4jaHje0dF~W$bR^gN*0b^NB z<>qRQ7On@GGK1aqL*gAYdq1As^J!>b#-5$TR=E}zOr|M{MwE6MGys4U6&G_yGZhy{ zCn%qpqYH%B!`|+HMaMo$(Kbku(v zNiH3i&JR;O=T%R}_S*tm#1N6gkMrU;4=bUks`%4`gGirN!6<1ED^O|=&zW4-gH|zI_uwQo*Nki%d zi0_EM4J9;n2}&1Nuf64$8qt*(SHGc_vHF4XyRe!cf(VYlc%H7GB9)^)&h z`!b7f2t*=8m`=|gFlN)k_J>{`x#!OrDfaQMV~FW>Sfry#aiudOHFu9rFXOvf4HI$> z))>}CxXX|#Kk!^5ygNK&?nL6)PUk54Mjps?n|lu$OvH4H-o*|S*0Dw)Ye(H)W+@o)Lx&m3k6?Y6=i zU;%C|+AYh*I#Dq@DeWCsp>Q?(AW(U1Gyi#O#&{uZGrj&5JpAUvyG^rjD zc*r?_tl!*OE457Nfu5;ercCmLyb_~sHC>34{UX%Y|I?dCM~VOC{>o+#@%2zeormUp z1p)xf8UO(KuTZ&KLF^%Xe;)<@w7kCS>w%H?Z~1&STH1qc!FZobVdJH26Cf@8(!Dil z#hhay6}c*l(>t+VeM~BN_Ag#`KL4;97v*}mf%pmUZ#ogBt|>2yu^zFkJDIBG8Cda3 z*gqP&Jaf6UFm01KU!fCIp-EN6|DaDFlhUGEeBX(dT1p_+@gTsZP{AdZ6sS|>t{LDJ zY$*8kM>)tv5bn}s!uOalFymWa0bO8sT^8h+Nqk!h zyv;!5Es~j$@${kB%yLeavi_CFox@$#a0bHNrH!!}iSJ8m_t#W3=z90Cdpt6 zU@c>qdsV^*P^MV0#kx}cjWn8#!;Eq>aZJd%mm8W`{#C_FFyFf7c@GvmVT|5^Csa<4 zS?em-23o}gq&kQQt}SgeR475P#dP^pI+9DNl^0)Vze1BFKhkRD3=eehuxb=&sQq>a z&lOu)_;y6wXtEZ1rqyt`yI%OQF7&}d5RWOTx4H*Q+dcdKWd6o+A3jiF(kev~0t&Ky z$Hdi`^Ik||G_!N7r-j;kg3++If4jYZ#7R_OVN}P+^7!693-Yy`Kn-~)Nz4%5y7GRl zJ!xHtrN4FpxtAB;LfW3IXoxnd2q~T5JK_OfcoSsCda-V*{tKP!RPIs+w1{h zC*-nAb4I$UtoJmDYDsKj zro6B4-FAONC%$Mm?M-JX3-Y4-{-RcBE?a3$ZsZFVWtrD0&@Qtbx}2E6R_&*sq8+-# z>$}T58JN&9mDZ@rH}mNBy<+?^QB2JXs*=J=?Np67PB7Irn$^Sj0FB8P!VQqtMgyE- z#o>nvxA(+>3YJfcfuv6l)Q##UEj<*piQ)!I;_dBSoa;;USC(1{YU_?P*82y2>z}G1 zO?_SHY_h9FgEh7-o{H9dv-lx$79%7?LX|G3`6^ju$ec;egW!0eW^nd4vw@>}P0H(> zvVk>giuFP2G->#E>&onY^z#@}XNw0x-3@ZNUO)x4I|6+yH9P<#_-YSbkT@SfNfuRT zt28Y&6A4H)XNRhZO7F`y22pT<7T30(;^8Q5r*sIAF{IyEm1q;|_`fQV%i>m5js%H<^!MK#+RP&p2 zo91r^*``v`1Po#R-BzJX_1!ozRL>$ps2(dsko-MFTGt)=Ye-cgf|Lv+Y>dS0Bbix5 zx1{O4ZmRn3WJZA)Dnfz3cm(pF5wOZgMOdpAfS5~5$s;l+y5+S*6SM|LakrBada<7E z+Jh}=7}`c?!GBL!#57PW*&wRR^Z+twPUcH%P2Kbv`-wsli} z9F3(c%w!Qzk&rcg>l4*2c|jZI|5J<%;|R)%wYm0;0CfXZVWxzhgrhms?V!+SJ9^&V zXW_A1?#^!g)qZ1VMfr->^j^u9hk4c^3cf)X=06US-+4w5zYt5s-hyI&iDLfe(q5DN zPQvmwf$#hLOXsvMiOU;36&w7?Iu!;0a33E4p!&Zq1Zo9=xI+2<{`+ISNJvzKrty0BMl8?td|?E0^QrJcyf@EG)8oj( zNlWDN${`U++Ql57e{c`3r|Q?Zf#LE$O-Q;Q-32CcCLyeORAgy7$%OCTpJa=de#I$J z_CZjtD7$_Y_Qo9F6|EqNiP1M@eK9lsTadu3#iRVBf-1=9s)H{+Jy8tfmQN~Z5}BeAtWuQ_5vXA+G)xCiH-f?pUwIC&ZP63mX<^SiDQde$K|6*mclFI1!dMcD}nY44Ze) z*w53r7`qO(2q(sx!Or(FKEvikGx75`=ESKBT!fQgY2)OF7{hRQNlpC1jEiyV!WZEJ zw2Wp}12}b&S%bY!evw($W>!cl`cLaL4D)v!5u!t)@5l@v3-%DoY>qA&5IsyRonkr4 ze<)PM>5;ZtT*cg+&fzB5`vuv(^Zs*~oef`(J_1{~#dg4Q+V$8&1!zsXc~v`k|*Y$ zE#dNR)o^(~I-TeDbeDR~6YCn;0veBrjy9jTmHcAbl`81xn(bTShd$5FeY?ATGY2K^ zbqo^wqFryslB?V};!%BT9csVz>kWfinYcjA!LBaW4wleAbz{;|kBK@NNu*CQ0)`gU zn)rJG2FV`i1~h8DiI^b+eq+dR=cWC&q$DrlR#Ot?p>O2YGj0!Yayi75YeKeGn;*7Y ziM(zImA_Qoi|3>u#0b?Sv(=W@iB{WxXZ&t1kD@3AHZYbba=`65Y(>Wx%7!I&cws2B zg&Se&s4hTV5I;W*VhdN@$M+4e!3pa;8-;*OMHsFtgR)X4JwtWlPPhO7;Xle;J?$WW zlr*o2e2wHM@%~&v@^WWDDXm)1jt?n0+ZxpAkX7;ep*v=^xjAI(a0>)~GlMH2$8l)! zEVm(DOo+N)j=oVBPRU>PfirK<{wZ#uKIIb)j*Ia^Ti5Uw%}K!IPfmSbKU z*=$xiEUI#FtSS2TgPIK(8lGLGjig$X-U_hkw@^JyoYZGeWwX)avqi%79TR#=u_l%rpB0Byz#wIaX~#C@m=BrIs$)Q@zsw@g!X zh`MZ2*a5GDtL(jN%vq>2>t0}jTGL*j^LP?Wx##oz5 zxUVv<#$lZ4(i3`WWe~leb@ZG-%-iTybZ}qH5YOfj?X?88A&g-=V50q;TM5M`w7Z|I z@EeZSKu^fLc9x@BXtaayA*XmvWNp2`AQzd0p{p!;pD4@o7rS0i->3H?63Ca0EauKk?v3Z;;V<c&E({?wiS?g*1>pWGz`w&+>J}Fu##0E4d_r6|I z$}c;QTTHur3%-55;fc)6y*`LXeMG;<`t`M=C*|ni3UP2XQulO%Kn?!%ew8H*I-r~Z z>644s>_|=lS5*y)Xld&t9<>eZWDhvi@+&*?&d)Ds@rq-}B`f9PsWV}gy-I?qBH6EP z_!GIrqF!wu|}B3Pn-~IlodSg?;3V+XV3gz;2Q@{$`(PN zRs?mS9kI58yPCnimdnU`{7eZca|19(^416b%-QUHhqJW0}2$f0xw+)CIzP9~l z3f=@>_sM=oQK44Kf4OEi;n$s}-{BOfeD@#tKfcpV^mS+Fck~JheZ%Lu3BGP~{0=6^ z{2TlS)8i)3br0Zoo<}HW0QC?z9f6w!*E_@C39M1Q;y)_>t53Yia=rTf&N4{!H_Lz3 z!kY}&tIY2VdBndM{!?>qLa%3r-=W-O0Kosq6DkToR1yII@K8TL)La4oy%Yie0-D~j AP5=M^ literal 0 HcmV?d00001 diff --git a/python/tests/test_fastexcel.py b/python/tests/test_fastexcel.py index 82d5db0..555ae1c 100644 --- a/python/tests/test_fastexcel.py +++ b/python/tests/test_fastexcel.py @@ -531,3 +531,8 @@ def test_null_values_in_cells() -> None: pd_expected = pd.DataFrame(expected) pd_expected["Date"] = pd_expected["Date"].dt.as_unit("ms") pd_assert_frame_equal(sheet.to_pandas(), pd_expected) + + +def test_null_column_is_nullable() -> None: + sheet = fastexcel.read_excel(path_for_fixture("null-column.xlsx")).load_sheet(0) + assert sheet.to_arrow().schema.field("nullonly").nullable is True diff --git a/src/types/python/excelsheet/mod.rs b/src/types/python/excelsheet/mod.rs index 9b98185..761d5c2 100644 --- a/src/types/python/excelsheet/mod.rs +++ b/src/types/python/excelsheet/mod.rs @@ -6,7 +6,7 @@ use sheet_data::ExcelSheetData; use std::{cmp, collections::HashSet, fmt::Debug, str::FromStr, sync::Arc}; use arrow::{ - array::NullArray, + array::{Array, NullArray}, datatypes::{Field, Schema}, pyarrow::ToPyArrow, record_batch::RecordBatch, @@ -521,9 +521,14 @@ impl TryFrom<&ExcelSheet> for RecordBatch { let schema: Schema = sheet.into(); Ok(RecordBatch::new_empty(Arc::new(schema))) } else { - RecordBatch::try_from_iter(iter) - .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into()) - .with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name)) + // We use `try_from_iter_with_nullable` because `try_from_iter` relies on `array.null_count() > 0;` + // to determine if the array is nullable. This is not the case for `NullArray` which has no nulls. + RecordBatch::try_from_iter_with_nullable(iter.map(|(field_name, array)| { + let nullable = array.is_nullable(); + (field_name, array, nullable) + })) + .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into()) + .with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name)) } } }