From 66afc9f5a6dec8996be356142073365809a47a88 Mon Sep 17 00:00:00 2001 From: Doug Ransom Date: Thu, 29 Feb 2024 13:10:26 -0800 Subject: [PATCH 1/4] added entry point for loggers --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 163d7f1..c852637 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ dynamic = ["version", "description"] requires-python = ">=3.9" readme = "readme.md" dependencies= [ -## "natlink>=5.3.4", + "natlink>=5.3.4", "FreeSimpleGUI>=5.1.0", "pydebugstring >= 1.0.0.1", "dtactions>=1.6.1", @@ -59,6 +59,9 @@ natlinkconfig_gui = "natlinkcore.configure.natlinkconfig_gui:main_gui" [project.entry-points."dt.loggers"] natlink ="natlinkcore:logname" +[project.entry-points."natlink.loggers"] + natlink ="natlinkcore:logname" + [project.entry-points."natlink.extensions"] natlink_sample_macros = "natlinkcore.SampleMacros:locateme" From adb8c360eb9fcd8b1e937468de2c7008d13cddf0 Mon Sep 17 00:00:00 2001 From: Doug Ransom Date: Sat, 2 Mar 2024 06:27:10 -0800 Subject: [PATCH 2/4] added build in the dev tools. changed entry points group for loggers to dt.loggers --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c852637..db00af3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ natlinkconfig_gui = "natlinkcore.configure.natlinkconfig_gui:main_gui" [project.entry-points."dt.loggers"] natlink ="natlinkcore:logname" -[project.entry-points."natlink.loggers"] +[project.entry-points."dt.loggers"] natlink ="natlinkcore:logname" [project.entry-points."natlink.extensions"] From 367b56b839dd18b18df44d1091901586b1d5efff Mon Sep 17 00:00:00 2001 From: Quintijn Hoogenboom Date: Mon, 17 Jun 2024 15:41:00 +0200 Subject: [PATCH 3/4] most testing for readwritefile.py ok, including read and write of nsapps.ini... correction of pyproject.toml --- pyproject.toml | 3 -- src/natlinkcore/readwritefile.py | 58 ++++++++++++++-------- tests/mock_readwritefile/nsapps_aaron.ini | Bin 0 -> 192222 bytes tests/mock_readwritefile/nsapps_short.ini | Bin 0 -> 126 bytes tests/test_readwritefile.py | 42 +++++++++++++++- 5 files changed, 79 insertions(+), 24 deletions(-) create mode 100644 tests/mock_readwritefile/nsapps_aaron.ini create mode 100644 tests/mock_readwritefile/nsapps_short.ini diff --git a/pyproject.toml b/pyproject.toml index db00af3..ee0d06f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,9 +56,6 @@ natlink_extensions = "natlinkcore.configure.natlink_extensions:main" [project.gui-scripts] natlinkconfig_gui = "natlinkcore.configure.natlinkconfig_gui:main_gui" -[project.entry-points."dt.loggers"] - natlink ="natlinkcore:logname" - [project.entry-points."dt.loggers"] natlink ="natlinkcore:logname" diff --git a/src/natlinkcore/readwritefile.py b/src/natlinkcore/readwritefile.py index f290852..d34ed0b 100644 --- a/src/natlinkcore/readwritefile.py +++ b/src/natlinkcore/readwritefile.py @@ -27,6 +27,10 @@ import os import sys +# replacement strings +WINDOWS_LINE_ENDING = '\r\n' +UNIX_LINE_ENDING = '\n' + class ReadWriteFile: """instance to read any text file and/or and write text into same or new file @@ -78,7 +82,7 @@ def readAnything(self, input_path, encoding=None): with open(self.input_path, mode='rb') as file: # b is important -> binary self.rawText = file.read() - tRaw = fixCrLf(self.rawText) + tRaw = self.rawText # for codingscheme in self.encodings: result = DecodeEncode(tRaw, codingscheme) @@ -87,9 +91,15 @@ def readAnything(self, input_path, encoding=None): pass if result and ord(result[0]) == 65279: # BOM, remove result = result[1:] - self.bom = tRaw[0:3] + if codingscheme.replace('-','').lower() == 'utf8': + self.bom = [239, 187, 191] + elif codingscheme.replace('-', '').lower() == 'utf16le': + self.bom = [255, 254] + else: + raise OSError('file "{input_path}", BOM (byte order mark) found at start of file, but not "utf8" or "utf16le": "{codingscheme}"') self.text = result self.encoding = codingscheme + result = result.replace(WINDOWS_LINE_ENDING, UNIX_LINE_ENDING) return result print(f'readAnything: no valid encoding found for file: {input_path}') self.text = '' @@ -126,6 +136,13 @@ def writeAnything(self, filepath, content, encoding=None, errors=None): if not isinstance(content, str): raise TypeError("writeAnything, content should be str, not %s (%s)"% (type(content), filepath)) + if sys.platform == 'win32': + # convert \n into \r\n: + content = content.replace(UNIX_LINE_ENDING, WINDOWS_LINE_ENDING) + # content = content.replace(b'\r\r\n', b'\r\n') # just to be sure + + + if self.encoding != 'ascii': i = self.encodings.index(self.encoding) # take 'ascii' and next encoding (will be 'utf-8') @@ -148,28 +165,29 @@ def writeAnything(self, filepath, content, encoding=None, errors=None): else: tRaw = content.encode(encoding=firstEncoding, errors=errors) - if sys.platform == 'win32': - tRaw = tRaw.replace(b'\n', b'\r\n') - tRaw = tRaw.replace(b'\r\r\n', b'\r\n') if self.bom: # print('add bom for tRaw') - tRaw = self.bom + tRaw - outfile = open(filepath, 'wb') - # what difference does a bytearray make? (QH) - outfile.write(bytearray(tRaw)) - outfile.close() + bombytes = bytearray(self.bom) + tRaw = bombytes + tRaw # now a bytesarray + with open(filepath, 'wb') as f: + # what difference does a bytearray make? (QH) + f.write(tRaw) -def fixCrLf(tRaw): - """replace crlf into lf - """ - if b'\r\r\n' in tRaw: - print('readAnything, fixCrLf: fix crcrlf') - tRaw = tRaw.replace(b'\r\r\n', b'\r\n') - if b'\r' in tRaw: - # print 'readAnything, self.fixCrLf, remove cr' - tRaw = tRaw.replace(b'\r', b'') - return tRaw +# def fixCrLf(tRaw): +# """replace crlf into lf +# """ +# if b'\r\n' in tRaw: +# print('readAnything, fixCrLf: fix crlf') +# tRaw = tRaw.replace(b'\r\n', b'\n') +# +# if b'\r\r\n' in tRaw: +# print('readAnything, fixCrLf: fix crcrlf') +# tRaw = tRaw.replace(b'\r\r\n', b'\r\n') +# if b'\r' in tRaw: +# # print 'readAnything, self.fixCrLf, remove cr' +# tRaw = tRaw.replace(b'\r', b'') +# return tRaw def DecodeEncode(tRaw, filetype): """return the decoded string or False diff --git a/tests/mock_readwritefile/nsapps_aaron.ini b/tests/mock_readwritefile/nsapps_aaron.ini new file mode 100644 index 0000000000000000000000000000000000000000..e30244ac52905f13371df2dc792fa1fe669e04c2 GIT binary patch literal 192222 zcmeHw+m0Pcc3nOj@IM^Du=Ox#f|n-Qgw2N{S!5H<+pwyeJ>nQdkyT`unm1Jy`!+p* zJOr#68-gtemS9D&?+Iz>o#6AAs|NV!>toXad-zoU-(_+8aDxTxdesQeW#s7El?L&M%E_U$KBYZx^ z-#wH%z`xu0?qqSiI8nS^yoGw!mNlIsKfp}rpB+Y6B8F>r~05#R6Of8T=KeP~=oJ?e?t*)6^Re|2iRy{Nq( zw90lPMYdp_+KWSK4^d)kpnQ*d(QxJSICACN4O$)6ev;3l;mYSx>ymH(Q1{zt*%t%b zZ=?7Kw($@e`W8N;t$OsjWUW&ExEJLvxvP|A!#ju`&j;$@=^=L>jV`$*t4FO%R<-fd z5L<@Xuwsx$qf0);Adgy?d}_bnKK5$##qvp0gTXN`( zl}3vm4Oc#onk%0d%iFW3N5hrRqvp!z#qxIfJQ}Wi9yM1!FP68<=h1NG^QgJ;;#`H&Vu|Z0`wV5ybd;jp9M`clXO3sZP5jICB4*tg#&KL9;VRJy z{5giTl~UGq@Np5@_*y_Y5oVgo@6Mv+{CEEd-<=NSsJYMazmwsAoIB&n6=mNEBTB9z zJu5y1{m0PHKK`ieHSoqUCN1)_R~T{jje4kmA9bucAE53Is7C!p?$io<_{~nyRN_u? z3l{JU8X-5Nz?M1>Ds1tqVYJINnMID@J}klV94m9(hB{`ihVzEhEx*49E7${NMrM}1 zh*plWP(rTA5or^BpBDj#b)v*tZ)>*3dX3wlaC%_9Ggdv$1xOG3SpSP_Yy05;IBe$v zR`oXV=O*R`jvcQ3ov6syzjoD|`qw^H!M+h%hO3U6yD0VUM38mJO+VILu&j0lSvmL1 zoD}=3w}EjHDC@APtI&q*dcJqd)y_5iHdXy|)@8KwI?5 z?p1BCSMTO<@2by@-^q0>+rWP5IrB`&sL&Wx|)Mp<(Kb=-ubSTqg?y;sJZs- zMe}z5^=P>AdDL9_ylCDopGU)$&!gtb=SA~&`8*n~d>%DdJ};WL%jeN><@2by@-^p| z8^|vl%j~~e+0dA1l~ua3iuQ7P;|#aeJ!)>N?^+q5*{UUv+|}8LYKG{_r>nCORc*QXv_0Qw&1;o!R{3U?Z?ryMKYQ90m#QNt_4&P#SeFl1 zDX;QPHTpH@Q|&(V=EAz#b(L?b8t<}I*=H)>)OQNUy%ejgv&uTFth35G)i~1?m6c(= zaaUK=(n!|J7^|$)eTuxR{ghRgv7R%(#}O~g}u3}X0Kgko^N4qe;m_#Bb2T-UX6FEqr)y6R`1Y@ z^UXH=^~pT(OwnqVZ8hFm&9bd#*{X4Lm35*=tj0S$hv;V2_VJGPO3$AkUFDxu{#oUp zRsLD!pYrB%t%9ub&-nbafm<1_;ckoD`1d+)LWudsn-ggt0eUpL`ci6ZkD8l{S9f@( zZ%}v>)~{BxY`k6DS(|D0(bX(lpM0tqw8}T*^Uda~)1kVDXEnlEjc`^YoYe@YPefj= z>r}Pn_IkgW>pIQ%9j>xYSJu%=p11SN9mjdpT#NQbh0T3n$>-5<<@2by@{JkiNIs8- zE1yTrm2b=#M)G+yT=_g|u6*8zq1`?`8m@dEHCMiGW=6S(ebArcRmW8h>B=E*!d#LU zv73EV@_95|AN8np`RHm#+UlNx`dQp&8&)3mGJ|Ww9+M$t>(+BGeBKYc{N{F)z&KO#P|_+q^%*R+{P;wxAB4lUlOV37rrIKbw=J9 z{W*5K>HUZE*e?*@TDgzk@D&igqPZ2`{MZXO2y2af4R!hZG$rz(d7*TvE<922@YKg1 zjwcEpp5{%4XjLlZ(VX{(b=dg)tFLLiF8f^h zLwq^re25OR=HdDj1+Q&o&BO6T!NZd^562S)4^Ngp98VNHJo(BM>>1;&n;Up*>h@vo zh&+_<2X(zprdM`E^A^r~Ucj!jORp%(chb`Fr7ONXzlCfwgWo>KAHE{R_YP<&)8r%% zHFsw6P;=)e4>h+}dZ@X5(?iYep&sfmKc!Exm&e}TyLKP%TDa5w1w2bW#n<)t2H;1N z^zpjza(hx2UTz=i!prSFU3j_urVB5($NZ1tW%1VqKYxkO9~8d|pTB^$@_#=Lzxjjk zS(bWP{F7k2!~E!dWTmU{qc}<$m+3Bm(hbmC?~&)OBh&KclK3*=RC(<>=+D2TIggp! z7ah#p{#btPen7T?XIOjWGhp2681 zW_hkZM9t;SO_C%wg9+?HQQDs3URwg2zp-xUA7_?zN)ir*WYVJ?RL+1kQ= zH)%_c8Q~(@iZ&7T_Y`lDvd5NZ=B+7ORq=A$c^6)8TVK>*Zd>n?D7VFT;pMjcMGX#% z@$bTBZ{r-=8b&MbJan|g_%eBXOLqne%-&P@LVVvmzKcvNnITGUg~-Ks(!VNxf}h!% z#Ln#(?ULs9ieHBu@!nz1zVHUL`}l^hM^4ozVw>?@+^Sx`D*gl-j_oSF=Ju9$z2^3n zzb$^h_-$l~zb-g3`9bmXFh1e<kE3d@YT* zhQ0^KypN#8X=i5Q96ZOOu^+1uQGJM8a)8Bvf4a?9kM9)K)C*4Gk{9<^Uug?yXJFx4g$mh@CJ?u4{ zD^BBc1G+DF^QaZ_j(BgOojIa-5_Ae8@}EX!O3P_{05VFJ5`wgTTW>*#9GZOYh`9VDdGE@0C1o z#QW@(&qXez3I2xYxjk+Jd+q4;%y#kL7x;M(azBOiM9l4JQC?;?MY+19#LVqu?N*iB zugbA@9C80J3O8eEv@O{L7&2p`LadYkP4Y%lyk3Y3rv}m~Yq;mx7&t z8hFUxZ*0bqe|p|5%kM9bYI-=b_x?Dl>EX!6`{Sslha>CmkE5C%jx4u7j%s>1j1L?$ zeFW>}tRd%pI0oW6#3ugz7~eb%eW+{rJ!4uNQL#72`Mns)H}DgCUI&=NIT5VB{`+;% z;QUVf-94216qvN69r#(KTaPM>6rEai9Gx{-q@~}~%af}ckeOUG<2 zxpa|cpW1cY*aqBB5btxFKcSkof%qGaRxg3W7y%w3_w0c4IQJUkf8Jj9CG@U;+JHtq zDGP0fR{c%rZkjdKTfpM%XYB2BY)RzU$Cf7Io`5(9u3k9pJM#7bHoqHeHFrd7Y75RK zOxX`;!sM?0aztFp&5_9z{Q}P#bH_nu{@+H<|3UGm;qw>a^Ji#h{pMxyufoqi4Zryh zLG~e0@)FLD-$1-t!)KrIaO@2{3h{$cbqCS)pm+;^pW$;O%uK{p`zgGYeNy_sM`297 z5A<2_36Qz|73cYxY1LWJw1eUgzvZ9tv#|BL|~tL)=9 zah;6V)Cb2eUItbFAzFXaiJPiyW&5-3xA2#G_+lb|tjgSFce(wtS?ZM45A#RfetHEX z;v49*a0S|(?XB{50j)4(j}db$dr4Ew+Iy7ato?Sd-hYCg<0N z_~@eSX+tr$-QHBnlUSY2 zS3%?jKlGwy55Kj^Cbza{ec&>F8^84|LJYgOo zzYokK$AmKc!y>UHRHp%o~N(Ay@S!yUYMoagGZ?>Uu2AX9`9ki z%~8skk(Qt~Y{uJ>}zZ4cwfm`ARm z4%d7*zU22R8MVfFKAygfYkB@^-gWRtPU5JYbBJ+WkEdSZjNcU5aSgebqveRex!Qe5 z#~mHB@Ew2i2wd$$JN%imfaD{0AjzH?1&<5j53AxD~WxTe? z9eLR1QOlf}egdgtt563IAT3|2k8`3NjZJwr!;?RE)Vd1qi}QMa6>@TQ5AJUrX6E?& zxJt-U@r>ci;-80~S>p%AkU7Bl_|@-pJpKIx%v5~G&h~6;&JFSQmZ|rmwPEG<<2J0^ zUfPB=g>RbWgQSF4gw(xU&8*EYI&k*>4k8eW%Bq-Fg@aGX72P z4>yr#Ia9s|Oy-f8)wqK4Yhmr{lkmwNfimwjCr7F`3+!T+_=~_z%xYTi+IgjyX;!n= zLtc%mq0I8^eSHDR`OP#ZO4?gGw=5SGnkA@x#N6_Iu&GKM$XA zeEH+SGFk&VJcarSogWrQEF*<>YjRh^Gka?X6nfnW0;!qLp7uF6C}5sW;J2ORAMaOQMxu zOD^SZEoq*lpO#cBhn7Suzm{Ce-C9!KY17i;Js)-d6OC@2u~(+j`L*Iw?$(O>nr&KH z%wek4t%vS7r2eaN>xmw7xAQTQv$)6c=j0^wxa3qBpj&d5RwVSbTbDrfJv2p>p=he_ zp~*9cqN%=zCW}v_RXxt5DJyQLsZt)zMQc7`&rPJP`P3V4%~677S#v~NmNlz&@{XWh zS8MifL{3@r**D&rqXf&c=7_c|YgX%g6Ri225s~>ER?+CA8Yg?%LA{6ejH6N>P1#|Z z=AtKxd)4)jWP?MHRM$gN-(U!m>Uu~=)2~Rk$hh9dxe*o}d(PABfS{*LZ`D-^rnl@W z+Vs|4MSC+Xyeh$PD}NU&eph;(6R7$c_oeg9=rlXt(qr8^ZhlWl&0ExQr}T+BQhtB% zB+Ts@Zi_05csWPb?`5~TW>wciDo3NYaKeVKn)JC{cmBQs&V}(LMqDX-vv>E?Qa%oQ zS)L!RTvR3Nw|-I8OMP9FyLswb?4Jo0E%MJIx|HKAB7NG`nIcWv!q-7d8jrh-xC%~BNl`MbrMm6Jl|*Tc>*&;zsCar(>HKYpkx7X3G*4PfseHUr#RO{(4e2P3wuf3w{!IxA9ge z?N?vCMgOP47e&w07;W(`GV*qab@h|*JOTFt5%PJ6iMk)4&S&}h7=Jnl=Q(&w3{S3m zRLMcyi8_xrLofS%?4`eiUAEr?hugSifGaEfd>iL0&xDnh)A;EG{yf0wj^{3S@cjw= z#FG58Q@n$3PvY|oYMm{N_TAn3)i)I`hdnghSHWKWROeW_5tT>1 zR#!f$w`I9o`#gGTye%QVGtw-hXn2T<%wrHm!$VYL8-pks9-?Bz7(~(V5asJ*5Jkg7 zl;6a6GA_e&Hn0!h^_IodtUdG7wL)#Cy z$Da3=@Fp`_2=~VEjKf2G|5;C;NY~BZ3zDaHQI5T!hc&7Cjc2B!!#A<^iL?28&w-TWGIDR*f|@okKTL}Q4CR5!ndT*_SsgD(&5`FQbN%zp6B0=@@AJ1=k1DCd)P`^RO>!)sMQH)mXdkE~;k|0;40 zcL(VEOQ6Z~x%1^W;ZZ!BJOBMHe9s$8%I{AHZxi3lh_`SP$Z6Y$bU<+>fk-^KWuFGTSMbKaD@ z7jD;K#_D?(zG|iEjN65m-_G0R%Wv`Rc=>I=9WTEZwBzOW2aa0)McCI7Z;*&@o%|Bk z%Do=H3Zt6%7U>^{pZ{6-9R3Q*Rrt_{@FeZUbk}ZU_PH9yl=Px_Yg+TJ9e<{vo8FgU z4D|qryxsm8@&wn9yX#eZYx(C_C1DrkIKRpX^<$i$-8Rh-8|>C3TSl~Vv`m#=ehrJ$F3Lk}URk3} z!?bx%r=pfar(>*Ld~|87zqPxRhqQ$MkGqJL^}e1zf9>}6IIm4C^@qCUW@+^T z(mJn}@@OhMrD^t+NfbSNMdW<=qUhl(3gyEWMGs#-n#O0Fs+31l-jt@0ua_4^nU2njBL2~Nk=iuxPKBfOq9{`iFFIq-;Q2m&=4DSpaZiJH+d|zs_$EszS zF?gbaFRAmDOxxJ7*onRBLJ zR^_YfRO7nt-t}0W3aLx-{xfSXzfN7s-8xmY9j8-KnO>(t z%CA$Ga<@)3&L5{!QJG$+LdvgGmvXmGHP10lr=l{wPKA_Tr!M7govLr0*6Gk5_%-Bi z=5O9Mu!%WScTK#yhkpb6O88DL??8AFR`sU2;mg5P4(qlXYE02Ntyc$AR6I;YuW^{7 z;$h0W$6<?I|iAChh1h_LOYG`Y+*MoTf((c>*U{PT@q@ke(uMr#gwhednR*@$w2U-qev)*NZRmf+0w%>mkYNhajo0ha@W> zf~2}0lB{|NlInU$vf?2~s_P*s%n(BLKvh<;7s_&u6+J~a4 zzK14n;oL%BZ;AFB-~B4@UgrIXe5sswGEU(&v|8TIwgs=`y%PuUnE3KEZHP9kUFW=2 zt$K@j?#$A(I?$z!++Niz?R*>L_O2n4=k~cFJmmI7Py53>l4oDm@LqSH{KZ_d9Y#ts ztS81>S$Ljf3O~FCES_Z6+r&@c@sEQyJ_)xLiVyNXMfM{80siPU;wrwhFx9;n(lb{! zKMfh19=3$m0Jo zXm5&a(yY1MmfwsrEQ4%>IqZuVC2io3JEIVLh-=8#yla`3*w5;ldb#t|y0c3kD%}~a zSteP1I>$L&h{{kd2-I*+yGag{Hzsi{Stil6a3|RuWsqsKqP((ImevT*||Aq3Dyg;%=)E4Np@lonKQfc-;(eN~-()l&zQXZx$ z#q%+m5)DsNDxF_bF6CjGQpOphDbesWrPBE|A<9<1w zPUXG1<(?pVoyOdrpDF{SeZOfIn|qRoz7J0J5Bd5Eui-ucdNuoaJSZRZgt0#ASF|9dR1J zs$(p}hw6yS@R&McZvWu>BYzm)8j0_Z{5-rb^21=4L%m`H``9lI7`tqs&-M}GBKP6* zgu%C#<{iz*%kYwBXFi{8gP9N)RRf=_q+^FHRt zcv>dT0`QG1_A;h9rNsAMIZGJN2oh@-NZc7td)mjp++D*HzPtDzXAa|ernpZxw`Z-P zNA(0qe6N^Xv3FaZVA{sFksr>8kSq3hbNko@Ncc2J{RMv7!wG-RRn%J{x39Igc-}ei zHmuzK)P^<0pKhR^#=D95g4|m1p6yXLf*rjG_QX@MpMwVXscvCE9IdVI%h;C!rAL_2 zep<_I`FB@n?yrh?0emw$bfLu^7uywH`4abD?Z(h1`s}G}52F7()U58gE9rA4ho_Pi zYi7j-=sHPD)-B3a>&t1~P4shGcS(Ob>lVf|)-CGG zvu^ba^IEs4_P1`;%x~SIJe74f(a&k!CH?8FTNu+=x2P}Ax;4_zYu%#S-?~*Zzjcf9 zRMy=@Kc{t<^ry3KVN7G)qP{%q*625{b&G0$>sHPD)-B3YS$7ltoYq~^pU%34F^zSL z`tqz>^DTL;TU7g7cibZ~Kcn+q%2-DnMb z6TkTkqv;(k92)$%apef=<0%fpao^utgs4?}i+4Qtmo zVcWcW!?o(_?eRA-BgL8hzPE2wJt2RVl8<#!j`N(ndw{cGJQMu%l|23C)zUiY=Zh-c z#rg0Vvo6Z6PUQb-D^2S}bv;{E=`POu>%^t(>O}Esh)z`3(}_yw*NIEn)rsQk5S^&5 zrxTUVuM?NDs}tpeAv#fAPbVs!UneeQS0~C}Lv*6Lo=(=#7kiFgA>ZlYPdwXlfd5|! z@Bdsv-|u@^U)e?v@HqbP^KGolk$7&q~j7hm`r`E}w_c6B2EAEFc0^>m`r z`E}w_c6FjiHAE+>>*++L^XtT=?CL}@b%;(>*VBng=hul#+0}`n{}7$1uBQ`~&aV@f zva1tiue45bm`r z`E}w_c6BmkmfOH?b-vGf2lIFSO$N-W$|Rn)RXV@6UCL404{#1=8!Kt|F{}CvFDt|o zN&QySD7|uqC%sDNm)@o9O0T$|_CHFmQl9iGonLyFvMaqJe7p22v7`K5O$yV5I` zw@a^5p7bi6UwW6aE4`w2yYwpMNw3oRrFSX2(odOVHj!ia4)}Xm!MbJdG<;yFdGsc{eD&9JJ*Go#b&1H1-%`XuaNBpUl{Gpy|G!i$8co{Xn& z*?qc|+oV%%4@*{`#;R)XEv#=-t1r;gq@2~Wq3k<}nujghPh+>qsoEZvEPfc4YI|7S z{?aCwYI|7JI@e}5@&3;xoO+Kt2{&PlF_*+wp?H3mahLcE=T`qRkHYh}fcwDoo)iiU@%C^807G(1GbjxmU$;UOx5j6oC)4^eSs z45Dawh>9j-5Jkg7jM3pHa`l?sAJ#p2tTTmo@D6EaXx@{;+enz9+53J_$z<#&)NilM z?J>=_S4!sG-ZHP#+`iE+O>VzvN6YOK^ST@60q-NXUOVi$x;y-ZTzwy_pGP!nDn&2< z=v|qui*g*jH@yeeZ^qJ-*Z1XA3y4COT>baLyOceRDUPHgzNazK>5lp-jfp~jjk%OP zjVUs=X>0=-qy7>By;3zD9dXWq^YZ)n`!MW!PSeY;F>%^O+0&S!dz;1_c}2(3UvmqH zLYG|qz0#%ZX-pYvtj0uVxW+^wzs6k3p2n1O+cef}6RPRx$g$F+8r9y!oO0QN;Ax04 zy@**jMR6RT{rVFutqhfJi)yKEy|c7pc$#KYUa3nV8Xlrz;21>F@DLS4#vqD@hbSK$ zgD4svqP#7Q$ktXVk0xu}MZT@u|A{R2f9h*t|51u%+J8h`rv0m>_QRPm&vInDX=`kY zDiKjMJVaS`8Znps`zI3GWIX$|u>UBadi1>Qi|2re2|2dotr6+2B z!t?IesplgJmSgooTaLvGZ8^Oiq2;#p=D2$fr;+pIfO8{#faj?B=gszmsuaudg(~85 z{Gp1t9G|EnF2^sbi0VaoQJMBTKR)QQ>U$fbPwoKHd2({HhPB`5r%lwq47U*+L%BEx ze?8^|O0K4t{sitoc>Sc0Tum?i$=6c)$ko)+zw;W(9(h`ZP4jy*RiXSxAFWYc7^cvNZ!lC8;W!rOsQe6;>A96-m!VN zmUH*ax)F!;%x;B~SA0k5U3F}Ig=Bj)y(ZbbPUOR`Ut_C4m#ILf|-RlYkIhu?<%4|&U#=UnM4 zw~(o0_K11;X?Ufoo0F!!P^8sHHUG{D%}IArjyorKZ(`(+KSim>@0m+UojV@HInorG z9}D`locuj)k?mZmer|h;y~m+k`7x!Bt`s-gxLVvblWI><>((6oRuQBPn|ppzXsX|h z)<+}Fk*fcGoF=a8`&kqRy7?zh^31fhs---d^1n3Aw5QZ%5hV{-9yT6VlssJd(0E)? z@^IxBXy1--IaNmZ2p_jvi3oojA>&P~8FXrh7wGmSuWG6?d6l zQN?}JJ)jHIlK>WOTs#4^W0M^w=i$FgLB!t`22LDwVru|+#`ZDjM4ZC*K^3S zi}k~;AV+TdU({4?OJ79DZPSYg8V7iz2H9nL)W8*;KL~dY{Z;tstBfi%7`3BTSz~*aE&!9WTFkwBzOXk9NFa9-@BHIqVPcJ%v8;0P#G&qx%6omA#~?_60;A zOylMD6w0%Yoex{#ro1n3Gu9URQQU3CTgtZa+ga#Sl{%O%x0Dz&^kM6I&hJ58?V5Ks zsjFS{dr-T4!#s#IxlbnUwOoUx9EAS%eS8<+z>R+OLGTaW@5!vjm!waHeXPe1KjXh= z@jd%?>fi0buegd{uIL@rUSGCW$xEM|uD7s*;vQ}bST8=r=VrKd=o+M^O!My*R#CK$ z^Eya;qSvnGw8SS`CD!cy>mc#TOcI}LmH5HoIUPqY1y6Xs2Sals?3Tv(-Oku@!gdx^@ z8#|^i;g&?suf11%iMN}1%J5vTETL1J?C;ShHZilnJ3iTJd*~N!V%)WfpFcxikvA`Y zhTIWL#yi^Ypwt#d8!EGg|L@?NW8g+5qpyiu-bDTZD8K&vuH$UeO`L7o1jn0){eT#m zcx&@x$jR5YA44nm!&oft+GRhRR$0V zP}OQ2^O3S>c(G4WC5B@KGewWV zK^>cYk!|Q{hA7Q3%tQSAC`hn{F>-8=c#FO8Jnj_1(%jWsh@)V=jzCS3r#L>RW<<@y zR%}aStF5{eBU!_R*jK7SSLc32#K9}$?5_*$=1kg?zVJXj0kg?uNsHw%ZWY+UI$I9%_T!cVJM zJO}ty@o%AVBIWkEbx`D)AI`w@K7uXyJLfZE-z>M6NiKSJy{+a|Feau5u zM)d4Jev`HX4^?zLRQb+WRMGKJX@7U&c^7eV_!4}uUp9)nzcxZFWuN6m=;3~A65ne& zLDGyYKlU^rKS=A{M;0v)S-#MQ%+oi`QmVFxB@bxB;^>ky|K*L@&B&tVA74dgRY-00zj{{UH#@s{H& z-Zb+OXmGV!quji^$gUObc=#`qm~m8~Ol<@CEe zv$dDQXpS>+oGYa_maieoUPlMoo1}rj+Q$%w4>#Y z4DD#S<3c-H?x@g?mOCc2qYaA)S{)jfIr&QK)9@nf6f-eRxwDR#+e5k$b9+fQVs20A zM$GLk-H5q8rWmDgmXqsvgY@A2Q|OfJE-|R-$Bjq{SNA|2*C9`p3A$~YejDZ{mHm{BCcd` zwRJD7_)go0o(SQ&kf~lyudn1T?m6e4bfQnQ+k6eww?Ug4-9cZZuG!pv zbOk8f<)M2_x$}V-voNDd898>)TteP8z%DDw?MGY>|IcCN@85-0z@LZDA3@e~B|U!k z%ka%_h0o!q0yn~H`3IQM={IKk02+S`D|iyxq5i`96#dc?;j7|5h1C$*z$X<>h@3lP zmB^I%G0>sv}s%>4Ok3%WgkO&=8h6V@N^(d6D9j*A)99);QK+@4mKwMoO;ZJF20 zx~wU`m%S|hD0t3agioEVrH^rs1idhNS^SLqRpRP=^t-frZm#e|5dt+@BSKb zDZ3g`j7hJ$r8J_N-in(_=hui!+0}@mUw@6Lrl%2=&aV-dva1n|CHiYbH9d`}bbgJv zlwFM|bM)7UYI+(`>HHdTDZ3g`&Pr>9JsOqrq-PE8cHv8>Q zDOVcfZnddqPZkjr?>&tv7N%Qa5kYl41jUv<2&&^D$XELysE&sq+wX&*Iv#@Tybprv zcnI>1@4!Fe8oYaUego$VI0D$2Xs?Rse}x^`9LJH<{8k*#N>GMr?i7*~(o%jKo^OS` zW1o5c^V@Ho|1s9fb3@;;$1GLJxPO?lIG<0l>ne?u+nz5#hP@z39368;fHP4%cl8`U zbFW`{!eyG-x3siV+WveCPUi)6q}=|%vHgDuBm8(4h5PJZ9*+877ICB>Pptge;4%It z_Cds+5O*tHz*zq_=CIwke;s`no{^$$ajvtzVs#(C;XF2@wZ`+^nW&pa{+_G2>!KWc zt_PUmjgs}72k^D!$AYxBM59ZlspkutwWS_nx;Nu%OSHz=mT2VHmP@(KwiGRMXiK!l zXiGHmYs;nFrY-f{b7)Jn#%N14@@vbb+@>vMtsL4CtufjXjr`hjDYt1$ebThHnC(=m zTMsPFzjtxEoqLsak5G;oi96^6bG6LP=CO3YY|YZjG-;iblvUQ#jAExlS!F%SiZc#n zmGvmgCmqTv>rs~dI+Rt`qbz%MD66bTIoh@?@k|d}oJ2*-RdwmoW04M_IlHL^*PM0OeS&g{wXC&NWP7;iNgng8dgs+I{JruSpD#a=iH# ztoHc&Td@Cr(R?p>9XGRaEgaPviEu z&fFgZvcTQN_zW3(|dG?t&-zBZPh+@3j>pWL21mY>|7J(i#0 zae();a=*MA31SS2=eIc{#r@^%LuobIn_rUurRTJoV(TKR=y<5|vazV5EXyn+He-{%34Gf9S>EWFcwvGJXG0x8)~yoRnx=K=ztlMp0a`4r(I8pynT$8 zUBj5;NmvKa&Bi)WIECjlqt)*>-Uc`O#cv{?v$e@ZZclQh&+SW>gG2{mgh(yNds5#* zUuV8glH0pniF5mxi<{fST-@Ov_b%3fZ}mEr9HUuz*Uh`IhR3jlY5Z(A%-8E2Umbf2 zpSl8nC1yOK%iF&fF?0J<9pNpEnz?iCA@oWKbNkR5=6`thxjchh$B*rpcdo3CFoT4p z&E)o(Hmu=(at$lZSMc|Sotx(Q;B6I;@K@(g^RCdfqvg&#?P$~b#|#mxs>$5`(Jou= z%+iiFwQtllIovyVYs}RiJBsF|X3v2!uYV5Fm?;0QFqd-N74`&uJe~#SnRm{cKf{Ul zdx&zJN9PR_oN4d3E4WQZ{rp*_^Xtf^JWNN5Be566^$4E6m>~x9?%N%#F|_GOwY^b@ zO6S*+OL>@%6#a(jNVPp3sdRoFxs-?LNHKeuj#S&zkxJ*+kxO})j+AMJ=}5Ib9jSDF z9l4Z;=}7r8ts~|ymGX4K8eALY><8!l{4+N5v5#EE`Qiq`T2qut`-4`5N-I#EVS19r``VsR^J~$iT(`Y@xNke^B))sbIWxYP zShdOSsJ^IO)mg4~g_>WxF6Fv*t6Eg=vyY9c&T_RV)cjg>DW|olJlUqj#atk7?U!kr zB5gW8inIJ$b1B!g=6JSpf15VvbGGU%*Cw0#9Vf6m(p~4QN2pnGGVHB9Ibgm_c`&Ur z@~TqZT4HI%?ljFd{#Dz~r{CyafYI|7nk~CI7?`LV*dOJ;(^7tfOjaSu! zK7lu!$64~JH{6mV|I4xDh_xI`R=eaKSH8}cd~&2E>%3Rq)jeu#IWc*fb9c@0KVmJ% zlGQHX6ifC`!O6S*)OS!I}s$VEd4%35bdwNjm{CaRH zr}Z#J{1?sq^177k@;ZK@%rnd$RNJ!$M%?*v@s~wBMHrQD?#<+HJR5gku2DxF_1F6A!0DDREci|BZIQR)18aVdA{MR_-^7iKb*@}ySo z<2d1c?6o?DoY-&1nm1zb)pwq7;+bnw)JV?bLGd>&SJR4J6Rs$ExQcJ%aYf0)Rs3kf zw%>-E(yZ!wNb;MwQ|cD_jsE_``k4F{ zJg4uC-&*%xteZc2$(Oq*$LLk=I-ykkdW}t9z9?xwA6Mxv?w7T@>3L}Fs$LX3+H7{w zM;G-XI-dQibbh_Kl)LOjk*-ZI)jbTQFQ{g>CKk1*y7~3uQtr}=B5|8u9D5O+ZY{-( z9@mHR>qUHYQSQ=Rs6QyoF(SGLhG(ROP%lG+i012SCre)lutaV6${haZbwsn z4^0tfD4ObfX!6^kXsYj_$^M6;slJCM+a8Lh`W~8m;}-gm>zFz0GX~qlSnD%*$6NUO z44)e~Rmpn>IU?iDhP(r-yqi#cIgc;ddRhy7afEkVya0-BBG|{wEpJWy2(P981ar8Y z4dt84yp^2qDN$Nc_V|z&r1_}IISU#fKkDM7m#@#SCl+l`A#>kXN98Yq05(Yp^Kh} zUi#*cmz~ROML*OYeBRApzKl!+-!GdfR(&e1r{=ng-1y$vOp)BktHyI9O5N?1=0<)t zo*Pjb!;QReJU5~=h8ua~cy2^#3^(%5@!W_~H#f9b{^cqqU!=W^na~ZpcX|unTl#j@ zvl(|^ARF<$+C5}MV)&lU((=T#Hu|B9o~ILeUM_Ue^U&pKxzI(=LzideLKi&`U7nN+ zUGzM3c}^~L(euztPmE{3`28*W2TdT`DawptrGX? z_}Q(Q5_waOO6#Lq%9B~%k*3+q5A7{VCJNnn&6LRVo8ez-N%uTJT3&g_aIK5N7_G}Y z(p<1@RH_?^rTMq+L;k9Hv%Y9)eNQbmQzCEOKGa;d_0dd;T*znAns2UKav=(1 zxajt}E-pl23>V5*!*wAFW4Mq%4d+4>#&98DOLIZ%QK@bumge8ON1n1raH;nH5Hqo) znU!u_6a?O;eb9w%7E<|Ar7xMh!T!_LLF68;c zxe$dhT*&i>b0G?2xKLC`b3yA-scs~e=HGhkcsoPvczYE)4)@vD@1=~l($wdzX2pj^ z)YnnRj>yGP$BxLwQO6Eh@KKHgWEXEv>^PkvcAQ?tj$W}N>g%XuN95wDV@KrTsAGpL z_-JECg%XuN95wDV@KrTsAGpL z_-JEC=++8q;+>E zb~z1u}l)bZPNk{8bk%u8Z_1)qkPS!j_ALRk&XJ*9( z{QMdI-Ne5ihZ&oF{4dVP@MO;3;izyOXJB{=g(q;T6k`3dcC{wY%{-dBgXz0Cp;S_u zffi5M@C~Xv!#C|U&dt@}skRdJDqfi3i`K`&`LXTL z=RS`UOL6xZ-+X!xT&zP|o3Qbl`1kzb3Ss=+I=H8MmjBR&|dF#B6b~!-HJcD(v zg`N11gXi#V=3RXIw83g`;@chI$CGVehEklbjQ`(7dmqE^Kf~Yihb?$5oOpXse2I4+ zdG_rA&hUM>P-^|~9G)mepM4R|iG2x4<0(D;w&Z*s?MaY;GVSB%$N5yC?Yb0^c!KloIdbXP!ym?P}2iKgTzWDU2%QKGG|{)m>LAzca^cEIoBpbzX94j55t@tBRui$ghLM zCl+GL5F3dRU(s!`jkSC9b&&XECW#sC(-LcU-s>RosZ0_xW2YoO>Zn)x*SRdxE=Ik( zh}!GOryLdVrKvLG%*Xf`&+4Ox^5t6obby`#&#`ZzPdl^e(yttQx`ZCZX~dv&_;Ud{ zf>~u9Kb^!+Z{yDe{O>J%Uck@%?n3ze$x!+->RrH3Zv!c2mAmLGTtm*<1gG`$etqRk z(Q)NsUz(Df!JkXullhQRtl=j{pi3cdp2Pq7bD1sKpIyh8;T?Rg;jiSngj{?BQe6yv z3Eo`9UK2AJdr=4I>%@CbIUeB6ZyIa(E$dTub#NKEnEm+U;V&hpOl&o_?ODi8$>TQ{ X@Dnw5CX7NZhu@#X?@yt0)WQD;mZ%d{ literal 0 HcmV?d00001 diff --git a/tests/mock_readwritefile/nsapps_short.ini b/tests/mock_readwritefile/nsapps_short.ini new file mode 100644 index 0000000000000000000000000000000000000000..78775d10ccce9edafc7ac819ae8212f2729881df GIT binary patch literal 126 zcmezW&zeDjftP`c0Yv996f-0;6aZl{gC0XBke>+@O#_lS45>g?I*`l*;v%4`5+I!d s6f Date: Fri, 21 Jun 2024 17:13:53 +0200 Subject: [PATCH 4/4] fixed and improved a few tests... --- src/natlinkcore/readwritefile.py | 5 +- tests/mock_readwritefile/cp1252.txt | 2 +- .../{latin1 accented.txt => latin1.txt} | 0 tests/test_readwritefile.py | 68 +++++++++++++++++-- 4 files changed, 65 insertions(+), 10 deletions(-) rename tests/mock_readwritefile/{latin1 accented.txt => latin1.txt} (100%) diff --git a/src/natlinkcore/readwritefile.py b/src/natlinkcore/readwritefile.py index d34ed0b..0ff4415 100644 --- a/src/natlinkcore/readwritefile.py +++ b/src/natlinkcore/readwritefile.py @@ -38,8 +38,6 @@ class ReadWriteFile: `encodings` and `encoding` can be overridden at creation of an instance. `encodings` must then be a list of possible encodings - `encoding` is then - when `encoding` is a str, `encodings` is set to a list only containing this encoding the default `encodings` are: `['ascii', 'utf-8', 'cp1252', 'latin-1']` @@ -47,6 +45,9 @@ class ReadWriteFile: When the encoding is 'ascii' and at write time, non ascii characters are present, care is taken to encode the output to another encoding, most often (default) 'utf-8'. + + When you need a 'utf-16le' encoding (for 'nsapps.ini' of Dragon), + pass `encodings = ['utf16-le']` when creating the instance. """ def __init__(self, encodings=None): self.input_path = '' diff --git a/tests/mock_readwritefile/cp1252.txt b/tests/mock_readwritefile/cp1252.txt index 8d169a0..78617b2 100644 --- a/tests/mock_readwritefile/cp1252.txt +++ b/tests/mock_readwritefile/cp1252.txt @@ -1 +1 @@ -cp1252 café +cp1252 euro: € \ No newline at end of file diff --git a/tests/mock_readwritefile/latin1 accented.txt b/tests/mock_readwritefile/latin1.txt similarity index 100% rename from tests/mock_readwritefile/latin1 accented.txt rename to tests/mock_readwritefile/latin1.txt diff --git a/tests/test_readwritefile.py b/tests/test_readwritefile.py index ed4879b..74e69e6 100644 --- a/tests/test_readwritefile.py +++ b/tests/test_readwritefile.py @@ -52,7 +52,7 @@ def test_accented_characters_write_file(tmp_path): # newFile = join(testDir, 'output-accented.txt') testDir = tmp_path / testFolderName testDir.mkdir() - newFile = testDir/"outut-accented.txt" + newFile = testDir/"output-accented.txt" text = 'caf\xe9' rwfile = ReadWriteFile(encodings=['ascii']) # optional encoding # this is with default errors='xmlcharrefreplace': @@ -89,7 +89,7 @@ def test_other_encodings_write_file(tmp_path): testDir = tmp_path / testFolderName testDir.mkdir() - oldFile = mock_readwritefiledir/'latin1 accented.txt' + oldFile = mock_readwritefiledir/'latin1.txt' rwfile = ReadWriteFile(encodings=['latin1']) # optional encoding text = rwfile.readAnything(oldFile) @@ -133,11 +133,32 @@ def test_nsapps_utf16(tmp_path): assert encoding2 == 'utf-16le' def test_latin1_cp1252_write_file(tmp_path): + """have one latin-1 file and one that is specific cp1252 (euro sign) + + Currently both return cp1252, as is is hard to distinguish them and cp1252 is more general + """ testDir = tmp_path / testFolderName testDir.mkdir() - _newFile = testDir/ 'latin1.txt' - _newFile = testDir/'cp1252.txt' - assert False, "QH TODO" + mock_files_list = os.listdir(mock_readwritefiledir) + + assert 'latin1.txt' in mock_files_list + assert 'cp1252.txt' in mock_files_list + + rwfilelatin1 = ReadWriteFile() + rwfilecp1252 = ReadWriteFile() + latin1_path = mock_readwritefiledir/'latin1.txt' + cp1252_path = mock_readwritefiledir/'cp1252.txt' + + rwfilelatin1.readAnything(latin1_path) + + assert rwfilelatin1.bom == '' + assert rwfilelatin1.encoding == 'cp1252' + + rwfilecp1252.readAnything(cp1252_path) + assert rwfilecp1252.bom == '' + assert rwfilecp1252.encoding == 'cp1252' + + # TODO (QH) to be done, these encodings do not take all characters, # and need special attention. @@ -151,11 +172,15 @@ def test_read_write_file(tmp_path): assert len(mock_files_list) > 0 for F in mock_files_list: + encodings = None + if F.startswith('nsapps'): + encodings = ['utf-16le'] + continue # utf16-le is not caught by the standard function, but needs its own encoding if not F.startswith('output-'): Fout = 'output-' + F #read the file from the mock folder F_path = mock_readwritefiledir / F - rwfile = ReadWriteFile() + rwfile = ReadWriteFile(encodings=encodings) text = rwfile.readAnything(F_path) trunk, _ext = splitext(F) Fout = trunk + ".txt" @@ -172,13 +197,42 @@ def test_read_write_file(tmp_path): raise ValueError(f'old: "{F_path}", new: "{Fout_path}", differ at pos {i}: Old: "{o}", new: "{n}", partold (i:i+2): "{parto}", partnew: "{partn}"') def test_acoustics_ini(tmp_path): + """this is a utf-8 file with a bom mark. Try also writing back! + """ + testDir = tmp_path / testFolderName + testDir.mkdir() + + F='acoustic.ini' F_path = mock_readwritefiledir/F rwfile = ReadWriteFile() config_text = rwfile.readAnything(F_path) Config = configparser.ConfigParser() Config.read_string(config_text) - assert Config.get('Acoustics', '2 2') == '2_2' + assert Config.get('Acoustics', '2 2') == '2_2' + + newFile1 = 'output1' + F + newPath1 = testDir/newFile1 + rwfile.writeAnything(newPath1, config_text) + + assert filecmp.cmp(F_path, newPath1) + + rwfile2 = ReadWriteFile() + text2 = rwfile2.readAnything(newPath1) + bom2 = rwfile2.bom + encoding2 = rwfile2.encoding + + tRaw = rwfile.rawText + tRaw2 = rwfile2.rawText + + assert tRaw2 == tRaw + assert text2[0:5] == '[Base' + assert bom2 == [239, 187, 191] + assert encoding2 == 'utf-8' + + + + @pytest.mark.parametrize("F", ['originalnatlink.ini', 'natlinkconfigured.ini']) def test_config_ini(tmp_path,F):