From f91cdfb75dfdf1f14baa2b75de8a42733f408add Mon Sep 17 00:00:00 2001 From: Manali Chanchlani Date: Thu, 19 May 2016 13:46:47 -0400 Subject: [PATCH 1/3] adding rnr framework --- rnr/example_schema.zip | Bin 0 -> 21325 bytes themis/answer.py | 30 +++++--- themis/main.py | 157 ++++++++++++++++++++++++++++++++-------- themis/rnr.py | 160 +++++++++++++++++++++++++++++++++++++++++ train.py | 115 +++++++++++++++++++++++++++++ 5 files changed, 420 insertions(+), 42 deletions(-) create mode 100644 rnr/example_schema.zip create mode 100644 themis/rnr.py create mode 100644 train.py diff --git a/rnr/example_schema.zip b/rnr/example_schema.zip new file mode 100644 index 0000000000000000000000000000000000000000..2a2a3f405e5409259c99b3ecb6f813d751b4056b GIT binary patch literal 21325 zcmaI7Q?Mv7w`RL++qP}nwr$(CZQHhO+qSuv*S`Dju2X$(*Qrh_kIB=h`etTE6r_Pc zPyitQ%|CGs5&-{gzyKfs7`r+-nc5k9(s|h0sA|9f{NdwPKM>?ne+5JVh*jTq+LA!< zJ*W2al&Tsk_%dC0vjQ+gF)Um%h3uJR09y?bAwitkx0gO+vS|vyw}GC`;q?7XgwvEFm^REctqE+VsdYAa-8LexhVOwu?=-eXLkkUrz@Qe~UA%Co^^jJB zF_U~`jGWgiJ!J~4P6B$<^o{Q!T;RbcUuArZN=IaR_s6qIBQ4=p z3C1qdMgn+g-VW`B}eVy!YAz z-coI-%>akNJHJsjk|lB3t6A#EWlWAS_dzjy5|GH7)>sF3ol)S=7=3Vu4NJkNX4>i5 zyqkCXc4CK_RFlslAJ`gRQHq^GD^(k8#1bksRLc1{+oa;sI`0Vf2~)Iw;vvSa~3 zQaP6V{%M7M*E`TC;d*RzPL3%T(y+q^D1}OnP%@YSxa`Hk09+r7flYX@BFa$4C!eeE zHk84Mu=~lMn+3ei_Z9Ffxq-&rJ#+&Kq`aX$cF$2fO0v`do48D#tTPWE)2OS?jRwu9 zv>Ap9m&RCh7&Oc*?pmHk!w)XXDiSmayFCHyn&c)oxkqb}l0_?C)+-@^Yu>TsYaKeo zbV0oIn}q$VlRLI~O|I=XnRj(jD>tam-B#}na3|h(h_-er_(AT?hGnALC zjK^`ZS4P>@=&j=Fm$gN{^a}tcAWXl-D>3jG&UMtU+Q?!HeZW z*~0W%HAg?(>8NkwGyxaUHJ8}orZ&$uLyP9C{A&SRe-AQKj3R?69EI_6Wp4<+X!Ui& zu9SFLG#JHGgRYg*-Y=kQxL)D}sw@!(S-yt50X!{tiCtH6ag$K47^x^%2*CK<6Ya z@-Tv|q!B<^u{&mt)q3w0Jdq#bG?FS2WY*;Is2%ew`3I)y#Ih_Z+#FG%DrVGqnskWa)XsH#FoAj&4x4yWG?y7gZGxe$K^=Ey9lCTCMC$yx?x)x+|Df6qCFZ?x%4VC;3Isxks6&??q_WMyGHaZ(y>k90L*}s;sf;V{t)+b$&VX zbw{8pXV7Y>s4;TBf$j~kt7W;+v>SU}`$1-VspS+^zK(%0zDvW^UDL_S%*;U8ffZK2 zf{JQ1i~aWQ37GIk>=BxCpGC~D8XXjShPDTz&hl$F@E>Pn2GH9kIUpSQ@-%(z!u;bL zKU^9fcN9&6C<0v#o{~Qp(x-TpLxSPJOfsVf)pR%t-DNA0>FjpoR=JpnvgtCZ@bp9} z8X(>}eKG6nfka#!`h8`Ax^e5=PCikl;K4qBnPu%Hes><>g{#$Pzvg=v zp*>dLCXx3%Pj1gN6VIu?c}frU@kd&dMJW+U2>Jt|UE7*>%ajZbF_W(&EgR(mf3{ zbrr1JS}L*lT020JO^vg>R(I^89L^&1FJZ4Yf!3=Gs?#)Kg8UTwz+>%G3#LD*Rs@q( zN`V?~B`j>8qG6|ETT+)`EyFgd%U(>$X4hJ*HUUl$Y)`wIk=h2tEDLexqzBD@)%_g zNT*C&1j_npi7!V`i2h&Ai#$d_)U0{_i-b#}mzW8=NIC-YRH| zXv~vZgUE1gGmW8|KA6kk@xR>Txy7iHXsTUr$4TAk5R0zfE zgrw)6ozW1+h|2ou}P8h|^5hXH^(1(zg|=&0r4K*Iw|2ov7y=o_^F3 zEu7&4F|3SsU8Y(S|A-u%QeT5!IEE|OpBw*STKe=mt@lLq6m1z^JNc|uchwxl2{ zy=>%N*sF``B2YGTZx>;}%0e-@jHi~$>%&QQReIB)vF7jh^KUhU7m-Lla9y+#A5Q|c zZwuFO0ku8ah-1o#$PE`^D^A?~lk5%>bJo`e!ry&c!rBF!xb*Axe1kNTymJOM!WNfP znR_X)Fj@djzY3QvPzFmnQL+7M5r%;fcx)yqoKivrYG@VQOpmKUL1Ryq50UVoCYM`is*)2$aG>O}QrITGL-B(^(c` zX*@|hdB?L?tbt^a5Fi{D3rNp@#_e@|1JdcplQ&uQ^=N_wT-{xFbyYn+^xKn#d|6s& ztDX9vX*O>ul*?4p(u;rQzNU-!?76S{^=x~;+x6_Fb5ANMYbRBdE6r?H235)*=0#a; z>|AVD6j`a6cL69`eH|QIiVvzQRaLQ6SFMy*&7@0JH&;Fk)&^384k>x7Vly+E8Y7j^ z#}FQto2QT0?N=D+W2T&@;RiR^J(>4{xSgu)sy?+~0Tbq$aKcwZr=&X=4kcI2O00FD zVz_eGKP+}CuF67IO6eS7(Z%iX(h8R%8ix8+m^B4j)C2|9ejzV^|?vQ zhIW>C0Wj0Nz}lhPLfNv$_$dSXO)IKvZeG`}<$aXC($-{R~0VElq zICIxJ{$Uu!L9}4nkQ8UJ^B{rxwks!_4flkKrV?l7WwKHMY)zLyR#ujGiq5t$blOBq z7$mWeaJqSlN~+bnUqxdKQj*|m7}{CFMORg%hUUn(*g6jJFTHI$wo(HKi=}!6L0CNe zLNe%vi^sYH;34KXNFXGrXQO$)07Qx zXAZE()@2XbqN2wJf*H}*-Y7UG)4E`F63|-h5D@{QgX&JczOL~kkLmxtOk)+D^! z@DoqljVwK3&s86A6M^nRBvQxi=|6r5($CO73<4M_9cr(lIg7}MVF0mVq79fYZ}!px ztS6gUpo($No`IyXTS#BiRX??1ifEG`Yj|#2Y-dRUHaYa#JfW0YZ-A%Bk`gfmYz~x_ z93LQ}H5AB6u;FAV!1=QRQbF}Mna<3?T*E7kna3HLXI~Wn$!!1ylL5^}^E(Du3?HLv zWfrD168mVgi!h+ziYmL5?7FIu&OjSDv-;utnHSV#K?lelKhTGpqS-xlHnUtE*!pTy zO5+95QkH|QJ+_7l^0;>c7%n3~1`v}S6;*^i(Sux&l?B@R+e_YZ!M~rmAToe{Ki0Zn ziGNNeH+wZ%1Ze*1yI9d<{X4fVuzD+l#rTIYToWj|xUOL!bfO9XdNc7*L+mzXWt$a( zwIEzruho_wd#pN4SgWnLP5q1Gxgo+HW3gO7aP4p|$uQu3guI+}N;!v>xFl14c8FuU zL2N?~LF8e+pejH=u-bD?GU?`Fb|mGUoDoMs3=c7^w9!NV&%2)qF9C6E8-&z?o`J+T zjSK&N?6Q_KdqaI3S$&u*YoY!wk=|04-Xm|iM>J?6*u@dHKtPsu@-3s>Q(>|!1OwWa zOZrxu*!n;y34masifgbxG?GBis(Vlstih(gb4OJT8v|kF(u%@Ji!e4_bxfQ|10fS=3C`j;yP5fO82Zm4q z51AL%Ysw*57<25Ez*M2sRaEID{_^wmUcwkfGlDS(nz3 zegIpodL)y-!K!EFqYFJ5o66e;-laaRY6YOFH28f+boRgKR&r zRp^?dRYNJoW;9m}eJp3^7}Yg+?crAbIZI)IV>z*K@^lwTBr zJ#HAkUWlRxEEU8r9)T@W!%QDP!nbhLduu!qg&tjDH?l?&p}`ZI0DN$eM# zgTUF`oXDG40(t!vi+a6}-XL8RwjS*kBtE#kaqparF&IfoKO%nzR9%F*ROtl;Apd9; zbVs%inqfC044H7XboIPsU5g}=QWVbxly5O*%35)4>cv3 zf_7$dlKeg~)oMP4a4%R+7{c4I=xIJga5xdKzWCRNC?eePaECwT@QUYAE!*LFx3!r= z%rMgl#)U~SzQV#KK00CBHb=ebXI~QGWuOhOivTY(vCbj`zSVQwSzzaLL(E7YRd=$go@7gH&IKYGbs=ky z*SI-|Hf;vfu4VoECa(74-(7c$v*H?0|6J7{BzI z)15kl&fM|j(|nTyyii4=8Q( z&H~Rh2jyTltr-A;L!FpvDrE|*g_{awgn5iS13060OQ;;WkAKNJqXscc+}rbDmNLD$ zDcsmD{>(S(UPvBX5fHHIC6X;?nb7&w_l#q`UetqPi30Ow8*&`iz z6eK#e(Nn{aK_N#2jmx43Av6rnre}?uB;nQ$vM`K}whoh{FW?ju9*Q$qtzv_-W-2E!kIbMoe*ol?nn>>=aeH_59`$X2u4uRM7%t&x&;=)>2Ry(rMRLXoNJpHS2 zxQjy^Fc8ww;9#g(>3`k41gp&xKu$<9#O>7^r-GdvM&EI^77by1=iMj`uVfrI__h|3 zD9~df+;J|d$F-4ce*7L^_a`%XNUpxFDek$

UQPZhneAaD{Qzy0*lnRg*k(bx-vl zm(+WXfOC5~y7_p0V}i6ELRP9O!FaoL%FY1Nk10Ys!7dQec=u41^Xj+?JGNy_^JqZW zXUQHP5*qWE!(t0}OU|Ztf*Ed^{aRh!QlPbfrrhuVC29#HWRGj}tBW7FckIAT(X&qA zjo~w`_s%9YUm!K;ZBK}baHG~oX3{;h$4?d^3%l$&tb%-`Ns6E&!;7zST%!Z=1cohN zm=e~4Z@xX~72!9>3v(&Q{_YD;Vha9c??A5$8D{?Fnk?7{h=WDu*>9P9P^(#)UZ^64 z;~BatKZg1V62RYR9whwAiv>=Eiqtr+CqQZQ^tv23EmaRcs4;JXgKjhgmKQJ=5VJXq zcjO8JYapk4(3?OLhFFo3;eFhmQ}!q$L5$?vQq;Tz)5cqZLTfm?HIp8x9+&4jmQHjn zd9_g4LqlFoKo(*6>&cp3kL*Xepl(H4f#Q=e3E2Oc*`@qG-9dlCdNSjkw>Xz`uH4#ctYG|_x z5gz6!tP3EVWt%%$)i}k&&J|&AhZ8tRiu;lv#|7dUX}1W{C)6G_w+|@?Dqb%VUJ|JEVSHp= zeQ6%vrwok^_uhS;0SDoT`wn14B=bb~OWfh~9|y;2+)Cr3=|pbf(O^#MPI^-pkRs_l zlt+qZ3zKLDIKdK^Xd zXaI#VY+GmLH_?@6PJBGaPYm31^}&9_+qSGhvBDU8g-_ElwwWC{B$|D$il&dJ+!|?% zJ4Kwd2u>%x%aEB<29fXux`IkSk#@P@SVo4>96`iiuxC^G459{r*rHP0dvLD`lq^8W zgSLhIQ!t~`9i-c$n8OfmzEUCRv3z!l*Q}ssBty4O3Xw)!7~P_3{*&5YMgJ9?Y1XtO zlEWTA_@HYTpPd)1<43nJBuIHHF*xY;g7-JN(mYyy#CoU<^ z(9LiRRz;i6E>)Yu&9{}|k1QOMe$jED0SxlA@WIRP1=w3S{MG%RC&*DFR73jw`qg<} z%WzOTar>(iuxV<31?7V;#ya~%e;NeLG1zTnwhGDy_$_5y3PhQ>l6No*Fw?mtwN@(` zNf?!+yjZF{lNv<9Y+|#7rVV%t3_QZgpI<@07qTYy%&d}M`#~>R7e|+tuPMzzYgm~b z81(+&ve%p0870h>8_tVAtom|NF-h<{51P|?K%w{Cc>k*=U7~?*rL$hd*xRV;eO_#e zwTYPlj~OSylSA@l$9%x}Y0ZcSRE3BMmPzq1-U2>YcNBUHBidyhEN(){DlqmDqn-H1P{cl?>djW;1X3^_wOcn0UX zg`yd4AHbPv-zZjylWXyG_3c8HJ31U6);6Hq9guW7 z+(amu6TQVZt>|RCe0ddAQ|gbMX{(ixHY|n_VovcXSQAyt+LAbJfN63N%BPg|5C_r#4BW?F+FCcZbjCQL!CamPQGAQqff9~V;B15?A*J629P|h#5*~w6J;R_Q|4ng#Z)oJO(v$LSh2wPbV+G&diSi>T9(Bq zZ&l&5DMpvVWLV=$K@oI#MgN4o??~L{5DY(8Hu{)oYfEhak(6;;ql=b2FxOcbgkk7? zn@o2ZyN9%^Eq4D+j2hA*lw$$Z>Pbc#rC}|7oLr`h5KBzWcFc=IB~4AZX?pww=7D5J ztUpyT2jwOT|861RLRxyFzEys)J_5pcmp8Blq(24B3oMfP4Y6`k%hhw?te;P%JtP#( zmk0TDnOI0q=rb0_ga+w*b!165L>FO;_LfS}OGfT>4R6@XAjX}(ZY4^N)7Emt(YSZK z6y|J|be2&CzGdINzo~=1yYUkCa0K9F*Tgx3p!aVQd@nF}j7f%qd`E^ooU=g#=PbF7 z$21FMl23u=j0KxJOi|rUn5Z?o;Pzz!ZS2L*Dl}wZmz5QK<$xAnn zmFLDDVqq8aD>28wU`QvyDe5k}PK5lh!*CZ#8Y^g^ImF2?>B8%}Lctmyf)Yn&dSPC^ z=y{-V4jk1kvm;)bwc94YBHD+48oQIQ!|saAkP*?`_f_JAR&-ZPYz1Mu+n?6sM&%g0)*8O2$zgw}Z z`N$ZXOreH*@I_^AAeTd+lCGLbelVscmL&f(h7;$b3Dr2Rs@PsnVfXyh^RJtd!>q}@ z=imKZ<`U2!=MK7jpWP4Y+}Z%?ibXSD{|_y>Tq#b1WT++zC7&~CAjZJlzb#-b#VTS- zLj-4U^#YPlyYBlxtN~mD8C;c(6wSd^)WaQzckDVOvYL?^)6to`dvyiEf)6DK(V|?* zB|D)ze*vVW=wQNz9{1v1YBo4|zZNX>>5uv%b|z$#MxGDhd6xbj-u!$VxxNqFUg2zm zzaz8AA*Sq_r3(z6T%ihh&%?q0Hi2DaYs*spoe!Wn)4&@xcQ$Kd*avbU>$5Qk-1vLjI~I?1%r zv{klZ+ofrZngrv0TB!8z_Pp}D+dnS_>xDaDElhydytJlh65L!(pORHuJ8<%Yum1UC zeK!h-PQTH(doZ&s0o})=dyGmGc|FH@YOfQJNNb()5QU7W^Qzi;Y{gR#OA;cTfQL$< z$Kkqmy$)L4P9n3PfUi96&H4Lj~utMF6w1XqNX34>XOfuDBZZKQNB#+e}} zl7W&EindvRQb5gRszFfp*vkHLJNe3W#R3ZihxHJMnv^=%{={vNnmS|T7F;uwNojdBiHEC z;$c({Bqx~-u?8*P4?k!8Repk)t2)|%?s*nG|L>3M-TuE9>a#)%J?b)|M-ie?_g6r% zI%@fJQUFu$Q^!v^_9SjycMepY9h@u1vE^%Y7wk>-1{?Kp*zUT0{(iRFMh&MysYVu5EZ$%%@m@c z{4cVohx5gq-_d&V>f302mfYLKO&fh4#^~Gif&o9=kDPqXLuTr49Wk)aALeK}(E%8u z4spNIdya5eR8EM)Pud_f;&!TGpZsYxTIAFw1#)1;N!|Ul_Q5JVoIBtcM~e#5oNfCZ z+OW#4U~xVgo=Gvu(KV^~V*}kg`;%u1uocS8;aiI;pTY`l^&McyX|5V;Qx--nc_$fJ z-sKArhPqex`r7N+HxZ4Ej z&Tik(dmDXj3Mj5xA}M}q$KLwL>c_hLQnbU#Rgk|b)@OM-Z<~1^r5E1rpuYFN{pI(p zWK)o@Hgq$zuQN_nxy|sCBFD(^pMgOI_g7W`Vu0*l^UTH2mm`t9%~*8ev2$w(aX3E; z(ri{iZ{cqcm#nS7Env@ht+?3v$_~oMp2Vg>PIH#WtM9vyVFiWtSTkpzqAxG~0SOvP zVHro{5V815PHBXrnI`w`dJ|OF8Z6dh#OoMZmOW=2Xj-RK;quTe| zeevB{P`~kcwrW32DQ?%QFSZr*Xbo$zJ%wH3uDxx)z{(8x%qiEFzUU6#xc+tqVSY;) zm-Js?4jp^k=d{y*5lrZ3U?Ux9=EgG+^$k^IUTnLk@(z?#?daXR>$#jP;WO$7?}&m4WPQA+SUJe{E0Q_)|5y+IkLZaDFe@(@&s38`{vXh5k-yq7S~y*Sfm zP+W89LP*zWFZ^H{Tz-EiEgOdJ|0ohyd6&mAl+T_ziC0p%&k_jia=dfjMS9K22AE=P zpzR}BQNYLLK-q!J1@5!q`Nzvz&dpk=S^Uka&go+9-WzJci=K3TUt^fjdj9N$r8?37 zFst1m>{aQ)I|&H)+OyJ_3E5DM=8pD`+lw+*yRwwCb#Z{2dFxou5TJ|Ej`Xq&Mv^jb zkL&_dl*5##v)teQJ+m^`x`2edgQjnEn4FK^LDPCRPOCTo@vStancz-glaoT4XCP*- zG=PRmE?F&!=iFF(`5WwaacwE5{>l8X{@IF|x!w1IdbPP}0+gL#;9UUzFigdyjoqJM zdgtEu_s@RU+w#6U*Q3=N_b+wSf|00ytalYUjZpT8ii;}}E19}D?{SB{#>Dg>&2z`w=14i5G&QHs zELJZHBV_KI`U+21pEEQppYL9+kN#qsovWg^|0L7l`lw?Yhal?tuseRM%8qSW*PqM3 z8V79H2ol#`YR08?I^8K@fGa67S)1ZU($imV9Nz7#vI+*Fc4iP@`b&YC$9i>suCA`4 z_WnX7y+bM-JD%%`4ES7*hiA@=z;73Ati*KcEqsT0;|AN~wKJ0h>0nU_$E2T9(v_8Z zBHOb@tnt;4)4st&lRXtn)w&w-DYeg`=D$N_oDWTZxHqWXk3n_@7Ddp=6Fw4mb<-KC zIt;3Gbdfq&Q0u67;czLu;0-dO5Z1e}ZeX)kb|l zzO2W+u=Juju2>WNmYwQ852-q%Jv_GYx}qgd?iPHGqX`fF!87xU0zrlI6^dE}&fE`o zBe$!)x&+9czB?t&S@F#Kfjd+7_14Bb+Tf}%5H3q7`{Z9`b^E2UUE199{e|P+3H3{* zyXW8Oe7Kjq5%d_2T=fi2pw9XUJaGLUaC5R;9@%iEiXE_3+PR;Y%KzXUM)U6U1^uKt zsziC(){KIoYK>V5J|mVfg`YF|0OR&{nBnh-DO_A~8lC$Gl(YWae-v(srJ(YnJRz%t zN(ch^M6VTkC*q<{hq%W9VLmkYb-;q~2c*)jNcigSHtA`#&y*N?Vt;HL@rF1OjTTwLe>|IR4u)shd;`-I5dExZyju-p4C}RuF6eU{dkX6 ztQY}_?6rth@EJ7{zru|o%E=7VVY@*IvyMKE>_~|wdk5$c77X@^4n^{}G#j91u|K_Y z;uUZ}cw$Sc;FZ-gf+!VPlf3V71Uyyv%F<(OLfhBTp!UIdMA&~|s)I7N>4R)3{+Wx+ z5A;Nq-^p7g=KsLV`T_}0HlXg<94w!fP;{Tky%T=PiD=nTzMq$0C<7&ACL2!&d;b~T zq^vjDW|tep&pHvG#>34`${d`-JP~N{=im(Qivp~da?mA9;joCY^@->PM&4nBIFkO;65V2p@*1jTM z3+YDndQGm%K!F&|KoZk|6Bj@fLnR2op;6(_dlq~D!P#>x4=s`IztFE zizt}Oh?UsVqjI{sq-KGdyAc`(pnx4Zqn!JHK3U-n(ouBN8+6bhXI@@jy1l&|zRSZ+ z3-4rYpR0Hz^o}B!Wd^&nDe)$yrxpujKXB|4q_fG%IWiVXXl=|l~lw`45 z>teeqwwjh<1C;)mCqoB^`ekXYk}9s!qOIP#scfOj=E`q_<)OsLTYBEgD5b>aRt59sMS(`&CLQN$Tj7ptAF`WG`n)Wc$l{}ju_b^ z4fk8Y#^7Gb{?BAs)j|ejgRj(N4In#c(LwE`Q$Bt>IRwW2B_pV^EH-Q@Xw#Zst^yty67tdnARekljVPo?HFX_qiVHdG^TM2JsSOfCpYxjgD$+b(-1Ej0W z0}2b(Bkqo%9I?{y)dlvQ{t(bSeANpl50rsp+_2*TfGCwt&5>O>B~i4WBTG*n%=`54 z{QCKVZ@)U;oSof#y*+hDe*G28?eF*V_VoVv#ro}1Fkkmq_V4WR<5mER0IALhSB>2O zj^adGFd2{}XR-4{f%@LAlgXNYLSa*#)8=Kj(hF=!r$DAwrgx5(X$jD212<}z!A`>I z?k!&2bMWXAk1EJGK_^5TnoyB7`oA4#~erTJ3~(qKE~ExA~2)RLh(3u_Qzm;&@rk2S{qXM5T4aK+NhYX zJ>Uc;MBu03p4A@Iz32&(sxY{5;SKC>o#J2Bt*tVxD%Hig=cPCc1V{AO-H)wq#?;S5LU`0`Cpr~*23?*P?Hee=bYh5-8AvWj= zJ6M~&`Xv>(u^pC?X-jT!Ku-peQi^BRhz5=6X)w)JfN8Ln3RnUVdZuJfj1g{~)JoNC zL%4eUlsr~~y-T1LAWRV<3)x_cRl_IU`F`k7c5Gaax3qWvd zHPIpxgeC$l-Agjvh+3W>w->LME03PzSn?n3U5i(=8YL(=qhr&9U=Q4RKNPtQp~$54 z&O;6(MO4hhSav9>ely8}Nh@k|A@mvh5K6$I4Q8qVBh1XhSTN0@8Fi;znnq5xIfPxH+r|8K!x*9FP#g1 z`?VUpW>FBV-yQytFC~C_fzmrb1Z)#_HpXdoQ!3hrbQ_K@aj9CTm$wI54E)}&Q2{jrON#CBAeq+L+s_S<1x`LMV2pk}yqoG?r~FLuJLICmz`*Df zp)AxVYDzVW1R!ZdT%n%eaKKp>P!(1H`aDid?|3b^ECAy`(P77w*JbnCLhHV;`{!AY zU0qAnwX-iB4nCiLMoql?1K{jq2<+n_G!`4Oo2zwcPI#I2!7#i#B&k)H4Qw}IB1AMz zZ9a+)0ic58H4M6&Hub?`uO*lCc^{qDJ_wzCk~jwMWVt@0Fus`yQO15e%d3iE&sEKo z!tt2UK*;GgYZB!Gw7NPd=?CYG1ca=PQcu{Jz|2q0x<0dDpcGlVv+c@fn>Zu*?8w+n5-c7U~C}2D!!qO6=#D)!xd9S zrELj~8ezLZb23ydqa%X-skG9(iU5YXcWVg#oWyb}e8J6u7IK7r_?KpWzYR+0s>dcy zfQE4qaZP6j%ik(ZkHcrc3KW3x1@56@a70Oi)hnx}5!tkh6Zb?;rn1u;m?PAw?qvc7 zMrasFCPovSTf|h+L5J*{vEV1_=@G5OBlKl?fmjZaW{>-RzKvNle1PxmWnzXo-&*Ne zM`;7Jsg-I5r$XFmUVc%HC$zIrjudu@wM%sjr%e1UZPbV&YKXIsRAw>*pLklxc8>dd zLP^LI3ErVu0si-^{@u}~pcF+sE6N4tpPvwT3Q|Xf5|T1Jw1W+y0289lNr5*JP8<^K z8tP<>#}XwOAUK# zoQ9v#a|)-}6mMYNNxEDfn0l3w;6J^wp}bUmOiH3JlAz35O=uDFKok+2{^k$)lY4@I zb&j;Y3^j7>uIhDeVoaZ*y#;KJNw>?Wc9T3({Ww!M1jc|P#K9GMj)nnN!>x$?dD_$h z+ck-CqOj=(m&m`RIyL709)zW0tC2dusOF3TW~|EV7^gP?p?LN?zdd#S67m;G zgp_=9tiaK>ECI=L+Imp^Eeyn2C=6fRyu~5&mjWAe)v|{H~}r5+K$NWZdL zE

VpfnIh0kCpJP_n`>5he5;sZ54MSJ(*2v}9d_U1OHWB|O27&{o(zWjm#WW*1>p z)DJPxfDk-{zA+@^Xg1j+NL8RX9o0j%J)8DmCkoR6zcLXuQ_~}kZPfTc_-s+#)(6m_ z&$DI7D6qqcr-V9A{{jRTQSMP$-1s7J6jZm`66X+po!?z^1WbvuU`+0)BSt)dmrd$} zJ4ULqNJ-ruhOV3ptQ#LyEPtvd-`V~k!-Wwb*ue$-gRsc(#`G=W zoM;a2Xw(eD6cuWE7XcMzGZ2ex!eROT^e~6SiOz-a_pd9GcG59F_MQQTKq@I6rboUEC%_e41{Z8f1#{_k8CSU^AdILB>R?k16|1$cB|%#_HNu0B#9ggBKZ0AmEsl0lRYp#I z2CMh0F2EX9KSfmM;8&$vL}zFe1+zmJs1(d0XBoR(!5Y~ee21|=8K7$zPAHn*O557|u&88oonmUGX z&{bHoz=b~Af##T1)ISOI+omLYexyuanqh}lS5TX6n@7b;adJtz?vtx4oF;H28a9)j z4;9g~v{?-Sv3f!HXI3D614nr|N(YUifMr3d_Mm4j=t40~f4l+HjU4GcklKJJbqVl> zxuc*7TL9BT&T%a68Pzf_gQ&;G7DeP#2X+Xsaxfz=W7I=?WeD@=wYn3SJcI{BAi3s$1<2!m24t_XBS^n7=5w-l$a7JPer}y*E!vg|L z05;~?*bNt{9Bw@KyvfBQhZfhu7M_v~1?Qo`vmHsGBc9|I3=0Mb$S`0Of%L`~LNLTq zGp30uLWIpbNU1T1`!o0#P~LDc!`!pwyH&QN%m)O8sNS(0Oz8AqZ7U=GRs@JZI7bn| z!121G4Rb896Cr0_bjkzbNPp?Z2sIv9CkQd6-pVQ` zTETdiPQ~YMyl6huZdeVzC4ORXGo=Fo_X|d(;_P$;*b5*UEw-(qsCWy3yHMz%fo1jZ z`hG&+3*P-`7=%&rbkfu_=)`sBwC;=>4Ljkl#hPlr$Gnliu73^riuWLel#BlE4*>^o z!+x$PqQlAVo*w4Sw=KH5m#TDY9RN)?p#T*&Mf4_jK)0-e4rRo$oSd=hALUp5 z{oivybb$tML#wu%8W(Xz$>pFNV|BpQaS4a;%PLhLf^Azx1V8-~513(^xQh{xq8sBb z!nMcAz~|qI1x(pc51gSM$c1}?BRg%Wy1_|Un>*5kQ&XIGM{;O9S8Fzpfy#K4STR-g zh)ACi!K%2!u>dM*CxK~|!MqgJR;$J(h?z>@2$0zyt##>SQY7eP>SCszfdnDyypQfx zqERP+IZC~muS%`BlGA1O_@M|WitdnHxjr5-rc9cJlw*A^qGJU*I4sU$CuSq!k79ao z+|fX+_G1`dY9QwpcbZF8La{XeKgI62t(+0!MjE8iH0?P-lpiDd6WMOA0-W4zkPIh~ zFrgyA(C=T`3vg~G`FLClxlFz^7jau;^kPP!ToUe>TsM!)hP$T}lhXdWRbQx?`rY6-4SN-u$MOXB%CS8e2N}ym+WIkftBTdY%T;pX;L`? zFpA(PB;*P|S~#SRRPsRiBgPJJV>lvWpUmNaSwgRY8#BOQ@J9-FT@6G z)r$)Aj$4$;?%1~T4L;U4er4A9$NWl*sz$WO+jGVCyDTQ*tIP5USekb7q9%#fKpU|K zlCTFn=%Ip5)&Tz;D234+IH4<#7!5Omplv$cP+j5zGtf_P7_l+M(w@WFItTcZ9_EvM zHAnnHOcG~F*Av`J0HM@8M+D~{RVIF_1ab_ync%^nA)$z1BQu+u-zr47Pk42;D(-Dp z3%|LWv;SJc|1k8h;Vb~mo913${algpj2?OU%$4@r5EKoJCA*`t^1mqk#gaekCK9O% zHwvKnb9Jw4q4P~fXMDT!nsJj6=ce14-jl=~u^7C$Ny_#xqGLL^zx zzL9d~y)18F;MPcgT;9Q2OfkTe&~)sXj8hkTficS(u1qc9-5|o@)xkt?y3nLNs!|X4 z^SD{vALAWt8Fo2h2ySD(Jv^{eB@hZ==-dAHfON?icMHT3*gRoAreafDuKtiB&>?m(Gr0Ne`8|bm( z4qeCI^f1!^J@o#&?FloVh7E6UwBb`U-Q>el0gFhkQNmIY#J>CDb$Y2a=x$U_NW+b~ zu?`UUpxZMQ;}gZ2Nm+7C@!|-gQ)L7lWK?kLmIz%T(!P+wqD2RsdsXy}zF^l`Xv?x{ zLuyB}zm6iRHTym&H@jaDmE}^LaMc%0gDb^r>F`&1+3Oxf zv8&Ygn-YX6mDg2f*^4c#Xi<|!N~+ENTJ6=p&c|2%cPh{e-mE^ia-^lG6KJ1%8-S)W zIay#3(o>0q!&uyo{xM?+1Wj}>ttQuu>M*4yXYh9RA_DtoZlP@yNhjKNG3QnI zXdfK1XTFA@-SHp zP|m7`XI#4-R9M}{RAHKyi*UA{zL#yZX7_5bw1Z#E=VvG9`Tl#IYmz~JG={DR&7r&6 zseEcX$-&5+Xm{Lckb|y{ZvotB?myNjT89|m6dyYzIa~qk4Q-^6av)zI6M?%kT>^_+ zB4@_zuW~t0{N~bd03+lY^Pyh4nn3Yb2ZKNXN~S)Epgvga;3AE)jW$dc|6d>H z8P(Lbwc${Lv_KH4p~E3?s47weNJmgWqzi-+Llx;=6zNi=hK|w^1f)vu5W4gZ(jrZR zAR;Boh2wXQ=sn*Zd+a^-{;}RM*W7c=_s3l0>2i=){$iDXQ+9L4y}jF%mA6LPUR*2( zrqj?sLWQRv3?KyZIem*RSp{^mkJABNv&+Ip_2E0Dlv7t7=*t@!0j%y`>7g8oPsd+h z%fAexsg>L_Q5b1#p|}8l_8P@B?p<&##I(|l>dP^>G52y14NGElS$YUyuGsu;ZvsJpn|wBXk~k5E=;YvKUF1uez7Bi9Q=a(!#O-nhJx>}z_Ek^o+<=D6)`eLzmfiQK4Yw?Mv`6hsLN z!3&62L;qgH(#y?kov{+8C{=Ew&9uJYRsa}!0T0eyu58ak0%0S%H4WwY?e;| zltd}Fyq5A94nM}r+7z$dN=M4SD!+r#0jMWVqcLu zN`f8*p$xYncf&sOv>1%orRw&nxH4D5m_j<9l=x1ya4k^UhVTo{IgGs;cV5j?wA0Zi;+4_*UqJlLUTf&y1@Z@xj<76n+wZXN|(}PJw zDkH^_^aIRGh{Y#};DZu0QEec09ivL6aLjZV9z%+2cO(~bv5X$@@HdTsJB~I<4K4|R zt>Hs20{reWOQ0;<*Xfz&51Pwt?&D{K7$d{YT~LF}yU0fejcd8DHtumdiY|62zV80)_zmrKq0|l!oa0* z+}*GWYW5k@ugv1D;YThK=384GafOvGT}rxWLt4r#mw2Xmf|sYjfose~uEdLMs_&Y> z{`?ft+#CFfFT@7Ok(Qw6GUgiuEkUtF3l#ahYfD|8V^h6OoUAdfO{t6TrTAWq);-aJ ziiny~YWR*W)j+Qp2kaZo)CNyXD_m;zD6w=}?R{y4Xfa{$=iPV=u{i3aj+TC%>y~kk zO<f7N_89~H_b00&G&I;YzLgVK~0$J^oFbnt0i`KY(U|pGNNUhZiqdnXRa1+ zAL_`)n^;~WtzlHb^fC*QTtD4H22&8jhwNT#24wbyQc>OBeA}}j>!+Y~GM#Ds3`bd$ zjwTAFQZ?=r-ZY3J)SGnh4egWSNso&EZ*j8J>o!v<2})LzXCzgiEpNA34-U)4H# z3UHY9kNS|raPblXid?8B|Ds3?nFy9zRpqit?V7cyQZLx=CRzifa+9+7^UPIA_#Ha8 zkh6q7V~j{W_P$Kt&iYjtz*2hiHY@;Nx+pNK(hhBX%q-a2^top(+1DG%NADO|GfnmE zF!f=d-e!vD7WD_+Wan)H(e^;QE7;GX*_!Uxv}Nmh#*Df2;n{D~JXJK&Q8JE7_cN3H zWofR%v0>Y3?zSaqj6p)jW@7Yk(p|JIR&%;(3ly#0;yebu{sGOd2%DKhqbZ!Ux zYljS;73q-G{o#k-obqwSy3Ol zlq7?Z+D$-hH2Lzp6pSE9+`Z=|)gvO((S^Z#7x1@*^tuZt65iKaFE>EvUqQsV=iF%} z>m<6O@X|H-30Fj_i<0cC(;bc-=t~!xxyzn|L7&WwR16`|UG(O-UAy%bbcT*~2c+ zRp@#*5)ux=Wbt>HQzW1*q1|36w-Akk%mRmVvEXYtdWB zRYICJJ;!1khB|uX&rTP6{A^BM(h-h(wq-7ITpdi!ecLs$xIpf1B^VTj177R7N}~F9 z;^xj%g&SWe4rf>v^eIu%S7~3<^jJ;2EzXism-P$m>k9GhHWnRrkTM8(KY&&pPD9K_ za}SgDN4N>+5(e|_;(DOFm+T?gNX$Wh%z_(;gEJtd6L~o^Dr{|91f7zOEOsf@Cz3(d zG}+SV?hPkyvn_31df!1fHJIpBQ+TweydD5td1X>k=QVWKb3|pQD08YpTf55T6{V+- z;Tko=AEp=ACn9xt%02HuA7oMPn-780oAJwVCZG+POzIf0=#Gb`eE~+C5nFNH;zq}O zM$n5M8jHq6TM6*44V@zb$QSj4Fsec7O zp&A{9ABZrLjvmxKiMtHM z%vLoVw`isGhlrI=uOXPd2^X1hAwo#7<@rNus2HcfpLZla7Rh6xh|s{!`Qtou6B$>h zr?MJVCekaVl>&UQ-r?_j+!5AOZE@Uo7l;}Q?I#^&vXk6l#f4cFa%Bn!zLPKfVz?tL z18Bs*N2q~{5Bk+2w`MQ_j7>A|Wt$5s?wLAIirSWQa!R{{CL7$!$@7Jtnab;2lFd}Q z2V334QdgW@IGbNnz{%BISpWb5qaX9@ABd93-{#jOr}Jw#{Fa=8<{g8-FeRjNQu=52 z+35NwrsVgrH^cd5g7E)60k<@xv9vO?F#B}trrsw?y-=;k_|fzPP%s3~=?`$gK@meJ zeiS>qoEEn#nu+bb%AK}QFrDV$+i`V%p6(mtWH*X=oCeefHGBHiFQ}nVIU7a>5XMw` zhLpQTNoQNTK%T-%7H7A1_wETO`NlXJCfGs%5zLS_7 z${7U)V0C$=ZGd8B;&b?{!%y^xXXNt*N<_7+F#_p6Z}mUYfD!kpq;wXP0QgRdniVjw zDFr0~`5V}oj#YxgU?*P%3ZnZb@|~*X{Jh{kTGbsTj|B5FR5h3G)#6%%LI|a|mbHlj zw?#Z`3LIh>R^JwtX1|+r+|%BMjc=pikmegACsnIFgJtqf^HSSDdc)%p7Vi); zgs;$$Cv9(`!^EwL1WdZtHrrLqJybz1y^%+ld;#zH`!iHX^$e~_r$^s1(hwWfTlnHRJh9%sTvnCy zmj%ipwmPjdWt`1-qW&Bg_EONy%QsE5HxGtG$wKP`>~2!pr(a2t;o9zuSRhJV_he{1 zR^)tgS0Yw4!y8?C<#W^GVBCmKQng+D7;y9jj=~0a7!=42U=QhO46IkY9&+c)Ok@t- zRmI?UgU8qvUm0=*4rpaq*%zG798R#Y!SvoF9koT^+*WWpXP>Okz1kdKu4z0ic9$0W z)$3icuLM>oTmh|S3Ho(WJ;}Y4BUj4fd%cQw7uvV_CrFr|`h5g6<>Xn%+)h!dG)CCa za-uEkzST9o8(FcaFnm?L;ikL=GAYs5-w-t^b*#(m+ze~VEhIfFwe6tvLoqE z)}YA2Nm|DO;!C>Ud@m};dwK-(%2|w z?Qqy}jO|Gy&(myXM)C_g9A=6L@;!B@(>+blvcSk+2{Tyt!s0VyVu!eJjD@K?D z+vH~%kN7lv;avg_MluU&m!Q>ipDIWz(XJEB6qHLY7QvX?<`SMpnZ!bhJo8?tdjr)VAcUuYdXd>VqE zaGmp%DRKb$UyP~K$Mqclv)5nYgQtMP|AP-|sh_F*e?+REDj(%Ii2#4|sfhpKQ~g-} zGl=u-ADUl+(EEG&KV9`7Ru%C-tg7GT&I4jVzsOxA0sM{+`h8PBqdI>&i}V*67#ZLf zw#>h4oEh4AhotyLgNXlB-+a4ojK_F`uw#9TW+1o{L( -i -c -x -r [option_argument ] -n -d [enable debug output for script] -v [ enable verbose output for curl]') + +try: + opts, args = getopt.getopt(sys.argv[1:],"hdvu:i:c:x:n:r:",["user=","inputfile=","cluster=","collection=","name=","rows="]) +except getopt.GetoptError as err: + print str(err) + print usage() + sys.exit(2) +for opt, arg in opts: + if opt == '-h': + usage() + sys.exit() + elif opt in ("-u", "--user"): + CREDS = arg + elif opt in ("-i", "--inputfile"): + RELEVANCE_FILE = arg + elif opt in ("-c", "--cluster"): + CLUSTER = arg + elif opt in ("-x", "--collection"): + COLLECTION = arg + elif opt in ("-n", "--name"): + RANKERNAME = arg + elif opt in ("-r", "--rows"): + ROWS = arg + elif opt == '-d': + DEBUG = True + elif opt == '-v': + VERBOSE = '-v' + +if not RELEVANCE_FILE or not CLUSTER or not COLLECTION or not RANKERNAME: + print ('Required argument missing.') + usage() + sys.exit(2) + +print("Input file is %s" % (RELEVANCE_FILE)) +print("Solr cluster is %s" % (CLUSTER)) +print("Solr collection is %s" % (COLLECTION)) +print("Ranker name is %s" % (RANKERNAME)) +print("Rows per query %s" % (ROWS)) + +#constants used for the SOLR and Ranker URLs +BASEURL="https://gateway.watsonplatform.net/retrieve-and-rank/api/v1/" +SOLRURL= BASEURL+"solr_clusters/%s/solr/%s/fcselect" % (CLUSTER, COLLECTION) +RANKERURL=BASEURL+"rankers" + +with open(RELEVANCE_FILE, 'rb') as csvfile: + add_header = 'true' + question_relevance = csv.reader(csvfile) + with open(TRAININGDATA, "a") as training_file: + print ('Generating training data...') + for row in question_relevance: + # question = row[0] + question = urllib.quote(row[0]) + print question + relevance = ','.join(row[1:]) + curl_cmd = 'curl -k -s %s -u %s -d "q=%s>=%s&generateHeader=%s&rows=%s&returnRSInput=true&wt=json" "%s"' % (VERBOSE, CREDS, question, relevance, add_header, ROWS, SOLRURL) + if DEBUG: + print (curl_cmd) + process = subprocess.Popen(shlex.split(curl_cmd), stdout=subprocess.PIPE) + output = process.communicate()[0] + if DEBUG: + print (output) + try: + parsed_json = json.loads(output) + if 'RSInput' in parsed_json: + training_file.write(parsed_json['RSInput']) + else: + continue + except: + print ('Command:') + print (curl_cmd) + print ('Response:') + print (output) + # continue + # training_file.write(parsed_json['RSInput']) + raise + add_header = 'false' +print ('Generating training data complete.') + +# Train the ranker with the training data that was generate above from the query/relevance input +ranker_curl_cmd = 'curl -k -X POST -u %s -F training_data=@%s -F training_metadata="{\\"name\\":\\"%s\\"}" %s' % (CREDS, TRAININGDATA, RANKERNAME, RANKERURL) +if DEBUG: + print (ranker_curl_cmd) +process = subprocess.Popen(shlex.split(ranker_curl_cmd), stdout=subprocess.PIPE) +response = process.communicate()[0] +print response From b0b926f9a78e260ec174b7342ca265167abdb1cb Mon Sep 17 00:00:00 2001 From: Manali Chanchlani Date: Fri, 20 May 2016 10:25:17 -0400 Subject: [PATCH 2/3] adding readme.md --- README.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ themis/main.py | 18 +++++------------- themis/rnr.py | 29 +++++++++++------------------ train.py | 5 +---- 4 files changed, 61 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 0dbf7de..7a145ef 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,50 @@ After the model has been trained, you can submit questions to it. If the command to ask questions to either Solr or NLC fails you can rerun it and it will pick up where it left off. + +To ask questions of RnR we must first train a model using the truth file downloaded from XMGR as training data. + +create corpus.json: + themis answer rnr corpus_json CORPUS.CSV-FILE + + add commit:{} at the end of the json file created to index solr + +create cluster: + themis answer rnr cluster RNR-URL USERNAME PASSWORD + (note the cluster id from response) + +check cluster status: + themis answer rnr cluster_status RNR-URL USERNAME PASSWORD CLUSTER-ID + +upload solr schema: + themis answer rnr schema RNR-URL USERNAME PASSWORD CLUSTER-ID SCHEMA-ZIP-FILE + +associate config: + themis answer rnr config RNR-URL USERNAME PASSWORD CLUSTER-ID + +upload corpus.json : + themis answer rnr corpus_upload RNR-URL USERNAME PASSWORD CLUSTER-ID CORPUS.JSON-FILE + +test corpus: + themis answer rnr corpus_test RNR-URL USERNAME PASSWORD CLUSTER-ID + +modify truth file to add relevance: + themis answer rnr truth TRUTH-FILE + +upload truth file: (the train.py script is given by RnR team and recommended not be modified) + python train.py -u USERNAME:PASSWORD -i TRUTH-FILE -c CLUSTER-ID -x "example_collection" -n "themis-ranker" + (note the ranker id from the response) + +check ranker status: + themis answer rnr ranker_status RNR-URL USERNAME PASSWORD RANKER-ID + +query trained ranker: + themis answer rnr ranker_query RNR-URL USERNAME PASSWORD CLUSTER-ID RANKER-ID SAMPLE-QUESTIONS-FILE + +untrained: + themis answer rnr untrained_ranker_query RNR-URL USERNAME PASSWORD CLUSTER-ID SAMPLE-QUESTIONS-FILE + + ### Submit Answers to Annotation Assist A human annotator needs to judge whether the answers to the questions returned by the various systems are correct. diff --git a/themis/main.py b/themis/main.py index 45991e8..90cea1d 100644 --- a/themis/main.py +++ b/themis/main.py @@ -26,8 +26,8 @@ from themis.xmgr import CorpusFileType, XmgrProject, DownloadCorpusFromXmgrClosure, download_truth_from_xmgr, \ validate_truth_with_corpus, TruthFileType, examine_truth, validate_answers_with_corpus, augment_corpus_answers, \ augment_corpus_truth -from themis.rnr import hello, convert_corpus_to_json, create_cluster, check_cluster_status, check_ranker_status, \ - upload_corpus, upload_schema, upload_truth, associate_config, upload_test_corpus, query_ranker, query_trained_rnr, create_truth, query_untrained_rnr +from themis.rnr import convert_corpus_to_json, create_cluster, check_cluster_status, check_ranker_status, \ + upload_corpus, upload_schema, associate_config, upload_test_corpus, query_ranker, query_trained_rnr, create_truth, query_untrained_rnr @@ -338,10 +338,6 @@ def answer_command(subparsers): rnr_subparsers = rnr_parser.add_subparsers(title="Retrieve and Rank", description="train, use and manage RnR models", help="RnR actions") - # Say hello - rnr_hello = rnr_subparsers.add_parser("hello", parents=[rnr_shared_arguments], help="say hello") - rnr_hello.set_defaults(func = rnr_hello_handler) - # Convert corpus to rnr specific json format rnr_corpus_json = rnr_subparsers.add_parser("corpus_json", help="convert corpus to json") rnr_corpus_json.add_argument("corpus_file", help="path to corpus file") @@ -383,7 +379,6 @@ def answer_command(subparsers): rnr_truth.add_argument("truth_file", help="truth file path") rnr_truth.set_defaults(func=rnr_truth_handler) - # check ranker status rnr_ranker_status = rnr_subparsers.add_parser("ranker_status", parents = [rnr_shared_arguments], help=" check the status of ranker") rnr_ranker_status.add_argument("ranker", help= "ranker id") @@ -396,13 +391,14 @@ def answer_command(subparsers): rnr_ranker_query.add_argument("question_file", help= "question to solr") rnr_ranker_query.set_defaults(func=rnr_ranker_query_handler) - # query sample questions + # query sample questions for trained RnR rnr_sample_questions_query = rnr_subparsers.add_parser("ranker_query", parents = [rnr_shared_arguments], help= " query the ranker ") rnr_sample_questions_query.add_argument("cluster", help="cluster id") rnr_sample_questions_query.add_argument("ranker", help= "ranker id") rnr_sample_questions_query.add_argument("query_file", help= "sample questions file to query solr") rnr_sample_questions_query.set_defaults(func=rnr_query_trained_rnr_handler) + # query sample questions for untrained RnR rnr_untrained_sample_questions_query = rnr_subparsers.add_parser("untrained_ranker_query", parents = [rnr_shared_arguments], help= " query the ranker ") rnr_untrained_sample_questions_query.add_argument("cluster", help="cluster id") rnr_untrained_sample_questions_query.add_argument("query_file", help= "sample questions file to query solr") @@ -432,9 +428,6 @@ def nlc_status_handler(args): def nlc_delete_handler(args): remove_classifiers(args.url, args.username, args.password, args.classifiers) -def rnr_hello_handler(args): - print hello(args.url, args.username, args.password) - def rnr_corpus_handler(args): convert_corpus_to_json(args.corpus_file) @@ -468,9 +461,8 @@ def rnr_query_trained_rnr_handler(args): def rnr_query_untrained_rnr_handler(args): print(query_untrained_rnr(args.url, args.username, args.password, args.cluster, args.query_file)) - def rnr_truth_handler(args): - print create_truth(args.truth_file) + print(create_truth(args.truth_file)) class QuestionSetFileType(CsvFileType): def __init__(self): diff --git a/themis/rnr.py b/themis/rnr.py index 0ab1786..1afabc6 100644 --- a/themis/rnr.py +++ b/themis/rnr.py @@ -5,18 +5,13 @@ BASE_URL = "https://gateway.watsonplatform.net/retrieve-and-rank/api/" -def hello(USERNAME, PASSWORD, URL): - return "hello" - def convert_corpus_to_json(CORPUS_FILE): df = pd.read_csv(CORPUS_FILE) df = df[['Answer', 'Answer Id']] - # f = open('corpus.json', 'w') - f = open('metlife/corpus_trial.json', 'w') + f = open('corpus_temp.json', 'w') df.to_json(f, orient = 'records') - - with open('metlife/corpus_trial.json', 'r') as f: + with open('corpus_temp.json', 'r') as f: data = json.load(f) a = [] for row in data: @@ -24,7 +19,7 @@ def convert_corpus_to_json(CORPUS_FILE): a.append(("add", temp)) out = '{%s}' % ',\n'.join(['"{}": {}'.format(action, json.dumps(dictionary)) for action, dictionary in a]) - with open ('metlife/corpus.json', 'w') as f: + with open ('corpus.json', 'w') as f: f.write(out) @@ -79,10 +74,8 @@ def create_truth(TRUTH_FILE): df = pd.read_csv(TRUTH_FILE) df = df[['Question', 'Answer Id']] df['Question'] = df['Question'].str.replace(":", "") - print(df.keys()) df['Relevance'] = 4 - print(df.keys()) - df.to_csv('metlife/rnr_truthincorpus.csv', index = False, header = False) + df.to_csv('rnr_truthincorpus.csv', index = False, header = False) def check_ranker_status(BASE_URL,USERNAME, PASSWORD, RANKER_ID): @@ -97,11 +90,13 @@ def query_ranker(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, RANKER_ID, QUERY): resp = requests.get(BASE_URL+'v1/solr_clusters/'+CLUSTER_ID+'/solr/example_collection/fcselect?ranker_id='+RANKER_ID+'&q='+QUERY+'&wt=json', auth=cred) return resp.text + def query_untrained_ranker(BASE_URL, USERNAME, PASSWORD, CLUSTER_ID, QUERY): cred = (USERNAME, PASSWORD) resp = requests.get(BASE_URL+'v1/solr_clusters/'+CLUSTER_ID+'/solr/example_collection/fcselect?q='+QUERY+'&wt=json', auth=cred) return resp.text + def query_trained_rnr(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, RANKER_ID, QUESTION_FILE): answers = [] with open(QUESTION_FILE, 'r') as f: @@ -111,11 +106,10 @@ def query_trained_rnr(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, RANKER_ID, QUESTI for row in rows: query = row['Question'].replace("#", "").replace(":","") resp = query_ranker(BASE_URL, USERNAME,PASSWORD,CLUSTER_ID,RANKER_ID, query) - # if resp.status == 200: try: res = json.loads(resp) except: - print resp.status_code, resp.text + print resp.text answers.append([query,0,"Query Error"]) continue @@ -124,12 +118,13 @@ def query_trained_rnr(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, RANKER_ID, QUESTI else: answers.append([query, 0, "No docs returned from RnR"]) - with open('deakin/answers.trained.rnr.csv', 'w') as f: + with open('answers.trained.rnr.csv', 'w') as f: output_writer = csv.writer(f) output_writer.writerow(['Question', 'Confidence', 'Answer']) for r in answers: output_writer.writerow((r)) + def query_untrained_rnr(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, QUESTION_FILE): answers = [] with open(QUESTION_FILE, 'r') as f: @@ -139,12 +134,10 @@ def query_untrained_rnr(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, QUESTION_FILE): for row in rows: query = row['Question'].replace("#", "") resp = query_untrained_ranker(BASE_URL, USERNAME,PASSWORD,CLUSTER_ID, query) - # if resp.status_code == 200: try: res = json.loads(resp) except: - print "inside error!!!!!!!" - # print resp.status_code, resp.text + print resp.text answers.append([query,0,"Query Error"]) continue @@ -153,7 +146,7 @@ def query_untrained_rnr(BASE_URL,USERNAME, PASSWORD, CLUSTER_ID, QUESTION_FILE): else: answers.append([query, 0, "No docs returned from RnR"]) - with open('metlife/answers.untrained.rnr.csv', 'w') as f: + with open('answers.untrained.rnr.csv', 'w') as f: output_writer = csv.writer(f) output_writer.writerow(['Question', 'Confidence', 'Answer']) for r in answers: diff --git a/train.py b/train.py index e8e5b11..f20aa54 100644 --- a/train.py +++ b/train.py @@ -9,8 +9,7 @@ import urllib #remove the ranker training file (just in case it's left over from a previous run) -TRAININGDATA='trainingdata2.txt' -# TRAININGDATA='modified_trainingdata.txt' +TRAININGDATA='trainingdata.txt' try: os.remove(TRAININGDATA) @@ -100,8 +99,6 @@ def usage(): print (curl_cmd) print ('Response:') print (output) - # continue - # training_file.write(parsed_json['RSInput']) raise add_header = 'false' print ('Generating training data complete.') From 55d7632771ebc4bb6ae56449dc8740520e3e28a5 Mon Sep 17 00:00:00 2001 From: Manali Chanchlani Date: Tue, 14 Jun 2016 10:10:13 -0400 Subject: [PATCH 3/3] changing schema --- rnr/example_schema.zip | Bin 21325 -> 23017 bytes rnr/example_schema/currency.xml | 67 +++ rnr/example_schema/lang/stopwords_en.txt | 54 ++ rnr/example_schema/protwords.txt | 21 + rnr/example_schema/schema.xml | 620 +++++++++++++++++++++++ rnr/example_schema/solrconfig.xml | 599 ++++++++++++++++++++++ rnr/example_schema/stopwords.txt | 14 + rnr/example_schema/synonyms.txt | 29 ++ 8 files changed, 1404 insertions(+) create mode 100644 rnr/example_schema/currency.xml create mode 100644 rnr/example_schema/lang/stopwords_en.txt create mode 100644 rnr/example_schema/protwords.txt create mode 100644 rnr/example_schema/schema.xml create mode 100644 rnr/example_schema/solrconfig.xml create mode 100644 rnr/example_schema/stopwords.txt create mode 100644 rnr/example_schema/synonyms.txt diff --git a/rnr/example_schema.zip b/rnr/example_schema.zip index 2a2a3f405e5409259c99b3ecb6f813d751b4056b..b8709160d4073afb433145838468f421c35d843d 100644 GIT binary patch delta 12115 zcmaKy1yEec*2iaXcMC2F8r&Tc+#$HT1$Vy?Bm~#s5Zv9}-Gc-R65L&ae#ySK?`<~w zzM1N&J5zoB_jDh*JypNaJn(Kc7*LRgghBxQeA&s3N&Io~?+;WE8py=Wz}n8rM9MHUD^0~AW z4Rar~$clInBhC5ssI=rFSf6LAlMD?Ew{~Ei869N=U(V7wGG-s>d7-!pk{{hx;z05| zTyPl8!yC@`co372|6H;nT}`}1vH_^~J@$uba0rD+OCr5J*R?!%IU(I1i&|G~>&Me4 z$95=;W~3aopl_sSy=#be4$o0qFS@_h%&U!Zw~f#ml5fAdX+F1bk4`~=`~N55l^PH*>VjMCrMB|vMV*^j~~7(4u_oCo}`iA z-3w~AKB-}Cq8hzB(x~_U9;6R{R}yZb?FC(NL*8Ko-d+$t)!lUaDPF<|5mLn<09KHO zg&R`gv@`>QKu~y~AMpeJj6O)=*8sx*2%w&xtnh33H|l@HedR~oe~v%n{;#C@yV=jU z|HtefvghwA691e&6#tJ5gWf-7Qmmjf33Vj_n*R;9&6cXXoci zVc{;GeQ9C)k=$;eSTuM-N`l{0)X3Sv!NkVMox#o8>SvakFzZz^Bf*eh{XW&oz{c#4 zeQ|=>O;JTx}hU9T}Y5oc?{wY7uw{ z2mt#GU>7=Kh0|6A9^zW9u6J?7yDG@2K}^Kw<;vuDJjRrs}4 z8zBx~Yx0XBAzovk!&Tr-U^!m(AA65NdhHqH50<$*7@`v{B>|0&D~`t_!%s~F+k5_H zGO9T5OU)*Iq<6__rb_UL>y~vgTzpoPo-Qr7H`|?gc=!0EoeZ3c_LOEQN+T%kA)YRV zg!x11O67(vqWjo=m72v{r}&}uoO6d>6;~;(SAFKCr3q~sx}CyL-$0V@?UvfFWU$1f znVCxV)zj9^AOhM|{a=2e9Ytz~@*c19sDY$s$u>1{E`|Y7%S|CQhdah&pXiVH7hEuv z+0jny8|#{ME0zvvfUAj2V$H>M)(JiGT?m6Rl_HtCNrnq$NE3@ceJXwI;(9Zl{XkS3 zO0RjQ_4ZhBhH#mv-%%y5HW{A?H0$~yeVR!IZ^|vk%pQm~=cIxqIXTJprM`1UDm6;> zr-e0O#s0#}iy)ZYluqc>Ts{9X#3Dyg1G(A7@T;THrs%OVtax+wGIk>e%*6S)g(_I; z&JkN)|E1*8d-SbSq=ZnMz>oMI87jUjhI6W7vmW`Cl*Fucfxc&Rm9cX@n~pBEhO%l zYFYM=CeMhMlqrF{G2(mSZY87J2eS(XL26|NUiPJh%;#MCE!9O{5bo9Ps>jfRY=>Y^dXup5#L(q?BDdAC_}6z7l9 zaeOh%OW0Wm7ZugMD509|5X6=&BdZfpodK!(mJZJvsYjnpe}qNCB_)vX72LsO*dx!F zNw`T+gW-GnBp154n3Wn`@FF^WPc2%br2%mFu@}+Dz$OqD0xch5Wr#e3!ceITtzz&w ztU!iLu_Sy8KJ#n3l@%Q}s)`07J^VXu>?wOeb%EXPkx-tt?6Q}nkmI2|(h5zt&O(#Q zq_mEB$SAaE?;Aio8e_rC*g+I{L1QkmEL=7m82Fg+Evw#&Qvy$&L(0)j{0uTmcv z>~ej{bStDLsx7aWJjak;x(@+YL_1}XHC2b-!~L?D%5ifA@cHD6wD)%P@J*Dhxae&i z+q3fvOMPc{9r#cI@>2Nz+p>=uqHXQCupMBdC>qjr%B*(<=pS4$lp$N0`pZ$3k#3Js z@!8N%HWwA4m2bBT%UD`Hq9j%=;=LyT1g+7PK5RP`%vcXBU_d{v<68k8k{wwUZO1DY zW3C`YUb0IU#8=2WYXN4~$x}g(L!G{P)`f>NWD!g9-w4DmpcSGj8(59;NCi$uB?Hf} z#7E%@w=r8M#u-M?KVm|m-%l}0ud1zD5<6pwdM+gc_o|wdJ(8wVp+Y?P zT1BYSY)Vu|w%DFs&>z?nx5AD9u1PCYq+$mi^kH8UL8@f9gw-zoh*O&7A0ndP)!?yLA29O)QAT(vK0;0whZ(!L7Y&lpClgO)8 zObom#_Jh)sbTyyNBJp(rN--s?9`)lJ4Vc-eOi`v8@PHb{+YL3}LLiGcZ!J?L{B zCNaIsT4;q^;uX?>|DLESGwqk&3gQnuBR-~1QFUUs`_oBO7*R?=J1t~VZWhS;U(qDt z!AE`pts+I>zCqpsVZtkQex6C_Plgly=Jx)(o6Vb#H?Ya6Q~WOh_vlylSGDSxx;sZW zCX*k8Nk)O$-E;}P@@*)Tmj^eD#ugABPjVrbB%yOJCSN=D&VE9#;J(vwVS2q&xmwr& zZWC8w6Xj8{1$DT)6i-H2Ek5H#9O#HT$KwlY#5EanA*&Q0y%qA4AQrJ+Wrt?9<9P(B zW5Du8!y>yz;rSQ<_|XnssbSg}f*R4vR@|=;o$xW#Au%spx$!^|+O{veTgCOJ0}pLI z)jnpU4!kiL8f{tW+amNU6vv+H6yk0wnqVfEv#Ijd?7HQpke!B{f|#TD99%H5kEuxx zQjI&~U8p)xvY^_X+mMjV>5Xu;K*(u~r?w#$q*XO)29fCl`b;_I=kR!+PAs99@p({o zt`eap5qT;@Fokjw$2K@7Ve?M=3elG+V5MoKd(CD%Y1Zz{ z28+u1AJ6CKDfXr|nmHOv@7_Ts=hD-?aI5V_0zr+SOqg0!2)p_b_%~mY8sk1 zVc~xq30&|%ybZXu_YqKc+}o@f=xDf$O>gfK+u0?hWm1TbR5T(}L(@@zgGWs<-ka}q znv5n>d0gRYyl`+WWmYtgJyo=70_( z|2LFnHw{AsR!z02UQe9I|eKQ@ki3&>VbyROM zJZgR}zS8gb+Bi6%4Ghs(iIT%QD}ot=Y4Kf^-tkD$z?AgO_-D@;E}$C9yd8EnXA@#Z z#}uGE8o3Inn;k76-sWs_lLGF|MO~{KVC0%QeXKG%vt?U{u)ONe^qt!y7Uj1ln0Lg8 zq0Em;D(u4vlLG1K(Z=;ZRC14}qlE0!RI8Xbg`!`Pg%bEFZNq~mK`*d-sP(6wXDhIa zjRm_ue97jt9oHkF79LjP)EK_CyHP^g=mQu-rXWS!V+(bRnb_0Kc4cQ(1y_tcaaL#c zmoI1Lnfri2_&41x2d)K;NP~TngQC}K7)l1zC#kS`ntHW8hWf|8ve1=_27J+&!vULd z%wG57wOi-#ScV%4ztH_e%=39VoQ*!;d&%AHLgEeQXHP#(R;B!R)Lq={p*XPW z&c^WIC+LVM#HbqcJNRiS>9i)ULU!Q28VWCs^imykMknS;9lTq&J|qtxYd+FFymHhm zapN__l@LoJ6#3_GwurWkfmvu+LGO4aKM*)tnDa{Xq#6}r&yQf_eQlw2-U^tFRWV58 zRhL%}z4T_dZVdr}*EGylW+J*!k9O<_E8mW4W*!?3{Oxd)9W-S1Pk~;` zx)*BxDjkV*4ovTJ29k31q3_l(?9RtnJd>ess!;f2;xR;a*COq;ze?c& zgGQFTuC0-ZC?u*?cTCU6Ao6FwXEQcGu0?zu?7vTb)9f62=O?CQ6QaKjykKxs?8#nD z?CJ`FMqu?vS3K63a!_cwl7{z^=8%3JjlYm({KP4}>Kz&XC91x2vMaZ}{;N{AJ_QxGEieYPIpC`5&NJ|F#Ja#4-g83B7Y)(BiCf{ZOom~7gMdTOm9JtFpy83WU*tfSw$DL3@bWHd(X zwMr>d%*(n;1w%meKzM8aF`M`yDLjhJ!F8~gAWl(#<)mudUNig%dGJ}2KriL_RL2*l z^diJ=UT@MRszk&j2PUquYnMLl z0x1OA?d_%R>FmB?qM7MMornz+(;aLp@)>deElm6nM4CYuP(3Rdz6br3!#x>*Gq|I4 z=H+W@l}uG=SbP`NBN5x%ZP4o<@Zz)nCuJ-c*jPyDZmkjW0*{l|Mx?MQg|b^uFgahi z#uAow<+skWbZ2+O^sf;>@f3Jhel)D;hkciXDB#%nreW3Oi-qiT=CG8L zJ?y95>z7=jEC3u6M|O!|#LZXpo!BM=OKsgX&8rC-QOFd- z^n@;FWYc9SpaW-uWuL`<5`R97xtlRrE?`r87m59v8CH=Vu-Vw}SBy z0?5dGAqXGm-FaUF5UI8mh8*+E*R{s@btBs*TQNKc-jiv6KRcB6quB<=f#8zGKQF@h(=olYk0w-1ObKJgz4-0)3->5 zM*}F`MNco*Os_*ylrBh(MRcE=-Nc~60C5H>v_(9$P zDk`VTGO**B37>y)YnC4e!h`aqzMjSLX3Ov#La0L3*>cm6;9;Q0d&hha`9iZ=k%*ws zpNSX;B^VK)mX6UE;nkb4O+8hA{+$iF!{x;S3S(U6Qr^ylP-}3TVl&*~{*FyKJzAG0 z(o&oS-BNAGhSw6D1)|*tyJ8&5x_V!RqL&Po8Nf%Gq@dzx+1*3sw0DiMCg?#PG$^q> zJ3I{=yn^;&q485qNl@~V_otswF#5vBn>W!~VLNL-ZA42bqZFo1J-G2<<8&YP1$~wW ze;8a|kVL2P>e9Hp1i8`+EY#GMM4ZghdbkQIeJG}jnM^9%dy zZvL$2liVt}3_g?Sw2t6utH&Q!$1<8)Hljvb>~-wt%;j(tTkE#t&JN^ zkQ9++$9M0i)+DIi)~xA;LShb-m7OqQ)iaML^;az%U{K>}*FuPNwym-J1Njh)~p-ub%v1dS_Y6qvfYK%=g3{m~o-%|^)?ddPjuWzhM zt*m{ndvHTsd@LMIN;nf4?E+bdv9XamY?*=}TTrR2D^74R@cRAZ;Z|mIC6Iy5yyFZa z!9?Ij_HVn55suT*DST~ag3Pde&9rFSz7-p|Y7j}Zh&;x3S+hDk#hTt3(_o{csHu(` zW|fp3pjHyRia-0=v{N4tdrw7{`YyG>|63qpbn2-M>gj3$@^>P~Rc&rNijx4T!tZ`2 z$$0#-!zfh6m8fE@46yfQ*;f0s?o1M}bzaM9@LgStU72SjyR_I!2%&@gOl8qR+yS3u zA{@||pL0u!(@BL}O-+mw_CHGtrPvOrA(|uI4Q|RtQ0eiUlD^0W5*~YDjd2xeg$tDh zh59t2_Hgi2VZ1VD=A0QECmMOlpvCJE;H?*`!c-_w%Q8FtwOj4-M0e~%^R%U>IUJdv z@0nCt+rYl;$?vivRIN94zCZ3eq)X&pi~FRjs_wkiwu%%+3+dtkw&?xMhal42 z67zIfW5JX$YM!b@mph1i-1m9m`TX*X34vEz7>pw=(VHF{K%dBSeLcI;{m1b*%&_JpOP4DM#4~m?U7`|2?YLLk7T(PaU~6|r(FN#dwuJr;+_uqU z*@C1pyc+G!xK>~2DUPYRPTn(j_X9ExC;awB)rv4dtmu@Rnpq}WG|&SfF8CY;7O96L zd^}$mI=W`9fp6C~CHt=NA6;??7L&lbRtEdzK2Z)0ZCrV7%(cTYLc!$cPD_$jR}-Hg zm5W@KF!r<;pt?!f6L<(Rj!owndWjMGkXl!D;5VMg>#aC95@qmF0s7p7;meh;-}}@QLui;P;^5g7 zi{M`q0cBepWY+=Or}A_wkMa_k?@c`g$>tY*IC4LnY_|=P-@CKb1E>3gsCVCRW!=1u z+xBtgM;ss6sVnrv`>xGYZ$=O@h6aS%0n3TplSE-C*MV}UZL@Eu6@&p5`Tg&3DC%i@ zy)yR2K0L>6o&?im#Vj60o#KZ=S2kZy^nb{M1Q-rI)-9Q3W)ApH4Ax{hq@?Opc?edTt$;LBcrSapX?*v)w^bwD6%rYNs?1aeq<`9uC(RG%s>ac{WE3Si& zt@m}O)}~5wwW^V}NQl`Wj~f?0&)yxdt(R14%=)ClLV`zb;v-#&{VhzJc`ui^?HVo7I3JBw zugDL@l`~X>dr!Cwx0ij((Hd2X>kE zCyCsv6oX?}tcqJr8N~N;DVcBdPPIdvh7z3T4#a6_6J?wZB4{#Y6hoHj{I!~~^$w@` zB=EfnhKgU5)I!<@U)g^h1s`#lGG$Cb$uHWu;xD~@+J+H3p$m_G%<_bbTyLe*ZM$eC z1u9Lk;ZCZCg7DDw45q`@o9lDAyMUacktO0f-I|Tb3}!w5qgi*kNVyl8lbl~8bz5!- z@$UjHR?}r{QC;(uVNej`Ai1A#=Cz^fx+_DNI|niQJh8MmLVYCR)`H;RAU!wi4;TzJ zMoMG+dp4B4XLJGzvmXqne)5e!0$rVO zg$Mcrfo?=gm*^(^s9hIdB5=ZUZ)GZF<-cZhE?5#SS?`jULEj5&vRkJ z81)6g_AMv%HP`x?;JxJ{iV?t~x^YREcBl^Bt$=_R7#643JB{9AL2B9PMzf?wjYvgk zoArfkm*smzk1TEa+-*VJEPcnqov5*WBXEOKW)5vtjo_(FXDhP8_YC1F>VvS zhJyLJ@1wJ0Y!mC4OSoklm4oAdw2a5 z!7KP=RE|A%Rk7KY1vd()rFJ;VU%nGX!iLRkZHTpN#)4Y1lzpG6@9VADCQ2aE4EYdh z?EO{TML7<;>$$kGZZXwZ`XHkA8hA#GXp6y{WM=v-to` zvYZJUMQxc_IT^X6p<;bkwv)fS)Gv(+9{x2{hb3Mrv8UN_?$jKGjc ztC}FfkVY{7@wGBNb(6}>%JQ@29hz&uRf7uJBWd?bH!T~UNvpSH5b%)|lG3^Jc;Q5+ z399;UGrIyE!cWQBJ@M#vD>E68bwyvP7^w%KlYP)~K^@uxEVP}{Ajb+AzeLzUU%-?v zlEi+EGPU>C#o>$0jfRB{=ge1V$o8;bp}}*vBelrf-I^!9$ZCs7HgjYX-`S3-u#)`gI&Bf}E3GK$`LE8owMK8tL%`9h$=?nq%8U1`m9nc_pY919dgO0t z_!oe_(vz}!SJIYG{fjUA2e88c;S!}84y!)Jg$OBC33ziCnE#5-~RRrvJvCeePb=)MK`{H@)P=-SH=TQok$ z;Hd}c*XC><`t)VZDNd&c#hWQ*%W;9}u7{o5U`z-4Tgt3_H6j|GnwDPYObu9ZWX;eG zbsioN(Dcm#M|G_XuL~uhtdH(pQNn;;L8waZu?N+f6q}tL2sL>%1$(Q`a&T5|xa4c; z2W3lU81ixg{y36~xl_CZ2v4pr?9Engns`MtAE|ETXp;{RHxR(c#4j=AZZhMcixt{G zekr$u?0q$mERl6qaf9=k6uL?5-ElY(_4@^*CZK+&W2IY*x@E!3*Y#ugP%7S{EW{yo zwexC%{&D~$XTr<`vQNmCKQ7R?F7)K!~_ z+FSuc*@is4R6)QoMrHMAyXlA<&8_BzkBhELqucq+*b~H03RyZ{ujx@@NXdMZ|KDj2E~i-?-&))KX4niRt`qCHl`M4e+oeS3#zK-f4Th( zU~On$UvDR%N>(a2!aK=7#8-;k4xIcHzBRrUu&*(o$=k$rJbpRBeM>>Ww9ZVM%i+&P z7$K3AQZ{ygQ>n4Q+A%J)OGn;d4bSu0$W%fpDm~GAQE3{J+)9JsDC&81t~I_b;w(eu z0tHADZ8D?jvNu_oj=-*7mMYfKw=(P&1zO~9%sGAIZMV&*zWEg5dT?!MJpOJXjp?u>R zt?d&o;Pg$0pNQ{v0{b#B3+7QBJ7@QOkJoej$Qu zY1bqx)>KP9&iZhk?o?y5nrOGEvA1LTj2FhqqY^D_YOc(%YTl*k@Xnyc<0*OC@EXBq zPvhB6^PB}vP4cYRipd6UEZjQ1omrS1(pBK_r&?@8|r(&A*Ayw`~B6- zT7fFxJq~u5fKV0xuG>;?7#dNy*tj>b4o$wADmnEdQje?oUW=e!os+uGkl8h!Mq7-q znpSC+B}}oF))py_x*;})0Z=&;ST4{X&8iHi-C289FE12Pj3r+M9~O~AEfI=U)H?

NzRD7Hh9G$_o{~4Np%uBYyQ)V;UW*&oeXx>xYhB zKHNNGARsv>bDS4xKGerf<|LsIb&Roooh(Af#5&sTS9z6k|GBbJIykQ*TgDAQyfb))0E#&fQ{(-<-Ak!?vbME_FJaKQd zNMqrwbnm2W!cQqve8^^bZ>Fre8$0^jVaBS^sCBq*_|e#O&6xmXt|skZJ^RCNitIevg+y$t0jBQH`cf8Wwr^CmNyP5PoV6v zkM4usiv@!bVHa~it@iosAWLc+p9j36;(Y*4{AtuwbL7Y)$(bZYeP}_fK>+`Y zbw&CUeHH2u{(1Quy9oCm@>M|^41o>#UvTlSL?s^Rzd|{F{J#kSjDLX_;r@fc{!8+o zkxKZ#NInSP{y;bX^H@?*hXegdRQ`Vw|Mdz02|WliL;OYK9~SznqWFtf zIexDHkK!-9Cc=N$9BZvW$VTvs3=-sT8~-bk3HKApj4e|jqsRJ-;i0eq{BMT;1~lP7 zKLO1{1v0Ydf3e8%^Zni82d?=H@&~LL%cn>N5dJb-`bSj%wo!j0H~;WVZB>{@?XT(a z^HlNI^TxAZR0B|cQ~eX^1bzajaG*b#&cAEg;fHtV{7A0iALG|`RWknX*#3HK`DgW? zmHeZ|f&Uaa(JwCBG5zlH->Ml7^zYNmFFWy5_pe@4|02sT|2MBG|3aPraQlDVBDPS8 zi2c`4ffRoC@Yl}quOvE9B9s3$`-qi(xBR7z{58hVFh8~)g!<$C9SQ_mQ2zPt{{UWW BcuxQT delta 10541 zcmZ{q19T?aw(q~#=-9Sx+qThhI<~&p>^L2EY}-l4PRF)w=k-44oqPAb_q|%9Mpdnv z^?O5XYrm`qSCgg-S7PWCSD_D)}&8C*PE{v#3% z4+@$X34;uT{bS*5YH9Y(_-}1!ZVB^f-h-e4&iSp}S0%pZJAal#%z5djU(WjT#<`!Hm$@9Uq+K$*DAZfHOkDCFwyx~{M8@odo^Ip@KPxeD%A2Y8DQQQLk~bS()7P!Wsx8!@AuO!<9^qJ& zP1&w%jeB`CY@e0&spjnA_Vci;0*Aex0@T({mGzrZ22=4!!?0^QPB#S-;nfu;h%y2W zg1e1QP+92pHyKV{U0A{0qIBTJ{VOLYXGBptg*yL9d&s>aAh{UR-ivFM{=SPYy3s!y zIDUhp#FIwLAJSUj1z@GSL$JgA2K$XY+D{eCZ%kQJd-=3-Asd{nHKNyP7Z;%1rj`{1 z=jt<~yh*5sXXw&A7q&Y_IJse1h6HtZ2vwXqWX~|S-0J1)=H=nz=>v3w-He^`s5OB z?YNnnmsp6`gRzPWl&i>BfIXS=6^1l5m9Pc6wfr<&O9P1ylre4&^-M4AkZ$_gc%T8+ z6{A~9M^L0K@9)m)Z;z|b8N5Qnhzkn;%TRYIcb7uvh}}s@XCBxhCI-sDJ&{muo+4F~ z!QN@T1oqS^>>9<&g#vy9~Mt+!4TQ^dNemiSg2KHmmwULJr|0TRs(IZ}O_CJG&Z z+3-_P5q8;BoA_eMPcmgaqHvEhOS8vK46f}9i6(wW_wH4oVVdrCAmCSFUvnAVL3mm; z6Syr4z5i5ky%$giq?%%UnkL%)$2bg~-E``LzUICyOL&vwV9jmaOfy>&*w<~Zh&qkoo|5>7gDHdSju?cw26!T)c-l@DiHOL6wN%wgJ~QTbIhJt4zo z0J!CZZ7rb0dD7e}J2ZQ4n5aSHH%8pS@=#Horu-(&SFYQdaC@woVjtXot z?xBMKq`@@NNF z$Uj3j@tj%VCNEJQnhE38C3U4qVzDQL3?Q7R<{AWqLlNqjcMmK>FdFdrHS|4ozFk2bHa|fiwhsGpZaL{0CbG2R~z?z@8rAYx9^Go9E*Os(UddV!%c*Qj75 zVaodkr=68OUdAECfhtpHz>fCHrmbq;MV$j@2gc=$>%vRR;2WgC7xtAYElU`PsJrU> zD$NluzbqK}toSe=8U*GDLmD{=%15YxsQ<;tn?| zYEl-;wnc;#u2KTUhkQLxJIW8R(r7?7@)4wdP&_c*k+!V5YV?+lGmc|}!)T+V)k_Z? zZz7BBX)2x7gR&Pc&`Im%ct0$(f)axLbYAl z=xLv`=LENw zB5-=0p}ekGm~aBfvK%Y5nR^J1XnO|SQ~YWL>h&oRd4CzTL_Q;GGte$fx^;f$-Z~O( z^y_=-4&@Vw`V9QBS|y~J=<6Oif&JGsn2DZzP?0rpC~)CT#K)Vn#L!=gi_H{kCVOQ@;=Gp8Gq#k6_8NqkbFNVNViRessz| z_>*|e!yRd#%PWpgy=aZ^>6`83mo#&|Abj{F(-Rzg(w#kyRSWd9Zq8XTekOW>{q5r9zjWrJ+GK%}47gALnoAHp{{2 z&EUv6iF$j`1oeVKGQl8A@Suv*^knGt?>bNu4FuB3Nkp`vyq;NyiGjQUjZWeybO%4K z;{#D_DqN#JAaMfFN2bq+S28wju&+x1I zo$tl!U^bS~E`dlE+3q3$dqlei1!WhUE;O0+CU-|}rs^D!L`w2xy<*U6xxl5AIky(u zCbV?06$TiiCN>)3NSC8&Z^uX`b~w@Sruj~}sC(P#%>hIXRTApS)Jg1?ZfY>$7SW1K zP|UgwAqtp2z=B0)EmF4F$J^cvRYnUl#KCpK@i+91;B3U7pkV620dRH} zND&_y!s>|LuaIfw=GWXz5W}ponso5wqcE7XCO0*GMtNK{bS^U<#BlI@%bpbq(!?uk zsG{(Cx_T^*z96Hp1Zd8XmC7~F(&zoK{3CVzhr_uXTSLWYL%-T3_BH}K;X8hmpsI}t zp=w%wnW|P}qJE18BF7{*a^DchL6qWs-kaBWI8nbOIoQV&HrE>q)$IY;8uy@BL% zdLim+PWH#|9ff(=MZ+gdYYGu*P6pf!W>|_ik>GEZnN9Ljog)m-?Ve?mO>W|Wv|T2! zAGZkIr=~|hIg{p9y0Y6*3?x(Mun-uY3?UwE<3WOx4+I4PVJBoUji}=N_VVSRPwq$MIX+A_@fIR=UH7jLsr@JKxgXio33vXq;DWvr=Eb z5Ol%6E-TDmg;vT0)#KFST{O9<-;xlY)hO1t9>WqwT6!(<>IQG6(brAO1;$R)|K zJU1Y+7O7R4ZaV57ez1eyL|g3`$ZU6zF5u>qSWl=WL^hyKmk?(GUzuXW3i>zkJN7vv zO@y%$f#-$j+3{wz=R|onh;|EKJ7hXsZmZZ@F?AF*LgcrN`L#jVM3GKMD%L%ITq^{& zLsby%hlpGRHU^L*i^E5Q!5zO{lvaYJ#>sSCpU~RJU4?`tP-Pf^i0LMLOIkY(8IM%< z7pk{L8Qi`Mm`PZ(Am@#+-AABp2%A|DqZX%y1DcnJ@v%nY9D(Bfw!M&7k5%4I{Ai)V z>a^r+dMbe%7wyIM50@rX<&N;TLA}e>)1t!~V>1u+&ee64=#|KewpV&F8{`;YQ$V5H|Fc|huku3~IEOdJp0Y!>-2s9Wcna^cc89Zj?G}mx zqi8L`U=MCg&JZesbOUog<^Vt5v5F`%?eNN$xQF7G*LnJsA8E=5Bee3Km3`r!eSV=5 z3v-s9xjnaG20@d&dE^csN4QTl4b%~Rb9jE(14ZWbJqHTTCaI8t|58*%ru?+p-i8zI zFMM^K&zF)z1BD(D)^|14LTMqxl#?nQhN<2wKTE&%xjlQFBvc(IjveyC0GeAT-giSBv(F67{ywF)fT&<+bzm}hz~`(sw=;>KEH<8! zyqfprreTp1bndla@PNhaIP?DVis=#sdM=xBC&Af7Q|)tSo3BgC3bM;Q0+kq?BR}W^ zDM)Wl+M_N)O7xqY@aQo&VSV;Or9*LNa!BPu)#~vuZ$nbKw~nnt7fA2{&$UTZ%pqO& zI$D;}@X~QA+rF{$xg;+P)wjP`+e|Wo%1~S?YlTY!^RlA_WV;s%3OJr-aB7RRfFCAek>6ulNVF>2NXZ^>nA2KhE$ zekJv!>Ro60aut*%%YNkOeL^bs1ofh(@7%UvbeYJ-qpAj9dZn7WayPjoOXQ-kJ;q1R z#TIu%oL<`YUJ>S__HY|xrn?mE+}l8jCk+OVtTc^1zWml^2AV9RZ=po0;LAar1N7%w z8GghMOSW>CC&THeH|W?VaI_K&?l$E>)fzjX!tEbeXfr5gKA(lRE|Bi z5CU0`U;H8TX-(=plc@i;wASaVuCB}yC|MD&4W@X(6>F8XQ7D$d+t;x+Q}^IzjhXh( zgvdTUVg)u39biWy$^adE;q}OGhH#05i8n|@?Q{*R*+?VBA3JR%Nd(5~>Nw(QT{<3%ayLpl%c+B% zbFN>WRl!}Hd5L;B0=PM~@phmYfpx-9xfYJmiLfwFs0iDWwir;{1*dV?<^e2VcQh{W6Kii8=6JVb0u-wJI59Zd;ZR6z>v=OwOpyBlQ!AhdC+_C;X>bRnF4j9la zvLl_Huv?`(A=yN_8N86P!)=R5lM~Zk7YG|8j8ujd06C`7HKGTl($tZpUurZmavpr8 zdBB|UtYM<>S+yH(dBpza0l6n~whgmf#=BJOi3Jq=c-VGXq63vbGT!(HNTb6HcptOFt@A>Jb;xO^`((`k3owWes z-5J=zkmIxdOq*5dFIzHW?hAa@QOJ_vCQ5{@qf+rXkOgH9$og!6Y{*xWQ0XH&c&rwZ zzFBwQd}sIP=}F@$t);3DDx>Xh*}mY^Ba_#T&>D+MU*D+86&1cJK#mgUNi0|kS^EUY z$}qr(_FZqpxl}B1^S@48N!8bTZMW;WmEW) zv~Hd%)Vp_rF62Fhi1bkhdGte9p89jjpYA}5px?sX%(dRy4RFZ0*~QP>`L?~o$+$H( zfK##w>XyvorqiPdGScv$2L@T!prliqw*RHTTXsRSaFHboz?;f3VB?qH+})Ua3XeD=tE4`G&@&juptUt}M<8t`gk%%lGFFBwbLU0leyoF|xl32_`*hC9RF` zX0U@~c^(wvIgiJd>aXh$Du5I<yQ0(S>=(#SAkxE>tW zJ+_4MQ%JHFq4a12^_uC+|gk4_}O)1&dXWNszwh@mJwSv@=s? z1QRtiEPcHYwUCC(XpJyXe%IRmET7eigl^5V@1u9h^*v&C*4XcqinT9_7DD$Ik_w{c zo>e5~Z(mOw80D|o*_ZlzE4F8ODz3LFN{6!`SdgWyT+^Yp7-MeK*&cU0E~&ACJv+b4 z8RweP<$ids$x5wY8rhKw#L+wuDIDjW52^`yWMGAtk5x=DE`1FEHh$O0>FPdpDimSZiI;HI#OTpK%s>+Scz3^GXqUbvU#3F;Goeeja7WmUW)6Y-`BJ9Cf~!+vA7-oSCD&%}V>GCjt5L&Kkua z-UCn4BI#Fn$rXlx&JDf&LLZ1h+Dy~$lQX74kD6SkL9OpEN(>~(u zuvr(XXEC7%^i<>PtJ2 zw8Wmr%eJ=rhFsbj@>0R_RFcRD(mHllN0eVzpd=KKNlW(yF>V1 zesmXKvQv!0+}kpY)8CIfm1Wftb!t#;?1)~`cz^Qv*i4_?IY z?z;WswN#1~5x!KVfaateyDu`mUyvrRbi;cK=JLfXEpA1+6)uC|{^ zQq!aX6=ELV!HgR=sv>k+x582Y-W5V8a7PBMcL8@X2mg)boG^_AFaA4^Y;{#tr=Xp8@TMv z5;7YGZAe^aGT=*#n6|ibdS351Tb^MS1$eV`+JwoK!sB6-gwB?rEbDwnCUGmkF-?f4p+xE`;&zA7`A0F3ILd_e0zt4ds)!4+jS$mu$*su`z{MDm@X}d zZA)$GsL^{61mJXDHHAzXtm8Xx3)E74S2ug;_B4z~kdLsY$xBXx1AIhf5+dq6T3WE` zQcne}lXuSaX;i0N`p~i!y3_C2M#pc!x9`7=LpI-)Nz1&8W0{I4_MIe4soW=sgx0y9 zcrPNnCguIjaF#GOQLL$uVl!dwz$XJXIS7Dp@>Y`*mKv6y8ReO6?Cl$U4Fpjm&QA+W z<2tu5tq3%GhR^1eYs8&uZ3KJ%VO|^7TI0b>>QTH=-mx2zrWz+!3g28DV8fDe{Z_;Tw6c#e$Ylvn}`Eryvs1@OiH++@qc9nuND$* zt)x*)x0!2qk>iTBL%m0R^<9^A6OfQvM~(AOa&bjrr_hw--)*t+CJ%}J4uU&?H9Xc! zciZx~{HqvImX_OR0;dy=87AvdbB?dA%NY(qz;~n4$8aXa&Q;kPIKr|GTpVz0;u1zb z>~|+@R9pL2)b?=vskOy{i!62OrD0lFrPrPm3Nn`@m$58qB0Ki##^v2SFRx@2VrLEw zX*e5@zH3nB=j!SzZVwb8>+DnGTJu~~W+LQq+&*w-275eeVkc$LY!KMa9x_@TsvI97 zN`;J!-z9&Mku5DX5L=x9ny|-}Uyu0)^^J6t%$DnG#U)kVhFE;`6>(qHJ@78ky6=Lo z^~{K2P{uzetm~&T)3g|s>FJ}i&Y@S)t|Q`8c_A5lZ0f%u@*U^K>Ek97NO|7xRjs-F zT%HsGM`75e$4}lzWpn#ws1r~b0*g7yKB$S*S`|?+BvDki)R9vL9ISyR6-IGCBS@b3 z-BEJMQ)NjM^U@0H6|{)|vfNaVZl_T8eX*A9wuIEd8>aBGG~!TUW<1|5s4&l+eb$D! zOJSvxTgU!AsPc!#fDibq&5#$4L1fDbd%WLor>d82n${=}k5z)UDCxb6X`kIF;%y*Q zdiKu%2+K=_In?2fp7GerATtH3z;-z5}jW zGw%&c@dLqj6#rUR;Cr&8TBPTbAI%!3XiLQOSixw?2{}x4?ys`hN6fHmg$9Qj zD9n|uRrG@@>ydedv9fpX!IC)>P_c~$i4p;mTGD&OfiDVj{S3Hnu%he(H^0_o#1p;! z^@wwOJH`7V1RLs&&@(u1p128exu899rPTo;JArZ%TaS8L^h(?5pU| zyWl;-?VqtVA()#C!B>`9Ih;AeeL@14ZoO(*t<*)=_@1*|wK zX1!!10uwt5G{maDMv|=8@dfFZHouR}X8XS1+hGzescv4(hTA|WSbia>Z`)0|kKQ;a z^EHVI$wKUTHk(!060`rUt9wGa5ZsL(bW3TEbc?h0-p7VX06vT+vbG@W*?U)B`3a#U z3k7YDels0N1OTx50RT#}VBlx~=s!ils8)&^@V5=|FBZby*2&b~&fMzHzm|V97k}sq z%|s#I`X;7BY~b!D(2#9x=W-CHNbM90^v$Z>e9qLyhv`0E=CoK2!7a;Q;+}u_j%l1^c$}i1si!+#f^qZ*+pt{R za^x$zv8xESjm+thX&UBFWqL>|4#f&Mh=0pV3lKyXGQ5bm%yhG^%ESp_d^O_4GK9Su zvc6DU?gE}7#H;i;%~1-S4H1j+t*qc}iyL1s^(j}DUP$D34eMD&i`jHHg$?aczu1Mqf9@dwP_Kz=rr=1N6yAvOc5V;AMKky5{ ze&%GFSh^#0`?_+Kp(AyRL*!4z*(&l7xHFTAN#@Tu=SX6X9w#S1MF@gk<7e<(PNmZ0 ztEb+~@JpmHew`lBC@)FrJy}TM4u`=kcO5AzeUJFOLa?EUloHR1 zQG-5Wu`pHnlUG*IFL-P)%x74SW5$kGE9hWiNNOhY>uNm34D5KcD^nqODDBPL2qo|a z%@#M`eE(@YVZ#-Hgfu8)z??ZRHsWxZZ73tlywbT+6H>surOO&!{ca}pl4$!B?cf*q z(qyV*OWDhv#8D8$ZiRkTo9DEy$E@UadEQdJib|t%1(Jui_8}@ZxdF=2LW~+!orfx= z{ZFI^<7woqZqV8{eS~B5Jk$haj%%R1qHW1O7tTe!kcsrK_3V(`5p}U!>acB^HJUR8U6;Y>NHq$8&6}4h&b<)dBT%;4+wqehA<(m^kBxV6jyg zJDhs$EH9njZ#G8TkEmx47?e*>i#@~x%b~XzgqOknpKb!UM%GeMZ(A1PR7y@#dKy*k^Ky~>SjZuWf%VrSV|@V^$4EhiMt%O>>IQmRzR{Tv zT8e1PrM^T88z{)mGTTK5f&4T#zRI$n^}@BHPdy~#_|Jg;>Wl+UOppGb=ga@>*niU~2zdXy#{XUs|JAKTN^N|Ke@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rnr/example_schema/lang/stopwords_en.txt b/rnr/example_schema/lang/stopwords_en.txt new file mode 100644 index 0000000..2c164c0 --- /dev/null +++ b/rnr/example_schema/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/rnr/example_schema/protwords.txt b/rnr/example_schema/protwords.txt new file mode 100644 index 0000000..1dfc0ab --- /dev/null +++ b/rnr/example_schema/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/rnr/example_schema/schema.xml b/rnr/example_schema/schema.xml new file mode 100644 index 0000000..596fe70 --- /dev/null +++ b/rnr/example_schema/schema.xml @@ -0,0 +1,620 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Answer Id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rnr/example_schema/solrconfig.xml b/rnr/example_schema/solrconfig.xml new file mode 100644 index 0000000..4572b3a --- /dev/null +++ b/rnr/example_schema/solrconfig.xml @@ -0,0 +1,599 @@ + + + + + + + + + 5.2.1 + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + true + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + true + + + 20 + + + 200 + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + explicit + json + true + Answer + + + + + + + {!xport} + xsort + false + + + + query + + + + + + watson_text_en + + + + + + + fcQueryParser + + + fcFeatureGenerator + + + + + + + + text + + + + + + + + + + + + + + explicit + true + + + + + + + + + + + + + + true + false + + + terms + + + + + + *:* + + + diff --git a/rnr/example_schema/stopwords.txt b/rnr/example_schema/stopwords.txt new file mode 100644 index 0000000..ae1e83e --- /dev/null +++ b/rnr/example_schema/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/rnr/example_schema/synonyms.txt b/rnr/example_schema/synonyms.txt new file mode 100644 index 0000000..7f72128 --- /dev/null +++ b/rnr/example_schema/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma +