From ec32cf54c60088607a7e70e746f86e9142a16f38 Mon Sep 17 00:00:00 2001 From: Martin Fischer Date: Mon, 7 Aug 2023 19:20:58 +0200 Subject: [PATCH] fix(dict): Remove unsure corrections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The typo dictionary words.csv previously contained a bunch of problematic entries such as: abouta,about algorithmi,algorithm attachen,attach shouldbe,should Which resulted in wrong corrections if the following spaces (indicated by ␣) were accidentally missed: about␣a algorithm␣i developed attach␣en masse should␣be Many of these entries were introduced by taking entries from the codespell-dict and removing corrections containing spaces (since typos currently doesn't support them), e.g. the codespell dictionary contains: abouta->about a, about, shouldbe->should, should be, This commit updates `tests/verify.rs` to automatically remove entries in the form of `{correction}{common_word},{correction}`, where `{common_word}` is one of the 1000 most frequent English words. 
The top-1000-most-frequent-words.csv file was generated by running: curl https://norvig.com/ngrams/count_1w.txt \ | head -n1024 \ | awk '{print $1;}' \ | grep -vE '^([^ia]|al|re)$' \ > top-1000-most-frequent-words.csv --- .../assets/top-1000-most-frequent-words.csv | 1000 +++++++++++++++++ crates/typos-dict/assets/words.csv | 39 - crates/typos-dict/src/dict_codegen.rs | 425 ++----- crates/typos-dict/tests/verify.rs | 27 + 4 files changed, 1122 insertions(+), 369 deletions(-) create mode 100644 crates/typos-dict/assets/top-1000-most-frequent-words.csv diff --git a/crates/typos-dict/assets/top-1000-most-frequent-words.csv b/crates/typos-dict/assets/top-1000-most-frequent-words.csv new file mode 100644 index 000000000..8de101f40 --- /dev/null +++ b/crates/typos-dict/assets/top-1000-most-frequent-words.csv @@ -0,0 +1,1000 @@ +the +of +and +to +a +in +for +is +on +that +by +this +with +i +you +it +not +or +be +are +from +at +as +your +all +have +new +more +an +was +we +will +home +can +us +about +if +page +my +has +search +free +but +our +one +other +do +no +information +time +they +site +he +up +may +what +which +their +news +out +use +any +there +see +only +so +his +when +contact +here +business +who +web +also +now +help +get +pm +view +online +first +am +been +would +how +were +me +services +some +these +click +its +like +service +than +find +price +date +back +top +people +had +list +name +just +over +state +year +day +into +email +two +health +world +next +used +go +work +last +most +products +music +buy +data +make +them +should +product +system +post +her +city +add +policy +number +such +please +available +copyright +support +message +after +best +software +then +jan +good +video +well +where +info +rights +public +books +high +school +through +each +links +she +review +years +order +very +privacy +book +items +company +read +group +sex +need +many +user +said +de +does +set +under +general +research +university +january +mail +full +map +reviews +program +life 
+know +games +way +days +management +part +could +great +united +hotel +real +item +international +center +ebay +must +store +travel +comments +made +development +report +off +member +details +line +terms +before +hotels +did +send +right +type +because +local +those +using +results +office +education +national +car +design +take +posted +internet +address +community +within +states +area +want +phone +dvd +shipping +reserved +subject +between +forum +family +long +based +code +show +even +black +check +special +prices +website +index +being +women +much +sign +file +link +open +today +technology +south +case +project +same +pages +uk +version +section +own +found +sports +house +related +security +both +county +american +photo +game +members +power +while +care +network +down +computer +systems +three +total +place +end +following +download +him +without +per +access +think +north +resources +current +posts +big +media +law +control +water +history +pictures +size +art +personal +since +including +guide +shop +directory +board +location +change +white +text +small +rating +rate +government +children +during +usa +return +students +shopping +account +times +sites +level +digital +profile +previous +form +events +love +old +john +main +call +hours +image +department +title +description +non +insurance +another +why +shall +property +class +cd +still +money +quality +every +listing +content +country +private +little +visit +save +tools +low +reply +customer +december +compare +movies +include +college +value +article +york +man +card +jobs +provide +food +source +author +different +press +learn +sale +around +print +course +job +canada +process +teen +room +stock +training +too +credit +point +join +science +men +categories +advanced +west +sales +look +english +left +team +estate +box +conditions +select +windows +photos +gay +thread +week +category +note +live +large +gallery +table +register +however +june +october +november +market +library +really +action +start 
+series +model +features +air +industry +plan +human +provided +tv +yes +required +second +hot +accessories +cost +movie +forums +march +la +september +better +say +questions +july +yahoo +going +medical +test +friend +come +dec +server +pc +study +application +cart +staff +articles +san +feedback +again +play +looking +issues +april +never +users +complete +street +topic +comment +financial +things +working +against +standard +tax +person +below +mobile +less +got +blog +party +payment +equipment +login +student +let +programs +offers +legal +above +recent +park +stores +side +act +problem +red +give +memory +performance +social +august +quote +language +story +sell +options +experience +rates +create +key +body +young +america +important +field +few +east +paper +single +ii +age +activities +club +example +girls +additional +password +latest +something +road +gift +question +changes +night +ca +hard +texas +oct +pay +four +poker +status +browse +issue +range +building +seller +court +february +always +result +audio +light +write +war +nov +offer +blue +groups +easy +given +files +event +release +analysis +request +fax +china +making +picture +needs +possible +might +professional +yet +month +major +star +areas +future +space +committee +hand +sun +cards +problems +london +washington +meeting +rss +become +interest +id +child +keep +enter +california +porn +share +similar +garden +schools +million +added +reference +companies +listed +baby +learning +energy +run +delivery +net +popular +term +film +stories +put +computers +journal +reports +co +try +welcome +central +images +president +notice +god +original +head +radio +until +cell +color +self +council +away +includes +track +australia +discussion +archive +once +others +entertainment +agreement +format +least +society +months +log +safety +friends +sure +faq +trade +edition +cars +messages +marketing +tell +further +updated +association +able +having +provides +david +fun +already +green +studies +close +common 
+drive +specific +several +gold +feb +living +sep +collection +called +short +arts +lot +ask +display +limited +powered +solutions +means +director +daily +beach +past +natural +whether +due +et +electronics +five +upon +period +planning +database +says +official +weather +mar +land +average +done +technical +window +france +pro +region +island +record +direct +microsoft +conference +environment +records +st +district +calendar +costs +style +url +front +statement +update +parts +aug +ever +downloads +early +miles +sound +resource +present +applications +either +ago +document +word +works +material +bill +apr +written +talk +federal +hosting +rules +final +adult +tickets +thing +centre +requirements +via +cheap +nude +kids +finance +true +minutes +else +mark +third +rock +gifts +europe +reading +topics +bad +individual +tips +plus +auto +cover +usually +edit +together +videos +percent +fast +function +fact +unit +getting +global +tech +meet +far +economic +en +player +projects +lyrics +often +subscribe +submit +germany +amount +watch +included +feel +though +bank +risk +thanks +everything +deals +various +words +linux +jul +production +commercial +james +weight +town +heart +advertising +received +choose +treatment +newsletter +archives +points +knowledge +magazine +error +camera +jun +girl +currently +construction +toys +registered +clear +golf +receive +domain +methods +chapter +makes +protection +policies +loan +wide +beauty +manager +india +position +taken +sort +listings +models +michael +known +half +cases +step +engineering +florida +simple +quick +none +wireless +license +paul +friday +lake +whole +annual +published +later +basic +sony +shows +corporate +google +church +method +purchase +customers +active +response +practice +hardware +figure +materials +fire +holiday +chat +enough +designed +along +among +death +writing +speed +html +countries +loss +face +brand +discount +higher +effects +created +remember +standards +oil +bit +yellow +political +increase 
+advertise +kingdom +base +near +environmental +thought +stuff +french +storage +oh +japan +doing +loans +shoes +entry +stay +nature +orders +availability +africa +summary +turn +mean +growth +notes +agency +king +monday +european +activity +copy +although +drug +pics +western +income +force +cash +employment +overall +bay diff --git a/crates/typos-dict/assets/words.csv b/crates/typos-dict/assets/words.csv index 8d8af1eeb..a048720df 100644 --- a/crates/typos-dict/assets/words.csv +++ b/crates/typos-dict/assets/words.csv @@ -139,7 +139,6 @@ abotu,about abou,about,abound abount,about abourt,abort,about -abouta,about abouve,above abov,above aboved,above @@ -2087,7 +2086,6 @@ algorithimical,algorithmic,algorithmically algorithimically,algorithmically algorithims,algorithm,algorithms algorithmes,algorithms -algorithmi,algorithm algorithmical,algorithmically algorithmm,algorithm algorithmmic,algorithmic @@ -2372,7 +2370,6 @@ allocal,allocate allocarion,allocation allocat,allocate allocatbale,allocatable -allocatedi,allocated allocatedp,allocated allocateing,allocating allocateng,allocating @@ -5273,7 +5270,6 @@ attachement,attachment attachements,attachments attachemnt,attachment attachemnts,attachments -attachen,attach attachged,attached attachmant,attachment attachmants,attachments @@ -5397,7 +5393,6 @@ attriburted,attributed attriburtes,attributes attriburtion,attribution attribut,attribute -attributei,attribute attributen,attribute attributess,attributes attributo,attribution @@ -7373,7 +7368,6 @@ bounbdaries,boundaries bounbdary,boundary boundaires,boundaries boundares,boundaries -boundaryi,boundary boundarys,boundaries bounday,boundary boundays,boundaries @@ -7636,7 +7630,6 @@ broadcas,broadcast broadcase,broadcast broadcasing,broadcasting broadcastes,broadcasts -broadcasti,broadcast broadcastors,broadcasts broadcat,broadcasts,broadcast broadley,broadly @@ -8887,7 +8880,6 @@ catastrphic,catastrophic cataylst,catalyst catche,catch catched,caught -catchi,catch 
catchip,catchup catchs,catches categogical,categorical @@ -10425,7 +10417,6 @@ clickbat,clickbait clickear,clicker clien,client cliens,clients -clienta,client cliente,client,clientele clientelle,clientele clientes,clients @@ -11170,7 +11161,6 @@ commandent,commandment commandered,commanded commandes,commands commandeur,commanders -commandi,command commandmant,commandment commandmants,commandments commandmends,commandments @@ -12937,7 +12927,6 @@ connectinos,connections connectins,connects,connections connectiom,connection connectioms,connections -connectiona,connection connectionas,connections connectiong,connecting connectit,connecticut @@ -13549,7 +13538,6 @@ contails,contains contaiminate,contaminate contaiminated,contaminated contaiminating,contaminating -containa,contain containd,contained containe,contain,contained,container,contains containees,containers @@ -15066,7 +15054,6 @@ creamic,ceramic creasoat,creosote creastor,creator creatation,creation -createa,create createable,creatable createdd,created createin,creatine @@ -15394,7 +15381,6 @@ cuestion,question cuestionable,questionable cuestioned,questioned cuestions,questions -cuileoga,cuileog culiminating,culminating culitvate,cultivate culprint,culprit @@ -17202,7 +17188,6 @@ dependenices,dependencies dependenies,dependencies dependening,depending dependensies,dependencies -dependenta,dependent dependente,dependence dependeny,dependency dependet,dependent @@ -18363,7 +18348,6 @@ dictrionaries,dictionaries dictrionary,dictionary dicussed,discussed dicussions,discussions -didi,did didsapointed,disappointed diea,idea,die diect,direct @@ -20188,7 +20172,6 @@ downgradde,downgrade downgradded,downgraded downgraddes,downgrades downgradding,downgrading -downgradei,downgrade downgradingn,downgrading downgrate,downgrade downgrated,downgrade,downgraded @@ -21341,7 +21324,6 @@ enabeled,enabled enabeling,enabling enabels,enables enabing,enabling -enabledi,enabled enableing,enabling enablen,enabled enahnces,enhances @@ 
-24551,7 +24533,6 @@ extremaly,extremely extremaste,extremes extremeley,extremely extremelly,extremely -extrememe,extreme extrememely,extremely extrememly,extremely extremeophile,extremophile @@ -25970,7 +25951,6 @@ fragmet,fragment fragmnet,fragment frambuffer,framebuffer framebufer,framebuffer -framei,frame frament,fragment framented,fragmented framents,fragments @@ -26311,7 +26291,6 @@ functionnalities,functionalities functionnality,functionality functionnaly,functionally functionning,functioning -functionon,function functionss,functions functios,functions functiosn,functions @@ -27781,7 +27760,6 @@ harvestgain,harvesting harware,hardware harwdare,hardware hases,hashes -hashi,hash hashs,hashes hashses,hashes hasing,hashing @@ -27798,7 +27776,6 @@ hauntig,haunting hauty,haughty hav,have,half hava,have -havea,have havee,have haveing,having haversting,harvesting @@ -29769,7 +29746,6 @@ incliuding,including inclode,include inclreased,increased includ,include -includea,include includeds,includes,included includee,include includeing,including @@ -30127,7 +30103,6 @@ indentifies,identifies indentifing,identifying indentify,identify indentifying,identifying -indentin,indent indentit,identity indentity,identity indentleveal,indentlevel @@ -31404,7 +31379,6 @@ installataions,installations installatation,installation installatin,installations installating,installation -installationa,installation installatons,installations installatron,installation installe,installer,installed,install @@ -32432,7 +32406,6 @@ intorvert,introvert intorverted,introverted intorverts,introverts intot,into -intoto,into intoxicacion,intoxication intoxicatin,intoxication intoxicaton,intoxication @@ -33019,7 +32992,6 @@ iteraion,iteration iteraions,iterations iteratable,iterable iterater,iterator -iteraterate,iterate iteratered,iterated iteratior,iterator iteratiors,iterators @@ -37293,7 +37265,6 @@ morbidy,morbidly morbildy,morbidly mordern,modern mordibly,morbidly -moreso,more morever,moreover 
morevoer,moreover morg,morgue @@ -43627,7 +43598,6 @@ positionned,positioned positionnes,positions positionning,positioning positionns,positions -positionof,position positiv,positive positiveity,positivity positivie,positive @@ -46649,7 +46619,6 @@ reactquire,reacquire readabilty,readability readanle,readable readble,readable -readby,read readdrss,readdress readdrssed,readdressed readdrsses,readdresses @@ -52211,7 +52180,6 @@ shoudlnt,shouldnt shoudn,shouldn shoudt,should shoul,should,shawl,shoal -shouldbe,should shouldes,shoulders shouldnot,shouldnt shouldt,shouldnt @@ -52465,7 +52433,6 @@ simiilar,similar similair,similar similairty,similarity similaraties,similarities -similari,similar similarily,similarly similarites,similarities similarlity,similarity @@ -60564,7 +60531,6 @@ unser,under,unset,unsure,user unsespecting,unsuspecting unseting,unsetting unsetlling,unsettling -unsetset,unset unsettin,unsetting unsettleing,unsettling unsharable,unshareable @@ -60871,7 +60837,6 @@ upgradde,upgrade upgradded,upgraded upgraddes,upgrades upgradding,upgrading -upgradei,upgrade upgradingn,upgrading upgrads,upgrades upgrate,upgrade @@ -61755,7 +61720,6 @@ vieports,viewports vietmanese,vietnamese vietnamees,vietnamese vietnameese,vietnamese -vietnamesea,vietnamese vietnamesse,vietnamese vietnamiese,vietnamese vietnamnese,vietnamese @@ -61896,7 +61860,6 @@ visibiltiy,visibility visibilty,visibility visibily,visibility,visibly visibl,visible -visibleable,visible visibles,visible visibley,visibly visiblities,visibilities @@ -62727,7 +62690,6 @@ witdhs,widths witdth,width witdths,widths wite,write,white -witha,with withces,witches withdral,withdrawal withdrawalls,withdrawals @@ -63059,7 +63021,6 @@ xinitiazlize,xinitialize xmdoel,xmodel xode,code,xcode xour,your -xwindows,x xyou,you yaching,yachting yaer,year diff --git a/crates/typos-dict/src/dict_codegen.rs b/crates/typos-dict/src/dict_codegen.rs index 31c764c15..b6363468e 100644 --- a/crates/typos-dict/src/dict_codegen.rs +++ 
b/crates/typos-dict/src/dict_codegen.rs @@ -625,7 +625,6 @@ pub static WORD_X_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictge dictgen::InsensitiveStr::Ascii("mdoel"), dictgen::InsensitiveStr::Ascii("ode"), dictgen::InsensitiveStr::Ascii("our"), - dictgen::InsensitiveStr::Ascii("windows"), dictgen::InsensitiveStr::Ascii("you"), ], values: &[ @@ -655,7 +654,6 @@ pub static WORD_X_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictge &["xmodel"], &["code", "xcode"], &["your"], - &["x"], &["you"], ], range: 3..=11, @@ -1872,7 +1870,6 @@ static WORD_WITH_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen: pub static WORD_WITH_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { keys: &[ - dictgen::InsensitiveStr::Ascii("a"), dictgen::InsensitiveStr::Ascii("ces"), dictgen::InsensitiveStr::Ascii("dral"), dictgen::InsensitiveStr::Ascii("drawalls"), @@ -1924,7 +1921,6 @@ pub static WORD_WITH_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("uout"), ], values: &[ - &["with"], &["witches"], &["withdrawal"], &["withdrawals"], @@ -5032,7 +5028,6 @@ pub static WORD_VISI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("bilty"), dictgen::InsensitiveStr::Ascii("bily"), dictgen::InsensitiveStr::Ascii("bl"), - dictgen::InsensitiveStr::Ascii("bleable"), dictgen::InsensitiveStr::Ascii("bles"), dictgen::InsensitiveStr::Ascii("bley"), dictgen::InsensitiveStr::Ascii("blities"), @@ -5063,7 +5058,6 @@ pub static WORD_VISI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["visibility", "visibly"], &["visible"], &["visible"], - &["visible"], &["visibly"], &["visibilities"], &["visibility"], @@ -5461,7 +5455,6 @@ pub static WORD_VIE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("tmanese"), dictgen::InsensitiveStr::Ascii("tnamees"), dictgen::InsensitiveStr::Ascii("tnameese"), - 
dictgen::InsensitiveStr::Ascii("tnamesea"), dictgen::InsensitiveStr::Ascii("tnamesse"), dictgen::InsensitiveStr::Ascii("tnamiese"), dictgen::InsensitiveStr::Ascii("tnamnese"), @@ -5485,7 +5478,6 @@ pub static WORD_VIE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["vietnamese"], &["vietnamese"], &["vietnamese"], - &["vietnamese"], &["viewers"], &["viewpoint"], &["viewpoints"], @@ -8479,7 +8471,6 @@ pub static WORD_UPG_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("radded"), dictgen::InsensitiveStr::Ascii("raddes"), dictgen::InsensitiveStr::Ascii("radding"), - dictgen::InsensitiveStr::Ascii("radei"), dictgen::InsensitiveStr::Ascii("radingn"), dictgen::InsensitiveStr::Ascii("rads"), dictgen::InsensitiveStr::Ascii("rate"), @@ -8513,7 +8504,6 @@ pub static WORD_UPG_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["upgraded"], &["upgrades"], &["upgrading"], - &["upgrade"], &["upgrading"], &["upgrades"], &["upgrade"], @@ -9527,7 +9517,6 @@ pub static WORD_UNSE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("specting"), dictgen::InsensitiveStr::Ascii("ting"), dictgen::InsensitiveStr::Ascii("tlling"), - dictgen::InsensitiveStr::Ascii("tset"), dictgen::InsensitiveStr::Ascii("ttin"), dictgen::InsensitiveStr::Ascii("ttleing"), ], @@ -9552,7 +9541,6 @@ pub static WORD_UNSE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["unsuspecting"], &["unsetting"], &["unsettling"], - &["unset"], &["unsetting"], &["unsettling"], ], @@ -36516,7 +36504,6 @@ pub static WORD_SIMI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("lair"), dictgen::InsensitiveStr::Ascii("lairty"), dictgen::InsensitiveStr::Ascii("laraties"), - dictgen::InsensitiveStr::Ascii("lari"), dictgen::InsensitiveStr::Ascii("larily"), dictgen::InsensitiveStr::Ascii("larites"), dictgen::InsensitiveStr::Ascii("larlity"), @@ -36550,7 +36537,6 @@ pub static 
WORD_SIMI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["similar"], &["similarity"], &["similarities"], - &["similar"], &["similarly"], &["similarities"], &["similarity"], @@ -37453,7 +37439,6 @@ pub static WORD_SHO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("udn"), dictgen::InsensitiveStr::Ascii("udt"), dictgen::InsensitiveStr::Ascii("ul"), - dictgen::InsensitiveStr::Ascii("uldbe"), dictgen::InsensitiveStr::Ascii("uldes"), dictgen::InsensitiveStr::Ascii("uldnot"), dictgen::InsensitiveStr::Ascii("uldt"), @@ -37518,7 +37503,6 @@ pub static WORD_SHO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["shouldn"], &["should"], &["should", "shawl", "shoal"], - &["should"], &["shoulders"], &["shouldnt"], &["shouldnt"], @@ -56256,7 +56240,6 @@ pub static WORD_READ_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("abilty"), dictgen::InsensitiveStr::Ascii("anle"), dictgen::InsensitiveStr::Ascii("ble"), - dictgen::InsensitiveStr::Ascii("by"), dictgen::InsensitiveStr::Ascii("drss"), dictgen::InsensitiveStr::Ascii("drssed"), dictgen::InsensitiveStr::Ascii("drsses"), @@ -56280,7 +56263,6 @@ pub static WORD_READ_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["readability"], &["readable"], &["readable"], - &["read"], &["readdress"], &["readdressed"], &["readdresses"], @@ -65917,7 +65899,6 @@ pub static WORD_POSI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("tionnes"), dictgen::InsensitiveStr::Ascii("tionning"), dictgen::InsensitiveStr::Ascii("tionns"), - dictgen::InsensitiveStr::Ascii("tionof"), dictgen::InsensitiveStr::Ascii("tiv"), dictgen::InsensitiveStr::Ascii("tiveity"), dictgen::InsensitiveStr::Ascii("tivie"), @@ -65978,7 +65959,6 @@ pub static WORD_POSI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["positions"], &["positioning"], &["positions"], - &["position"], &["positive"], 
&["positivity"], &["positive"], @@ -87531,7 +87511,6 @@ pub static WORD_MOR_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("bildy"), dictgen::InsensitiveStr::Ascii("dern"), dictgen::InsensitiveStr::Ascii("dibly"), - dictgen::InsensitiveStr::Ascii("eso"), dictgen::InsensitiveStr::Ascii("ever"), dictgen::InsensitiveStr::Ascii("evoer"), dictgen::InsensitiveStr::Ascii("g"), @@ -87580,7 +87559,6 @@ pub static WORD_MOR_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["morbidly"], &["modern"], &["morbidly"], - &["more"], &["moreover"], &["moreover"], &["morgue"], @@ -101472,7 +101450,6 @@ pub static WORD_ITE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("raions"), dictgen::InsensitiveStr::Ascii("ratable"), dictgen::InsensitiveStr::Ascii("rater"), - dictgen::InsensitiveStr::Ascii("raterate"), dictgen::InsensitiveStr::Ascii("ratered"), dictgen::InsensitiveStr::Ascii("ratior"), dictgen::InsensitiveStr::Ascii("ratiors"), @@ -101513,7 +101490,6 @@ pub static WORD_ITE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["iterations"], &["iterable"], &["iterator"], - &["iterate"], &["iterated"], &["iterator"], &["iterators"], @@ -103478,7 +103454,6 @@ pub static WORD_INTO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("rverted"), dictgen::InsensitiveStr::Ascii("rverts"), dictgen::InsensitiveStr::Ascii("t"), - dictgen::InsensitiveStr::Ascii("to"), dictgen::InsensitiveStr::Ascii("xicacion"), dictgen::InsensitiveStr::Ascii("xicatin"), dictgen::InsensitiveStr::Ascii("xicaton"), @@ -103510,7 +103485,6 @@ pub static WORD_INTO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["introverted"], &["introverts"], &["into"], - &["into"], &["intoxication"], &["intoxication"], &["intoxication"], @@ -106592,7 +106566,6 @@ pub static WORD_INSTAL_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d 
dictgen::InsensitiveStr::Ascii("latation"), dictgen::InsensitiveStr::Ascii("latin"), dictgen::InsensitiveStr::Ascii("lating"), - dictgen::InsensitiveStr::Ascii("lationa"), dictgen::InsensitiveStr::Ascii("latons"), dictgen::InsensitiveStr::Ascii("latron"), dictgen::InsensitiveStr::Ascii("le"), @@ -106627,7 +106600,6 @@ pub static WORD_INSTAL_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d &["installation"], &["installations"], &["installation"], - &["installation"], &["installations"], &["installation"], &["installer", "installed", "install"], @@ -110720,7 +110692,6 @@ pub static WORD_INDEN_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("tifing"), dictgen::InsensitiveStr::Ascii("tify"), dictgen::InsensitiveStr::Ascii("tifying"), - dictgen::InsensitiveStr::Ascii("tin"), dictgen::InsensitiveStr::Ascii("tit"), dictgen::InsensitiveStr::Ascii("tity"), dictgen::InsensitiveStr::Ascii("tleveal"), @@ -110743,7 +110714,6 @@ pub static WORD_INDEN_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["identifying"], &["identify"], &["identifying"], - &["indent"], &["identity"], &["identity"], &["indentlevel"], @@ -111935,7 +111905,6 @@ pub static WORD_INCL_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("ode"), dictgen::InsensitiveStr::Ascii("reased"), dictgen::InsensitiveStr::Ascii("ud"), - dictgen::InsensitiveStr::Ascii("udea"), dictgen::InsensitiveStr::Ascii("udeds"), dictgen::InsensitiveStr::Ascii("udee"), dictgen::InsensitiveStr::Ascii("udeing"), @@ -111975,7 +111944,6 @@ pub static WORD_INCL_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["include"], &["increased"], &["include"], - &["include"], &["includes", "included"], &["include"], &["including"], @@ -119034,7 +119002,6 @@ static WORD_HAV_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen:: pub static WORD_HAV_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { keys: 
&[ dictgen::InsensitiveStr::Ascii("a"), - dictgen::InsensitiveStr::Ascii("ea"), dictgen::InsensitiveStr::Ascii("ee"), dictgen::InsensitiveStr::Ascii("eing"), dictgen::InsensitiveStr::Ascii("ersting"), @@ -119044,7 +119011,6 @@ pub static WORD_HAV_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("ve"), ], values: &[ - &["have"], &["have"], &["have"], &["having"], @@ -119102,7 +119068,6 @@ static WORD_HAS_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen:: pub static WORD_HAS_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { keys: &[ dictgen::InsensitiveStr::Ascii("es"), - dictgen::InsensitiveStr::Ascii("hi"), dictgen::InsensitiveStr::Ascii("hs"), dictgen::InsensitiveStr::Ascii("hses"), dictgen::InsensitiveStr::Ascii("ing"), @@ -119113,7 +119078,6 @@ pub static WORD_HAS_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict ], values: &[ &["hashes"], - &["hash"], &["hashes"], &["hashes"], &["hashing"], @@ -124373,7 +124337,6 @@ pub static WORD_FUNCT_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("ionnality"), dictgen::InsensitiveStr::Ascii("ionnaly"), dictgen::InsensitiveStr::Ascii("ionning"), - dictgen::InsensitiveStr::Ascii("ionon"), dictgen::InsensitiveStr::Ascii("ionss"), dictgen::InsensitiveStr::Ascii("ios"), dictgen::InsensitiveStr::Ascii("iosn"), @@ -124428,7 +124391,6 @@ pub static WORD_FUNCT_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["functionality"], &["functionally"], &["functioning"], - &["function"], &["functions"], &["functions"], &["functions"], @@ -125365,7 +125327,6 @@ pub static WORD_FRAM_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic keys: &[ dictgen::InsensitiveStr::Ascii("buffer"), dictgen::InsensitiveStr::Ascii("ebufer"), - dictgen::InsensitiveStr::Ascii("ei"), dictgen::InsensitiveStr::Ascii("ent"), dictgen::InsensitiveStr::Ascii("ented"), dictgen::InsensitiveStr::Ascii("ents"), @@ 
-125389,7 +125350,6 @@ pub static WORD_FRAM_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic values: &[ &["framebuffer"], &["framebuffer"], - &["frame"], &["fragment"], &["fragmented"], &["fragments"], @@ -130108,7 +130068,6 @@ pub static WORD_EXTRE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("maste"), dictgen::InsensitiveStr::Ascii("meley"), dictgen::InsensitiveStr::Ascii("melly"), - dictgen::InsensitiveStr::Ascii("meme"), dictgen::InsensitiveStr::Ascii("memely"), dictgen::InsensitiveStr::Ascii("memly"), dictgen::InsensitiveStr::Ascii("meophile"), @@ -130143,7 +130102,6 @@ pub static WORD_EXTRE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["extremes"], &["extremely"], &["extremely"], - &["extreme"], &["extremely"], &["extremely"], &["extremophile"], @@ -140059,7 +140017,6 @@ pub static WORD_ENA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("beling"), dictgen::InsensitiveStr::Ascii("bels"), dictgen::InsensitiveStr::Ascii("bing"), - dictgen::InsensitiveStr::Ascii("bledi"), dictgen::InsensitiveStr::Ascii("bleing"), dictgen::InsensitiveStr::Ascii("blen"), dictgen::InsensitiveStr::Ascii("hnces"), @@ -140078,7 +140035,6 @@ pub static WORD_ENA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["enabling"], &["enables"], &["enabling"], - &["enabled"], &["enabling"], &["enabled"], &["enhances"], @@ -144207,7 +144163,6 @@ pub static WORD_DOWNG_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("radded"), dictgen::InsensitiveStr::Ascii("raddes"), dictgen::InsensitiveStr::Ascii("radding"), - dictgen::InsensitiveStr::Ascii("radei"), dictgen::InsensitiveStr::Ascii("radingn"), dictgen::InsensitiveStr::Ascii("rate"), dictgen::InsensitiveStr::Ascii("rated"), @@ -144235,7 +144190,6 @@ pub static WORD_DOWNG_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["downgraded"], &["downgrades"], &["downgrading"], 
- &["downgrade"], &["downgrading"], &["downgrade"], &["downgrade", "downgraded"], @@ -150398,12 +150352,9 @@ static WORD_DID_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen:: }; pub static WORD_DID_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("i"), - dictgen::InsensitiveStr::Ascii("sapointed"), - ], - values: &[&["did"], &["disappointed"]], - range: 1..=9, + keys: &[dictgen::InsensitiveStr::Ascii("sapointed")], + values: &[&["disappointed"]], + range: 9..=9, }; static WORD_DIC_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { @@ -154610,7 +154561,6 @@ pub static WORD_DEPEND_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d dictgen::InsensitiveStr::Ascii("enies"), dictgen::InsensitiveStr::Ascii("ening"), dictgen::InsensitiveStr::Ascii("ensies"), - dictgen::InsensitiveStr::Ascii("enta"), dictgen::InsensitiveStr::Ascii("ente"), dictgen::InsensitiveStr::Ascii("eny"), dictgen::InsensitiveStr::Ascii("et"), @@ -154659,7 +154609,6 @@ pub static WORD_DEPEND_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d &["dependencies"], &["depending"], &["dependencies"], - &["dependent"], &["dependence"], &["dependency"], &["dependent"], @@ -159619,7 +159568,7 @@ static WORD_CU_CHILDREN: [Option<&dictgen::DictTrieNode<&'static [&'static str]> None, None, None, - Some(&WORD_CUI_NODE), + None, None, None, Some(&WORD_CUL_NODE), @@ -160585,17 +160534,6 @@ pub static WORD_CUL_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict range: 4..=9, }; -static WORD_CUI_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_CUI_CHILDREN), - value: None, -}; - -pub static WORD_CUI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[dictgen::InsensitiveStr::Ascii("leoga")], - values: &[&["cuileog"]], - range: 5..=5, -}; - static WORD_CUE_NODE: 
dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { children: dictgen::DictTrieChild::Flat(&WORD_CUE_CHILDREN), value: None, @@ -161678,7 +161616,6 @@ pub static WORD_CREA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("soat"), dictgen::InsensitiveStr::Ascii("stor"), dictgen::InsensitiveStr::Ascii("tation"), - dictgen::InsensitiveStr::Ascii("tea"), dictgen::InsensitiveStr::Ascii("teable"), dictgen::InsensitiveStr::Ascii("tedd"), dictgen::InsensitiveStr::Ascii("tein"), @@ -161727,7 +161664,6 @@ pub static WORD_CREA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["creosote"], &["creator"], &["creation"], - &["create"], &["creatable"], &["created"], &["creatine"], @@ -167128,7 +167064,6 @@ pub static WORD_CONTAI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d dictgen::InsensitiveStr::Ascii("minate"), dictgen::InsensitiveStr::Ascii("minated"), dictgen::InsensitiveStr::Ascii("minating"), - dictgen::InsensitiveStr::Ascii("na"), dictgen::InsensitiveStr::Ascii("nd"), dictgen::InsensitiveStr::Ascii("ne"), dictgen::InsensitiveStr::Ascii("nees"), @@ -167174,7 +167109,6 @@ pub static WORD_CONTAI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d &["contaminate"], &["contaminated"], &["contaminating"], - &["contain"], &["contained"], &["contain", "contained", "container", "contains"], &["containers"], @@ -169250,7 +169184,6 @@ pub static WORD_CONNE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("ctins"), dictgen::InsensitiveStr::Ascii("ctiom"), dictgen::InsensitiveStr::Ascii("ctioms"), - dictgen::InsensitiveStr::Ascii("ctiona"), dictgen::InsensitiveStr::Ascii("ctionas"), dictgen::InsensitiveStr::Ascii("ctiong"), dictgen::InsensitiveStr::Ascii("ctit"), @@ -169305,7 +169238,6 @@ pub static WORD_CONNE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["connects", "connections"], &["connection"], &["connections"], - &["connection"], 
&["connections"], &["connecting"], &["connecticut"], @@ -174814,7 +174746,6 @@ pub static WORD_COMMA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("ndered"), dictgen::InsensitiveStr::Ascii("ndes"), dictgen::InsensitiveStr::Ascii("ndeur"), - dictgen::InsensitiveStr::Ascii("ndi"), dictgen::InsensitiveStr::Ascii("ndmant"), dictgen::InsensitiveStr::Ascii("ndmants"), dictgen::InsensitiveStr::Ascii("ndmends"), @@ -174846,7 +174777,6 @@ pub static WORD_COMMA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["commanded"], &["commands"], &["commanders"], - &["command"], &["commandment"], &["commandments"], &["commandments"], @@ -176999,7 +176929,6 @@ pub static WORD_CLI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("ckear"), dictgen::InsensitiveStr::Ascii("en"), dictgen::InsensitiveStr::Ascii("ens"), - dictgen::InsensitiveStr::Ascii("enta"), dictgen::InsensitiveStr::Ascii("ente"), dictgen::InsensitiveStr::Ascii("entelle"), dictgen::InsensitiveStr::Ascii("entes"), @@ -177059,7 +176988,6 @@ pub static WORD_CLI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["clicker"], &["client"], &["clients"], - &["client"], &["client", "clientele"], &["clientele"], &["clients"], @@ -182528,17 +182456,10 @@ pub static WORD_CATC_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic keys: &[ dictgen::InsensitiveStr::Ascii("he"), dictgen::InsensitiveStr::Ascii("hed"), - dictgen::InsensitiveStr::Ascii("hi"), dictgen::InsensitiveStr::Ascii("hip"), dictgen::InsensitiveStr::Ascii("hs"), ], - values: &[ - &["catch"], - &["caught"], - &["catch"], - &["catchup"], - &["catches"], - ], + values: &[&["catch"], &["caught"], &["catchup"], &["catches"]], range: 2..=3, }; @@ -186491,7 +186412,6 @@ pub static WORD_BROA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("dcase"), dictgen::InsensitiveStr::Ascii("dcasing"), 
dictgen::InsensitiveStr::Ascii("dcastes"), - dictgen::InsensitiveStr::Ascii("dcasti"), dictgen::InsensitiveStr::Ascii("dcastors"), dictgen::InsensitiveStr::Ascii("dcat"), dictgen::InsensitiveStr::Ascii("dley"), @@ -186510,7 +186430,6 @@ pub static WORD_BROA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["broadcast"], &["broadcasting"], &["broadcasts"], - &["broadcast"], &["broadcasts"], &["broadcasts", "broadcast"], &["broadly"], @@ -187294,221 +187213,99 @@ pub static WORD_BOUQ_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic }; static WORD_BOUN_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Nested(&WORD_BOUN_CHILDREN), - value: Some(&["bound"]), -}; - -static WORD_BOUN_CHILDREN: [Option<&dictgen::DictTrieNode<&'static [&'static str]>>; 26] = [ - Some(&WORD_BOUNA_NODE), - Some(&WORD_BOUNB_NODE), - None, - Some(&WORD_BOUND_NODE), - Some(&WORD_BOUNE_NODE), - None, - Some(&WORD_BOUNG_NODE), - Some(&WORD_BOUNH_NODE), - Some(&WORD_BOUNI_NODE), - None, - None, - None, - None, - Some(&WORD_BOUNN_NODE), - None, - None, - None, - Some(&WORD_BOUNR_NODE), - Some(&WORD_BOUNS_NODE), - Some(&WORD_BOUNT_NODE), - Some(&WORD_BOUNU_NODE), - None, - None, - None, - None, - None, -]; - -static WORD_BOUNU_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNU_CHILDREN), - value: None, -}; - -pub static WORD_BOUNU_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("daries"), - dictgen::InsensitiveStr::Ascii("dary"), - dictgen::InsensitiveStr::Ascii("s"), - ], - values: &[&["boundaries"], &["boundary"], &["bonus"]], - range: 1..=6, -}; - -static WORD_BOUNT_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNT_CHILDREN), + children: 
dictgen::DictTrieChild::Flat(&WORD_BOUN_CHILDREN), value: Some(&["bound"]), }; -pub static WORD_BOUNT_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("ries"), - dictgen::InsensitiveStr::Ascii("ry"), - ], - values: &[&["boundaries"], &["boundary"]], - range: 2..=4, -}; - -static WORD_BOUNS_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNS_CHILDREN), - value: Some(&["bounds"]), -}; - -pub static WORD_BOUNS_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("aries"), - dictgen::InsensitiveStr::Ascii("ary"), - dictgen::InsensitiveStr::Ascii("d"), - dictgen::InsensitiveStr::Ascii("es"), - ], - values: &[&["boundaries"], &["boundary"], &["bounds"], &["bonuses"]], - range: 1..=5, -}; - -static WORD_BOUNR_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNR_CHILDREN), - value: None, -}; - -pub static WORD_BOUNR_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { +pub static WORD_BOUN_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { keys: &[ - dictgen::InsensitiveStr::Ascii("adies"), - dictgen::InsensitiveStr::Ascii("ady"), - dictgen::InsensitiveStr::Ascii("aies"), + dictgen::InsensitiveStr::Ascii("aaries"), + dictgen::InsensitiveStr::Ascii("aary"), + dictgen::InsensitiveStr::Ascii("ad"), + dictgen::InsensitiveStr::Ascii("adaries"), + dictgen::InsensitiveStr::Ascii("adary"), + dictgen::InsensitiveStr::Ascii("aded"), + dictgen::InsensitiveStr::Ascii("ading"), + dictgen::InsensitiveStr::Ascii("adries"), + dictgen::InsensitiveStr::Ascii("adry"), + dictgen::InsensitiveStr::Ascii("ads"), + dictgen::InsensitiveStr::Ascii("ardies"), + dictgen::InsensitiveStr::Ascii("ardy"), dictgen::InsensitiveStr::Ascii("aries"), 
dictgen::InsensitiveStr::Ascii("ary"), - dictgen::InsensitiveStr::Ascii("ay"), + dictgen::InsensitiveStr::Ascii("bdaries"), + dictgen::InsensitiveStr::Ascii("bdary"), + dictgen::InsensitiveStr::Ascii("daires"), + dictgen::InsensitiveStr::Ascii("dares"), + dictgen::InsensitiveStr::Ascii("darys"), + dictgen::InsensitiveStr::Ascii("day"), + dictgen::InsensitiveStr::Ascii("days"), + dictgen::InsensitiveStr::Ascii("deries"), + dictgen::InsensitiveStr::Ascii("dery"), + dictgen::InsensitiveStr::Ascii("dig"), + dictgen::InsensitiveStr::Ascii("dimg"), + dictgen::InsensitiveStr::Ascii("din"), + dictgen::InsensitiveStr::Ascii("drary"), + dictgen::InsensitiveStr::Ascii("dries"), + dictgen::InsensitiveStr::Ascii("dry"), + dictgen::InsensitiveStr::Ascii("duaries"), + dictgen::InsensitiveStr::Ascii("ed"), + dictgen::InsensitiveStr::Ascii("garies"), + dictgen::InsensitiveStr::Ascii("gary"), + dictgen::InsensitiveStr::Ascii("gin"), + dictgen::InsensitiveStr::Ascii("gind"), + dictgen::InsensitiveStr::Ascii("hdaries"), + dictgen::InsensitiveStr::Ascii("hdary"), + dictgen::InsensitiveStr::Ascii("idng"), + dictgen::InsensitiveStr::Ascii("ing"), + dictgen::InsensitiveStr::Ascii("ites"), + dictgen::InsensitiveStr::Ascii("nd"), + dictgen::InsensitiveStr::Ascii("ndaries"), + dictgen::InsensitiveStr::Ascii("ndary"), + dictgen::InsensitiveStr::Ascii("nded"), + dictgen::InsensitiveStr::Ascii("nding"), + dictgen::InsensitiveStr::Ascii("nds"), + dictgen::InsensitiveStr::Ascii("radies"), + dictgen::InsensitiveStr::Ascii("rady"), + dictgen::InsensitiveStr::Ascii("raies"), + dictgen::InsensitiveStr::Ascii("raries"), + dictgen::InsensitiveStr::Ascii("rary"), + dictgen::InsensitiveStr::Ascii("ray"), + dictgen::InsensitiveStr::Ascii("s"), + dictgen::InsensitiveStr::Ascii("saries"), + dictgen::InsensitiveStr::Ascii("sary"), + dictgen::InsensitiveStr::Ascii("sd"), + dictgen::InsensitiveStr::Ascii("ses"), + dictgen::InsensitiveStr::Ascii("t"), + dictgen::InsensitiveStr::Ascii("tries"), + 
dictgen::InsensitiveStr::Ascii("try"), + dictgen::InsensitiveStr::Ascii("udaries"), + dictgen::InsensitiveStr::Ascii("udary"), + dictgen::InsensitiveStr::Ascii("us"), ], values: &[ &["boundaries"], &["boundary"], - &["boundaries"], - &["boundaries"], - &["boundary"], - &["boundary"], - ], - range: 2..=5, -}; - -static WORD_BOUNN_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNN_CHILDREN), - value: None, -}; - -pub static WORD_BOUNN_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("d"), - dictgen::InsensitiveStr::Ascii("daries"), - dictgen::InsensitiveStr::Ascii("dary"), - dictgen::InsensitiveStr::Ascii("ded"), - dictgen::InsensitiveStr::Ascii("ding"), - dictgen::InsensitiveStr::Ascii("ds"), - ], - values: &[ &["bound"], &["boundaries"], &["boundary"], &["bounded"], &["bounding"], + &["boundaries"], + &["boundary"], &["bounds"], - ], - range: 1..=6, -}; - -static WORD_BOUNI_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNI_CHILDREN), - value: None, -}; - -pub static WORD_BOUNI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("dng"), - dictgen::InsensitiveStr::Ascii("ng"), - dictgen::InsensitiveStr::Ascii("tes"), - ], - values: &[&["bounding"], &["bounding"], &["bounties"]], - range: 2..=3, -}; - -static WORD_BOUNH_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNH_CHILDREN), - value: None, -}; - -pub static WORD_BOUNH_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("daries"), - dictgen::InsensitiveStr::Ascii("dary"), - ], - values: &[&["boundaries"], &["boundary"]], - range: 4..=6, -}; - -static WORD_BOUNG_NODE: 
dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNG_CHILDREN), - value: None, -}; - -pub static WORD_BOUNG_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("aries"), - dictgen::InsensitiveStr::Ascii("ary"), - dictgen::InsensitiveStr::Ascii("in"), - dictgen::InsensitiveStr::Ascii("ind"), - ], - values: &[&["boundaries"], &["boundary"], &["bounding"], &["bounding"]], - range: 2..=5, -}; - -static WORD_BOUNE_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNE_CHILDREN), - value: None, -}; - -pub static WORD_BOUNE_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[dictgen::InsensitiveStr::Ascii("d")], - values: &[&["bounded"]], - range: 1..=1, -}; - -static WORD_BOUND_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUND_CHILDREN), - value: None, -}; - -pub static WORD_BOUND_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("aires"), - dictgen::InsensitiveStr::Ascii("ares"), - dictgen::InsensitiveStr::Ascii("aryi"), - dictgen::InsensitiveStr::Ascii("arys"), - dictgen::InsensitiveStr::Ascii("ay"), - dictgen::InsensitiveStr::Ascii("ays"), - dictgen::InsensitiveStr::Ascii("eries"), - dictgen::InsensitiveStr::Ascii("ery"), - dictgen::InsensitiveStr::Ascii("ig"), - dictgen::InsensitiveStr::Ascii("img"), - dictgen::InsensitiveStr::Ascii("in"), - dictgen::InsensitiveStr::Ascii("rary"), - dictgen::InsensitiveStr::Ascii("ries"), - dictgen::InsensitiveStr::Ascii("ry"), - dictgen::InsensitiveStr::Ascii("uaries"), - ], - values: &[ &["boundaries"], + &["boundary"], &["boundaries"], &["boundary"], &["boundaries"], &["boundary"], &["boundaries"], &["boundaries"], + 
&["boundaries"], + &["boundary"], + &["boundaries"], + &["boundaries"], &["boundary"], &["bounding"], &["bounding"], @@ -187517,63 +187314,41 @@ pub static WORD_BOUND_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["boundaries"], &["boundary"], &["boundaries"], - ], - range: 2..=6, -}; - -static WORD_BOUNB_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNB_CHILDREN), - value: None, -}; - -pub static WORD_BOUNB_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("daries"), - dictgen::InsensitiveStr::Ascii("dary"), - ], - values: &[&["boundaries"], &["boundary"]], - range: 4..=6, -}; - -static WORD_BOUNA_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { - children: dictgen::DictTrieChild::Flat(&WORD_BOUNA_CHILDREN), - value: None, -}; - -pub static WORD_BOUNA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dictgen::DictTable { - keys: &[ - dictgen::InsensitiveStr::Ascii("aries"), - dictgen::InsensitiveStr::Ascii("ary"), - dictgen::InsensitiveStr::Ascii("d"), - dictgen::InsensitiveStr::Ascii("daries"), - dictgen::InsensitiveStr::Ascii("dary"), - dictgen::InsensitiveStr::Ascii("ded"), - dictgen::InsensitiveStr::Ascii("ding"), - dictgen::InsensitiveStr::Ascii("dries"), - dictgen::InsensitiveStr::Ascii("dry"), - dictgen::InsensitiveStr::Ascii("ds"), - dictgen::InsensitiveStr::Ascii("rdies"), - dictgen::InsensitiveStr::Ascii("rdy"), - dictgen::InsensitiveStr::Ascii("ries"), - dictgen::InsensitiveStr::Ascii("ry"), - ], - values: &[ + &["bounded"], &["boundaries"], &["boundary"], + &["bounding"], + &["bounding"], + &["boundaries"], + &["boundary"], + &["bounding"], + &["bounding"], + &["bounties"], &["bound"], &["boundaries"], &["boundary"], &["bounded"], &["bounding"], + &["bounds"], + &["boundaries"], + &["boundary"], + &["boundaries"], &["boundaries"], &["boundary"], + 
&["boundary"], &["bounds"], &["boundaries"], &["boundary"], + &["bounds"], + &["bonuses"], + &["bound"], &["boundaries"], &["boundary"], + &["boundaries"], + &["boundary"], + &["bonus"], ], - range: 1..=6, + range: 1..=7, }; static WORD_BOUL_NODE: dictgen::DictTrieNode<&'static [&'static str]> = dictgen::DictTrieNode { @@ -194499,7 +194274,6 @@ pub static WORD_ATTRI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di dictgen::InsensitiveStr::Ascii("burtes"), dictgen::InsensitiveStr::Ascii("burtion"), dictgen::InsensitiveStr::Ascii("but"), - dictgen::InsensitiveStr::Ascii("butei"), dictgen::InsensitiveStr::Ascii("buten"), dictgen::InsensitiveStr::Ascii("butess"), dictgen::InsensitiveStr::Ascii("buto"), @@ -194552,7 +194326,6 @@ pub static WORD_ATTRI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = di &["attribution"], &["attribute"], &["attribute"], - &["attribute"], &["attributes"], &["attribution"], &["attributed"], @@ -194838,7 +194611,6 @@ pub static WORD_ATTA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("chements"), dictgen::InsensitiveStr::Ascii("chemnt"), dictgen::InsensitiveStr::Ascii("chemnts"), - dictgen::InsensitiveStr::Ascii("chen"), dictgen::InsensitiveStr::Ascii("chged"), dictgen::InsensitiveStr::Ascii("chmant"), dictgen::InsensitiveStr::Ascii("chmants"), @@ -194869,7 +194641,6 @@ pub static WORD_ATTA_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["attachments"], &["attachment"], &["attachments"], - &["attach"], &["attached"], &["attachment"], &["attachments"], @@ -204144,7 +203915,6 @@ pub static WORD_ALLO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic dictgen::InsensitiveStr::Ascii("carion"), dictgen::InsensitiveStr::Ascii("cat"), dictgen::InsensitiveStr::Ascii("catbale"), - dictgen::InsensitiveStr::Ascii("catedi"), dictgen::InsensitiveStr::Ascii("catedp"), dictgen::InsensitiveStr::Ascii("cateing"), dictgen::InsensitiveStr::Ascii("cateng"), @@ -204207,7 +203977,6 
@@ pub static WORD_ALLO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dic &["allocate"], &["allocatable"], &["allocated"], - &["allocated"], &["allocating"], &["allocating"], &["allocating", "allocation"], @@ -205063,7 +204832,6 @@ pub static WORD_ALGORI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d dictgen::InsensitiveStr::Ascii("thimically"), dictgen::InsensitiveStr::Ascii("thims"), dictgen::InsensitiveStr::Ascii("thmes"), - dictgen::InsensitiveStr::Ascii("thmi"), dictgen::InsensitiveStr::Ascii("thmical"), dictgen::InsensitiveStr::Ascii("thmm"), dictgen::InsensitiveStr::Ascii("thmmic"), @@ -205118,7 +204886,6 @@ pub static WORD_ALGORI_CHILDREN: dictgen::DictTable<&'static [&'static str]> = d &["algorithmically"], &["algorithm", "algorithms"], &["algorithms"], - &["algorithm"], &["algorithmically"], &["algorithm"], &["algorithmic"], @@ -211324,7 +211091,6 @@ pub static WORD_ABO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict dictgen::InsensitiveStr::Ascii("u"), dictgen::InsensitiveStr::Ascii("unt"), dictgen::InsensitiveStr::Ascii("urt"), - dictgen::InsensitiveStr::Ascii("uta"), dictgen::InsensitiveStr::Ascii("uve"), dictgen::InsensitiveStr::Ascii("v"), dictgen::InsensitiveStr::Ascii("ved"), @@ -211377,7 +211143,6 @@ pub static WORD_ABO_CHILDREN: dictgen::DictTable<&'static [&'static str]> = dict &["about", "abound"], &["about"], &["abort", "about"], - &["about"], &["above"], &["above"], &["above"], diff --git a/crates/typos-dict/tests/verify.rs b/crates/typos-dict/tests/verify.rs index 24f63bcf5..9a6ccc56c 100644 --- a/crates/typos-dict/tests/verify.rs +++ b/crates/typos-dict/tests/verify.rs @@ -94,6 +94,7 @@ fn process>( let varcon_words = varcon_words(); let allowed_words = allowed_words(); let word_variants = proper_word_variants(); + let top_1000_most_frequent_words = top_1000_most_frequent_words(); let rows: Vec<_> = rows .into_iter() .filter(|(typo, _)| { @@ -122,6 +123,19 @@ fn process>( } (typo, new_corrections) }) + 
.filter(|(typo, corrections)| { + if corrections.len() == 1 { + let only_correction = corrections.iter().next().unwrap(); + // We don't want to correct e.g. "includea" to "include" since + // it might just be a missing space ("include a"). + if let Some(suffix) = typo.strip_prefix(only_correction) { + if top_1000_most_frequent_words.contains(suffix) { + return false; + } + } + } + true + }) .collect(); let mut dict = Dict::new(); for (bad, good) in rows { @@ -191,6 +205,11 @@ fn test_varcon_best_match() { ); } +#[test] +fn test_single_correction_dropping_common_word() { + assert!(process([("includea", ["include"])]).is_empty()); +} + fn is_word(word: &str) -> bool { word.chars().all(|c| c.is_alphabetic()) } @@ -257,3 +276,11 @@ fn allowed_words() -> std::collections::HashMap { }) .collect() } + +fn top_1000_most_frequent_words() -> HashSet { + std::fs::read_to_string("assets/top-1000-most-frequent-words.csv") + .unwrap() + .lines() + .map(str::to_string) + .collect() +}