encodeblock_amd64.s 525 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303
  1. // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
  2. //go:build !appengine && !noasm && gc && !noasm
  3. #include "textflag.h"
  4. // func _dummy_()
  // Placeholder routine: no arguments, no results, zero-byte frame ($0).
  // The only instruction in its body is RET; the preprocessor lines between
  // TEXT and RET emit no code.
  5. TEXT ·_dummy_(SB), $0
  // If GOAMD64_v4 is defined and GOAMD64_v3 is not, define GOAMD64_v3 so that
  // a v4 build also satisfies any v3 feature check.
  // NOTE(review): presumably consumed by `#ifdef GOAMD64_v3` blocks later in
  // this file; confirm against the rest of the file. The macro takes effect
  // from this point onward, so these lines must stay ahead of any such use.
  6. #ifdef GOAMD64_v4
  7. #ifndef GOAMD64_v3
  8. #define GOAMD64_v3
  9. #endif
  10. #endif
  // Return immediately; the function does no work.
  11. RET
  12. // func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
  13. // Requires: BMI, SSE2
  14. TEXT ·encodeBlockAsm(SB), $24-64
  15. MOVQ tmp+48(FP), AX
  16. MOVQ dst_base+0(FP), CX
  17. MOVQ $0x00000200, DX
  18. MOVQ AX, BX
  19. PXOR X0, X0
  20. zero_loop_encodeBlockAsm:
  21. MOVOU X0, (BX)
  22. MOVOU X0, 16(BX)
  23. MOVOU X0, 32(BX)
  24. MOVOU X0, 48(BX)
  25. MOVOU X0, 64(BX)
  26. MOVOU X0, 80(BX)
  27. MOVOU X0, 96(BX)
  28. MOVOU X0, 112(BX)
  29. ADDQ $0x80, BX
  30. DECQ DX
  31. JNZ zero_loop_encodeBlockAsm
  32. MOVL $0x00000000, 12(SP)
  33. MOVQ src_len+32(FP), DX
  34. LEAQ -9(DX), BX
  35. LEAQ -8(DX), SI
  36. MOVL SI, 8(SP)
  37. SHRQ $0x05, DX
  38. SUBL DX, BX
  39. LEAQ (CX)(BX*1), BX
  40. MOVQ BX, (SP)
  41. MOVL $0x00000001, DX
  42. MOVL DX, 16(SP)
  43. MOVQ src_base+24(FP), BX
  44. search_loop_encodeBlockAsm:
  45. MOVL DX, SI
  46. SUBL 12(SP), SI
  47. SHRL $0x06, SI
  48. LEAL 4(DX)(SI*1), SI
  49. CMPL SI, 8(SP)
  50. JAE emit_remainder_encodeBlockAsm
  51. MOVQ (BX)(DX*1), DI
  52. MOVL SI, 20(SP)
  53. MOVQ $0x0000cf1bbcdcbf9b, R9
  54. MOVQ DI, R10
  55. MOVQ DI, R11
  56. SHRQ $0x08, R11
  57. SHLQ $0x10, R10
  58. IMULQ R9, R10
  59. SHRQ $0x32, R10
  60. SHLQ $0x10, R11
  61. IMULQ R9, R11
  62. SHRQ $0x32, R11
  63. MOVL (AX)(R10*4), SI
  64. MOVL (AX)(R11*4), R8
  65. MOVL DX, (AX)(R10*4)
  66. LEAL 1(DX), R10
  67. MOVL R10, (AX)(R11*4)
  68. MOVQ DI, R10
  69. SHRQ $0x10, R10
  70. SHLQ $0x10, R10
  71. IMULQ R9, R10
  72. SHRQ $0x32, R10
  73. MOVL DX, R9
  74. SUBL 16(SP), R9
  75. MOVL 1(BX)(R9*1), R11
  76. MOVQ DI, R9
  77. SHRQ $0x08, R9
  78. CMPL R9, R11
  79. JNE no_repeat_found_encodeBlockAsm
  80. LEAL 1(DX), DI
  81. MOVL 12(SP), R8
  82. MOVL DI, SI
  83. SUBL 16(SP), SI
  84. JZ repeat_extend_back_end_encodeBlockAsm
  85. repeat_extend_back_loop_encodeBlockAsm:
  86. CMPL DI, R8
  87. JBE repeat_extend_back_end_encodeBlockAsm
  88. MOVB -1(BX)(SI*1), R9
  89. MOVB -1(BX)(DI*1), R10
  90. CMPB R9, R10
  91. JNE repeat_extend_back_end_encodeBlockAsm
  92. LEAL -1(DI), DI
  93. DECL SI
  94. JNZ repeat_extend_back_loop_encodeBlockAsm
  95. repeat_extend_back_end_encodeBlockAsm:
  96. MOVL DI, SI
  97. SUBL 12(SP), SI
  98. LEAQ 5(CX)(SI*1), SI
  99. CMPQ SI, (SP)
  100. JB repeat_dst_size_check_encodeBlockAsm
  101. MOVQ $0x00000000, ret+56(FP)
  102. RET
  103. repeat_dst_size_check_encodeBlockAsm:
  104. MOVL 12(SP), SI
  105. CMPL SI, DI
  106. JEQ emit_literal_done_repeat_emit_encodeBlockAsm
  107. MOVL DI, R9
  108. MOVL DI, 12(SP)
  109. LEAQ (BX)(SI*1), R10
  110. SUBL SI, R9
  111. LEAL -1(R9), SI
  112. CMPL SI, $0x3c
  113. JB one_byte_repeat_emit_encodeBlockAsm
  114. CMPL SI, $0x00000100
  115. JB two_bytes_repeat_emit_encodeBlockAsm
  116. CMPL SI, $0x00010000
  117. JB three_bytes_repeat_emit_encodeBlockAsm
  118. CMPL SI, $0x01000000
  119. JB four_bytes_repeat_emit_encodeBlockAsm
  120. MOVB $0xfc, (CX)
  121. MOVL SI, 1(CX)
  122. ADDQ $0x05, CX
  123. JMP memmove_long_repeat_emit_encodeBlockAsm
  124. four_bytes_repeat_emit_encodeBlockAsm:
  125. MOVL SI, R11
  126. SHRL $0x10, R11
  127. MOVB $0xf8, (CX)
  128. MOVW SI, 1(CX)
  129. MOVB R11, 3(CX)
  130. ADDQ $0x04, CX
  131. JMP memmove_long_repeat_emit_encodeBlockAsm
  132. three_bytes_repeat_emit_encodeBlockAsm:
  133. MOVB $0xf4, (CX)
  134. MOVW SI, 1(CX)
  135. ADDQ $0x03, CX
  136. JMP memmove_long_repeat_emit_encodeBlockAsm
  137. two_bytes_repeat_emit_encodeBlockAsm:
  138. MOVB $0xf0, (CX)
  139. MOVB SI, 1(CX)
  140. ADDQ $0x02, CX
  141. CMPL SI, $0x40
  142. JB memmove_repeat_emit_encodeBlockAsm
  143. JMP memmove_long_repeat_emit_encodeBlockAsm
  144. one_byte_repeat_emit_encodeBlockAsm:
  145. SHLB $0x02, SI
  146. MOVB SI, (CX)
  147. ADDQ $0x01, CX
  148. memmove_repeat_emit_encodeBlockAsm:
  149. LEAQ (CX)(R9*1), SI
  150. // genMemMoveShort
  151. CMPQ R9, $0x08
  152. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
  153. CMPQ R9, $0x10
  154. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
  155. CMPQ R9, $0x20
  156. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
  157. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
  158. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
  159. MOVQ (R10), R11
  160. MOVQ R11, (CX)
  161. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  162. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
  163. MOVQ (R10), R11
  164. MOVQ -8(R10)(R9*1), R10
  165. MOVQ R11, (CX)
  166. MOVQ R10, -8(CX)(R9*1)
  167. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  168. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
  169. MOVOU (R10), X0
  170. MOVOU -16(R10)(R9*1), X1
  171. MOVOU X0, (CX)
  172. MOVOU X1, -16(CX)(R9*1)
  173. JMP memmove_end_copy_repeat_emit_encodeBlockAsm
  174. emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
  175. MOVOU (R10), X0
  176. MOVOU 16(R10), X1
  177. MOVOU -32(R10)(R9*1), X2
  178. MOVOU -16(R10)(R9*1), X3
  179. MOVOU X0, (CX)
  180. MOVOU X1, 16(CX)
  181. MOVOU X2, -32(CX)(R9*1)
  182. MOVOU X3, -16(CX)(R9*1)
  183. memmove_end_copy_repeat_emit_encodeBlockAsm:
  184. MOVQ SI, CX
  185. JMP emit_literal_done_repeat_emit_encodeBlockAsm
  186. memmove_long_repeat_emit_encodeBlockAsm:
  187. LEAQ (CX)(R9*1), SI
  188. // genMemMoveLong
  189. MOVOU (R10), X0
  190. MOVOU 16(R10), X1
  191. MOVOU -32(R10)(R9*1), X2
  192. MOVOU -16(R10)(R9*1), X3
  193. MOVQ R9, R12
  194. SHRQ $0x05, R12
  195. MOVQ CX, R11
  196. ANDL $0x0000001f, R11
  197. MOVQ $0x00000040, R13
  198. SUBQ R11, R13
  199. DECQ R12
  200. JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
  201. LEAQ -32(R10)(R13*1), R11
  202. LEAQ -32(CX)(R13*1), R14
  203. emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
  204. MOVOU (R11), X4
  205. MOVOU 16(R11), X5
  206. MOVOA X4, (R14)
  207. MOVOA X5, 16(R14)
  208. ADDQ $0x20, R14
  209. ADDQ $0x20, R11
  210. ADDQ $0x20, R13
  211. DECQ R12
  212. JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
  213. emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
  214. MOVOU -32(R10)(R13*1), X4
  215. MOVOU -16(R10)(R13*1), X5
  216. MOVOA X4, -32(CX)(R13*1)
  217. MOVOA X5, -16(CX)(R13*1)
  218. ADDQ $0x20, R13
  219. CMPQ R9, R13
  220. JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
  221. MOVOU X0, (CX)
  222. MOVOU X1, 16(CX)
  223. MOVOU X2, -32(CX)(R9*1)
  224. MOVOU X3, -16(CX)(R9*1)
  225. MOVQ SI, CX
  226. emit_literal_done_repeat_emit_encodeBlockAsm:
  227. ADDL $0x05, DX
  228. MOVL DX, SI
  229. SUBL 16(SP), SI
  230. MOVQ src_len+32(FP), R9
  231. SUBL DX, R9
  232. LEAQ (BX)(DX*1), R10
  233. LEAQ (BX)(SI*1), SI
  234. // matchLen
  235. XORL R12, R12
  236. matchlen_loopback_16_repeat_extend_encodeBlockAsm:
  237. CMPL R9, $0x10
  238. JB matchlen_match8_repeat_extend_encodeBlockAsm
  239. MOVQ (R10)(R12*1), R11
  240. MOVQ 8(R10)(R12*1), R13
  241. XORQ (SI)(R12*1), R11
  242. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
  243. XORQ 8(SI)(R12*1), R13
  244. JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm
  245. LEAL -16(R9), R9
  246. LEAL 16(R12), R12
  247. JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm
  248. matchlen_bsf_16repeat_extend_encodeBlockAsm:
  249. #ifdef GOAMD64_v3
  250. TZCNTQ R13, R13
  251. #else
  252. BSFQ R13, R13
  253. #endif
  254. SARQ $0x03, R13
  255. LEAL 8(R12)(R13*1), R12
  256. JMP repeat_extend_forward_end_encodeBlockAsm
  257. matchlen_match8_repeat_extend_encodeBlockAsm:
  258. CMPL R9, $0x08
  259. JB matchlen_match4_repeat_extend_encodeBlockAsm
  260. MOVQ (R10)(R12*1), R11
  261. XORQ (SI)(R12*1), R11
  262. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
  263. LEAL -8(R9), R9
  264. LEAL 8(R12), R12
  265. JMP matchlen_match4_repeat_extend_encodeBlockAsm
  266. matchlen_bsf_8_repeat_extend_encodeBlockAsm:
  267. #ifdef GOAMD64_v3
  268. TZCNTQ R11, R11
  269. #else
  270. BSFQ R11, R11
  271. #endif
  272. SARQ $0x03, R11
  273. LEAL (R12)(R11*1), R12
  274. JMP repeat_extend_forward_end_encodeBlockAsm
  275. matchlen_match4_repeat_extend_encodeBlockAsm:
  276. CMPL R9, $0x04
  277. JB matchlen_match2_repeat_extend_encodeBlockAsm
  278. MOVL (R10)(R12*1), R11
  279. CMPL (SI)(R12*1), R11
  280. JNE matchlen_match2_repeat_extend_encodeBlockAsm
  281. LEAL -4(R9), R9
  282. LEAL 4(R12), R12
  283. matchlen_match2_repeat_extend_encodeBlockAsm:
  284. CMPL R9, $0x01
  285. JE matchlen_match1_repeat_extend_encodeBlockAsm
  286. JB repeat_extend_forward_end_encodeBlockAsm
  287. MOVW (R10)(R12*1), R11
  288. CMPW (SI)(R12*1), R11
  289. JNE matchlen_match1_repeat_extend_encodeBlockAsm
  290. LEAL 2(R12), R12
  291. SUBL $0x02, R9
  292. JZ repeat_extend_forward_end_encodeBlockAsm
  293. matchlen_match1_repeat_extend_encodeBlockAsm:
  294. MOVB (R10)(R12*1), R11
  295. CMPB (SI)(R12*1), R11
  296. JNE repeat_extend_forward_end_encodeBlockAsm
  297. LEAL 1(R12), R12
  298. repeat_extend_forward_end_encodeBlockAsm:
  299. ADDL R12, DX
  300. MOVL DX, SI
  301. SUBL DI, SI
  302. MOVL 16(SP), DI
  303. TESTL R8, R8
  304. JZ repeat_as_copy_encodeBlockAsm
  305. // emitRepeat
  306. emit_repeat_again_match_repeat_encodeBlockAsm:
  307. MOVL SI, R8
  308. LEAL -4(SI), SI
  309. CMPL R8, $0x08
  310. JBE repeat_two_match_repeat_encodeBlockAsm
  311. CMPL R8, $0x0c
  312. JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm
  313. CMPL DI, $0x00000800
  314. JB repeat_two_offset_match_repeat_encodeBlockAsm
  315. cant_repeat_two_offset_match_repeat_encodeBlockAsm:
  316. CMPL SI, $0x00000104
  317. JB repeat_three_match_repeat_encodeBlockAsm
  318. CMPL SI, $0x00010100
  319. JB repeat_four_match_repeat_encodeBlockAsm
  320. CMPL SI, $0x0100ffff
  321. JB repeat_five_match_repeat_encodeBlockAsm
  322. LEAL -16842747(SI), SI
  323. MOVL $0xfffb001d, (CX)
  324. MOVB $0xff, 4(CX)
  325. ADDQ $0x05, CX
  326. JMP emit_repeat_again_match_repeat_encodeBlockAsm
  327. repeat_five_match_repeat_encodeBlockAsm:
  328. LEAL -65536(SI), SI
  329. MOVL SI, DI
  330. MOVW $0x001d, (CX)
  331. MOVW SI, 2(CX)
  332. SARL $0x10, DI
  333. MOVB DI, 4(CX)
  334. ADDQ $0x05, CX
  335. JMP repeat_end_emit_encodeBlockAsm
  336. repeat_four_match_repeat_encodeBlockAsm:
  337. LEAL -256(SI), SI
  338. MOVW $0x0019, (CX)
  339. MOVW SI, 2(CX)
  340. ADDQ $0x04, CX
  341. JMP repeat_end_emit_encodeBlockAsm
  342. repeat_three_match_repeat_encodeBlockAsm:
  343. LEAL -4(SI), SI
  344. MOVW $0x0015, (CX)
  345. MOVB SI, 2(CX)
  346. ADDQ $0x03, CX
  347. JMP repeat_end_emit_encodeBlockAsm
  348. repeat_two_match_repeat_encodeBlockAsm:
  349. SHLL $0x02, SI
  350. ORL $0x01, SI
  351. MOVW SI, (CX)
  352. ADDQ $0x02, CX
  353. JMP repeat_end_emit_encodeBlockAsm
  354. repeat_two_offset_match_repeat_encodeBlockAsm:
  355. XORQ R8, R8
  356. LEAL 1(R8)(SI*4), SI
  357. MOVB DI, 1(CX)
  358. SARL $0x08, DI
  359. SHLL $0x05, DI
  360. ORL DI, SI
  361. MOVB SI, (CX)
  362. ADDQ $0x02, CX
  363. JMP repeat_end_emit_encodeBlockAsm
  364. repeat_as_copy_encodeBlockAsm:
  365. // emitCopy
  366. CMPL DI, $0x00010000
  367. JB two_byte_offset_repeat_as_copy_encodeBlockAsm
  368. CMPL SI, $0x40
  369. JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm
  370. MOVB $0xff, (CX)
  371. MOVL DI, 1(CX)
  372. LEAL -64(SI), SI
  373. ADDQ $0x05, CX
  374. CMPL SI, $0x04
  375. JB four_bytes_remain_repeat_as_copy_encodeBlockAsm
  376. // emitRepeat
  377. emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
  378. MOVL SI, R8
  379. LEAL -4(SI), SI
  380. CMPL R8, $0x08
  381. JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
  382. CMPL R8, $0x0c
  383. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
  384. CMPL DI, $0x00000800
  385. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
  386. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
  387. CMPL SI, $0x00000104
  388. JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
  389. CMPL SI, $0x00010100
  390. JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
  391. CMPL SI, $0x0100ffff
  392. JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
  393. LEAL -16842747(SI), SI
  394. MOVL $0xfffb001d, (CX)
  395. MOVB $0xff, 4(CX)
  396. ADDQ $0x05, CX
  397. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
  398. repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
  399. LEAL -65536(SI), SI
  400. MOVL SI, DI
  401. MOVW $0x001d, (CX)
  402. MOVW SI, 2(CX)
  403. SARL $0x10, DI
  404. MOVB DI, 4(CX)
  405. ADDQ $0x05, CX
  406. JMP repeat_end_emit_encodeBlockAsm
  407. repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
  408. LEAL -256(SI), SI
  409. MOVW $0x0019, (CX)
  410. MOVW SI, 2(CX)
  411. ADDQ $0x04, CX
  412. JMP repeat_end_emit_encodeBlockAsm
  413. repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
  414. LEAL -4(SI), SI
  415. MOVW $0x0015, (CX)
  416. MOVB SI, 2(CX)
  417. ADDQ $0x03, CX
  418. JMP repeat_end_emit_encodeBlockAsm
  419. repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
  420. SHLL $0x02, SI
  421. ORL $0x01, SI
  422. MOVW SI, (CX)
  423. ADDQ $0x02, CX
  424. JMP repeat_end_emit_encodeBlockAsm
  425. repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
  426. XORQ R8, R8
  427. LEAL 1(R8)(SI*4), SI
  428. MOVB DI, 1(CX)
  429. SARL $0x08, DI
  430. SHLL $0x05, DI
  431. ORL DI, SI
  432. MOVB SI, (CX)
  433. ADDQ $0x02, CX
  434. JMP repeat_end_emit_encodeBlockAsm
  435. four_bytes_remain_repeat_as_copy_encodeBlockAsm:
  436. TESTL SI, SI
  437. JZ repeat_end_emit_encodeBlockAsm
  438. XORL R8, R8
  439. LEAL -1(R8)(SI*4), SI
  440. MOVB SI, (CX)
  441. MOVL DI, 1(CX)
  442. ADDQ $0x05, CX
  443. JMP repeat_end_emit_encodeBlockAsm
  444. two_byte_offset_repeat_as_copy_encodeBlockAsm:
  445. CMPL SI, $0x40
  446. JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
  447. CMPL DI, $0x00000800
  448. JAE long_offset_short_repeat_as_copy_encodeBlockAsm
  449. MOVL $0x00000001, R8
  450. LEAL 16(R8), R8
  451. MOVB DI, 1(CX)
  452. MOVL DI, R9
  453. SHRL $0x08, R9
  454. SHLL $0x05, R9
  455. ORL R9, R8
  456. MOVB R8, (CX)
  457. ADDQ $0x02, CX
  458. SUBL $0x08, SI
  459. // emitRepeat
  460. LEAL -4(SI), SI
  461. JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  462. emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  463. MOVL SI, R8
  464. LEAL -4(SI), SI
  465. CMPL R8, $0x08
  466. JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  467. CMPL R8, $0x0c
  468. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  469. CMPL DI, $0x00000800
  470. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  471. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  472. CMPL SI, $0x00000104
  473. JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  474. CMPL SI, $0x00010100
  475. JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  476. CMPL SI, $0x0100ffff
  477. JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  478. LEAL -16842747(SI), SI
  479. MOVL $0xfffb001d, (CX)
  480. MOVB $0xff, 4(CX)
  481. ADDQ $0x05, CX
  482. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
  483. repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  484. LEAL -65536(SI), SI
  485. MOVL SI, DI
  486. MOVW $0x001d, (CX)
  487. MOVW SI, 2(CX)
  488. SARL $0x10, DI
  489. MOVB DI, 4(CX)
  490. ADDQ $0x05, CX
  491. JMP repeat_end_emit_encodeBlockAsm
  492. repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  493. LEAL -256(SI), SI
  494. MOVW $0x0019, (CX)
  495. MOVW SI, 2(CX)
  496. ADDQ $0x04, CX
  497. JMP repeat_end_emit_encodeBlockAsm
  498. repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  499. LEAL -4(SI), SI
  500. MOVW $0x0015, (CX)
  501. MOVB SI, 2(CX)
  502. ADDQ $0x03, CX
  503. JMP repeat_end_emit_encodeBlockAsm
  504. repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  505. SHLL $0x02, SI
  506. ORL $0x01, SI
  507. MOVW SI, (CX)
  508. ADDQ $0x02, CX
  509. JMP repeat_end_emit_encodeBlockAsm
  510. repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
  511. XORQ R8, R8
  512. LEAL 1(R8)(SI*4), SI
  513. MOVB DI, 1(CX)
  514. SARL $0x08, DI
  515. SHLL $0x05, DI
  516. ORL DI, SI
  517. MOVB SI, (CX)
  518. ADDQ $0x02, CX
  519. JMP repeat_end_emit_encodeBlockAsm
  520. long_offset_short_repeat_as_copy_encodeBlockAsm:
  521. MOVB $0xee, (CX)
  522. MOVW DI, 1(CX)
  523. LEAL -60(SI), SI
  524. ADDQ $0x03, CX
  525. // emitRepeat
  526. emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  527. MOVL SI, R8
  528. LEAL -4(SI), SI
  529. CMPL R8, $0x08
  530. JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
  531. CMPL R8, $0x0c
  532. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
  533. CMPL DI, $0x00000800
  534. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
  535. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  536. CMPL SI, $0x00000104
  537. JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
  538. CMPL SI, $0x00010100
  539. JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
  540. CMPL SI, $0x0100ffff
  541. JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
  542. LEAL -16842747(SI), SI
  543. MOVL $0xfffb001d, (CX)
  544. MOVB $0xff, 4(CX)
  545. ADDQ $0x05, CX
  546. JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
  547. repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  548. LEAL -65536(SI), SI
  549. MOVL SI, DI
  550. MOVW $0x001d, (CX)
  551. MOVW SI, 2(CX)
  552. SARL $0x10, DI
  553. MOVB DI, 4(CX)
  554. ADDQ $0x05, CX
  555. JMP repeat_end_emit_encodeBlockAsm
  556. repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  557. LEAL -256(SI), SI
  558. MOVW $0x0019, (CX)
  559. MOVW SI, 2(CX)
  560. ADDQ $0x04, CX
  561. JMP repeat_end_emit_encodeBlockAsm
  562. repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  563. LEAL -4(SI), SI
  564. MOVW $0x0015, (CX)
  565. MOVB SI, 2(CX)
  566. ADDQ $0x03, CX
  567. JMP repeat_end_emit_encodeBlockAsm
  568. repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  569. SHLL $0x02, SI
  570. ORL $0x01, SI
  571. MOVW SI, (CX)
  572. ADDQ $0x02, CX
  573. JMP repeat_end_emit_encodeBlockAsm
  574. repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  575. XORQ R8, R8
  576. LEAL 1(R8)(SI*4), SI
  577. MOVB DI, 1(CX)
  578. SARL $0x08, DI
  579. SHLL $0x05, DI
  580. ORL DI, SI
  581. MOVB SI, (CX)
  582. ADDQ $0x02, CX
  583. JMP repeat_end_emit_encodeBlockAsm
  584. two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
  585. MOVL SI, R8
  586. SHLL $0x02, R8
  587. CMPL SI, $0x0c
  588. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm
  589. CMPL DI, $0x00000800
  590. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm
  591. LEAL -15(R8), R8
  592. MOVB DI, 1(CX)
  593. SHRL $0x08, DI
  594. SHLL $0x05, DI
  595. ORL DI, R8
  596. MOVB R8, (CX)
  597. ADDQ $0x02, CX
  598. JMP repeat_end_emit_encodeBlockAsm
  599. emit_copy_three_repeat_as_copy_encodeBlockAsm:
  600. LEAL -2(R8), R8
  601. MOVB R8, (CX)
  602. MOVW DI, 1(CX)
  603. ADDQ $0x03, CX
  604. repeat_end_emit_encodeBlockAsm:
  605. MOVL DX, 12(SP)
  606. JMP search_loop_encodeBlockAsm
  607. no_repeat_found_encodeBlockAsm:
  608. CMPL (BX)(SI*1), DI
  609. JEQ candidate_match_encodeBlockAsm
  610. SHRQ $0x08, DI
  611. MOVL (AX)(R10*4), SI
  612. LEAL 2(DX), R9
  613. CMPL (BX)(R8*1), DI
  614. JEQ candidate2_match_encodeBlockAsm
  615. MOVL R9, (AX)(R10*4)
  616. SHRQ $0x08, DI
  617. CMPL (BX)(SI*1), DI
  618. JEQ candidate3_match_encodeBlockAsm
  619. MOVL 20(SP), DX
  620. JMP search_loop_encodeBlockAsm
  621. candidate3_match_encodeBlockAsm:
  622. ADDL $0x02, DX
  623. JMP candidate_match_encodeBlockAsm
  624. candidate2_match_encodeBlockAsm:
  625. MOVL R9, (AX)(R10*4)
  626. INCL DX
  627. MOVL R8, SI
  628. candidate_match_encodeBlockAsm:
  629. MOVL 12(SP), DI
  630. TESTL SI, SI
  631. JZ match_extend_back_end_encodeBlockAsm
  632. match_extend_back_loop_encodeBlockAsm:
  633. CMPL DX, DI
  634. JBE match_extend_back_end_encodeBlockAsm
  635. MOVB -1(BX)(SI*1), R8
  636. MOVB -1(BX)(DX*1), R9
  637. CMPB R8, R9
  638. JNE match_extend_back_end_encodeBlockAsm
  639. LEAL -1(DX), DX
  640. DECL SI
  641. JZ match_extend_back_end_encodeBlockAsm
  642. JMP match_extend_back_loop_encodeBlockAsm
  643. match_extend_back_end_encodeBlockAsm:
  644. MOVL DX, DI
  645. SUBL 12(SP), DI
  646. LEAQ 5(CX)(DI*1), DI
  647. CMPQ DI, (SP)
  648. JB match_dst_size_check_encodeBlockAsm
  649. MOVQ $0x00000000, ret+56(FP)
  650. RET
  651. match_dst_size_check_encodeBlockAsm:
  652. MOVL DX, DI
  653. MOVL 12(SP), R8
  654. CMPL R8, DI
  655. JEQ emit_literal_done_match_emit_encodeBlockAsm
  656. MOVL DI, R9
  657. MOVL DI, 12(SP)
  658. LEAQ (BX)(R8*1), DI
  659. SUBL R8, R9
  660. LEAL -1(R9), R8
  661. CMPL R8, $0x3c
  662. JB one_byte_match_emit_encodeBlockAsm
  663. CMPL R8, $0x00000100
  664. JB two_bytes_match_emit_encodeBlockAsm
  665. CMPL R8, $0x00010000
  666. JB three_bytes_match_emit_encodeBlockAsm
  667. CMPL R8, $0x01000000
  668. JB four_bytes_match_emit_encodeBlockAsm
  669. MOVB $0xfc, (CX)
  670. MOVL R8, 1(CX)
  671. ADDQ $0x05, CX
  672. JMP memmove_long_match_emit_encodeBlockAsm
  673. four_bytes_match_emit_encodeBlockAsm:
  674. MOVL R8, R10
  675. SHRL $0x10, R10
  676. MOVB $0xf8, (CX)
  677. MOVW R8, 1(CX)
  678. MOVB R10, 3(CX)
  679. ADDQ $0x04, CX
  680. JMP memmove_long_match_emit_encodeBlockAsm
  681. three_bytes_match_emit_encodeBlockAsm:
  682. MOVB $0xf4, (CX)
  683. MOVW R8, 1(CX)
  684. ADDQ $0x03, CX
  685. JMP memmove_long_match_emit_encodeBlockAsm
  686. two_bytes_match_emit_encodeBlockAsm:
  687. MOVB $0xf0, (CX)
  688. MOVB R8, 1(CX)
  689. ADDQ $0x02, CX
  690. CMPL R8, $0x40
  691. JB memmove_match_emit_encodeBlockAsm
  692. JMP memmove_long_match_emit_encodeBlockAsm
  693. one_byte_match_emit_encodeBlockAsm:
  694. SHLB $0x02, R8
  695. MOVB R8, (CX)
  696. ADDQ $0x01, CX
  697. memmove_match_emit_encodeBlockAsm:
  698. LEAQ (CX)(R9*1), R8
  699. // genMemMoveShort
  700. CMPQ R9, $0x08
  701. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
  702. CMPQ R9, $0x10
  703. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
  704. CMPQ R9, $0x20
  705. JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
  706. JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
  707. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
  708. MOVQ (DI), R10
  709. MOVQ R10, (CX)
  710. JMP memmove_end_copy_match_emit_encodeBlockAsm
  711. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
  712. MOVQ (DI), R10
  713. MOVQ -8(DI)(R9*1), DI
  714. MOVQ R10, (CX)
  715. MOVQ DI, -8(CX)(R9*1)
  716. JMP memmove_end_copy_match_emit_encodeBlockAsm
  717. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
  718. MOVOU (DI), X0
  719. MOVOU -16(DI)(R9*1), X1
  720. MOVOU X0, (CX)
  721. MOVOU X1, -16(CX)(R9*1)
  722. JMP memmove_end_copy_match_emit_encodeBlockAsm
  723. emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
  724. MOVOU (DI), X0
  725. MOVOU 16(DI), X1
  726. MOVOU -32(DI)(R9*1), X2
  727. MOVOU -16(DI)(R9*1), X3
  728. MOVOU X0, (CX)
  729. MOVOU X1, 16(CX)
  730. MOVOU X2, -32(CX)(R9*1)
  731. MOVOU X3, -16(CX)(R9*1)
  732. memmove_end_copy_match_emit_encodeBlockAsm:
  733. MOVQ R8, CX
  734. JMP emit_literal_done_match_emit_encodeBlockAsm
  735. memmove_long_match_emit_encodeBlockAsm:
  736. LEAQ (CX)(R9*1), R8
  737. // genMemMoveLong
  738. MOVOU (DI), X0
  739. MOVOU 16(DI), X1
  740. MOVOU -32(DI)(R9*1), X2
  741. MOVOU -16(DI)(R9*1), X3
  742. MOVQ R9, R11
  743. SHRQ $0x05, R11
  744. MOVQ CX, R10
  745. ANDL $0x0000001f, R10
  746. MOVQ $0x00000040, R12
  747. SUBQ R10, R12
  748. DECQ R11
  749. JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
  750. LEAQ -32(DI)(R12*1), R10
  751. LEAQ -32(CX)(R12*1), R13
  752. emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
  753. MOVOU (R10), X4
  754. MOVOU 16(R10), X5
  755. MOVOA X4, (R13)
  756. MOVOA X5, 16(R13)
  757. ADDQ $0x20, R13
  758. ADDQ $0x20, R10
  759. ADDQ $0x20, R12
  760. DECQ R11
  761. JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
  762. emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
  763. MOVOU -32(DI)(R12*1), X4
  764. MOVOU -16(DI)(R12*1), X5
  765. MOVOA X4, -32(CX)(R12*1)
  766. MOVOA X5, -16(CX)(R12*1)
  767. ADDQ $0x20, R12
  768. CMPQ R9, R12
  769. JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
  770. MOVOU X0, (CX)
  771. MOVOU X1, 16(CX)
  772. MOVOU X2, -32(CX)(R9*1)
  773. MOVOU X3, -16(CX)(R9*1)
  774. MOVQ R8, CX
  775. emit_literal_done_match_emit_encodeBlockAsm:
  776. match_nolit_loop_encodeBlockAsm:
  777. MOVL DX, DI
  778. SUBL SI, DI
  779. MOVL DI, 16(SP)
  780. ADDL $0x04, DX
  781. ADDL $0x04, SI
  782. MOVQ src_len+32(FP), DI
  783. SUBL DX, DI
  784. LEAQ (BX)(DX*1), R8
  785. LEAQ (BX)(SI*1), SI
  786. // matchLen
  787. XORL R10, R10
  788. matchlen_loopback_16_match_nolit_encodeBlockAsm:
  789. CMPL DI, $0x10
  790. JB matchlen_match8_match_nolit_encodeBlockAsm
  791. MOVQ (R8)(R10*1), R9
  792. MOVQ 8(R8)(R10*1), R11
  793. XORQ (SI)(R10*1), R9
  794. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
  795. XORQ 8(SI)(R10*1), R11
  796. JNZ matchlen_bsf_16match_nolit_encodeBlockAsm
  797. LEAL -16(DI), DI
  798. LEAL 16(R10), R10
  799. JMP matchlen_loopback_16_match_nolit_encodeBlockAsm
  800. matchlen_bsf_16match_nolit_encodeBlockAsm:
  801. #ifdef GOAMD64_v3
  802. TZCNTQ R11, R11
  803. #else
  804. BSFQ R11, R11
  805. #endif
  806. SARQ $0x03, R11
  807. LEAL 8(R10)(R11*1), R10
  808. JMP match_nolit_end_encodeBlockAsm
  809. matchlen_match8_match_nolit_encodeBlockAsm:
  810. CMPL DI, $0x08
  811. JB matchlen_match4_match_nolit_encodeBlockAsm
  812. MOVQ (R8)(R10*1), R9
  813. XORQ (SI)(R10*1), R9
  814. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
  815. LEAL -8(DI), DI
  816. LEAL 8(R10), R10
  817. JMP matchlen_match4_match_nolit_encodeBlockAsm
  818. matchlen_bsf_8_match_nolit_encodeBlockAsm:
  819. #ifdef GOAMD64_v3
  820. TZCNTQ R9, R9
  821. #else
  822. BSFQ R9, R9
  823. #endif
  824. SARQ $0x03, R9
  825. LEAL (R10)(R9*1), R10
  826. JMP match_nolit_end_encodeBlockAsm
  827. matchlen_match4_match_nolit_encodeBlockAsm:
  828. CMPL DI, $0x04
  829. JB matchlen_match2_match_nolit_encodeBlockAsm
  830. MOVL (R8)(R10*1), R9
  831. CMPL (SI)(R10*1), R9
  832. JNE matchlen_match2_match_nolit_encodeBlockAsm
  833. LEAL -4(DI), DI
  834. LEAL 4(R10), R10
  835. matchlen_match2_match_nolit_encodeBlockAsm:
  836. CMPL DI, $0x01
  837. JE matchlen_match1_match_nolit_encodeBlockAsm
  838. JB match_nolit_end_encodeBlockAsm
  839. MOVW (R8)(R10*1), R9
  840. CMPW (SI)(R10*1), R9
  841. JNE matchlen_match1_match_nolit_encodeBlockAsm
  842. LEAL 2(R10), R10
  843. SUBL $0x02, DI
  844. JZ match_nolit_end_encodeBlockAsm
  845. matchlen_match1_match_nolit_encodeBlockAsm:
  846. MOVB (R8)(R10*1), R9
  847. CMPB (SI)(R10*1), R9
  848. JNE match_nolit_end_encodeBlockAsm
  849. LEAL 1(R10), R10
  850. match_nolit_end_encodeBlockAsm:
  851. ADDL R10, DX
  852. MOVL 16(SP), SI
  853. ADDL $0x04, R10
  854. MOVL DX, 12(SP)
  855. // emitCopy
  856. CMPL SI, $0x00010000
  857. JB two_byte_offset_match_nolit_encodeBlockAsm
  858. CMPL R10, $0x40
  859. JBE four_bytes_remain_match_nolit_encodeBlockAsm
  860. MOVB $0xff, (CX)
  861. MOVL SI, 1(CX)
  862. LEAL -64(R10), R10
  863. ADDQ $0x05, CX
  864. CMPL R10, $0x04
  865. JB four_bytes_remain_match_nolit_encodeBlockAsm
  866. // emitRepeat
  867. emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
  868. MOVL R10, DI
  869. LEAL -4(R10), R10
  870. CMPL DI, $0x08
  871. JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy
  872. CMPL DI, $0x0c
  873. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
  874. CMPL SI, $0x00000800
  875. JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
  876. cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  877. CMPL R10, $0x00000104
  878. JB repeat_three_match_nolit_encodeBlockAsm_emit_copy
  879. CMPL R10, $0x00010100
  880. JB repeat_four_match_nolit_encodeBlockAsm_emit_copy
  881. CMPL R10, $0x0100ffff
  882. JB repeat_five_match_nolit_encodeBlockAsm_emit_copy
  883. LEAL -16842747(R10), R10
  884. MOVL $0xfffb001d, (CX)
  885. MOVB $0xff, 4(CX)
  886. ADDQ $0x05, CX
  887. JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
  888. repeat_five_match_nolit_encodeBlockAsm_emit_copy:
  889. LEAL -65536(R10), R10
  890. MOVL R10, SI
  891. MOVW $0x001d, (CX)
  892. MOVW R10, 2(CX)
  893. SARL $0x10, SI
  894. MOVB SI, 4(CX)
  895. ADDQ $0x05, CX
  896. JMP match_nolit_emitcopy_end_encodeBlockAsm
  897. repeat_four_match_nolit_encodeBlockAsm_emit_copy:
  898. LEAL -256(R10), R10
  899. MOVW $0x0019, (CX)
  900. MOVW R10, 2(CX)
  901. ADDQ $0x04, CX
  902. JMP match_nolit_emitcopy_end_encodeBlockAsm
  903. repeat_three_match_nolit_encodeBlockAsm_emit_copy:
  904. LEAL -4(R10), R10
  905. MOVW $0x0015, (CX)
  906. MOVB R10, 2(CX)
  907. ADDQ $0x03, CX
  908. JMP match_nolit_emitcopy_end_encodeBlockAsm
  909. repeat_two_match_nolit_encodeBlockAsm_emit_copy:
  910. SHLL $0x02, R10
  911. ORL $0x01, R10
  912. MOVW R10, (CX)
  913. ADDQ $0x02, CX
  914. JMP match_nolit_emitcopy_end_encodeBlockAsm
  915. repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  916. XORQ DI, DI
  917. LEAL 1(DI)(R10*4), R10
  918. MOVB SI, 1(CX)
  919. SARL $0x08, SI
  920. SHLL $0x05, SI
  921. ORL SI, R10
  922. MOVB R10, (CX)
  923. ADDQ $0x02, CX
  924. JMP match_nolit_emitcopy_end_encodeBlockAsm
  925. four_bytes_remain_match_nolit_encodeBlockAsm:
  926. TESTL R10, R10
  927. JZ match_nolit_emitcopy_end_encodeBlockAsm
  928. XORL DI, DI
  929. LEAL -1(DI)(R10*4), R10
  930. MOVB R10, (CX)
  931. MOVL SI, 1(CX)
  932. ADDQ $0x05, CX
  933. JMP match_nolit_emitcopy_end_encodeBlockAsm
  934. two_byte_offset_match_nolit_encodeBlockAsm:
  935. CMPL R10, $0x40
  936. JBE two_byte_offset_short_match_nolit_encodeBlockAsm
  937. CMPL SI, $0x00000800
  938. JAE long_offset_short_match_nolit_encodeBlockAsm
  939. MOVL $0x00000001, DI
  940. LEAL 16(DI), DI
  941. MOVB SI, 1(CX)
  942. MOVL SI, R8
  943. SHRL $0x08, R8
  944. SHLL $0x05, R8
  945. ORL R8, DI
  946. MOVB DI, (CX)
  947. ADDQ $0x02, CX
  948. SUBL $0x08, R10
  949. // emitRepeat
  950. LEAL -4(R10), R10
  951. JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
  952. emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  953. MOVL R10, DI
  954. LEAL -4(R10), R10
  955. CMPL DI, $0x08
  956. JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
  957. CMPL DI, $0x0c
  958. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
  959. CMPL SI, $0x00000800
  960. JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
  961. cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  962. CMPL R10, $0x00000104
  963. JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
  964. CMPL R10, $0x00010100
  965. JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
  966. CMPL R10, $0x0100ffff
  967. JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
  968. LEAL -16842747(R10), R10
  969. MOVL $0xfffb001d, (CX)
  970. MOVB $0xff, 4(CX)
  971. ADDQ $0x05, CX
  972. JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
  973. repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  974. LEAL -65536(R10), R10
  975. MOVL R10, SI
  976. MOVW $0x001d, (CX)
  977. MOVW R10, 2(CX)
  978. SARL $0x10, SI
  979. MOVB SI, 4(CX)
  980. ADDQ $0x05, CX
  981. JMP match_nolit_emitcopy_end_encodeBlockAsm
  982. repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  983. LEAL -256(R10), R10
  984. MOVW $0x0019, (CX)
  985. MOVW R10, 2(CX)
  986. ADDQ $0x04, CX
  987. JMP match_nolit_emitcopy_end_encodeBlockAsm
  988. repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  989. LEAL -4(R10), R10
  990. MOVW $0x0015, (CX)
  991. MOVB R10, 2(CX)
  992. ADDQ $0x03, CX
  993. JMP match_nolit_emitcopy_end_encodeBlockAsm
  994. repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  995. SHLL $0x02, R10
  996. ORL $0x01, R10
  997. MOVW R10, (CX)
  998. ADDQ $0x02, CX
  999. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1000. repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
  1001. XORQ DI, DI
  1002. LEAL 1(DI)(R10*4), R10
  1003. MOVB SI, 1(CX)
  1004. SARL $0x08, SI
  1005. SHLL $0x05, SI
  1006. ORL SI, R10
  1007. MOVB R10, (CX)
  1008. ADDQ $0x02, CX
  1009. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1010. long_offset_short_match_nolit_encodeBlockAsm:
  1011. MOVB $0xee, (CX)
  1012. MOVW SI, 1(CX)
  1013. LEAL -60(R10), R10
  1014. ADDQ $0x03, CX
  1015. // emitRepeat
  1016. emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
  1017. MOVL R10, DI
  1018. LEAL -4(R10), R10
  1019. CMPL DI, $0x08
  1020. JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
  1021. CMPL DI, $0x0c
  1022. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  1023. CMPL SI, $0x00000800
  1024. JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  1025. cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  1026. CMPL R10, $0x00000104
  1027. JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
  1028. CMPL R10, $0x00010100
  1029. JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
  1030. CMPL R10, $0x0100ffff
  1031. JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
  1032. LEAL -16842747(R10), R10
  1033. MOVL $0xfffb001d, (CX)
  1034. MOVB $0xff, 4(CX)
  1035. ADDQ $0x05, CX
  1036. JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
  1037. repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
  1038. LEAL -65536(R10), R10
  1039. MOVL R10, SI
  1040. MOVW $0x001d, (CX)
  1041. MOVW R10, 2(CX)
  1042. SARL $0x10, SI
  1043. MOVB SI, 4(CX)
  1044. ADDQ $0x05, CX
  1045. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1046. repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
  1047. LEAL -256(R10), R10
  1048. MOVW $0x0019, (CX)
  1049. MOVW R10, 2(CX)
  1050. ADDQ $0x04, CX
  1051. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1052. repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
  1053. LEAL -4(R10), R10
  1054. MOVW $0x0015, (CX)
  1055. MOVB R10, 2(CX)
  1056. ADDQ $0x03, CX
  1057. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1058. repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
  1059. SHLL $0x02, R10
  1060. ORL $0x01, R10
  1061. MOVW R10, (CX)
  1062. ADDQ $0x02, CX
  1063. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1064. repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  1065. XORQ DI, DI
  1066. LEAL 1(DI)(R10*4), R10
  1067. MOVB SI, 1(CX)
  1068. SARL $0x08, SI
  1069. SHLL $0x05, SI
  1070. ORL SI, R10
  1071. MOVB R10, (CX)
  1072. ADDQ $0x02, CX
  1073. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1074. two_byte_offset_short_match_nolit_encodeBlockAsm:
  1075. MOVL R10, DI
  1076. SHLL $0x02, DI
  1077. CMPL R10, $0x0c
  1078. JAE emit_copy_three_match_nolit_encodeBlockAsm
  1079. CMPL SI, $0x00000800
  1080. JAE emit_copy_three_match_nolit_encodeBlockAsm
  1081. LEAL -15(DI), DI
  1082. MOVB SI, 1(CX)
  1083. SHRL $0x08, SI
  1084. SHLL $0x05, SI
  1085. ORL SI, DI
  1086. MOVB DI, (CX)
  1087. ADDQ $0x02, CX
  1088. JMP match_nolit_emitcopy_end_encodeBlockAsm
  1089. emit_copy_three_match_nolit_encodeBlockAsm:
  1090. LEAL -2(DI), DI
  1091. MOVB DI, (CX)
  1092. MOVW SI, 1(CX)
  1093. ADDQ $0x03, CX
  1094. match_nolit_emitcopy_end_encodeBlockAsm:
  1095. CMPL DX, 8(SP)
  1096. JAE emit_remainder_encodeBlockAsm
  1097. MOVQ -2(BX)(DX*1), DI
  1098. CMPQ CX, (SP)
  1099. JB match_nolit_dst_ok_encodeBlockAsm
  1100. MOVQ $0x00000000, ret+56(FP)
  1101. RET
  1102. match_nolit_dst_ok_encodeBlockAsm:
  1103. MOVQ $0x0000cf1bbcdcbf9b, R9
  1104. MOVQ DI, R8
  1105. SHRQ $0x10, DI
  1106. MOVQ DI, SI
  1107. SHLQ $0x10, R8
  1108. IMULQ R9, R8
  1109. SHRQ $0x32, R8
  1110. SHLQ $0x10, SI
  1111. IMULQ R9, SI
  1112. SHRQ $0x32, SI
  1113. LEAL -2(DX), R9
  1114. LEAQ (AX)(SI*4), R10
  1115. MOVL (R10), SI
  1116. MOVL R9, (AX)(R8*4)
  1117. MOVL DX, (R10)
  1118. CMPL (BX)(SI*1), DI
  1119. JEQ match_nolit_loop_encodeBlockAsm
  1120. INCL DX
  1121. JMP search_loop_encodeBlockAsm
  1122. emit_remainder_encodeBlockAsm:
  1123. MOVQ src_len+32(FP), AX
  1124. SUBL 12(SP), AX
  1125. LEAQ 5(CX)(AX*1), AX
  1126. CMPQ AX, (SP)
  1127. JB emit_remainder_ok_encodeBlockAsm
  1128. MOVQ $0x00000000, ret+56(FP)
  1129. RET
  1130. emit_remainder_ok_encodeBlockAsm:
  1131. MOVQ src_len+32(FP), AX
  1132. MOVL 12(SP), DX
  1133. CMPL DX, AX
  1134. JEQ emit_literal_done_emit_remainder_encodeBlockAsm
  1135. MOVL AX, SI
  1136. MOVL AX, 12(SP)
  1137. LEAQ (BX)(DX*1), AX
  1138. SUBL DX, SI
  1139. LEAL -1(SI), DX
  1140. CMPL DX, $0x3c
  1141. JB one_byte_emit_remainder_encodeBlockAsm
  1142. CMPL DX, $0x00000100
  1143. JB two_bytes_emit_remainder_encodeBlockAsm
  1144. CMPL DX, $0x00010000
  1145. JB three_bytes_emit_remainder_encodeBlockAsm
  1146. CMPL DX, $0x01000000
  1147. JB four_bytes_emit_remainder_encodeBlockAsm
  1148. MOVB $0xfc, (CX)
  1149. MOVL DX, 1(CX)
  1150. ADDQ $0x05, CX
  1151. JMP memmove_long_emit_remainder_encodeBlockAsm
  1152. four_bytes_emit_remainder_encodeBlockAsm:
  1153. MOVL DX, BX
  1154. SHRL $0x10, BX
  1155. MOVB $0xf8, (CX)
  1156. MOVW DX, 1(CX)
  1157. MOVB BL, 3(CX)
  1158. ADDQ $0x04, CX
  1159. JMP memmove_long_emit_remainder_encodeBlockAsm
  1160. three_bytes_emit_remainder_encodeBlockAsm:
  1161. MOVB $0xf4, (CX)
  1162. MOVW DX, 1(CX)
  1163. ADDQ $0x03, CX
  1164. JMP memmove_long_emit_remainder_encodeBlockAsm
  1165. two_bytes_emit_remainder_encodeBlockAsm:
  1166. MOVB $0xf0, (CX)
  1167. MOVB DL, 1(CX)
  1168. ADDQ $0x02, CX
  1169. CMPL DX, $0x40
  1170. JB memmove_emit_remainder_encodeBlockAsm
  1171. JMP memmove_long_emit_remainder_encodeBlockAsm
  1172. one_byte_emit_remainder_encodeBlockAsm:
  1173. SHLB $0x02, DL
  1174. MOVB DL, (CX)
  1175. ADDQ $0x01, CX
  1176. memmove_emit_remainder_encodeBlockAsm:
  1177. LEAQ (CX)(SI*1), DX
  1178. MOVL SI, BX
  1179. // genMemMoveShort
  1180. CMPQ BX, $0x03
  1181. JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
  1182. JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
  1183. CMPQ BX, $0x08
  1184. JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
  1185. CMPQ BX, $0x10
  1186. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
  1187. CMPQ BX, $0x20
  1188. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
  1189. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
  1190. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
  1191. MOVB (AX), SI
  1192. MOVB -1(AX)(BX*1), AL
  1193. MOVB SI, (CX)
  1194. MOVB AL, -1(CX)(BX*1)
  1195. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1196. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
  1197. MOVW (AX), SI
  1198. MOVB 2(AX), AL
  1199. MOVW SI, (CX)
  1200. MOVB AL, 2(CX)
  1201. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1202. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
  1203. MOVL (AX), SI
  1204. MOVL -4(AX)(BX*1), AX
  1205. MOVL SI, (CX)
  1206. MOVL AX, -4(CX)(BX*1)
  1207. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1208. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
  1209. MOVQ (AX), SI
  1210. MOVQ -8(AX)(BX*1), AX
  1211. MOVQ SI, (CX)
  1212. MOVQ AX, -8(CX)(BX*1)
  1213. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1214. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
  1215. MOVOU (AX), X0
  1216. MOVOU -16(AX)(BX*1), X1
  1217. MOVOU X0, (CX)
  1218. MOVOU X1, -16(CX)(BX*1)
  1219. JMP memmove_end_copy_emit_remainder_encodeBlockAsm
  1220. emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
  1221. MOVOU (AX), X0
  1222. MOVOU 16(AX), X1
  1223. MOVOU -32(AX)(BX*1), X2
  1224. MOVOU -16(AX)(BX*1), X3
  1225. MOVOU X0, (CX)
  1226. MOVOU X1, 16(CX)
  1227. MOVOU X2, -32(CX)(BX*1)
  1228. MOVOU X3, -16(CX)(BX*1)
  1229. memmove_end_copy_emit_remainder_encodeBlockAsm:
  1230. MOVQ DX, CX
  1231. JMP emit_literal_done_emit_remainder_encodeBlockAsm
  1232. memmove_long_emit_remainder_encodeBlockAsm:
  1233. LEAQ (CX)(SI*1), DX
  1234. MOVL SI, BX
  1235. // genMemMoveLong
  1236. MOVOU (AX), X0
  1237. MOVOU 16(AX), X1
  1238. MOVOU -32(AX)(BX*1), X2
  1239. MOVOU -16(AX)(BX*1), X3
  1240. MOVQ BX, DI
  1241. SHRQ $0x05, DI
  1242. MOVQ CX, SI
  1243. ANDL $0x0000001f, SI
  1244. MOVQ $0x00000040, R8
  1245. SUBQ SI, R8
  1246. DECQ DI
  1247. JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
  1248. LEAQ -32(AX)(R8*1), SI
  1249. LEAQ -32(CX)(R8*1), R9
  1250. emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
  1251. MOVOU (SI), X4
  1252. MOVOU 16(SI), X5
  1253. MOVOA X4, (R9)
  1254. MOVOA X5, 16(R9)
  1255. ADDQ $0x20, R9
  1256. ADDQ $0x20, SI
  1257. ADDQ $0x20, R8
  1258. DECQ DI
  1259. JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
  1260. emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
  1261. MOVOU -32(AX)(R8*1), X4
  1262. MOVOU -16(AX)(R8*1), X5
  1263. MOVOA X4, -32(CX)(R8*1)
  1264. MOVOA X5, -16(CX)(R8*1)
  1265. ADDQ $0x20, R8
  1266. CMPQ BX, R8
  1267. JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
  1268. MOVOU X0, (CX)
  1269. MOVOU X1, 16(CX)
  1270. MOVOU X2, -32(CX)(BX*1)
  1271. MOVOU X3, -16(CX)(BX*1)
  1272. MOVQ DX, CX
  1273. emit_literal_done_emit_remainder_encodeBlockAsm:
  1274. MOVQ dst_base+0(FP), AX
  1275. SUBQ AX, CX
  1276. MOVQ CX, ret+56(FP)
  1277. RET
  1278. // func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
  1279. // Requires: BMI, SSE2
  1280. TEXT ·encodeBlockAsm4MB(SB), $24-64
  1281. MOVQ tmp+48(FP), AX
  1282. MOVQ dst_base+0(FP), CX
  1283. MOVQ $0x00000200, DX
  1284. MOVQ AX, BX
  1285. PXOR X0, X0
  1286. zero_loop_encodeBlockAsm4MB:
  1287. MOVOU X0, (BX)
  1288. MOVOU X0, 16(BX)
  1289. MOVOU X0, 32(BX)
  1290. MOVOU X0, 48(BX)
  1291. MOVOU X0, 64(BX)
  1292. MOVOU X0, 80(BX)
  1293. MOVOU X0, 96(BX)
  1294. MOVOU X0, 112(BX)
  1295. ADDQ $0x80, BX
  1296. DECQ DX
  1297. JNZ zero_loop_encodeBlockAsm4MB
  1298. MOVL $0x00000000, 12(SP)
  1299. MOVQ src_len+32(FP), DX
  1300. LEAQ -9(DX), BX
  1301. LEAQ -8(DX), SI
  1302. MOVL SI, 8(SP)
  1303. SHRQ $0x05, DX
  1304. SUBL DX, BX
  1305. LEAQ (CX)(BX*1), BX
  1306. MOVQ BX, (SP)
  1307. MOVL $0x00000001, DX
  1308. MOVL DX, 16(SP)
  1309. MOVQ src_base+24(FP), BX
  1310. search_loop_encodeBlockAsm4MB:
  1311. MOVL DX, SI
  1312. SUBL 12(SP), SI
  1313. SHRL $0x06, SI
  1314. LEAL 4(DX)(SI*1), SI
  1315. CMPL SI, 8(SP)
  1316. JAE emit_remainder_encodeBlockAsm4MB
  1317. MOVQ (BX)(DX*1), DI
  1318. MOVL SI, 20(SP)
  1319. MOVQ $0x0000cf1bbcdcbf9b, R9
  1320. MOVQ DI, R10
  1321. MOVQ DI, R11
  1322. SHRQ $0x08, R11
  1323. SHLQ $0x10, R10
  1324. IMULQ R9, R10
  1325. SHRQ $0x32, R10
  1326. SHLQ $0x10, R11
  1327. IMULQ R9, R11
  1328. SHRQ $0x32, R11
  1329. MOVL (AX)(R10*4), SI
  1330. MOVL (AX)(R11*4), R8
  1331. MOVL DX, (AX)(R10*4)
  1332. LEAL 1(DX), R10
  1333. MOVL R10, (AX)(R11*4)
  1334. MOVQ DI, R10
  1335. SHRQ $0x10, R10
  1336. SHLQ $0x10, R10
  1337. IMULQ R9, R10
  1338. SHRQ $0x32, R10
  1339. MOVL DX, R9
  1340. SUBL 16(SP), R9
  1341. MOVL 1(BX)(R9*1), R11
  1342. MOVQ DI, R9
  1343. SHRQ $0x08, R9
  1344. CMPL R9, R11
  1345. JNE no_repeat_found_encodeBlockAsm4MB
  1346. LEAL 1(DX), DI
  1347. MOVL 12(SP), R8
  1348. MOVL DI, SI
  1349. SUBL 16(SP), SI
  1350. JZ repeat_extend_back_end_encodeBlockAsm4MB
  1351. repeat_extend_back_loop_encodeBlockAsm4MB:
  1352. CMPL DI, R8
  1353. JBE repeat_extend_back_end_encodeBlockAsm4MB
  1354. MOVB -1(BX)(SI*1), R9
  1355. MOVB -1(BX)(DI*1), R10
  1356. CMPB R9, R10
  1357. JNE repeat_extend_back_end_encodeBlockAsm4MB
  1358. LEAL -1(DI), DI
  1359. DECL SI
  1360. JNZ repeat_extend_back_loop_encodeBlockAsm4MB
  1361. repeat_extend_back_end_encodeBlockAsm4MB:
  1362. MOVL DI, SI
  1363. SUBL 12(SP), SI
  1364. LEAQ 4(CX)(SI*1), SI
  1365. CMPQ SI, (SP)
  1366. JB repeat_dst_size_check_encodeBlockAsm4MB
  1367. MOVQ $0x00000000, ret+56(FP)
  1368. RET
  1369. repeat_dst_size_check_encodeBlockAsm4MB:
  1370. MOVL 12(SP), SI
  1371. CMPL SI, DI
  1372. JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB
  1373. MOVL DI, R9
  1374. MOVL DI, 12(SP)
  1375. LEAQ (BX)(SI*1), R10
  1376. SUBL SI, R9
  1377. LEAL -1(R9), SI
  1378. CMPL SI, $0x3c
  1379. JB one_byte_repeat_emit_encodeBlockAsm4MB
  1380. CMPL SI, $0x00000100
  1381. JB two_bytes_repeat_emit_encodeBlockAsm4MB
  1382. CMPL SI, $0x00010000
  1383. JB three_bytes_repeat_emit_encodeBlockAsm4MB
  1384. MOVL SI, R11
  1385. SHRL $0x10, R11
  1386. MOVB $0xf8, (CX)
  1387. MOVW SI, 1(CX)
  1388. MOVB R11, 3(CX)
  1389. ADDQ $0x04, CX
  1390. JMP memmove_long_repeat_emit_encodeBlockAsm4MB
  1391. three_bytes_repeat_emit_encodeBlockAsm4MB:
  1392. MOVB $0xf4, (CX)
  1393. MOVW SI, 1(CX)
  1394. ADDQ $0x03, CX
  1395. JMP memmove_long_repeat_emit_encodeBlockAsm4MB
  1396. two_bytes_repeat_emit_encodeBlockAsm4MB:
  1397. MOVB $0xf0, (CX)
  1398. MOVB SI, 1(CX)
  1399. ADDQ $0x02, CX
  1400. CMPL SI, $0x40
  1401. JB memmove_repeat_emit_encodeBlockAsm4MB
  1402. JMP memmove_long_repeat_emit_encodeBlockAsm4MB
  1403. one_byte_repeat_emit_encodeBlockAsm4MB:
  1404. SHLB $0x02, SI
  1405. MOVB SI, (CX)
  1406. ADDQ $0x01, CX
  1407. memmove_repeat_emit_encodeBlockAsm4MB:
  1408. LEAQ (CX)(R9*1), SI
  1409. // genMemMoveShort
  1410. CMPQ R9, $0x08
  1411. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
  1412. CMPQ R9, $0x10
  1413. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
  1414. CMPQ R9, $0x20
  1415. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
  1416. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
  1417. emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
  1418. MOVQ (R10), R11
  1419. MOVQ R11, (CX)
  1420. JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
  1421. emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
  1422. MOVQ (R10), R11
  1423. MOVQ -8(R10)(R9*1), R10
  1424. MOVQ R11, (CX)
  1425. MOVQ R10, -8(CX)(R9*1)
  1426. JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
  1427. emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
  1428. MOVOU (R10), X0
  1429. MOVOU -16(R10)(R9*1), X1
  1430. MOVOU X0, (CX)
  1431. MOVOU X1, -16(CX)(R9*1)
  1432. JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
  1433. emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
  1434. MOVOU (R10), X0
  1435. MOVOU 16(R10), X1
  1436. MOVOU -32(R10)(R9*1), X2
  1437. MOVOU -16(R10)(R9*1), X3
  1438. MOVOU X0, (CX)
  1439. MOVOU X1, 16(CX)
  1440. MOVOU X2, -32(CX)(R9*1)
  1441. MOVOU X3, -16(CX)(R9*1)
  1442. memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
  1443. MOVQ SI, CX
  1444. JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB
  1445. memmove_long_repeat_emit_encodeBlockAsm4MB:
  1446. LEAQ (CX)(R9*1), SI
  1447. // genMemMoveLong
  1448. MOVOU (R10), X0
  1449. MOVOU 16(R10), X1
  1450. MOVOU -32(R10)(R9*1), X2
  1451. MOVOU -16(R10)(R9*1), X3
  1452. MOVQ R9, R12
  1453. SHRQ $0x05, R12
  1454. MOVQ CX, R11
  1455. ANDL $0x0000001f, R11
  1456. MOVQ $0x00000040, R13
  1457. SUBQ R11, R13
  1458. DECQ R12
  1459. JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  1460. LEAQ -32(R10)(R13*1), R11
  1461. LEAQ -32(CX)(R13*1), R14
  1462. emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
  1463. MOVOU (R11), X4
  1464. MOVOU 16(R11), X5
  1465. MOVOA X4, (R14)
  1466. MOVOA X5, 16(R14)
  1467. ADDQ $0x20, R14
  1468. ADDQ $0x20, R11
  1469. ADDQ $0x20, R13
  1470. DECQ R12
  1471. JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
  1472. emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
  1473. MOVOU -32(R10)(R13*1), X4
  1474. MOVOU -16(R10)(R13*1), X5
  1475. MOVOA X4, -32(CX)(R13*1)
  1476. MOVOA X5, -16(CX)(R13*1)
  1477. ADDQ $0x20, R13
  1478. CMPQ R9, R13
  1479. JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  1480. MOVOU X0, (CX)
  1481. MOVOU X1, 16(CX)
  1482. MOVOU X2, -32(CX)(R9*1)
  1483. MOVOU X3, -16(CX)(R9*1)
  1484. MOVQ SI, CX
  1485. emit_literal_done_repeat_emit_encodeBlockAsm4MB:
  1486. ADDL $0x05, DX
  1487. MOVL DX, SI
  1488. SUBL 16(SP), SI
  1489. MOVQ src_len+32(FP), R9
  1490. SUBL DX, R9
  1491. LEAQ (BX)(DX*1), R10
  1492. LEAQ (BX)(SI*1), SI
  1493. // matchLen
  1494. XORL R12, R12
  1495. matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
  1496. CMPL R9, $0x10
  1497. JB matchlen_match8_repeat_extend_encodeBlockAsm4MB
  1498. MOVQ (R10)(R12*1), R11
  1499. MOVQ 8(R10)(R12*1), R13
  1500. XORQ (SI)(R12*1), R11
  1501. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
  1502. XORQ 8(SI)(R12*1), R13
  1503. JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
  1504. LEAL -16(R9), R9
  1505. LEAL 16(R12), R12
  1506. JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
  1507. matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
  1508. #ifdef GOAMD64_v3
  1509. TZCNTQ R13, R13
  1510. #else
  1511. BSFQ R13, R13
  1512. #endif
  1513. SARQ $0x03, R13
  1514. LEAL 8(R12)(R13*1), R12
  1515. JMP repeat_extend_forward_end_encodeBlockAsm4MB
  1516. matchlen_match8_repeat_extend_encodeBlockAsm4MB:
  1517. CMPL R9, $0x08
  1518. JB matchlen_match4_repeat_extend_encodeBlockAsm4MB
  1519. MOVQ (R10)(R12*1), R11
  1520. XORQ (SI)(R12*1), R11
  1521. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
  1522. LEAL -8(R9), R9
  1523. LEAL 8(R12), R12
  1524. JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB
  1525. matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
  1526. #ifdef GOAMD64_v3
  1527. TZCNTQ R11, R11
  1528. #else
  1529. BSFQ R11, R11
  1530. #endif
  1531. SARQ $0x03, R11
  1532. LEAL (R12)(R11*1), R12
  1533. JMP repeat_extend_forward_end_encodeBlockAsm4MB
  1534. matchlen_match4_repeat_extend_encodeBlockAsm4MB:
  1535. CMPL R9, $0x04
  1536. JB matchlen_match2_repeat_extend_encodeBlockAsm4MB
  1537. MOVL (R10)(R12*1), R11
  1538. CMPL (SI)(R12*1), R11
  1539. JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB
  1540. LEAL -4(R9), R9
  1541. LEAL 4(R12), R12
  1542. matchlen_match2_repeat_extend_encodeBlockAsm4MB:
  1543. CMPL R9, $0x01
  1544. JE matchlen_match1_repeat_extend_encodeBlockAsm4MB
  1545. JB repeat_extend_forward_end_encodeBlockAsm4MB
  1546. MOVW (R10)(R12*1), R11
  1547. CMPW (SI)(R12*1), R11
  1548. JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB
  1549. LEAL 2(R12), R12
  1550. SUBL $0x02, R9
  1551. JZ repeat_extend_forward_end_encodeBlockAsm4MB
  1552. matchlen_match1_repeat_extend_encodeBlockAsm4MB:
  1553. MOVB (R10)(R12*1), R11
  1554. CMPB (SI)(R12*1), R11
  1555. JNE repeat_extend_forward_end_encodeBlockAsm4MB
  1556. LEAL 1(R12), R12
  1557. repeat_extend_forward_end_encodeBlockAsm4MB:
  1558. ADDL R12, DX
  1559. MOVL DX, SI
  1560. SUBL DI, SI
  1561. MOVL 16(SP), DI
  1562. TESTL R8, R8
  1563. JZ repeat_as_copy_encodeBlockAsm4MB
  1564. // emitRepeat
  1565. MOVL SI, R8
  1566. LEAL -4(SI), SI
  1567. CMPL R8, $0x08
  1568. JBE repeat_two_match_repeat_encodeBlockAsm4MB
  1569. CMPL R8, $0x0c
  1570. JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
  1571. CMPL DI, $0x00000800
  1572. JB repeat_two_offset_match_repeat_encodeBlockAsm4MB
  1573. cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
  1574. CMPL SI, $0x00000104
  1575. JB repeat_three_match_repeat_encodeBlockAsm4MB
  1576. CMPL SI, $0x00010100
  1577. JB repeat_four_match_repeat_encodeBlockAsm4MB
  1578. LEAL -65536(SI), SI
  1579. MOVL SI, DI
  1580. MOVW $0x001d, (CX)
  1581. MOVW SI, 2(CX)
  1582. SARL $0x10, DI
  1583. MOVB DI, 4(CX)
  1584. ADDQ $0x05, CX
  1585. JMP repeat_end_emit_encodeBlockAsm4MB
  1586. repeat_four_match_repeat_encodeBlockAsm4MB:
  1587. LEAL -256(SI), SI
  1588. MOVW $0x0019, (CX)
  1589. MOVW SI, 2(CX)
  1590. ADDQ $0x04, CX
  1591. JMP repeat_end_emit_encodeBlockAsm4MB
  1592. repeat_three_match_repeat_encodeBlockAsm4MB:
  1593. LEAL -4(SI), SI
  1594. MOVW $0x0015, (CX)
  1595. MOVB SI, 2(CX)
  1596. ADDQ $0x03, CX
  1597. JMP repeat_end_emit_encodeBlockAsm4MB
  1598. repeat_two_match_repeat_encodeBlockAsm4MB:
  1599. SHLL $0x02, SI
  1600. ORL $0x01, SI
  1601. MOVW SI, (CX)
  1602. ADDQ $0x02, CX
  1603. JMP repeat_end_emit_encodeBlockAsm4MB
  1604. repeat_two_offset_match_repeat_encodeBlockAsm4MB:
  1605. XORQ R8, R8
  1606. LEAL 1(R8)(SI*4), SI
  1607. MOVB DI, 1(CX)
  1608. SARL $0x08, DI
  1609. SHLL $0x05, DI
  1610. ORL DI, SI
  1611. MOVB SI, (CX)
  1612. ADDQ $0x02, CX
  1613. JMP repeat_end_emit_encodeBlockAsm4MB
  1614. repeat_as_copy_encodeBlockAsm4MB:
  1615. // emitCopy
  1616. CMPL DI, $0x00010000
  1617. JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
  1618. CMPL SI, $0x40
  1619. JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
  1620. MOVB $0xff, (CX)
  1621. MOVL DI, 1(CX)
  1622. LEAL -64(SI), SI
  1623. ADDQ $0x05, CX
  1624. CMPL SI, $0x04
  1625. JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
  1626. // emitRepeat
  1627. MOVL SI, R8
  1628. LEAL -4(SI), SI
  1629. CMPL R8, $0x08
  1630. JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1631. CMPL R8, $0x0c
  1632. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1633. CMPL DI, $0x00000800
  1634. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1635. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1636. CMPL SI, $0x00000104
  1637. JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1638. CMPL SI, $0x00010100
  1639. JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
  1640. LEAL -65536(SI), SI
  1641. MOVL SI, DI
  1642. MOVW $0x001d, (CX)
  1643. MOVW SI, 2(CX)
  1644. SARL $0x10, DI
  1645. MOVB DI, 4(CX)
  1646. ADDQ $0x05, CX
  1647. JMP repeat_end_emit_encodeBlockAsm4MB
  1648. repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1649. LEAL -256(SI), SI
  1650. MOVW $0x0019, (CX)
  1651. MOVW SI, 2(CX)
  1652. ADDQ $0x04, CX
  1653. JMP repeat_end_emit_encodeBlockAsm4MB
  1654. repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1655. LEAL -4(SI), SI
  1656. MOVW $0x0015, (CX)
  1657. MOVB SI, 2(CX)
  1658. ADDQ $0x03, CX
  1659. JMP repeat_end_emit_encodeBlockAsm4MB
  1660. repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1661. SHLL $0x02, SI
  1662. ORL $0x01, SI
  1663. MOVW SI, (CX)
  1664. ADDQ $0x02, CX
  1665. JMP repeat_end_emit_encodeBlockAsm4MB
  1666. repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
  1667. XORQ R8, R8
  1668. LEAL 1(R8)(SI*4), SI
  1669. MOVB DI, 1(CX)
  1670. SARL $0x08, DI
  1671. SHLL $0x05, DI
  1672. ORL DI, SI
  1673. MOVB SI, (CX)
  1674. ADDQ $0x02, CX
  1675. JMP repeat_end_emit_encodeBlockAsm4MB
  1676. four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
  1677. TESTL SI, SI
  1678. JZ repeat_end_emit_encodeBlockAsm4MB
  1679. XORL R8, R8
  1680. LEAL -1(R8)(SI*4), SI
  1681. MOVB SI, (CX)
  1682. MOVL DI, 1(CX)
  1683. ADDQ $0x05, CX
  1684. JMP repeat_end_emit_encodeBlockAsm4MB
  1685. two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
  1686. CMPL SI, $0x40
  1687. JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
  1688. CMPL DI, $0x00000800
  1689. JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB
  1690. MOVL $0x00000001, R8
  1691. LEAL 16(R8), R8
  1692. MOVB DI, 1(CX)
  1693. SHRL $0x08, DI
  1694. SHLL $0x05, DI
  1695. ORL DI, R8
  1696. MOVB R8, (CX)
  1697. ADDQ $0x02, CX
  1698. SUBL $0x08, SI
  1699. // emitRepeat
  1700. LEAL -4(SI), SI
  1701. JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1702. MOVL SI, R8
  1703. LEAL -4(SI), SI
  1704. CMPL R8, $0x08
  1705. JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1706. CMPL R8, $0x0c
  1707. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1708. CMPL DI, $0x00000800
  1709. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1710. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1711. CMPL SI, $0x00000104
  1712. JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1713. CMPL SI, $0x00010100
  1714. JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
  1715. LEAL -65536(SI), SI
  1716. MOVL SI, DI
  1717. MOVW $0x001d, (CX)
  1718. MOVW SI, 2(CX)
  1719. SARL $0x10, DI
  1720. MOVB DI, 4(CX)
  1721. ADDQ $0x05, CX
  1722. JMP repeat_end_emit_encodeBlockAsm4MB
  1723. repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1724. LEAL -256(SI), SI
  1725. MOVW $0x0019, (CX)
  1726. MOVW SI, 2(CX)
  1727. ADDQ $0x04, CX
  1728. JMP repeat_end_emit_encodeBlockAsm4MB
  1729. repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1730. LEAL -4(SI), SI
  1731. MOVW $0x0015, (CX)
  1732. MOVB SI, 2(CX)
  1733. ADDQ $0x03, CX
  1734. JMP repeat_end_emit_encodeBlockAsm4MB
  1735. repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1736. SHLL $0x02, SI
  1737. ORL $0x01, SI
  1738. MOVW SI, (CX)
  1739. ADDQ $0x02, CX
  1740. JMP repeat_end_emit_encodeBlockAsm4MB
  1741. repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
  1742. XORQ R8, R8
  1743. LEAL 1(R8)(SI*4), SI
  1744. MOVB DI, 1(CX)
  1745. SARL $0x08, DI
  1746. SHLL $0x05, DI
  1747. ORL DI, SI
  1748. MOVB SI, (CX)
  1749. ADDQ $0x02, CX
  1750. JMP repeat_end_emit_encodeBlockAsm4MB
  1751. long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
  1752. MOVB $0xee, (CX)
  1753. MOVW DI, 1(CX)
  1754. LEAL -60(SI), SI
  1755. ADDQ $0x03, CX
  1756. // emitRepeat
  1757. MOVL SI, R8
  1758. LEAL -4(SI), SI
  1759. CMPL R8, $0x08
  1760. JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1761. CMPL R8, $0x0c
  1762. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1763. CMPL DI, $0x00000800
  1764. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1765. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1766. CMPL SI, $0x00000104
  1767. JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1768. CMPL SI, $0x00010100
  1769. JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
  1770. LEAL -65536(SI), SI
  1771. MOVL SI, DI
  1772. MOVW $0x001d, (CX)
  1773. MOVW SI, 2(CX)
  1774. SARL $0x10, DI
  1775. MOVB DI, 4(CX)
  1776. ADDQ $0x05, CX
  1777. JMP repeat_end_emit_encodeBlockAsm4MB
  1778. repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1779. LEAL -256(SI), SI
  1780. MOVW $0x0019, (CX)
  1781. MOVW SI, 2(CX)
  1782. ADDQ $0x04, CX
  1783. JMP repeat_end_emit_encodeBlockAsm4MB
  1784. repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1785. LEAL -4(SI), SI
  1786. MOVW $0x0015, (CX)
  1787. MOVB SI, 2(CX)
  1788. ADDQ $0x03, CX
  1789. JMP repeat_end_emit_encodeBlockAsm4MB
  1790. repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1791. SHLL $0x02, SI
  1792. ORL $0x01, SI
  1793. MOVW SI, (CX)
  1794. ADDQ $0x02, CX
  1795. JMP repeat_end_emit_encodeBlockAsm4MB
  1796. repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
  1797. XORQ R8, R8
  1798. LEAL 1(R8)(SI*4), SI
  1799. MOVB DI, 1(CX)
  1800. SARL $0x08, DI
  1801. SHLL $0x05, DI
  1802. ORL DI, SI
  1803. MOVB SI, (CX)
  1804. ADDQ $0x02, CX
  1805. JMP repeat_end_emit_encodeBlockAsm4MB
  1806. two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
  1807. MOVL SI, R8
  1808. SHLL $0x02, R8
  1809. CMPL SI, $0x0c
  1810. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
  1811. CMPL DI, $0x00000800
  1812. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
  1813. LEAL -15(R8), R8
  1814. MOVB DI, 1(CX)
  1815. SHRL $0x08, DI
  1816. SHLL $0x05, DI
  1817. ORL DI, R8
  1818. MOVB R8, (CX)
  1819. ADDQ $0x02, CX
  1820. JMP repeat_end_emit_encodeBlockAsm4MB
  1821. emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
  1822. LEAL -2(R8), R8
  1823. MOVB R8, (CX)
  1824. MOVW DI, 1(CX)
  1825. ADDQ $0x03, CX
  1826. repeat_end_emit_encodeBlockAsm4MB:
  1827. MOVL DX, 12(SP)
  1828. JMP search_loop_encodeBlockAsm4MB
  1829. no_repeat_found_encodeBlockAsm4MB:
  1830. CMPL (BX)(SI*1), DI
  1831. JEQ candidate_match_encodeBlockAsm4MB
  1832. SHRQ $0x08, DI
  1833. MOVL (AX)(R10*4), SI
  1834. LEAL 2(DX), R9
  1835. CMPL (BX)(R8*1), DI
  1836. JEQ candidate2_match_encodeBlockAsm4MB
  1837. MOVL R9, (AX)(R10*4)
  1838. SHRQ $0x08, DI
  1839. CMPL (BX)(SI*1), DI
  1840. JEQ candidate3_match_encodeBlockAsm4MB
  1841. MOVL 20(SP), DX
  1842. JMP search_loop_encodeBlockAsm4MB
  1843. candidate3_match_encodeBlockAsm4MB:
  1844. ADDL $0x02, DX
  1845. JMP candidate_match_encodeBlockAsm4MB
  1846. candidate2_match_encodeBlockAsm4MB:
  1847. MOVL R9, (AX)(R10*4)
  1848. INCL DX
  1849. MOVL R8, SI
  1850. candidate_match_encodeBlockAsm4MB:
  1851. MOVL 12(SP), DI
  1852. TESTL SI, SI
  1853. JZ match_extend_back_end_encodeBlockAsm4MB
  1854. match_extend_back_loop_encodeBlockAsm4MB:
  1855. CMPL DX, DI
  1856. JBE match_extend_back_end_encodeBlockAsm4MB
  1857. MOVB -1(BX)(SI*1), R8
  1858. MOVB -1(BX)(DX*1), R9
  1859. CMPB R8, R9
  1860. JNE match_extend_back_end_encodeBlockAsm4MB
  1861. LEAL -1(DX), DX
  1862. DECL SI
  1863. JZ match_extend_back_end_encodeBlockAsm4MB
  1864. JMP match_extend_back_loop_encodeBlockAsm4MB
  1865. match_extend_back_end_encodeBlockAsm4MB:
  1866. MOVL DX, DI
  1867. SUBL 12(SP), DI
  1868. LEAQ 4(CX)(DI*1), DI
  1869. CMPQ DI, (SP)
  1870. JB match_dst_size_check_encodeBlockAsm4MB
  1871. MOVQ $0x00000000, ret+56(FP)
  1872. RET
  1873. match_dst_size_check_encodeBlockAsm4MB:
  1874. MOVL DX, DI
  1875. MOVL 12(SP), R8
  1876. CMPL R8, DI
  1877. JEQ emit_literal_done_match_emit_encodeBlockAsm4MB
  1878. MOVL DI, R9
  1879. MOVL DI, 12(SP)
  1880. LEAQ (BX)(R8*1), DI
  1881. SUBL R8, R9
  1882. LEAL -1(R9), R8
  1883. CMPL R8, $0x3c
  1884. JB one_byte_match_emit_encodeBlockAsm4MB
  1885. CMPL R8, $0x00000100
  1886. JB two_bytes_match_emit_encodeBlockAsm4MB
  1887. CMPL R8, $0x00010000
  1888. JB three_bytes_match_emit_encodeBlockAsm4MB
  1889. MOVL R8, R10
  1890. SHRL $0x10, R10
  1891. MOVB $0xf8, (CX)
  1892. MOVW R8, 1(CX)
  1893. MOVB R10, 3(CX)
  1894. ADDQ $0x04, CX
  1895. JMP memmove_long_match_emit_encodeBlockAsm4MB
  1896. three_bytes_match_emit_encodeBlockAsm4MB:
  1897. MOVB $0xf4, (CX)
  1898. MOVW R8, 1(CX)
  1899. ADDQ $0x03, CX
  1900. JMP memmove_long_match_emit_encodeBlockAsm4MB
  1901. two_bytes_match_emit_encodeBlockAsm4MB:
  1902. MOVB $0xf0, (CX)
  1903. MOVB R8, 1(CX)
  1904. ADDQ $0x02, CX
  1905. CMPL R8, $0x40
  1906. JB memmove_match_emit_encodeBlockAsm4MB
  1907. JMP memmove_long_match_emit_encodeBlockAsm4MB
  1908. one_byte_match_emit_encodeBlockAsm4MB:
  1909. SHLB $0x02, R8
  1910. MOVB R8, (CX)
  1911. ADDQ $0x01, CX
  1912. memmove_match_emit_encodeBlockAsm4MB:
  1913. LEAQ (CX)(R9*1), R8
  1914. // genMemMoveShort
  1915. CMPQ R9, $0x08
  1916. JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
  1917. CMPQ R9, $0x10
  1918. JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
  1919. CMPQ R9, $0x20
  1920. JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
  1921. JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
  1922. emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
  1923. MOVQ (DI), R10
  1924. MOVQ R10, (CX)
  1925. JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
  1926. emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
  1927. MOVQ (DI), R10
  1928. MOVQ -8(DI)(R9*1), DI
  1929. MOVQ R10, (CX)
  1930. MOVQ DI, -8(CX)(R9*1)
  1931. JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
  1932. emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
  1933. MOVOU (DI), X0
  1934. MOVOU -16(DI)(R9*1), X1
  1935. MOVOU X0, (CX)
  1936. MOVOU X1, -16(CX)(R9*1)
  1937. JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
  1938. emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
  1939. MOVOU (DI), X0
  1940. MOVOU 16(DI), X1
  1941. MOVOU -32(DI)(R9*1), X2
  1942. MOVOU -16(DI)(R9*1), X3
  1943. MOVOU X0, (CX)
  1944. MOVOU X1, 16(CX)
  1945. MOVOU X2, -32(CX)(R9*1)
  1946. MOVOU X3, -16(CX)(R9*1)
  1947. memmove_end_copy_match_emit_encodeBlockAsm4MB:
  1948. MOVQ R8, CX
  1949. JMP emit_literal_done_match_emit_encodeBlockAsm4MB
  1950. memmove_long_match_emit_encodeBlockAsm4MB:
  1951. LEAQ (CX)(R9*1), R8
  1952. // genMemMoveLong
  1953. MOVOU (DI), X0
  1954. MOVOU 16(DI), X1
  1955. MOVOU -32(DI)(R9*1), X2
  1956. MOVOU -16(DI)(R9*1), X3
  1957. MOVQ R9, R11
  1958. SHRQ $0x05, R11
  1959. MOVQ CX, R10
  1960. ANDL $0x0000001f, R10
  1961. MOVQ $0x00000040, R12
  1962. SUBQ R10, R12
  1963. DECQ R11
  1964. JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  1965. LEAQ -32(DI)(R12*1), R10
  1966. LEAQ -32(CX)(R12*1), R13
  1967. emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
  1968. MOVOU (R10), X4
  1969. MOVOU 16(R10), X5
  1970. MOVOA X4, (R13)
  1971. MOVOA X5, 16(R13)
  1972. ADDQ $0x20, R13
  1973. ADDQ $0x20, R10
  1974. ADDQ $0x20, R12
  1975. DECQ R11
  1976. JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
  1977. emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
  1978. MOVOU -32(DI)(R12*1), X4
  1979. MOVOU -16(DI)(R12*1), X5
  1980. MOVOA X4, -32(CX)(R12*1)
  1981. MOVOA X5, -16(CX)(R12*1)
  1982. ADDQ $0x20, R12
  1983. CMPQ R9, R12
  1984. JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
  1985. MOVOU X0, (CX)
  1986. MOVOU X1, 16(CX)
  1987. MOVOU X2, -32(CX)(R9*1)
  1988. MOVOU X3, -16(CX)(R9*1)
  1989. MOVQ R8, CX
  1990. emit_literal_done_match_emit_encodeBlockAsm4MB:
  1991. match_nolit_loop_encodeBlockAsm4MB:
  1992. MOVL DX, DI
  1993. SUBL SI, DI
  1994. MOVL DI, 16(SP)
  1995. ADDL $0x04, DX
  1996. ADDL $0x04, SI
  1997. MOVQ src_len+32(FP), DI
  1998. SUBL DX, DI
  1999. LEAQ (BX)(DX*1), R8
  2000. LEAQ (BX)(SI*1), SI
  2001. // matchLen
  2002. XORL R10, R10
  2003. matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
  2004. CMPL DI, $0x10
  2005. JB matchlen_match8_match_nolit_encodeBlockAsm4MB
  2006. MOVQ (R8)(R10*1), R9
  2007. MOVQ 8(R8)(R10*1), R11
  2008. XORQ (SI)(R10*1), R9
  2009. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
  2010. XORQ 8(SI)(R10*1), R11
  2011. JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB
  2012. LEAL -16(DI), DI
  2013. LEAL 16(R10), R10
  2014. JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
  2015. matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
  2016. #ifdef GOAMD64_v3
  2017. TZCNTQ R11, R11
  2018. #else
  2019. BSFQ R11, R11
  2020. #endif
  2021. SARQ $0x03, R11
  2022. LEAL 8(R10)(R11*1), R10
  2023. JMP match_nolit_end_encodeBlockAsm4MB
  2024. matchlen_match8_match_nolit_encodeBlockAsm4MB:
  2025. CMPL DI, $0x08
  2026. JB matchlen_match4_match_nolit_encodeBlockAsm4MB
  2027. MOVQ (R8)(R10*1), R9
  2028. XORQ (SI)(R10*1), R9
  2029. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
  2030. LEAL -8(DI), DI
  2031. LEAL 8(R10), R10
  2032. JMP matchlen_match4_match_nolit_encodeBlockAsm4MB
  2033. matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
  2034. #ifdef GOAMD64_v3
  2035. TZCNTQ R9, R9
  2036. #else
  2037. BSFQ R9, R9
  2038. #endif
  2039. SARQ $0x03, R9
  2040. LEAL (R10)(R9*1), R10
  2041. JMP match_nolit_end_encodeBlockAsm4MB
  2042. matchlen_match4_match_nolit_encodeBlockAsm4MB:
  2043. CMPL DI, $0x04
  2044. JB matchlen_match2_match_nolit_encodeBlockAsm4MB
  2045. MOVL (R8)(R10*1), R9
  2046. CMPL (SI)(R10*1), R9
  2047. JNE matchlen_match2_match_nolit_encodeBlockAsm4MB
  2048. LEAL -4(DI), DI
  2049. LEAL 4(R10), R10
  2050. matchlen_match2_match_nolit_encodeBlockAsm4MB:
  2051. CMPL DI, $0x01
  2052. JE matchlen_match1_match_nolit_encodeBlockAsm4MB
  2053. JB match_nolit_end_encodeBlockAsm4MB
  2054. MOVW (R8)(R10*1), R9
  2055. CMPW (SI)(R10*1), R9
  2056. JNE matchlen_match1_match_nolit_encodeBlockAsm4MB
  2057. LEAL 2(R10), R10
  2058. SUBL $0x02, DI
  2059. JZ match_nolit_end_encodeBlockAsm4MB
  2060. matchlen_match1_match_nolit_encodeBlockAsm4MB:
  2061. MOVB (R8)(R10*1), R9
  2062. CMPB (SI)(R10*1), R9
  2063. JNE match_nolit_end_encodeBlockAsm4MB
  2064. LEAL 1(R10), R10
  2065. match_nolit_end_encodeBlockAsm4MB:
  2066. ADDL R10, DX
  2067. MOVL 16(SP), SI
  2068. ADDL $0x04, R10
  2069. MOVL DX, 12(SP)
  2070. // emitCopy
  2071. CMPL SI, $0x00010000
  2072. JB two_byte_offset_match_nolit_encodeBlockAsm4MB
  2073. CMPL R10, $0x40
  2074. JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB
  2075. MOVB $0xff, (CX)
  2076. MOVL SI, 1(CX)
  2077. LEAL -64(R10), R10
  2078. ADDQ $0x05, CX
  2079. CMPL R10, $0x04
  2080. JB four_bytes_remain_match_nolit_encodeBlockAsm4MB
  2081. // emitRepeat
  2082. MOVL R10, DI
  2083. LEAL -4(R10), R10
  2084. CMPL DI, $0x08
  2085. JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
  2086. CMPL DI, $0x0c
  2087. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
  2088. CMPL SI, $0x00000800
  2089. JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
  2090. cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
  2091. CMPL R10, $0x00000104
  2092. JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
  2093. CMPL R10, $0x00010100
  2094. JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
  2095. LEAL -65536(R10), R10
  2096. MOVL R10, SI
  2097. MOVW $0x001d, (CX)
  2098. MOVW R10, 2(CX)
  2099. SARL $0x10, SI
  2100. MOVB SI, 4(CX)
  2101. ADDQ $0x05, CX
  2102. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2103. repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
  2104. LEAL -256(R10), R10
  2105. MOVW $0x0019, (CX)
  2106. MOVW R10, 2(CX)
  2107. ADDQ $0x04, CX
  2108. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2109. repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
  2110. LEAL -4(R10), R10
  2111. MOVW $0x0015, (CX)
  2112. MOVB R10, 2(CX)
  2113. ADDQ $0x03, CX
  2114. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2115. repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
  2116. SHLL $0x02, R10
  2117. ORL $0x01, R10
  2118. MOVW R10, (CX)
  2119. ADDQ $0x02, CX
  2120. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2121. repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
  2122. XORQ DI, DI
  2123. LEAL 1(DI)(R10*4), R10
  2124. MOVB SI, 1(CX)
  2125. SARL $0x08, SI
  2126. SHLL $0x05, SI
  2127. ORL SI, R10
  2128. MOVB R10, (CX)
  2129. ADDQ $0x02, CX
  2130. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2131. four_bytes_remain_match_nolit_encodeBlockAsm4MB:
  2132. TESTL R10, R10
  2133. JZ match_nolit_emitcopy_end_encodeBlockAsm4MB
  2134. XORL DI, DI
  2135. LEAL -1(DI)(R10*4), R10
  2136. MOVB R10, (CX)
  2137. MOVL SI, 1(CX)
  2138. ADDQ $0x05, CX
  2139. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2140. two_byte_offset_match_nolit_encodeBlockAsm4MB:
  2141. CMPL R10, $0x40
  2142. JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB
  2143. CMPL SI, $0x00000800
  2144. JAE long_offset_short_match_nolit_encodeBlockAsm4MB
  2145. MOVL $0x00000001, DI
  2146. LEAL 16(DI), DI
  2147. MOVB SI, 1(CX)
  2148. SHRL $0x08, SI
  2149. SHLL $0x05, SI
  2150. ORL SI, DI
  2151. MOVB DI, (CX)
  2152. ADDQ $0x02, CX
  2153. SUBL $0x08, R10
  2154. // emitRepeat
  2155. LEAL -4(R10), R10
  2156. JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2157. MOVL R10, DI
  2158. LEAL -4(R10), R10
  2159. CMPL DI, $0x08
  2160. JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2161. CMPL DI, $0x0c
  2162. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2163. CMPL SI, $0x00000800
  2164. JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2165. cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2166. CMPL R10, $0x00000104
  2167. JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2168. CMPL R10, $0x00010100
  2169. JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
  2170. LEAL -65536(R10), R10
  2171. MOVL R10, SI
  2172. MOVW $0x001d, (CX)
  2173. MOVW R10, 2(CX)
  2174. SARL $0x10, SI
  2175. MOVB SI, 4(CX)
  2176. ADDQ $0x05, CX
  2177. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2178. repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2179. LEAL -256(R10), R10
  2180. MOVW $0x0019, (CX)
  2181. MOVW R10, 2(CX)
  2182. ADDQ $0x04, CX
  2183. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2184. repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2185. LEAL -4(R10), R10
  2186. MOVW $0x0015, (CX)
  2187. MOVB R10, 2(CX)
  2188. ADDQ $0x03, CX
  2189. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2190. repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2191. SHLL $0x02, R10
  2192. ORL $0x01, R10
  2193. MOVW R10, (CX)
  2194. ADDQ $0x02, CX
  2195. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2196. repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
  2197. XORQ DI, DI
  2198. LEAL 1(DI)(R10*4), R10
  2199. MOVB SI, 1(CX)
  2200. SARL $0x08, SI
  2201. SHLL $0x05, SI
  2202. ORL SI, R10
  2203. MOVB R10, (CX)
  2204. ADDQ $0x02, CX
  2205. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2206. long_offset_short_match_nolit_encodeBlockAsm4MB:
  2207. MOVB $0xee, (CX)
  2208. MOVW SI, 1(CX)
  2209. LEAL -60(R10), R10
  2210. ADDQ $0x03, CX
  2211. // emitRepeat
  2212. MOVL R10, DI
  2213. LEAL -4(R10), R10
  2214. CMPL DI, $0x08
  2215. JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2216. CMPL DI, $0x0c
  2217. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2218. CMPL SI, $0x00000800
  2219. JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2220. cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2221. CMPL R10, $0x00000104
  2222. JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2223. CMPL R10, $0x00010100
  2224. JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
  2225. LEAL -65536(R10), R10
  2226. MOVL R10, SI
  2227. MOVW $0x001d, (CX)
  2228. MOVW R10, 2(CX)
  2229. SARL $0x10, SI
  2230. MOVB SI, 4(CX)
  2231. ADDQ $0x05, CX
  2232. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2233. repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2234. LEAL -256(R10), R10
  2235. MOVW $0x0019, (CX)
  2236. MOVW R10, 2(CX)
  2237. ADDQ $0x04, CX
  2238. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2239. repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2240. LEAL -4(R10), R10
  2241. MOVW $0x0015, (CX)
  2242. MOVB R10, 2(CX)
  2243. ADDQ $0x03, CX
  2244. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2245. repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2246. SHLL $0x02, R10
  2247. ORL $0x01, R10
  2248. MOVW R10, (CX)
  2249. ADDQ $0x02, CX
  2250. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2251. repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
  2252. XORQ DI, DI
  2253. LEAL 1(DI)(R10*4), R10
  2254. MOVB SI, 1(CX)
  2255. SARL $0x08, SI
  2256. SHLL $0x05, SI
  2257. ORL SI, R10
  2258. MOVB R10, (CX)
  2259. ADDQ $0x02, CX
  2260. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2261. two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
  2262. MOVL R10, DI
  2263. SHLL $0x02, DI
  2264. CMPL R10, $0x0c
  2265. JAE emit_copy_three_match_nolit_encodeBlockAsm4MB
  2266. CMPL SI, $0x00000800
  2267. JAE emit_copy_three_match_nolit_encodeBlockAsm4MB
  2268. LEAL -15(DI), DI
  2269. MOVB SI, 1(CX)
  2270. SHRL $0x08, SI
  2271. SHLL $0x05, SI
  2272. ORL SI, DI
  2273. MOVB DI, (CX)
  2274. ADDQ $0x02, CX
  2275. JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
  2276. emit_copy_three_match_nolit_encodeBlockAsm4MB:
  2277. LEAL -2(DI), DI
  2278. MOVB DI, (CX)
  2279. MOVW SI, 1(CX)
  2280. ADDQ $0x03, CX
  2281. match_nolit_emitcopy_end_encodeBlockAsm4MB:
  2282. CMPL DX, 8(SP)
  2283. JAE emit_remainder_encodeBlockAsm4MB
  2284. MOVQ -2(BX)(DX*1), DI
  2285. CMPQ CX, (SP)
  2286. JB match_nolit_dst_ok_encodeBlockAsm4MB
  2287. MOVQ $0x00000000, ret+56(FP)
  2288. RET
  2289. match_nolit_dst_ok_encodeBlockAsm4MB:
  2290. MOVQ $0x0000cf1bbcdcbf9b, R9
  2291. MOVQ DI, R8
  2292. SHRQ $0x10, DI
  2293. MOVQ DI, SI
  2294. SHLQ $0x10, R8
  2295. IMULQ R9, R8
  2296. SHRQ $0x32, R8
  2297. SHLQ $0x10, SI
  2298. IMULQ R9, SI
  2299. SHRQ $0x32, SI
  2300. LEAL -2(DX), R9
  2301. LEAQ (AX)(SI*4), R10
  2302. MOVL (R10), SI
  2303. MOVL R9, (AX)(R8*4)
  2304. MOVL DX, (R10)
  2305. CMPL (BX)(SI*1), DI
  2306. JEQ match_nolit_loop_encodeBlockAsm4MB
  2307. INCL DX
  2308. JMP search_loop_encodeBlockAsm4MB
  2309. emit_remainder_encodeBlockAsm4MB:
  2310. MOVQ src_len+32(FP), AX
  2311. SUBL 12(SP), AX
  2312. LEAQ 4(CX)(AX*1), AX
  2313. CMPQ AX, (SP)
  2314. JB emit_remainder_ok_encodeBlockAsm4MB
  2315. MOVQ $0x00000000, ret+56(FP)
  2316. RET
  2317. emit_remainder_ok_encodeBlockAsm4MB:
  2318. MOVQ src_len+32(FP), AX
  2319. MOVL 12(SP), DX
  2320. CMPL DX, AX
  2321. JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB
  2322. MOVL AX, SI
  2323. MOVL AX, 12(SP)
  2324. LEAQ (BX)(DX*1), AX
  2325. SUBL DX, SI
  2326. LEAL -1(SI), DX
  2327. CMPL DX, $0x3c
  2328. JB one_byte_emit_remainder_encodeBlockAsm4MB
  2329. CMPL DX, $0x00000100
  2330. JB two_bytes_emit_remainder_encodeBlockAsm4MB
  2331. CMPL DX, $0x00010000
  2332. JB three_bytes_emit_remainder_encodeBlockAsm4MB
  2333. MOVL DX, BX
  2334. SHRL $0x10, BX
  2335. MOVB $0xf8, (CX)
  2336. MOVW DX, 1(CX)
  2337. MOVB BL, 3(CX)
  2338. ADDQ $0x04, CX
  2339. JMP memmove_long_emit_remainder_encodeBlockAsm4MB
  2340. three_bytes_emit_remainder_encodeBlockAsm4MB:
  2341. MOVB $0xf4, (CX)
  2342. MOVW DX, 1(CX)
  2343. ADDQ $0x03, CX
  2344. JMP memmove_long_emit_remainder_encodeBlockAsm4MB
  2345. two_bytes_emit_remainder_encodeBlockAsm4MB:
  2346. MOVB $0xf0, (CX)
  2347. MOVB DL, 1(CX)
  2348. ADDQ $0x02, CX
  2349. CMPL DX, $0x40
  2350. JB memmove_emit_remainder_encodeBlockAsm4MB
  2351. JMP memmove_long_emit_remainder_encodeBlockAsm4MB
  2352. one_byte_emit_remainder_encodeBlockAsm4MB:
  2353. SHLB $0x02, DL
  2354. MOVB DL, (CX)
  2355. ADDQ $0x01, CX
  2356. memmove_emit_remainder_encodeBlockAsm4MB:
  2357. LEAQ (CX)(SI*1), DX
  2358. MOVL SI, BX
  2359. // genMemMoveShort
  2360. CMPQ BX, $0x03
  2361. JB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
  2362. JE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
  2363. CMPQ BX, $0x08
  2364. JB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
  2365. CMPQ BX, $0x10
  2366. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
  2367. CMPQ BX, $0x20
  2368. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
  2369. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
  2370. emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
  2371. MOVB (AX), SI
  2372. MOVB -1(AX)(BX*1), AL
  2373. MOVB SI, (CX)
  2374. MOVB AL, -1(CX)(BX*1)
  2375. JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2376. emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
  2377. MOVW (AX), SI
  2378. MOVB 2(AX), AL
  2379. MOVW SI, (CX)
  2380. MOVB AL, 2(CX)
  2381. JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2382. emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
  2383. MOVL (AX), SI
  2384. MOVL -4(AX)(BX*1), AX
  2385. MOVL SI, (CX)
  2386. MOVL AX, -4(CX)(BX*1)
  2387. JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2388. emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
  2389. MOVQ (AX), SI
  2390. MOVQ -8(AX)(BX*1), AX
  2391. MOVQ SI, (CX)
  2392. MOVQ AX, -8(CX)(BX*1)
  2393. JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2394. emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
  2395. MOVOU (AX), X0
  2396. MOVOU -16(AX)(BX*1), X1
  2397. MOVOU X0, (CX)
  2398. MOVOU X1, -16(CX)(BX*1)
  2399. JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
  2400. emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
  2401. MOVOU (AX), X0
  2402. MOVOU 16(AX), X1
  2403. MOVOU -32(AX)(BX*1), X2
  2404. MOVOU -16(AX)(BX*1), X3
  2405. MOVOU X0, (CX)
  2406. MOVOU X1, 16(CX)
  2407. MOVOU X2, -32(CX)(BX*1)
  2408. MOVOU X3, -16(CX)(BX*1)
  2409. memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
  2410. MOVQ DX, CX
  2411. JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB
  2412. memmove_long_emit_remainder_encodeBlockAsm4MB:
  2413. LEAQ (CX)(SI*1), DX
  2414. MOVL SI, BX
  2415. // genMemMoveLong
  2416. MOVOU (AX), X0
  2417. MOVOU 16(AX), X1
  2418. MOVOU -32(AX)(BX*1), X2
  2419. MOVOU -16(AX)(BX*1), X3
  2420. MOVQ BX, DI
  2421. SHRQ $0x05, DI
  2422. MOVQ CX, SI
  2423. ANDL $0x0000001f, SI
  2424. MOVQ $0x00000040, R8
  2425. SUBQ SI, R8
  2426. DECQ DI
  2427. JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
  2428. LEAQ -32(AX)(R8*1), SI
  2429. LEAQ -32(CX)(R8*1), R9
  2430. emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
  2431. MOVOU (SI), X4
  2432. MOVOU 16(SI), X5
  2433. MOVOA X4, (R9)
  2434. MOVOA X5, 16(R9)
  2435. ADDQ $0x20, R9
  2436. ADDQ $0x20, SI
  2437. ADDQ $0x20, R8
  2438. DECQ DI
  2439. JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
  2440. emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
  2441. MOVOU -32(AX)(R8*1), X4
  2442. MOVOU -16(AX)(R8*1), X5
  2443. MOVOA X4, -32(CX)(R8*1)
  2444. MOVOA X5, -16(CX)(R8*1)
  2445. ADDQ $0x20, R8
  2446. CMPQ BX, R8
  2447. JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
  2448. MOVOU X0, (CX)
  2449. MOVOU X1, 16(CX)
  2450. MOVOU X2, -32(CX)(BX*1)
  2451. MOVOU X3, -16(CX)(BX*1)
  2452. MOVQ DX, CX
  2453. emit_literal_done_emit_remainder_encodeBlockAsm4MB:
  2454. MOVQ dst_base+0(FP), AX
  2455. SUBQ AX, CX
  2456. MOVQ CX, ret+56(FP)
  2457. RET
  2458. // func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
  2459. // Requires: BMI, SSE2
  2460. TEXT ·encodeBlockAsm12B(SB), $24-64
  2461. MOVQ tmp+48(FP), AX
  2462. MOVQ dst_base+0(FP), CX
  2463. MOVQ $0x00000080, DX
  2464. MOVQ AX, BX
  2465. PXOR X0, X0
  2466. zero_loop_encodeBlockAsm12B:
  2467. MOVOU X0, (BX)
  2468. MOVOU X0, 16(BX)
  2469. MOVOU X0, 32(BX)
  2470. MOVOU X0, 48(BX)
  2471. MOVOU X0, 64(BX)
  2472. MOVOU X0, 80(BX)
  2473. MOVOU X0, 96(BX)
  2474. MOVOU X0, 112(BX)
  2475. ADDQ $0x80, BX
  2476. DECQ DX
  2477. JNZ zero_loop_encodeBlockAsm12B
  2478. MOVL $0x00000000, 12(SP)
  2479. MOVQ src_len+32(FP), DX
  2480. LEAQ -9(DX), BX
  2481. LEAQ -8(DX), SI
  2482. MOVL SI, 8(SP)
  2483. SHRQ $0x05, DX
  2484. SUBL DX, BX
  2485. LEAQ (CX)(BX*1), BX
  2486. MOVQ BX, (SP)
  2487. MOVL $0x00000001, DX
  2488. MOVL DX, 16(SP)
  2489. MOVQ src_base+24(FP), BX
  2490. search_loop_encodeBlockAsm12B:
  2491. MOVL DX, SI
  2492. SUBL 12(SP), SI
  2493. SHRL $0x05, SI
  2494. LEAL 4(DX)(SI*1), SI
  2495. CMPL SI, 8(SP)
  2496. JAE emit_remainder_encodeBlockAsm12B
  2497. MOVQ (BX)(DX*1), DI
  2498. MOVL SI, 20(SP)
  2499. MOVQ $0x000000cf1bbcdcbb, R9
  2500. MOVQ DI, R10
  2501. MOVQ DI, R11
  2502. SHRQ $0x08, R11
  2503. SHLQ $0x18, R10
  2504. IMULQ R9, R10
  2505. SHRQ $0x34, R10
  2506. SHLQ $0x18, R11
  2507. IMULQ R9, R11
  2508. SHRQ $0x34, R11
  2509. MOVL (AX)(R10*4), SI
  2510. MOVL (AX)(R11*4), R8
  2511. MOVL DX, (AX)(R10*4)
  2512. LEAL 1(DX), R10
  2513. MOVL R10, (AX)(R11*4)
  2514. MOVQ DI, R10
  2515. SHRQ $0x10, R10
  2516. SHLQ $0x18, R10
  2517. IMULQ R9, R10
  2518. SHRQ $0x34, R10
  2519. MOVL DX, R9
  2520. SUBL 16(SP), R9
  2521. MOVL 1(BX)(R9*1), R11
  2522. MOVQ DI, R9
  2523. SHRQ $0x08, R9
  2524. CMPL R9, R11
  2525. JNE no_repeat_found_encodeBlockAsm12B
  2526. LEAL 1(DX), DI
  2527. MOVL 12(SP), R8
  2528. MOVL DI, SI
  2529. SUBL 16(SP), SI
  2530. JZ repeat_extend_back_end_encodeBlockAsm12B
  2531. repeat_extend_back_loop_encodeBlockAsm12B:
  2532. CMPL DI, R8
  2533. JBE repeat_extend_back_end_encodeBlockAsm12B
  2534. MOVB -1(BX)(SI*1), R9
  2535. MOVB -1(BX)(DI*1), R10
  2536. CMPB R9, R10
  2537. JNE repeat_extend_back_end_encodeBlockAsm12B
  2538. LEAL -1(DI), DI
  2539. DECL SI
  2540. JNZ repeat_extend_back_loop_encodeBlockAsm12B
  2541. repeat_extend_back_end_encodeBlockAsm12B:
  2542. MOVL DI, SI
  2543. SUBL 12(SP), SI
  2544. LEAQ 3(CX)(SI*1), SI
  2545. CMPQ SI, (SP)
  2546. JB repeat_dst_size_check_encodeBlockAsm12B
  2547. MOVQ $0x00000000, ret+56(FP)
  2548. RET
  2549. repeat_dst_size_check_encodeBlockAsm12B:
  2550. MOVL 12(SP), SI
  2551. CMPL SI, DI
  2552. JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
  2553. MOVL DI, R9
  2554. MOVL DI, 12(SP)
  2555. LEAQ (BX)(SI*1), R10
  2556. SUBL SI, R9
  2557. LEAL -1(R9), SI
  2558. CMPL SI, $0x3c
  2559. JB one_byte_repeat_emit_encodeBlockAsm12B
  2560. CMPL SI, $0x00000100
  2561. JB two_bytes_repeat_emit_encodeBlockAsm12B
  2562. JB three_bytes_repeat_emit_encodeBlockAsm12B
  2563. three_bytes_repeat_emit_encodeBlockAsm12B:
  2564. MOVB $0xf4, (CX)
  2565. MOVW SI, 1(CX)
  2566. ADDQ $0x03, CX
  2567. JMP memmove_long_repeat_emit_encodeBlockAsm12B
  2568. two_bytes_repeat_emit_encodeBlockAsm12B:
  2569. MOVB $0xf0, (CX)
  2570. MOVB SI, 1(CX)
  2571. ADDQ $0x02, CX
  2572. CMPL SI, $0x40
  2573. JB memmove_repeat_emit_encodeBlockAsm12B
  2574. JMP memmove_long_repeat_emit_encodeBlockAsm12B
  2575. one_byte_repeat_emit_encodeBlockAsm12B:
  2576. SHLB $0x02, SI
  2577. MOVB SI, (CX)
  2578. ADDQ $0x01, CX
  2579. memmove_repeat_emit_encodeBlockAsm12B:
  2580. LEAQ (CX)(R9*1), SI
  2581. // genMemMoveShort
  2582. CMPQ R9, $0x08
  2583. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
  2584. CMPQ R9, $0x10
  2585. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
  2586. CMPQ R9, $0x20
  2587. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
  2588. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
  2589. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
  2590. MOVQ (R10), R11
  2591. MOVQ R11, (CX)
  2592. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  2593. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
  2594. MOVQ (R10), R11
  2595. MOVQ -8(R10)(R9*1), R10
  2596. MOVQ R11, (CX)
  2597. MOVQ R10, -8(CX)(R9*1)
  2598. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  2599. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
  2600. MOVOU (R10), X0
  2601. MOVOU -16(R10)(R9*1), X1
  2602. MOVOU X0, (CX)
  2603. MOVOU X1, -16(CX)(R9*1)
  2604. JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
  2605. emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
  2606. MOVOU (R10), X0
  2607. MOVOU 16(R10), X1
  2608. MOVOU -32(R10)(R9*1), X2
  2609. MOVOU -16(R10)(R9*1), X3
  2610. MOVOU X0, (CX)
  2611. MOVOU X1, 16(CX)
  2612. MOVOU X2, -32(CX)(R9*1)
  2613. MOVOU X3, -16(CX)(R9*1)
  2614. memmove_end_copy_repeat_emit_encodeBlockAsm12B:
  2615. MOVQ SI, CX
  2616. JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
  2617. memmove_long_repeat_emit_encodeBlockAsm12B:
  2618. LEAQ (CX)(R9*1), SI
  2619. // genMemMoveLong
  2620. MOVOU (R10), X0
  2621. MOVOU 16(R10), X1
  2622. MOVOU -32(R10)(R9*1), X2
  2623. MOVOU -16(R10)(R9*1), X3
  2624. MOVQ R9, R12
  2625. SHRQ $0x05, R12
  2626. MOVQ CX, R11
  2627. ANDL $0x0000001f, R11
  2628. MOVQ $0x00000040, R13
  2629. SUBQ R11, R13
  2630. DECQ R12
  2631. JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  2632. LEAQ -32(R10)(R13*1), R11
  2633. LEAQ -32(CX)(R13*1), R14
  2634. emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
  2635. MOVOU (R11), X4
  2636. MOVOU 16(R11), X5
  2637. MOVOA X4, (R14)
  2638. MOVOA X5, 16(R14)
  2639. ADDQ $0x20, R14
  2640. ADDQ $0x20, R11
  2641. ADDQ $0x20, R13
  2642. DECQ R12
  2643. JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
  2644. emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
  2645. MOVOU -32(R10)(R13*1), X4
  2646. MOVOU -16(R10)(R13*1), X5
  2647. MOVOA X4, -32(CX)(R13*1)
  2648. MOVOA X5, -16(CX)(R13*1)
  2649. ADDQ $0x20, R13
  2650. CMPQ R9, R13
  2651. JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  2652. MOVOU X0, (CX)
  2653. MOVOU X1, 16(CX)
  2654. MOVOU X2, -32(CX)(R9*1)
  2655. MOVOU X3, -16(CX)(R9*1)
  2656. MOVQ SI, CX
  2657. emit_literal_done_repeat_emit_encodeBlockAsm12B:
  2658. ADDL $0x05, DX
  2659. MOVL DX, SI
  2660. SUBL 16(SP), SI
  2661. MOVQ src_len+32(FP), R9
  2662. SUBL DX, R9
  2663. LEAQ (BX)(DX*1), R10
  2664. LEAQ (BX)(SI*1), SI
  2665. // matchLen
  2666. XORL R12, R12
  2667. matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
  2668. CMPL R9, $0x10
  2669. JB matchlen_match8_repeat_extend_encodeBlockAsm12B
  2670. MOVQ (R10)(R12*1), R11
  2671. MOVQ 8(R10)(R12*1), R13
  2672. XORQ (SI)(R12*1), R11
  2673. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
  2674. XORQ 8(SI)(R12*1), R13
  2675. JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B
  2676. LEAL -16(R9), R9
  2677. LEAL 16(R12), R12
  2678. JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
  2679. matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
  2680. #ifdef GOAMD64_v3
  2681. TZCNTQ R13, R13
  2682. #else
  2683. BSFQ R13, R13
  2684. #endif
  2685. SARQ $0x03, R13
  2686. LEAL 8(R12)(R13*1), R12
  2687. JMP repeat_extend_forward_end_encodeBlockAsm12B
  2688. matchlen_match8_repeat_extend_encodeBlockAsm12B:
  2689. CMPL R9, $0x08
  2690. JB matchlen_match4_repeat_extend_encodeBlockAsm12B
  2691. MOVQ (R10)(R12*1), R11
  2692. XORQ (SI)(R12*1), R11
  2693. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
  2694. LEAL -8(R9), R9
  2695. LEAL 8(R12), R12
  2696. JMP matchlen_match4_repeat_extend_encodeBlockAsm12B
  2697. matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
  2698. #ifdef GOAMD64_v3
  2699. TZCNTQ R11, R11
  2700. #else
  2701. BSFQ R11, R11
  2702. #endif
  2703. SARQ $0x03, R11
  2704. LEAL (R12)(R11*1), R12
  2705. JMP repeat_extend_forward_end_encodeBlockAsm12B
  2706. matchlen_match4_repeat_extend_encodeBlockAsm12B:
  2707. CMPL R9, $0x04
  2708. JB matchlen_match2_repeat_extend_encodeBlockAsm12B
  2709. MOVL (R10)(R12*1), R11
  2710. CMPL (SI)(R12*1), R11
  2711. JNE matchlen_match2_repeat_extend_encodeBlockAsm12B
  2712. LEAL -4(R9), R9
  2713. LEAL 4(R12), R12
  2714. matchlen_match2_repeat_extend_encodeBlockAsm12B:
  2715. CMPL R9, $0x01
  2716. JE matchlen_match1_repeat_extend_encodeBlockAsm12B
  2717. JB repeat_extend_forward_end_encodeBlockAsm12B
  2718. MOVW (R10)(R12*1), R11
  2719. CMPW (SI)(R12*1), R11
  2720. JNE matchlen_match1_repeat_extend_encodeBlockAsm12B
  2721. LEAL 2(R12), R12
  2722. SUBL $0x02, R9
  2723. JZ repeat_extend_forward_end_encodeBlockAsm12B
  2724. matchlen_match1_repeat_extend_encodeBlockAsm12B:
  2725. MOVB (R10)(R12*1), R11
  2726. CMPB (SI)(R12*1), R11
  2727. JNE repeat_extend_forward_end_encodeBlockAsm12B
  2728. LEAL 1(R12), R12
  2729. repeat_extend_forward_end_encodeBlockAsm12B:
  2730. ADDL R12, DX
  2731. MOVL DX, SI
  2732. SUBL DI, SI
  2733. MOVL 16(SP), DI
  2734. TESTL R8, R8
  2735. JZ repeat_as_copy_encodeBlockAsm12B
  2736. // emitRepeat
  2737. MOVL SI, R8
  2738. LEAL -4(SI), SI
  2739. CMPL R8, $0x08
  2740. JBE repeat_two_match_repeat_encodeBlockAsm12B
  2741. CMPL R8, $0x0c
  2742. JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
  2743. CMPL DI, $0x00000800
  2744. JB repeat_two_offset_match_repeat_encodeBlockAsm12B
  2745. cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
  2746. CMPL SI, $0x00000104
  2747. JB repeat_three_match_repeat_encodeBlockAsm12B
  2748. LEAL -256(SI), SI
  2749. MOVW $0x0019, (CX)
  2750. MOVW SI, 2(CX)
  2751. ADDQ $0x04, CX
  2752. JMP repeat_end_emit_encodeBlockAsm12B
  2753. repeat_three_match_repeat_encodeBlockAsm12B:
  2754. LEAL -4(SI), SI
  2755. MOVW $0x0015, (CX)
  2756. MOVB SI, 2(CX)
  2757. ADDQ $0x03, CX
  2758. JMP repeat_end_emit_encodeBlockAsm12B
  2759. repeat_two_match_repeat_encodeBlockAsm12B:
  2760. SHLL $0x02, SI
  2761. ORL $0x01, SI
  2762. MOVW SI, (CX)
  2763. ADDQ $0x02, CX
  2764. JMP repeat_end_emit_encodeBlockAsm12B
  2765. repeat_two_offset_match_repeat_encodeBlockAsm12B:
  2766. XORQ R8, R8
  2767. LEAL 1(R8)(SI*4), SI
  2768. MOVB DI, 1(CX)
  2769. SARL $0x08, DI
  2770. SHLL $0x05, DI
  2771. ORL DI, SI
  2772. MOVB SI, (CX)
  2773. ADDQ $0x02, CX
  2774. JMP repeat_end_emit_encodeBlockAsm12B
  2775. repeat_as_copy_encodeBlockAsm12B:
  2776. // emitCopy
  2777. CMPL SI, $0x40
  2778. JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
  2779. CMPL DI, $0x00000800
  2780. JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B
  2781. MOVL $0x00000001, R8
  2782. LEAL 16(R8), R8
  2783. MOVB DI, 1(CX)
  2784. SHRL $0x08, DI
  2785. SHLL $0x05, DI
  2786. ORL DI, R8
  2787. MOVB R8, (CX)
  2788. ADDQ $0x02, CX
  2789. SUBL $0x08, SI
  2790. // emitRepeat
  2791. LEAL -4(SI), SI
  2792. JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  2793. MOVL SI, R8
  2794. LEAL -4(SI), SI
  2795. CMPL R8, $0x08
  2796. JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  2797. CMPL R8, $0x0c
  2798. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  2799. CMPL DI, $0x00000800
  2800. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  2801. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  2802. CMPL SI, $0x00000104
  2803. JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
  2804. LEAL -256(SI), SI
  2805. MOVW $0x0019, (CX)
  2806. MOVW SI, 2(CX)
  2807. ADDQ $0x04, CX
  2808. JMP repeat_end_emit_encodeBlockAsm12B
  2809. repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  2810. LEAL -4(SI), SI
  2811. MOVW $0x0015, (CX)
  2812. MOVB SI, 2(CX)
  2813. ADDQ $0x03, CX
  2814. JMP repeat_end_emit_encodeBlockAsm12B
  2815. repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  2816. SHLL $0x02, SI
  2817. ORL $0x01, SI
  2818. MOVW SI, (CX)
  2819. ADDQ $0x02, CX
  2820. JMP repeat_end_emit_encodeBlockAsm12B
  2821. repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
  2822. XORQ R8, R8
  2823. LEAL 1(R8)(SI*4), SI
  2824. MOVB DI, 1(CX)
  2825. SARL $0x08, DI
  2826. SHLL $0x05, DI
  2827. ORL DI, SI
  2828. MOVB SI, (CX)
  2829. ADDQ $0x02, CX
  2830. JMP repeat_end_emit_encodeBlockAsm12B
  2831. long_offset_short_repeat_as_copy_encodeBlockAsm12B:
  2832. MOVB $0xee, (CX)
  2833. MOVW DI, 1(CX)
  2834. LEAL -60(SI), SI
  2835. ADDQ $0x03, CX
  2836. // emitRepeat
  2837. MOVL SI, R8
  2838. LEAL -4(SI), SI
  2839. CMPL R8, $0x08
  2840. JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  2841. CMPL R8, $0x0c
  2842. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  2843. CMPL DI, $0x00000800
  2844. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  2845. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  2846. CMPL SI, $0x00000104
  2847. JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
  2848. LEAL -256(SI), SI
  2849. MOVW $0x0019, (CX)
  2850. MOVW SI, 2(CX)
  2851. ADDQ $0x04, CX
  2852. JMP repeat_end_emit_encodeBlockAsm12B
  2853. repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  2854. LEAL -4(SI), SI
  2855. MOVW $0x0015, (CX)
  2856. MOVB SI, 2(CX)
  2857. ADDQ $0x03, CX
  2858. JMP repeat_end_emit_encodeBlockAsm12B
  2859. repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  2860. SHLL $0x02, SI
  2861. ORL $0x01, SI
  2862. MOVW SI, (CX)
  2863. ADDQ $0x02, CX
  2864. JMP repeat_end_emit_encodeBlockAsm12B
  2865. repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
  2866. XORQ R8, R8
  2867. LEAL 1(R8)(SI*4), SI
  2868. MOVB DI, 1(CX)
  2869. SARL $0x08, DI
  2870. SHLL $0x05, DI
  2871. ORL DI, SI
  2872. MOVB SI, (CX)
  2873. ADDQ $0x02, CX
  2874. JMP repeat_end_emit_encodeBlockAsm12B
  2875. two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
  2876. MOVL SI, R8
  2877. SHLL $0x02, R8
  2878. CMPL SI, $0x0c
  2879. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
  2880. CMPL DI, $0x00000800
  2881. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
  2882. LEAL -15(R8), R8
  2883. MOVB DI, 1(CX)
  2884. SHRL $0x08, DI
  2885. SHLL $0x05, DI
  2886. ORL DI, R8
  2887. MOVB R8, (CX)
  2888. ADDQ $0x02, CX
  2889. JMP repeat_end_emit_encodeBlockAsm12B
  2890. emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
  2891. LEAL -2(R8), R8
  2892. MOVB R8, (CX)
  2893. MOVW DI, 1(CX)
  2894. ADDQ $0x03, CX
  2895. repeat_end_emit_encodeBlockAsm12B:
  2896. MOVL DX, 12(SP)
  2897. JMP search_loop_encodeBlockAsm12B
  2898. no_repeat_found_encodeBlockAsm12B:
  2899. CMPL (BX)(SI*1), DI
  2900. JEQ candidate_match_encodeBlockAsm12B
  2901. SHRQ $0x08, DI
  2902. MOVL (AX)(R10*4), SI
  2903. LEAL 2(DX), R9
  2904. CMPL (BX)(R8*1), DI
  2905. JEQ candidate2_match_encodeBlockAsm12B
  2906. MOVL R9, (AX)(R10*4)
  2907. SHRQ $0x08, DI
  2908. CMPL (BX)(SI*1), DI
  2909. JEQ candidate3_match_encodeBlockAsm12B
  2910. MOVL 20(SP), DX
  2911. JMP search_loop_encodeBlockAsm12B
  2912. candidate3_match_encodeBlockAsm12B:
  2913. ADDL $0x02, DX
  2914. JMP candidate_match_encodeBlockAsm12B
  2915. candidate2_match_encodeBlockAsm12B:
  2916. MOVL R9, (AX)(R10*4)
  2917. INCL DX
  2918. MOVL R8, SI
  2919. candidate_match_encodeBlockAsm12B:
  2920. MOVL 12(SP), DI
  2921. TESTL SI, SI
  2922. JZ match_extend_back_end_encodeBlockAsm12B
  2923. match_extend_back_loop_encodeBlockAsm12B:
  2924. CMPL DX, DI
  2925. JBE match_extend_back_end_encodeBlockAsm12B
  2926. MOVB -1(BX)(SI*1), R8
  2927. MOVB -1(BX)(DX*1), R9
  2928. CMPB R8, R9
  2929. JNE match_extend_back_end_encodeBlockAsm12B
  2930. LEAL -1(DX), DX
  2931. DECL SI
  2932. JZ match_extend_back_end_encodeBlockAsm12B
  2933. JMP match_extend_back_loop_encodeBlockAsm12B
  2934. match_extend_back_end_encodeBlockAsm12B:
  2935. MOVL DX, DI
  2936. SUBL 12(SP), DI
  2937. LEAQ 3(CX)(DI*1), DI
  2938. CMPQ DI, (SP)
  2939. JB match_dst_size_check_encodeBlockAsm12B
  2940. MOVQ $0x00000000, ret+56(FP)
  2941. RET
  2942. match_dst_size_check_encodeBlockAsm12B:
  2943. MOVL DX, DI
  2944. MOVL 12(SP), R8
  2945. CMPL R8, DI
  2946. JEQ emit_literal_done_match_emit_encodeBlockAsm12B
  2947. MOVL DI, R9
  2948. MOVL DI, 12(SP)
  2949. LEAQ (BX)(R8*1), DI
  2950. SUBL R8, R9
  2951. LEAL -1(R9), R8
  2952. CMPL R8, $0x3c
  2953. JB one_byte_match_emit_encodeBlockAsm12B
  2954. CMPL R8, $0x00000100
  2955. JB two_bytes_match_emit_encodeBlockAsm12B
  2956. JB three_bytes_match_emit_encodeBlockAsm12B
  2957. three_bytes_match_emit_encodeBlockAsm12B:
  2958. MOVB $0xf4, (CX)
  2959. MOVW R8, 1(CX)
  2960. ADDQ $0x03, CX
  2961. JMP memmove_long_match_emit_encodeBlockAsm12B
  2962. two_bytes_match_emit_encodeBlockAsm12B:
  2963. MOVB $0xf0, (CX)
  2964. MOVB R8, 1(CX)
  2965. ADDQ $0x02, CX
  2966. CMPL R8, $0x40
  2967. JB memmove_match_emit_encodeBlockAsm12B
  2968. JMP memmove_long_match_emit_encodeBlockAsm12B
  2969. one_byte_match_emit_encodeBlockAsm12B:
  2970. SHLB $0x02, R8
  2971. MOVB R8, (CX)
  2972. ADDQ $0x01, CX
  2973. memmove_match_emit_encodeBlockAsm12B:
  2974. LEAQ (CX)(R9*1), R8
  2975. // genMemMoveShort
  2976. CMPQ R9, $0x08
  2977. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
  2978. CMPQ R9, $0x10
  2979. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
  2980. CMPQ R9, $0x20
  2981. JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
  2982. JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
  2983. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
  2984. MOVQ (DI), R10
  2985. MOVQ R10, (CX)
  2986. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2987. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
  2988. MOVQ (DI), R10
  2989. MOVQ -8(DI)(R9*1), DI
  2990. MOVQ R10, (CX)
  2991. MOVQ DI, -8(CX)(R9*1)
  2992. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2993. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
  2994. MOVOU (DI), X0
  2995. MOVOU -16(DI)(R9*1), X1
  2996. MOVOU X0, (CX)
  2997. MOVOU X1, -16(CX)(R9*1)
  2998. JMP memmove_end_copy_match_emit_encodeBlockAsm12B
  2999. emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
  3000. MOVOU (DI), X0
  3001. MOVOU 16(DI), X1
  3002. MOVOU -32(DI)(R9*1), X2
  3003. MOVOU -16(DI)(R9*1), X3
  3004. MOVOU X0, (CX)
  3005. MOVOU X1, 16(CX)
  3006. MOVOU X2, -32(CX)(R9*1)
  3007. MOVOU X3, -16(CX)(R9*1)
  3008. memmove_end_copy_match_emit_encodeBlockAsm12B:
  3009. MOVQ R8, CX
  3010. JMP emit_literal_done_match_emit_encodeBlockAsm12B
  3011. memmove_long_match_emit_encodeBlockAsm12B:
  3012. LEAQ (CX)(R9*1), R8
  3013. // genMemMoveLong
  3014. MOVOU (DI), X0
  3015. MOVOU 16(DI), X1
  3016. MOVOU -32(DI)(R9*1), X2
  3017. MOVOU -16(DI)(R9*1), X3
  3018. MOVQ R9, R11
  3019. SHRQ $0x05, R11
  3020. MOVQ CX, R10
  3021. ANDL $0x0000001f, R10
  3022. MOVQ $0x00000040, R12
  3023. SUBQ R10, R12
  3024. DECQ R11
  3025. JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  3026. LEAQ -32(DI)(R12*1), R10
  3027. LEAQ -32(CX)(R12*1), R13
  3028. emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
  3029. MOVOU (R10), X4
  3030. MOVOU 16(R10), X5
  3031. MOVOA X4, (R13)
  3032. MOVOA X5, 16(R13)
  3033. ADDQ $0x20, R13
  3034. ADDQ $0x20, R10
  3035. ADDQ $0x20, R12
  3036. DECQ R11
  3037. JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
  3038. emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
  3039. MOVOU -32(DI)(R12*1), X4
  3040. MOVOU -16(DI)(R12*1), X5
  3041. MOVOA X4, -32(CX)(R12*1)
  3042. MOVOA X5, -16(CX)(R12*1)
  3043. ADDQ $0x20, R12
  3044. CMPQ R9, R12
  3045. JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
  3046. MOVOU X0, (CX)
  3047. MOVOU X1, 16(CX)
  3048. MOVOU X2, -32(CX)(R9*1)
  3049. MOVOU X3, -16(CX)(R9*1)
  3050. MOVQ R8, CX
  3051. emit_literal_done_match_emit_encodeBlockAsm12B:
  3052. match_nolit_loop_encodeBlockAsm12B:
  3053. MOVL DX, DI
  3054. SUBL SI, DI
  3055. MOVL DI, 16(SP)
  3056. ADDL $0x04, DX
  3057. ADDL $0x04, SI
  3058. MOVQ src_len+32(FP), DI
  3059. SUBL DX, DI
  3060. LEAQ (BX)(DX*1), R8
  3061. LEAQ (BX)(SI*1), SI
  3062. // matchLen
  3063. XORL R10, R10
  3064. matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
  3065. CMPL DI, $0x10
  3066. JB matchlen_match8_match_nolit_encodeBlockAsm12B
  3067. MOVQ (R8)(R10*1), R9
  3068. MOVQ 8(R8)(R10*1), R11
  3069. XORQ (SI)(R10*1), R9
  3070. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B
  3071. XORQ 8(SI)(R10*1), R11
  3072. JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B
  3073. LEAL -16(DI), DI
  3074. LEAL 16(R10), R10
  3075. JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B
  3076. matchlen_bsf_16match_nolit_encodeBlockAsm12B:
  3077. #ifdef GOAMD64_v3
  3078. TZCNTQ R11, R11
  3079. #else
  3080. BSFQ R11, R11
  3081. #endif
  3082. SARQ $0x03, R11
  3083. LEAL 8(R10)(R11*1), R10
  3084. JMP match_nolit_end_encodeBlockAsm12B
  3085. matchlen_match8_match_nolit_encodeBlockAsm12B:
  3086. CMPL DI, $0x08
  3087. JB matchlen_match4_match_nolit_encodeBlockAsm12B
  3088. MOVQ (R8)(R10*1), R9
  3089. XORQ (SI)(R10*1), R9
  3090. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B
  3091. LEAL -8(DI), DI
  3092. LEAL 8(R10), R10
  3093. JMP matchlen_match4_match_nolit_encodeBlockAsm12B
  3094. matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
  3095. #ifdef GOAMD64_v3
  3096. TZCNTQ R9, R9
  3097. #else
  3098. BSFQ R9, R9
  3099. #endif
  3100. SARQ $0x03, R9
  3101. LEAL (R10)(R9*1), R10
  3102. JMP match_nolit_end_encodeBlockAsm12B
  3103. matchlen_match4_match_nolit_encodeBlockAsm12B:
  3104. CMPL DI, $0x04
  3105. JB matchlen_match2_match_nolit_encodeBlockAsm12B
  3106. MOVL (R8)(R10*1), R9
  3107. CMPL (SI)(R10*1), R9
  3108. JNE matchlen_match2_match_nolit_encodeBlockAsm12B
  3109. LEAL -4(DI), DI
  3110. LEAL 4(R10), R10
  3111. matchlen_match2_match_nolit_encodeBlockAsm12B:
  3112. CMPL DI, $0x01
  3113. JE matchlen_match1_match_nolit_encodeBlockAsm12B
  3114. JB match_nolit_end_encodeBlockAsm12B
  3115. MOVW (R8)(R10*1), R9
  3116. CMPW (SI)(R10*1), R9
  3117. JNE matchlen_match1_match_nolit_encodeBlockAsm12B
  3118. LEAL 2(R10), R10
  3119. SUBL $0x02, DI
  3120. JZ match_nolit_end_encodeBlockAsm12B
  3121. matchlen_match1_match_nolit_encodeBlockAsm12B:
  3122. MOVB (R8)(R10*1), R9
  3123. CMPB (SI)(R10*1), R9
  3124. JNE match_nolit_end_encodeBlockAsm12B
  3125. LEAL 1(R10), R10
  3126. match_nolit_end_encodeBlockAsm12B:
  3127. ADDL R10, DX
  3128. MOVL 16(SP), SI
  3129. ADDL $0x04, R10
  3130. MOVL DX, 12(SP)
  3131. // emitCopy
  3132. CMPL R10, $0x40
  3133. JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B
  3134. CMPL SI, $0x00000800
  3135. JAE long_offset_short_match_nolit_encodeBlockAsm12B
  3136. MOVL $0x00000001, DI
  3137. LEAL 16(DI), DI
  3138. MOVB SI, 1(CX)
  3139. SHRL $0x08, SI
  3140. SHLL $0x05, SI
  3141. ORL SI, DI
  3142. MOVB DI, (CX)
  3143. ADDQ $0x02, CX
  3144. SUBL $0x08, R10
  3145. // emitRepeat
  3146. LEAL -4(R10), R10
  3147. JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3148. MOVL R10, DI
  3149. LEAL -4(R10), R10
  3150. CMPL DI, $0x08
  3151. JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3152. CMPL DI, $0x0c
  3153. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3154. CMPL SI, $0x00000800
  3155. JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3156. cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3157. CMPL R10, $0x00000104
  3158. JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
  3159. LEAL -256(R10), R10
  3160. MOVW $0x0019, (CX)
  3161. MOVW R10, 2(CX)
  3162. ADDQ $0x04, CX
  3163. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3164. repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3165. LEAL -4(R10), R10
  3166. MOVW $0x0015, (CX)
  3167. MOVB R10, 2(CX)
  3168. ADDQ $0x03, CX
  3169. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3170. repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3171. SHLL $0x02, R10
  3172. ORL $0x01, R10
  3173. MOVW R10, (CX)
  3174. ADDQ $0x02, CX
  3175. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3176. repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
  3177. XORQ DI, DI
  3178. LEAL 1(DI)(R10*4), R10
  3179. MOVB SI, 1(CX)
  3180. SARL $0x08, SI
  3181. SHLL $0x05, SI
  3182. ORL SI, R10
  3183. MOVB R10, (CX)
  3184. ADDQ $0x02, CX
  3185. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3186. long_offset_short_match_nolit_encodeBlockAsm12B:
  3187. MOVB $0xee, (CX)
  3188. MOVW SI, 1(CX)
  3189. LEAL -60(R10), R10
  3190. ADDQ $0x03, CX
  3191. // emitRepeat
  3192. MOVL R10, DI
  3193. LEAL -4(R10), R10
  3194. CMPL DI, $0x08
  3195. JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
  3196. CMPL DI, $0x0c
  3197. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
  3198. CMPL SI, $0x00000800
  3199. JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
  3200. cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3201. CMPL R10, $0x00000104
  3202. JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
  3203. LEAL -256(R10), R10
  3204. MOVW $0x0019, (CX)
  3205. MOVW R10, 2(CX)
  3206. ADDQ $0x04, CX
  3207. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3208. repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3209. LEAL -4(R10), R10
  3210. MOVW $0x0015, (CX)
  3211. MOVB R10, 2(CX)
  3212. ADDQ $0x03, CX
  3213. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3214. repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3215. SHLL $0x02, R10
  3216. ORL $0x01, R10
  3217. MOVW R10, (CX)
  3218. ADDQ $0x02, CX
  3219. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3220. repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
  3221. XORQ DI, DI
  3222. LEAL 1(DI)(R10*4), R10
  3223. MOVB SI, 1(CX)
  3224. SARL $0x08, SI
  3225. SHLL $0x05, SI
  3226. ORL SI, R10
  3227. MOVB R10, (CX)
  3228. ADDQ $0x02, CX
  3229. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3230. two_byte_offset_short_match_nolit_encodeBlockAsm12B:
  3231. MOVL R10, DI
  3232. SHLL $0x02, DI
  3233. CMPL R10, $0x0c
  3234. JAE emit_copy_three_match_nolit_encodeBlockAsm12B
  3235. CMPL SI, $0x00000800
  3236. JAE emit_copy_three_match_nolit_encodeBlockAsm12B
  3237. LEAL -15(DI), DI
  3238. MOVB SI, 1(CX)
  3239. SHRL $0x08, SI
  3240. SHLL $0x05, SI
  3241. ORL SI, DI
  3242. MOVB DI, (CX)
  3243. ADDQ $0x02, CX
  3244. JMP match_nolit_emitcopy_end_encodeBlockAsm12B
  3245. emit_copy_three_match_nolit_encodeBlockAsm12B:
  3246. LEAL -2(DI), DI
  3247. MOVB DI, (CX)
  3248. MOVW SI, 1(CX)
  3249. ADDQ $0x03, CX
  3250. match_nolit_emitcopy_end_encodeBlockAsm12B:
  3251. CMPL DX, 8(SP)
  3252. JAE emit_remainder_encodeBlockAsm12B
  3253. MOVQ -2(BX)(DX*1), DI
  3254. CMPQ CX, (SP)
  3255. JB match_nolit_dst_ok_encodeBlockAsm12B
  3256. MOVQ $0x00000000, ret+56(FP)
  3257. RET
  3258. match_nolit_dst_ok_encodeBlockAsm12B:
  3259. MOVQ $0x000000cf1bbcdcbb, R9
  3260. MOVQ DI, R8
  3261. SHRQ $0x10, DI
  3262. MOVQ DI, SI
  3263. SHLQ $0x18, R8
  3264. IMULQ R9, R8
  3265. SHRQ $0x34, R8
  3266. SHLQ $0x18, SI
  3267. IMULQ R9, SI
  3268. SHRQ $0x34, SI
  3269. LEAL -2(DX), R9
  3270. LEAQ (AX)(SI*4), R10
  3271. MOVL (R10), SI
  3272. MOVL R9, (AX)(R8*4)
  3273. MOVL DX, (R10)
  3274. CMPL (BX)(SI*1), DI
  3275. JEQ match_nolit_loop_encodeBlockAsm12B
  3276. INCL DX
  3277. JMP search_loop_encodeBlockAsm12B
  3278. emit_remainder_encodeBlockAsm12B:
  3279. MOVQ src_len+32(FP), AX
  3280. SUBL 12(SP), AX
  3281. LEAQ 3(CX)(AX*1), AX
  3282. CMPQ AX, (SP)
  3283. JB emit_remainder_ok_encodeBlockAsm12B
  3284. MOVQ $0x00000000, ret+56(FP)
  3285. RET
  3286. emit_remainder_ok_encodeBlockAsm12B:
  3287. MOVQ src_len+32(FP), AX
  3288. MOVL 12(SP), DX
  3289. CMPL DX, AX
  3290. JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
  3291. MOVL AX, SI
  3292. MOVL AX, 12(SP)
  3293. LEAQ (BX)(DX*1), AX
  3294. SUBL DX, SI
  3295. LEAL -1(SI), DX
  3296. CMPL DX, $0x3c
  3297. JB one_byte_emit_remainder_encodeBlockAsm12B
  3298. CMPL DX, $0x00000100
  3299. JB two_bytes_emit_remainder_encodeBlockAsm12B
  3300. JB three_bytes_emit_remainder_encodeBlockAsm12B
  3301. three_bytes_emit_remainder_encodeBlockAsm12B:
  3302. MOVB $0xf4, (CX)
  3303. MOVW DX, 1(CX)
  3304. ADDQ $0x03, CX
  3305. JMP memmove_long_emit_remainder_encodeBlockAsm12B
  3306. two_bytes_emit_remainder_encodeBlockAsm12B:
  3307. MOVB $0xf0, (CX)
  3308. MOVB DL, 1(CX)
  3309. ADDQ $0x02, CX
  3310. CMPL DX, $0x40
  3311. JB memmove_emit_remainder_encodeBlockAsm12B
  3312. JMP memmove_long_emit_remainder_encodeBlockAsm12B
  3313. one_byte_emit_remainder_encodeBlockAsm12B:
  3314. SHLB $0x02, DL
  3315. MOVB DL, (CX)
  3316. ADDQ $0x01, CX
  3317. memmove_emit_remainder_encodeBlockAsm12B:
  3318. LEAQ (CX)(SI*1), DX
  3319. MOVL SI, BX
  3320. // genMemMoveShort
  3321. CMPQ BX, $0x03
  3322. JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
  3323. JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
  3324. CMPQ BX, $0x08
  3325. JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
  3326. CMPQ BX, $0x10
  3327. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
  3328. CMPQ BX, $0x20
  3329. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
  3330. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
  3331. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
  3332. MOVB (AX), SI
  3333. MOVB -1(AX)(BX*1), AL
  3334. MOVB SI, (CX)
  3335. MOVB AL, -1(CX)(BX*1)
  3336. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3337. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
  3338. MOVW (AX), SI
  3339. MOVB 2(AX), AL
  3340. MOVW SI, (CX)
  3341. MOVB AL, 2(CX)
  3342. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3343. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
  3344. MOVL (AX), SI
  3345. MOVL -4(AX)(BX*1), AX
  3346. MOVL SI, (CX)
  3347. MOVL AX, -4(CX)(BX*1)
  3348. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3349. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
  3350. MOVQ (AX), SI
  3351. MOVQ -8(AX)(BX*1), AX
  3352. MOVQ SI, (CX)
  3353. MOVQ AX, -8(CX)(BX*1)
  3354. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3355. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
  3356. MOVOU (AX), X0
  3357. MOVOU -16(AX)(BX*1), X1
  3358. MOVOU X0, (CX)
  3359. MOVOU X1, -16(CX)(BX*1)
  3360. JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
  3361. emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
  3362. MOVOU (AX), X0
  3363. MOVOU 16(AX), X1
  3364. MOVOU -32(AX)(BX*1), X2
  3365. MOVOU -16(AX)(BX*1), X3
  3366. MOVOU X0, (CX)
  3367. MOVOU X1, 16(CX)
  3368. MOVOU X2, -32(CX)(BX*1)
  3369. MOVOU X3, -16(CX)(BX*1)
  3370. memmove_end_copy_emit_remainder_encodeBlockAsm12B:
  3371. MOVQ DX, CX
  3372. JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
  3373. memmove_long_emit_remainder_encodeBlockAsm12B:
  3374. LEAQ (CX)(SI*1), DX
  3375. MOVL SI, BX
  3376. // genMemMoveLong
  3377. MOVOU (AX), X0
  3378. MOVOU 16(AX), X1
  3379. MOVOU -32(AX)(BX*1), X2
  3380. MOVOU -16(AX)(BX*1), X3
  3381. MOVQ BX, DI
  3382. SHRQ $0x05, DI
  3383. MOVQ CX, SI
  3384. ANDL $0x0000001f, SI
  3385. MOVQ $0x00000040, R8
  3386. SUBQ SI, R8
  3387. DECQ DI
  3388. JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
  3389. LEAQ -32(AX)(R8*1), SI
  3390. LEAQ -32(CX)(R8*1), R9
  3391. emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
  3392. MOVOU (SI), X4
  3393. MOVOU 16(SI), X5
  3394. MOVOA X4, (R9)
  3395. MOVOA X5, 16(R9)
  3396. ADDQ $0x20, R9
  3397. ADDQ $0x20, SI
  3398. ADDQ $0x20, R8
  3399. DECQ DI
  3400. JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
  3401. emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
  3402. MOVOU -32(AX)(R8*1), X4
  3403. MOVOU -16(AX)(R8*1), X5
  3404. MOVOA X4, -32(CX)(R8*1)
  3405. MOVOA X5, -16(CX)(R8*1)
  3406. ADDQ $0x20, R8
  3407. CMPQ BX, R8
  3408. JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
  3409. MOVOU X0, (CX)
  3410. MOVOU X1, 16(CX)
  3411. MOVOU X2, -32(CX)(BX*1)
  3412. MOVOU X3, -16(CX)(BX*1)
  3413. MOVQ DX, CX
  3414. emit_literal_done_emit_remainder_encodeBlockAsm12B:
  3415. MOVQ dst_base+0(FP), AX
  3416. SUBQ AX, CX
  3417. MOVQ CX, ret+56(FP)
  3418. RET
  3419. // func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
  3420. // Requires: BMI, SSE2
  3421. TEXT ·encodeBlockAsm10B(SB), $24-64
  3422. MOVQ tmp+48(FP), AX
  3423. MOVQ dst_base+0(FP), CX
  3424. MOVQ $0x00000020, DX
  3425. MOVQ AX, BX
  3426. PXOR X0, X0
  3427. zero_loop_encodeBlockAsm10B:
  3428. MOVOU X0, (BX)
  3429. MOVOU X0, 16(BX)
  3430. MOVOU X0, 32(BX)
  3431. MOVOU X0, 48(BX)
  3432. MOVOU X0, 64(BX)
  3433. MOVOU X0, 80(BX)
  3434. MOVOU X0, 96(BX)
  3435. MOVOU X0, 112(BX)
  3436. ADDQ $0x80, BX
  3437. DECQ DX
  3438. JNZ zero_loop_encodeBlockAsm10B
  3439. MOVL $0x00000000, 12(SP)
  3440. MOVQ src_len+32(FP), DX
  3441. LEAQ -9(DX), BX
  3442. LEAQ -8(DX), SI
  3443. MOVL SI, 8(SP)
  3444. SHRQ $0x05, DX
  3445. SUBL DX, BX
  3446. LEAQ (CX)(BX*1), BX
  3447. MOVQ BX, (SP)
  3448. MOVL $0x00000001, DX
  3449. MOVL DX, 16(SP)
  3450. MOVQ src_base+24(FP), BX
  3451. search_loop_encodeBlockAsm10B:
  3452. MOVL DX, SI
  3453. SUBL 12(SP), SI
  3454. SHRL $0x05, SI
  3455. LEAL 4(DX)(SI*1), SI
  3456. CMPL SI, 8(SP)
  3457. JAE emit_remainder_encodeBlockAsm10B
  3458. MOVQ (BX)(DX*1), DI
  3459. MOVL SI, 20(SP)
  3460. MOVQ $0x9e3779b1, R9
  3461. MOVQ DI, R10
  3462. MOVQ DI, R11
  3463. SHRQ $0x08, R11
  3464. SHLQ $0x20, R10
  3465. IMULQ R9, R10
  3466. SHRQ $0x36, R10
  3467. SHLQ $0x20, R11
  3468. IMULQ R9, R11
  3469. SHRQ $0x36, R11
  3470. MOVL (AX)(R10*4), SI
  3471. MOVL (AX)(R11*4), R8
  3472. MOVL DX, (AX)(R10*4)
  3473. LEAL 1(DX), R10
  3474. MOVL R10, (AX)(R11*4)
  3475. MOVQ DI, R10
  3476. SHRQ $0x10, R10
  3477. SHLQ $0x20, R10
  3478. IMULQ R9, R10
  3479. SHRQ $0x36, R10
  3480. MOVL DX, R9
  3481. SUBL 16(SP), R9
  3482. MOVL 1(BX)(R9*1), R11
  3483. MOVQ DI, R9
  3484. SHRQ $0x08, R9
  3485. CMPL R9, R11
  3486. JNE no_repeat_found_encodeBlockAsm10B
  3487. LEAL 1(DX), DI
  3488. MOVL 12(SP), R8
  3489. MOVL DI, SI
  3490. SUBL 16(SP), SI
  3491. JZ repeat_extend_back_end_encodeBlockAsm10B
  3492. repeat_extend_back_loop_encodeBlockAsm10B:
  3493. CMPL DI, R8
  3494. JBE repeat_extend_back_end_encodeBlockAsm10B
  3495. MOVB -1(BX)(SI*1), R9
  3496. MOVB -1(BX)(DI*1), R10
  3497. CMPB R9, R10
  3498. JNE repeat_extend_back_end_encodeBlockAsm10B
  3499. LEAL -1(DI), DI
  3500. DECL SI
  3501. JNZ repeat_extend_back_loop_encodeBlockAsm10B
  3502. repeat_extend_back_end_encodeBlockAsm10B:
  3503. MOVL DI, SI
  3504. SUBL 12(SP), SI
  3505. LEAQ 3(CX)(SI*1), SI
  3506. CMPQ SI, (SP)
  3507. JB repeat_dst_size_check_encodeBlockAsm10B
  3508. MOVQ $0x00000000, ret+56(FP)
  3509. RET
  3510. repeat_dst_size_check_encodeBlockAsm10B:
  3511. MOVL 12(SP), SI
  3512. CMPL SI, DI
  3513. JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
  3514. MOVL DI, R9
  3515. MOVL DI, 12(SP)
  3516. LEAQ (BX)(SI*1), R10
  3517. SUBL SI, R9
  3518. LEAL -1(R9), SI
  3519. CMPL SI, $0x3c
  3520. JB one_byte_repeat_emit_encodeBlockAsm10B
  3521. CMPL SI, $0x00000100
  3522. JB two_bytes_repeat_emit_encodeBlockAsm10B
  3523. JB three_bytes_repeat_emit_encodeBlockAsm10B
  3524. three_bytes_repeat_emit_encodeBlockAsm10B:
  3525. MOVB $0xf4, (CX)
  3526. MOVW SI, 1(CX)
  3527. ADDQ $0x03, CX
  3528. JMP memmove_long_repeat_emit_encodeBlockAsm10B
  3529. two_bytes_repeat_emit_encodeBlockAsm10B:
  3530. MOVB $0xf0, (CX)
  3531. MOVB SI, 1(CX)
  3532. ADDQ $0x02, CX
  3533. CMPL SI, $0x40
  3534. JB memmove_repeat_emit_encodeBlockAsm10B
  3535. JMP memmove_long_repeat_emit_encodeBlockAsm10B
  3536. one_byte_repeat_emit_encodeBlockAsm10B:
  3537. SHLB $0x02, SI
  3538. MOVB SI, (CX)
  3539. ADDQ $0x01, CX
  3540. memmove_repeat_emit_encodeBlockAsm10B:
  3541. LEAQ (CX)(R9*1), SI
  3542. // genMemMoveShort
  3543. CMPQ R9, $0x08
  3544. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
  3545. CMPQ R9, $0x10
  3546. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
  3547. CMPQ R9, $0x20
  3548. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
  3549. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
  3550. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
  3551. MOVQ (R10), R11
  3552. MOVQ R11, (CX)
  3553. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  3554. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
  3555. MOVQ (R10), R11
  3556. MOVQ -8(R10)(R9*1), R10
  3557. MOVQ R11, (CX)
  3558. MOVQ R10, -8(CX)(R9*1)
  3559. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  3560. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
  3561. MOVOU (R10), X0
  3562. MOVOU -16(R10)(R9*1), X1
  3563. MOVOU X0, (CX)
  3564. MOVOU X1, -16(CX)(R9*1)
  3565. JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
  3566. emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
  3567. MOVOU (R10), X0
  3568. MOVOU 16(R10), X1
  3569. MOVOU -32(R10)(R9*1), X2
  3570. MOVOU -16(R10)(R9*1), X3
  3571. MOVOU X0, (CX)
  3572. MOVOU X1, 16(CX)
  3573. MOVOU X2, -32(CX)(R9*1)
  3574. MOVOU X3, -16(CX)(R9*1)
  3575. memmove_end_copy_repeat_emit_encodeBlockAsm10B:
  3576. MOVQ SI, CX
  3577. JMP emit_literal_done_repeat_emit_encodeBlockAsm10B
  3578. memmove_long_repeat_emit_encodeBlockAsm10B:
  3579. LEAQ (CX)(R9*1), SI
  3580. // genMemMoveLong
  3581. MOVOU (R10), X0
  3582. MOVOU 16(R10), X1
  3583. MOVOU -32(R10)(R9*1), X2
  3584. MOVOU -16(R10)(R9*1), X3
  3585. MOVQ R9, R12
  3586. SHRQ $0x05, R12
  3587. MOVQ CX, R11
  3588. ANDL $0x0000001f, R11
  3589. MOVQ $0x00000040, R13
  3590. SUBQ R11, R13
  3591. DECQ R12
  3592. JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  3593. LEAQ -32(R10)(R13*1), R11
  3594. LEAQ -32(CX)(R13*1), R14
  3595. emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
  3596. MOVOU (R11), X4
  3597. MOVOU 16(R11), X5
  3598. MOVOA X4, (R14)
  3599. MOVOA X5, 16(R14)
  3600. ADDQ $0x20, R14
  3601. ADDQ $0x20, R11
  3602. ADDQ $0x20, R13
  3603. DECQ R12
  3604. JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
  3605. emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
  3606. MOVOU -32(R10)(R13*1), X4
  3607. MOVOU -16(R10)(R13*1), X5
  3608. MOVOA X4, -32(CX)(R13*1)
  3609. MOVOA X5, -16(CX)(R13*1)
  3610. ADDQ $0x20, R13
  3611. CMPQ R9, R13
  3612. JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  3613. MOVOU X0, (CX)
  3614. MOVOU X1, 16(CX)
  3615. MOVOU X2, -32(CX)(R9*1)
  3616. MOVOU X3, -16(CX)(R9*1)
  3617. MOVQ SI, CX
  3618. emit_literal_done_repeat_emit_encodeBlockAsm10B:
  3619. ADDL $0x05, DX
  3620. MOVL DX, SI
  3621. SUBL 16(SP), SI
  3622. MOVQ src_len+32(FP), R9
  3623. SUBL DX, R9
  3624. LEAQ (BX)(DX*1), R10
  3625. LEAQ (BX)(SI*1), SI
  3626. // matchLen
  3627. XORL R12, R12
  3628. matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
  3629. CMPL R9, $0x10
  3630. JB matchlen_match8_repeat_extend_encodeBlockAsm10B
  3631. MOVQ (R10)(R12*1), R11
  3632. MOVQ 8(R10)(R12*1), R13
  3633. XORQ (SI)(R12*1), R11
  3634. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
  3635. XORQ 8(SI)(R12*1), R13
  3636. JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B
  3637. LEAL -16(R9), R9
  3638. LEAL 16(R12), R12
  3639. JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
  3640. matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
  3641. #ifdef GOAMD64_v3
  3642. TZCNTQ R13, R13
  3643. #else
  3644. BSFQ R13, R13
  3645. #endif
  3646. SARQ $0x03, R13
  3647. LEAL 8(R12)(R13*1), R12
  3648. JMP repeat_extend_forward_end_encodeBlockAsm10B
  3649. matchlen_match8_repeat_extend_encodeBlockAsm10B:
  3650. CMPL R9, $0x08
  3651. JB matchlen_match4_repeat_extend_encodeBlockAsm10B
  3652. MOVQ (R10)(R12*1), R11
  3653. XORQ (SI)(R12*1), R11
  3654. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
  3655. LEAL -8(R9), R9
  3656. LEAL 8(R12), R12
  3657. JMP matchlen_match4_repeat_extend_encodeBlockAsm10B
  3658. matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
  3659. #ifdef GOAMD64_v3
  3660. TZCNTQ R11, R11
  3661. #else
  3662. BSFQ R11, R11
  3663. #endif
  3664. SARQ $0x03, R11
  3665. LEAL (R12)(R11*1), R12
  3666. JMP repeat_extend_forward_end_encodeBlockAsm10B
  3667. matchlen_match4_repeat_extend_encodeBlockAsm10B:
  3668. CMPL R9, $0x04
  3669. JB matchlen_match2_repeat_extend_encodeBlockAsm10B
  3670. MOVL (R10)(R12*1), R11
  3671. CMPL (SI)(R12*1), R11
  3672. JNE matchlen_match2_repeat_extend_encodeBlockAsm10B
  3673. LEAL -4(R9), R9
  3674. LEAL 4(R12), R12
  3675. matchlen_match2_repeat_extend_encodeBlockAsm10B:
  3676. CMPL R9, $0x01
  3677. JE matchlen_match1_repeat_extend_encodeBlockAsm10B
  3678. JB repeat_extend_forward_end_encodeBlockAsm10B
  3679. MOVW (R10)(R12*1), R11
  3680. CMPW (SI)(R12*1), R11
  3681. JNE matchlen_match1_repeat_extend_encodeBlockAsm10B
  3682. LEAL 2(R12), R12
  3683. SUBL $0x02, R9
  3684. JZ repeat_extend_forward_end_encodeBlockAsm10B
  3685. matchlen_match1_repeat_extend_encodeBlockAsm10B:
  3686. MOVB (R10)(R12*1), R11
  3687. CMPB (SI)(R12*1), R11
  3688. JNE repeat_extend_forward_end_encodeBlockAsm10B
  3689. LEAL 1(R12), R12
  3690. repeat_extend_forward_end_encodeBlockAsm10B:
  3691. ADDL R12, DX
  3692. MOVL DX, SI
  3693. SUBL DI, SI
  3694. MOVL 16(SP), DI
  3695. TESTL R8, R8
  3696. JZ repeat_as_copy_encodeBlockAsm10B
  3697. // emitRepeat
  3698. MOVL SI, R8
  3699. LEAL -4(SI), SI
  3700. CMPL R8, $0x08
  3701. JBE repeat_two_match_repeat_encodeBlockAsm10B
  3702. CMPL R8, $0x0c
  3703. JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
  3704. CMPL DI, $0x00000800
  3705. JB repeat_two_offset_match_repeat_encodeBlockAsm10B
  3706. cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
  3707. CMPL SI, $0x00000104
  3708. JB repeat_three_match_repeat_encodeBlockAsm10B
  3709. LEAL -256(SI), SI
  3710. MOVW $0x0019, (CX)
  3711. MOVW SI, 2(CX)
  3712. ADDQ $0x04, CX
  3713. JMP repeat_end_emit_encodeBlockAsm10B
  3714. repeat_three_match_repeat_encodeBlockAsm10B:
  3715. LEAL -4(SI), SI
  3716. MOVW $0x0015, (CX)
  3717. MOVB SI, 2(CX)
  3718. ADDQ $0x03, CX
  3719. JMP repeat_end_emit_encodeBlockAsm10B
  3720. repeat_two_match_repeat_encodeBlockAsm10B:
  3721. SHLL $0x02, SI
  3722. ORL $0x01, SI
  3723. MOVW SI, (CX)
  3724. ADDQ $0x02, CX
  3725. JMP repeat_end_emit_encodeBlockAsm10B
  3726. repeat_two_offset_match_repeat_encodeBlockAsm10B:
  3727. XORQ R8, R8
  3728. LEAL 1(R8)(SI*4), SI
  3729. MOVB DI, 1(CX)
  3730. SARL $0x08, DI
  3731. SHLL $0x05, DI
  3732. ORL DI, SI
  3733. MOVB SI, (CX)
  3734. ADDQ $0x02, CX
  3735. JMP repeat_end_emit_encodeBlockAsm10B
  3736. repeat_as_copy_encodeBlockAsm10B:
  3737. // emitCopy
  3738. CMPL SI, $0x40
  3739. JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
  3740. CMPL DI, $0x00000800
  3741. JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B
  3742. MOVL $0x00000001, R8
  3743. LEAL 16(R8), R8
  3744. MOVB DI, 1(CX)
  3745. SHRL $0x08, DI
  3746. SHLL $0x05, DI
  3747. ORL DI, R8
  3748. MOVB R8, (CX)
  3749. ADDQ $0x02, CX
  3750. SUBL $0x08, SI
  3751. // emitRepeat
  3752. LEAL -4(SI), SI
  3753. JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  3754. MOVL SI, R8
  3755. LEAL -4(SI), SI
  3756. CMPL R8, $0x08
  3757. JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  3758. CMPL R8, $0x0c
  3759. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  3760. CMPL DI, $0x00000800
  3761. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  3762. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  3763. CMPL SI, $0x00000104
  3764. JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
  3765. LEAL -256(SI), SI
  3766. MOVW $0x0019, (CX)
  3767. MOVW SI, 2(CX)
  3768. ADDQ $0x04, CX
  3769. JMP repeat_end_emit_encodeBlockAsm10B
  3770. repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  3771. LEAL -4(SI), SI
  3772. MOVW $0x0015, (CX)
  3773. MOVB SI, 2(CX)
  3774. ADDQ $0x03, CX
  3775. JMP repeat_end_emit_encodeBlockAsm10B
  3776. repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  3777. SHLL $0x02, SI
  3778. ORL $0x01, SI
  3779. MOVW SI, (CX)
  3780. ADDQ $0x02, CX
  3781. JMP repeat_end_emit_encodeBlockAsm10B
  3782. repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
  3783. XORQ R8, R8
  3784. LEAL 1(R8)(SI*4), SI
  3785. MOVB DI, 1(CX)
  3786. SARL $0x08, DI
  3787. SHLL $0x05, DI
  3788. ORL DI, SI
  3789. MOVB SI, (CX)
  3790. ADDQ $0x02, CX
  3791. JMP repeat_end_emit_encodeBlockAsm10B
  3792. long_offset_short_repeat_as_copy_encodeBlockAsm10B:
  3793. MOVB $0xee, (CX)
  3794. MOVW DI, 1(CX)
  3795. LEAL -60(SI), SI
  3796. ADDQ $0x03, CX
  3797. // emitRepeat
  3798. MOVL SI, R8
  3799. LEAL -4(SI), SI
  3800. CMPL R8, $0x08
  3801. JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3802. CMPL R8, $0x0c
  3803. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3804. CMPL DI, $0x00000800
  3805. JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3806. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3807. CMPL SI, $0x00000104
  3808. JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
  3809. LEAL -256(SI), SI
  3810. MOVW $0x0019, (CX)
  3811. MOVW SI, 2(CX)
  3812. ADDQ $0x04, CX
  3813. JMP repeat_end_emit_encodeBlockAsm10B
  3814. repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3815. LEAL -4(SI), SI
  3816. MOVW $0x0015, (CX)
  3817. MOVB SI, 2(CX)
  3818. ADDQ $0x03, CX
  3819. JMP repeat_end_emit_encodeBlockAsm10B
  3820. repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3821. SHLL $0x02, SI
  3822. ORL $0x01, SI
  3823. MOVW SI, (CX)
  3824. ADDQ $0x02, CX
  3825. JMP repeat_end_emit_encodeBlockAsm10B
  3826. repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
  3827. XORQ R8, R8
  3828. LEAL 1(R8)(SI*4), SI
  3829. MOVB DI, 1(CX)
  3830. SARL $0x08, DI
  3831. SHLL $0x05, DI
  3832. ORL DI, SI
  3833. MOVB SI, (CX)
  3834. ADDQ $0x02, CX
  3835. JMP repeat_end_emit_encodeBlockAsm10B
  3836. two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
  3837. MOVL SI, R8
  3838. SHLL $0x02, R8
  3839. CMPL SI, $0x0c
  3840. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
  3841. CMPL DI, $0x00000800
  3842. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
  3843. LEAL -15(R8), R8
  3844. MOVB DI, 1(CX)
  3845. SHRL $0x08, DI
  3846. SHLL $0x05, DI
  3847. ORL DI, R8
  3848. MOVB R8, (CX)
  3849. ADDQ $0x02, CX
  3850. JMP repeat_end_emit_encodeBlockAsm10B
  3851. emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
  3852. LEAL -2(R8), R8
  3853. MOVB R8, (CX)
  3854. MOVW DI, 1(CX)
  3855. ADDQ $0x03, CX
  3856. repeat_end_emit_encodeBlockAsm10B:
  3857. MOVL DX, 12(SP)
  3858. JMP search_loop_encodeBlockAsm10B
  3859. no_repeat_found_encodeBlockAsm10B:
  3860. CMPL (BX)(SI*1), DI
  3861. JEQ candidate_match_encodeBlockAsm10B
  3862. SHRQ $0x08, DI
  3863. MOVL (AX)(R10*4), SI
  3864. LEAL 2(DX), R9
  3865. CMPL (BX)(R8*1), DI
  3866. JEQ candidate2_match_encodeBlockAsm10B
  3867. MOVL R9, (AX)(R10*4)
  3868. SHRQ $0x08, DI
  3869. CMPL (BX)(SI*1), DI
  3870. JEQ candidate3_match_encodeBlockAsm10B
  3871. MOVL 20(SP), DX
  3872. JMP search_loop_encodeBlockAsm10B
  3873. candidate3_match_encodeBlockAsm10B:
  3874. ADDL $0x02, DX
  3875. JMP candidate_match_encodeBlockAsm10B
  3876. candidate2_match_encodeBlockAsm10B:
  3877. MOVL R9, (AX)(R10*4)
  3878. INCL DX
  3879. MOVL R8, SI
  3880. candidate_match_encodeBlockAsm10B:
  3881. MOVL 12(SP), DI
  3882. TESTL SI, SI
  3883. JZ match_extend_back_end_encodeBlockAsm10B
  3884. match_extend_back_loop_encodeBlockAsm10B:
  3885. CMPL DX, DI
  3886. JBE match_extend_back_end_encodeBlockAsm10B
  3887. MOVB -1(BX)(SI*1), R8
  3888. MOVB -1(BX)(DX*1), R9
  3889. CMPB R8, R9
  3890. JNE match_extend_back_end_encodeBlockAsm10B
  3891. LEAL -1(DX), DX
  3892. DECL SI
  3893. JZ match_extend_back_end_encodeBlockAsm10B
  3894. JMP match_extend_back_loop_encodeBlockAsm10B
  3895. match_extend_back_end_encodeBlockAsm10B:
  3896. MOVL DX, DI
  3897. SUBL 12(SP), DI
  3898. LEAQ 3(CX)(DI*1), DI
  3899. CMPQ DI, (SP)
  3900. JB match_dst_size_check_encodeBlockAsm10B
  3901. MOVQ $0x00000000, ret+56(FP)
  3902. RET
  3903. match_dst_size_check_encodeBlockAsm10B:
  3904. MOVL DX, DI
  3905. MOVL 12(SP), R8
  3906. CMPL R8, DI
  3907. JEQ emit_literal_done_match_emit_encodeBlockAsm10B
  3908. MOVL DI, R9
  3909. MOVL DI, 12(SP)
  3910. LEAQ (BX)(R8*1), DI
  3911. SUBL R8, R9
  3912. LEAL -1(R9), R8
  3913. CMPL R8, $0x3c
  3914. JB one_byte_match_emit_encodeBlockAsm10B
  3915. CMPL R8, $0x00000100
  3916. JB two_bytes_match_emit_encodeBlockAsm10B
  3917. JB three_bytes_match_emit_encodeBlockAsm10B
  3918. three_bytes_match_emit_encodeBlockAsm10B:
  3919. MOVB $0xf4, (CX)
  3920. MOVW R8, 1(CX)
  3921. ADDQ $0x03, CX
  3922. JMP memmove_long_match_emit_encodeBlockAsm10B
  3923. two_bytes_match_emit_encodeBlockAsm10B:
  3924. MOVB $0xf0, (CX)
  3925. MOVB R8, 1(CX)
  3926. ADDQ $0x02, CX
  3927. CMPL R8, $0x40
  3928. JB memmove_match_emit_encodeBlockAsm10B
  3929. JMP memmove_long_match_emit_encodeBlockAsm10B
  3930. one_byte_match_emit_encodeBlockAsm10B:
  3931. SHLB $0x02, R8
  3932. MOVB R8, (CX)
  3933. ADDQ $0x01, CX
  3934. memmove_match_emit_encodeBlockAsm10B:
  3935. LEAQ (CX)(R9*1), R8
  3936. // genMemMoveShort
  3937. CMPQ R9, $0x08
  3938. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
  3939. CMPQ R9, $0x10
  3940. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
  3941. CMPQ R9, $0x20
  3942. JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
  3943. JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
  3944. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
  3945. MOVQ (DI), R10
  3946. MOVQ R10, (CX)
  3947. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3948. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
  3949. MOVQ (DI), R10
  3950. MOVQ -8(DI)(R9*1), DI
  3951. MOVQ R10, (CX)
  3952. MOVQ DI, -8(CX)(R9*1)
  3953. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3954. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
  3955. MOVOU (DI), X0
  3956. MOVOU -16(DI)(R9*1), X1
  3957. MOVOU X0, (CX)
  3958. MOVOU X1, -16(CX)(R9*1)
  3959. JMP memmove_end_copy_match_emit_encodeBlockAsm10B
  3960. emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
  3961. MOVOU (DI), X0
  3962. MOVOU 16(DI), X1
  3963. MOVOU -32(DI)(R9*1), X2
  3964. MOVOU -16(DI)(R9*1), X3
  3965. MOVOU X0, (CX)
  3966. MOVOU X1, 16(CX)
  3967. MOVOU X2, -32(CX)(R9*1)
  3968. MOVOU X3, -16(CX)(R9*1)
  3969. memmove_end_copy_match_emit_encodeBlockAsm10B:
  3970. MOVQ R8, CX
  3971. JMP emit_literal_done_match_emit_encodeBlockAsm10B
  3972. memmove_long_match_emit_encodeBlockAsm10B:
  3973. LEAQ (CX)(R9*1), R8
  3974. // genMemMoveLong
  3975. MOVOU (DI), X0
  3976. MOVOU 16(DI), X1
  3977. MOVOU -32(DI)(R9*1), X2
  3978. MOVOU -16(DI)(R9*1), X3
  3979. MOVQ R9, R11
  3980. SHRQ $0x05, R11
  3981. MOVQ CX, R10
  3982. ANDL $0x0000001f, R10
  3983. MOVQ $0x00000040, R12
  3984. SUBQ R10, R12
  3985. DECQ R11
  3986. JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  3987. LEAQ -32(DI)(R12*1), R10
  3988. LEAQ -32(CX)(R12*1), R13
  3989. emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
  3990. MOVOU (R10), X4
  3991. MOVOU 16(R10), X5
  3992. MOVOA X4, (R13)
  3993. MOVOA X5, 16(R13)
  3994. ADDQ $0x20, R13
  3995. ADDQ $0x20, R10
  3996. ADDQ $0x20, R12
  3997. DECQ R11
  3998. JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
  3999. emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
  4000. MOVOU -32(DI)(R12*1), X4
  4001. MOVOU -16(DI)(R12*1), X5
  4002. MOVOA X4, -32(CX)(R12*1)
  4003. MOVOA X5, -16(CX)(R12*1)
  4004. ADDQ $0x20, R12
  4005. CMPQ R9, R12
  4006. JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
  4007. MOVOU X0, (CX)
  4008. MOVOU X1, 16(CX)
  4009. MOVOU X2, -32(CX)(R9*1)
  4010. MOVOU X3, -16(CX)(R9*1)
  4011. MOVQ R8, CX
  4012. emit_literal_done_match_emit_encodeBlockAsm10B:
  // match_nolit_loop: DX and SI are src offsets (current position and
  // candidate). Records their distance in 16(SP), skips 4 bytes on both sides
  // and then counts how many further bytes are equal (inlined matchLen).
  // BX is presumably the src base pointer, loaded outside this view - confirm.
  4013. match_nolit_loop_encodeBlockAsm10B:
  // 16(SP) = DX - SI (match offset).
  4014. MOVL DX, DI
  4015. SUBL SI, DI
  4016. MOVL DI, 16(SP)
  // Step both positions forward by 4 before extending the match.
  4017. ADDL $0x04, DX
  4018. ADDL $0x04, SI
  // DI = src_len - DX: number of bytes that may still be compared.
  4019. MOVQ src_len+32(FP), DI
  4020. SUBL DX, DI
  // Turn the two offsets into pointers: R8 = BX+DX, SI = BX+SI.
  4021. LEAQ (BX)(DX*1), R8
  4022. LEAQ (BX)(SI*1), SI
  4023. // matchLen
  // R10 = count of equal bytes found so far.
  4024. XORL R10, R10
  // While at least 16 bytes remain, compare two qwords per iteration.
  // A non-zero XOR result means the qword contains the first mismatch.
  4025. matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
  4026. CMPL DI, $0x10
  4027. JB matchlen_match8_match_nolit_encodeBlockAsm10B
  4028. MOVQ (R8)(R10*1), R9
  4029. MOVQ 8(R8)(R10*1), R11
  4030. XORQ (SI)(R10*1), R9
  4031. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B
  4032. XORQ 8(SI)(R10*1), R11
  4033. JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B
  4034. LEAL -16(DI), DI
  4035. LEAL 16(R10), R10
  4036. JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B
  // Mismatch in the second qword: (index of lowest set bit) >> 3 is the number
  // of equal low-order bytes; add the 8 bytes of the first qword as well.
  4037. matchlen_bsf_16match_nolit_encodeBlockAsm10B:
  4038. #ifdef GOAMD64_v3
  4039. TZCNTQ R11, R11
  4040. #else
  4041. BSFQ R11, R11
  4042. #endif
  4043. SARQ $0x03, R11
  4044. LEAL 8(R10)(R11*1), R10
  4045. JMP match_nolit_end_encodeBlockAsm10B
  // Fewer than 16 bytes remain: try a single qword if at least 8 are left.
  // On a full 8-byte match, continue with the 4-byte step (no loop back).
  4046. matchlen_match8_match_nolit_encodeBlockAsm10B:
  4047. CMPL DI, $0x08
  4048. JB matchlen_match4_match_nolit_encodeBlockAsm10B
  4049. MOVQ (R8)(R10*1), R9
  4050. XORQ (SI)(R10*1), R9
  4051. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B
  4052. LEAL -8(DI), DI
  4053. LEAL 8(R10), R10
  4054. JMP matchlen_match4_match_nolit_encodeBlockAsm10B
  // Mismatch in the qword held in R9: add the number of equal low-order bytes.
  4055. matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
  4056. #ifdef GOAMD64_v3
  4057. TZCNTQ R9, R9
  4058. #else
  4059. BSFQ R9, R9
  4060. #endif
  4061. SARQ $0x03, R9
  4062. LEAL (R10)(R9*1), R10
  4063. JMP match_nolit_end_encodeBlockAsm10B
  // 4-byte step: on a mismatch R10/DI are left untouched and the 2-byte and
  // 1-byte steps below narrow down the exact length.
  4064. matchlen_match4_match_nolit_encodeBlockAsm10B:
  4065. CMPL DI, $0x04
  4066. JB matchlen_match2_match_nolit_encodeBlockAsm10B
  4067. MOVL (R8)(R10*1), R9
  4068. CMPL (SI)(R10*1), R9
  4069. JNE matchlen_match2_match_nolit_encodeBlockAsm10B
  4070. LEAL -4(DI), DI
  4071. LEAL 4(R10), R10
  // 2-byte step. Both branches below use the flags of the same CMPL:
  // exactly 1 byte left -> single-byte check, 0 bytes left -> done.
  4072. matchlen_match2_match_nolit_encodeBlockAsm10B:
  4073. CMPL DI, $0x01
  4074. JE matchlen_match1_match_nolit_encodeBlockAsm10B
  4075. JB match_nolit_end_encodeBlockAsm10B
  4076. MOVW (R8)(R10*1), R9
  4077. CMPW (SI)(R10*1), R9
  4078. JNE matchlen_match1_match_nolit_encodeBlockAsm10B
  4079. LEAL 2(R10), R10
  4080. SUBL $0x02, DI
  4081. JZ match_nolit_end_encodeBlockAsm10B
  // Final single-byte comparison.
  4082. matchlen_match1_match_nolit_encodeBlockAsm10B:
  4083. MOVB (R8)(R10*1), R9
  4084. CMPB (SI)(R10*1), R9
  4085. JNE match_nolit_end_encodeBlockAsm10B
  4086. LEAL 1(R10), R10
  // Done extending: DX moves past the matched bytes, SI is reloaded with the
  // offset saved in 16(SP), and R10 gets +4 for the bytes skipped before
  // matchLen, so it now holds the full match length. 12(SP) is set to the new
  // DX (12(SP) is the slot the literal-emit code reads as its start position).
  4087. match_nolit_end_encodeBlockAsm10B:
  4088. ADDL R10, DX
  4089. MOVL 16(SP), SI
  4090. ADDL $0x04, R10
  4091. MOVL DX, 12(SP)
  4092. // emitCopy
  4093. CMPL R10, $0x40
  4094. JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B
  4095. CMPL SI, $0x00000800
  4096. JAE long_offset_short_match_nolit_encodeBlockAsm10B
  4097. MOVL $0x00000001, DI
  4098. LEAL 16(DI), DI
  4099. MOVB SI, 1(CX)
  4100. SHRL $0x08, SI
  4101. SHLL $0x05, SI
  4102. ORL SI, DI
  4103. MOVB DI, (CX)
  4104. ADDQ $0x02, CX
  4105. SUBL $0x08, R10
  4106. // emitRepeat
  4107. LEAL -4(R10), R10
  4108. JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4109. MOVL R10, DI
  4110. LEAL -4(R10), R10
  4111. CMPL DI, $0x08
  4112. JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4113. CMPL DI, $0x0c
  4114. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4115. CMPL SI, $0x00000800
  4116. JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4117. cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4118. CMPL R10, $0x00000104
  4119. JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
  4120. LEAL -256(R10), R10
  4121. MOVW $0x0019, (CX)
  4122. MOVW R10, 2(CX)
  4123. ADDQ $0x04, CX
  4124. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4125. repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4126. LEAL -4(R10), R10
  4127. MOVW $0x0015, (CX)
  4128. MOVB R10, 2(CX)
  4129. ADDQ $0x03, CX
  4130. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4131. repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4132. SHLL $0x02, R10
  4133. ORL $0x01, R10
  4134. MOVW R10, (CX)
  4135. ADDQ $0x02, CX
  4136. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4137. repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
  4138. XORQ DI, DI
  4139. LEAL 1(DI)(R10*4), R10
  4140. MOVB SI, 1(CX)
  4141. SARL $0x08, SI
  4142. SHLL $0x05, SI
  4143. ORL SI, R10
  4144. MOVB R10, (CX)
  4145. ADDQ $0x02, CX
  4146. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4147. long_offset_short_match_nolit_encodeBlockAsm10B:
  4148. MOVB $0xee, (CX)
  4149. MOVW SI, 1(CX)
  4150. LEAL -60(R10), R10
  4151. ADDQ $0x03, CX
  4152. // emitRepeat
  4153. MOVL R10, DI
  4154. LEAL -4(R10), R10
  4155. CMPL DI, $0x08
  4156. JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
  4157. CMPL DI, $0x0c
  4158. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
  4159. CMPL SI, $0x00000800
  4160. JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
  4161. cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4162. CMPL R10, $0x00000104
  4163. JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
  4164. LEAL -256(R10), R10
  4165. MOVW $0x0019, (CX)
  4166. MOVW R10, 2(CX)
  4167. ADDQ $0x04, CX
  4168. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4169. repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4170. LEAL -4(R10), R10
  4171. MOVW $0x0015, (CX)
  4172. MOVB R10, 2(CX)
  4173. ADDQ $0x03, CX
  4174. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4175. repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4176. SHLL $0x02, R10
  4177. ORL $0x01, R10
  4178. MOVW R10, (CX)
  4179. ADDQ $0x02, CX
  4180. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4181. repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
  4182. XORQ DI, DI
  4183. LEAL 1(DI)(R10*4), R10
  4184. MOVB SI, 1(CX)
  4185. SARL $0x08, SI
  4186. SHLL $0x05, SI
  4187. ORL SI, R10
  4188. MOVB R10, (CX)
  4189. ADDQ $0x02, CX
  4190. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4191. two_byte_offset_short_match_nolit_encodeBlockAsm10B:
  4192. MOVL R10, DI
  4193. SHLL $0x02, DI
  4194. CMPL R10, $0x0c
  4195. JAE emit_copy_three_match_nolit_encodeBlockAsm10B
  4196. CMPL SI, $0x00000800
  4197. JAE emit_copy_three_match_nolit_encodeBlockAsm10B
  4198. LEAL -15(DI), DI
  4199. MOVB SI, 1(CX)
  4200. SHRL $0x08, SI
  4201. SHLL $0x05, SI
  4202. ORL SI, DI
  4203. MOVB DI, (CX)
  4204. ADDQ $0x02, CX
  4205. JMP match_nolit_emitcopy_end_encodeBlockAsm10B
  4206. emit_copy_three_match_nolit_encodeBlockAsm10B:
  4207. LEAL -2(DI), DI
  4208. MOVB DI, (CX)
  4209. MOVW SI, 1(CX)
  4210. ADDQ $0x03, CX
  // After a copy has been written: decide whether to finish, fail, try an
  // immediate follow-up match at DX, or return to the search loop.
  // 8(SP) and (SP) are limits initialised outside this view; AX is presumably
  // the hash-table pointer and BX the src base pointer - confirm.
  4211. match_nolit_emitcopy_end_encodeBlockAsm10B:
  // Position DX at or past the limit in 8(SP): go emit the trailing literals.
  4212. CMPL DX, 8(SP)
  4213. JAE emit_remainder_encodeBlockAsm10B
  // Load 8 bytes starting two bytes before the current position.
  4214. MOVQ -2(BX)(DX*1), DI
  // The dst write pointer CX must stay below the limit in (SP); otherwise
  // return 0.
  4215. CMPQ CX, (SP)
  4216. JB match_nolit_dst_ok_encodeBlockAsm10B
  4217. MOVQ $0x00000000, ret+56(FP)
  4218. RET
  // Two table indexes are computed as ((x << 32) * 0x9e3779b1) >> 54, i.e. a
  // 10-bit value derived from the low 4 bytes of x:
  //   R8 from the bytes at DX-2, SI from the bytes at DX (DI after >> 16).
  4219. match_nolit_dst_ok_encodeBlockAsm10B:
  4220. MOVQ $0x9e3779b1, R9
  4221. MOVQ DI, R8
  4222. SHRQ $0x10, DI
  4223. MOVQ DI, SI
  4224. SHLQ $0x20, R8
  4225. IMULQ R9, R8
  4226. SHRQ $0x36, R8
  4227. SHLQ $0x20, SI
  4228. IMULQ R9, SI
  4229. SHRQ $0x36, SI
  // Table entries are 4 bytes wide. Read the previous entry for the bytes at
  // DX into SI (the new candidate), then store DX-2 and DX into their slots.
  4230. LEAL -2(DX), R9
  4231. LEAQ (AX)(SI*4), R10
  4232. MOVL (R10), SI
  4233. MOVL R9, (AX)(R8*4)
  4234. MOVL DX, (R10)
  // If the 4 bytes at the candidate equal the 4 bytes at DX, extend that match
  // right away; otherwise advance one byte and resume searching.
  4235. CMPL (BX)(SI*1), DI
  4236. JEQ match_nolit_loop_encodeBlockAsm10B
  4237. INCL DX
  4238. JMP search_loop_encodeBlockAsm10B
  // emit_remainder: writes the bytes from offset 12(SP) up to src_len as one
  // literal block at the dst write pointer CX, then returns CX - dst_base.
  // Returns 0 instead when the output would reach the limit stored in (SP).
  // BX is presumably the src base pointer on entry (set outside this view).
  4239. emit_remainder_encodeBlockAsm10B:
  // Space check: CX + 3 + (src_len - 12(SP)) must be below (SP).
  4240. MOVQ src_len+32(FP), AX
  4241. SUBL 12(SP), AX
  4242. LEAQ 3(CX)(AX*1), AX
  4243. CMPQ AX, (SP)
  4244. JB emit_remainder_ok_encodeBlockAsm10B
  4245. MOVQ $0x00000000, ret+56(FP)
  4246. RET
  4247. emit_remainder_ok_encodeBlockAsm10B:
  // Nothing left to emit when 12(SP) already equals src_len.
  4248. MOVQ src_len+32(FP), AX
  4249. MOVL 12(SP), DX
  4250. CMPL DX, AX
  4251. JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
  // AX = pointer to the first byte to copy, SI = byte count, DX = count - 1.
  4252. MOVL AX, SI
  4253. MOVL AX, 12(SP)
  4254. LEAQ (BX)(DX*1), AX
  4255. SUBL DX, SI
  4256. LEAL -1(SI), DX
  // Header size depends on count-1: < 0x3c uses 1 byte, < 0x100 uses 2 bytes,
  // anything else uses 3 bytes.
  4257. CMPL DX, $0x3c
  4258. JB one_byte_emit_remainder_encodeBlockAsm10B
  4259. CMPL DX, $0x00000100
  4260. JB two_bytes_emit_remainder_encodeBlockAsm10B
  // This JB re-tests the flags of the CMPL above, which the previous JB already
  // found not-below, so it is never taken; execution falls through to the
  // label either way.
  4261. JB three_bytes_emit_remainder_encodeBlockAsm10B
  // 3-byte header: 0xf4 followed by count-1 as a 16-bit value.
  4262. three_bytes_emit_remainder_encodeBlockAsm10B:
  4263. MOVB $0xf4, (CX)
  4264. MOVW DX, 1(CX)
  4265. ADDQ $0x03, CX
  4266. JMP memmove_long_emit_remainder_encodeBlockAsm10B
  // 2-byte header: 0xf0 followed by count-1 as one byte. Counts up to 64
  // (count-1 < 0x40) use the short copy, larger ones the long copy.
  4267. two_bytes_emit_remainder_encodeBlockAsm10B:
  4268. MOVB $0xf0, (CX)
  4269. MOVB DL, 1(CX)
  4270. ADDQ $0x02, CX
  4271. CMPL DX, $0x40
  4272. JB memmove_emit_remainder_encodeBlockAsm10B
  4273. JMP memmove_long_emit_remainder_encodeBlockAsm10B
  // 1-byte header: (count-1) << 2.
  4274. one_byte_emit_remainder_encodeBlockAsm10B:
  4275. SHLB $0x02, DL
  4276. MOVB DL, (CX)
  4277. ADDQ $0x01, CX
  // Short copy (1..64 bytes). DX = dst pointer after the copy; BX is reused as
  // the byte count from here on.
  4278. memmove_emit_remainder_encodeBlockAsm10B:
  4279. LEAQ (CX)(SI*1), DX
  4280. MOVL SI, BX
  4281. // genMemMoveShort
  // Dispatch on size. Every case loads a head and a tail piece (which may
  // overlap) before storing, so exactly BX bytes are written.
  4282. CMPQ BX, $0x03
  4283. JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
  4284. JE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
  4285. CMPQ BX, $0x08
  4286. JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
  4287. CMPQ BX, $0x10
  4288. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
  4289. CMPQ BX, $0x20
  4290. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
  4291. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
  // 1 or 2 bytes: first byte and last byte (the same byte when BX is 1).
  4292. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
  4293. MOVB (AX), SI
  4294. MOVB -1(AX)(BX*1), AL
  4295. MOVB SI, (CX)
  4296. MOVB AL, -1(CX)(BX*1)
  4297. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // 3 bytes: one word plus one byte.
  4298. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
  4299. MOVW (AX), SI
  4300. MOVB 2(AX), AL
  4301. MOVW SI, (CX)
  4302. MOVB AL, 2(CX)
  4303. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // 4..7 bytes: first and last dword.
  4304. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
  4305. MOVL (AX), SI
  4306. MOVL -4(AX)(BX*1), AX
  4307. MOVL SI, (CX)
  4308. MOVL AX, -4(CX)(BX*1)
  4309. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // 8..16 bytes: first and last qword.
  4310. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
  4311. MOVQ (AX), SI
  4312. MOVQ -8(AX)(BX*1), AX
  4313. MOVQ SI, (CX)
  4314. MOVQ AX, -8(CX)(BX*1)
  4315. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // 17..32 bytes: first and last 16 bytes.
  4316. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
  4317. MOVOU (AX), X0
  4318. MOVOU -16(AX)(BX*1), X1
  4319. MOVOU X0, (CX)
  4320. MOVOU X1, -16(CX)(BX*1)
  4321. JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
  // 33..64 bytes: first and last 32 bytes.
  4322. emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
  4323. MOVOU (AX), X0
  4324. MOVOU 16(AX), X1
  4325. MOVOU -32(AX)(BX*1), X2
  4326. MOVOU -16(AX)(BX*1), X3
  4327. MOVOU X0, (CX)
  4328. MOVOU X1, 16(CX)
  4329. MOVOU X2, -32(CX)(BX*1)
  4330. MOVOU X3, -16(CX)(BX*1)
  // Advance the write pointer past the copied bytes.
  4331. memmove_end_copy_emit_remainder_encodeBlockAsm10B:
  4332. MOVQ DX, CX
  4333. JMP emit_literal_done_emit_remainder_encodeBlockAsm10B
  // Long copy. DX = dst pointer after the copy; BX is reused as the byte count.
  4334. memmove_long_emit_remainder_encodeBlockAsm10B:
  4335. LEAQ (CX)(SI*1), DX
  4336. MOVL SI, BX
  4337. // genMemMoveLong
  // Keep the first and last 32 source bytes in X0..X3; they are stored last,
  // with unaligned stores, after the middle has been copied.
  4338. MOVOU (AX), X0
  4339. MOVOU 16(AX), X1
  4340. MOVOU -32(AX)(BX*1), X2
  4341. MOVOU -16(AX)(BX*1), X3
  // DI = BX / 32. R8 = 64 - (CX & 31), so CX + R8 - 32 is a 32-byte aligned
  // dst address; this allows the MOVOA (aligned) stores in the loops below.
  4342. MOVQ BX, DI
  4343. SHRQ $0x05, DI
  4344. MOVQ CX, SI
  4345. ANDL $0x0000001f, SI
  4346. MOVQ $0x00000040, R8
  4347. SUBQ SI, R8
  // NOTE(review): DECQ leaves CF unchanged, so JA here tests CF from the SUBQ
  // above (clear, since CX & 31 <= 31) together with ZF from DECQ. The visible
  // branches to this label all have BX >= 65, so DI stays non-zero and JA
  // appears to be always taken, leaving large_big_loop_back unreachable from
  // here - confirm against the generator.
  4348. DECQ DI
  4349. JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
  4350. LEAQ -32(AX)(R8*1), SI
  4351. LEAQ -32(CX)(R8*1), R9
  // Copies 32 bytes per pass through SI (src) and R9 (aligned dst).
  4352. emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
  4353. MOVOU (SI), X4
  4354. MOVOU 16(SI), X5
  4355. MOVOA X4, (R9)
  4356. MOVOA X5, 16(R9)
  4357. ADDQ $0x20, R9
  4358. ADDQ $0x20, SI
  4359. ADDQ $0x20, R8
  4360. DECQ DI
  4361. JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
  // Main loop: copy 32 bytes ending at offset R8 on each pass, advancing R8 by
  // 32 while BX >= R8.
  4362. emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
  4363. MOVOU -32(AX)(R8*1), X4
  4364. MOVOU -16(AX)(R8*1), X5
  4365. MOVOA X4, -32(CX)(R8*1)
  4366. MOVOA X5, -16(CX)(R8*1)
  4367. ADDQ $0x20, R8
  4368. CMPQ BX, R8
  4369. JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
  // Write the saved head and tail, then advance the write pointer.
  4370. MOVOU X0, (CX)
  4371. MOVOU X1, 16(CX)
  4372. MOVOU X2, -32(CX)(BX*1)
  4373. MOVOU X3, -16(CX)(BX*1)
  4374. MOVQ DX, CX
  // Return value: number of bytes written, CX - dst_base.
  4375. emit_literal_done_emit_remainder_encodeBlockAsm10B:
  4376. MOVQ dst_base+0(FP), AX
  4377. SUBQ AX, CX
  4378. MOVQ CX, ret+56(FP)
  4379. RET
  4380. // func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
  4381. // Requires: BMI, SSE2
  // Prologue and head of the search loop. Register and stack-slot roles as
  // established by the instructions below:
  //   AX     = tmp, used as a table of 4-byte entries indexed by an 8-bit hash
  //   BX     = src base pointer (after the prologue)
  //   CX     = dst write pointer, starting at dst_base
  //   DX     = current src offset
  //   (SP)   = dst_base + (src_len - 9 - src_len>>5), a dst limit
  //   8(SP)  = src_len - 8, a src offset limit
  //   12(SP) = offset subtracted from DX for the skip distance, initially 0
  //   16(SP) = offset distance used for the repeat check, initially 1
  //   20(SP) = next search position computed at the top of the loop
  4382. TEXT ·encodeBlockAsm8B(SB), $24-64
  4383. MOVQ tmp+48(FP), AX
  4384. MOVQ dst_base+0(FP), CX
  // Zero tmp: 8 iterations of 8 x 16-byte stores = 1024 bytes.
  4385. MOVQ $0x00000008, DX
  4386. MOVQ AX, BX
  4387. PXOR X0, X0
  4388. zero_loop_encodeBlockAsm8B:
  4389. MOVOU X0, (BX)
  4390. MOVOU X0, 16(BX)
  4391. MOVOU X0, 32(BX)
  4392. MOVOU X0, 48(BX)
  4393. MOVOU X0, 64(BX)
  4394. MOVOU X0, 80(BX)
  4395. MOVOU X0, 96(BX)
  4396. MOVOU X0, 112(BX)
  4397. ADDQ $0x80, BX
  4398. DECQ DX
  4399. JNZ zero_loop_encodeBlockAsm8B
  // Initialise the stack slots described above.
  4400. MOVL $0x00000000, 12(SP)
  4401. MOVQ src_len+32(FP), DX
  4402. LEAQ -9(DX), BX
  4403. LEAQ -8(DX), SI
  4404. MOVL SI, 8(SP)
  4405. SHRQ $0x05, DX
  4406. SUBL DX, BX
  4407. LEAQ (CX)(BX*1), BX
  4408. MOVQ BX, (SP)
  // Searching starts at src offset 1.
  4409. MOVL $0x00000001, DX
  4410. MOVL DX, 16(SP)
  4411. MOVQ src_base+24(FP), BX
  4412. search_loop_encodeBlockAsm8B:
  // SI = DX + 4 + ((DX - 12(SP)) >> 4): the next position to try. Stop
  // searching and emit the remainder once it reaches the limit in 8(SP).
  4413. MOVL DX, SI
  4414. SUBL 12(SP), SI
  4415. SHRL $0x04, SI
  4416. LEAL 4(DX)(SI*1), SI
  4417. CMPL SI, 8(SP)
  4418. JAE emit_remainder_encodeBlockAsm8B
  // DI = 8 src bytes at DX; remember the next position in 20(SP).
  4419. MOVQ (BX)(DX*1), DI
  4420. MOVL SI, 20(SP)
  // Hashes are ((x << 32) * 0x9e3779b1) >> 56, an 8-bit value derived from
  // the low 4 bytes of x: R10 for the bytes at DX, R11 for the bytes at DX+1.
  4421. MOVQ $0x9e3779b1, R9
  4422. MOVQ DI, R10
  4423. MOVQ DI, R11
  4424. SHRQ $0x08, R11
  4425. SHLQ $0x20, R10
  4426. IMULQ R9, R10
  4427. SHRQ $0x38, R10
  4428. SHLQ $0x20, R11
  4429. IMULQ R9, R11
  4430. SHRQ $0x38, R11
  // Fetch the two previous table entries (candidates) into SI and R8, then
  // overwrite the slots with DX and DX+1.
  4431. MOVL (AX)(R10*4), SI
  4432. MOVL (AX)(R11*4), R8
  4433. MOVL DX, (AX)(R10*4)
  4434. LEAL 1(DX), R10
  4435. MOVL R10, (AX)(R11*4)
  // R10 = hash of the bytes at DX+2.
  4436. MOVQ DI, R10
  4437. SHRQ $0x10, R10
  4438. SHLQ $0x20, R10
  4439. IMULQ R9, R10
  4440. SHRQ $0x38, R10
  // Repeat check: compare the 4 bytes at DX+1 (DI >> 8) with the 4 bytes at
  // DX+1-16(SP).
  4441. MOVL DX, R9
  4442. SUBL 16(SP), R9
  4443. MOVL 1(BX)(R9*1), R11
  4444. MOVQ DI, R9
  4445. SHRQ $0x08, R9
  4446. CMPL R9, R11
  4447. JNE no_repeat_found_encodeBlockAsm8B
  4448. LEAL 1(DX), DI
  4449. MOVL 12(SP), R8
  4450. MOVL DI, SI
  4451. SUBL 16(SP), SI
  4452. JZ repeat_extend_back_end_encodeBlockAsm8B
  4453. repeat_extend_back_loop_encodeBlockAsm8B:
  4454. CMPL DI, R8
  4455. JBE repeat_extend_back_end_encodeBlockAsm8B
  4456. MOVB -1(BX)(SI*1), R9
  4457. MOVB -1(BX)(DI*1), R10
  4458. CMPB R9, R10
  4459. JNE repeat_extend_back_end_encodeBlockAsm8B
  4460. LEAL -1(DI), DI
  4461. DECL SI
  4462. JNZ repeat_extend_back_loop_encodeBlockAsm8B
  4463. repeat_extend_back_end_encodeBlockAsm8B:
  4464. MOVL DI, SI
  4465. SUBL 12(SP), SI
  4466. LEAQ 3(CX)(SI*1), SI
  4467. CMPQ SI, (SP)
  4468. JB repeat_dst_size_check_encodeBlockAsm8B
  4469. MOVQ $0x00000000, ret+56(FP)
  4470. RET
  4471. repeat_dst_size_check_encodeBlockAsm8B:
  4472. MOVL 12(SP), SI
  4473. CMPL SI, DI
  4474. JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
  4475. MOVL DI, R9
  4476. MOVL DI, 12(SP)
  4477. LEAQ (BX)(SI*1), R10
  4478. SUBL SI, R9
  4479. LEAL -1(R9), SI
  4480. CMPL SI, $0x3c
  4481. JB one_byte_repeat_emit_encodeBlockAsm8B
  4482. CMPL SI, $0x00000100
  4483. JB two_bytes_repeat_emit_encodeBlockAsm8B
  4484. JB three_bytes_repeat_emit_encodeBlockAsm8B
  4485. three_bytes_repeat_emit_encodeBlockAsm8B:
  4486. MOVB $0xf4, (CX)
  4487. MOVW SI, 1(CX)
  4488. ADDQ $0x03, CX
  4489. JMP memmove_long_repeat_emit_encodeBlockAsm8B
  4490. two_bytes_repeat_emit_encodeBlockAsm8B:
  4491. MOVB $0xf0, (CX)
  4492. MOVB SI, 1(CX)
  4493. ADDQ $0x02, CX
  4494. CMPL SI, $0x40
  4495. JB memmove_repeat_emit_encodeBlockAsm8B
  4496. JMP memmove_long_repeat_emit_encodeBlockAsm8B
  4497. one_byte_repeat_emit_encodeBlockAsm8B:
  4498. SHLB $0x02, SI
  4499. MOVB SI, (CX)
  4500. ADDQ $0x01, CX
  4501. memmove_repeat_emit_encodeBlockAsm8B:
  4502. LEAQ (CX)(R9*1), SI
  4503. // genMemMoveShort
  4504. CMPQ R9, $0x08
  4505. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
  4506. CMPQ R9, $0x10
  4507. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
  4508. CMPQ R9, $0x20
  4509. JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
  4510. JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
  4511. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
  4512. MOVQ (R10), R11
  4513. MOVQ R11, (CX)
  4514. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4515. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
  4516. MOVQ (R10), R11
  4517. MOVQ -8(R10)(R9*1), R10
  4518. MOVQ R11, (CX)
  4519. MOVQ R10, -8(CX)(R9*1)
  4520. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4521. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
  4522. MOVOU (R10), X0
  4523. MOVOU -16(R10)(R9*1), X1
  4524. MOVOU X0, (CX)
  4525. MOVOU X1, -16(CX)(R9*1)
  4526. JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
  4527. emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
  4528. MOVOU (R10), X0
  4529. MOVOU 16(R10), X1
  4530. MOVOU -32(R10)(R9*1), X2
  4531. MOVOU -16(R10)(R9*1), X3
  4532. MOVOU X0, (CX)
  4533. MOVOU X1, 16(CX)
  4534. MOVOU X2, -32(CX)(R9*1)
  4535. MOVOU X3, -16(CX)(R9*1)
  4536. memmove_end_copy_repeat_emit_encodeBlockAsm8B:
  4537. MOVQ SI, CX
  4538. JMP emit_literal_done_repeat_emit_encodeBlockAsm8B
  4539. memmove_long_repeat_emit_encodeBlockAsm8B:
  4540. LEAQ (CX)(R9*1), SI
  4541. // genMemMoveLong
  4542. MOVOU (R10), X0
  4543. MOVOU 16(R10), X1
  4544. MOVOU -32(R10)(R9*1), X2
  4545. MOVOU -16(R10)(R9*1), X3
  4546. MOVQ R9, R12
  4547. SHRQ $0x05, R12
  4548. MOVQ CX, R11
  4549. ANDL $0x0000001f, R11
  4550. MOVQ $0x00000040, R13
  4551. SUBQ R11, R13
  4552. DECQ R12
  4553. JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  4554. LEAQ -32(R10)(R13*1), R11
  4555. LEAQ -32(CX)(R13*1), R14
  4556. emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
  4557. MOVOU (R11), X4
  4558. MOVOU 16(R11), X5
  4559. MOVOA X4, (R14)
  4560. MOVOA X5, 16(R14)
  4561. ADDQ $0x20, R14
  4562. ADDQ $0x20, R11
  4563. ADDQ $0x20, R13
  4564. DECQ R12
  4565. JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
  4566. emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
  4567. MOVOU -32(R10)(R13*1), X4
  4568. MOVOU -16(R10)(R13*1), X5
  4569. MOVOA X4, -32(CX)(R13*1)
  4570. MOVOA X5, -16(CX)(R13*1)
  4571. ADDQ $0x20, R13
  4572. CMPQ R9, R13
  4573. JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  4574. MOVOU X0, (CX)
  4575. MOVOU X1, 16(CX)
  4576. MOVOU X2, -32(CX)(R9*1)
  4577. MOVOU X3, -16(CX)(R9*1)
  4578. MOVQ SI, CX
  4579. emit_literal_done_repeat_emit_encodeBlockAsm8B:
  4580. ADDL $0x05, DX
  4581. MOVL DX, SI
  4582. SUBL 16(SP), SI
  4583. MOVQ src_len+32(FP), R9
  4584. SUBL DX, R9
  4585. LEAQ (BX)(DX*1), R10
  4586. LEAQ (BX)(SI*1), SI
  4587. // matchLen
  4588. XORL R12, R12
  4589. matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
  4590. CMPL R9, $0x10
  4591. JB matchlen_match8_repeat_extend_encodeBlockAsm8B
  4592. MOVQ (R10)(R12*1), R11
  4593. MOVQ 8(R10)(R12*1), R13
  4594. XORQ (SI)(R12*1), R11
  4595. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
  4596. XORQ 8(SI)(R12*1), R13
  4597. JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B
  4598. LEAL -16(R9), R9
  4599. LEAL 16(R12), R12
  4600. JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
  4601. matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
  4602. #ifdef GOAMD64_v3
  4603. TZCNTQ R13, R13
  4604. #else
  4605. BSFQ R13, R13
  4606. #endif
  4607. SARQ $0x03, R13
  4608. LEAL 8(R12)(R13*1), R12
  4609. JMP repeat_extend_forward_end_encodeBlockAsm8B
  4610. matchlen_match8_repeat_extend_encodeBlockAsm8B:
  4611. CMPL R9, $0x08
  4612. JB matchlen_match4_repeat_extend_encodeBlockAsm8B
  4613. MOVQ (R10)(R12*1), R11
  4614. XORQ (SI)(R12*1), R11
  4615. JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
  4616. LEAL -8(R9), R9
  4617. LEAL 8(R12), R12
  4618. JMP matchlen_match4_repeat_extend_encodeBlockAsm8B
  4619. matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
  4620. #ifdef GOAMD64_v3
  4621. TZCNTQ R11, R11
  4622. #else
  4623. BSFQ R11, R11
  4624. #endif
  4625. SARQ $0x03, R11
  4626. LEAL (R12)(R11*1), R12
  4627. JMP repeat_extend_forward_end_encodeBlockAsm8B
  4628. matchlen_match4_repeat_extend_encodeBlockAsm8B:
  4629. CMPL R9, $0x04
  4630. JB matchlen_match2_repeat_extend_encodeBlockAsm8B
  4631. MOVL (R10)(R12*1), R11
  4632. CMPL (SI)(R12*1), R11
  4633. JNE matchlen_match2_repeat_extend_encodeBlockAsm8B
  4634. LEAL -4(R9), R9
  4635. LEAL 4(R12), R12
  4636. matchlen_match2_repeat_extend_encodeBlockAsm8B:
  4637. CMPL R9, $0x01
  4638. JE matchlen_match1_repeat_extend_encodeBlockAsm8B
  4639. JB repeat_extend_forward_end_encodeBlockAsm8B
  4640. MOVW (R10)(R12*1), R11
  4641. CMPW (SI)(R12*1), R11
  4642. JNE matchlen_match1_repeat_extend_encodeBlockAsm8B
  4643. LEAL 2(R12), R12
  4644. SUBL $0x02, R9
  4645. JZ repeat_extend_forward_end_encodeBlockAsm8B
  4646. matchlen_match1_repeat_extend_encodeBlockAsm8B:
  4647. MOVB (R10)(R12*1), R11
  4648. CMPB (SI)(R12*1), R11
  4649. JNE repeat_extend_forward_end_encodeBlockAsm8B
  4650. LEAL 1(R12), R12
  4651. repeat_extend_forward_end_encodeBlockAsm8B:
  4652. ADDL R12, DX
  4653. MOVL DX, SI
  4654. SUBL DI, SI
  4655. MOVL 16(SP), DI
  4656. TESTL R8, R8
  4657. JZ repeat_as_copy_encodeBlockAsm8B
  4658. // emitRepeat
  4659. MOVL SI, DI
  4660. LEAL -4(SI), SI
  4661. CMPL DI, $0x08
  4662. JBE repeat_two_match_repeat_encodeBlockAsm8B
  4663. CMPL DI, $0x0c
  4664. JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
  4665. cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
  4666. CMPL SI, $0x00000104
  4667. JB repeat_three_match_repeat_encodeBlockAsm8B
  4668. LEAL -256(SI), SI
  4669. MOVW $0x0019, (CX)
  4670. MOVW SI, 2(CX)
  4671. ADDQ $0x04, CX
  4672. JMP repeat_end_emit_encodeBlockAsm8B
  4673. repeat_three_match_repeat_encodeBlockAsm8B:
  4674. LEAL -4(SI), SI
  4675. MOVW $0x0015, (CX)
  4676. MOVB SI, 2(CX)
  4677. ADDQ $0x03, CX
  4678. JMP repeat_end_emit_encodeBlockAsm8B
  4679. repeat_two_match_repeat_encodeBlockAsm8B:
  4680. SHLL $0x02, SI
  4681. ORL $0x01, SI
  4682. MOVW SI, (CX)
  4683. ADDQ $0x02, CX
  4684. JMP repeat_end_emit_encodeBlockAsm8B
  4685. XORQ R8, R8
  4686. LEAL 1(R8)(SI*4), SI
  4687. MOVB DI, 1(CX)
  4688. SARL $0x08, DI
  4689. SHLL $0x05, DI
  4690. ORL DI, SI
  4691. MOVB SI, (CX)
  4692. ADDQ $0x02, CX
  4693. JMP repeat_end_emit_encodeBlockAsm8B
  4694. repeat_as_copy_encodeBlockAsm8B:
  4695. // emitCopy
  4696. CMPL SI, $0x40
  4697. JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
  4698. CMPL DI, $0x00000800
  4699. JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B
  4700. MOVL $0x00000001, R8
  4701. LEAL 16(R8), R8
  4702. MOVB DI, 1(CX)
  4703. SHRL $0x08, DI
  4704. SHLL $0x05, DI
  4705. ORL DI, R8
  4706. MOVB R8, (CX)
  4707. ADDQ $0x02, CX
  4708. SUBL $0x08, SI
  4709. // emitRepeat
  4710. LEAL -4(SI), SI
  4711. JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  4712. MOVL SI, DI
  4713. LEAL -4(SI), SI
  4714. CMPL DI, $0x08
  4715. JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  4716. CMPL DI, $0x0c
  4717. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  4718. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
  4719. CMPL SI, $0x00000104
  4720. JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
  4721. LEAL -256(SI), SI
  4722. MOVW $0x0019, (CX)
  4723. MOVW SI, 2(CX)
  4724. ADDQ $0x04, CX
  4725. JMP repeat_end_emit_encodeBlockAsm8B
  4726. repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
  4727. LEAL -4(SI), SI
  4728. MOVW $0x0015, (CX)
  4729. MOVB SI, 2(CX)
  4730. ADDQ $0x03, CX
  4731. JMP repeat_end_emit_encodeBlockAsm8B
  4732. repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
  4733. SHLL $0x02, SI
  4734. ORL $0x01, SI
  4735. MOVW SI, (CX)
  4736. ADDQ $0x02, CX
  4737. JMP repeat_end_emit_encodeBlockAsm8B
  4738. XORQ R8, R8
  4739. LEAL 1(R8)(SI*4), SI
  4740. MOVB DI, 1(CX)
  4741. SARL $0x08, DI
  4742. SHLL $0x05, DI
  4743. ORL DI, SI
  4744. MOVB SI, (CX)
  4745. ADDQ $0x02, CX
  4746. JMP repeat_end_emit_encodeBlockAsm8B
  4747. long_offset_short_repeat_as_copy_encodeBlockAsm8B:
  4748. MOVB $0xee, (CX)
  4749. MOVW DI, 1(CX)
  4750. LEAL -60(SI), SI
  4751. ADDQ $0x03, CX
  4752. // emitRepeat
  4753. MOVL SI, DI
  4754. LEAL -4(SI), SI
  4755. CMPL DI, $0x08
  4756. JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4757. CMPL DI, $0x0c
  4758. JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4759. cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4760. CMPL SI, $0x00000104
  4761. JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
  4762. LEAL -256(SI), SI
  4763. MOVW $0x0019, (CX)
  4764. MOVW SI, 2(CX)
  4765. ADDQ $0x04, CX
  4766. JMP repeat_end_emit_encodeBlockAsm8B
  4767. repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4768. LEAL -4(SI), SI
  4769. MOVW $0x0015, (CX)
  4770. MOVB SI, 2(CX)
  4771. ADDQ $0x03, CX
  4772. JMP repeat_end_emit_encodeBlockAsm8B
  4773. repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
  4774. SHLL $0x02, SI
  4775. ORL $0x01, SI
  4776. MOVW SI, (CX)
  4777. ADDQ $0x02, CX
  4778. JMP repeat_end_emit_encodeBlockAsm8B
  4779. XORQ R8, R8
  4780. LEAL 1(R8)(SI*4), SI
  4781. MOVB DI, 1(CX)
  4782. SARL $0x08, DI
  4783. SHLL $0x05, DI
  4784. ORL DI, SI
  4785. MOVB SI, (CX)
  4786. ADDQ $0x02, CX
  4787. JMP repeat_end_emit_encodeBlockAsm8B
  4788. two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
  4789. MOVL SI, R8
  4790. SHLL $0x02, R8
  4791. CMPL SI, $0x0c
  4792. JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
  4793. LEAL -15(R8), R8
  4794. MOVB DI, 1(CX)
  4795. SHRL $0x08, DI
  4796. SHLL $0x05, DI
  4797. ORL DI, R8
  4798. MOVB R8, (CX)
  4799. ADDQ $0x02, CX
  4800. JMP repeat_end_emit_encodeBlockAsm8B
  4801. emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
  4802. LEAL -2(R8), R8
  4803. MOVB R8, (CX)
  4804. MOVW DI, 1(CX)
  4805. ADDQ $0x03, CX
  4806. repeat_end_emit_encodeBlockAsm8B:
  4807. MOVL DX, 12(SP)
  4808. JMP search_loop_encodeBlockAsm8B
  4809. no_repeat_found_encodeBlockAsm8B:
  4810. CMPL (BX)(SI*1), DI
  4811. JEQ candidate_match_encodeBlockAsm8B
  4812. SHRQ $0x08, DI
  4813. MOVL (AX)(R10*4), SI
  4814. LEAL 2(DX), R9
  4815. CMPL (BX)(R8*1), DI
  4816. JEQ candidate2_match_encodeBlockAsm8B
  4817. MOVL R9, (AX)(R10*4)
  4818. SHRQ $0x08, DI
  4819. CMPL (BX)(SI*1), DI
  4820. JEQ candidate3_match_encodeBlockAsm8B
  4821. MOVL 20(SP), DX
  4822. JMP search_loop_encodeBlockAsm8B
  4823. candidate3_match_encodeBlockAsm8B:
  4824. ADDL $0x02, DX
  4825. JMP candidate_match_encodeBlockAsm8B
  4826. candidate2_match_encodeBlockAsm8B:
  4827. MOVL R9, (AX)(R10*4)
  4828. INCL DX
  4829. MOVL R8, SI
  4830. candidate_match_encodeBlockAsm8B:
  4831. MOVL 12(SP), DI
  4832. TESTL SI, SI
  4833. JZ match_extend_back_end_encodeBlockAsm8B
  4834. match_extend_back_loop_encodeBlockAsm8B:
  4835. CMPL DX, DI
  4836. JBE match_extend_back_end_encodeBlockAsm8B
  4837. MOVB -1(BX)(SI*1), R8
  4838. MOVB -1(BX)(DX*1), R9
  4839. CMPB R8, R9
  4840. JNE match_extend_back_end_encodeBlockAsm8B
  4841. LEAL -1(DX), DX
  4842. DECL SI
  4843. JZ match_extend_back_end_encodeBlockAsm8B
  4844. JMP match_extend_back_loop_encodeBlockAsm8B
  4845. match_extend_back_end_encodeBlockAsm8B:
  4846. MOVL DX, DI
  4847. SUBL 12(SP), DI
  4848. LEAQ 3(CX)(DI*1), DI
  4849. CMPQ DI, (SP)
  4850. JB match_dst_size_check_encodeBlockAsm8B
  4851. MOVQ $0x00000000, ret+56(FP)
  4852. RET
  4853. match_dst_size_check_encodeBlockAsm8B:
  4854. MOVL DX, DI
  4855. MOVL 12(SP), R8
  4856. CMPL R8, DI
  4857. JEQ emit_literal_done_match_emit_encodeBlockAsm8B
  4858. MOVL DI, R9
  4859. MOVL DI, 12(SP)
  4860. LEAQ (BX)(R8*1), DI
  4861. SUBL R8, R9
  4862. LEAL -1(R9), R8
  4863. CMPL R8, $0x3c
  4864. JB one_byte_match_emit_encodeBlockAsm8B
  4865. CMPL R8, $0x00000100
  4866. JB two_bytes_match_emit_encodeBlockAsm8B
  4867. JB three_bytes_match_emit_encodeBlockAsm8B
  4868. three_bytes_match_emit_encodeBlockAsm8B:
  4869. MOVB $0xf4, (CX)
  4870. MOVW R8, 1(CX)
  4871. ADDQ $0x03, CX
  4872. JMP memmove_long_match_emit_encodeBlockAsm8B
  4873. two_bytes_match_emit_encodeBlockAsm8B:
  4874. MOVB $0xf0, (CX)
  4875. MOVB R8, 1(CX)
  4876. ADDQ $0x02, CX
  4877. CMPL R8, $0x40
  4878. JB memmove_match_emit_encodeBlockAsm8B
  4879. JMP memmove_long_match_emit_encodeBlockAsm8B
  4880. one_byte_match_emit_encodeBlockAsm8B:
  4881. SHLB $0x02, R8
  4882. MOVB R8, (CX)
  4883. ADDQ $0x01, CX
  4884. memmove_match_emit_encodeBlockAsm8B:
  4885. LEAQ (CX)(R9*1), R8
  4886. // genMemMoveShort
  4887. CMPQ R9, $0x08
  4888. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
  4889. CMPQ R9, $0x10
  4890. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
  4891. CMPQ R9, $0x20
  4892. JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
  4893. JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
  4894. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
  4895. MOVQ (DI), R10
  4896. MOVQ R10, (CX)
  4897. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4898. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
  4899. MOVQ (DI), R10
  4900. MOVQ -8(DI)(R9*1), DI
  4901. MOVQ R10, (CX)
  4902. MOVQ DI, -8(CX)(R9*1)
  4903. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4904. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
  4905. MOVOU (DI), X0
  4906. MOVOU -16(DI)(R9*1), X1
  4907. MOVOU X0, (CX)
  4908. MOVOU X1, -16(CX)(R9*1)
  4909. JMP memmove_end_copy_match_emit_encodeBlockAsm8B
  4910. emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
  4911. MOVOU (DI), X0
  4912. MOVOU 16(DI), X1
  4913. MOVOU -32(DI)(R9*1), X2
  4914. MOVOU -16(DI)(R9*1), X3
  4915. MOVOU X0, (CX)
  4916. MOVOU X1, 16(CX)
  4917. MOVOU X2, -32(CX)(R9*1)
  4918. MOVOU X3, -16(CX)(R9*1)
  4919. memmove_end_copy_match_emit_encodeBlockAsm8B:
  4920. MOVQ R8, CX
  4921. JMP emit_literal_done_match_emit_encodeBlockAsm8B
  4922. memmove_long_match_emit_encodeBlockAsm8B:
  4923. LEAQ (CX)(R9*1), R8
  4924. // genMemMoveLong
  4925. MOVOU (DI), X0
  4926. MOVOU 16(DI), X1
  4927. MOVOU -32(DI)(R9*1), X2
  4928. MOVOU -16(DI)(R9*1), X3
  4929. MOVQ R9, R11
  4930. SHRQ $0x05, R11
  4931. MOVQ CX, R10
  4932. ANDL $0x0000001f, R10
  4933. MOVQ $0x00000040, R12
  4934. SUBQ R10, R12
  4935. DECQ R11
  4936. JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  4937. LEAQ -32(DI)(R12*1), R10
  4938. LEAQ -32(CX)(R12*1), R13
  4939. emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
  4940. MOVOU (R10), X4
  4941. MOVOU 16(R10), X5
  4942. MOVOA X4, (R13)
  4943. MOVOA X5, 16(R13)
  4944. ADDQ $0x20, R13
  4945. ADDQ $0x20, R10
  4946. ADDQ $0x20, R12
  4947. DECQ R11
  4948. JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
  4949. emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
  4950. MOVOU -32(DI)(R12*1), X4
  4951. MOVOU -16(DI)(R12*1), X5
  4952. MOVOA X4, -32(CX)(R12*1)
  4953. MOVOA X5, -16(CX)(R12*1)
  4954. ADDQ $0x20, R12
  4955. CMPQ R9, R12
  4956. JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
  4957. MOVOU X0, (CX)
  4958. MOVOU X1, 16(CX)
  4959. MOVOU X2, -32(CX)(R9*1)
  4960. MOVOU X3, -16(CX)(R9*1)
  4961. MOVQ R8, CX
  4962. emit_literal_done_match_emit_encodeBlockAsm8B:
  4963. match_nolit_loop_encodeBlockAsm8B:
  4964. MOVL DX, DI
  4965. SUBL SI, DI
  4966. MOVL DI, 16(SP)
  4967. ADDL $0x04, DX
  4968. ADDL $0x04, SI
  4969. MOVQ src_len+32(FP), DI
  4970. SUBL DX, DI
  4971. LEAQ (BX)(DX*1), R8
  4972. LEAQ (BX)(SI*1), SI
  4973. // matchLen
  4974. XORL R10, R10
  4975. matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
  4976. CMPL DI, $0x10
  4977. JB matchlen_match8_match_nolit_encodeBlockAsm8B
  4978. MOVQ (R8)(R10*1), R9
  4979. MOVQ 8(R8)(R10*1), R11
  4980. XORQ (SI)(R10*1), R9
  4981. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B
  4982. XORQ 8(SI)(R10*1), R11
  4983. JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B
  4984. LEAL -16(DI), DI
  4985. LEAL 16(R10), R10
  4986. JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B
  4987. matchlen_bsf_16match_nolit_encodeBlockAsm8B:
  4988. #ifdef GOAMD64_v3
  4989. TZCNTQ R11, R11
  4990. #else
  4991. BSFQ R11, R11
  4992. #endif
  4993. SARQ $0x03, R11
  4994. LEAL 8(R10)(R11*1), R10
  4995. JMP match_nolit_end_encodeBlockAsm8B
  4996. matchlen_match8_match_nolit_encodeBlockAsm8B:
  4997. CMPL DI, $0x08
  4998. JB matchlen_match4_match_nolit_encodeBlockAsm8B
  4999. MOVQ (R8)(R10*1), R9
  5000. XORQ (SI)(R10*1), R9
  5001. JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B
  5002. LEAL -8(DI), DI
  5003. LEAL 8(R10), R10
  5004. JMP matchlen_match4_match_nolit_encodeBlockAsm8B
  5005. matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
  5006. #ifdef GOAMD64_v3
  5007. TZCNTQ R9, R9
  5008. #else
  5009. BSFQ R9, R9
  5010. #endif
  5011. SARQ $0x03, R9
  5012. LEAL (R10)(R9*1), R10
  5013. JMP match_nolit_end_encodeBlockAsm8B
  5014. matchlen_match4_match_nolit_encodeBlockAsm8B:
  5015. CMPL DI, $0x04
  5016. JB matchlen_match2_match_nolit_encodeBlockAsm8B
  5017. MOVL (R8)(R10*1), R9
  5018. CMPL (SI)(R10*1), R9
  5019. JNE matchlen_match2_match_nolit_encodeBlockAsm8B
  5020. LEAL -4(DI), DI
  5021. LEAL 4(R10), R10
  5022. matchlen_match2_match_nolit_encodeBlockAsm8B:
  5023. CMPL DI, $0x01
  5024. JE matchlen_match1_match_nolit_encodeBlockAsm8B
  5025. JB match_nolit_end_encodeBlockAsm8B
  5026. MOVW (R8)(R10*1), R9
  5027. CMPW (SI)(R10*1), R9
  5028. JNE matchlen_match1_match_nolit_encodeBlockAsm8B
  5029. LEAL 2(R10), R10
  5030. SUBL $0x02, DI
  5031. JZ match_nolit_end_encodeBlockAsm8B
  5032. matchlen_match1_match_nolit_encodeBlockAsm8B:
  5033. MOVB (R8)(R10*1), R9
  5034. CMPB (SI)(R10*1), R9
  5035. JNE match_nolit_end_encodeBlockAsm8B
  5036. LEAL 1(R10), R10
  5037. match_nolit_end_encodeBlockAsm8B:
  5038. ADDL R10, DX
  5039. MOVL 16(SP), SI
  5040. ADDL $0x04, R10
  5041. MOVL DX, 12(SP)
  5042. // emitCopy
  5043. CMPL R10, $0x40
  5044. JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B
  5045. CMPL SI, $0x00000800
  5046. JAE long_offset_short_match_nolit_encodeBlockAsm8B
  5047. MOVL $0x00000001, DI
  5048. LEAL 16(DI), DI
  5049. MOVB SI, 1(CX)
  5050. SHRL $0x08, SI
  5051. SHLL $0x05, SI
  5052. ORL SI, DI
  5053. MOVB DI, (CX)
  5054. ADDQ $0x02, CX
  5055. SUBL $0x08, R10
  5056. // emitRepeat
  5057. LEAL -4(R10), R10
  5058. JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5059. MOVL R10, SI
  5060. LEAL -4(R10), R10
  5061. CMPL SI, $0x08
  5062. JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5063. CMPL SI, $0x0c
  5064. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5065. cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
  5066. CMPL R10, $0x00000104
  5067. JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
  5068. LEAL -256(R10), R10
  5069. MOVW $0x0019, (CX)
  5070. MOVW R10, 2(CX)
  5071. ADDQ $0x04, CX
  5072. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5073. repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
  5074. LEAL -4(R10), R10
  5075. MOVW $0x0015, (CX)
  5076. MOVB R10, 2(CX)
  5077. ADDQ $0x03, CX
  5078. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5079. repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
  5080. SHLL $0x02, R10
  5081. ORL $0x01, R10
  5082. MOVW R10, (CX)
  5083. ADDQ $0x02, CX
  5084. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5085. XORQ DI, DI
  5086. LEAL 1(DI)(R10*4), R10
  5087. MOVB SI, 1(CX)
  5088. SARL $0x08, SI
  5089. SHLL $0x05, SI
  5090. ORL SI, R10
  5091. MOVB R10, (CX)
  5092. ADDQ $0x02, CX
  5093. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5094. long_offset_short_match_nolit_encodeBlockAsm8B:
  5095. MOVB $0xee, (CX)
  5096. MOVW SI, 1(CX)
  5097. LEAL -60(R10), R10
  5098. ADDQ $0x03, CX
  5099. // emitRepeat
  5100. MOVL R10, SI
  5101. LEAL -4(R10), R10
  5102. CMPL SI, $0x08
  5103. JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
  5104. CMPL SI, $0x0c
  5105. JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
  5106. cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
  5107. CMPL R10, $0x00000104
  5108. JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
  5109. LEAL -256(R10), R10
  5110. MOVW $0x0019, (CX)
  5111. MOVW R10, 2(CX)
  5112. ADDQ $0x04, CX
  5113. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5114. repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
  5115. LEAL -4(R10), R10
  5116. MOVW $0x0015, (CX)
  5117. MOVB R10, 2(CX)
  5118. ADDQ $0x03, CX
  5119. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5120. repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
  5121. SHLL $0x02, R10
  5122. ORL $0x01, R10
  5123. MOVW R10, (CX)
  5124. ADDQ $0x02, CX
  5125. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5126. XORQ DI, DI
  5127. LEAL 1(DI)(R10*4), R10
  5128. MOVB SI, 1(CX)
  5129. SARL $0x08, SI
  5130. SHLL $0x05, SI
  5131. ORL SI, R10
  5132. MOVB R10, (CX)
  5133. ADDQ $0x02, CX
  5134. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5135. two_byte_offset_short_match_nolit_encodeBlockAsm8B:
  5136. MOVL R10, DI
  5137. SHLL $0x02, DI
  5138. CMPL R10, $0x0c
  5139. JAE emit_copy_three_match_nolit_encodeBlockAsm8B
  5140. LEAL -15(DI), DI
  5141. MOVB SI, 1(CX)
  5142. SHRL $0x08, SI
  5143. SHLL $0x05, SI
  5144. ORL SI, DI
  5145. MOVB DI, (CX)
  5146. ADDQ $0x02, CX
  5147. JMP match_nolit_emitcopy_end_encodeBlockAsm8B
  5148. emit_copy_three_match_nolit_encodeBlockAsm8B:
  5149. LEAL -2(DI), DI
  5150. MOVB DI, (CX)
  5151. MOVW SI, 1(CX)
  5152. ADDQ $0x03, CX
  5153. match_nolit_emitcopy_end_encodeBlockAsm8B:
  5154. CMPL DX, 8(SP)
  5155. JAE emit_remainder_encodeBlockAsm8B
  5156. MOVQ -2(BX)(DX*1), DI
  5157. CMPQ CX, (SP)
  5158. JB match_nolit_dst_ok_encodeBlockAsm8B
  5159. MOVQ $0x00000000, ret+56(FP)
  5160. RET
  5161. match_nolit_dst_ok_encodeBlockAsm8B:
  5162. MOVQ $0x9e3779b1, R9
  5163. MOVQ DI, R8
  5164. SHRQ $0x10, DI
  5165. MOVQ DI, SI
  5166. SHLQ $0x20, R8
  5167. IMULQ R9, R8
  5168. SHRQ $0x38, R8
  5169. SHLQ $0x20, SI
  5170. IMULQ R9, SI
  5171. SHRQ $0x38, SI
  5172. LEAL -2(DX), R9
  5173. LEAQ (AX)(SI*4), R10
  5174. MOVL (R10), SI
  5175. MOVL R9, (AX)(R8*4)
  5176. MOVL DX, (R10)
  5177. CMPL (BX)(SI*1), DI
  5178. JEQ match_nolit_loop_encodeBlockAsm8B
  5179. INCL DX
  5180. JMP search_loop_encodeBlockAsm8B
  5181. emit_remainder_encodeBlockAsm8B:
  5182. MOVQ src_len+32(FP), AX
  5183. SUBL 12(SP), AX
  5184. LEAQ 3(CX)(AX*1), AX
  5185. CMPQ AX, (SP)
  5186. JB emit_remainder_ok_encodeBlockAsm8B
  5187. MOVQ $0x00000000, ret+56(FP)
  5188. RET
  5189. emit_remainder_ok_encodeBlockAsm8B:
  5190. MOVQ src_len+32(FP), AX
  5191. MOVL 12(SP), DX
  5192. CMPL DX, AX
  5193. JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
  5194. MOVL AX, SI
  5195. MOVL AX, 12(SP)
  5196. LEAQ (BX)(DX*1), AX
  5197. SUBL DX, SI
  5198. LEAL -1(SI), DX
  5199. CMPL DX, $0x3c
  5200. JB one_byte_emit_remainder_encodeBlockAsm8B
  5201. CMPL DX, $0x00000100
  5202. JB two_bytes_emit_remainder_encodeBlockAsm8B
  5203. JB three_bytes_emit_remainder_encodeBlockAsm8B
  5204. three_bytes_emit_remainder_encodeBlockAsm8B:
  5205. MOVB $0xf4, (CX)
  5206. MOVW DX, 1(CX)
  5207. ADDQ $0x03, CX
  5208. JMP memmove_long_emit_remainder_encodeBlockAsm8B
  5209. two_bytes_emit_remainder_encodeBlockAsm8B:
  5210. MOVB $0xf0, (CX)
  5211. MOVB DL, 1(CX)
  5212. ADDQ $0x02, CX
  5213. CMPL DX, $0x40
  5214. JB memmove_emit_remainder_encodeBlockAsm8B
  5215. JMP memmove_long_emit_remainder_encodeBlockAsm8B
  5216. one_byte_emit_remainder_encodeBlockAsm8B:
  5217. SHLB $0x02, DL
  5218. MOVB DL, (CX)
  5219. ADDQ $0x01, CX
  5220. memmove_emit_remainder_encodeBlockAsm8B:
  5221. LEAQ (CX)(SI*1), DX
  5222. MOVL SI, BX
  5223. // genMemMoveShort
  5224. CMPQ BX, $0x03
  5225. JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
  5226. JE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
  5227. CMPQ BX, $0x08
  5228. JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
  5229. CMPQ BX, $0x10
  5230. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
  5231. CMPQ BX, $0x20
  5232. JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
  5233. JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
  5234. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
  5235. MOVB (AX), SI
  5236. MOVB -1(AX)(BX*1), AL
  5237. MOVB SI, (CX)
  5238. MOVB AL, -1(CX)(BX*1)
  5239. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5240. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
  5241. MOVW (AX), SI
  5242. MOVB 2(AX), AL
  5243. MOVW SI, (CX)
  5244. MOVB AL, 2(CX)
  5245. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5246. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
  5247. MOVL (AX), SI
  5248. MOVL -4(AX)(BX*1), AX
  5249. MOVL SI, (CX)
  5250. MOVL AX, -4(CX)(BX*1)
  5251. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5252. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
  5253. MOVQ (AX), SI
  5254. MOVQ -8(AX)(BX*1), AX
  5255. MOVQ SI, (CX)
  5256. MOVQ AX, -8(CX)(BX*1)
  5257. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5258. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
  5259. MOVOU (AX), X0
  5260. MOVOU -16(AX)(BX*1), X1
  5261. MOVOU X0, (CX)
  5262. MOVOU X1, -16(CX)(BX*1)
  5263. JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
  5264. emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
  5265. MOVOU (AX), X0
  5266. MOVOU 16(AX), X1
  5267. MOVOU -32(AX)(BX*1), X2
  5268. MOVOU -16(AX)(BX*1), X3
  5269. MOVOU X0, (CX)
  5270. MOVOU X1, 16(CX)
  5271. MOVOU X2, -32(CX)(BX*1)
  5272. MOVOU X3, -16(CX)(BX*1)
  5273. memmove_end_copy_emit_remainder_encodeBlockAsm8B:
  5274. MOVQ DX, CX
  5275. JMP emit_literal_done_emit_remainder_encodeBlockAsm8B
  5276. memmove_long_emit_remainder_encodeBlockAsm8B:
  5277. LEAQ (CX)(SI*1), DX
  5278. MOVL SI, BX
  5279. // genMemMoveLong
  5280. MOVOU (AX), X0
  5281. MOVOU 16(AX), X1
  5282. MOVOU -32(AX)(BX*1), X2
  5283. MOVOU -16(AX)(BX*1), X3
  5284. MOVQ BX, DI
  5285. SHRQ $0x05, DI
  5286. MOVQ CX, SI
  5287. ANDL $0x0000001f, SI
  5288. MOVQ $0x00000040, R8
  5289. SUBQ SI, R8
  5290. DECQ DI
  5291. JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
  5292. LEAQ -32(AX)(R8*1), SI
  5293. LEAQ -32(CX)(R8*1), R9
  5294. emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
  5295. MOVOU (SI), X4
  5296. MOVOU 16(SI), X5
  5297. MOVOA X4, (R9)
  5298. MOVOA X5, 16(R9)
  5299. ADDQ $0x20, R9
  5300. ADDQ $0x20, SI
  5301. ADDQ $0x20, R8
  5302. DECQ DI
  5303. JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
  5304. emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
  5305. MOVOU -32(AX)(R8*1), X4
  5306. MOVOU -16(AX)(R8*1), X5
  5307. MOVOA X4, -32(CX)(R8*1)
  5308. MOVOA X5, -16(CX)(R8*1)
  5309. ADDQ $0x20, R8
  5310. CMPQ BX, R8
  5311. JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
  5312. MOVOU X0, (CX)
  5313. MOVOU X1, 16(CX)
  5314. MOVOU X2, -32(CX)(BX*1)
  5315. MOVOU X3, -16(CX)(BX*1)
  5316. MOVQ DX, CX
  5317. emit_literal_done_emit_remainder_encodeBlockAsm8B:
  5318. MOVQ dst_base+0(FP), AX
  5319. SUBQ AX, CX
  5320. MOVQ CX, ret+56(FP)
  5321. RET
  5322. // func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
  5323. // Requires: BMI, SSE2
  5324. TEXT ·encodeBetterBlockAsm(SB), $24-64
  5325. MOVQ tmp+48(FP), AX
  5326. MOVQ dst_base+0(FP), CX
  5327. MOVQ $0x00001200, DX
  5328. MOVQ AX, BX
  5329. PXOR X0, X0
  5330. zero_loop_encodeBetterBlockAsm:
  5331. MOVOU X0, (BX)
  5332. MOVOU X0, 16(BX)
  5333. MOVOU X0, 32(BX)
  5334. MOVOU X0, 48(BX)
  5335. MOVOU X0, 64(BX)
  5336. MOVOU X0, 80(BX)
  5337. MOVOU X0, 96(BX)
  5338. MOVOU X0, 112(BX)
  5339. ADDQ $0x80, BX
  5340. DECQ DX
  5341. JNZ zero_loop_encodeBetterBlockAsm
  5342. MOVL $0x00000000, 12(SP)
  5343. MOVQ src_len+32(FP), DX
  5344. LEAQ -6(DX), BX
  5345. LEAQ -8(DX), SI
  5346. MOVL SI, 8(SP)
  5347. SHRQ $0x05, DX
  5348. SUBL DX, BX
  5349. LEAQ (CX)(BX*1), BX
  5350. MOVQ BX, (SP)
  5351. MOVL $0x00000001, DX
  5352. MOVL $0x00000000, 16(SP)
  5353. MOVQ src_base+24(FP), BX
  5354. search_loop_encodeBetterBlockAsm:
  5355. MOVL DX, SI
  5356. SUBL 12(SP), SI
  5357. SHRL $0x07, SI
  5358. CMPL SI, $0x63
  5359. JBE check_maxskip_ok_encodeBetterBlockAsm
  5360. LEAL 100(DX), SI
  5361. JMP check_maxskip_cont_encodeBetterBlockAsm
  5362. check_maxskip_ok_encodeBetterBlockAsm:
  5363. LEAL 1(DX)(SI*1), SI
  5364. check_maxskip_cont_encodeBetterBlockAsm:
  5365. CMPL SI, 8(SP)
  5366. JAE emit_remainder_encodeBetterBlockAsm
  5367. MOVQ (BX)(DX*1), DI
  5368. MOVL SI, 20(SP)
  5369. MOVQ $0x00cf1bbcdcbfa563, R9
  5370. MOVQ $0x9e3779b1, SI
  5371. MOVQ DI, R10
  5372. MOVQ DI, R11
  5373. SHLQ $0x08, R10
  5374. IMULQ R9, R10
  5375. SHRQ $0x2f, R10
  5376. SHLQ $0x20, R11
  5377. IMULQ SI, R11
  5378. SHRQ $0x32, R11
  5379. MOVL (AX)(R10*4), SI
  5380. MOVL 524288(AX)(R11*4), R8
  5381. MOVL DX, (AX)(R10*4)
  5382. MOVL DX, 524288(AX)(R11*4)
  5383. MOVQ (BX)(SI*1), R10
  5384. MOVQ (BX)(R8*1), R11
  5385. CMPQ R10, DI
  5386. JEQ candidate_match_encodeBetterBlockAsm
  5387. CMPQ R11, DI
  5388. JNE no_short_found_encodeBetterBlockAsm
  5389. MOVL R8, SI
  5390. JMP candidate_match_encodeBetterBlockAsm
  5391. no_short_found_encodeBetterBlockAsm:
  5392. CMPL R10, DI
  5393. JEQ candidate_match_encodeBetterBlockAsm
  5394. CMPL R11, DI
  5395. JEQ candidateS_match_encodeBetterBlockAsm
  5396. MOVL 20(SP), DX
  5397. JMP search_loop_encodeBetterBlockAsm
  5398. candidateS_match_encodeBetterBlockAsm:
  5399. SHRQ $0x08, DI
  5400. MOVQ DI, R10
  5401. SHLQ $0x08, R10
  5402. IMULQ R9, R10
  5403. SHRQ $0x2f, R10
  5404. MOVL (AX)(R10*4), SI
  5405. INCL DX
  5406. MOVL DX, (AX)(R10*4)
  5407. CMPL (BX)(SI*1), DI
  5408. JEQ candidate_match_encodeBetterBlockAsm
  5409. DECL DX
  5410. MOVL R8, SI
  5411. candidate_match_encodeBetterBlockAsm:
  5412. MOVL 12(SP), DI
  5413. TESTL SI, SI
  5414. JZ match_extend_back_end_encodeBetterBlockAsm
  5415. match_extend_back_loop_encodeBetterBlockAsm:
  5416. CMPL DX, DI
  5417. JBE match_extend_back_end_encodeBetterBlockAsm
  5418. MOVB -1(BX)(SI*1), R8
  5419. MOVB -1(BX)(DX*1), R9
  5420. CMPB R8, R9
  5421. JNE match_extend_back_end_encodeBetterBlockAsm
  5422. LEAL -1(DX), DX
  5423. DECL SI
  5424. JZ match_extend_back_end_encodeBetterBlockAsm
  5425. JMP match_extend_back_loop_encodeBetterBlockAsm
  5426. match_extend_back_end_encodeBetterBlockAsm:
  5427. MOVL DX, DI
  5428. SUBL 12(SP), DI
  5429. LEAQ 5(CX)(DI*1), DI
  5430. CMPQ DI, (SP)
  5431. JB match_dst_size_check_encodeBetterBlockAsm
  5432. MOVQ $0x00000000, ret+56(FP)
  5433. RET
  5434. match_dst_size_check_encodeBetterBlockAsm:
  5435. MOVL DX, DI
  5436. ADDL $0x04, DX
  5437. ADDL $0x04, SI
  5438. MOVQ src_len+32(FP), R8
  5439. SUBL DX, R8
  5440. LEAQ (BX)(DX*1), R9
  5441. LEAQ (BX)(SI*1), R10
  5442. // matchLen
  5443. XORL R12, R12
  5444. matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
  5445. CMPL R8, $0x10
  5446. JB matchlen_match8_match_nolit_encodeBetterBlockAsm
  5447. MOVQ (R9)(R12*1), R11
  5448. MOVQ 8(R9)(R12*1), R13
  5449. XORQ (R10)(R12*1), R11
  5450. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
  5451. XORQ 8(R10)(R12*1), R13
  5452. JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm
  5453. LEAL -16(R8), R8
  5454. LEAL 16(R12), R12
  5455. JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
  5456. matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
  5457. #ifdef GOAMD64_v3
  5458. TZCNTQ R13, R13
  5459. #else
  5460. BSFQ R13, R13
  5461. #endif
  5462. SARQ $0x03, R13
  5463. LEAL 8(R12)(R13*1), R12
  5464. JMP match_nolit_end_encodeBetterBlockAsm
  5465. matchlen_match8_match_nolit_encodeBetterBlockAsm:
  5466. CMPL R8, $0x08
  5467. JB matchlen_match4_match_nolit_encodeBetterBlockAsm
  5468. MOVQ (R9)(R12*1), R11
  5469. XORQ (R10)(R12*1), R11
  5470. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
  5471. LEAL -8(R8), R8
  5472. LEAL 8(R12), R12
  5473. JMP matchlen_match4_match_nolit_encodeBetterBlockAsm
  5474. matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
  5475. #ifdef GOAMD64_v3
  5476. TZCNTQ R11, R11
  5477. #else
  5478. BSFQ R11, R11
  5479. #endif
  5480. SARQ $0x03, R11
  5481. LEAL (R12)(R11*1), R12
  5482. JMP match_nolit_end_encodeBetterBlockAsm
  5483. matchlen_match4_match_nolit_encodeBetterBlockAsm:
  5484. CMPL R8, $0x04
  5485. JB matchlen_match2_match_nolit_encodeBetterBlockAsm
  5486. MOVL (R9)(R12*1), R11
  5487. CMPL (R10)(R12*1), R11
  5488. JNE matchlen_match2_match_nolit_encodeBetterBlockAsm
  5489. LEAL -4(R8), R8
  5490. LEAL 4(R12), R12
  5491. matchlen_match2_match_nolit_encodeBetterBlockAsm:
  5492. CMPL R8, $0x01
  5493. JE matchlen_match1_match_nolit_encodeBetterBlockAsm
  5494. JB match_nolit_end_encodeBetterBlockAsm
  5495. MOVW (R9)(R12*1), R11
  5496. CMPW (R10)(R12*1), R11
  5497. JNE matchlen_match1_match_nolit_encodeBetterBlockAsm
  5498. LEAL 2(R12), R12
  5499. SUBL $0x02, R8
  5500. JZ match_nolit_end_encodeBetterBlockAsm
  5501. matchlen_match1_match_nolit_encodeBetterBlockAsm:
  5502. MOVB (R9)(R12*1), R11
  5503. CMPB (R10)(R12*1), R11
  5504. JNE match_nolit_end_encodeBetterBlockAsm
  5505. LEAL 1(R12), R12
  5506. match_nolit_end_encodeBetterBlockAsm:
  5507. MOVL DX, R8
  5508. SUBL SI, R8
  5509. // Check if repeat
  5510. CMPL 16(SP), R8
  5511. JEQ match_is_repeat_encodeBetterBlockAsm
  5512. CMPL R12, $0x01
  5513. JA match_length_ok_encodeBetterBlockAsm
  5514. CMPL R8, $0x0000ffff
  5515. JBE match_length_ok_encodeBetterBlockAsm
  5516. MOVL 20(SP), DX
  5517. INCL DX
  5518. JMP search_loop_encodeBetterBlockAsm
  5519. match_length_ok_encodeBetterBlockAsm:
  5520. MOVL R8, 16(SP)
  5521. MOVL 12(SP), SI
  5522. CMPL SI, DI
  5523. JEQ emit_literal_done_match_emit_encodeBetterBlockAsm
  5524. MOVL DI, R9
  5525. MOVL DI, 12(SP)
  5526. LEAQ (BX)(SI*1), R10
  5527. SUBL SI, R9
  5528. LEAL -1(R9), SI
  5529. CMPL SI, $0x3c
  5530. JB one_byte_match_emit_encodeBetterBlockAsm
  5531. CMPL SI, $0x00000100
  5532. JB two_bytes_match_emit_encodeBetterBlockAsm
  5533. CMPL SI, $0x00010000
  5534. JB three_bytes_match_emit_encodeBetterBlockAsm
  5535. CMPL SI, $0x01000000
  5536. JB four_bytes_match_emit_encodeBetterBlockAsm
  5537. MOVB $0xfc, (CX)
  5538. MOVL SI, 1(CX)
  5539. ADDQ $0x05, CX
  5540. JMP memmove_long_match_emit_encodeBetterBlockAsm
  5541. four_bytes_match_emit_encodeBetterBlockAsm:
  5542. MOVL SI, R11
  5543. SHRL $0x10, R11
  5544. MOVB $0xf8, (CX)
  5545. MOVW SI, 1(CX)
  5546. MOVB R11, 3(CX)
  5547. ADDQ $0x04, CX
  5548. JMP memmove_long_match_emit_encodeBetterBlockAsm
  5549. three_bytes_match_emit_encodeBetterBlockAsm:
  5550. MOVB $0xf4, (CX)
  5551. MOVW SI, 1(CX)
  5552. ADDQ $0x03, CX
  5553. JMP memmove_long_match_emit_encodeBetterBlockAsm
  5554. two_bytes_match_emit_encodeBetterBlockAsm:
  5555. MOVB $0xf0, (CX)
  5556. MOVB SI, 1(CX)
  5557. ADDQ $0x02, CX
  5558. CMPL SI, $0x40
  5559. JB memmove_match_emit_encodeBetterBlockAsm
  5560. JMP memmove_long_match_emit_encodeBetterBlockAsm
  5561. one_byte_match_emit_encodeBetterBlockAsm:
  5562. SHLB $0x02, SI
  5563. MOVB SI, (CX)
  5564. ADDQ $0x01, CX
  5565. memmove_match_emit_encodeBetterBlockAsm:
  5566. LEAQ (CX)(R9*1), SI
  5567. // genMemMoveShort
  5568. CMPQ R9, $0x04
  5569. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
  5570. CMPQ R9, $0x08
  5571. JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
  5572. CMPQ R9, $0x10
  5573. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
  5574. CMPQ R9, $0x20
  5575. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
  5576. JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
  5577. emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
  5578. MOVL (R10), R11
  5579. MOVL R11, (CX)
  5580. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
  5581. emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
  5582. MOVL (R10), R11
  5583. MOVL -4(R10)(R9*1), R10
  5584. MOVL R11, (CX)
  5585. MOVL R10, -4(CX)(R9*1)
  5586. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
  5587. emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
  5588. MOVQ (R10), R11
  5589. MOVQ -8(R10)(R9*1), R10
  5590. MOVQ R11, (CX)
  5591. MOVQ R10, -8(CX)(R9*1)
  5592. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
  5593. emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
  5594. MOVOU (R10), X0
  5595. MOVOU -16(R10)(R9*1), X1
  5596. MOVOU X0, (CX)
  5597. MOVOU X1, -16(CX)(R9*1)
  5598. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
  5599. emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
  5600. MOVOU (R10), X0
  5601. MOVOU 16(R10), X1
  5602. MOVOU -32(R10)(R9*1), X2
  5603. MOVOU -16(R10)(R9*1), X3
  5604. MOVOU X0, (CX)
  5605. MOVOU X1, 16(CX)
  5606. MOVOU X2, -32(CX)(R9*1)
  5607. MOVOU X3, -16(CX)(R9*1)
  5608. memmove_end_copy_match_emit_encodeBetterBlockAsm:
  5609. MOVQ SI, CX
  5610. JMP emit_literal_done_match_emit_encodeBetterBlockAsm
  5611. memmove_long_match_emit_encodeBetterBlockAsm:
  5612. LEAQ (CX)(R9*1), SI
  5613. // genMemMoveLong
  5614. MOVOU (R10), X0
  5615. MOVOU 16(R10), X1
  5616. MOVOU -32(R10)(R9*1), X2
  5617. MOVOU -16(R10)(R9*1), X3
  5618. MOVQ R9, R13
  5619. SHRQ $0x05, R13
  5620. MOVQ CX, R11
  5621. ANDL $0x0000001f, R11
  5622. MOVQ $0x00000040, R14
  5623. SUBQ R11, R14
  5624. DECQ R13
  5625. JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
  5626. LEAQ -32(R10)(R14*1), R11
  5627. LEAQ -32(CX)(R14*1), R15
  5628. emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
  5629. MOVOU (R11), X4
  5630. MOVOU 16(R11), X5
  5631. MOVOA X4, (R15)
  5632. MOVOA X5, 16(R15)
  5633. ADDQ $0x20, R15
  5634. ADDQ $0x20, R11
  5635. ADDQ $0x20, R14
  5636. DECQ R13
  5637. JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
  5638. emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
  5639. MOVOU -32(R10)(R14*1), X4
  5640. MOVOU -16(R10)(R14*1), X5
  5641. MOVOA X4, -32(CX)(R14*1)
  5642. MOVOA X5, -16(CX)(R14*1)
  5643. ADDQ $0x20, R14
  5644. CMPQ R9, R14
  5645. JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
  5646. MOVOU X0, (CX)
  5647. MOVOU X1, 16(CX)
  5648. MOVOU X2, -32(CX)(R9*1)
  5649. MOVOU X3, -16(CX)(R9*1)
  5650. MOVQ SI, CX
  5651. emit_literal_done_match_emit_encodeBetterBlockAsm:
  5652. ADDL R12, DX
  5653. ADDL $0x04, R12
  5654. MOVL DX, 12(SP)
  5655. // emitCopy
  5656. CMPL R8, $0x00010000
  5657. JB two_byte_offset_match_nolit_encodeBetterBlockAsm
  5658. CMPL R12, $0x40
  5659. JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm
  5660. MOVB $0xff, (CX)
  5661. MOVL R8, 1(CX)
  5662. LEAL -64(R12), R12
  5663. ADDQ $0x05, CX
  5664. CMPL R12, $0x04
  5665. JB four_bytes_remain_match_nolit_encodeBetterBlockAsm
  5666. // emitRepeat
  5667. emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
  5668. MOVL R12, SI
  5669. LEAL -4(R12), R12
  5670. CMPL SI, $0x08
  5671. JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
  5672. CMPL SI, $0x0c
  5673. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
  5674. CMPL R8, $0x00000800
  5675. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
  5676. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
  5677. CMPL R12, $0x00000104
  5678. JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
  5679. CMPL R12, $0x00010100
  5680. JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
  5681. CMPL R12, $0x0100ffff
  5682. JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
  5683. LEAL -16842747(R12), R12
  5684. MOVL $0xfffb001d, (CX)
  5685. MOVB $0xff, 4(CX)
  5686. ADDQ $0x05, CX
  5687. JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
  5688. repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
  5689. LEAL -65536(R12), R12
  5690. MOVL R12, R8
  5691. MOVW $0x001d, (CX)
  5692. MOVW R12, 2(CX)
  5693. SARL $0x10, R8
  5694. MOVB R8, 4(CX)
  5695. ADDQ $0x05, CX
  5696. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5697. repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
  5698. LEAL -256(R12), R12
  5699. MOVW $0x0019, (CX)
  5700. MOVW R12, 2(CX)
  5701. ADDQ $0x04, CX
  5702. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5703. repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
  5704. LEAL -4(R12), R12
  5705. MOVW $0x0015, (CX)
  5706. MOVB R12, 2(CX)
  5707. ADDQ $0x03, CX
  5708. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5709. repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
  5710. SHLL $0x02, R12
  5711. ORL $0x01, R12
  5712. MOVW R12, (CX)
  5713. ADDQ $0x02, CX
  5714. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5715. repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
  5716. XORQ SI, SI
  5717. LEAL 1(SI)(R12*4), R12
  5718. MOVB R8, 1(CX)
  5719. SARL $0x08, R8
  5720. SHLL $0x05, R8
  5721. ORL R8, R12
  5722. MOVB R12, (CX)
  5723. ADDQ $0x02, CX
  5724. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5725. four_bytes_remain_match_nolit_encodeBetterBlockAsm:
  5726. TESTL R12, R12
  5727. JZ match_nolit_emitcopy_end_encodeBetterBlockAsm
  5728. XORL SI, SI
  5729. LEAL -1(SI)(R12*4), R12
  5730. MOVB R12, (CX)
  5731. MOVL R8, 1(CX)
  5732. ADDQ $0x05, CX
  5733. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5734. two_byte_offset_match_nolit_encodeBetterBlockAsm:
  5735. CMPL R12, $0x40
  5736. JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm
  5737. CMPL R8, $0x00000800
  5738. JAE long_offset_short_match_nolit_encodeBetterBlockAsm
  5739. MOVL $0x00000001, SI
  5740. LEAL 16(SI), SI
  5741. MOVB R8, 1(CX)
  5742. MOVL R8, R9
  5743. SHRL $0x08, R9
  5744. SHLL $0x05, R9
  5745. ORL R9, SI
  5746. MOVB SI, (CX)
  5747. ADDQ $0x02, CX
  5748. SUBL $0x08, R12
  5749. // emitRepeat
  5750. LEAL -4(R12), R12
  5751. JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5752. emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5753. MOVL R12, SI
  5754. LEAL -4(R12), R12
  5755. CMPL SI, $0x08
  5756. JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5757. CMPL SI, $0x0c
  5758. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5759. CMPL R8, $0x00000800
  5760. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5761. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5762. CMPL R12, $0x00000104
  5763. JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5764. CMPL R12, $0x00010100
  5765. JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5766. CMPL R12, $0x0100ffff
  5767. JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5768. LEAL -16842747(R12), R12
  5769. MOVL $0xfffb001d, (CX)
  5770. MOVB $0xff, 4(CX)
  5771. ADDQ $0x05, CX
  5772. JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
  5773. repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5774. LEAL -65536(R12), R12
  5775. MOVL R12, R8
  5776. MOVW $0x001d, (CX)
  5777. MOVW R12, 2(CX)
  5778. SARL $0x10, R8
  5779. MOVB R8, 4(CX)
  5780. ADDQ $0x05, CX
  5781. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5782. repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5783. LEAL -256(R12), R12
  5784. MOVW $0x0019, (CX)
  5785. MOVW R12, 2(CX)
  5786. ADDQ $0x04, CX
  5787. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5788. repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5789. LEAL -4(R12), R12
  5790. MOVW $0x0015, (CX)
  5791. MOVB R12, 2(CX)
  5792. ADDQ $0x03, CX
  5793. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5794. repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5795. SHLL $0x02, R12
  5796. ORL $0x01, R12
  5797. MOVW R12, (CX)
  5798. ADDQ $0x02, CX
  5799. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5800. repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
  5801. XORQ SI, SI
  5802. LEAL 1(SI)(R12*4), R12
  5803. MOVB R8, 1(CX)
  5804. SARL $0x08, R8
  5805. SHLL $0x05, R8
  5806. ORL R8, R12
  5807. MOVB R12, (CX)
  5808. ADDQ $0x02, CX
  5809. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5810. long_offset_short_match_nolit_encodeBetterBlockAsm:
  5811. MOVB $0xee, (CX)
  5812. MOVW R8, 1(CX)
  5813. LEAL -60(R12), R12
  5814. ADDQ $0x03, CX
  5815. // emitRepeat
  5816. emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5817. MOVL R12, SI
  5818. LEAL -4(R12), R12
  5819. CMPL SI, $0x08
  5820. JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5821. CMPL SI, $0x0c
  5822. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5823. CMPL R8, $0x00000800
  5824. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5825. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5826. CMPL R12, $0x00000104
  5827. JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5828. CMPL R12, $0x00010100
  5829. JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5830. CMPL R12, $0x0100ffff
  5831. JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5832. LEAL -16842747(R12), R12
  5833. MOVL $0xfffb001d, (CX)
  5834. MOVB $0xff, 4(CX)
  5835. ADDQ $0x05, CX
  5836. JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
  5837. repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5838. LEAL -65536(R12), R12
  5839. MOVL R12, R8
  5840. MOVW $0x001d, (CX)
  5841. MOVW R12, 2(CX)
  5842. SARL $0x10, R8
  5843. MOVB R8, 4(CX)
  5844. ADDQ $0x05, CX
  5845. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5846. repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5847. LEAL -256(R12), R12
  5848. MOVW $0x0019, (CX)
  5849. MOVW R12, 2(CX)
  5850. ADDQ $0x04, CX
  5851. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5852. repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5853. LEAL -4(R12), R12
  5854. MOVW $0x0015, (CX)
  5855. MOVB R12, 2(CX)
  5856. ADDQ $0x03, CX
  5857. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5858. repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5859. SHLL $0x02, R12
  5860. ORL $0x01, R12
  5861. MOVW R12, (CX)
  5862. ADDQ $0x02, CX
  5863. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5864. repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
  5865. XORQ SI, SI
  5866. LEAL 1(SI)(R12*4), R12
  5867. MOVB R8, 1(CX)
  5868. SARL $0x08, R8
  5869. SHLL $0x05, R8
  5870. ORL R8, R12
  5871. MOVB R12, (CX)
  5872. ADDQ $0x02, CX
  5873. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5874. two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
  5875. MOVL R12, SI
  5876. SHLL $0x02, SI
  5877. CMPL R12, $0x0c
  5878. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm
  5879. CMPL R8, $0x00000800
  5880. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm
  5881. LEAL -15(SI), SI
  5882. MOVB R8, 1(CX)
  5883. SHRL $0x08, R8
  5884. SHLL $0x05, R8
  5885. ORL R8, SI
  5886. MOVB SI, (CX)
  5887. ADDQ $0x02, CX
  5888. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5889. emit_copy_three_match_nolit_encodeBetterBlockAsm:
  5890. LEAL -2(SI), SI
  5891. MOVB SI, (CX)
  5892. MOVW R8, 1(CX)
  5893. ADDQ $0x03, CX
  5894. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  5895. match_is_repeat_encodeBetterBlockAsm:
  5896. MOVL 12(SP), SI
  5897. CMPL SI, DI
  5898. JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
  5899. MOVL DI, R9
  5900. MOVL DI, 12(SP)
  5901. LEAQ (BX)(SI*1), R10
  5902. SUBL SI, R9
  5903. LEAL -1(R9), SI
  5904. CMPL SI, $0x3c
  5905. JB one_byte_match_emit_repeat_encodeBetterBlockAsm
  5906. CMPL SI, $0x00000100
  5907. JB two_bytes_match_emit_repeat_encodeBetterBlockAsm
  5908. CMPL SI, $0x00010000
  5909. JB three_bytes_match_emit_repeat_encodeBetterBlockAsm
  5910. CMPL SI, $0x01000000
  5911. JB four_bytes_match_emit_repeat_encodeBetterBlockAsm
  5912. MOVB $0xfc, (CX)
  5913. MOVL SI, 1(CX)
  5914. ADDQ $0x05, CX
  5915. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
  5916. four_bytes_match_emit_repeat_encodeBetterBlockAsm:
  5917. MOVL SI, R11
  5918. SHRL $0x10, R11
  5919. MOVB $0xf8, (CX)
  5920. MOVW SI, 1(CX)
  5921. MOVB R11, 3(CX)
  5922. ADDQ $0x04, CX
  5923. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
  5924. three_bytes_match_emit_repeat_encodeBetterBlockAsm:
  5925. MOVB $0xf4, (CX)
  5926. MOVW SI, 1(CX)
  5927. ADDQ $0x03, CX
  5928. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
  5929. two_bytes_match_emit_repeat_encodeBetterBlockAsm:
  5930. MOVB $0xf0, (CX)
  5931. MOVB SI, 1(CX)
  5932. ADDQ $0x02, CX
  5933. CMPL SI, $0x40
  5934. JB memmove_match_emit_repeat_encodeBetterBlockAsm
  5935. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
  5936. one_byte_match_emit_repeat_encodeBetterBlockAsm:
  5937. SHLB $0x02, SI
  5938. MOVB SI, (CX)
  5939. ADDQ $0x01, CX
  5940. memmove_match_emit_repeat_encodeBetterBlockAsm:
  5941. LEAQ (CX)(R9*1), SI
  5942. // genMemMoveShort
  5943. CMPQ R9, $0x04
  5944. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
  5945. CMPQ R9, $0x08
  5946. JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
  5947. CMPQ R9, $0x10
  5948. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
  5949. CMPQ R9, $0x20
  5950. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
  5951. JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
  5952. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
  5953. MOVL (R10), R11
  5954. MOVL R11, (CX)
  5955. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  5956. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
  5957. MOVL (R10), R11
  5958. MOVL -4(R10)(R9*1), R10
  5959. MOVL R11, (CX)
  5960. MOVL R10, -4(CX)(R9*1)
  5961. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  5962. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
  5963. MOVQ (R10), R11
  5964. MOVQ -8(R10)(R9*1), R10
  5965. MOVQ R11, (CX)
  5966. MOVQ R10, -8(CX)(R9*1)
  5967. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  5968. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
  5969. MOVOU (R10), X0
  5970. MOVOU -16(R10)(R9*1), X1
  5971. MOVOU X0, (CX)
  5972. MOVOU X1, -16(CX)(R9*1)
  5973. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
  5974. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
  5975. MOVOU (R10), X0
  5976. MOVOU 16(R10), X1
  5977. MOVOU -32(R10)(R9*1), X2
  5978. MOVOU -16(R10)(R9*1), X3
  5979. MOVOU X0, (CX)
  5980. MOVOU X1, 16(CX)
  5981. MOVOU X2, -32(CX)(R9*1)
  5982. MOVOU X3, -16(CX)(R9*1)
  5983. memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
  5984. MOVQ SI, CX
  5985. JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
  5986. memmove_long_match_emit_repeat_encodeBetterBlockAsm:
  5987. LEAQ (CX)(R9*1), SI
  5988. // genMemMoveLong
  5989. MOVOU (R10), X0
  5990. MOVOU 16(R10), X1
  5991. MOVOU -32(R10)(R9*1), X2
  5992. MOVOU -16(R10)(R9*1), X3
  5993. MOVQ R9, R13
  5994. SHRQ $0x05, R13
  5995. MOVQ CX, R11
  5996. ANDL $0x0000001f, R11
  5997. MOVQ $0x00000040, R14
  5998. SUBQ R11, R14
  5999. DECQ R13
  6000. JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6001. LEAQ -32(R10)(R14*1), R11
  6002. LEAQ -32(CX)(R14*1), R15
  6003. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
  6004. MOVOU (R11), X4
  6005. MOVOU 16(R11), X5
  6006. MOVOA X4, (R15)
  6007. MOVOA X5, 16(R15)
  6008. ADDQ $0x20, R15
  6009. ADDQ $0x20, R11
  6010. ADDQ $0x20, R14
  6011. DECQ R13
  6012. JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
  6013. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
  6014. MOVOU -32(R10)(R14*1), X4
  6015. MOVOU -16(R10)(R14*1), X5
  6016. MOVOA X4, -32(CX)(R14*1)
  6017. MOVOA X5, -16(CX)(R14*1)
  6018. ADDQ $0x20, R14
  6019. CMPQ R9, R14
  6020. JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6021. MOVOU X0, (CX)
  6022. MOVOU X1, 16(CX)
  6023. MOVOU X2, -32(CX)(R9*1)
  6024. MOVOU X3, -16(CX)(R9*1)
  6025. MOVQ SI, CX
  6026. emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
  6027. ADDL R12, DX
  6028. ADDL $0x04, R12
  6029. MOVL DX, 12(SP)
  6030. // emitRepeat
  6031. emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
  6032. MOVL R12, SI
  6033. LEAL -4(R12), R12
  6034. CMPL SI, $0x08
  6035. JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm
  6036. CMPL SI, $0x0c
  6037. JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
  6038. CMPL R8, $0x00000800
  6039. JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
  6040. cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
  6041. CMPL R12, $0x00000104
  6042. JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm
  6043. CMPL R12, $0x00010100
  6044. JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm
  6045. CMPL R12, $0x0100ffff
  6046. JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm
  6047. LEAL -16842747(R12), R12
  6048. MOVL $0xfffb001d, (CX)
  6049. MOVB $0xff, 4(CX)
  6050. ADDQ $0x05, CX
  6051. JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
  6052. repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
  6053. LEAL -65536(R12), R12
  6054. MOVL R12, R8
  6055. MOVW $0x001d, (CX)
  6056. MOVW R12, 2(CX)
  6057. SARL $0x10, R8
  6058. MOVB R8, 4(CX)
  6059. ADDQ $0x05, CX
  6060. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  6061. repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
  6062. LEAL -256(R12), R12
  6063. MOVW $0x0019, (CX)
  6064. MOVW R12, 2(CX)
  6065. ADDQ $0x04, CX
  6066. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  6067. repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
  6068. LEAL -4(R12), R12
  6069. MOVW $0x0015, (CX)
  6070. MOVB R12, 2(CX)
  6071. ADDQ $0x03, CX
  6072. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  6073. repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
  6074. SHLL $0x02, R12
  6075. ORL $0x01, R12
  6076. MOVW R12, (CX)
  6077. ADDQ $0x02, CX
  6078. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
  6079. repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
  6080. XORQ SI, SI
  6081. LEAL 1(SI)(R12*4), R12
  6082. MOVB R8, 1(CX)
  6083. SARL $0x08, R8
  6084. SHLL $0x05, R8
  6085. ORL R8, R12
  6086. MOVB R12, (CX)
  6087. ADDQ $0x02, CX
  6088. match_nolit_emitcopy_end_encodeBetterBlockAsm:
  6089. CMPL DX, 8(SP)
  6090. JAE emit_remainder_encodeBetterBlockAsm
  6091. CMPQ CX, (SP)
  6092. JB match_nolit_dst_ok_encodeBetterBlockAsm
  6093. MOVQ $0x00000000, ret+56(FP)
  6094. RET
  6095. match_nolit_dst_ok_encodeBetterBlockAsm:
  6096. MOVQ $0x00cf1bbcdcbfa563, SI
  6097. MOVQ $0x9e3779b1, R8
  6098. LEAQ 1(DI), DI
  6099. LEAQ -2(DX), R9
  6100. MOVQ (BX)(DI*1), R10
  6101. MOVQ 1(BX)(DI*1), R11
  6102. MOVQ (BX)(R9*1), R12
  6103. MOVQ 1(BX)(R9*1), R13
  6104. SHLQ $0x08, R10
  6105. IMULQ SI, R10
  6106. SHRQ $0x2f, R10
  6107. SHLQ $0x20, R11
  6108. IMULQ R8, R11
  6109. SHRQ $0x32, R11
  6110. SHLQ $0x08, R12
  6111. IMULQ SI, R12
  6112. SHRQ $0x2f, R12
  6113. SHLQ $0x20, R13
  6114. IMULQ R8, R13
  6115. SHRQ $0x32, R13
  6116. LEAQ 1(DI), R8
  6117. LEAQ 1(R9), R14
  6118. MOVL DI, (AX)(R10*4)
  6119. MOVL R9, (AX)(R12*4)
  6120. MOVL R8, 524288(AX)(R11*4)
  6121. MOVL R14, 524288(AX)(R13*4)
  6122. LEAQ 1(R9)(DI*1), R8
  6123. SHRQ $0x01, R8
  6124. ADDQ $0x01, DI
  6125. SUBQ $0x01, R9
  6126. index_loop_encodeBetterBlockAsm:
  6127. CMPQ R8, R9
  6128. JAE search_loop_encodeBetterBlockAsm
  6129. MOVQ (BX)(DI*1), R10
  6130. MOVQ (BX)(R8*1), R11
  6131. SHLQ $0x08, R10
  6132. IMULQ SI, R10
  6133. SHRQ $0x2f, R10
  6134. SHLQ $0x08, R11
  6135. IMULQ SI, R11
  6136. SHRQ $0x2f, R11
  6137. MOVL DI, (AX)(R10*4)
  6138. MOVL R8, (AX)(R11*4)
  6139. ADDQ $0x02, DI
  6140. ADDQ $0x02, R8
  6141. JMP index_loop_encodeBetterBlockAsm
  6142. emit_remainder_encodeBetterBlockAsm:
  6143. MOVQ src_len+32(FP), AX
  6144. SUBL 12(SP), AX
  6145. LEAQ 5(CX)(AX*1), AX
  6146. CMPQ AX, (SP)
  6147. JB emit_remainder_ok_encodeBetterBlockAsm
  6148. MOVQ $0x00000000, ret+56(FP)
  6149. RET
  6150. emit_remainder_ok_encodeBetterBlockAsm:
  6151. MOVQ src_len+32(FP), AX
  6152. MOVL 12(SP), DX
  6153. CMPL DX, AX
  6154. JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm
  6155. MOVL AX, SI
  6156. MOVL AX, 12(SP)
  6157. LEAQ (BX)(DX*1), AX
  6158. SUBL DX, SI
  6159. LEAL -1(SI), DX
  6160. CMPL DX, $0x3c
  6161. JB one_byte_emit_remainder_encodeBetterBlockAsm
  6162. CMPL DX, $0x00000100
  6163. JB two_bytes_emit_remainder_encodeBetterBlockAsm
  6164. CMPL DX, $0x00010000
  6165. JB three_bytes_emit_remainder_encodeBetterBlockAsm
  6166. CMPL DX, $0x01000000
  6167. JB four_bytes_emit_remainder_encodeBetterBlockAsm
  6168. MOVB $0xfc, (CX)
  6169. MOVL DX, 1(CX)
  6170. ADDQ $0x05, CX
  6171. JMP memmove_long_emit_remainder_encodeBetterBlockAsm
  6172. four_bytes_emit_remainder_encodeBetterBlockAsm:
  6173. MOVL DX, BX
  6174. SHRL $0x10, BX
  6175. MOVB $0xf8, (CX)
  6176. MOVW DX, 1(CX)
  6177. MOVB BL, 3(CX)
  6178. ADDQ $0x04, CX
  6179. JMP memmove_long_emit_remainder_encodeBetterBlockAsm
  6180. three_bytes_emit_remainder_encodeBetterBlockAsm:
  6181. MOVB $0xf4, (CX)
  6182. MOVW DX, 1(CX)
  6183. ADDQ $0x03, CX
  6184. JMP memmove_long_emit_remainder_encodeBetterBlockAsm
  6185. two_bytes_emit_remainder_encodeBetterBlockAsm:
  6186. MOVB $0xf0, (CX)
  6187. MOVB DL, 1(CX)
  6188. ADDQ $0x02, CX
  6189. CMPL DX, $0x40
  6190. JB memmove_emit_remainder_encodeBetterBlockAsm
  6191. JMP memmove_long_emit_remainder_encodeBetterBlockAsm
  6192. one_byte_emit_remainder_encodeBetterBlockAsm:
  6193. SHLB $0x02, DL
  6194. MOVB DL, (CX)
  6195. ADDQ $0x01, CX
  6196. memmove_emit_remainder_encodeBetterBlockAsm:
  6197. LEAQ (CX)(SI*1), DX
  6198. MOVL SI, BX
  6199. // genMemMoveShort
  6200. CMPQ BX, $0x03
  6201. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
  6202. JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
  6203. CMPQ BX, $0x08
  6204. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
  6205. CMPQ BX, $0x10
  6206. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
  6207. CMPQ BX, $0x20
  6208. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
  6209. JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
  6210. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
  6211. MOVB (AX), SI
  6212. MOVB -1(AX)(BX*1), AL
  6213. MOVB SI, (CX)
  6214. MOVB AL, -1(CX)(BX*1)
  6215. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6216. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
  6217. MOVW (AX), SI
  6218. MOVB 2(AX), AL
  6219. MOVW SI, (CX)
  6220. MOVB AL, 2(CX)
  6221. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6222. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
  6223. MOVL (AX), SI
  6224. MOVL -4(AX)(BX*1), AX
  6225. MOVL SI, (CX)
  6226. MOVL AX, -4(CX)(BX*1)
  6227. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6228. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
  6229. MOVQ (AX), SI
  6230. MOVQ -8(AX)(BX*1), AX
  6231. MOVQ SI, (CX)
  6232. MOVQ AX, -8(CX)(BX*1)
  6233. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6234. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
  6235. MOVOU (AX), X0
  6236. MOVOU -16(AX)(BX*1), X1
  6237. MOVOU X0, (CX)
  6238. MOVOU X1, -16(CX)(BX*1)
  6239. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
  6240. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
  6241. MOVOU (AX), X0
  6242. MOVOU 16(AX), X1
  6243. MOVOU -32(AX)(BX*1), X2
  6244. MOVOU -16(AX)(BX*1), X3
  6245. MOVOU X0, (CX)
  6246. MOVOU X1, 16(CX)
  6247. MOVOU X2, -32(CX)(BX*1)
  6248. MOVOU X3, -16(CX)(BX*1)
  6249. memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
  6250. MOVQ DX, CX
  6251. JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm
  6252. memmove_long_emit_remainder_encodeBetterBlockAsm:
  6253. LEAQ (CX)(SI*1), DX
  6254. MOVL SI, BX
  6255. // genMemMoveLong
  6256. MOVOU (AX), X0
  6257. MOVOU 16(AX), X1
  6258. MOVOU -32(AX)(BX*1), X2
  6259. MOVOU -16(AX)(BX*1), X3
  6260. MOVQ BX, DI
  6261. SHRQ $0x05, DI
  6262. MOVQ CX, SI
  6263. ANDL $0x0000001f, SI
  6264. MOVQ $0x00000040, R8
  6265. SUBQ SI, R8
  6266. DECQ DI
  6267. JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6268. LEAQ -32(AX)(R8*1), SI
  6269. LEAQ -32(CX)(R8*1), R9
  6270. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
  6271. MOVOU (SI), X4
  6272. MOVOU 16(SI), X5
  6273. MOVOA X4, (R9)
  6274. MOVOA X5, 16(R9)
  6275. ADDQ $0x20, R9
  6276. ADDQ $0x20, SI
  6277. ADDQ $0x20, R8
  6278. DECQ DI
  6279. JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
  6280. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
  6281. MOVOU -32(AX)(R8*1), X4
  6282. MOVOU -16(AX)(R8*1), X5
  6283. MOVOA X4, -32(CX)(R8*1)
  6284. MOVOA X5, -16(CX)(R8*1)
  6285. ADDQ $0x20, R8
  6286. CMPQ BX, R8
  6287. JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
  6288. MOVOU X0, (CX)
  6289. MOVOU X1, 16(CX)
  6290. MOVOU X2, -32(CX)(BX*1)
  6291. MOVOU X3, -16(CX)(BX*1)
  6292. MOVQ DX, CX
  6293. emit_literal_done_emit_remainder_encodeBetterBlockAsm:
  6294. MOVQ dst_base+0(FP), AX
  6295. SUBQ AX, CX
  6296. MOVQ CX, ret+56(FP)
  6297. RET
  6298. // func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
  6299. // Requires: BMI, SSE2
  6300. TEXT ·encodeBetterBlockAsm4MB(SB), $24-64
  6301. MOVQ tmp+48(FP), AX
  6302. MOVQ dst_base+0(FP), CX
  6303. MOVQ $0x00001200, DX
  6304. MOVQ AX, BX
  6305. PXOR X0, X0
  6306. zero_loop_encodeBetterBlockAsm4MB:
  6307. MOVOU X0, (BX)
  6308. MOVOU X0, 16(BX)
  6309. MOVOU X0, 32(BX)
  6310. MOVOU X0, 48(BX)
  6311. MOVOU X0, 64(BX)
  6312. MOVOU X0, 80(BX)
  6313. MOVOU X0, 96(BX)
  6314. MOVOU X0, 112(BX)
  6315. ADDQ $0x80, BX
  6316. DECQ DX
  6317. JNZ zero_loop_encodeBetterBlockAsm4MB
  6318. MOVL $0x00000000, 12(SP)
  6319. MOVQ src_len+32(FP), DX
  6320. LEAQ -6(DX), BX
  6321. LEAQ -8(DX), SI
  6322. MOVL SI, 8(SP)
  6323. SHRQ $0x05, DX
  6324. SUBL DX, BX
  6325. LEAQ (CX)(BX*1), BX
  6326. MOVQ BX, (SP)
  6327. MOVL $0x00000001, DX
  6328. MOVL $0x00000000, 16(SP)
  6329. MOVQ src_base+24(FP), BX
  6330. search_loop_encodeBetterBlockAsm4MB:
  6331. MOVL DX, SI
  6332. SUBL 12(SP), SI
  6333. SHRL $0x07, SI
  6334. CMPL SI, $0x63
  6335. JBE check_maxskip_ok_encodeBetterBlockAsm4MB
  6336. LEAL 100(DX), SI
  6337. JMP check_maxskip_cont_encodeBetterBlockAsm4MB
  6338. check_maxskip_ok_encodeBetterBlockAsm4MB:
  6339. LEAL 1(DX)(SI*1), SI
  6340. check_maxskip_cont_encodeBetterBlockAsm4MB:
  6341. CMPL SI, 8(SP)
  6342. JAE emit_remainder_encodeBetterBlockAsm4MB
  6343. MOVQ (BX)(DX*1), DI
  6344. MOVL SI, 20(SP)
  6345. MOVQ $0x00cf1bbcdcbfa563, R9
  6346. MOVQ $0x9e3779b1, SI
  6347. MOVQ DI, R10
  6348. MOVQ DI, R11
  6349. SHLQ $0x08, R10
  6350. IMULQ R9, R10
  6351. SHRQ $0x2f, R10
  6352. SHLQ $0x20, R11
  6353. IMULQ SI, R11
  6354. SHRQ $0x32, R11
  6355. MOVL (AX)(R10*4), SI
  6356. MOVL 524288(AX)(R11*4), R8
  6357. MOVL DX, (AX)(R10*4)
  6358. MOVL DX, 524288(AX)(R11*4)
  6359. MOVQ (BX)(SI*1), R10
  6360. MOVQ (BX)(R8*1), R11
  6361. CMPQ R10, DI
  6362. JEQ candidate_match_encodeBetterBlockAsm4MB
  6363. CMPQ R11, DI
  6364. JNE no_short_found_encodeBetterBlockAsm4MB
  6365. MOVL R8, SI
  6366. JMP candidate_match_encodeBetterBlockAsm4MB
  6367. no_short_found_encodeBetterBlockAsm4MB:
  6368. CMPL R10, DI
  6369. JEQ candidate_match_encodeBetterBlockAsm4MB
  6370. CMPL R11, DI
  6371. JEQ candidateS_match_encodeBetterBlockAsm4MB
  6372. MOVL 20(SP), DX
  6373. JMP search_loop_encodeBetterBlockAsm4MB
  6374. candidateS_match_encodeBetterBlockAsm4MB:
  6375. SHRQ $0x08, DI
  6376. MOVQ DI, R10
  6377. SHLQ $0x08, R10
  6378. IMULQ R9, R10
  6379. SHRQ $0x2f, R10
  6380. MOVL (AX)(R10*4), SI
  6381. INCL DX
  6382. MOVL DX, (AX)(R10*4)
  6383. CMPL (BX)(SI*1), DI
  6384. JEQ candidate_match_encodeBetterBlockAsm4MB
  6385. DECL DX
  6386. MOVL R8, SI
  6387. candidate_match_encodeBetterBlockAsm4MB:
  6388. MOVL 12(SP), DI
  6389. TESTL SI, SI
  6390. JZ match_extend_back_end_encodeBetterBlockAsm4MB
  6391. match_extend_back_loop_encodeBetterBlockAsm4MB:
  6392. CMPL DX, DI
  6393. JBE match_extend_back_end_encodeBetterBlockAsm4MB
  6394. MOVB -1(BX)(SI*1), R8
  6395. MOVB -1(BX)(DX*1), R9
  6396. CMPB R8, R9
  6397. JNE match_extend_back_end_encodeBetterBlockAsm4MB
  6398. LEAL -1(DX), DX
  6399. DECL SI
  6400. JZ match_extend_back_end_encodeBetterBlockAsm4MB
  6401. JMP match_extend_back_loop_encodeBetterBlockAsm4MB
  6402. match_extend_back_end_encodeBetterBlockAsm4MB:
  6403. MOVL DX, DI
  6404. SUBL 12(SP), DI
  6405. LEAQ 4(CX)(DI*1), DI
  6406. CMPQ DI, (SP)
  6407. JB match_dst_size_check_encodeBetterBlockAsm4MB
  6408. MOVQ $0x00000000, ret+56(FP)
  6409. RET
  6410. match_dst_size_check_encodeBetterBlockAsm4MB:
  6411. MOVL DX, DI
  6412. ADDL $0x04, DX
  6413. ADDL $0x04, SI
  6414. MOVQ src_len+32(FP), R8
  6415. SUBL DX, R8
  6416. LEAQ (BX)(DX*1), R9
  6417. LEAQ (BX)(SI*1), R10
  6418. // matchLen
  6419. XORL R12, R12
  6420. matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
  6421. CMPL R8, $0x10
  6422. JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
  6423. MOVQ (R9)(R12*1), R11
  6424. MOVQ 8(R9)(R12*1), R13
  6425. XORQ (R10)(R12*1), R11
  6426. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
  6427. XORQ 8(R10)(R12*1), R13
  6428. JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
  6429. LEAL -16(R8), R8
  6430. LEAL 16(R12), R12
  6431. JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
  6432. matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
  6433. #ifdef GOAMD64_v3
  6434. TZCNTQ R13, R13
  6435. #else
  6436. BSFQ R13, R13
  6437. #endif
  6438. SARQ $0x03, R13
  6439. LEAL 8(R12)(R13*1), R12
  6440. JMP match_nolit_end_encodeBetterBlockAsm4MB
  6441. matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
  6442. CMPL R8, $0x08
  6443. JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
  6444. MOVQ (R9)(R12*1), R11
  6445. XORQ (R10)(R12*1), R11
  6446. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
  6447. LEAL -8(R8), R8
  6448. LEAL 8(R12), R12
  6449. JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
  6450. matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
  6451. #ifdef GOAMD64_v3
  6452. TZCNTQ R11, R11
  6453. #else
  6454. BSFQ R11, R11
  6455. #endif
  6456. SARQ $0x03, R11
  6457. LEAL (R12)(R11*1), R12
  6458. JMP match_nolit_end_encodeBetterBlockAsm4MB
  6459. matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
  6460. CMPL R8, $0x04
  6461. JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
  6462. MOVL (R9)(R12*1), R11
  6463. CMPL (R10)(R12*1), R11
  6464. JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
  6465. LEAL -4(R8), R8
  6466. LEAL 4(R12), R12
  6467. matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
  6468. CMPL R8, $0x01
  6469. JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
  6470. JB match_nolit_end_encodeBetterBlockAsm4MB
  6471. MOVW (R9)(R12*1), R11
  6472. CMPW (R10)(R12*1), R11
  6473. JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
  6474. LEAL 2(R12), R12
  6475. SUBL $0x02, R8
  6476. JZ match_nolit_end_encodeBetterBlockAsm4MB
  6477. matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
  6478. MOVB (R9)(R12*1), R11
  6479. CMPB (R10)(R12*1), R11
  6480. JNE match_nolit_end_encodeBetterBlockAsm4MB
  6481. LEAL 1(R12), R12
  6482. match_nolit_end_encodeBetterBlockAsm4MB:
  6483. MOVL DX, R8
  6484. SUBL SI, R8
  6485. // Check if repeat
  6486. CMPL 16(SP), R8
  6487. JEQ match_is_repeat_encodeBetterBlockAsm4MB
  6488. CMPL R12, $0x01
  6489. JA match_length_ok_encodeBetterBlockAsm4MB
  6490. CMPL R8, $0x0000ffff
  6491. JBE match_length_ok_encodeBetterBlockAsm4MB
  6492. MOVL 20(SP), DX
  6493. INCL DX
  6494. JMP search_loop_encodeBetterBlockAsm4MB
  6495. match_length_ok_encodeBetterBlockAsm4MB:
  6496. MOVL R8, 16(SP)
  6497. MOVL 12(SP), SI
  6498. CMPL SI, DI
  6499. JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
  6500. MOVL DI, R9
  6501. MOVL DI, 12(SP)
  6502. LEAQ (BX)(SI*1), R10
  6503. SUBL SI, R9
  6504. LEAL -1(R9), SI
  6505. CMPL SI, $0x3c
  6506. JB one_byte_match_emit_encodeBetterBlockAsm4MB
  6507. CMPL SI, $0x00000100
  6508. JB two_bytes_match_emit_encodeBetterBlockAsm4MB
  6509. CMPL SI, $0x00010000
  6510. JB three_bytes_match_emit_encodeBetterBlockAsm4MB
  6511. MOVL SI, R11
  6512. SHRL $0x10, R11
  6513. MOVB $0xf8, (CX)
  6514. MOVW SI, 1(CX)
  6515. MOVB R11, 3(CX)
  6516. ADDQ $0x04, CX
  6517. JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
  6518. three_bytes_match_emit_encodeBetterBlockAsm4MB:
  6519. MOVB $0xf4, (CX)
  6520. MOVW SI, 1(CX)
  6521. ADDQ $0x03, CX
  6522. JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
  6523. two_bytes_match_emit_encodeBetterBlockAsm4MB:
  6524. MOVB $0xf0, (CX)
  6525. MOVB SI, 1(CX)
  6526. ADDQ $0x02, CX
  6527. CMPL SI, $0x40
  6528. JB memmove_match_emit_encodeBetterBlockAsm4MB
  6529. JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
  6530. one_byte_match_emit_encodeBetterBlockAsm4MB:
  6531. SHLB $0x02, SI
  6532. MOVB SI, (CX)
  6533. ADDQ $0x01, CX
  6534. memmove_match_emit_encodeBetterBlockAsm4MB:
  6535. LEAQ (CX)(R9*1), SI
  6536. // genMemMoveShort
  6537. CMPQ R9, $0x04
  6538. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
  6539. CMPQ R9, $0x08
  6540. JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
  6541. CMPQ R9, $0x10
  6542. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
  6543. CMPQ R9, $0x20
  6544. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
  6545. JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
  6546. emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
  6547. MOVL (R10), R11
  6548. MOVL R11, (CX)
  6549. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  6550. emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
  6551. MOVL (R10), R11
  6552. MOVL -4(R10)(R9*1), R10
  6553. MOVL R11, (CX)
  6554. MOVL R10, -4(CX)(R9*1)
  6555. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  6556. emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
  6557. MOVQ (R10), R11
  6558. MOVQ -8(R10)(R9*1), R10
  6559. MOVQ R11, (CX)
  6560. MOVQ R10, -8(CX)(R9*1)
  6561. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  6562. emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
  6563. MOVOU (R10), X0
  6564. MOVOU -16(R10)(R9*1), X1
  6565. MOVOU X0, (CX)
  6566. MOVOU X1, -16(CX)(R9*1)
  6567. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
  6568. emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
  6569. MOVOU (R10), X0
  6570. MOVOU 16(R10), X1
  6571. MOVOU -32(R10)(R9*1), X2
  6572. MOVOU -16(R10)(R9*1), X3
  6573. MOVOU X0, (CX)
  6574. MOVOU X1, 16(CX)
  6575. MOVOU X2, -32(CX)(R9*1)
  6576. MOVOU X3, -16(CX)(R9*1)
  6577. memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
  6578. MOVQ SI, CX
  6579. JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB
  6580. memmove_long_match_emit_encodeBetterBlockAsm4MB:
  6581. LEAQ (CX)(R9*1), SI
  6582. // genMemMoveLong
  6583. MOVOU (R10), X0
  6584. MOVOU 16(R10), X1
  6585. MOVOU -32(R10)(R9*1), X2
  6586. MOVOU -16(R10)(R9*1), X3
  6587. MOVQ R9, R13
  6588. SHRQ $0x05, R13
  6589. MOVQ CX, R11
  6590. ANDL $0x0000001f, R11
  6591. MOVQ $0x00000040, R14
  6592. SUBQ R11, R14
  6593. DECQ R13
  6594. JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  6595. LEAQ -32(R10)(R14*1), R11
  6596. LEAQ -32(CX)(R14*1), R15
  6597. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
  6598. MOVOU (R11), X4
  6599. MOVOU 16(R11), X5
  6600. MOVOA X4, (R15)
  6601. MOVOA X5, 16(R15)
  6602. ADDQ $0x20, R15
  6603. ADDQ $0x20, R11
  6604. ADDQ $0x20, R14
  6605. DECQ R13
  6606. JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
  6607. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
  6608. MOVOU -32(R10)(R14*1), X4
  6609. MOVOU -16(R10)(R14*1), X5
  6610. MOVOA X4, -32(CX)(R14*1)
  6611. MOVOA X5, -16(CX)(R14*1)
  6612. ADDQ $0x20, R14
  6613. CMPQ R9, R14
  6614. JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  6615. MOVOU X0, (CX)
  6616. MOVOU X1, 16(CX)
  6617. MOVOU X2, -32(CX)(R9*1)
  6618. MOVOU X3, -16(CX)(R9*1)
  6619. MOVQ SI, CX
  6620. emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
  6621. ADDL R12, DX
  6622. ADDL $0x04, R12
  6623. MOVL DX, 12(SP)
  6624. // emitCopy
  6625. CMPL R8, $0x00010000
  6626. JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
  6627. CMPL R12, $0x40
  6628. JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
  6629. MOVB $0xff, (CX)
  6630. MOVL R8, 1(CX)
  6631. LEAL -64(R12), R12
  6632. ADDQ $0x05, CX
  6633. CMPL R12, $0x04
  6634. JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
  6635. // emitRepeat
  6636. MOVL R12, SI
  6637. LEAL -4(R12), R12
  6638. CMPL SI, $0x08
  6639. JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  6640. CMPL SI, $0x0c
  6641. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  6642. CMPL R8, $0x00000800
  6643. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  6644. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  6645. CMPL R12, $0x00000104
  6646. JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  6647. CMPL R12, $0x00010100
  6648. JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
  6649. LEAL -65536(R12), R12
  6650. MOVL R12, R8
  6651. MOVW $0x001d, (CX)
  6652. MOVW R12, 2(CX)
  6653. SARL $0x10, R8
  6654. MOVB R8, 4(CX)
  6655. ADDQ $0x05, CX
  6656. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6657. repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  6658. LEAL -256(R12), R12
  6659. MOVW $0x0019, (CX)
  6660. MOVW R12, 2(CX)
  6661. ADDQ $0x04, CX
  6662. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6663. repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  6664. LEAL -4(R12), R12
  6665. MOVW $0x0015, (CX)
  6666. MOVB R12, 2(CX)
  6667. ADDQ $0x03, CX
  6668. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6669. repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  6670. SHLL $0x02, R12
  6671. ORL $0x01, R12
  6672. MOVW R12, (CX)
  6673. ADDQ $0x02, CX
  6674. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6675. repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
  6676. XORQ SI, SI
  6677. LEAL 1(SI)(R12*4), R12
  6678. MOVB R8, 1(CX)
  6679. SARL $0x08, R8
  6680. SHLL $0x05, R8
  6681. ORL R8, R12
  6682. MOVB R12, (CX)
  6683. ADDQ $0x02, CX
  6684. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6685. four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
  6686. TESTL R12, R12
  6687. JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6688. XORL SI, SI
  6689. LEAL -1(SI)(R12*4), R12
  6690. MOVB R12, (CX)
  6691. MOVL R8, 1(CX)
  6692. ADDQ $0x05, CX
  6693. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6694. two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
  6695. CMPL R12, $0x40
  6696. JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
  6697. CMPL R8, $0x00000800
  6698. JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB
  6699. MOVL $0x00000001, SI
  6700. LEAL 16(SI), SI
  6701. MOVB R8, 1(CX)
  6702. SHRL $0x08, R8
  6703. SHLL $0x05, R8
  6704. ORL R8, SI
  6705. MOVB SI, (CX)
  6706. ADDQ $0x02, CX
  6707. SUBL $0x08, R12
  6708. // emitRepeat
  6709. LEAL -4(R12), R12
  6710. JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  6711. MOVL R12, SI
  6712. LEAL -4(R12), R12
  6713. CMPL SI, $0x08
  6714. JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  6715. CMPL SI, $0x0c
  6716. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  6717. CMPL R8, $0x00000800
  6718. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  6719. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  6720. CMPL R12, $0x00000104
  6721. JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  6722. CMPL R12, $0x00010100
  6723. JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
  6724. LEAL -65536(R12), R12
  6725. MOVL R12, R8
  6726. MOVW $0x001d, (CX)
  6727. MOVW R12, 2(CX)
  6728. SARL $0x10, R8
  6729. MOVB R8, 4(CX)
  6730. ADDQ $0x05, CX
  6731. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6732. repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  6733. LEAL -256(R12), R12
  6734. MOVW $0x0019, (CX)
  6735. MOVW R12, 2(CX)
  6736. ADDQ $0x04, CX
  6737. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6738. repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  6739. LEAL -4(R12), R12
  6740. MOVW $0x0015, (CX)
  6741. MOVB R12, 2(CX)
  6742. ADDQ $0x03, CX
  6743. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6744. repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  6745. SHLL $0x02, R12
  6746. ORL $0x01, R12
  6747. MOVW R12, (CX)
  6748. ADDQ $0x02, CX
  6749. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6750. repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
  6751. XORQ SI, SI
  6752. LEAL 1(SI)(R12*4), R12
  6753. MOVB R8, 1(CX)
  6754. SARL $0x08, R8
  6755. SHLL $0x05, R8
  6756. ORL R8, R12
  6757. MOVB R12, (CX)
  6758. ADDQ $0x02, CX
  6759. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6760. long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
  6761. MOVB $0xee, (CX)
  6762. MOVW R8, 1(CX)
  6763. LEAL -60(R12), R12
  6764. ADDQ $0x03, CX
  6765. // emitRepeat
  6766. MOVL R12, SI
  6767. LEAL -4(R12), R12
  6768. CMPL SI, $0x08
  6769. JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  6770. CMPL SI, $0x0c
  6771. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  6772. CMPL R8, $0x00000800
  6773. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  6774. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  6775. CMPL R12, $0x00000104
  6776. JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  6777. CMPL R12, $0x00010100
  6778. JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
  6779. LEAL -65536(R12), R12
  6780. MOVL R12, R8
  6781. MOVW $0x001d, (CX)
  6782. MOVW R12, 2(CX)
  6783. SARL $0x10, R8
  6784. MOVB R8, 4(CX)
  6785. ADDQ $0x05, CX
  6786. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6787. repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  6788. LEAL -256(R12), R12
  6789. MOVW $0x0019, (CX)
  6790. MOVW R12, 2(CX)
  6791. ADDQ $0x04, CX
  6792. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6793. repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  6794. LEAL -4(R12), R12
  6795. MOVW $0x0015, (CX)
  6796. MOVB R12, 2(CX)
  6797. ADDQ $0x03, CX
  6798. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6799. repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  6800. SHLL $0x02, R12
  6801. ORL $0x01, R12
  6802. MOVW R12, (CX)
  6803. ADDQ $0x02, CX
  6804. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6805. repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
  6806. XORQ SI, SI
  6807. LEAL 1(SI)(R12*4), R12
  6808. MOVB R8, 1(CX)
  6809. SARL $0x08, R8
  6810. SHLL $0x05, R8
  6811. ORL R8, R12
  6812. MOVB R12, (CX)
  6813. ADDQ $0x02, CX
  6814. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6815. two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
  6816. MOVL R12, SI
  6817. SHLL $0x02, SI
  6818. CMPL R12, $0x0c
  6819. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
  6820. CMPL R8, $0x00000800
  6821. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
  6822. LEAL -15(SI), SI
  6823. MOVB R8, 1(CX)
  6824. SHRL $0x08, R8
  6825. SHLL $0x05, R8
  6826. ORL R8, SI
  6827. MOVB SI, (CX)
  6828. ADDQ $0x02, CX
  6829. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6830. emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
  6831. LEAL -2(SI), SI
  6832. MOVB SI, (CX)
  6833. MOVW R8, 1(CX)
  6834. ADDQ $0x03, CX
  6835. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6836. match_is_repeat_encodeBetterBlockAsm4MB:
  6837. MOVL 12(SP), SI
  6838. CMPL SI, DI
  6839. JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
  6840. MOVL DI, R9
  6841. MOVL DI, 12(SP)
  6842. LEAQ (BX)(SI*1), R10
  6843. SUBL SI, R9
  6844. LEAL -1(R9), SI
  6845. CMPL SI, $0x3c
  6846. JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
  6847. CMPL SI, $0x00000100
  6848. JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
  6849. CMPL SI, $0x00010000
  6850. JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
  6851. MOVL SI, R11
  6852. SHRL $0x10, R11
  6853. MOVB $0xf8, (CX)
  6854. MOVW SI, 1(CX)
  6855. MOVB R11, 3(CX)
  6856. ADDQ $0x04, CX
  6857. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
  6858. three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
  6859. MOVB $0xf4, (CX)
  6860. MOVW SI, 1(CX)
  6861. ADDQ $0x03, CX
  6862. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
  6863. two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
  6864. MOVB $0xf0, (CX)
  6865. MOVB SI, 1(CX)
  6866. ADDQ $0x02, CX
  6867. CMPL SI, $0x40
  6868. JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB
  6869. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
  6870. one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
  6871. SHLB $0x02, SI
  6872. MOVB SI, (CX)
  6873. ADDQ $0x01, CX
  6874. memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
  6875. LEAQ (CX)(R9*1), SI
  6876. // genMemMoveShort
  6877. CMPQ R9, $0x04
  6878. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
  6879. CMPQ R9, $0x08
  6880. JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
  6881. CMPQ R9, $0x10
  6882. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
  6883. CMPQ R9, $0x20
  6884. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
  6885. JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
  6886. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
  6887. MOVL (R10), R11
  6888. MOVL R11, (CX)
  6889. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  6890. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
  6891. MOVL (R10), R11
  6892. MOVL -4(R10)(R9*1), R10
  6893. MOVL R11, (CX)
  6894. MOVL R10, -4(CX)(R9*1)
  6895. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  6896. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
  6897. MOVQ (R10), R11
  6898. MOVQ -8(R10)(R9*1), R10
  6899. MOVQ R11, (CX)
  6900. MOVQ R10, -8(CX)(R9*1)
  6901. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  6902. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
  6903. MOVOU (R10), X0
  6904. MOVOU -16(R10)(R9*1), X1
  6905. MOVOU X0, (CX)
  6906. MOVOU X1, -16(CX)(R9*1)
  6907. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
  6908. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
  6909. MOVOU (R10), X0
  6910. MOVOU 16(R10), X1
  6911. MOVOU -32(R10)(R9*1), X2
  6912. MOVOU -16(R10)(R9*1), X3
  6913. MOVOU X0, (CX)
  6914. MOVOU X1, 16(CX)
  6915. MOVOU X2, -32(CX)(R9*1)
  6916. MOVOU X3, -16(CX)(R9*1)
  6917. memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
  6918. MOVQ SI, CX
  6919. JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
  6920. memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
  6921. LEAQ (CX)(R9*1), SI
  6922. // genMemMoveLong
  6923. MOVOU (R10), X0
  6924. MOVOU 16(R10), X1
  6925. MOVOU -32(R10)(R9*1), X2
  6926. MOVOU -16(R10)(R9*1), X3
  6927. MOVQ R9, R13
  6928. SHRQ $0x05, R13
  6929. MOVQ CX, R11
  6930. ANDL $0x0000001f, R11
  6931. MOVQ $0x00000040, R14
  6932. SUBQ R11, R14
  6933. DECQ R13
  6934. JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  6935. LEAQ -32(R10)(R14*1), R11
  6936. LEAQ -32(CX)(R14*1), R15
  6937. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
  6938. MOVOU (R11), X4
  6939. MOVOU 16(R11), X5
  6940. MOVOA X4, (R15)
  6941. MOVOA X5, 16(R15)
  6942. ADDQ $0x20, R15
  6943. ADDQ $0x20, R11
  6944. ADDQ $0x20, R14
  6945. DECQ R13
  6946. JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
  6947. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
  6948. MOVOU -32(R10)(R14*1), X4
  6949. MOVOU -16(R10)(R14*1), X5
  6950. MOVOA X4, -32(CX)(R14*1)
  6951. MOVOA X5, -16(CX)(R14*1)
  6952. ADDQ $0x20, R14
  6953. CMPQ R9, R14
  6954. JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  6955. MOVOU X0, (CX)
  6956. MOVOU X1, 16(CX)
  6957. MOVOU X2, -32(CX)(R9*1)
  6958. MOVOU X3, -16(CX)(R9*1)
  6959. MOVQ SI, CX
  6960. emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
  6961. ADDL R12, DX
  6962. ADDL $0x04, R12
  6963. MOVL DX, 12(SP)
  6964. // emitRepeat
  6965. MOVL R12, SI
  6966. LEAL -4(R12), R12
  6967. CMPL SI, $0x08
  6968. JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
  6969. CMPL SI, $0x0c
  6970. JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
  6971. CMPL R8, $0x00000800
  6972. JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
  6973. cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
  6974. CMPL R12, $0x00000104
  6975. JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
  6976. CMPL R12, $0x00010100
  6977. JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
  6978. LEAL -65536(R12), R12
  6979. MOVL R12, R8
  6980. MOVW $0x001d, (CX)
  6981. MOVW R12, 2(CX)
  6982. SARL $0x10, R8
  6983. MOVB R8, 4(CX)
  6984. ADDQ $0x05, CX
  6985. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6986. repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
  6987. LEAL -256(R12), R12
  6988. MOVW $0x0019, (CX)
  6989. MOVW R12, 2(CX)
  6990. ADDQ $0x04, CX
  6991. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6992. repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
  6993. LEAL -4(R12), R12
  6994. MOVW $0x0015, (CX)
  6995. MOVB R12, 2(CX)
  6996. ADDQ $0x03, CX
  6997. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  6998. repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
  6999. SHLL $0x02, R12
  7000. ORL $0x01, R12
  7001. MOVW R12, (CX)
  7002. ADDQ $0x02, CX
  7003. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
  7004. repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
  7005. XORQ SI, SI
  7006. LEAL 1(SI)(R12*4), R12
  7007. MOVB R8, 1(CX)
  7008. SARL $0x08, R8
  7009. SHLL $0x05, R8
  7010. ORL R8, R12
  7011. MOVB R12, (CX)
  7012. ADDQ $0x02, CX
  7013. match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
  7014. CMPL DX, 8(SP)
  7015. JAE emit_remainder_encodeBetterBlockAsm4MB
  7016. CMPQ CX, (SP)
  7017. JB match_nolit_dst_ok_encodeBetterBlockAsm4MB
  7018. MOVQ $0x00000000, ret+56(FP)
  7019. RET
  7020. match_nolit_dst_ok_encodeBetterBlockAsm4MB:
  7021. MOVQ $0x00cf1bbcdcbfa563, SI
  7022. MOVQ $0x9e3779b1, R8
  7023. LEAQ 1(DI), DI
  7024. LEAQ -2(DX), R9
  7025. MOVQ (BX)(DI*1), R10
  7026. MOVQ 1(BX)(DI*1), R11
  7027. MOVQ (BX)(R9*1), R12
  7028. MOVQ 1(BX)(R9*1), R13
  7029. SHLQ $0x08, R10
  7030. IMULQ SI, R10
  7031. SHRQ $0x2f, R10
  7032. SHLQ $0x20, R11
  7033. IMULQ R8, R11
  7034. SHRQ $0x32, R11
  7035. SHLQ $0x08, R12
  7036. IMULQ SI, R12
  7037. SHRQ $0x2f, R12
  7038. SHLQ $0x20, R13
  7039. IMULQ R8, R13
  7040. SHRQ $0x32, R13
  7041. LEAQ 1(DI), R8
  7042. LEAQ 1(R9), R14
  7043. MOVL DI, (AX)(R10*4)
  7044. MOVL R9, (AX)(R12*4)
  7045. MOVL R8, 524288(AX)(R11*4)
  7046. MOVL R14, 524288(AX)(R13*4)
  7047. LEAQ 1(R9)(DI*1), R8
  7048. SHRQ $0x01, R8
  7049. ADDQ $0x01, DI
  7050. SUBQ $0x01, R9
  7051. index_loop_encodeBetterBlockAsm4MB:
  7052. CMPQ R8, R9
  7053. JAE search_loop_encodeBetterBlockAsm4MB
  7054. MOVQ (BX)(DI*1), R10
  7055. MOVQ (BX)(R8*1), R11
  7056. SHLQ $0x08, R10
  7057. IMULQ SI, R10
  7058. SHRQ $0x2f, R10
  7059. SHLQ $0x08, R11
  7060. IMULQ SI, R11
  7061. SHRQ $0x2f, R11
  7062. MOVL DI, (AX)(R10*4)
  7063. MOVL R8, (AX)(R11*4)
  7064. ADDQ $0x02, DI
  7065. ADDQ $0x02, R8
  7066. JMP index_loop_encodeBetterBlockAsm4MB
  7067. emit_remainder_encodeBetterBlockAsm4MB:
  7068. MOVQ src_len+32(FP), AX
  7069. SUBL 12(SP), AX
  7070. LEAQ 4(CX)(AX*1), AX
  7071. CMPQ AX, (SP)
  7072. JB emit_remainder_ok_encodeBetterBlockAsm4MB
  7073. MOVQ $0x00000000, ret+56(FP)
  7074. RET
  7075. emit_remainder_ok_encodeBetterBlockAsm4MB:
  7076. MOVQ src_len+32(FP), AX
  7077. MOVL 12(SP), DX
  7078. CMPL DX, AX
  7079. JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
  7080. MOVL AX, SI
  7081. MOVL AX, 12(SP)
  7082. LEAQ (BX)(DX*1), AX
  7083. SUBL DX, SI
  7084. LEAL -1(SI), DX
  7085. CMPL DX, $0x3c
  7086. JB one_byte_emit_remainder_encodeBetterBlockAsm4MB
  7087. CMPL DX, $0x00000100
  7088. JB two_bytes_emit_remainder_encodeBetterBlockAsm4MB
  7089. CMPL DX, $0x00010000
  7090. JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB
  7091. MOVL DX, BX
  7092. SHRL $0x10, BX
  7093. MOVB $0xf8, (CX)
  7094. MOVW DX, 1(CX)
  7095. MOVB BL, 3(CX)
  7096. ADDQ $0x04, CX
  7097. JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
  7098. three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
  7099. MOVB $0xf4, (CX)
  7100. MOVW DX, 1(CX)
  7101. ADDQ $0x03, CX
  7102. JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
  7103. two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
  7104. MOVB $0xf0, (CX)
  7105. MOVB DL, 1(CX)
  7106. ADDQ $0x02, CX
  7107. CMPL DX, $0x40
  7108. JB memmove_emit_remainder_encodeBetterBlockAsm4MB
  7109. JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
  7110. one_byte_emit_remainder_encodeBetterBlockAsm4MB:
  7111. SHLB $0x02, DL
  7112. MOVB DL, (CX)
  7113. ADDQ $0x01, CX
  7114. memmove_emit_remainder_encodeBetterBlockAsm4MB:
  7115. LEAQ (CX)(SI*1), DX
  7116. MOVL SI, BX
  7117. // genMemMoveShort
  7118. CMPQ BX, $0x03
  7119. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
  7120. JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
  7121. CMPQ BX, $0x08
  7122. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
  7123. CMPQ BX, $0x10
  7124. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
  7125. CMPQ BX, $0x20
  7126. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
  7127. JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
  7128. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
  7129. MOVB (AX), SI
  7130. MOVB -1(AX)(BX*1), AL
  7131. MOVB SI, (CX)
  7132. MOVB AL, -1(CX)(BX*1)
  7133. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  7134. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
  7135. MOVW (AX), SI
  7136. MOVB 2(AX), AL
  7137. MOVW SI, (CX)
  7138. MOVB AL, 2(CX)
  7139. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  7140. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
  7141. MOVL (AX), SI
  7142. MOVL -4(AX)(BX*1), AX
  7143. MOVL SI, (CX)
  7144. MOVL AX, -4(CX)(BX*1)
  7145. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  7146. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
  7147. MOVQ (AX), SI
  7148. MOVQ -8(AX)(BX*1), AX
  7149. MOVQ SI, (CX)
  7150. MOVQ AX, -8(CX)(BX*1)
  7151. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  7152. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
  7153. MOVOU (AX), X0
  7154. MOVOU -16(AX)(BX*1), X1
  7155. MOVOU X0, (CX)
  7156. MOVOU X1, -16(CX)(BX*1)
  7157. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
  7158. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
  7159. MOVOU (AX), X0
  7160. MOVOU 16(AX), X1
  7161. MOVOU -32(AX)(BX*1), X2
  7162. MOVOU -16(AX)(BX*1), X3
  7163. MOVOU X0, (CX)
  7164. MOVOU X1, 16(CX)
  7165. MOVOU X2, -32(CX)(BX*1)
  7166. MOVOU X3, -16(CX)(BX*1)
  7167. memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
  7168. MOVQ DX, CX
  7169. JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
  7170. memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
  7171. LEAQ (CX)(SI*1), DX
  7172. MOVL SI, BX
  7173. // genMemMoveLong
  7174. MOVOU (AX), X0
  7175. MOVOU 16(AX), X1
  7176. MOVOU -32(AX)(BX*1), X2
  7177. MOVOU -16(AX)(BX*1), X3
  7178. MOVQ BX, DI
  7179. SHRQ $0x05, DI
  7180. MOVQ CX, SI
  7181. ANDL $0x0000001f, SI
  7182. MOVQ $0x00000040, R8
  7183. SUBQ SI, R8
  7184. DECQ DI
  7185. JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  7186. LEAQ -32(AX)(R8*1), SI
  7187. LEAQ -32(CX)(R8*1), R9
  7188. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
  7189. MOVOU (SI), X4
  7190. MOVOU 16(SI), X5
  7191. MOVOA X4, (R9)
  7192. MOVOA X5, 16(R9)
  7193. ADDQ $0x20, R9
  7194. ADDQ $0x20, SI
  7195. ADDQ $0x20, R8
  7196. DECQ DI
  7197. JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
  7198. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
  7199. MOVOU -32(AX)(R8*1), X4
  7200. MOVOU -16(AX)(R8*1), X5
  7201. MOVOA X4, -32(CX)(R8*1)
  7202. MOVOA X5, -16(CX)(R8*1)
  7203. ADDQ $0x20, R8
  7204. CMPQ BX, R8
  7205. JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
  7206. MOVOU X0, (CX)
  7207. MOVOU X1, 16(CX)
  7208. MOVOU X2, -32(CX)(BX*1)
  7209. MOVOU X3, -16(CX)(BX*1)
  7210. MOVQ DX, CX
  7211. emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
  7212. MOVQ dst_base+0(FP), AX
  7213. SUBQ AX, CX
  7214. MOVQ CX, ret+56(FP)
  7215. RET
  7216. // func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
  7217. // Requires: BMI, SSE2
  7218. TEXT ·encodeBetterBlockAsm12B(SB), $24-64
  7219. MOVQ tmp+48(FP), AX
  7220. MOVQ dst_base+0(FP), CX
  7221. MOVQ $0x00000280, DX
  7222. MOVQ AX, BX
  7223. PXOR X0, X0
  7224. zero_loop_encodeBetterBlockAsm12B:
  7225. MOVOU X0, (BX)
  7226. MOVOU X0, 16(BX)
  7227. MOVOU X0, 32(BX)
  7228. MOVOU X0, 48(BX)
  7229. MOVOU X0, 64(BX)
  7230. MOVOU X0, 80(BX)
  7231. MOVOU X0, 96(BX)
  7232. MOVOU X0, 112(BX)
  7233. ADDQ $0x80, BX
  7234. DECQ DX
  7235. JNZ zero_loop_encodeBetterBlockAsm12B
  7236. MOVL $0x00000000, 12(SP)
  7237. MOVQ src_len+32(FP), DX
  7238. LEAQ -6(DX), BX
  7239. LEAQ -8(DX), SI
  7240. MOVL SI, 8(SP)
  7241. SHRQ $0x05, DX
  7242. SUBL DX, BX
  7243. LEAQ (CX)(BX*1), BX
  7244. MOVQ BX, (SP)
  7245. MOVL $0x00000001, DX
  7246. MOVL $0x00000000, 16(SP)
  7247. MOVQ src_base+24(FP), BX
  7248. search_loop_encodeBetterBlockAsm12B:
  7249. MOVL DX, SI
  7250. SUBL 12(SP), SI
  7251. SHRL $0x06, SI
  7252. LEAL 1(DX)(SI*1), SI
  7253. CMPL SI, 8(SP)
  7254. JAE emit_remainder_encodeBetterBlockAsm12B
  7255. MOVQ (BX)(DX*1), DI
  7256. MOVL SI, 20(SP)
  7257. MOVQ $0x0000cf1bbcdcbf9b, R9
  7258. MOVQ $0x9e3779b1, SI
  7259. MOVQ DI, R10
  7260. MOVQ DI, R11
  7261. SHLQ $0x10, R10
  7262. IMULQ R9, R10
  7263. SHRQ $0x32, R10
  7264. SHLQ $0x20, R11
  7265. IMULQ SI, R11
  7266. SHRQ $0x34, R11
  7267. MOVL (AX)(R10*4), SI
  7268. MOVL 65536(AX)(R11*4), R8
  7269. MOVL DX, (AX)(R10*4)
  7270. MOVL DX, 65536(AX)(R11*4)
  7271. MOVQ (BX)(SI*1), R10
  7272. MOVQ (BX)(R8*1), R11
  7273. CMPQ R10, DI
  7274. JEQ candidate_match_encodeBetterBlockAsm12B
  7275. CMPQ R11, DI
  7276. JNE no_short_found_encodeBetterBlockAsm12B
  7277. MOVL R8, SI
  7278. JMP candidate_match_encodeBetterBlockAsm12B
  7279. no_short_found_encodeBetterBlockAsm12B:
  7280. CMPL R10, DI
  7281. JEQ candidate_match_encodeBetterBlockAsm12B
  7282. CMPL R11, DI
  7283. JEQ candidateS_match_encodeBetterBlockAsm12B
  7284. MOVL 20(SP), DX
  7285. JMP search_loop_encodeBetterBlockAsm12B
  7286. candidateS_match_encodeBetterBlockAsm12B:
  7287. SHRQ $0x08, DI
  7288. MOVQ DI, R10
  7289. SHLQ $0x10, R10
  7290. IMULQ R9, R10
  7291. SHRQ $0x32, R10
  7292. MOVL (AX)(R10*4), SI
  7293. INCL DX
  7294. MOVL DX, (AX)(R10*4)
  7295. CMPL (BX)(SI*1), DI
  7296. JEQ candidate_match_encodeBetterBlockAsm12B
  7297. DECL DX
  7298. MOVL R8, SI
  7299. candidate_match_encodeBetterBlockAsm12B:
  7300. MOVL 12(SP), DI
  7301. TESTL SI, SI
  7302. JZ match_extend_back_end_encodeBetterBlockAsm12B
  7303. match_extend_back_loop_encodeBetterBlockAsm12B:
  7304. CMPL DX, DI
  7305. JBE match_extend_back_end_encodeBetterBlockAsm12B
  7306. MOVB -1(BX)(SI*1), R8
  7307. MOVB -1(BX)(DX*1), R9
  7308. CMPB R8, R9
  7309. JNE match_extend_back_end_encodeBetterBlockAsm12B
  7310. LEAL -1(DX), DX
  7311. DECL SI
  7312. JZ match_extend_back_end_encodeBetterBlockAsm12B
  7313. JMP match_extend_back_loop_encodeBetterBlockAsm12B
  7314. match_extend_back_end_encodeBetterBlockAsm12B:
  7315. MOVL DX, DI
  7316. SUBL 12(SP), DI
  7317. LEAQ 3(CX)(DI*1), DI
  7318. CMPQ DI, (SP)
  7319. JB match_dst_size_check_encodeBetterBlockAsm12B
  7320. MOVQ $0x00000000, ret+56(FP)
  7321. RET
  7322. match_dst_size_check_encodeBetterBlockAsm12B:
  7323. MOVL DX, DI
  7324. ADDL $0x04, DX
  7325. ADDL $0x04, SI
  7326. MOVQ src_len+32(FP), R8
  7327. SUBL DX, R8
  7328. LEAQ (BX)(DX*1), R9
  7329. LEAQ (BX)(SI*1), R10
  7330. // matchLen
  7331. XORL R12, R12
  7332. matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
  7333. CMPL R8, $0x10
  7334. JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B
  7335. MOVQ (R9)(R12*1), R11
  7336. MOVQ 8(R9)(R12*1), R13
  7337. XORQ (R10)(R12*1), R11
  7338. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
  7339. XORQ 8(R10)(R12*1), R13
  7340. JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
  7341. LEAL -16(R8), R8
  7342. LEAL 16(R12), R12
  7343. JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
  7344. matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
  7345. #ifdef GOAMD64_v3
  7346. TZCNTQ R13, R13
  7347. #else
  7348. BSFQ R13, R13
  7349. #endif
  7350. SARQ $0x03, R13
  7351. LEAL 8(R12)(R13*1), R12
  7352. JMP match_nolit_end_encodeBetterBlockAsm12B
  7353. matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
  7354. CMPL R8, $0x08
  7355. JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B
  7356. MOVQ (R9)(R12*1), R11
  7357. XORQ (R10)(R12*1), R11
  7358. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
  7359. LEAL -8(R8), R8
  7360. LEAL 8(R12), R12
  7361. JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B
  7362. matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
  7363. #ifdef GOAMD64_v3
  7364. TZCNTQ R11, R11
  7365. #else
  7366. BSFQ R11, R11
  7367. #endif
  7368. SARQ $0x03, R11
  7369. LEAL (R12)(R11*1), R12
  7370. JMP match_nolit_end_encodeBetterBlockAsm12B
  7371. matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
  7372. CMPL R8, $0x04
  7373. JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B
  7374. MOVL (R9)(R12*1), R11
  7375. CMPL (R10)(R12*1), R11
  7376. JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B
  7377. LEAL -4(R8), R8
  7378. LEAL 4(R12), R12
  7379. matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
  7380. CMPL R8, $0x01
  7381. JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B
  7382. JB match_nolit_end_encodeBetterBlockAsm12B
  7383. MOVW (R9)(R12*1), R11
  7384. CMPW (R10)(R12*1), R11
  7385. JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B
  7386. LEAL 2(R12), R12
  7387. SUBL $0x02, R8
  7388. JZ match_nolit_end_encodeBetterBlockAsm12B
  7389. matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
  7390. MOVB (R9)(R12*1), R11
  7391. CMPB (R10)(R12*1), R11
  7392. JNE match_nolit_end_encodeBetterBlockAsm12B
  7393. LEAL 1(R12), R12
  7394. match_nolit_end_encodeBetterBlockAsm12B:
  7395. MOVL DX, R8
  7396. SUBL SI, R8
  7397. // Check if repeat
  7398. CMPL 16(SP), R8
  7399. JEQ match_is_repeat_encodeBetterBlockAsm12B
  7400. MOVL R8, 16(SP)
  7401. MOVL 12(SP), SI
  7402. CMPL SI, DI
  7403. JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B
  7404. MOVL DI, R9
  7405. MOVL DI, 12(SP)
  7406. LEAQ (BX)(SI*1), R10
  7407. SUBL SI, R9
  7408. LEAL -1(R9), SI
  7409. CMPL SI, $0x3c
  7410. JB one_byte_match_emit_encodeBetterBlockAsm12B
  7411. CMPL SI, $0x00000100
  7412. JB two_bytes_match_emit_encodeBetterBlockAsm12B
  7413. JB three_bytes_match_emit_encodeBetterBlockAsm12B
  7414. three_bytes_match_emit_encodeBetterBlockAsm12B:
  7415. MOVB $0xf4, (CX)
  7416. MOVW SI, 1(CX)
  7417. ADDQ $0x03, CX
  7418. JMP memmove_long_match_emit_encodeBetterBlockAsm12B
  7419. two_bytes_match_emit_encodeBetterBlockAsm12B:
  7420. MOVB $0xf0, (CX)
  7421. MOVB SI, 1(CX)
  7422. ADDQ $0x02, CX
  7423. CMPL SI, $0x40
  7424. JB memmove_match_emit_encodeBetterBlockAsm12B
  7425. JMP memmove_long_match_emit_encodeBetterBlockAsm12B
  7426. one_byte_match_emit_encodeBetterBlockAsm12B:
  7427. SHLB $0x02, SI
  7428. MOVB SI, (CX)
  7429. ADDQ $0x01, CX
  7430. memmove_match_emit_encodeBetterBlockAsm12B:
  7431. LEAQ (CX)(R9*1), SI
  7432. // genMemMoveShort
  7433. CMPQ R9, $0x04
  7434. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
  7435. CMPQ R9, $0x08
  7436. JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
  7437. CMPQ R9, $0x10
  7438. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
  7439. CMPQ R9, $0x20
  7440. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
  7441. JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
  7442. emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
  7443. MOVL (R10), R11
  7444. MOVL R11, (CX)
  7445. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  7446. emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
  7447. MOVL (R10), R11
  7448. MOVL -4(R10)(R9*1), R10
  7449. MOVL R11, (CX)
  7450. MOVL R10, -4(CX)(R9*1)
  7451. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  7452. emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
  7453. MOVQ (R10), R11
  7454. MOVQ -8(R10)(R9*1), R10
  7455. MOVQ R11, (CX)
  7456. MOVQ R10, -8(CX)(R9*1)
  7457. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  7458. emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
  7459. MOVOU (R10), X0
  7460. MOVOU -16(R10)(R9*1), X1
  7461. MOVOU X0, (CX)
  7462. MOVOU X1, -16(CX)(R9*1)
  7463. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
  7464. emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
  7465. MOVOU (R10), X0
  7466. MOVOU 16(R10), X1
  7467. MOVOU -32(R10)(R9*1), X2
  7468. MOVOU -16(R10)(R9*1), X3
  7469. MOVOU X0, (CX)
  7470. MOVOU X1, 16(CX)
  7471. MOVOU X2, -32(CX)(R9*1)
  7472. MOVOU X3, -16(CX)(R9*1)
  7473. memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
  7474. MOVQ SI, CX
  7475. JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B
  7476. memmove_long_match_emit_encodeBetterBlockAsm12B:
  7477. LEAQ (CX)(R9*1), SI
  7478. // genMemMoveLong
  7479. MOVOU (R10), X0
  7480. MOVOU 16(R10), X1
  7481. MOVOU -32(R10)(R9*1), X2
  7482. MOVOU -16(R10)(R9*1), X3
  7483. MOVQ R9, R13
  7484. SHRQ $0x05, R13
  7485. MOVQ CX, R11
  7486. ANDL $0x0000001f, R11
  7487. MOVQ $0x00000040, R14
  7488. SUBQ R11, R14
  7489. DECQ R13
  7490. JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  7491. LEAQ -32(R10)(R14*1), R11
  7492. LEAQ -32(CX)(R14*1), R15
  7493. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
  7494. MOVOU (R11), X4
  7495. MOVOU 16(R11), X5
  7496. MOVOA X4, (R15)
  7497. MOVOA X5, 16(R15)
  7498. ADDQ $0x20, R15
  7499. ADDQ $0x20, R11
  7500. ADDQ $0x20, R14
  7501. DECQ R13
  7502. JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
  7503. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
  7504. MOVOU -32(R10)(R14*1), X4
  7505. MOVOU -16(R10)(R14*1), X5
  7506. MOVOA X4, -32(CX)(R14*1)
  7507. MOVOA X5, -16(CX)(R14*1)
  7508. ADDQ $0x20, R14
  7509. CMPQ R9, R14
  7510. JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  7511. MOVOU X0, (CX)
  7512. MOVOU X1, 16(CX)
  7513. MOVOU X2, -32(CX)(R9*1)
  7514. MOVOU X3, -16(CX)(R9*1)
  7515. MOVQ SI, CX
  7516. emit_literal_done_match_emit_encodeBetterBlockAsm12B:
  7517. ADDL R12, DX
  7518. ADDL $0x04, R12
  7519. MOVL DX, 12(SP)
  7520. // emitCopy
  7521. CMPL R12, $0x40
  7522. JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
  7523. CMPL R8, $0x00000800
  7524. JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B
  7525. MOVL $0x00000001, SI
  7526. LEAL 16(SI), SI
  7527. MOVB R8, 1(CX)
  7528. SHRL $0x08, R8
  7529. SHLL $0x05, R8
  7530. ORL R8, SI
  7531. MOVB SI, (CX)
  7532. ADDQ $0x02, CX
  7533. SUBL $0x08, R12
  7534. // emitRepeat
  7535. LEAL -4(R12), R12
  7536. JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  7537. MOVL R12, SI
  7538. LEAL -4(R12), R12
  7539. CMPL SI, $0x08
  7540. JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  7541. CMPL SI, $0x0c
  7542. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  7543. CMPL R8, $0x00000800
  7544. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  7545. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  7546. CMPL R12, $0x00000104
  7547. JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
  7548. LEAL -256(R12), R12
  7549. MOVW $0x0019, (CX)
  7550. MOVW R12, 2(CX)
  7551. ADDQ $0x04, CX
  7552. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7553. repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  7554. LEAL -4(R12), R12
  7555. MOVW $0x0015, (CX)
  7556. MOVB R12, 2(CX)
  7557. ADDQ $0x03, CX
  7558. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7559. repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  7560. SHLL $0x02, R12
  7561. ORL $0x01, R12
  7562. MOVW R12, (CX)
  7563. ADDQ $0x02, CX
  7564. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7565. repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
  7566. XORQ SI, SI
  7567. LEAL 1(SI)(R12*4), R12
  7568. MOVB R8, 1(CX)
  7569. SARL $0x08, R8
  7570. SHLL $0x05, R8
  7571. ORL R8, R12
  7572. MOVB R12, (CX)
  7573. ADDQ $0x02, CX
  7574. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7575. long_offset_short_match_nolit_encodeBetterBlockAsm12B:
  7576. MOVB $0xee, (CX)
  7577. MOVW R8, 1(CX)
  7578. LEAL -60(R12), R12
  7579. ADDQ $0x03, CX
  7580. // emitRepeat
  7581. MOVL R12, SI
  7582. LEAL -4(R12), R12
  7583. CMPL SI, $0x08
  7584. JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  7585. CMPL SI, $0x0c
  7586. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  7587. CMPL R8, $0x00000800
  7588. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  7589. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  7590. CMPL R12, $0x00000104
  7591. JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
  7592. LEAL -256(R12), R12
  7593. MOVW $0x0019, (CX)
  7594. MOVW R12, 2(CX)
  7595. ADDQ $0x04, CX
  7596. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7597. repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  7598. LEAL -4(R12), R12
  7599. MOVW $0x0015, (CX)
  7600. MOVB R12, 2(CX)
  7601. ADDQ $0x03, CX
  7602. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7603. repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  7604. SHLL $0x02, R12
  7605. ORL $0x01, R12
  7606. MOVW R12, (CX)
  7607. ADDQ $0x02, CX
  7608. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7609. repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
  7610. XORQ SI, SI
  7611. LEAL 1(SI)(R12*4), R12
  7612. MOVB R8, 1(CX)
  7613. SARL $0x08, R8
  7614. SHLL $0x05, R8
  7615. ORL R8, R12
  7616. MOVB R12, (CX)
  7617. ADDQ $0x02, CX
  7618. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7619. two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
  7620. MOVL R12, SI
  7621. SHLL $0x02, SI
  7622. CMPL R12, $0x0c
  7623. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
  7624. CMPL R8, $0x00000800
  7625. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
  7626. LEAL -15(SI), SI
  7627. MOVB R8, 1(CX)
  7628. SHRL $0x08, R8
  7629. SHLL $0x05, R8
  7630. ORL R8, SI
  7631. MOVB SI, (CX)
  7632. ADDQ $0x02, CX
  7633. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7634. emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
  7635. LEAL -2(SI), SI
  7636. MOVB SI, (CX)
  7637. MOVW R8, 1(CX)
  7638. ADDQ $0x03, CX
  7639. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7640. match_is_repeat_encodeBetterBlockAsm12B:
  7641. MOVL 12(SP), SI
  7642. CMPL SI, DI
  7643. JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
  7644. MOVL DI, R9
  7645. MOVL DI, 12(SP)
  7646. LEAQ (BX)(SI*1), R10
  7647. SUBL SI, R9
  7648. LEAL -1(R9), SI
  7649. CMPL SI, $0x3c
  7650. JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B
  7651. CMPL SI, $0x00000100
  7652. JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
  7653. JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B
  7654. three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
  7655. MOVB $0xf4, (CX)
  7656. MOVW SI, 1(CX)
  7657. ADDQ $0x03, CX
  7658. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
  7659. two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
  7660. MOVB $0xf0, (CX)
  7661. MOVB SI, 1(CX)
  7662. ADDQ $0x02, CX
  7663. CMPL SI, $0x40
  7664. JB memmove_match_emit_repeat_encodeBetterBlockAsm12B
  7665. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
  7666. one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
  7667. SHLB $0x02, SI
  7668. MOVB SI, (CX)
  7669. ADDQ $0x01, CX
  7670. memmove_match_emit_repeat_encodeBetterBlockAsm12B:
  7671. LEAQ (CX)(R9*1), SI
  7672. // genMemMoveShort
  7673. CMPQ R9, $0x04
  7674. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
  7675. CMPQ R9, $0x08
  7676. JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
  7677. CMPQ R9, $0x10
  7678. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
  7679. CMPQ R9, $0x20
  7680. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
  7681. JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
  7682. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
  7683. MOVL (R10), R11
  7684. MOVL R11, (CX)
  7685. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  7686. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
  7687. MOVL (R10), R11
  7688. MOVL -4(R10)(R9*1), R10
  7689. MOVL R11, (CX)
  7690. MOVL R10, -4(CX)(R9*1)
  7691. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  7692. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
  7693. MOVQ (R10), R11
  7694. MOVQ -8(R10)(R9*1), R10
  7695. MOVQ R11, (CX)
  7696. MOVQ R10, -8(CX)(R9*1)
  7697. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  7698. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
  7699. MOVOU (R10), X0
  7700. MOVOU -16(R10)(R9*1), X1
  7701. MOVOU X0, (CX)
  7702. MOVOU X1, -16(CX)(R9*1)
  7703. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
  7704. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
  7705. MOVOU (R10), X0
  7706. MOVOU 16(R10), X1
  7707. MOVOU -32(R10)(R9*1), X2
  7708. MOVOU -16(R10)(R9*1), X3
  7709. MOVOU X0, (CX)
  7710. MOVOU X1, 16(CX)
  7711. MOVOU X2, -32(CX)(R9*1)
  7712. MOVOU X3, -16(CX)(R9*1)
  7713. memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
  7714. MOVQ SI, CX
  7715. JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
  7716. memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
  7717. LEAQ (CX)(R9*1), SI
  7718. // genMemMoveLong
  7719. MOVOU (R10), X0
  7720. MOVOU 16(R10), X1
  7721. MOVOU -32(R10)(R9*1), X2
  7722. MOVOU -16(R10)(R9*1), X3
  7723. MOVQ R9, R13
  7724. SHRQ $0x05, R13
  7725. MOVQ CX, R11
  7726. ANDL $0x0000001f, R11
  7727. MOVQ $0x00000040, R14
  7728. SUBQ R11, R14
  7729. DECQ R13
  7730. JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  7731. LEAQ -32(R10)(R14*1), R11
  7732. LEAQ -32(CX)(R14*1), R15
  7733. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
  7734. MOVOU (R11), X4
  7735. MOVOU 16(R11), X5
  7736. MOVOA X4, (R15)
  7737. MOVOA X5, 16(R15)
  7738. ADDQ $0x20, R15
  7739. ADDQ $0x20, R11
  7740. ADDQ $0x20, R14
  7741. DECQ R13
  7742. JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
  7743. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
  7744. MOVOU -32(R10)(R14*1), X4
  7745. MOVOU -16(R10)(R14*1), X5
  7746. MOVOA X4, -32(CX)(R14*1)
  7747. MOVOA X5, -16(CX)(R14*1)
  7748. ADDQ $0x20, R14
  7749. CMPQ R9, R14
  7750. JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  7751. MOVOU X0, (CX)
  7752. MOVOU X1, 16(CX)
  7753. MOVOU X2, -32(CX)(R9*1)
  7754. MOVOU X3, -16(CX)(R9*1)
  7755. MOVQ SI, CX
  7756. emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
  7757. ADDL R12, DX
  7758. ADDL $0x04, R12
  7759. MOVL DX, 12(SP)
  7760. // emitRepeat
  7761. MOVL R12, SI
  7762. LEAL -4(R12), R12
  7763. CMPL SI, $0x08
  7764. JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
  7765. CMPL SI, $0x0c
  7766. JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
  7767. CMPL R8, $0x00000800
  7768. JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
  7769. cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
  7770. CMPL R12, $0x00000104
  7771. JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
  7772. LEAL -256(R12), R12
  7773. MOVW $0x0019, (CX)
  7774. MOVW R12, 2(CX)
  7775. ADDQ $0x04, CX
  7776. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7777. repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
  7778. LEAL -4(R12), R12
  7779. MOVW $0x0015, (CX)
  7780. MOVB R12, 2(CX)
  7781. ADDQ $0x03, CX
  7782. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7783. repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
  7784. SHLL $0x02, R12
  7785. ORL $0x01, R12
  7786. MOVW R12, (CX)
  7787. ADDQ $0x02, CX
  7788. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
  7789. repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
  7790. XORQ SI, SI
  7791. LEAL 1(SI)(R12*4), R12
  7792. MOVB R8, 1(CX)
  7793. SARL $0x08, R8
  7794. SHLL $0x05, R8
  7795. ORL R8, R12
  7796. MOVB R12, (CX)
  7797. ADDQ $0x02, CX
  7798. match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
  7799. CMPL DX, 8(SP)
  7800. JAE emit_remainder_encodeBetterBlockAsm12B
  7801. CMPQ CX, (SP)
  7802. JB match_nolit_dst_ok_encodeBetterBlockAsm12B
  7803. MOVQ $0x00000000, ret+56(FP)
  7804. RET
  7805. match_nolit_dst_ok_encodeBetterBlockAsm12B:
  7806. MOVQ $0x0000cf1bbcdcbf9b, SI
  7807. MOVQ $0x9e3779b1, R8
  7808. LEAQ 1(DI), DI
  7809. LEAQ -2(DX), R9
  7810. MOVQ (BX)(DI*1), R10
  7811. MOVQ 1(BX)(DI*1), R11
  7812. MOVQ (BX)(R9*1), R12
  7813. MOVQ 1(BX)(R9*1), R13
  7814. SHLQ $0x10, R10
  7815. IMULQ SI, R10
  7816. SHRQ $0x32, R10
  7817. SHLQ $0x20, R11
  7818. IMULQ R8, R11
  7819. SHRQ $0x34, R11
  7820. SHLQ $0x10, R12
  7821. IMULQ SI, R12
  7822. SHRQ $0x32, R12
  7823. SHLQ $0x20, R13
  7824. IMULQ R8, R13
  7825. SHRQ $0x34, R13
  7826. LEAQ 1(DI), R8
  7827. LEAQ 1(R9), R14
  7828. MOVL DI, (AX)(R10*4)
  7829. MOVL R9, (AX)(R12*4)
  7830. MOVL R8, 65536(AX)(R11*4)
  7831. MOVL R14, 65536(AX)(R13*4)
  7832. LEAQ 1(R9)(DI*1), R8
  7833. SHRQ $0x01, R8
  7834. ADDQ $0x01, DI
  7835. SUBQ $0x01, R9
  7836. index_loop_encodeBetterBlockAsm12B:
  7837. CMPQ R8, R9
  7838. JAE search_loop_encodeBetterBlockAsm12B
  7839. MOVQ (BX)(DI*1), R10
  7840. MOVQ (BX)(R8*1), R11
  7841. SHLQ $0x10, R10
  7842. IMULQ SI, R10
  7843. SHRQ $0x32, R10
  7844. SHLQ $0x10, R11
  7845. IMULQ SI, R11
  7846. SHRQ $0x32, R11
  7847. MOVL DI, (AX)(R10*4)
  7848. MOVL R8, (AX)(R11*4)
  7849. ADDQ $0x02, DI
  7850. ADDQ $0x02, R8
  7851. JMP index_loop_encodeBetterBlockAsm12B
  7852. emit_remainder_encodeBetterBlockAsm12B:
  7853. MOVQ src_len+32(FP), AX
  7854. SUBL 12(SP), AX
  7855. LEAQ 3(CX)(AX*1), AX
  7856. CMPQ AX, (SP)
  7857. JB emit_remainder_ok_encodeBetterBlockAsm12B
  7858. MOVQ $0x00000000, ret+56(FP)
  7859. RET
  7860. emit_remainder_ok_encodeBetterBlockAsm12B:
  7861. MOVQ src_len+32(FP), AX
  7862. MOVL 12(SP), DX
  7863. CMPL DX, AX
  7864. JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
  7865. MOVL AX, SI
  7866. MOVL AX, 12(SP)
  7867. LEAQ (BX)(DX*1), AX
  7868. SUBL DX, SI
  7869. LEAL -1(SI), DX
  7870. CMPL DX, $0x3c
  7871. JB one_byte_emit_remainder_encodeBetterBlockAsm12B
  7872. CMPL DX, $0x00000100
  7873. JB two_bytes_emit_remainder_encodeBetterBlockAsm12B
  7874. JB three_bytes_emit_remainder_encodeBetterBlockAsm12B
  7875. three_bytes_emit_remainder_encodeBetterBlockAsm12B:
  7876. MOVB $0xf4, (CX)
  7877. MOVW DX, 1(CX)
  7878. ADDQ $0x03, CX
  7879. JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
  7880. two_bytes_emit_remainder_encodeBetterBlockAsm12B:
  7881. MOVB $0xf0, (CX)
  7882. MOVB DL, 1(CX)
  7883. ADDQ $0x02, CX
  7884. CMPL DX, $0x40
  7885. JB memmove_emit_remainder_encodeBetterBlockAsm12B
  7886. JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
  7887. one_byte_emit_remainder_encodeBetterBlockAsm12B:
  7888. SHLB $0x02, DL
  7889. MOVB DL, (CX)
  7890. ADDQ $0x01, CX
  7891. memmove_emit_remainder_encodeBetterBlockAsm12B:
  7892. LEAQ (CX)(SI*1), DX
  7893. MOVL SI, BX
  7894. // genMemMoveShort
  7895. CMPQ BX, $0x03
  7896. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2
  7897. JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3
  7898. CMPQ BX, $0x08
  7899. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
  7900. CMPQ BX, $0x10
  7901. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
  7902. CMPQ BX, $0x20
  7903. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
  7904. JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
  7905. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
  7906. MOVB (AX), SI
  7907. MOVB -1(AX)(BX*1), AL
  7908. MOVB SI, (CX)
  7909. MOVB AL, -1(CX)(BX*1)
  7910. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  7911. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
  7912. MOVW (AX), SI
  7913. MOVB 2(AX), AL
  7914. MOVW SI, (CX)
  7915. MOVB AL, 2(CX)
  7916. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  7917. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
  7918. MOVL (AX), SI
  7919. MOVL -4(AX)(BX*1), AX
  7920. MOVL SI, (CX)
  7921. MOVL AX, -4(CX)(BX*1)
  7922. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  7923. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
  7924. MOVQ (AX), SI
  7925. MOVQ -8(AX)(BX*1), AX
  7926. MOVQ SI, (CX)
  7927. MOVQ AX, -8(CX)(BX*1)
  7928. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  7929. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
  7930. MOVOU (AX), X0
  7931. MOVOU -16(AX)(BX*1), X1
  7932. MOVOU X0, (CX)
  7933. MOVOU X1, -16(CX)(BX*1)
  7934. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
  7935. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
  7936. MOVOU (AX), X0
  7937. MOVOU 16(AX), X1
  7938. MOVOU -32(AX)(BX*1), X2
  7939. MOVOU -16(AX)(BX*1), X3
  7940. MOVOU X0, (CX)
  7941. MOVOU X1, 16(CX)
  7942. MOVOU X2, -32(CX)(BX*1)
  7943. MOVOU X3, -16(CX)(BX*1)
  7944. memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
  7945. MOVQ DX, CX
  7946. JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
  7947. memmove_long_emit_remainder_encodeBetterBlockAsm12B:
  7948. LEAQ (CX)(SI*1), DX
  7949. MOVL SI, BX
  7950. // genMemMoveLong
  7951. MOVOU (AX), X0
  7952. MOVOU 16(AX), X1
  7953. MOVOU -32(AX)(BX*1), X2
  7954. MOVOU -16(AX)(BX*1), X3
  7955. MOVQ BX, DI
  7956. SHRQ $0x05, DI
  7957. MOVQ CX, SI
  7958. ANDL $0x0000001f, SI
  7959. MOVQ $0x00000040, R8
  7960. SUBQ SI, R8
  7961. DECQ DI
  7962. JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  7963. LEAQ -32(AX)(R8*1), SI
  7964. LEAQ -32(CX)(R8*1), R9
  7965. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
  7966. MOVOU (SI), X4
  7967. MOVOU 16(SI), X5
  7968. MOVOA X4, (R9)
  7969. MOVOA X5, 16(R9)
  7970. ADDQ $0x20, R9
  7971. ADDQ $0x20, SI
  7972. ADDQ $0x20, R8
  7973. DECQ DI
  7974. JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
  7975. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
  7976. MOVOU -32(AX)(R8*1), X4
  7977. MOVOU -16(AX)(R8*1), X5
  7978. MOVOA X4, -32(CX)(R8*1)
  7979. MOVOA X5, -16(CX)(R8*1)
  7980. ADDQ $0x20, R8
  7981. CMPQ BX, R8
  7982. JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
  7983. MOVOU X0, (CX)
  7984. MOVOU X1, 16(CX)
  7985. MOVOU X2, -32(CX)(BX*1)
  7986. MOVOU X3, -16(CX)(BX*1)
  7987. MOVQ DX, CX
  7988. emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
  7989. MOVQ dst_base+0(FP), AX
  7990. SUBQ AX, CX
  7991. MOVQ CX, ret+56(FP)
  7992. RET
  7993. // func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
  7994. // Requires: BMI, SSE2
  7995. TEXT ·encodeBetterBlockAsm10B(SB), $24-64
  7996. MOVQ tmp+48(FP), AX
  7997. MOVQ dst_base+0(FP), CX
  7998. MOVQ $0x000000a0, DX
  7999. MOVQ AX, BX
  8000. PXOR X0, X0
  8001. zero_loop_encodeBetterBlockAsm10B:
  8002. MOVOU X0, (BX)
  8003. MOVOU X0, 16(BX)
  8004. MOVOU X0, 32(BX)
  8005. MOVOU X0, 48(BX)
  8006. MOVOU X0, 64(BX)
  8007. MOVOU X0, 80(BX)
  8008. MOVOU X0, 96(BX)
  8009. MOVOU X0, 112(BX)
  8010. ADDQ $0x80, BX
  8011. DECQ DX
  8012. JNZ zero_loop_encodeBetterBlockAsm10B
  8013. MOVL $0x00000000, 12(SP)
  8014. MOVQ src_len+32(FP), DX
  8015. LEAQ -6(DX), BX
  8016. LEAQ -8(DX), SI
  8017. MOVL SI, 8(SP)
  8018. SHRQ $0x05, DX
  8019. SUBL DX, BX
  8020. LEAQ (CX)(BX*1), BX
  8021. MOVQ BX, (SP)
  8022. MOVL $0x00000001, DX
  8023. MOVL $0x00000000, 16(SP)
  8024. MOVQ src_base+24(FP), BX
  8025. search_loop_encodeBetterBlockAsm10B:
  8026. MOVL DX, SI
  8027. SUBL 12(SP), SI
  8028. SHRL $0x05, SI
  8029. LEAL 1(DX)(SI*1), SI
  8030. CMPL SI, 8(SP)
  8031. JAE emit_remainder_encodeBetterBlockAsm10B
  8032. MOVQ (BX)(DX*1), DI
  8033. MOVL SI, 20(SP)
  8034. MOVQ $0x0000cf1bbcdcbf9b, R9
  8035. MOVQ $0x9e3779b1, SI
  8036. MOVQ DI, R10
  8037. MOVQ DI, R11
  8038. SHLQ $0x10, R10
  8039. IMULQ R9, R10
  8040. SHRQ $0x34, R10
  8041. SHLQ $0x20, R11
  8042. IMULQ SI, R11
  8043. SHRQ $0x36, R11
  8044. MOVL (AX)(R10*4), SI
  8045. MOVL 16384(AX)(R11*4), R8
  8046. MOVL DX, (AX)(R10*4)
  8047. MOVL DX, 16384(AX)(R11*4)
  8048. MOVQ (BX)(SI*1), R10
  8049. MOVQ (BX)(R8*1), R11
  8050. CMPQ R10, DI
  8051. JEQ candidate_match_encodeBetterBlockAsm10B
  8052. CMPQ R11, DI
  8053. JNE no_short_found_encodeBetterBlockAsm10B
  8054. MOVL R8, SI
  8055. JMP candidate_match_encodeBetterBlockAsm10B
  8056. no_short_found_encodeBetterBlockAsm10B:
  8057. CMPL R10, DI
  8058. JEQ candidate_match_encodeBetterBlockAsm10B
  8059. CMPL R11, DI
  8060. JEQ candidateS_match_encodeBetterBlockAsm10B
  8061. MOVL 20(SP), DX
  8062. JMP search_loop_encodeBetterBlockAsm10B
  8063. candidateS_match_encodeBetterBlockAsm10B:
  8064. SHRQ $0x08, DI
  8065. MOVQ DI, R10
  8066. SHLQ $0x10, R10
  8067. IMULQ R9, R10
  8068. SHRQ $0x34, R10
  8069. MOVL (AX)(R10*4), SI
  8070. INCL DX
  8071. MOVL DX, (AX)(R10*4)
  8072. CMPL (BX)(SI*1), DI
  8073. JEQ candidate_match_encodeBetterBlockAsm10B
  8074. DECL DX
  8075. MOVL R8, SI
  8076. candidate_match_encodeBetterBlockAsm10B:
  8077. MOVL 12(SP), DI
  8078. TESTL SI, SI
  8079. JZ match_extend_back_end_encodeBetterBlockAsm10B
  8080. match_extend_back_loop_encodeBetterBlockAsm10B:
  8081. CMPL DX, DI
  8082. JBE match_extend_back_end_encodeBetterBlockAsm10B
  8083. MOVB -1(BX)(SI*1), R8
  8084. MOVB -1(BX)(DX*1), R9
  8085. CMPB R8, R9
  8086. JNE match_extend_back_end_encodeBetterBlockAsm10B
  8087. LEAL -1(DX), DX
  8088. DECL SI
  8089. JZ match_extend_back_end_encodeBetterBlockAsm10B
  8090. JMP match_extend_back_loop_encodeBetterBlockAsm10B
  8091. match_extend_back_end_encodeBetterBlockAsm10B:
  8092. MOVL DX, DI
  8093. SUBL 12(SP), DI
  8094. LEAQ 3(CX)(DI*1), DI
  8095. CMPQ DI, (SP)
  8096. JB match_dst_size_check_encodeBetterBlockAsm10B
  8097. MOVQ $0x00000000, ret+56(FP)
  8098. RET
  8099. match_dst_size_check_encodeBetterBlockAsm10B:
  8100. MOVL DX, DI
  8101. ADDL $0x04, DX
  8102. ADDL $0x04, SI
  8103. MOVQ src_len+32(FP), R8
  8104. SUBL DX, R8
  8105. LEAQ (BX)(DX*1), R9
  8106. LEAQ (BX)(SI*1), R10
  8107. // matchLen
  8108. XORL R12, R12
  8109. matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
  8110. CMPL R8, $0x10
  8111. JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B
  8112. MOVQ (R9)(R12*1), R11
  8113. MOVQ 8(R9)(R12*1), R13
  8114. XORQ (R10)(R12*1), R11
  8115. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
  8116. XORQ 8(R10)(R12*1), R13
  8117. JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
  8118. LEAL -16(R8), R8
  8119. LEAL 16(R12), R12
  8120. JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
  8121. matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
  8122. #ifdef GOAMD64_v3
  8123. TZCNTQ R13, R13
  8124. #else
  8125. BSFQ R13, R13
  8126. #endif
  8127. SARQ $0x03, R13
  8128. LEAL 8(R12)(R13*1), R12
  8129. JMP match_nolit_end_encodeBetterBlockAsm10B
  8130. matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
  8131. CMPL R8, $0x08
  8132. JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B
  8133. MOVQ (R9)(R12*1), R11
  8134. XORQ (R10)(R12*1), R11
  8135. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
  8136. LEAL -8(R8), R8
  8137. LEAL 8(R12), R12
  8138. JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B
  8139. matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
  8140. #ifdef GOAMD64_v3
  8141. TZCNTQ R11, R11
  8142. #else
  8143. BSFQ R11, R11
  8144. #endif
  8145. SARQ $0x03, R11
  8146. LEAL (R12)(R11*1), R12
  8147. JMP match_nolit_end_encodeBetterBlockAsm10B
  8148. matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
  8149. CMPL R8, $0x04
  8150. JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B
  8151. MOVL (R9)(R12*1), R11
  8152. CMPL (R10)(R12*1), R11
  8153. JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B
  8154. LEAL -4(R8), R8
  8155. LEAL 4(R12), R12
  8156. matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
  8157. CMPL R8, $0x01
  8158. JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B
  8159. JB match_nolit_end_encodeBetterBlockAsm10B
  8160. MOVW (R9)(R12*1), R11
  8161. CMPW (R10)(R12*1), R11
  8162. JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B
  8163. LEAL 2(R12), R12
  8164. SUBL $0x02, R8
  8165. JZ match_nolit_end_encodeBetterBlockAsm10B
  8166. matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
  8167. MOVB (R9)(R12*1), R11
  8168. CMPB (R10)(R12*1), R11
  8169. JNE match_nolit_end_encodeBetterBlockAsm10B
  8170. LEAL 1(R12), R12
  8171. match_nolit_end_encodeBetterBlockAsm10B:
  8172. MOVL DX, R8
  8173. SUBL SI, R8
  8174. // Check if repeat
  8175. CMPL 16(SP), R8
  8176. JEQ match_is_repeat_encodeBetterBlockAsm10B
  8177. MOVL R8, 16(SP)
  8178. MOVL 12(SP), SI
  8179. CMPL SI, DI
  8180. JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B
  8181. MOVL DI, R9
  8182. MOVL DI, 12(SP)
  8183. LEAQ (BX)(SI*1), R10
  8184. SUBL SI, R9
  8185. LEAL -1(R9), SI
  8186. CMPL SI, $0x3c
  8187. JB one_byte_match_emit_encodeBetterBlockAsm10B
  8188. CMPL SI, $0x00000100
  8189. JB two_bytes_match_emit_encodeBetterBlockAsm10B
  8190. JB three_bytes_match_emit_encodeBetterBlockAsm10B
  8191. three_bytes_match_emit_encodeBetterBlockAsm10B:
  8192. MOVB $0xf4, (CX)
  8193. MOVW SI, 1(CX)
  8194. ADDQ $0x03, CX
  8195. JMP memmove_long_match_emit_encodeBetterBlockAsm10B
  8196. two_bytes_match_emit_encodeBetterBlockAsm10B:
  8197. MOVB $0xf0, (CX)
  8198. MOVB SI, 1(CX)
  8199. ADDQ $0x02, CX
  8200. CMPL SI, $0x40
  8201. JB memmove_match_emit_encodeBetterBlockAsm10B
  8202. JMP memmove_long_match_emit_encodeBetterBlockAsm10B
  8203. one_byte_match_emit_encodeBetterBlockAsm10B:
  8204. SHLB $0x02, SI
  8205. MOVB SI, (CX)
  8206. ADDQ $0x01, CX
  8207. memmove_match_emit_encodeBetterBlockAsm10B:
  8208. LEAQ (CX)(R9*1), SI
  8209. // genMemMoveShort
  8210. CMPQ R9, $0x04
  8211. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
  8212. CMPQ R9, $0x08
  8213. JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
  8214. CMPQ R9, $0x10
  8215. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
  8216. CMPQ R9, $0x20
  8217. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
  8218. JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
  8219. emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
  8220. MOVL (R10), R11
  8221. MOVL R11, (CX)
  8222. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  8223. emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
  8224. MOVL (R10), R11
  8225. MOVL -4(R10)(R9*1), R10
  8226. MOVL R11, (CX)
  8227. MOVL R10, -4(CX)(R9*1)
  8228. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  8229. emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
  8230. MOVQ (R10), R11
  8231. MOVQ -8(R10)(R9*1), R10
  8232. MOVQ R11, (CX)
  8233. MOVQ R10, -8(CX)(R9*1)
  8234. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  8235. emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
  8236. MOVOU (R10), X0
  8237. MOVOU -16(R10)(R9*1), X1
  8238. MOVOU X0, (CX)
  8239. MOVOU X1, -16(CX)(R9*1)
  8240. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
  8241. emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
  8242. MOVOU (R10), X0
  8243. MOVOU 16(R10), X1
  8244. MOVOU -32(R10)(R9*1), X2
  8245. MOVOU -16(R10)(R9*1), X3
  8246. MOVOU X0, (CX)
  8247. MOVOU X1, 16(CX)
  8248. MOVOU X2, -32(CX)(R9*1)
  8249. MOVOU X3, -16(CX)(R9*1)
  8250. memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
  8251. MOVQ SI, CX
  8252. JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B
  8253. memmove_long_match_emit_encodeBetterBlockAsm10B:
  8254. LEAQ (CX)(R9*1), SI
  8255. // genMemMoveLong
  8256. MOVOU (R10), X0
  8257. MOVOU 16(R10), X1
  8258. MOVOU -32(R10)(R9*1), X2
  8259. MOVOU -16(R10)(R9*1), X3
  8260. MOVQ R9, R13
  8261. SHRQ $0x05, R13
  8262. MOVQ CX, R11
  8263. ANDL $0x0000001f, R11
  8264. MOVQ $0x00000040, R14
  8265. SUBQ R11, R14
  8266. DECQ R13
  8267. JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  8268. LEAQ -32(R10)(R14*1), R11
  8269. LEAQ -32(CX)(R14*1), R15
  8270. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
  8271. MOVOU (R11), X4
  8272. MOVOU 16(R11), X5
  8273. MOVOA X4, (R15)
  8274. MOVOA X5, 16(R15)
  8275. ADDQ $0x20, R15
  8276. ADDQ $0x20, R11
  8277. ADDQ $0x20, R14
  8278. DECQ R13
  8279. JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
  8280. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
  8281. MOVOU -32(R10)(R14*1), X4
  8282. MOVOU -16(R10)(R14*1), X5
  8283. MOVOA X4, -32(CX)(R14*1)
  8284. MOVOA X5, -16(CX)(R14*1)
  8285. ADDQ $0x20, R14
  8286. CMPQ R9, R14
  8287. JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  8288. MOVOU X0, (CX)
  8289. MOVOU X1, 16(CX)
  8290. MOVOU X2, -32(CX)(R9*1)
  8291. MOVOU X3, -16(CX)(R9*1)
  8292. MOVQ SI, CX
  8293. emit_literal_done_match_emit_encodeBetterBlockAsm10B:
  8294. ADDL R12, DX
  8295. ADDL $0x04, R12
  8296. MOVL DX, 12(SP)
  8297. // emitCopy
  8298. CMPL R12, $0x40
  8299. JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
  8300. CMPL R8, $0x00000800
  8301. JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B
  8302. MOVL $0x00000001, SI
  8303. LEAL 16(SI), SI
  8304. MOVB R8, 1(CX)
  8305. SHRL $0x08, R8
  8306. SHLL $0x05, R8
  8307. ORL R8, SI
  8308. MOVB SI, (CX)
  8309. ADDQ $0x02, CX
  8310. SUBL $0x08, R12
  8311. // emitRepeat
  8312. LEAL -4(R12), R12
  8313. JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  8314. MOVL R12, SI
  8315. LEAL -4(R12), R12
  8316. CMPL SI, $0x08
  8317. JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  8318. CMPL SI, $0x0c
  8319. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  8320. CMPL R8, $0x00000800
  8321. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  8322. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  8323. CMPL R12, $0x00000104
  8324. JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
  8325. LEAL -256(R12), R12
  8326. MOVW $0x0019, (CX)
  8327. MOVW R12, 2(CX)
  8328. ADDQ $0x04, CX
  8329. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8330. repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  8331. LEAL -4(R12), R12
  8332. MOVW $0x0015, (CX)
  8333. MOVB R12, 2(CX)
  8334. ADDQ $0x03, CX
  8335. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8336. repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  8337. SHLL $0x02, R12
  8338. ORL $0x01, R12
  8339. MOVW R12, (CX)
  8340. ADDQ $0x02, CX
  8341. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8342. repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
  8343. XORQ SI, SI
  8344. LEAL 1(SI)(R12*4), R12
  8345. MOVB R8, 1(CX)
  8346. SARL $0x08, R8
  8347. SHLL $0x05, R8
  8348. ORL R8, R12
  8349. MOVB R12, (CX)
  8350. ADDQ $0x02, CX
  8351. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8352. long_offset_short_match_nolit_encodeBetterBlockAsm10B:
  8353. MOVB $0xee, (CX)
  8354. MOVW R8, 1(CX)
  8355. LEAL -60(R12), R12
  8356. ADDQ $0x03, CX
  8357. // emitRepeat
  8358. MOVL R12, SI
  8359. LEAL -4(R12), R12
  8360. CMPL SI, $0x08
  8361. JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  8362. CMPL SI, $0x0c
  8363. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  8364. CMPL R8, $0x00000800
  8365. JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  8366. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  8367. CMPL R12, $0x00000104
  8368. JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
  8369. LEAL -256(R12), R12
  8370. MOVW $0x0019, (CX)
  8371. MOVW R12, 2(CX)
  8372. ADDQ $0x04, CX
  8373. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8374. repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  8375. LEAL -4(R12), R12
  8376. MOVW $0x0015, (CX)
  8377. MOVB R12, 2(CX)
  8378. ADDQ $0x03, CX
  8379. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8380. repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  8381. SHLL $0x02, R12
  8382. ORL $0x01, R12
  8383. MOVW R12, (CX)
  8384. ADDQ $0x02, CX
  8385. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8386. repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
  8387. XORQ SI, SI
  8388. LEAL 1(SI)(R12*4), R12
  8389. MOVB R8, 1(CX)
  8390. SARL $0x08, R8
  8391. SHLL $0x05, R8
  8392. ORL R8, R12
  8393. MOVB R12, (CX)
  8394. ADDQ $0x02, CX
  8395. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8396. two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
  8397. MOVL R12, SI
  8398. SHLL $0x02, SI
  8399. CMPL R12, $0x0c
  8400. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
  8401. CMPL R8, $0x00000800
  8402. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
  8403. LEAL -15(SI), SI
  8404. MOVB R8, 1(CX)
  8405. SHRL $0x08, R8
  8406. SHLL $0x05, R8
  8407. ORL R8, SI
  8408. MOVB SI, (CX)
  8409. ADDQ $0x02, CX
  8410. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8411. emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
  8412. LEAL -2(SI), SI
  8413. MOVB SI, (CX)
  8414. MOVW R8, 1(CX)
  8415. ADDQ $0x03, CX
  8416. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8417. match_is_repeat_encodeBetterBlockAsm10B:
  8418. MOVL 12(SP), SI
  8419. CMPL SI, DI
  8420. JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
  8421. MOVL DI, R9
  8422. MOVL DI, 12(SP)
  8423. LEAQ (BX)(SI*1), R10
  8424. SUBL SI, R9
  8425. LEAL -1(R9), SI
  8426. CMPL SI, $0x3c
  8427. JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B
  8428. CMPL SI, $0x00000100
  8429. JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
  8430. JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B
  8431. three_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
  8432. MOVB $0xf4, (CX)
  8433. MOVW SI, 1(CX)
  8434. ADDQ $0x03, CX
  8435. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
  8436. two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
  8437. MOVB $0xf0, (CX)
  8438. MOVB SI, 1(CX)
  8439. ADDQ $0x02, CX
  8440. CMPL SI, $0x40
  8441. JB memmove_match_emit_repeat_encodeBetterBlockAsm10B
  8442. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
  8443. one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
  8444. SHLB $0x02, SI
  8445. MOVB SI, (CX)
  8446. ADDQ $0x01, CX
  8447. memmove_match_emit_repeat_encodeBetterBlockAsm10B:
  8448. LEAQ (CX)(R9*1), SI
  8449. // genMemMoveShort
  8450. CMPQ R9, $0x04
  8451. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
  8452. CMPQ R9, $0x08
  8453. JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
  8454. CMPQ R9, $0x10
  8455. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
  8456. CMPQ R9, $0x20
  8457. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
  8458. JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
  8459. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
  8460. MOVL (R10), R11
  8461. MOVL R11, (CX)
  8462. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  8463. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
  8464. MOVL (R10), R11
  8465. MOVL -4(R10)(R9*1), R10
  8466. MOVL R11, (CX)
  8467. MOVL R10, -4(CX)(R9*1)
  8468. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  8469. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
  8470. MOVQ (R10), R11
  8471. MOVQ -8(R10)(R9*1), R10
  8472. MOVQ R11, (CX)
  8473. MOVQ R10, -8(CX)(R9*1)
  8474. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  8475. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
  8476. MOVOU (R10), X0
  8477. MOVOU -16(R10)(R9*1), X1
  8478. MOVOU X0, (CX)
  8479. MOVOU X1, -16(CX)(R9*1)
  8480. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
  8481. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
  8482. MOVOU (R10), X0
  8483. MOVOU 16(R10), X1
  8484. MOVOU -32(R10)(R9*1), X2
  8485. MOVOU -16(R10)(R9*1), X3
  8486. MOVOU X0, (CX)
  8487. MOVOU X1, 16(CX)
  8488. MOVOU X2, -32(CX)(R9*1)
  8489. MOVOU X3, -16(CX)(R9*1)
  8490. memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
  8491. MOVQ SI, CX
  8492. JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
  8493. memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
  8494. LEAQ (CX)(R9*1), SI
  8495. // genMemMoveLong
  8496. MOVOU (R10), X0
  8497. MOVOU 16(R10), X1
  8498. MOVOU -32(R10)(R9*1), X2
  8499. MOVOU -16(R10)(R9*1), X3
  8500. MOVQ R9, R13
  8501. SHRQ $0x05, R13
  8502. MOVQ CX, R11
  8503. ANDL $0x0000001f, R11
  8504. MOVQ $0x00000040, R14
  8505. SUBQ R11, R14
  8506. DECQ R13
  8507. JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  8508. LEAQ -32(R10)(R14*1), R11
  8509. LEAQ -32(CX)(R14*1), R15
  8510. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
  8511. MOVOU (R11), X4
  8512. MOVOU 16(R11), X5
  8513. MOVOA X4, (R15)
  8514. MOVOA X5, 16(R15)
  8515. ADDQ $0x20, R15
  8516. ADDQ $0x20, R11
  8517. ADDQ $0x20, R14
  8518. DECQ R13
  8519. JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
  8520. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
  8521. MOVOU -32(R10)(R14*1), X4
  8522. MOVOU -16(R10)(R14*1), X5
  8523. MOVOA X4, -32(CX)(R14*1)
  8524. MOVOA X5, -16(CX)(R14*1)
  8525. ADDQ $0x20, R14
  8526. CMPQ R9, R14
  8527. JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  8528. MOVOU X0, (CX)
  8529. MOVOU X1, 16(CX)
  8530. MOVOU X2, -32(CX)(R9*1)
  8531. MOVOU X3, -16(CX)(R9*1)
  8532. MOVQ SI, CX
  8533. emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
  8534. ADDL R12, DX
  8535. ADDL $0x04, R12
  8536. MOVL DX, 12(SP)
  8537. // emitRepeat
  8538. MOVL R12, SI
  8539. LEAL -4(R12), R12
  8540. CMPL SI, $0x08
  8541. JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
  8542. CMPL SI, $0x0c
  8543. JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
  8544. CMPL R8, $0x00000800
  8545. JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
  8546. cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
  8547. CMPL R12, $0x00000104
  8548. JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
  8549. LEAL -256(R12), R12
  8550. MOVW $0x0019, (CX)
  8551. MOVW R12, 2(CX)
  8552. ADDQ $0x04, CX
  8553. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8554. repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
  8555. LEAL -4(R12), R12
  8556. MOVW $0x0015, (CX)
  8557. MOVB R12, 2(CX)
  8558. ADDQ $0x03, CX
  8559. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8560. repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
  8561. SHLL $0x02, R12
  8562. ORL $0x01, R12
  8563. MOVW R12, (CX)
  8564. ADDQ $0x02, CX
  8565. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
  8566. repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
  8567. XORQ SI, SI
  8568. LEAL 1(SI)(R12*4), R12
  8569. MOVB R8, 1(CX)
  8570. SARL $0x08, R8
  8571. SHLL $0x05, R8
  8572. ORL R8, R12
  8573. MOVB R12, (CX)
  8574. ADDQ $0x02, CX
  8575. match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
  8576. CMPL DX, 8(SP)
  8577. JAE emit_remainder_encodeBetterBlockAsm10B
  8578. CMPQ CX, (SP)
  8579. JB match_nolit_dst_ok_encodeBetterBlockAsm10B
  8580. MOVQ $0x00000000, ret+56(FP)
  8581. RET
  8582. match_nolit_dst_ok_encodeBetterBlockAsm10B:
  8583. MOVQ $0x0000cf1bbcdcbf9b, SI
  8584. MOVQ $0x9e3779b1, R8
  8585. LEAQ 1(DI), DI
  8586. LEAQ -2(DX), R9
  8587. MOVQ (BX)(DI*1), R10
  8588. MOVQ 1(BX)(DI*1), R11
  8589. MOVQ (BX)(R9*1), R12
  8590. MOVQ 1(BX)(R9*1), R13
  8591. SHLQ $0x10, R10
  8592. IMULQ SI, R10
  8593. SHRQ $0x34, R10
  8594. SHLQ $0x20, R11
  8595. IMULQ R8, R11
  8596. SHRQ $0x36, R11
  8597. SHLQ $0x10, R12
  8598. IMULQ SI, R12
  8599. SHRQ $0x34, R12
  8600. SHLQ $0x20, R13
  8601. IMULQ R8, R13
  8602. SHRQ $0x36, R13
  8603. LEAQ 1(DI), R8
  8604. LEAQ 1(R9), R14
  8605. MOVL DI, (AX)(R10*4)
  8606. MOVL R9, (AX)(R12*4)
  8607. MOVL R8, 16384(AX)(R11*4)
  8608. MOVL R14, 16384(AX)(R13*4)
  8609. LEAQ 1(R9)(DI*1), R8
  8610. SHRQ $0x01, R8
  8611. ADDQ $0x01, DI
  8612. SUBQ $0x01, R9
  8613. index_loop_encodeBetterBlockAsm10B:
  8614. CMPQ R8, R9
  8615. JAE search_loop_encodeBetterBlockAsm10B
  8616. MOVQ (BX)(DI*1), R10
  8617. MOVQ (BX)(R8*1), R11
  8618. SHLQ $0x10, R10
  8619. IMULQ SI, R10
  8620. SHRQ $0x34, R10
  8621. SHLQ $0x10, R11
  8622. IMULQ SI, R11
  8623. SHRQ $0x34, R11
  8624. MOVL DI, (AX)(R10*4)
  8625. MOVL R8, (AX)(R11*4)
  8626. ADDQ $0x02, DI
  8627. ADDQ $0x02, R8
  8628. JMP index_loop_encodeBetterBlockAsm10B
  8629. emit_remainder_encodeBetterBlockAsm10B:
  8630. MOVQ src_len+32(FP), AX
  8631. SUBL 12(SP), AX
  8632. LEAQ 3(CX)(AX*1), AX
  8633. CMPQ AX, (SP)
  8634. JB emit_remainder_ok_encodeBetterBlockAsm10B
  8635. MOVQ $0x00000000, ret+56(FP)
  8636. RET
  8637. emit_remainder_ok_encodeBetterBlockAsm10B:
  8638. MOVQ src_len+32(FP), AX
  8639. MOVL 12(SP), DX
  8640. CMPL DX, AX
  8641. JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
  8642. MOVL AX, SI
  8643. MOVL AX, 12(SP)
  8644. LEAQ (BX)(DX*1), AX
  8645. SUBL DX, SI
  8646. LEAL -1(SI), DX
  8647. CMPL DX, $0x3c
  8648. JB one_byte_emit_remainder_encodeBetterBlockAsm10B
  8649. CMPL DX, $0x00000100
  8650. JB two_bytes_emit_remainder_encodeBetterBlockAsm10B
  8651. JB three_bytes_emit_remainder_encodeBetterBlockAsm10B
  8652. three_bytes_emit_remainder_encodeBetterBlockAsm10B:
  8653. MOVB $0xf4, (CX)
  8654. MOVW DX, 1(CX)
  8655. ADDQ $0x03, CX
  8656. JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
  8657. two_bytes_emit_remainder_encodeBetterBlockAsm10B:
  8658. MOVB $0xf0, (CX)
  8659. MOVB DL, 1(CX)
  8660. ADDQ $0x02, CX
  8661. CMPL DX, $0x40
  8662. JB memmove_emit_remainder_encodeBetterBlockAsm10B
  8663. JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
  8664. one_byte_emit_remainder_encodeBetterBlockAsm10B:
  8665. SHLB $0x02, DL
  8666. MOVB DL, (CX)
  8667. ADDQ $0x01, CX
  8668. memmove_emit_remainder_encodeBetterBlockAsm10B:
  8669. LEAQ (CX)(SI*1), DX
  8670. MOVL SI, BX
  8671. // genMemMoveShort
  8672. CMPQ BX, $0x03
  8673. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2
  8674. JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3
  8675. CMPQ BX, $0x08
  8676. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
  8677. CMPQ BX, $0x10
  8678. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
  8679. CMPQ BX, $0x20
  8680. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
  8681. JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
  8682. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2:
  8683. MOVB (AX), SI
  8684. MOVB -1(AX)(BX*1), AL
  8685. MOVB SI, (CX)
  8686. MOVB AL, -1(CX)(BX*1)
  8687. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  8688. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3:
  8689. MOVW (AX), SI
  8690. MOVB 2(AX), AL
  8691. MOVW SI, (CX)
  8692. MOVB AL, 2(CX)
  8693. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  8694. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
  8695. MOVL (AX), SI
  8696. MOVL -4(AX)(BX*1), AX
  8697. MOVL SI, (CX)
  8698. MOVL AX, -4(CX)(BX*1)
  8699. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  8700. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
  8701. MOVQ (AX), SI
  8702. MOVQ -8(AX)(BX*1), AX
  8703. MOVQ SI, (CX)
  8704. MOVQ AX, -8(CX)(BX*1)
  8705. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  8706. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
  8707. MOVOU (AX), X0
  8708. MOVOU -16(AX)(BX*1), X1
  8709. MOVOU X0, (CX)
  8710. MOVOU X1, -16(CX)(BX*1)
  8711. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
  8712. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
  8713. MOVOU (AX), X0
  8714. MOVOU 16(AX), X1
  8715. MOVOU -32(AX)(BX*1), X2
  8716. MOVOU -16(AX)(BX*1), X3
  8717. MOVOU X0, (CX)
  8718. MOVOU X1, 16(CX)
  8719. MOVOU X2, -32(CX)(BX*1)
  8720. MOVOU X3, -16(CX)(BX*1)
  8721. memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
  8722. MOVQ DX, CX
  8723. JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
  8724. memmove_long_emit_remainder_encodeBetterBlockAsm10B:
  8725. LEAQ (CX)(SI*1), DX
  8726. MOVL SI, BX
  8727. // genMemMoveLong
  8728. MOVOU (AX), X0
  8729. MOVOU 16(AX), X1
  8730. MOVOU -32(AX)(BX*1), X2
  8731. MOVOU -16(AX)(BX*1), X3
  8732. MOVQ BX, DI
  8733. SHRQ $0x05, DI
  8734. MOVQ CX, SI
  8735. ANDL $0x0000001f, SI
  8736. MOVQ $0x00000040, R8
  8737. SUBQ SI, R8
  8738. DECQ DI
  8739. JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  8740. LEAQ -32(AX)(R8*1), SI
  8741. LEAQ -32(CX)(R8*1), R9
  8742. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
  8743. MOVOU (SI), X4
  8744. MOVOU 16(SI), X5
  8745. MOVOA X4, (R9)
  8746. MOVOA X5, 16(R9)
  8747. ADDQ $0x20, R9
  8748. ADDQ $0x20, SI
  8749. ADDQ $0x20, R8
  8750. DECQ DI
  8751. JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
  8752. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
  8753. MOVOU -32(AX)(R8*1), X4
  8754. MOVOU -16(AX)(R8*1), X5
  8755. MOVOA X4, -32(CX)(R8*1)
  8756. MOVOA X5, -16(CX)(R8*1)
  8757. ADDQ $0x20, R8
  8758. CMPQ BX, R8
  8759. JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
  8760. MOVOU X0, (CX)
  8761. MOVOU X1, 16(CX)
  8762. MOVOU X2, -32(CX)(BX*1)
  8763. MOVOU X3, -16(CX)(BX*1)
  8764. MOVQ DX, CX
  8765. emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
  8766. MOVQ dst_base+0(FP), AX
  8767. SUBQ AX, CX
  8768. MOVQ CX, ret+56(FP)
  8769. RET
  8770. // func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
  8771. // Requires: BMI, SSE2
  8772. TEXT ·encodeBetterBlockAsm8B(SB), $24-64
  8773. MOVQ tmp+48(FP), AX
  8774. MOVQ dst_base+0(FP), CX
  8775. MOVQ $0x00000028, DX
  8776. MOVQ AX, BX
  8777. PXOR X0, X0
  8778. zero_loop_encodeBetterBlockAsm8B:
  8779. MOVOU X0, (BX)
  8780. MOVOU X0, 16(BX)
  8781. MOVOU X0, 32(BX)
  8782. MOVOU X0, 48(BX)
  8783. MOVOU X0, 64(BX)
  8784. MOVOU X0, 80(BX)
  8785. MOVOU X0, 96(BX)
  8786. MOVOU X0, 112(BX)
  8787. ADDQ $0x80, BX
  8788. DECQ DX
  8789. JNZ zero_loop_encodeBetterBlockAsm8B
  8790. MOVL $0x00000000, 12(SP)
  8791. MOVQ src_len+32(FP), DX
  8792. LEAQ -6(DX), BX
  8793. LEAQ -8(DX), SI
  8794. MOVL SI, 8(SP)
  8795. SHRQ $0x05, DX
  8796. SUBL DX, BX
  8797. LEAQ (CX)(BX*1), BX
  8798. MOVQ BX, (SP)
  8799. MOVL $0x00000001, DX
  8800. MOVL $0x00000000, 16(SP)
  8801. MOVQ src_base+24(FP), BX
  8802. search_loop_encodeBetterBlockAsm8B:
  8803. MOVL DX, SI
  8804. SUBL 12(SP), SI
  8805. SHRL $0x04, SI
  8806. LEAL 1(DX)(SI*1), SI
  8807. CMPL SI, 8(SP)
  8808. JAE emit_remainder_encodeBetterBlockAsm8B
  8809. MOVQ (BX)(DX*1), DI
  8810. MOVL SI, 20(SP)
  8811. MOVQ $0x0000cf1bbcdcbf9b, R9
  8812. MOVQ $0x9e3779b1, SI
  8813. MOVQ DI, R10
  8814. MOVQ DI, R11
  8815. SHLQ $0x10, R10
  8816. IMULQ R9, R10
  8817. SHRQ $0x36, R10
  8818. SHLQ $0x20, R11
  8819. IMULQ SI, R11
  8820. SHRQ $0x38, R11
  8821. MOVL (AX)(R10*4), SI
  8822. MOVL 4096(AX)(R11*4), R8
  8823. MOVL DX, (AX)(R10*4)
  8824. MOVL DX, 4096(AX)(R11*4)
  8825. MOVQ (BX)(SI*1), R10
  8826. MOVQ (BX)(R8*1), R11
  8827. CMPQ R10, DI
  8828. JEQ candidate_match_encodeBetterBlockAsm8B
  8829. CMPQ R11, DI
  8830. JNE no_short_found_encodeBetterBlockAsm8B
  8831. MOVL R8, SI
  8832. JMP candidate_match_encodeBetterBlockAsm8B
  8833. no_short_found_encodeBetterBlockAsm8B:
  8834. CMPL R10, DI
  8835. JEQ candidate_match_encodeBetterBlockAsm8B
  8836. CMPL R11, DI
  8837. JEQ candidateS_match_encodeBetterBlockAsm8B
  8838. MOVL 20(SP), DX
  8839. JMP search_loop_encodeBetterBlockAsm8B
  8840. candidateS_match_encodeBetterBlockAsm8B:
  8841. SHRQ $0x08, DI
  8842. MOVQ DI, R10
  8843. SHLQ $0x10, R10
  8844. IMULQ R9, R10
  8845. SHRQ $0x36, R10
  8846. MOVL (AX)(R10*4), SI
  8847. INCL DX
  8848. MOVL DX, (AX)(R10*4)
  8849. CMPL (BX)(SI*1), DI
  8850. JEQ candidate_match_encodeBetterBlockAsm8B
  8851. DECL DX
  8852. MOVL R8, SI
  8853. candidate_match_encodeBetterBlockAsm8B:
  8854. MOVL 12(SP), DI
  8855. TESTL SI, SI
  8856. JZ match_extend_back_end_encodeBetterBlockAsm8B
  8857. match_extend_back_loop_encodeBetterBlockAsm8B:
  8858. CMPL DX, DI
  8859. JBE match_extend_back_end_encodeBetterBlockAsm8B
  8860. MOVB -1(BX)(SI*1), R8
  8861. MOVB -1(BX)(DX*1), R9
  8862. CMPB R8, R9
  8863. JNE match_extend_back_end_encodeBetterBlockAsm8B
  8864. LEAL -1(DX), DX
  8865. DECL SI
  8866. JZ match_extend_back_end_encodeBetterBlockAsm8B
  8867. JMP match_extend_back_loop_encodeBetterBlockAsm8B
  8868. match_extend_back_end_encodeBetterBlockAsm8B:
  8869. MOVL DX, DI
  8870. SUBL 12(SP), DI
  8871. LEAQ 3(CX)(DI*1), DI
  8872. CMPQ DI, (SP)
  8873. JB match_dst_size_check_encodeBetterBlockAsm8B
  8874. MOVQ $0x00000000, ret+56(FP)
  8875. RET
  8876. match_dst_size_check_encodeBetterBlockAsm8B:
  8877. MOVL DX, DI
  8878. ADDL $0x04, DX
  8879. ADDL $0x04, SI
  8880. MOVQ src_len+32(FP), R8
  8881. SUBL DX, R8
  8882. LEAQ (BX)(DX*1), R9
  8883. LEAQ (BX)(SI*1), R10
  8884. // matchLen
  8885. XORL R12, R12
  8886. matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B:
  8887. CMPL R8, $0x10
  8888. JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B
  8889. MOVQ (R9)(R12*1), R11
  8890. MOVQ 8(R9)(R12*1), R13
  8891. XORQ (R10)(R12*1), R11
  8892. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
  8893. XORQ 8(R10)(R12*1), R13
  8894. JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
  8895. LEAL -16(R8), R8
  8896. LEAL 16(R12), R12
  8897. JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
  8898. matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B:
  8899. #ifdef GOAMD64_v3
  8900. TZCNTQ R13, R13
  8901. #else
  8902. BSFQ R13, R13
  8903. #endif
  8904. SARQ $0x03, R13
  8905. LEAL 8(R12)(R13*1), R12
  8906. JMP match_nolit_end_encodeBetterBlockAsm8B
  8907. matchlen_match8_match_nolit_encodeBetterBlockAsm8B:
  8908. CMPL R8, $0x08
  8909. JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B
  8910. MOVQ (R9)(R12*1), R11
  8911. XORQ (R10)(R12*1), R11
  8912. JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
  8913. LEAL -8(R8), R8
  8914. LEAL 8(R12), R12
  8915. JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B
  8916. matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B:
  8917. #ifdef GOAMD64_v3
  8918. TZCNTQ R11, R11
  8919. #else
  8920. BSFQ R11, R11
  8921. #endif
  8922. SARQ $0x03, R11
  8923. LEAL (R12)(R11*1), R12
  8924. JMP match_nolit_end_encodeBetterBlockAsm8B
  8925. matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
  8926. CMPL R8, $0x04
  8927. JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B
  8928. MOVL (R9)(R12*1), R11
  8929. CMPL (R10)(R12*1), R11
  8930. JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B
  8931. LEAL -4(R8), R8
  8932. LEAL 4(R12), R12
  8933. matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
  8934. CMPL R8, $0x01
  8935. JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B
  8936. JB match_nolit_end_encodeBetterBlockAsm8B
  8937. MOVW (R9)(R12*1), R11
  8938. CMPW (R10)(R12*1), R11
  8939. JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B
  8940. LEAL 2(R12), R12
  8941. SUBL $0x02, R8
  8942. JZ match_nolit_end_encodeBetterBlockAsm8B
  8943. matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
  8944. MOVB (R9)(R12*1), R11
  8945. CMPB (R10)(R12*1), R11
  8946. JNE match_nolit_end_encodeBetterBlockAsm8B
  8947. LEAL 1(R12), R12
  8948. match_nolit_end_encodeBetterBlockAsm8B:
  8949. MOVL DX, R8
  8950. SUBL SI, R8
  8951. // Check if repeat
  8952. CMPL 16(SP), R8
  8953. JEQ match_is_repeat_encodeBetterBlockAsm8B
  8954. MOVL R8, 16(SP)
  8955. MOVL 12(SP), SI
  8956. CMPL SI, DI
  8957. JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B
  8958. MOVL DI, R9
  8959. MOVL DI, 12(SP)
  8960. LEAQ (BX)(SI*1), R10
  8961. SUBL SI, R9
  8962. LEAL -1(R9), SI
  8963. CMPL SI, $0x3c
  8964. JB one_byte_match_emit_encodeBetterBlockAsm8B
  8965. CMPL SI, $0x00000100
  8966. JB two_bytes_match_emit_encodeBetterBlockAsm8B
  8967. JB three_bytes_match_emit_encodeBetterBlockAsm8B
  8968. three_bytes_match_emit_encodeBetterBlockAsm8B:
  8969. MOVB $0xf4, (CX)
  8970. MOVW SI, 1(CX)
  8971. ADDQ $0x03, CX
  8972. JMP memmove_long_match_emit_encodeBetterBlockAsm8B
  8973. two_bytes_match_emit_encodeBetterBlockAsm8B:
  8974. MOVB $0xf0, (CX)
  8975. MOVB SI, 1(CX)
  8976. ADDQ $0x02, CX
  8977. CMPL SI, $0x40
  8978. JB memmove_match_emit_encodeBetterBlockAsm8B
  8979. JMP memmove_long_match_emit_encodeBetterBlockAsm8B
  8980. one_byte_match_emit_encodeBetterBlockAsm8B:
  8981. SHLB $0x02, SI
  8982. MOVB SI, (CX)
  8983. ADDQ $0x01, CX
  8984. memmove_match_emit_encodeBetterBlockAsm8B:
  8985. LEAQ (CX)(R9*1), SI
  8986. // genMemMoveShort
  8987. CMPQ R9, $0x04
  8988. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
  8989. CMPQ R9, $0x08
  8990. JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
  8991. CMPQ R9, $0x10
  8992. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
  8993. CMPQ R9, $0x20
  8994. JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
  8995. JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
  8996. emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
  8997. MOVL (R10), R11
  8998. MOVL R11, (CX)
  8999. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
  9000. emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
  9001. MOVL (R10), R11
  9002. MOVL -4(R10)(R9*1), R10
  9003. MOVL R11, (CX)
  9004. MOVL R10, -4(CX)(R9*1)
  9005. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
  9006. emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
  9007. MOVQ (R10), R11
  9008. MOVQ -8(R10)(R9*1), R10
  9009. MOVQ R11, (CX)
  9010. MOVQ R10, -8(CX)(R9*1)
  9011. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
  9012. emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
  9013. MOVOU (R10), X0
  9014. MOVOU -16(R10)(R9*1), X1
  9015. MOVOU X0, (CX)
  9016. MOVOU X1, -16(CX)(R9*1)
  9017. JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
  9018. emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
  9019. MOVOU (R10), X0
  9020. MOVOU 16(R10), X1
  9021. MOVOU -32(R10)(R9*1), X2
  9022. MOVOU -16(R10)(R9*1), X3
  9023. MOVOU X0, (CX)
  9024. MOVOU X1, 16(CX)
  9025. MOVOU X2, -32(CX)(R9*1)
  9026. MOVOU X3, -16(CX)(R9*1)
  9027. memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
  9028. MOVQ SI, CX
  9029. JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B
  9030. memmove_long_match_emit_encodeBetterBlockAsm8B:
  9031. LEAQ (CX)(R9*1), SI
  9032. // genMemMoveLong
  9033. MOVOU (R10), X0
  9034. MOVOU 16(R10), X1
  9035. MOVOU -32(R10)(R9*1), X2
  9036. MOVOU -16(R10)(R9*1), X3
  9037. MOVQ R9, R13
  9038. SHRQ $0x05, R13
  9039. MOVQ CX, R11
  9040. ANDL $0x0000001f, R11
  9041. MOVQ $0x00000040, R14
  9042. SUBQ R11, R14
  9043. DECQ R13
  9044. JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  9045. LEAQ -32(R10)(R14*1), R11
  9046. LEAQ -32(CX)(R14*1), R15
  9047. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
  9048. MOVOU (R11), X4
  9049. MOVOU 16(R11), X5
  9050. MOVOA X4, (R15)
  9051. MOVOA X5, 16(R15)
  9052. ADDQ $0x20, R15
  9053. ADDQ $0x20, R11
  9054. ADDQ $0x20, R14
  9055. DECQ R13
  9056. JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
  9057. emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
  9058. MOVOU -32(R10)(R14*1), X4
  9059. MOVOU -16(R10)(R14*1), X5
  9060. MOVOA X4, -32(CX)(R14*1)
  9061. MOVOA X5, -16(CX)(R14*1)
  9062. ADDQ $0x20, R14
  9063. CMPQ R9, R14
  9064. JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  9065. MOVOU X0, (CX)
  9066. MOVOU X1, 16(CX)
  9067. MOVOU X2, -32(CX)(R9*1)
  9068. MOVOU X3, -16(CX)(R9*1)
  9069. MOVQ SI, CX
  9070. emit_literal_done_match_emit_encodeBetterBlockAsm8B:
  9071. ADDL R12, DX
  9072. ADDL $0x04, R12
  9073. MOVL DX, 12(SP)
  9074. // emitCopy
  9075. CMPL R12, $0x40
  9076. JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
  9077. CMPL R8, $0x00000800
  9078. JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B
  9079. MOVL $0x00000001, SI
  9080. LEAL 16(SI), SI
  9081. MOVB R8, 1(CX)
  9082. SHRL $0x08, R8
  9083. SHLL $0x05, R8
  9084. ORL R8, SI
  9085. MOVB SI, (CX)
  9086. ADDQ $0x02, CX
  9087. SUBL $0x08, R12
  9088. // emitRepeat
  9089. LEAL -4(R12), R12
  9090. JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
  9091. MOVL R12, SI
  9092. LEAL -4(R12), R12
  9093. CMPL SI, $0x08
  9094. JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
  9095. CMPL SI, $0x0c
  9096. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
  9097. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
  9098. CMPL R12, $0x00000104
  9099. JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
  9100. LEAL -256(R12), R12
  9101. MOVW $0x0019, (CX)
  9102. MOVW R12, 2(CX)
  9103. ADDQ $0x04, CX
  9104. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9105. repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
  9106. LEAL -4(R12), R12
  9107. MOVW $0x0015, (CX)
  9108. MOVB R12, 2(CX)
  9109. ADDQ $0x03, CX
  9110. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9111. repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
  9112. SHLL $0x02, R12
  9113. ORL $0x01, R12
  9114. MOVW R12, (CX)
  9115. ADDQ $0x02, CX
  9116. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9117. XORQ SI, SI
  9118. LEAL 1(SI)(R12*4), R12
  9119. MOVB R8, 1(CX)
  9120. SARL $0x08, R8
  9121. SHLL $0x05, R8
  9122. ORL R8, R12
  9123. MOVB R12, (CX)
  9124. ADDQ $0x02, CX
  9125. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9126. long_offset_short_match_nolit_encodeBetterBlockAsm8B:
  9127. MOVB $0xee, (CX)
  9128. MOVW R8, 1(CX)
  9129. LEAL -60(R12), R12
  9130. ADDQ $0x03, CX
  9131. // emitRepeat
  9132. MOVL R12, SI
  9133. LEAL -4(R12), R12
  9134. CMPL SI, $0x08
  9135. JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
  9136. CMPL SI, $0x0c
  9137. JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
  9138. cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
  9139. CMPL R12, $0x00000104
  9140. JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
  9141. LEAL -256(R12), R12
  9142. MOVW $0x0019, (CX)
  9143. MOVW R12, 2(CX)
  9144. ADDQ $0x04, CX
  9145. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9146. repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
  9147. LEAL -4(R12), R12
  9148. MOVW $0x0015, (CX)
  9149. MOVB R12, 2(CX)
  9150. ADDQ $0x03, CX
  9151. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9152. repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
  9153. SHLL $0x02, R12
  9154. ORL $0x01, R12
  9155. MOVW R12, (CX)
  9156. ADDQ $0x02, CX
  9157. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9158. XORQ SI, SI
  9159. LEAL 1(SI)(R12*4), R12
  9160. MOVB R8, 1(CX)
  9161. SARL $0x08, R8
  9162. SHLL $0x05, R8
  9163. ORL R8, R12
  9164. MOVB R12, (CX)
  9165. ADDQ $0x02, CX
  9166. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9167. two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
  9168. MOVL R12, SI
  9169. SHLL $0x02, SI
  9170. CMPL R12, $0x0c
  9171. JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B
  9172. LEAL -15(SI), SI
  9173. MOVB R8, 1(CX)
  9174. SHRL $0x08, R8
  9175. SHLL $0x05, R8
  9176. ORL R8, SI
  9177. MOVB SI, (CX)
  9178. ADDQ $0x02, CX
  9179. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9180. emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
  9181. LEAL -2(SI), SI
  9182. MOVB SI, (CX)
  9183. MOVW R8, 1(CX)
  9184. ADDQ $0x03, CX
  9185. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9186. match_is_repeat_encodeBetterBlockAsm8B:
  9187. MOVL 12(SP), SI
  9188. CMPL SI, DI
  9189. JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
  9190. MOVL DI, R8
  9191. MOVL DI, 12(SP)
  9192. LEAQ (BX)(SI*1), R9
  9193. SUBL SI, R8
  9194. LEAL -1(R8), SI
  9195. CMPL SI, $0x3c
  9196. JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B
  9197. CMPL SI, $0x00000100
  9198. JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
  9199. JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B
  9200. three_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
  9201. MOVB $0xf4, (CX)
  9202. MOVW SI, 1(CX)
  9203. ADDQ $0x03, CX
  9204. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
  9205. two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
  9206. MOVB $0xf0, (CX)
  9207. MOVB SI, 1(CX)
  9208. ADDQ $0x02, CX
  9209. CMPL SI, $0x40
  9210. JB memmove_match_emit_repeat_encodeBetterBlockAsm8B
  9211. JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
  9212. one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
  9213. SHLB $0x02, SI
  9214. MOVB SI, (CX)
  9215. ADDQ $0x01, CX
  9216. memmove_match_emit_repeat_encodeBetterBlockAsm8B:
  9217. LEAQ (CX)(R8*1), SI
  9218. // genMemMoveShort
  9219. CMPQ R8, $0x04
  9220. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
  9221. CMPQ R8, $0x08
  9222. JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
  9223. CMPQ R8, $0x10
  9224. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
  9225. CMPQ R8, $0x20
  9226. JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
  9227. JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
  9228. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
  9229. MOVL (R9), R10
  9230. MOVL R10, (CX)
  9231. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  9232. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
  9233. MOVL (R9), R10
  9234. MOVL -4(R9)(R8*1), R9
  9235. MOVL R10, (CX)
  9236. MOVL R9, -4(CX)(R8*1)
  9237. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  9238. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
  9239. MOVQ (R9), R10
  9240. MOVQ -8(R9)(R8*1), R9
  9241. MOVQ R10, (CX)
  9242. MOVQ R9, -8(CX)(R8*1)
  9243. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  9244. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
  9245. MOVOU (R9), X0
  9246. MOVOU -16(R9)(R8*1), X1
  9247. MOVOU X0, (CX)
  9248. MOVOU X1, -16(CX)(R8*1)
  9249. JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
  9250. emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
  9251. MOVOU (R9), X0
  9252. MOVOU 16(R9), X1
  9253. MOVOU -32(R9)(R8*1), X2
  9254. MOVOU -16(R9)(R8*1), X3
  9255. MOVOU X0, (CX)
  9256. MOVOU X1, 16(CX)
  9257. MOVOU X2, -32(CX)(R8*1)
  9258. MOVOU X3, -16(CX)(R8*1)
  9259. memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
  9260. MOVQ SI, CX
  9261. JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
  9262. memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
  9263. LEAQ (CX)(R8*1), SI
  9264. // genMemMoveLong
  9265. MOVOU (R9), X0
  9266. MOVOU 16(R9), X1
  9267. MOVOU -32(R9)(R8*1), X2
  9268. MOVOU -16(R9)(R8*1), X3
  9269. MOVQ R8, R11
  9270. SHRQ $0x05, R11
  9271. MOVQ CX, R10
  9272. ANDL $0x0000001f, R10
  9273. MOVQ $0x00000040, R13
  9274. SUBQ R10, R13
  9275. DECQ R11
  9276. JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  9277. LEAQ -32(R9)(R13*1), R10
  9278. LEAQ -32(CX)(R13*1), R14
  9279. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
  9280. MOVOU (R10), X4
  9281. MOVOU 16(R10), X5
  9282. MOVOA X4, (R14)
  9283. MOVOA X5, 16(R14)
  9284. ADDQ $0x20, R14
  9285. ADDQ $0x20, R10
  9286. ADDQ $0x20, R13
  9287. DECQ R11
  9288. JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
  9289. emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
  9290. MOVOU -32(R9)(R13*1), X4
  9291. MOVOU -16(R9)(R13*1), X5
  9292. MOVOA X4, -32(CX)(R13*1)
  9293. MOVOA X5, -16(CX)(R13*1)
  9294. ADDQ $0x20, R13
  9295. CMPQ R8, R13
  9296. JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  9297. MOVOU X0, (CX)
  9298. MOVOU X1, 16(CX)
  9299. MOVOU X2, -32(CX)(R8*1)
  9300. MOVOU X3, -16(CX)(R8*1)
  9301. MOVQ SI, CX
  9302. emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
  9303. ADDL R12, DX
  9304. ADDL $0x04, R12
  9305. MOVL DX, 12(SP)
  9306. // emitRepeat
  9307. MOVL R12, SI
  9308. LEAL -4(R12), R12
  9309. CMPL SI, $0x08
  9310. JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
  9311. CMPL SI, $0x0c
  9312. JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
  9313. cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
  9314. CMPL R12, $0x00000104
  9315. JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
  9316. LEAL -256(R12), R12
  9317. MOVW $0x0019, (CX)
  9318. MOVW R12, 2(CX)
  9319. ADDQ $0x04, CX
  9320. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9321. repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
  9322. LEAL -4(R12), R12
  9323. MOVW $0x0015, (CX)
  9324. MOVB R12, 2(CX)
  9325. ADDQ $0x03, CX
  9326. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9327. repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
  9328. SHLL $0x02, R12
  9329. ORL $0x01, R12
  9330. MOVW R12, (CX)
  9331. ADDQ $0x02, CX
  9332. JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
  9333. XORQ SI, SI
  9334. LEAL 1(SI)(R12*4), R12
  9335. MOVB R8, 1(CX)
  9336. SARL $0x08, R8
  9337. SHLL $0x05, R8
  9338. ORL R8, R12
  9339. MOVB R12, (CX)
  9340. ADDQ $0x02, CX
  9341. match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
  9342. CMPL DX, 8(SP)
  9343. JAE emit_remainder_encodeBetterBlockAsm8B
  9344. CMPQ CX, (SP)
  9345. JB match_nolit_dst_ok_encodeBetterBlockAsm8B
  9346. MOVQ $0x00000000, ret+56(FP)
  9347. RET
  9348. match_nolit_dst_ok_encodeBetterBlockAsm8B:
  9349. MOVQ $0x0000cf1bbcdcbf9b, SI
  9350. MOVQ $0x9e3779b1, R8
  9351. LEAQ 1(DI), DI
  9352. LEAQ -2(DX), R9
  9353. MOVQ (BX)(DI*1), R10
  9354. MOVQ 1(BX)(DI*1), R11
  9355. MOVQ (BX)(R9*1), R12
  9356. MOVQ 1(BX)(R9*1), R13
  9357. SHLQ $0x10, R10
  9358. IMULQ SI, R10
  9359. SHRQ $0x36, R10
  9360. SHLQ $0x20, R11
  9361. IMULQ R8, R11
  9362. SHRQ $0x38, R11
  9363. SHLQ $0x10, R12
  9364. IMULQ SI, R12
  9365. SHRQ $0x36, R12
  9366. SHLQ $0x20, R13
  9367. IMULQ R8, R13
  9368. SHRQ $0x38, R13
  9369. LEAQ 1(DI), R8
  9370. LEAQ 1(R9), R14
  9371. MOVL DI, (AX)(R10*4)
  9372. MOVL R9, (AX)(R12*4)
  9373. MOVL R8, 4096(AX)(R11*4)
  9374. MOVL R14, 4096(AX)(R13*4)
  9375. LEAQ 1(R9)(DI*1), R8
  9376. SHRQ $0x01, R8
  9377. ADDQ $0x01, DI
  9378. SUBQ $0x01, R9
  9379. index_loop_encodeBetterBlockAsm8B:
  9380. CMPQ R8, R9
  9381. JAE search_loop_encodeBetterBlockAsm8B
  9382. MOVQ (BX)(DI*1), R10
  9383. MOVQ (BX)(R8*1), R11
  9384. SHLQ $0x10, R10
  9385. IMULQ SI, R10
  9386. SHRQ $0x36, R10
  9387. SHLQ $0x10, R11
  9388. IMULQ SI, R11
  9389. SHRQ $0x36, R11
  9390. MOVL DI, (AX)(R10*4)
  9391. MOVL R8, (AX)(R11*4)
  9392. ADDQ $0x02, DI
  9393. ADDQ $0x02, R8
  9394. JMP index_loop_encodeBetterBlockAsm8B
  9395. emit_remainder_encodeBetterBlockAsm8B:
  9396. MOVQ src_len+32(FP), AX
  9397. SUBL 12(SP), AX
  9398. LEAQ 3(CX)(AX*1), AX
  9399. CMPQ AX, (SP)
  9400. JB emit_remainder_ok_encodeBetterBlockAsm8B
  9401. MOVQ $0x00000000, ret+56(FP)
  9402. RET
  9403. emit_remainder_ok_encodeBetterBlockAsm8B:
  9404. MOVQ src_len+32(FP), AX
  9405. MOVL 12(SP), DX
  9406. CMPL DX, AX
  9407. JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
  9408. MOVL AX, SI
  9409. MOVL AX, 12(SP)
  9410. LEAQ (BX)(DX*1), AX
  9411. SUBL DX, SI
  9412. LEAL -1(SI), DX
  9413. CMPL DX, $0x3c
  9414. JB one_byte_emit_remainder_encodeBetterBlockAsm8B
  9415. CMPL DX, $0x00000100
  9416. JB two_bytes_emit_remainder_encodeBetterBlockAsm8B
  9417. JB three_bytes_emit_remainder_encodeBetterBlockAsm8B
  9418. three_bytes_emit_remainder_encodeBetterBlockAsm8B:
  9419. MOVB $0xf4, (CX)
  9420. MOVW DX, 1(CX)
  9421. ADDQ $0x03, CX
  9422. JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B
  9423. two_bytes_emit_remainder_encodeBetterBlockAsm8B:
  9424. MOVB $0xf0, (CX)
  9425. MOVB DL, 1(CX)
  9426. ADDQ $0x02, CX
  9427. CMPL DX, $0x40
  9428. JB memmove_emit_remainder_encodeBetterBlockAsm8B
  9429. JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B
  9430. one_byte_emit_remainder_encodeBetterBlockAsm8B:
  9431. SHLB $0x02, DL
  9432. MOVB DL, (CX)
  9433. ADDQ $0x01, CX
  9434. memmove_emit_remainder_encodeBetterBlockAsm8B:
  9435. LEAQ (CX)(SI*1), DX
  9436. MOVL SI, BX
  9437. // genMemMoveShort
  9438. CMPQ BX, $0x03
  9439. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
  9440. JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
  9441. CMPQ BX, $0x08
  9442. JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
  9443. CMPQ BX, $0x10
  9444. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
  9445. CMPQ BX, $0x20
  9446. JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
  9447. JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
  9448. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
  9449. MOVB (AX), SI
  9450. MOVB -1(AX)(BX*1), AL
  9451. MOVB SI, (CX)
  9452. MOVB AL, -1(CX)(BX*1)
  9453. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  9454. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
  9455. MOVW (AX), SI
  9456. MOVB 2(AX), AL
  9457. MOVW SI, (CX)
  9458. MOVB AL, 2(CX)
  9459. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  9460. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
  9461. MOVL (AX), SI
  9462. MOVL -4(AX)(BX*1), AX
  9463. MOVL SI, (CX)
  9464. MOVL AX, -4(CX)(BX*1)
  9465. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  9466. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
  9467. MOVQ (AX), SI
  9468. MOVQ -8(AX)(BX*1), AX
  9469. MOVQ SI, (CX)
  9470. MOVQ AX, -8(CX)(BX*1)
  9471. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  9472. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
  9473. MOVOU (AX), X0
  9474. MOVOU -16(AX)(BX*1), X1
  9475. MOVOU X0, (CX)
  9476. MOVOU X1, -16(CX)(BX*1)
  9477. JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
  9478. emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
  9479. MOVOU (AX), X0
  9480. MOVOU 16(AX), X1
  9481. MOVOU -32(AX)(BX*1), X2
  9482. MOVOU -16(AX)(BX*1), X3
  9483. MOVOU X0, (CX)
  9484. MOVOU X1, 16(CX)
  9485. MOVOU X2, -32(CX)(BX*1)
  9486. MOVOU X3, -16(CX)(BX*1)
  9487. memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
  9488. MOVQ DX, CX
  9489. JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
  9490. memmove_long_emit_remainder_encodeBetterBlockAsm8B:
  9491. LEAQ (CX)(SI*1), DX
  9492. MOVL SI, BX
  9493. // genMemMoveLong
  9494. MOVOU (AX), X0
  9495. MOVOU 16(AX), X1
  9496. MOVOU -32(AX)(BX*1), X2
  9497. MOVOU -16(AX)(BX*1), X3
  9498. MOVQ BX, DI
  9499. SHRQ $0x05, DI
  9500. MOVQ CX, SI
  9501. ANDL $0x0000001f, SI
  9502. MOVQ $0x00000040, R8
  9503. SUBQ SI, R8
  9504. DECQ DI
  9505. JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  9506. LEAQ -32(AX)(R8*1), SI
  9507. LEAQ -32(CX)(R8*1), R9
  9508. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
  9509. MOVOU (SI), X4
  9510. MOVOU 16(SI), X5
  9511. MOVOA X4, (R9)
  9512. MOVOA X5, 16(R9)
  9513. ADDQ $0x20, R9
  9514. ADDQ $0x20, SI
  9515. ADDQ $0x20, R8
  9516. DECQ DI
  9517. JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
  9518. emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
  9519. MOVOU -32(AX)(R8*1), X4
  9520. MOVOU -16(AX)(R8*1), X5
  9521. MOVOA X4, -32(CX)(R8*1)
  9522. MOVOA X5, -16(CX)(R8*1)
  9523. ADDQ $0x20, R8
  9524. CMPQ BX, R8
  9525. JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
  9526. MOVOU X0, (CX)
  9527. MOVOU X1, 16(CX)
  9528. MOVOU X2, -32(CX)(BX*1)
  9529. MOVOU X3, -16(CX)(BX*1)
  9530. MOVQ DX, CX
  9531. emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
  9532. MOVQ dst_base+0(FP), AX
  9533. SUBQ AX, CX
  9534. MOVQ CX, ret+56(FP)
  9535. RET
  9536. // func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
  9537. // Requires: BMI, SSE2
  9538. TEXT ·encodeSnappyBlockAsm(SB), $24-64
  9539. MOVQ tmp+48(FP), AX
  9540. MOVQ dst_base+0(FP), CX
  9541. MOVQ $0x00000200, DX
  9542. MOVQ AX, BX
  9543. PXOR X0, X0
  9544. zero_loop_encodeSnappyBlockAsm:
  9545. MOVOU X0, (BX)
  9546. MOVOU X0, 16(BX)
  9547. MOVOU X0, 32(BX)
  9548. MOVOU X0, 48(BX)
  9549. MOVOU X0, 64(BX)
  9550. MOVOU X0, 80(BX)
  9551. MOVOU X0, 96(BX)
  9552. MOVOU X0, 112(BX)
  9553. ADDQ $0x80, BX
  9554. DECQ DX
  9555. JNZ zero_loop_encodeSnappyBlockAsm
  9556. MOVL $0x00000000, 12(SP)
  9557. MOVQ src_len+32(FP), DX
  9558. LEAQ -9(DX), BX
  9559. LEAQ -8(DX), SI
  9560. MOVL SI, 8(SP)
  9561. SHRQ $0x05, DX
  9562. SUBL DX, BX
  9563. LEAQ (CX)(BX*1), BX
  9564. MOVQ BX, (SP)
  9565. MOVL $0x00000001, DX
  9566. MOVL DX, 16(SP)
  9567. MOVQ src_base+24(FP), BX
  9568. search_loop_encodeSnappyBlockAsm:
  9569. MOVL DX, SI
  9570. SUBL 12(SP), SI
  9571. SHRL $0x06, SI
  9572. LEAL 4(DX)(SI*1), SI
  9573. CMPL SI, 8(SP)
  9574. JAE emit_remainder_encodeSnappyBlockAsm
  9575. MOVQ (BX)(DX*1), DI
  9576. MOVL SI, 20(SP)
  9577. MOVQ $0x0000cf1bbcdcbf9b, R9
  9578. MOVQ DI, R10
  9579. MOVQ DI, R11
  9580. SHRQ $0x08, R11
  9581. SHLQ $0x10, R10
  9582. IMULQ R9, R10
  9583. SHRQ $0x32, R10
  9584. SHLQ $0x10, R11
  9585. IMULQ R9, R11
  9586. SHRQ $0x32, R11
  9587. MOVL (AX)(R10*4), SI
  9588. MOVL (AX)(R11*4), R8
  9589. MOVL DX, (AX)(R10*4)
  9590. LEAL 1(DX), R10
  9591. MOVL R10, (AX)(R11*4)
  9592. MOVQ DI, R10
  9593. SHRQ $0x10, R10
  9594. SHLQ $0x10, R10
  9595. IMULQ R9, R10
  9596. SHRQ $0x32, R10
  9597. MOVL DX, R9
  9598. SUBL 16(SP), R9
  9599. MOVL 1(BX)(R9*1), R11
  9600. MOVQ DI, R9
  9601. SHRQ $0x08, R9
  9602. CMPL R9, R11
  9603. JNE no_repeat_found_encodeSnappyBlockAsm
  9604. LEAL 1(DX), DI
  9605. MOVL 12(SP), SI
  9606. MOVL DI, R8
  9607. SUBL 16(SP), R8
  9608. JZ repeat_extend_back_end_encodeSnappyBlockAsm
  9609. repeat_extend_back_loop_encodeSnappyBlockAsm:
  9610. CMPL DI, SI
  9611. JBE repeat_extend_back_end_encodeSnappyBlockAsm
  9612. MOVB -1(BX)(R8*1), R9
  9613. MOVB -1(BX)(DI*1), R10
  9614. CMPB R9, R10
  9615. JNE repeat_extend_back_end_encodeSnappyBlockAsm
  9616. LEAL -1(DI), DI
  9617. DECL R8
  9618. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm
  9619. repeat_extend_back_end_encodeSnappyBlockAsm:
  9620. MOVL DI, SI
  9621. SUBL 12(SP), SI
  9622. LEAQ 5(CX)(SI*1), SI
  9623. CMPQ SI, (SP)
  9624. JB repeat_dst_size_check_encodeSnappyBlockAsm
  9625. MOVQ $0x00000000, ret+56(FP)
  9626. RET
  9627. repeat_dst_size_check_encodeSnappyBlockAsm:
  9628. MOVL 12(SP), SI
  9629. CMPL SI, DI
  9630. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm
  9631. MOVL DI, R8
  9632. MOVL DI, 12(SP)
  9633. LEAQ (BX)(SI*1), R9
  9634. SUBL SI, R8
  9635. LEAL -1(R8), SI
  9636. CMPL SI, $0x3c
  9637. JB one_byte_repeat_emit_encodeSnappyBlockAsm
  9638. CMPL SI, $0x00000100
  9639. JB two_bytes_repeat_emit_encodeSnappyBlockAsm
  9640. CMPL SI, $0x00010000
  9641. JB three_bytes_repeat_emit_encodeSnappyBlockAsm
  9642. CMPL SI, $0x01000000
  9643. JB four_bytes_repeat_emit_encodeSnappyBlockAsm
  9644. MOVB $0xfc, (CX)
  9645. MOVL SI, 1(CX)
  9646. ADDQ $0x05, CX
  9647. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
  9648. four_bytes_repeat_emit_encodeSnappyBlockAsm:
  9649. MOVL SI, R10
  9650. SHRL $0x10, R10
  9651. MOVB $0xf8, (CX)
  9652. MOVW SI, 1(CX)
  9653. MOVB R10, 3(CX)
  9654. ADDQ $0x04, CX
  9655. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
  9656. three_bytes_repeat_emit_encodeSnappyBlockAsm:
  9657. MOVB $0xf4, (CX)
  9658. MOVW SI, 1(CX)
  9659. ADDQ $0x03, CX
  9660. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
  9661. two_bytes_repeat_emit_encodeSnappyBlockAsm:
  9662. MOVB $0xf0, (CX)
  9663. MOVB SI, 1(CX)
  9664. ADDQ $0x02, CX
  9665. CMPL SI, $0x40
  9666. JB memmove_repeat_emit_encodeSnappyBlockAsm
  9667. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
  9668. one_byte_repeat_emit_encodeSnappyBlockAsm:
  9669. SHLB $0x02, SI
  9670. MOVB SI, (CX)
  9671. ADDQ $0x01, CX
  9672. memmove_repeat_emit_encodeSnappyBlockAsm:
  9673. LEAQ (CX)(R8*1), SI
  9674. // genMemMoveShort
  9675. CMPQ R8, $0x08
  9676. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
  9677. CMPQ R8, $0x10
  9678. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
  9679. CMPQ R8, $0x20
  9680. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
  9681. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
  9682. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
  9683. MOVQ (R9), R10
  9684. MOVQ R10, (CX)
  9685. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  9686. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
  9687. MOVQ (R9), R10
  9688. MOVQ -8(R9)(R8*1), R9
  9689. MOVQ R10, (CX)
  9690. MOVQ R9, -8(CX)(R8*1)
  9691. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  9692. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
  9693. MOVOU (R9), X0
  9694. MOVOU -16(R9)(R8*1), X1
  9695. MOVOU X0, (CX)
  9696. MOVOU X1, -16(CX)(R8*1)
  9697. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
  9698. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
  9699. MOVOU (R9), X0
  9700. MOVOU 16(R9), X1
  9701. MOVOU -32(R9)(R8*1), X2
  9702. MOVOU -16(R9)(R8*1), X3
  9703. MOVOU X0, (CX)
  9704. MOVOU X1, 16(CX)
  9705. MOVOU X2, -32(CX)(R8*1)
  9706. MOVOU X3, -16(CX)(R8*1)
  9707. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
  9708. MOVQ SI, CX
  9709. JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm
  9710. memmove_long_repeat_emit_encodeSnappyBlockAsm:
  9711. LEAQ (CX)(R8*1), SI
  9712. // genMemMoveLong
  9713. MOVOU (R9), X0
  9714. MOVOU 16(R9), X1
  9715. MOVOU -32(R9)(R8*1), X2
  9716. MOVOU -16(R9)(R8*1), X3
  9717. MOVQ R8, R11
  9718. SHRQ $0x05, R11
  9719. MOVQ CX, R10
  9720. ANDL $0x0000001f, R10
  9721. MOVQ $0x00000040, R12
  9722. SUBQ R10, R12
  9723. DECQ R11
  9724. JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  9725. LEAQ -32(R9)(R12*1), R10
  9726. LEAQ -32(CX)(R12*1), R13
  9727. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
  9728. MOVOU (R10), X4
  9729. MOVOU 16(R10), X5
  9730. MOVOA X4, (R13)
  9731. MOVOA X5, 16(R13)
  9732. ADDQ $0x20, R13
  9733. ADDQ $0x20, R10
  9734. ADDQ $0x20, R12
  9735. DECQ R11
  9736. JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
  9737. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
  9738. MOVOU -32(R9)(R12*1), X4
  9739. MOVOU -16(R9)(R12*1), X5
  9740. MOVOA X4, -32(CX)(R12*1)
  9741. MOVOA X5, -16(CX)(R12*1)
  9742. ADDQ $0x20, R12
  9743. CMPQ R8, R12
  9744. JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  9745. MOVOU X0, (CX)
  9746. MOVOU X1, 16(CX)
  9747. MOVOU X2, -32(CX)(R8*1)
  9748. MOVOU X3, -16(CX)(R8*1)
  9749. MOVQ SI, CX
  9750. emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
  9751. ADDL $0x05, DX
  9752. MOVL DX, SI
  9753. SUBL 16(SP), SI
  9754. MOVQ src_len+32(FP), R8
  9755. SUBL DX, R8
  9756. LEAQ (BX)(DX*1), R9
  9757. LEAQ (BX)(SI*1), SI
  9758. // matchLen
  9759. XORL R11, R11
  9760. matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm:
  9761. CMPL R8, $0x10
  9762. JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm
  9763. MOVQ (R9)(R11*1), R10
  9764. MOVQ 8(R9)(R11*1), R12
  9765. XORQ (SI)(R11*1), R10
  9766. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
  9767. XORQ 8(SI)(R11*1), R12
  9768. JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
  9769. LEAL -16(R8), R8
  9770. LEAL 16(R11), R11
  9771. JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
  9772. matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm:
  9773. #ifdef GOAMD64_v3
  9774. TZCNTQ R12, R12
  9775. #else
  9776. BSFQ R12, R12
  9777. #endif
  9778. SARQ $0x03, R12
  9779. LEAL 8(R11)(R12*1), R11
  9780. JMP repeat_extend_forward_end_encodeSnappyBlockAsm
  9781. matchlen_match8_repeat_extend_encodeSnappyBlockAsm:
  9782. CMPL R8, $0x08
  9783. JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm
  9784. MOVQ (R9)(R11*1), R10
  9785. XORQ (SI)(R11*1), R10
  9786. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
  9787. LEAL -8(R8), R8
  9788. LEAL 8(R11), R11
  9789. JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm
  9790. matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm:
  9791. #ifdef GOAMD64_v3
  9792. TZCNTQ R10, R10
  9793. #else
  9794. BSFQ R10, R10
  9795. #endif
  9796. SARQ $0x03, R10
  9797. LEAL (R11)(R10*1), R11
  9798. JMP repeat_extend_forward_end_encodeSnappyBlockAsm
  9799. matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
  9800. CMPL R8, $0x04
  9801. JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm
  9802. MOVL (R9)(R11*1), R10
  9803. CMPL (SI)(R11*1), R10
  9804. JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm
  9805. LEAL -4(R8), R8
  9806. LEAL 4(R11), R11
  9807. matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
  9808. CMPL R8, $0x01
  9809. JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm
  9810. JB repeat_extend_forward_end_encodeSnappyBlockAsm
  9811. MOVW (R9)(R11*1), R10
  9812. CMPW (SI)(R11*1), R10
  9813. JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm
  9814. LEAL 2(R11), R11
  9815. SUBL $0x02, R8
  9816. JZ repeat_extend_forward_end_encodeSnappyBlockAsm
  9817. matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
  9818. MOVB (R9)(R11*1), R10
  9819. CMPB (SI)(R11*1), R10
  9820. JNE repeat_extend_forward_end_encodeSnappyBlockAsm
  9821. LEAL 1(R11), R11
  9822. repeat_extend_forward_end_encodeSnappyBlockAsm:
  9823. ADDL R11, DX
  9824. MOVL DX, SI
  9825. SUBL DI, SI
  9826. MOVL 16(SP), DI
  9827. // emitCopy
  9828. CMPL DI, $0x00010000
  9829. JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
  9830. four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
  9831. CMPL SI, $0x40
  9832. JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
  9833. MOVB $0xff, (CX)
  9834. MOVL DI, 1(CX)
  9835. LEAL -64(SI), SI
  9836. ADDQ $0x05, CX
  9837. CMPL SI, $0x04
  9838. JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
  9839. JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
  9840. four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
  9841. TESTL SI, SI
  9842. JZ repeat_end_emit_encodeSnappyBlockAsm
  9843. XORL R8, R8
  9844. LEAL -1(R8)(SI*4), SI
  9845. MOVB SI, (CX)
  9846. MOVL DI, 1(CX)
  9847. ADDQ $0x05, CX
  9848. JMP repeat_end_emit_encodeSnappyBlockAsm
  9849. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
  9850. CMPL SI, $0x40
  9851. JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
  9852. MOVB $0xee, (CX)
  9853. MOVW DI, 1(CX)
  9854. LEAL -60(SI), SI
  9855. ADDQ $0x03, CX
  9856. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
  9857. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
  9858. MOVL SI, R8
  9859. SHLL $0x02, R8
  9860. CMPL SI, $0x0c
  9861. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
  9862. CMPL DI, $0x00000800
  9863. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
  9864. LEAL -15(R8), R8
  9865. MOVB DI, 1(CX)
  9866. SHRL $0x08, DI
  9867. SHLL $0x05, DI
  9868. ORL DI, R8
  9869. MOVB R8, (CX)
  9870. ADDQ $0x02, CX
  9871. JMP repeat_end_emit_encodeSnappyBlockAsm
  9872. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
  9873. LEAL -2(R8), R8
  9874. MOVB R8, (CX)
  9875. MOVW DI, 1(CX)
  9876. ADDQ $0x03, CX
  9877. repeat_end_emit_encodeSnappyBlockAsm:
  9878. MOVL DX, 12(SP)
  9879. JMP search_loop_encodeSnappyBlockAsm
  9880. no_repeat_found_encodeSnappyBlockAsm:
  9881. CMPL (BX)(SI*1), DI
  9882. JEQ candidate_match_encodeSnappyBlockAsm
  9883. SHRQ $0x08, DI
  9884. MOVL (AX)(R10*4), SI
  9885. LEAL 2(DX), R9
  9886. CMPL (BX)(R8*1), DI
  9887. JEQ candidate2_match_encodeSnappyBlockAsm
  9888. MOVL R9, (AX)(R10*4)
  9889. SHRQ $0x08, DI
  9890. CMPL (BX)(SI*1), DI
  9891. JEQ candidate3_match_encodeSnappyBlockAsm
  9892. MOVL 20(SP), DX
  9893. JMP search_loop_encodeSnappyBlockAsm
  9894. candidate3_match_encodeSnappyBlockAsm:
  9895. ADDL $0x02, DX
  9896. JMP candidate_match_encodeSnappyBlockAsm
  9897. candidate2_match_encodeSnappyBlockAsm:
  9898. MOVL R9, (AX)(R10*4)
  9899. INCL DX
  9900. MOVL R8, SI
  9901. candidate_match_encodeSnappyBlockAsm:
  9902. MOVL 12(SP), DI
  9903. TESTL SI, SI
  9904. JZ match_extend_back_end_encodeSnappyBlockAsm
  9905. match_extend_back_loop_encodeSnappyBlockAsm:
  9906. CMPL DX, DI
  9907. JBE match_extend_back_end_encodeSnappyBlockAsm
  9908. MOVB -1(BX)(SI*1), R8
  9909. MOVB -1(BX)(DX*1), R9
  9910. CMPB R8, R9
  9911. JNE match_extend_back_end_encodeSnappyBlockAsm
  9912. LEAL -1(DX), DX
  9913. DECL SI
  9914. JZ match_extend_back_end_encodeSnappyBlockAsm
  9915. JMP match_extend_back_loop_encodeSnappyBlockAsm
  9916. match_extend_back_end_encodeSnappyBlockAsm:
  9917. MOVL DX, DI
  9918. SUBL 12(SP), DI
  9919. LEAQ 5(CX)(DI*1), DI
  9920. CMPQ DI, (SP)
  9921. JB match_dst_size_check_encodeSnappyBlockAsm
  9922. MOVQ $0x00000000, ret+56(FP)
  9923. RET
  9924. match_dst_size_check_encodeSnappyBlockAsm:
  9925. MOVL DX, DI
  9926. MOVL 12(SP), R8
  9927. CMPL R8, DI
  9928. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm
  9929. MOVL DI, R9
  9930. MOVL DI, 12(SP)
  9931. LEAQ (BX)(R8*1), DI
  9932. SUBL R8, R9
  9933. LEAL -1(R9), R8
  9934. CMPL R8, $0x3c
  9935. JB one_byte_match_emit_encodeSnappyBlockAsm
  9936. CMPL R8, $0x00000100
  9937. JB two_bytes_match_emit_encodeSnappyBlockAsm
  9938. CMPL R8, $0x00010000
  9939. JB three_bytes_match_emit_encodeSnappyBlockAsm
  9940. CMPL R8, $0x01000000
  9941. JB four_bytes_match_emit_encodeSnappyBlockAsm
  9942. MOVB $0xfc, (CX)
  9943. MOVL R8, 1(CX)
  9944. ADDQ $0x05, CX
  9945. JMP memmove_long_match_emit_encodeSnappyBlockAsm
  9946. four_bytes_match_emit_encodeSnappyBlockAsm:
  9947. MOVL R8, R10
  9948. SHRL $0x10, R10
  9949. MOVB $0xf8, (CX)
  9950. MOVW R8, 1(CX)
  9951. MOVB R10, 3(CX)
  9952. ADDQ $0x04, CX
  9953. JMP memmove_long_match_emit_encodeSnappyBlockAsm
  9954. three_bytes_match_emit_encodeSnappyBlockAsm:
  9955. MOVB $0xf4, (CX)
  9956. MOVW R8, 1(CX)
  9957. ADDQ $0x03, CX
  9958. JMP memmove_long_match_emit_encodeSnappyBlockAsm
  9959. two_bytes_match_emit_encodeSnappyBlockAsm:
  9960. MOVB $0xf0, (CX)
  9961. MOVB R8, 1(CX)
  9962. ADDQ $0x02, CX
  9963. CMPL R8, $0x40
  9964. JB memmove_match_emit_encodeSnappyBlockAsm
  9965. JMP memmove_long_match_emit_encodeSnappyBlockAsm
  9966. one_byte_match_emit_encodeSnappyBlockAsm:
  9967. SHLB $0x02, R8
  9968. MOVB R8, (CX)
  9969. ADDQ $0x01, CX
  9970. memmove_match_emit_encodeSnappyBlockAsm:
  9971. LEAQ (CX)(R9*1), R8
  9972. // genMemMoveShort
  9973. CMPQ R9, $0x08
  9974. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
  9975. CMPQ R9, $0x10
  9976. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
  9977. CMPQ R9, $0x20
  9978. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
  9979. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
  9980. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
  9981. MOVQ (DI), R10
  9982. MOVQ R10, (CX)
  9983. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  9984. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
  9985. MOVQ (DI), R10
  9986. MOVQ -8(DI)(R9*1), DI
  9987. MOVQ R10, (CX)
  9988. MOVQ DI, -8(CX)(R9*1)
  9989. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  9990. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
  9991. MOVOU (DI), X0
  9992. MOVOU -16(DI)(R9*1), X1
  9993. MOVOU X0, (CX)
  9994. MOVOU X1, -16(CX)(R9*1)
  9995. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
  9996. emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
  9997. MOVOU (DI), X0
  9998. MOVOU 16(DI), X1
  9999. MOVOU -32(DI)(R9*1), X2
  10000. MOVOU -16(DI)(R9*1), X3
  10001. MOVOU X0, (CX)
  10002. MOVOU X1, 16(CX)
  10003. MOVOU X2, -32(CX)(R9*1)
  10004. MOVOU X3, -16(CX)(R9*1)
  10005. memmove_end_copy_match_emit_encodeSnappyBlockAsm:
  10006. MOVQ R8, CX
  10007. JMP emit_literal_done_match_emit_encodeSnappyBlockAsm
  10008. memmove_long_match_emit_encodeSnappyBlockAsm:
  10009. LEAQ (CX)(R9*1), R8
  10010. // genMemMoveLong
  10011. MOVOU (DI), X0
  10012. MOVOU 16(DI), X1
  10013. MOVOU -32(DI)(R9*1), X2
  10014. MOVOU -16(DI)(R9*1), X3
  10015. MOVQ R9, R11
  10016. SHRQ $0x05, R11
  10017. MOVQ CX, R10
  10018. ANDL $0x0000001f, R10
  10019. MOVQ $0x00000040, R12
  10020. SUBQ R10, R12
  10021. DECQ R11
  10022. JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10023. LEAQ -32(DI)(R12*1), R10
  10024. LEAQ -32(CX)(R12*1), R13
  10025. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
  10026. MOVOU (R10), X4
  10027. MOVOU 16(R10), X5
  10028. MOVOA X4, (R13)
  10029. MOVOA X5, 16(R13)
  10030. ADDQ $0x20, R13
  10031. ADDQ $0x20, R10
  10032. ADDQ $0x20, R12
  10033. DECQ R11
  10034. JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
  10035. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
  10036. MOVOU -32(DI)(R12*1), X4
  10037. MOVOU -16(DI)(R12*1), X5
  10038. MOVOA X4, -32(CX)(R12*1)
  10039. MOVOA X5, -16(CX)(R12*1)
  10040. ADDQ $0x20, R12
  10041. CMPQ R9, R12
  10042. JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10043. MOVOU X0, (CX)
  10044. MOVOU X1, 16(CX)
  10045. MOVOU X2, -32(CX)(R9*1)
  10046. MOVOU X3, -16(CX)(R9*1)
  10047. MOVQ R8, CX
  10048. emit_literal_done_match_emit_encodeSnappyBlockAsm:
  10049. match_nolit_loop_encodeSnappyBlockAsm:
  10050. MOVL DX, DI
  10051. SUBL SI, DI
  10052. MOVL DI, 16(SP)
  10053. ADDL $0x04, DX
  10054. ADDL $0x04, SI
  10055. MOVQ src_len+32(FP), DI
  10056. SUBL DX, DI
  10057. LEAQ (BX)(DX*1), R8
  10058. LEAQ (BX)(SI*1), SI
  10059. // matchLen
  10060. XORL R10, R10
  10061. matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
  10062. CMPL DI, $0x10
  10063. JB matchlen_match8_match_nolit_encodeSnappyBlockAsm
  10064. MOVQ (R8)(R10*1), R9
  10065. MOVQ 8(R8)(R10*1), R11
  10066. XORQ (SI)(R10*1), R9
  10067. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
  10068. XORQ 8(SI)(R10*1), R11
  10069. JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
  10070. LEAL -16(DI), DI
  10071. LEAL 16(R10), R10
  10072. JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
  10073. matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
  10074. #ifdef GOAMD64_v3
  10075. TZCNTQ R11, R11
  10076. #else
  10077. BSFQ R11, R11
  10078. #endif
  10079. SARQ $0x03, R11
  10080. LEAL 8(R10)(R11*1), R10
  10081. JMP match_nolit_end_encodeSnappyBlockAsm
  10082. matchlen_match8_match_nolit_encodeSnappyBlockAsm:
  10083. CMPL DI, $0x08
  10084. JB matchlen_match4_match_nolit_encodeSnappyBlockAsm
  10085. MOVQ (R8)(R10*1), R9
  10086. XORQ (SI)(R10*1), R9
  10087. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
  10088. LEAL -8(DI), DI
  10089. LEAL 8(R10), R10
  10090. JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm
  10091. matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
  10092. #ifdef GOAMD64_v3
  10093. TZCNTQ R9, R9
  10094. #else
  10095. BSFQ R9, R9
  10096. #endif
  10097. SARQ $0x03, R9
  10098. LEAL (R10)(R9*1), R10
  10099. JMP match_nolit_end_encodeSnappyBlockAsm
  10100. matchlen_match4_match_nolit_encodeSnappyBlockAsm:
  10101. CMPL DI, $0x04
  10102. JB matchlen_match2_match_nolit_encodeSnappyBlockAsm
  10103. MOVL (R8)(R10*1), R9
  10104. CMPL (SI)(R10*1), R9
  10105. JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm
  10106. LEAL -4(DI), DI
  10107. LEAL 4(R10), R10
  10108. matchlen_match2_match_nolit_encodeSnappyBlockAsm:
  10109. CMPL DI, $0x01
  10110. JE matchlen_match1_match_nolit_encodeSnappyBlockAsm
  10111. JB match_nolit_end_encodeSnappyBlockAsm
  10112. MOVW (R8)(R10*1), R9
  10113. CMPW (SI)(R10*1), R9
  10114. JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm
  10115. LEAL 2(R10), R10
  10116. SUBL $0x02, DI
  10117. JZ match_nolit_end_encodeSnappyBlockAsm
  10118. matchlen_match1_match_nolit_encodeSnappyBlockAsm:
  10119. MOVB (R8)(R10*1), R9
  10120. CMPB (SI)(R10*1), R9
  10121. JNE match_nolit_end_encodeSnappyBlockAsm
  10122. LEAL 1(R10), R10
  10123. match_nolit_end_encodeSnappyBlockAsm:
  10124. ADDL R10, DX
  10125. MOVL 16(SP), SI
  10126. ADDL $0x04, R10
  10127. MOVL DX, 12(SP)
  10128. // emitCopy
  10129. CMPL SI, $0x00010000
  10130. JB two_byte_offset_match_nolit_encodeSnappyBlockAsm
  10131. four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
  10132. CMPL R10, $0x40
  10133. JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm
  10134. MOVB $0xff, (CX)
  10135. MOVL SI, 1(CX)
  10136. LEAL -64(R10), R10
  10137. ADDQ $0x05, CX
  10138. CMPL R10, $0x04
  10139. JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm
  10140. JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
  10141. four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
  10142. TESTL R10, R10
  10143. JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm
  10144. XORL DI, DI
  10145. LEAL -1(DI)(R10*4), R10
  10146. MOVB R10, (CX)
  10147. MOVL SI, 1(CX)
  10148. ADDQ $0x05, CX
  10149. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
  10150. two_byte_offset_match_nolit_encodeSnappyBlockAsm:
  10151. CMPL R10, $0x40
  10152. JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
  10153. MOVB $0xee, (CX)
  10154. MOVW SI, 1(CX)
  10155. LEAL -60(R10), R10
  10156. ADDQ $0x03, CX
  10157. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm
  10158. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
  10159. MOVL R10, DI
  10160. SHLL $0x02, DI
  10161. CMPL R10, $0x0c
  10162. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm
  10163. CMPL SI, $0x00000800
  10164. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm
  10165. LEAL -15(DI), DI
  10166. MOVB SI, 1(CX)
  10167. SHRL $0x08, SI
  10168. SHLL $0x05, SI
  10169. ORL SI, DI
  10170. MOVB DI, (CX)
  10171. ADDQ $0x02, CX
  10172. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
  10173. emit_copy_three_match_nolit_encodeSnappyBlockAsm:
  10174. LEAL -2(DI), DI
  10175. MOVB DI, (CX)
  10176. MOVW SI, 1(CX)
  10177. ADDQ $0x03, CX
  10178. match_nolit_emitcopy_end_encodeSnappyBlockAsm:
  10179. CMPL DX, 8(SP)
  10180. JAE emit_remainder_encodeSnappyBlockAsm
  10181. MOVQ -2(BX)(DX*1), DI
  10182. CMPQ CX, (SP)
  10183. JB match_nolit_dst_ok_encodeSnappyBlockAsm
  10184. MOVQ $0x00000000, ret+56(FP)
  10185. RET
  10186. match_nolit_dst_ok_encodeSnappyBlockAsm:
  10187. MOVQ $0x0000cf1bbcdcbf9b, R9
  10188. MOVQ DI, R8
  10189. SHRQ $0x10, DI
  10190. MOVQ DI, SI
  10191. SHLQ $0x10, R8
  10192. IMULQ R9, R8
  10193. SHRQ $0x32, R8
  10194. SHLQ $0x10, SI
  10195. IMULQ R9, SI
  10196. SHRQ $0x32, SI
  10197. LEAL -2(DX), R9
  10198. LEAQ (AX)(SI*4), R10
  10199. MOVL (R10), SI
  10200. MOVL R9, (AX)(R8*4)
  10201. MOVL DX, (R10)
  10202. CMPL (BX)(SI*1), DI
  10203. JEQ match_nolit_loop_encodeSnappyBlockAsm
  10204. INCL DX
  10205. JMP search_loop_encodeSnappyBlockAsm
  10206. emit_remainder_encodeSnappyBlockAsm:
  10207. MOVQ src_len+32(FP), AX
  10208. SUBL 12(SP), AX
  10209. LEAQ 5(CX)(AX*1), AX
  10210. CMPQ AX, (SP)
  10211. JB emit_remainder_ok_encodeSnappyBlockAsm
  10212. MOVQ $0x00000000, ret+56(FP)
  10213. RET
  10214. emit_remainder_ok_encodeSnappyBlockAsm:
  10215. MOVQ src_len+32(FP), AX
  10216. MOVL 12(SP), DX
  10217. CMPL DX, AX
  10218. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm
  10219. MOVL AX, SI
  10220. MOVL AX, 12(SP)
  10221. LEAQ (BX)(DX*1), AX
  10222. SUBL DX, SI
  10223. LEAL -1(SI), DX
  10224. CMPL DX, $0x3c
  10225. JB one_byte_emit_remainder_encodeSnappyBlockAsm
  10226. CMPL DX, $0x00000100
  10227. JB two_bytes_emit_remainder_encodeSnappyBlockAsm
  10228. CMPL DX, $0x00010000
  10229. JB three_bytes_emit_remainder_encodeSnappyBlockAsm
  10230. CMPL DX, $0x01000000
  10231. JB four_bytes_emit_remainder_encodeSnappyBlockAsm
  10232. MOVB $0xfc, (CX)
  10233. MOVL DX, 1(CX)
  10234. ADDQ $0x05, CX
  10235. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
  10236. four_bytes_emit_remainder_encodeSnappyBlockAsm:
  10237. MOVL DX, BX
  10238. SHRL $0x10, BX
  10239. MOVB $0xf8, (CX)
  10240. MOVW DX, 1(CX)
  10241. MOVB BL, 3(CX)
  10242. ADDQ $0x04, CX
  10243. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
  10244. three_bytes_emit_remainder_encodeSnappyBlockAsm:
  10245. MOVB $0xf4, (CX)
  10246. MOVW DX, 1(CX)
  10247. ADDQ $0x03, CX
  10248. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
  10249. two_bytes_emit_remainder_encodeSnappyBlockAsm:
  10250. MOVB $0xf0, (CX)
  10251. MOVB DL, 1(CX)
  10252. ADDQ $0x02, CX
  10253. CMPL DX, $0x40
  10254. JB memmove_emit_remainder_encodeSnappyBlockAsm
  10255. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
  10256. one_byte_emit_remainder_encodeSnappyBlockAsm:
  10257. SHLB $0x02, DL
  10258. MOVB DL, (CX)
  10259. ADDQ $0x01, CX
  10260. memmove_emit_remainder_encodeSnappyBlockAsm:
  10261. LEAQ (CX)(SI*1), DX
  10262. MOVL SI, BX
  10263. // genMemMoveShort
  10264. CMPQ BX, $0x03
  10265. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
  10266. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
  10267. CMPQ BX, $0x08
  10268. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
  10269. CMPQ BX, $0x10
  10270. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
  10271. CMPQ BX, $0x20
  10272. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
  10273. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
  10274. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
  10275. MOVB (AX), SI
  10276. MOVB -1(AX)(BX*1), AL
  10277. MOVB SI, (CX)
  10278. MOVB AL, -1(CX)(BX*1)
  10279. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  10280. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
  10281. MOVW (AX), SI
  10282. MOVB 2(AX), AL
  10283. MOVW SI, (CX)
  10284. MOVB AL, 2(CX)
  10285. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  10286. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
  10287. MOVL (AX), SI
  10288. MOVL -4(AX)(BX*1), AX
  10289. MOVL SI, (CX)
  10290. MOVL AX, -4(CX)(BX*1)
  10291. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  10292. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
  10293. MOVQ (AX), SI
  10294. MOVQ -8(AX)(BX*1), AX
  10295. MOVQ SI, (CX)
  10296. MOVQ AX, -8(CX)(BX*1)
  10297. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  10298. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
  10299. MOVOU (AX), X0
  10300. MOVOU -16(AX)(BX*1), X1
  10301. MOVOU X0, (CX)
  10302. MOVOU X1, -16(CX)(BX*1)
  10303. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
  10304. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
  10305. MOVOU (AX), X0
  10306. MOVOU 16(AX), X1
  10307. MOVOU -32(AX)(BX*1), X2
  10308. MOVOU -16(AX)(BX*1), X3
  10309. MOVOU X0, (CX)
  10310. MOVOU X1, 16(CX)
  10311. MOVOU X2, -32(CX)(BX*1)
  10312. MOVOU X3, -16(CX)(BX*1)
  10313. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
  10314. MOVQ DX, CX
  10315. JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm
  10316. memmove_long_emit_remainder_encodeSnappyBlockAsm:
  10317. LEAQ (CX)(SI*1), DX
  10318. MOVL SI, BX
  10319. // genMemMoveLong
  10320. MOVOU (AX), X0
  10321. MOVOU 16(AX), X1
  10322. MOVOU -32(AX)(BX*1), X2
  10323. MOVOU -16(AX)(BX*1), X3
  10324. MOVQ BX, DI
  10325. SHRQ $0x05, DI
  10326. MOVQ CX, SI
  10327. ANDL $0x0000001f, SI
  10328. MOVQ $0x00000040, R8
  10329. SUBQ SI, R8
  10330. DECQ DI
  10331. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10332. LEAQ -32(AX)(R8*1), SI
  10333. LEAQ -32(CX)(R8*1), R9
  10334. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
  10335. MOVOU (SI), X4
  10336. MOVOU 16(SI), X5
  10337. MOVOA X4, (R9)
  10338. MOVOA X5, 16(R9)
  10339. ADDQ $0x20, R9
  10340. ADDQ $0x20, SI
  10341. ADDQ $0x20, R8
  10342. DECQ DI
  10343. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
  10344. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
  10345. MOVOU -32(AX)(R8*1), X4
  10346. MOVOU -16(AX)(R8*1), X5
  10347. MOVOA X4, -32(CX)(R8*1)
  10348. MOVOA X5, -16(CX)(R8*1)
  10349. ADDQ $0x20, R8
  10350. CMPQ BX, R8
  10351. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
  10352. MOVOU X0, (CX)
  10353. MOVOU X1, 16(CX)
  10354. MOVOU X2, -32(CX)(BX*1)
  10355. MOVOU X3, -16(CX)(BX*1)
  10356. MOVQ DX, CX
  10357. emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
  10358. MOVQ dst_base+0(FP), AX
  10359. SUBQ AX, CX
  10360. MOVQ CX, ret+56(FP)
  10361. RET
  10362. // func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
  10363. // Requires: BMI, SSE2
  10364. TEXT ·encodeSnappyBlockAsm64K(SB), $24-64
  10365. MOVQ tmp+48(FP), AX
  10366. MOVQ dst_base+0(FP), CX
  10367. MOVQ $0x00000200, DX
  10368. MOVQ AX, BX
  10369. PXOR X0, X0
  10370. zero_loop_encodeSnappyBlockAsm64K:
  10371. MOVOU X0, (BX)
  10372. MOVOU X0, 16(BX)
  10373. MOVOU X0, 32(BX)
  10374. MOVOU X0, 48(BX)
  10375. MOVOU X0, 64(BX)
  10376. MOVOU X0, 80(BX)
  10377. MOVOU X0, 96(BX)
  10378. MOVOU X0, 112(BX)
  10379. ADDQ $0x80, BX
  10380. DECQ DX
  10381. JNZ zero_loop_encodeSnappyBlockAsm64K
  10382. MOVL $0x00000000, 12(SP)
  10383. MOVQ src_len+32(FP), DX
  10384. LEAQ -9(DX), BX
  10385. LEAQ -8(DX), SI
  10386. MOVL SI, 8(SP)
  10387. SHRQ $0x05, DX
  10388. SUBL DX, BX
  10389. LEAQ (CX)(BX*1), BX
  10390. MOVQ BX, (SP)
  10391. MOVL $0x00000001, DX
  10392. MOVL DX, 16(SP)
  10393. MOVQ src_base+24(FP), BX
  10394. search_loop_encodeSnappyBlockAsm64K:
  10395. MOVL DX, SI
  10396. SUBL 12(SP), SI
  10397. SHRL $0x06, SI
  10398. LEAL 4(DX)(SI*1), SI
  10399. CMPL SI, 8(SP)
  10400. JAE emit_remainder_encodeSnappyBlockAsm64K
  10401. MOVQ (BX)(DX*1), DI
  10402. MOVL SI, 20(SP)
  10403. MOVQ $0x0000cf1bbcdcbf9b, R9
  10404. MOVQ DI, R10
  10405. MOVQ DI, R11
  10406. SHRQ $0x08, R11
  10407. SHLQ $0x10, R10
  10408. IMULQ R9, R10
  10409. SHRQ $0x32, R10
  10410. SHLQ $0x10, R11
  10411. IMULQ R9, R11
  10412. SHRQ $0x32, R11
  10413. MOVL (AX)(R10*4), SI
  10414. MOVL (AX)(R11*4), R8
  10415. MOVL DX, (AX)(R10*4)
  10416. LEAL 1(DX), R10
  10417. MOVL R10, (AX)(R11*4)
  10418. MOVQ DI, R10
  10419. SHRQ $0x10, R10
  10420. SHLQ $0x10, R10
  10421. IMULQ R9, R10
  10422. SHRQ $0x32, R10
  10423. MOVL DX, R9
  10424. SUBL 16(SP), R9
  10425. MOVL 1(BX)(R9*1), R11
  10426. MOVQ DI, R9
  10427. SHRQ $0x08, R9
  10428. CMPL R9, R11
  10429. JNE no_repeat_found_encodeSnappyBlockAsm64K
  10430. LEAL 1(DX), DI
  10431. MOVL 12(SP), SI
  10432. MOVL DI, R8
  10433. SUBL 16(SP), R8
  10434. JZ repeat_extend_back_end_encodeSnappyBlockAsm64K
  10435. repeat_extend_back_loop_encodeSnappyBlockAsm64K:
  10436. CMPL DI, SI
  10437. JBE repeat_extend_back_end_encodeSnappyBlockAsm64K
  10438. MOVB -1(BX)(R8*1), R9
  10439. MOVB -1(BX)(DI*1), R10
  10440. CMPB R9, R10
  10441. JNE repeat_extend_back_end_encodeSnappyBlockAsm64K
  10442. LEAL -1(DI), DI
  10443. DECL R8
  10444. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K
  10445. repeat_extend_back_end_encodeSnappyBlockAsm64K:
  10446. MOVL DI, SI
  10447. SUBL 12(SP), SI
  10448. LEAQ 3(CX)(SI*1), SI
  10449. CMPQ SI, (SP)
  10450. JB repeat_dst_size_check_encodeSnappyBlockAsm64K
  10451. MOVQ $0x00000000, ret+56(FP)
  10452. RET
  10453. repeat_dst_size_check_encodeSnappyBlockAsm64K:
  10454. MOVL 12(SP), SI
  10455. CMPL SI, DI
  10456. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
  10457. MOVL DI, R8
  10458. MOVL DI, 12(SP)
  10459. LEAQ (BX)(SI*1), R9
  10460. SUBL SI, R8
  10461. LEAL -1(R8), SI
  10462. CMPL SI, $0x3c
  10463. JB one_byte_repeat_emit_encodeSnappyBlockAsm64K
  10464. CMPL SI, $0x00000100
  10465. JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K
  10466. JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K
  10467. three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
  10468. MOVB $0xf4, (CX)
  10469. MOVW SI, 1(CX)
  10470. ADDQ $0x03, CX
  10471. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
  10472. two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
  10473. MOVB $0xf0, (CX)
  10474. MOVB SI, 1(CX)
  10475. ADDQ $0x02, CX
  10476. CMPL SI, $0x40
  10477. JB memmove_repeat_emit_encodeSnappyBlockAsm64K
  10478. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
  10479. one_byte_repeat_emit_encodeSnappyBlockAsm64K:
  10480. SHLB $0x02, SI
  10481. MOVB SI, (CX)
  10482. ADDQ $0x01, CX
  10483. memmove_repeat_emit_encodeSnappyBlockAsm64K:
  10484. LEAQ (CX)(R8*1), SI
  10485. // genMemMoveShort
  10486. CMPQ R8, $0x08
  10487. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
  10488. CMPQ R8, $0x10
  10489. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
  10490. CMPQ R8, $0x20
  10491. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
  10492. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
  10493. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
  10494. MOVQ (R9), R10
  10495. MOVQ R10, (CX)
  10496. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
  10497. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
  10498. MOVQ (R9), R10
  10499. MOVQ -8(R9)(R8*1), R9
  10500. MOVQ R10, (CX)
  10501. MOVQ R9, -8(CX)(R8*1)
  10502. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
  10503. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
  10504. MOVOU (R9), X0
  10505. MOVOU -16(R9)(R8*1), X1
  10506. MOVOU X0, (CX)
  10507. MOVOU X1, -16(CX)(R8*1)
  10508. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
  10509. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
  10510. MOVOU (R9), X0
  10511. MOVOU 16(R9), X1
  10512. MOVOU -32(R9)(R8*1), X2
  10513. MOVOU -16(R9)(R8*1), X3
  10514. MOVOU X0, (CX)
  10515. MOVOU X1, 16(CX)
  10516. MOVOU X2, -32(CX)(R8*1)
  10517. MOVOU X3, -16(CX)(R8*1)
  10518. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
  10519. MOVQ SI, CX
  10520. JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
  10521. memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
  10522. LEAQ (CX)(R8*1), SI
  10523. // genMemMoveLong
  10524. MOVOU (R9), X0
  10525. MOVOU 16(R9), X1
  10526. MOVOU -32(R9)(R8*1), X2
  10527. MOVOU -16(R9)(R8*1), X3
  10528. MOVQ R8, R11
  10529. SHRQ $0x05, R11
  10530. MOVQ CX, R10
  10531. ANDL $0x0000001f, R10
  10532. MOVQ $0x00000040, R12
  10533. SUBQ R10, R12
  10534. DECQ R11
  10535. JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  10536. LEAQ -32(R9)(R12*1), R10
  10537. LEAQ -32(CX)(R12*1), R13
  10538. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
  10539. MOVOU (R10), X4
  10540. MOVOU 16(R10), X5
  10541. MOVOA X4, (R13)
  10542. MOVOA X5, 16(R13)
  10543. ADDQ $0x20, R13
  10544. ADDQ $0x20, R10
  10545. ADDQ $0x20, R12
  10546. DECQ R11
  10547. JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
  10548. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
  10549. MOVOU -32(R9)(R12*1), X4
  10550. MOVOU -16(R9)(R12*1), X5
  10551. MOVOA X4, -32(CX)(R12*1)
  10552. MOVOA X5, -16(CX)(R12*1)
  10553. ADDQ $0x20, R12
  10554. CMPQ R8, R12
  10555. JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  10556. MOVOU X0, (CX)
  10557. MOVOU X1, 16(CX)
  10558. MOVOU X2, -32(CX)(R8*1)
  10559. MOVOU X3, -16(CX)(R8*1)
  10560. MOVQ SI, CX
  10561. emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
  10562. ADDL $0x05, DX
  10563. MOVL DX, SI
  10564. SUBL 16(SP), SI
  10565. MOVQ src_len+32(FP), R8
  10566. SUBL DX, R8
  10567. LEAQ (BX)(DX*1), R9
  10568. LEAQ (BX)(SI*1), SI
  10569. // matchLen
  10570. XORL R11, R11
  10571. matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
  10572. CMPL R8, $0x10
  10573. JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
  10574. MOVQ (R9)(R11*1), R10
  10575. MOVQ 8(R9)(R11*1), R12
  10576. XORQ (SI)(R11*1), R10
  10577. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
  10578. XORQ 8(SI)(R11*1), R12
  10579. JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
  10580. LEAL -16(R8), R8
  10581. LEAL 16(R11), R11
  10582. JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
  10583. matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
  10584. #ifdef GOAMD64_v3
  10585. TZCNTQ R12, R12
  10586. #else
  10587. BSFQ R12, R12
  10588. #endif
  10589. SARQ $0x03, R12
  10590. LEAL 8(R11)(R12*1), R11
  10591. JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
  10592. matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
  10593. CMPL R8, $0x08
  10594. JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
  10595. MOVQ (R9)(R11*1), R10
  10596. XORQ (SI)(R11*1), R10
  10597. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
  10598. LEAL -8(R8), R8
  10599. LEAL 8(R11), R11
  10600. JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
  10601. matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
  10602. #ifdef GOAMD64_v3
  10603. TZCNTQ R10, R10
  10604. #else
  10605. BSFQ R10, R10
  10606. #endif
  10607. SARQ $0x03, R10
  10608. LEAL (R11)(R10*1), R11
  10609. JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
  10610. matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
  10611. CMPL R8, $0x04
  10612. JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
  10613. MOVL (R9)(R11*1), R10
  10614. CMPL (SI)(R11*1), R10
  10615. JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
  10616. LEAL -4(R8), R8
  10617. LEAL 4(R11), R11
  10618. matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
  10619. CMPL R8, $0x01
  10620. JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
  10621. JB repeat_extend_forward_end_encodeSnappyBlockAsm64K
  10622. MOVW (R9)(R11*1), R10
  10623. CMPW (SI)(R11*1), R10
  10624. JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
  10625. LEAL 2(R11), R11
  10626. SUBL $0x02, R8
  10627. JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K
  10628. matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
  10629. MOVB (R9)(R11*1), R10
  10630. CMPB (SI)(R11*1), R10
  10631. JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K
  10632. LEAL 1(R11), R11
  10633. repeat_extend_forward_end_encodeSnappyBlockAsm64K:
  10634. ADDL R11, DX
  10635. MOVL DX, SI
  10636. SUBL DI, SI
  10637. MOVL 16(SP), DI
  10638. // emitCopy
  10639. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
  10640. CMPL SI, $0x40
  10641. JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
  10642. MOVB $0xee, (CX)
  10643. MOVW DI, 1(CX)
  10644. LEAL -60(SI), SI
  10645. ADDQ $0x03, CX
  10646. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
  10647. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
  10648. MOVL SI, R8
  10649. SHLL $0x02, R8
  10650. CMPL SI, $0x0c
  10651. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
  10652. CMPL DI, $0x00000800
  10653. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
  10654. LEAL -15(R8), R8
  10655. MOVB DI, 1(CX)
  10656. SHRL $0x08, DI
  10657. SHLL $0x05, DI
  10658. ORL DI, R8
  10659. MOVB R8, (CX)
  10660. ADDQ $0x02, CX
  10661. JMP repeat_end_emit_encodeSnappyBlockAsm64K
  10662. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
  10663. LEAL -2(R8), R8
  10664. MOVB R8, (CX)
  10665. MOVW DI, 1(CX)
  10666. ADDQ $0x03, CX
  10667. repeat_end_emit_encodeSnappyBlockAsm64K:
  10668. MOVL DX, 12(SP)
  10669. JMP search_loop_encodeSnappyBlockAsm64K
  10670. no_repeat_found_encodeSnappyBlockAsm64K:
  10671. CMPL (BX)(SI*1), DI
  10672. JEQ candidate_match_encodeSnappyBlockAsm64K
  10673. SHRQ $0x08, DI
  10674. MOVL (AX)(R10*4), SI
  10675. LEAL 2(DX), R9
  10676. CMPL (BX)(R8*1), DI
  10677. JEQ candidate2_match_encodeSnappyBlockAsm64K
  10678. MOVL R9, (AX)(R10*4)
  10679. SHRQ $0x08, DI
  10680. CMPL (BX)(SI*1), DI
  10681. JEQ candidate3_match_encodeSnappyBlockAsm64K
  10682. MOVL 20(SP), DX
  10683. JMP search_loop_encodeSnappyBlockAsm64K
  10684. candidate3_match_encodeSnappyBlockAsm64K:
  10685. ADDL $0x02, DX
  10686. JMP candidate_match_encodeSnappyBlockAsm64K
  10687. candidate2_match_encodeSnappyBlockAsm64K:
  10688. MOVL R9, (AX)(R10*4)
  10689. INCL DX
  10690. MOVL R8, SI
  10691. candidate_match_encodeSnappyBlockAsm64K:
  10692. MOVL 12(SP), DI
  10693. TESTL SI, SI
  10694. JZ match_extend_back_end_encodeSnappyBlockAsm64K
  10695. match_extend_back_loop_encodeSnappyBlockAsm64K:
  10696. CMPL DX, DI
  10697. JBE match_extend_back_end_encodeSnappyBlockAsm64K
  10698. MOVB -1(BX)(SI*1), R8
  10699. MOVB -1(BX)(DX*1), R9
  10700. CMPB R8, R9
  10701. JNE match_extend_back_end_encodeSnappyBlockAsm64K
  10702. LEAL -1(DX), DX
  10703. DECL SI
  10704. JZ match_extend_back_end_encodeSnappyBlockAsm64K
  10705. JMP match_extend_back_loop_encodeSnappyBlockAsm64K
  10706. match_extend_back_end_encodeSnappyBlockAsm64K:
  10707. MOVL DX, DI
  10708. SUBL 12(SP), DI
  10709. LEAQ 3(CX)(DI*1), DI
  10710. CMPQ DI, (SP)
  10711. JB match_dst_size_check_encodeSnappyBlockAsm64K
  10712. MOVQ $0x00000000, ret+56(FP)
  10713. RET
  10714. match_dst_size_check_encodeSnappyBlockAsm64K:
  10715. MOVL DX, DI
  10716. MOVL 12(SP), R8
  10717. CMPL R8, DI
  10718. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K
  10719. MOVL DI, R9
  10720. MOVL DI, 12(SP)
  10721. LEAQ (BX)(R8*1), DI
  10722. SUBL R8, R9
  10723. LEAL -1(R9), R8
  10724. CMPL R8, $0x3c
  10725. JB one_byte_match_emit_encodeSnappyBlockAsm64K
  10726. CMPL R8, $0x00000100
  10727. JB two_bytes_match_emit_encodeSnappyBlockAsm64K
  10728. JB three_bytes_match_emit_encodeSnappyBlockAsm64K
  10729. three_bytes_match_emit_encodeSnappyBlockAsm64K:
  10730. MOVB $0xf4, (CX)
  10731. MOVW R8, 1(CX)
  10732. ADDQ $0x03, CX
  10733. JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
  10734. two_bytes_match_emit_encodeSnappyBlockAsm64K:
  10735. MOVB $0xf0, (CX)
  10736. MOVB R8, 1(CX)
  10737. ADDQ $0x02, CX
  10738. CMPL R8, $0x40
  10739. JB memmove_match_emit_encodeSnappyBlockAsm64K
  10740. JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
  10741. one_byte_match_emit_encodeSnappyBlockAsm64K:
  10742. SHLB $0x02, R8
  10743. MOVB R8, (CX)
  10744. ADDQ $0x01, CX
  10745. memmove_match_emit_encodeSnappyBlockAsm64K:
  10746. LEAQ (CX)(R9*1), R8
  10747. // genMemMoveShort
  10748. CMPQ R9, $0x08
  10749. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
  10750. CMPQ R9, $0x10
  10751. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
  10752. CMPQ R9, $0x20
  10753. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
  10754. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
  10755. emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
  10756. MOVQ (DI), R10
  10757. MOVQ R10, (CX)
  10758. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
  10759. emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
  10760. MOVQ (DI), R10
  10761. MOVQ -8(DI)(R9*1), DI
  10762. MOVQ R10, (CX)
  10763. MOVQ DI, -8(CX)(R9*1)
  10764. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
  10765. emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
  10766. MOVOU (DI), X0
  10767. MOVOU -16(DI)(R9*1), X1
  10768. MOVOU X0, (CX)
  10769. MOVOU X1, -16(CX)(R9*1)
  10770. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
  10771. emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
  10772. MOVOU (DI), X0
  10773. MOVOU 16(DI), X1
  10774. MOVOU -32(DI)(R9*1), X2
  10775. MOVOU -16(DI)(R9*1), X3
  10776. MOVOU X0, (CX)
  10777. MOVOU X1, 16(CX)
  10778. MOVOU X2, -32(CX)(R9*1)
  10779. MOVOU X3, -16(CX)(R9*1)
  10780. memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
  10781. MOVQ R8, CX
  10782. JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K
  10783. memmove_long_match_emit_encodeSnappyBlockAsm64K:
  10784. LEAQ (CX)(R9*1), R8
  10785. // genMemMoveLong
  10786. MOVOU (DI), X0
  10787. MOVOU 16(DI), X1
  10788. MOVOU -32(DI)(R9*1), X2
  10789. MOVOU -16(DI)(R9*1), X3
  10790. MOVQ R9, R11
  10791. SHRQ $0x05, R11
  10792. MOVQ CX, R10
  10793. ANDL $0x0000001f, R10
  10794. MOVQ $0x00000040, R12
  10795. SUBQ R10, R12
  10796. DECQ R11
  10797. JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  10798. LEAQ -32(DI)(R12*1), R10
  10799. LEAQ -32(CX)(R12*1), R13
  10800. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
  10801. MOVOU (R10), X4
  10802. MOVOU 16(R10), X5
  10803. MOVOA X4, (R13)
  10804. MOVOA X5, 16(R13)
  10805. ADDQ $0x20, R13
  10806. ADDQ $0x20, R10
  10807. ADDQ $0x20, R12
  10808. DECQ R11
  10809. JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
  10810. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
  10811. MOVOU -32(DI)(R12*1), X4
  10812. MOVOU -16(DI)(R12*1), X5
  10813. MOVOA X4, -32(CX)(R12*1)
  10814. MOVOA X5, -16(CX)(R12*1)
  10815. ADDQ $0x20, R12
  10816. CMPQ R9, R12
  10817. JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  10818. MOVOU X0, (CX)
  10819. MOVOU X1, 16(CX)
  10820. MOVOU X2, -32(CX)(R9*1)
  10821. MOVOU X3, -16(CX)(R9*1)
  10822. MOVQ R8, CX
  10823. emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
  10824. match_nolit_loop_encodeSnappyBlockAsm64K:
  10825. MOVL DX, DI
  10826. SUBL SI, DI
  10827. MOVL DI, 16(SP)
  10828. ADDL $0x04, DX
  10829. ADDL $0x04, SI
  10830. MOVQ src_len+32(FP), DI
  10831. SUBL DX, DI
  10832. LEAQ (BX)(DX*1), R8
  10833. LEAQ (BX)(SI*1), SI
  10834. // matchLen
  10835. XORL R10, R10
  10836. matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
  10837. CMPL DI, $0x10
  10838. JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
  10839. MOVQ (R8)(R10*1), R9
  10840. MOVQ 8(R8)(R10*1), R11
  10841. XORQ (SI)(R10*1), R9
  10842. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
  10843. XORQ 8(SI)(R10*1), R11
  10844. JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
  10845. LEAL -16(DI), DI
  10846. LEAL 16(R10), R10
  10847. JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
  10848. matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
  10849. #ifdef GOAMD64_v3
  10850. TZCNTQ R11, R11
  10851. #else
  10852. BSFQ R11, R11
  10853. #endif
  10854. SARQ $0x03, R11
  10855. LEAL 8(R10)(R11*1), R10
  10856. JMP match_nolit_end_encodeSnappyBlockAsm64K
  10857. matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
  10858. CMPL DI, $0x08
  10859. JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
  10860. MOVQ (R8)(R10*1), R9
  10861. XORQ (SI)(R10*1), R9
  10862. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
  10863. LEAL -8(DI), DI
  10864. LEAL 8(R10), R10
  10865. JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
  10866. matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
  10867. #ifdef GOAMD64_v3
  10868. TZCNTQ R9, R9
  10869. #else
  10870. BSFQ R9, R9
  10871. #endif
  10872. SARQ $0x03, R9
  10873. LEAL (R10)(R9*1), R10
  10874. JMP match_nolit_end_encodeSnappyBlockAsm64K
  10875. matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
  10876. CMPL DI, $0x04
  10877. JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
  10878. MOVL (R8)(R10*1), R9
  10879. CMPL (SI)(R10*1), R9
  10880. JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
  10881. LEAL -4(DI), DI
  10882. LEAL 4(R10), R10
  10883. matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
  10884. CMPL DI, $0x01
  10885. JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
  10886. JB match_nolit_end_encodeSnappyBlockAsm64K
  10887. MOVW (R8)(R10*1), R9
  10888. CMPW (SI)(R10*1), R9
  10889. JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
  10890. LEAL 2(R10), R10
  10891. SUBL $0x02, DI
  10892. JZ match_nolit_end_encodeSnappyBlockAsm64K
  10893. matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
  10894. MOVB (R8)(R10*1), R9
  10895. CMPB (SI)(R10*1), R9
  10896. JNE match_nolit_end_encodeSnappyBlockAsm64K
  10897. LEAL 1(R10), R10
  10898. match_nolit_end_encodeSnappyBlockAsm64K:
  10899. ADDL R10, DX
  10900. MOVL 16(SP), SI
  10901. ADDL $0x04, R10
  10902. MOVL DX, 12(SP)
  10903. // emitCopy
  10904. two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
  10905. CMPL R10, $0x40
  10906. JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
  10907. MOVB $0xee, (CX)
  10908. MOVW SI, 1(CX)
  10909. LEAL -60(R10), R10
  10910. ADDQ $0x03, CX
  10911. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
  10912. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
  10913. MOVL R10, DI
  10914. SHLL $0x02, DI
  10915. CMPL R10, $0x0c
  10916. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
  10917. CMPL SI, $0x00000800
  10918. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
  10919. LEAL -15(DI), DI
  10920. MOVB SI, 1(CX)
  10921. SHRL $0x08, SI
  10922. SHLL $0x05, SI
  10923. ORL SI, DI
  10924. MOVB DI, (CX)
  10925. ADDQ $0x02, CX
  10926. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
  10927. emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
  10928. LEAL -2(DI), DI
  10929. MOVB DI, (CX)
  10930. MOVW SI, 1(CX)
  10931. ADDQ $0x03, CX
  10932. match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
  10933. CMPL DX, 8(SP)
  10934. JAE emit_remainder_encodeSnappyBlockAsm64K
  10935. MOVQ -2(BX)(DX*1), DI
  10936. CMPQ CX, (SP)
  10937. JB match_nolit_dst_ok_encodeSnappyBlockAsm64K
  10938. MOVQ $0x00000000, ret+56(FP)
  10939. RET
  10940. match_nolit_dst_ok_encodeSnappyBlockAsm64K:
  10941. MOVQ $0x0000cf1bbcdcbf9b, R9
  10942. MOVQ DI, R8
  10943. SHRQ $0x10, DI
  10944. MOVQ DI, SI
  10945. SHLQ $0x10, R8
  10946. IMULQ R9, R8
  10947. SHRQ $0x32, R8
  10948. SHLQ $0x10, SI
  10949. IMULQ R9, SI
  10950. SHRQ $0x32, SI
  10951. LEAL -2(DX), R9
  10952. LEAQ (AX)(SI*4), R10
  10953. MOVL (R10), SI
  10954. MOVL R9, (AX)(R8*4)
  10955. MOVL DX, (R10)
  10956. CMPL (BX)(SI*1), DI
  10957. JEQ match_nolit_loop_encodeSnappyBlockAsm64K
  10958. INCL DX
  10959. JMP search_loop_encodeSnappyBlockAsm64K
  10960. emit_remainder_encodeSnappyBlockAsm64K:
  10961. MOVQ src_len+32(FP), AX
  10962. SUBL 12(SP), AX
  10963. LEAQ 3(CX)(AX*1), AX
  10964. CMPQ AX, (SP)
  10965. JB emit_remainder_ok_encodeSnappyBlockAsm64K
  10966. MOVQ $0x00000000, ret+56(FP)
  10967. RET
  10968. emit_remainder_ok_encodeSnappyBlockAsm64K:
  10969. MOVQ src_len+32(FP), AX
  10970. MOVL 12(SP), DX
  10971. CMPL DX, AX
  10972. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
  10973. MOVL AX, SI
  10974. MOVL AX, 12(SP)
  10975. LEAQ (BX)(DX*1), AX
  10976. SUBL DX, SI
  10977. LEAL -1(SI), DX
  10978. CMPL DX, $0x3c
  10979. JB one_byte_emit_remainder_encodeSnappyBlockAsm64K
  10980. CMPL DX, $0x00000100
  10981. JB two_bytes_emit_remainder_encodeSnappyBlockAsm64K
  10982. JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K
  10983. three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
  10984. MOVB $0xf4, (CX)
  10985. MOVW DX, 1(CX)
  10986. ADDQ $0x03, CX
  10987. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
  10988. two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
  10989. MOVB $0xf0, (CX)
  10990. MOVB DL, 1(CX)
  10991. ADDQ $0x02, CX
  10992. CMPL DX, $0x40
  10993. JB memmove_emit_remainder_encodeSnappyBlockAsm64K
  10994. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
  10995. one_byte_emit_remainder_encodeSnappyBlockAsm64K:
  10996. SHLB $0x02, DL
  10997. MOVB DL, (CX)
  10998. ADDQ $0x01, CX
  10999. memmove_emit_remainder_encodeSnappyBlockAsm64K:
  11000. LEAQ (CX)(SI*1), DX
  11001. MOVL SI, BX
  11002. // genMemMoveShort
  11003. CMPQ BX, $0x03
  11004. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2
  11005. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3
  11006. CMPQ BX, $0x08
  11007. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7
  11008. CMPQ BX, $0x10
  11009. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
  11010. CMPQ BX, $0x20
  11011. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
  11012. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
  11013. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
  11014. MOVB (AX), SI
  11015. MOVB -1(AX)(BX*1), AL
  11016. MOVB SI, (CX)
  11017. MOVB AL, -1(CX)(BX*1)
  11018. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11019. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
  11020. MOVW (AX), SI
  11021. MOVB 2(AX), AL
  11022. MOVW SI, (CX)
  11023. MOVB AL, 2(CX)
  11024. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11025. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
  11026. MOVL (AX), SI
  11027. MOVL -4(AX)(BX*1), AX
  11028. MOVL SI, (CX)
  11029. MOVL AX, -4(CX)(BX*1)
  11030. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11031. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
  11032. MOVQ (AX), SI
  11033. MOVQ -8(AX)(BX*1), AX
  11034. MOVQ SI, (CX)
  11035. MOVQ AX, -8(CX)(BX*1)
  11036. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11037. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
  11038. MOVOU (AX), X0
  11039. MOVOU -16(AX)(BX*1), X1
  11040. MOVOU X0, (CX)
  11041. MOVOU X1, -16(CX)(BX*1)
  11042. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
  11043. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
  11044. MOVOU (AX), X0
  11045. MOVOU 16(AX), X1
  11046. MOVOU -32(AX)(BX*1), X2
  11047. MOVOU -16(AX)(BX*1), X3
  11048. MOVOU X0, (CX)
  11049. MOVOU X1, 16(CX)
  11050. MOVOU X2, -32(CX)(BX*1)
  11051. MOVOU X3, -16(CX)(BX*1)
  11052. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
  11053. MOVQ DX, CX
  11054. JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
  11055. memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
  11056. LEAQ (CX)(SI*1), DX
  11057. MOVL SI, BX
  11058. // genMemMoveLong
  11059. MOVOU (AX), X0
  11060. MOVOU 16(AX), X1
  11061. MOVOU -32(AX)(BX*1), X2
  11062. MOVOU -16(AX)(BX*1), X3
  11063. MOVQ BX, DI
  11064. SHRQ $0x05, DI
  11065. MOVQ CX, SI
  11066. ANDL $0x0000001f, SI
  11067. MOVQ $0x00000040, R8
  11068. SUBQ SI, R8
  11069. DECQ DI
  11070. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11071. LEAQ -32(AX)(R8*1), SI
  11072. LEAQ -32(CX)(R8*1), R9
  11073. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
  11074. MOVOU (SI), X4
  11075. MOVOU 16(SI), X5
  11076. MOVOA X4, (R9)
  11077. MOVOA X5, 16(R9)
  11078. ADDQ $0x20, R9
  11079. ADDQ $0x20, SI
  11080. ADDQ $0x20, R8
  11081. DECQ DI
  11082. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
  11083. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
  11084. MOVOU -32(AX)(R8*1), X4
  11085. MOVOU -16(AX)(R8*1), X5
  11086. MOVOA X4, -32(CX)(R8*1)
  11087. MOVOA X5, -16(CX)(R8*1)
  11088. ADDQ $0x20, R8
  11089. CMPQ BX, R8
  11090. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
  11091. MOVOU X0, (CX)
  11092. MOVOU X1, 16(CX)
  11093. MOVOU X2, -32(CX)(BX*1)
  11094. MOVOU X3, -16(CX)(BX*1)
  11095. MOVQ DX, CX
  11096. emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
  11097. MOVQ dst_base+0(FP), AX
  11098. SUBQ AX, CX
  11099. MOVQ CX, ret+56(FP)
  11100. RET
  11101. // func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
  11102. // Requires: BMI, SSE2
  11103. TEXT ·encodeSnappyBlockAsm12B(SB), $24-64
  11104. MOVQ tmp+48(FP), AX
  11105. MOVQ dst_base+0(FP), CX
  11106. MOVQ $0x00000080, DX
  11107. MOVQ AX, BX
  11108. PXOR X0, X0
  11109. zero_loop_encodeSnappyBlockAsm12B:
  11110. MOVOU X0, (BX)
  11111. MOVOU X0, 16(BX)
  11112. MOVOU X0, 32(BX)
  11113. MOVOU X0, 48(BX)
  11114. MOVOU X0, 64(BX)
  11115. MOVOU X0, 80(BX)
  11116. MOVOU X0, 96(BX)
  11117. MOVOU X0, 112(BX)
  11118. ADDQ $0x80, BX
  11119. DECQ DX
  11120. JNZ zero_loop_encodeSnappyBlockAsm12B
  11121. MOVL $0x00000000, 12(SP)
  11122. MOVQ src_len+32(FP), DX
  11123. LEAQ -9(DX), BX
  11124. LEAQ -8(DX), SI
  11125. MOVL SI, 8(SP)
  11126. SHRQ $0x05, DX
  11127. SUBL DX, BX
  11128. LEAQ (CX)(BX*1), BX
  11129. MOVQ BX, (SP)
  11130. MOVL $0x00000001, DX
  11131. MOVL DX, 16(SP)
  11132. MOVQ src_base+24(FP), BX
  11133. search_loop_encodeSnappyBlockAsm12B:
  11134. MOVL DX, SI
  11135. SUBL 12(SP), SI
  11136. SHRL $0x05, SI
  11137. LEAL 4(DX)(SI*1), SI
  11138. CMPL SI, 8(SP)
  11139. JAE emit_remainder_encodeSnappyBlockAsm12B
  11140. MOVQ (BX)(DX*1), DI
  11141. MOVL SI, 20(SP)
  11142. MOVQ $0x000000cf1bbcdcbb, R9
  11143. MOVQ DI, R10
  11144. MOVQ DI, R11
  11145. SHRQ $0x08, R11
  11146. SHLQ $0x18, R10
  11147. IMULQ R9, R10
  11148. SHRQ $0x34, R10
  11149. SHLQ $0x18, R11
  11150. IMULQ R9, R11
  11151. SHRQ $0x34, R11
  11152. MOVL (AX)(R10*4), SI
  11153. MOVL (AX)(R11*4), R8
  11154. MOVL DX, (AX)(R10*4)
  11155. LEAL 1(DX), R10
  11156. MOVL R10, (AX)(R11*4)
  11157. MOVQ DI, R10
  11158. SHRQ $0x10, R10
  11159. SHLQ $0x18, R10
  11160. IMULQ R9, R10
  11161. SHRQ $0x34, R10
  11162. MOVL DX, R9
  11163. SUBL 16(SP), R9
  11164. MOVL 1(BX)(R9*1), R11
  11165. MOVQ DI, R9
  11166. SHRQ $0x08, R9
  11167. CMPL R9, R11
  11168. JNE no_repeat_found_encodeSnappyBlockAsm12B
  11169. LEAL 1(DX), DI
  11170. MOVL 12(SP), SI
  11171. MOVL DI, R8
  11172. SUBL 16(SP), R8
  11173. JZ repeat_extend_back_end_encodeSnappyBlockAsm12B
  11174. repeat_extend_back_loop_encodeSnappyBlockAsm12B:
  11175. CMPL DI, SI
  11176. JBE repeat_extend_back_end_encodeSnappyBlockAsm12B
  11177. MOVB -1(BX)(R8*1), R9
  11178. MOVB -1(BX)(DI*1), R10
  11179. CMPB R9, R10
  11180. JNE repeat_extend_back_end_encodeSnappyBlockAsm12B
  11181. LEAL -1(DI), DI
  11182. DECL R8
  11183. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B
  11184. repeat_extend_back_end_encodeSnappyBlockAsm12B:
  11185. MOVL DI, SI
  11186. SUBL 12(SP), SI
  11187. LEAQ 3(CX)(SI*1), SI
  11188. CMPQ SI, (SP)
  11189. JB repeat_dst_size_check_encodeSnappyBlockAsm12B
  11190. MOVQ $0x00000000, ret+56(FP)
  11191. RET
  11192. repeat_dst_size_check_encodeSnappyBlockAsm12B:
  11193. MOVL 12(SP), SI
  11194. CMPL SI, DI
  11195. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
  11196. MOVL DI, R8
  11197. MOVL DI, 12(SP)
  11198. LEAQ (BX)(SI*1), R9
  11199. SUBL SI, R8
  11200. LEAL -1(R8), SI
  11201. CMPL SI, $0x3c
  11202. JB one_byte_repeat_emit_encodeSnappyBlockAsm12B
  11203. CMPL SI, $0x00000100
  11204. JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B
  11205. JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B
  11206. three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  11207. MOVB $0xf4, (CX)
  11208. MOVW SI, 1(CX)
  11209. ADDQ $0x03, CX
  11210. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
  11211. two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
  11212. MOVB $0xf0, (CX)
  11213. MOVB SI, 1(CX)
  11214. ADDQ $0x02, CX
  11215. CMPL SI, $0x40
  11216. JB memmove_repeat_emit_encodeSnappyBlockAsm12B
  11217. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
  11218. one_byte_repeat_emit_encodeSnappyBlockAsm12B:
  11219. SHLB $0x02, SI
  11220. MOVB SI, (CX)
  11221. ADDQ $0x01, CX
  11222. memmove_repeat_emit_encodeSnappyBlockAsm12B:
  11223. LEAQ (CX)(R8*1), SI
  11224. // genMemMoveShort
  11225. CMPQ R8, $0x08
  11226. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
  11227. CMPQ R8, $0x10
  11228. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
  11229. CMPQ R8, $0x20
  11230. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
  11231. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
  11232. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
  11233. MOVQ (R9), R10
  11234. MOVQ R10, (CX)
  11235. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  11236. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
  11237. MOVQ (R9), R10
  11238. MOVQ -8(R9)(R8*1), R9
  11239. MOVQ R10, (CX)
  11240. MOVQ R9, -8(CX)(R8*1)
  11241. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  11242. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
  11243. MOVOU (R9), X0
  11244. MOVOU -16(R9)(R8*1), X1
  11245. MOVOU X0, (CX)
  11246. MOVOU X1, -16(CX)(R8*1)
  11247. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
  11248. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
  11249. MOVOU (R9), X0
  11250. MOVOU 16(R9), X1
  11251. MOVOU -32(R9)(R8*1), X2
  11252. MOVOU -16(R9)(R8*1), X3
  11253. MOVOU X0, (CX)
  11254. MOVOU X1, 16(CX)
  11255. MOVOU X2, -32(CX)(R8*1)
  11256. MOVOU X3, -16(CX)(R8*1)
  11257. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
  11258. MOVQ SI, CX
  11259. JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
  11260. memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
  11261. LEAQ (CX)(R8*1), SI
  11262. // genMemMoveLong
  11263. MOVOU (R9), X0
  11264. MOVOU 16(R9), X1
  11265. MOVOU -32(R9)(R8*1), X2
  11266. MOVOU -16(R9)(R8*1), X3
  11267. MOVQ R8, R11
  11268. SHRQ $0x05, R11
  11269. MOVQ CX, R10
  11270. ANDL $0x0000001f, R10
  11271. MOVQ $0x00000040, R12
  11272. SUBQ R10, R12
  11273. DECQ R11
  11274. JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  11275. LEAQ -32(R9)(R12*1), R10
  11276. LEAQ -32(CX)(R12*1), R13
  11277. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
  11278. MOVOU (R10), X4
  11279. MOVOU 16(R10), X5
  11280. MOVOA X4, (R13)
  11281. MOVOA X5, 16(R13)
  11282. ADDQ $0x20, R13
  11283. ADDQ $0x20, R10
  11284. ADDQ $0x20, R12
  11285. DECQ R11
  11286. JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
  11287. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
  11288. MOVOU -32(R9)(R12*1), X4
  11289. MOVOU -16(R9)(R12*1), X5
  11290. MOVOA X4, -32(CX)(R12*1)
  11291. MOVOA X5, -16(CX)(R12*1)
  11292. ADDQ $0x20, R12
  11293. CMPQ R8, R12
  11294. JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  11295. MOVOU X0, (CX)
  11296. MOVOU X1, 16(CX)
  11297. MOVOU X2, -32(CX)(R8*1)
  11298. MOVOU X3, -16(CX)(R8*1)
  11299. MOVQ SI, CX
  11300. emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
  11301. ADDL $0x05, DX
  11302. MOVL DX, SI
  11303. SUBL 16(SP), SI
  11304. MOVQ src_len+32(FP), R8
  11305. SUBL DX, R8
  11306. LEAQ (BX)(DX*1), R9
  11307. LEAQ (BX)(SI*1), SI
  11308. // matchLen
  11309. XORL R11, R11
  11310. matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
  11311. CMPL R8, $0x10
  11312. JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
  11313. MOVQ (R9)(R11*1), R10
  11314. MOVQ 8(R9)(R11*1), R12
  11315. XORQ (SI)(R11*1), R10
  11316. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
  11317. XORQ 8(SI)(R11*1), R12
  11318. JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
  11319. LEAL -16(R8), R8
  11320. LEAL 16(R11), R11
  11321. JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
  11322. matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
  11323. #ifdef GOAMD64_v3
  11324. TZCNTQ R12, R12
  11325. #else
  11326. BSFQ R12, R12
  11327. #endif
  11328. SARQ $0x03, R12
  11329. LEAL 8(R11)(R12*1), R11
  11330. JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
  11331. matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
  11332. CMPL R8, $0x08
  11333. JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
  11334. MOVQ (R9)(R11*1), R10
  11335. XORQ (SI)(R11*1), R10
  11336. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
  11337. LEAL -8(R8), R8
  11338. LEAL 8(R11), R11
  11339. JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
  11340. matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
  11341. #ifdef GOAMD64_v3
  11342. TZCNTQ R10, R10
  11343. #else
  11344. BSFQ R10, R10
  11345. #endif
  11346. SARQ $0x03, R10
  11347. LEAL (R11)(R10*1), R11
  11348. JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
  11349. matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
  11350. CMPL R8, $0x04
  11351. JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
  11352. MOVL (R9)(R11*1), R10
  11353. CMPL (SI)(R11*1), R10
  11354. JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
  11355. LEAL -4(R8), R8
  11356. LEAL 4(R11), R11
  11357. matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
  11358. CMPL R8, $0x01
  11359. JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
  11360. JB repeat_extend_forward_end_encodeSnappyBlockAsm12B
  11361. MOVW (R9)(R11*1), R10
  11362. CMPW (SI)(R11*1), R10
  11363. JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
  11364. LEAL 2(R11), R11
  11365. SUBL $0x02, R8
  11366. JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B
  11367. matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
  11368. MOVB (R9)(R11*1), R10
  11369. CMPB (SI)(R11*1), R10
  11370. JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B
  11371. LEAL 1(R11), R11
  11372. repeat_extend_forward_end_encodeSnappyBlockAsm12B:
  11373. ADDL R11, DX
  11374. MOVL DX, SI
  11375. SUBL DI, SI
  11376. MOVL 16(SP), DI
  11377. // emitCopy
  11378. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
  11379. CMPL SI, $0x40
  11380. JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
  11381. MOVB $0xee, (CX)
  11382. MOVW DI, 1(CX)
  11383. LEAL -60(SI), SI
  11384. ADDQ $0x03, CX
  11385. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
  11386. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
  11387. MOVL SI, R8
  11388. SHLL $0x02, R8
  11389. CMPL SI, $0x0c
  11390. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
  11391. CMPL DI, $0x00000800
  11392. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
  11393. LEAL -15(R8), R8
  11394. MOVB DI, 1(CX)
  11395. SHRL $0x08, DI
  11396. SHLL $0x05, DI
  11397. ORL DI, R8
  11398. MOVB R8, (CX)
  11399. ADDQ $0x02, CX
  11400. JMP repeat_end_emit_encodeSnappyBlockAsm12B
  11401. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
  11402. LEAL -2(R8), R8
  11403. MOVB R8, (CX)
  11404. MOVW DI, 1(CX)
  11405. ADDQ $0x03, CX
  11406. repeat_end_emit_encodeSnappyBlockAsm12B:
  11407. MOVL DX, 12(SP)
  11408. JMP search_loop_encodeSnappyBlockAsm12B
  11409. no_repeat_found_encodeSnappyBlockAsm12B:
  11410. CMPL (BX)(SI*1), DI
  11411. JEQ candidate_match_encodeSnappyBlockAsm12B
  11412. SHRQ $0x08, DI
  11413. MOVL (AX)(R10*4), SI
  11414. LEAL 2(DX), R9
  11415. CMPL (BX)(R8*1), DI
  11416. JEQ candidate2_match_encodeSnappyBlockAsm12B
  11417. MOVL R9, (AX)(R10*4)
  11418. SHRQ $0x08, DI
  11419. CMPL (BX)(SI*1), DI
  11420. JEQ candidate3_match_encodeSnappyBlockAsm12B
  11421. MOVL 20(SP), DX
  11422. JMP search_loop_encodeSnappyBlockAsm12B
  11423. candidate3_match_encodeSnappyBlockAsm12B:
  11424. ADDL $0x02, DX
  11425. JMP candidate_match_encodeSnappyBlockAsm12B
  11426. candidate2_match_encodeSnappyBlockAsm12B:
  11427. MOVL R9, (AX)(R10*4)
  11428. INCL DX
  11429. MOVL R8, SI
  11430. candidate_match_encodeSnappyBlockAsm12B:
  11431. MOVL 12(SP), DI
  11432. TESTL SI, SI
  11433. JZ match_extend_back_end_encodeSnappyBlockAsm12B
  11434. match_extend_back_loop_encodeSnappyBlockAsm12B:
  11435. CMPL DX, DI
  11436. JBE match_extend_back_end_encodeSnappyBlockAsm12B
  11437. MOVB -1(BX)(SI*1), R8
  11438. MOVB -1(BX)(DX*1), R9
  11439. CMPB R8, R9
  11440. JNE match_extend_back_end_encodeSnappyBlockAsm12B
  11441. LEAL -1(DX), DX
  11442. DECL SI
  11443. JZ match_extend_back_end_encodeSnappyBlockAsm12B
  11444. JMP match_extend_back_loop_encodeSnappyBlockAsm12B
  11445. match_extend_back_end_encodeSnappyBlockAsm12B:
  11446. MOVL DX, DI
  11447. SUBL 12(SP), DI
  11448. LEAQ 3(CX)(DI*1), DI
  11449. CMPQ DI, (SP)
  11450. JB match_dst_size_check_encodeSnappyBlockAsm12B
  11451. MOVQ $0x00000000, ret+56(FP)
  11452. RET
  11453. match_dst_size_check_encodeSnappyBlockAsm12B:
  11454. MOVL DX, DI
  11455. MOVL 12(SP), R8
  11456. CMPL R8, DI
  11457. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B
  11458. MOVL DI, R9
  11459. MOVL DI, 12(SP)
  11460. LEAQ (BX)(R8*1), DI
  11461. SUBL R8, R9
  11462. LEAL -1(R9), R8
  11463. CMPL R8, $0x3c
  11464. JB one_byte_match_emit_encodeSnappyBlockAsm12B
  11465. CMPL R8, $0x00000100
  11466. JB two_bytes_match_emit_encodeSnappyBlockAsm12B
  11467. JB three_bytes_match_emit_encodeSnappyBlockAsm12B
  11468. three_bytes_match_emit_encodeSnappyBlockAsm12B:
  11469. MOVB $0xf4, (CX)
  11470. MOVW R8, 1(CX)
  11471. ADDQ $0x03, CX
  11472. JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
  11473. two_bytes_match_emit_encodeSnappyBlockAsm12B:
  11474. MOVB $0xf0, (CX)
  11475. MOVB R8, 1(CX)
  11476. ADDQ $0x02, CX
  11477. CMPL R8, $0x40
  11478. JB memmove_match_emit_encodeSnappyBlockAsm12B
  11479. JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
  11480. one_byte_match_emit_encodeSnappyBlockAsm12B:
  11481. SHLB $0x02, R8
  11482. MOVB R8, (CX)
  11483. ADDQ $0x01, CX
  11484. memmove_match_emit_encodeSnappyBlockAsm12B:
  11485. LEAQ (CX)(R9*1), R8
  11486. // genMemMoveShort
  11487. CMPQ R9, $0x08
  11488. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
  11489. CMPQ R9, $0x10
  11490. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
  11491. CMPQ R9, $0x20
  11492. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
  11493. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
  11494. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
  11495. MOVQ (DI), R10
  11496. MOVQ R10, (CX)
  11497. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  11498. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
  11499. MOVQ (DI), R10
  11500. MOVQ -8(DI)(R9*1), DI
  11501. MOVQ R10, (CX)
  11502. MOVQ DI, -8(CX)(R9*1)
  11503. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  11504. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
  11505. MOVOU (DI), X0
  11506. MOVOU -16(DI)(R9*1), X1
  11507. MOVOU X0, (CX)
  11508. MOVOU X1, -16(CX)(R9*1)
  11509. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
  11510. emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
  11511. MOVOU (DI), X0
  11512. MOVOU 16(DI), X1
  11513. MOVOU -32(DI)(R9*1), X2
  11514. MOVOU -16(DI)(R9*1), X3
  11515. MOVOU X0, (CX)
  11516. MOVOU X1, 16(CX)
  11517. MOVOU X2, -32(CX)(R9*1)
  11518. MOVOU X3, -16(CX)(R9*1)
  11519. memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
  11520. MOVQ R8, CX
  11521. JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B
  11522. memmove_long_match_emit_encodeSnappyBlockAsm12B:
  11523. LEAQ (CX)(R9*1), R8
  11524. // genMemMoveLong
  11525. MOVOU (DI), X0
  11526. MOVOU 16(DI), X1
  11527. MOVOU -32(DI)(R9*1), X2
  11528. MOVOU -16(DI)(R9*1), X3
  11529. MOVQ R9, R11
  11530. SHRQ $0x05, R11
  11531. MOVQ CX, R10
  11532. ANDL $0x0000001f, R10
  11533. MOVQ $0x00000040, R12
  11534. SUBQ R10, R12
  11535. DECQ R11
  11536. JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  11537. LEAQ -32(DI)(R12*1), R10
  11538. LEAQ -32(CX)(R12*1), R13
  11539. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
  11540. MOVOU (R10), X4
  11541. MOVOU 16(R10), X5
  11542. MOVOA X4, (R13)
  11543. MOVOA X5, 16(R13)
  11544. ADDQ $0x20, R13
  11545. ADDQ $0x20, R10
  11546. ADDQ $0x20, R12
  11547. DECQ R11
  11548. JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
  11549. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
  11550. MOVOU -32(DI)(R12*1), X4
  11551. MOVOU -16(DI)(R12*1), X5
  11552. MOVOA X4, -32(CX)(R12*1)
  11553. MOVOA X5, -16(CX)(R12*1)
  11554. ADDQ $0x20, R12
  11555. CMPQ R9, R12
  11556. JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  11557. MOVOU X0, (CX)
  11558. MOVOU X1, 16(CX)
  11559. MOVOU X2, -32(CX)(R9*1)
  11560. MOVOU X3, -16(CX)(R9*1)
  11561. MOVQ R8, CX
  11562. emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
  11563. match_nolit_loop_encodeSnappyBlockAsm12B:
  11564. MOVL DX, DI
  11565. SUBL SI, DI
  11566. MOVL DI, 16(SP)
  11567. ADDL $0x04, DX
  11568. ADDL $0x04, SI
  11569. MOVQ src_len+32(FP), DI
  11570. SUBL DX, DI
  11571. LEAQ (BX)(DX*1), R8
  11572. LEAQ (BX)(SI*1), SI
  11573. // matchLen
  11574. XORL R10, R10
  11575. matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
  11576. CMPL DI, $0x10
  11577. JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
  11578. MOVQ (R8)(R10*1), R9
  11579. MOVQ 8(R8)(R10*1), R11
  11580. XORQ (SI)(R10*1), R9
  11581. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
  11582. XORQ 8(SI)(R10*1), R11
  11583. JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
  11584. LEAL -16(DI), DI
  11585. LEAL 16(R10), R10
  11586. JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
  11587. matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
  11588. #ifdef GOAMD64_v3
  11589. TZCNTQ R11, R11
  11590. #else
  11591. BSFQ R11, R11
  11592. #endif
  11593. SARQ $0x03, R11
  11594. LEAL 8(R10)(R11*1), R10
  11595. JMP match_nolit_end_encodeSnappyBlockAsm12B
  11596. matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
  11597. CMPL DI, $0x08
  11598. JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
  11599. MOVQ (R8)(R10*1), R9
  11600. XORQ (SI)(R10*1), R9
  11601. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
  11602. LEAL -8(DI), DI
  11603. LEAL 8(R10), R10
  11604. JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
  11605. matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
  11606. #ifdef GOAMD64_v3
  11607. TZCNTQ R9, R9
  11608. #else
  11609. BSFQ R9, R9
  11610. #endif
  11611. SARQ $0x03, R9
  11612. LEAL (R10)(R9*1), R10
  11613. JMP match_nolit_end_encodeSnappyBlockAsm12B
  11614. matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
  11615. CMPL DI, $0x04
  11616. JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
  11617. MOVL (R8)(R10*1), R9
  11618. CMPL (SI)(R10*1), R9
  11619. JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
  11620. LEAL -4(DI), DI
  11621. LEAL 4(R10), R10
  11622. matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
  11623. CMPL DI, $0x01
  11624. JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
  11625. JB match_nolit_end_encodeSnappyBlockAsm12B
  11626. MOVW (R8)(R10*1), R9
  11627. CMPW (SI)(R10*1), R9
  11628. JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
  11629. LEAL 2(R10), R10
  11630. SUBL $0x02, DI
  11631. JZ match_nolit_end_encodeSnappyBlockAsm12B
  11632. matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
  11633. MOVB (R8)(R10*1), R9
  11634. CMPB (SI)(R10*1), R9
  11635. JNE match_nolit_end_encodeSnappyBlockAsm12B
  11636. LEAL 1(R10), R10
  11637. match_nolit_end_encodeSnappyBlockAsm12B:
  11638. ADDL R10, DX
  11639. MOVL 16(SP), SI
  11640. ADDL $0x04, R10
  11641. MOVL DX, 12(SP)
  11642. // emitCopy
  11643. two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
  11644. CMPL R10, $0x40
  11645. JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
  11646. MOVB $0xee, (CX)
  11647. MOVW SI, 1(CX)
  11648. LEAL -60(R10), R10
  11649. ADDQ $0x03, CX
  11650. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
  11651. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
  11652. MOVL R10, DI
  11653. SHLL $0x02, DI
  11654. CMPL R10, $0x0c
  11655. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
  11656. CMPL SI, $0x00000800
  11657. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
  11658. LEAL -15(DI), DI
  11659. MOVB SI, 1(CX)
  11660. SHRL $0x08, SI
  11661. SHLL $0x05, SI
  11662. ORL SI, DI
  11663. MOVB DI, (CX)
  11664. ADDQ $0x02, CX
  11665. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
  11666. emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
  11667. LEAL -2(DI), DI
  11668. MOVB DI, (CX)
  11669. MOVW SI, 1(CX)
  11670. ADDQ $0x03, CX
  11671. match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
  11672. CMPL DX, 8(SP)
  11673. JAE emit_remainder_encodeSnappyBlockAsm12B
  11674. MOVQ -2(BX)(DX*1), DI
  11675. CMPQ CX, (SP)
  11676. JB match_nolit_dst_ok_encodeSnappyBlockAsm12B
  11677. MOVQ $0x00000000, ret+56(FP)
  11678. RET
  11679. match_nolit_dst_ok_encodeSnappyBlockAsm12B:
  11680. MOVQ $0x000000cf1bbcdcbb, R9
  11681. MOVQ DI, R8
  11682. SHRQ $0x10, DI
  11683. MOVQ DI, SI
  11684. SHLQ $0x18, R8
  11685. IMULQ R9, R8
  11686. SHRQ $0x34, R8
  11687. SHLQ $0x18, SI
  11688. IMULQ R9, SI
  11689. SHRQ $0x34, SI
  11690. LEAL -2(DX), R9
  11691. LEAQ (AX)(SI*4), R10
  11692. MOVL (R10), SI
  11693. MOVL R9, (AX)(R8*4)
  11694. MOVL DX, (R10)
  11695. CMPL (BX)(SI*1), DI
  11696. JEQ match_nolit_loop_encodeSnappyBlockAsm12B
  11697. INCL DX
  11698. JMP search_loop_encodeSnappyBlockAsm12B
  11699. emit_remainder_encodeSnappyBlockAsm12B:
  11700. MOVQ src_len+32(FP), AX
  11701. SUBL 12(SP), AX
  11702. LEAQ 3(CX)(AX*1), AX
  11703. CMPQ AX, (SP)
  11704. JB emit_remainder_ok_encodeSnappyBlockAsm12B
  11705. MOVQ $0x00000000, ret+56(FP)
  11706. RET
  11707. emit_remainder_ok_encodeSnappyBlockAsm12B:
  11708. MOVQ src_len+32(FP), AX
  11709. MOVL 12(SP), DX
  11710. CMPL DX, AX
  11711. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
  11712. MOVL AX, SI
  11713. MOVL AX, 12(SP)
  11714. LEAQ (BX)(DX*1), AX
  11715. SUBL DX, SI
  11716. LEAL -1(SI), DX
  11717. CMPL DX, $0x3c
  11718. JB one_byte_emit_remainder_encodeSnappyBlockAsm12B
  11719. CMPL DX, $0x00000100
  11720. JB two_bytes_emit_remainder_encodeSnappyBlockAsm12B
  11721. JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B
  11722. three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  11723. MOVB $0xf4, (CX)
  11724. MOVW DX, 1(CX)
  11725. ADDQ $0x03, CX
  11726. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
  11727. two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
  11728. MOVB $0xf0, (CX)
  11729. MOVB DL, 1(CX)
  11730. ADDQ $0x02, CX
  11731. CMPL DX, $0x40
  11732. JB memmove_emit_remainder_encodeSnappyBlockAsm12B
  11733. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
  11734. one_byte_emit_remainder_encodeSnappyBlockAsm12B:
  11735. SHLB $0x02, DL
  11736. MOVB DL, (CX)
  11737. ADDQ $0x01, CX
  11738. memmove_emit_remainder_encodeSnappyBlockAsm12B:
  11739. LEAQ (CX)(SI*1), DX
  11740. MOVL SI, BX
  11741. // genMemMoveShort
  11742. CMPQ BX, $0x03
  11743. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
  11744. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
  11745. CMPQ BX, $0x08
  11746. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
  11747. CMPQ BX, $0x10
  11748. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
  11749. CMPQ BX, $0x20
  11750. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
  11751. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
  11752. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
  11753. MOVB (AX), SI
  11754. MOVB -1(AX)(BX*1), AL
  11755. MOVB SI, (CX)
  11756. MOVB AL, -1(CX)(BX*1)
  11757. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  11758. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
  11759. MOVW (AX), SI
  11760. MOVB 2(AX), AL
  11761. MOVW SI, (CX)
  11762. MOVB AL, 2(CX)
  11763. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  11764. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
  11765. MOVL (AX), SI
  11766. MOVL -4(AX)(BX*1), AX
  11767. MOVL SI, (CX)
  11768. MOVL AX, -4(CX)(BX*1)
  11769. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  11770. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
  11771. MOVQ (AX), SI
  11772. MOVQ -8(AX)(BX*1), AX
  11773. MOVQ SI, (CX)
  11774. MOVQ AX, -8(CX)(BX*1)
  11775. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  11776. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
  11777. MOVOU (AX), X0
  11778. MOVOU -16(AX)(BX*1), X1
  11779. MOVOU X0, (CX)
  11780. MOVOU X1, -16(CX)(BX*1)
  11781. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
  11782. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
  11783. MOVOU (AX), X0
  11784. MOVOU 16(AX), X1
  11785. MOVOU -32(AX)(BX*1), X2
  11786. MOVOU -16(AX)(BX*1), X3
  11787. MOVOU X0, (CX)
  11788. MOVOU X1, 16(CX)
  11789. MOVOU X2, -32(CX)(BX*1)
  11790. MOVOU X3, -16(CX)(BX*1)
  11791. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
  11792. MOVQ DX, CX
  11793. JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
  11794. memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
  11795. LEAQ (CX)(SI*1), DX
  11796. MOVL SI, BX
  11797. // genMemMoveLong
  11798. MOVOU (AX), X0
  11799. MOVOU 16(AX), X1
  11800. MOVOU -32(AX)(BX*1), X2
  11801. MOVOU -16(AX)(BX*1), X3
  11802. MOVQ BX, DI
  11803. SHRQ $0x05, DI
  11804. MOVQ CX, SI
  11805. ANDL $0x0000001f, SI
  11806. MOVQ $0x00000040, R8
  11807. SUBQ SI, R8
  11808. DECQ DI
  11809. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  11810. LEAQ -32(AX)(R8*1), SI
  11811. LEAQ -32(CX)(R8*1), R9
  11812. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
  11813. MOVOU (SI), X4
  11814. MOVOU 16(SI), X5
  11815. MOVOA X4, (R9)
  11816. MOVOA X5, 16(R9)
  11817. ADDQ $0x20, R9
  11818. ADDQ $0x20, SI
  11819. ADDQ $0x20, R8
  11820. DECQ DI
  11821. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
  11822. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
  11823. MOVOU -32(AX)(R8*1), X4
  11824. MOVOU -16(AX)(R8*1), X5
  11825. MOVOA X4, -32(CX)(R8*1)
  11826. MOVOA X5, -16(CX)(R8*1)
  11827. ADDQ $0x20, R8
  11828. CMPQ BX, R8
  11829. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
  11830. MOVOU X0, (CX)
  11831. MOVOU X1, 16(CX)
  11832. MOVOU X2, -32(CX)(BX*1)
  11833. MOVOU X3, -16(CX)(BX*1)
  11834. MOVQ DX, CX
  11835. emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
  11836. MOVQ dst_base+0(FP), AX
  11837. SUBQ AX, CX
  11838. MOVQ CX, ret+56(FP)
  11839. RET
  11840. // func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
  11841. // Requires: BMI, SSE2
  //
  // Encodes src into dst, using tmp as a table of 1024 32-bit src positions
  // indexed by a 10-bit hash of 4 input bytes. Returns the number of bytes
  // written to dst, or 0 when the output position reaches the dst limit
  // computed in the prologue.
  //
  // Register roles through the search/emit loops:
  //   AX = tmp (hash table base)   BX = src_base
  //   CX = dst write cursor        DX = current src position (s)
  // Stack slots (24-byte frame):
  //   0(SP)  = dst limit: dst_base + src_len - 9 - (src_len >> 5)
  //   8(SP)  = search limit: src_len - 8
  //   12(SP) = index of the first src byte not yet emitted
  //   16(SP) = offset (s - candidate) of the most recent match, initially 1
  //   20(SP) = next position to try when nothing matches at s
  11842. TEXT ·encodeSnappyBlockAsm10B(SB), $24-64
  11843. MOVQ tmp+48(FP), AX
  11844. MOVQ dst_base+0(FP), CX
  // Clear the table: 0x20 iterations of 128 bytes = 4096 bytes.
  11845. MOVQ $0x00000020, DX
  11846. MOVQ AX, BX
  11847. PXOR X0, X0
  11848. zero_loop_encodeSnappyBlockAsm10B:
  11849. MOVOU X0, (BX)
  11850. MOVOU X0, 16(BX)
  11851. MOVOU X0, 32(BX)
  11852. MOVOU X0, 48(BX)
  11853. MOVOU X0, 64(BX)
  11854. MOVOU X0, 80(BX)
  11855. MOVOU X0, 96(BX)
  11856. MOVOU X0, 112(BX)
  11857. ADDQ $0x80, BX
  11858. DECQ DX
  11859. JNZ zero_loop_encodeSnappyBlockAsm10B
  // Initialise the stack slots described in the header, then start at s = 1.
  11860. MOVL $0x00000000, 12(SP)
  11861. MOVQ src_len+32(FP), DX
  11862. LEAQ -9(DX), BX
  11863. LEAQ -8(DX), SI
  11864. MOVL SI, 8(SP)
  11865. SHRQ $0x05, DX
  11866. SUBL DX, BX
  11867. LEAQ (CX)(BX*1), BX
  11868. MOVQ BX, (SP)
  11869. MOVL $0x00000001, DX
  11870. MOVL DX, 16(SP)
  11871. MOVQ src_base+24(FP), BX
  // Main search loop. Next position = s + 4 + ((s - 12(SP)) >> 5), so the step
  // grows with the amount of pending unemitted input.
  11872. search_loop_encodeSnappyBlockAsm10B:
  11873. MOVL DX, SI
  11874. SUBL 12(SP), SI
  11875. SHRL $0x05, SI
  11876. LEAL 4(DX)(SI*1), SI
  11877. CMPL SI, 8(SP)
  11878. JAE emit_remainder_encodeSnappyBlockAsm10B
  // DI = 8 bytes at src[s]; remember the next position in 20(SP).
  11879. MOVQ (BX)(DX*1), DI
  11880. MOVL SI, 20(SP)
  // Hash = ((bytes << 32) * 0x9e3779b1) >> 54, i.e. 10 bits from the low 4 bytes.
  // R10 = hash of src[s:], R11 = hash of src[s+1:].
  11881. MOVQ $0x9e3779b1, R9
  11882. MOVQ DI, R10
  11883. MOVQ DI, R11
  11884. SHRQ $0x08, R11
  11885. SHLQ $0x20, R10
  11886. IMULQ R9, R10
  11887. SHRQ $0x36, R10
  11888. SHLQ $0x20, R11
  11889. IMULQ R9, R11
  11890. SHRQ $0x36, R11
  // Load both candidates (SI for s, R8 for s+1), then store s and s+1 in the table.
  11891. MOVL (AX)(R10*4), SI
  11892. MOVL (AX)(R11*4), R8
  11893. MOVL DX, (AX)(R10*4)
  11894. LEAL 1(DX), R10
  11895. MOVL R10, (AX)(R11*4)
  // R10 = hash of src[s+2:], used later for the third candidate.
  11896. MOVQ DI, R10
  11897. SHRQ $0x10, R10
  11898. SHLQ $0x20, R10
  11899. IMULQ R9, R10
  11900. SHRQ $0x36, R10
  // Repeat check: compare 4 bytes at s+1 with 4 bytes at s+1 - 16(SP).
  11901. MOVL DX, R9
  11902. SUBL 16(SP), R9
  11903. MOVL 1(BX)(R9*1), R11
  11904. MOVQ DI, R9
  11905. SHRQ $0x08, R9
  11906. CMPL R9, R11
  11907. JNE no_repeat_found_encodeSnappyBlockAsm10B
  // Repeat found at s+1. DI = match start, R8 = matching earlier position.
  // Extend backwards while the preceding bytes agree, DI stays above 12(SP)
  // and R8 has not reached 0.
  11908. LEAL 1(DX), DI
  11909. MOVL 12(SP), SI
  11910. MOVL DI, R8
  11911. SUBL 16(SP), R8
  11912. JZ repeat_extend_back_end_encodeSnappyBlockAsm10B
  11913. repeat_extend_back_loop_encodeSnappyBlockAsm10B:
  11914. CMPL DI, SI
  11915. JBE repeat_extend_back_end_encodeSnappyBlockAsm10B
  11916. MOVB -1(BX)(R8*1), R9
  11917. MOVB -1(BX)(DI*1), R10
  11918. CMPB R9, R10
  11919. JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
  11920. LEAL -1(DI), DI
  11921. DECL R8
  11922. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B
  // Return 0 if cursor + 3 + pending literal length reaches the dst limit.
  11923. repeat_extend_back_end_encodeSnappyBlockAsm10B:
  11924. MOVL DI, SI
  11925. SUBL 12(SP), SI
  11926. LEAQ 3(CX)(SI*1), SI
  11927. CMPQ SI, (SP)
  11928. JB repeat_dst_size_check_encodeSnappyBlockAsm10B
  11929. MOVQ $0x00000000, ret+56(FP)
  11930. RET
  // Emit src[12(SP):DI] as a literal (skipped when empty).
  // R9 = source pointer, R8 = length, SI = length - 1; 12(SP) is advanced to DI.
  11931. repeat_dst_size_check_encodeSnappyBlockAsm10B:
  11932. MOVL 12(SP), SI
  11933. CMPL SI, DI
  11934. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
  11935. MOVL DI, R8
  11936. MOVL DI, 12(SP)
  11937. LEAQ (BX)(SI*1), R9
  11938. SUBL SI, R8
  11939. LEAL -1(R8), SI
  // Header size: length-1 < 60 -> 1 byte, < 256 -> 2 bytes, otherwise 3 bytes.
  // The second JB tests the same flags as the first and its target is the
  // fall-through label, so it has no effect on control flow.
  11940. CMPL SI, $0x3c
  11941. JB one_byte_repeat_emit_encodeSnappyBlockAsm10B
  11942. CMPL SI, $0x00000100
  11943. JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B
  11944. JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B
  // 3-byte header: 0xf4 followed by 16-bit (length - 1).
  11945. three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  11946. MOVB $0xf4, (CX)
  11947. MOVW SI, 1(CX)
  11948. ADDQ $0x03, CX
  11949. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
  // 2-byte header: 0xf0 followed by 8-bit (length - 1). The short copy is used
  // only when length - 1 < 64.
  11950. two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
  11951. MOVB $0xf0, (CX)
  11952. MOVB SI, 1(CX)
  11953. ADDQ $0x02, CX
  11954. CMPL SI, $0x40
  11955. JB memmove_repeat_emit_encodeSnappyBlockAsm10B
  11956. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
  // 1-byte header: (length - 1) << 2.
  11957. one_byte_repeat_emit_encodeSnappyBlockAsm10B:
  11958. SHLB $0x02, SI
  11959. MOVB SI, (CX)
  11960. ADDQ $0x01, CX
  // Short copy of R8 bytes from (R9) to (CX); SI = cursor after the literal.
  11961. memmove_repeat_emit_encodeSnappyBlockAsm10B:
  11962. LEAQ (CX)(R8*1), SI
  11963. // genMemMoveShort
  11964. CMPQ R8, $0x08
  11965. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
  11966. CMPQ R8, $0x10
  11967. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
  11968. CMPQ R8, $0x20
  11969. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
  11970. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
  // Lengths <= 8: a full 8 bytes are loaded and stored regardless of R8; the
  // cursor still advances by exactly R8.
  // NOTE(review): presumably the src_len-8 search limit and the dst limit keep
  // the extra bytes in bounds - confirm against the caller's buffer sizing.
  11971. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
  11972. MOVQ (R9), R10
  11973. MOVQ R10, (CX)
  11974. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  // Larger sizes use overlapping head/tail moves so no byte loop is needed.
  11975. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
  11976. MOVQ (R9), R10
  11977. MOVQ -8(R9)(R8*1), R9
  11978. MOVQ R10, (CX)
  11979. MOVQ R9, -8(CX)(R8*1)
  11980. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  11981. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
  11982. MOVOU (R9), X0
  11983. MOVOU -16(R9)(R8*1), X1
  11984. MOVOU X0, (CX)
  11985. MOVOU X1, -16(CX)(R8*1)
  11986. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
  11987. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
  11988. MOVOU (R9), X0
  11989. MOVOU 16(R9), X1
  11990. MOVOU -32(R9)(R8*1), X2
  11991. MOVOU -16(R9)(R8*1), X3
  11992. MOVOU X0, (CX)
  11993. MOVOU X1, 16(CX)
  11994. MOVOU X2, -32(CX)(R8*1)
  11995. MOVOU X3, -16(CX)(R8*1)
  11996. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
  11997. MOVQ SI, CX
  11998. JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
  // Long copy of R8 bytes from (R9) to (CX). X0-X3 keep the first and last 32
  // source bytes. R12 = 64 - (CX & 31), so the MOVOA stores at
  // -32(CX)(R12*1) fall on 32-byte-aligned dst addresses. The saved head and
  // tail are written last with unaligned stores.
  11999. memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
  12000. LEAQ (CX)(R8*1), SI
  12001. // genMemMoveLong
  12002. MOVOU (R9), X0
  12003. MOVOU 16(R9), X1
  12004. MOVOU -32(R9)(R8*1), X2
  12005. MOVOU -16(R9)(R8*1), X3
  12006. MOVQ R8, R11
  12007. SHRQ $0x05, R11
  12008. MOVQ CX, R10
  12009. ANDL $0x0000001f, R10
  12010. MOVQ $0x00000040, R12
  12011. SUBQ R10, R12
  12012. DECQ R11
  12013. JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12014. LEAQ -32(R9)(R12*1), R10
  12015. LEAQ -32(CX)(R12*1), R13
  12016. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
  12017. MOVOU (R10), X4
  12018. MOVOU 16(R10), X5
  12019. MOVOA X4, (R13)
  12020. MOVOA X5, 16(R13)
  12021. ADDQ $0x20, R13
  12022. ADDQ $0x20, R10
  12023. ADDQ $0x20, R12
  12024. DECQ R11
  12025. JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
  // Copy 32 bytes per iteration while length (R8) >= R12.
  12026. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
  12027. MOVOU -32(R9)(R12*1), X4
  12028. MOVOU -16(R9)(R12*1), X5
  12029. MOVOA X4, -32(CX)(R12*1)
  12030. MOVOA X5, -16(CX)(R12*1)
  12031. ADDQ $0x20, R12
  12032. CMPQ R8, R12
  12033. JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12034. MOVOU X0, (CX)
  12035. MOVOU X1, 16(CX)
  12036. MOVOU X2, -32(CX)(R8*1)
  12037. MOVOU X3, -16(CX)(R8*1)
  12038. MOVQ SI, CX
  // Extend the repeat forward. s advances by 5 (the byte at s plus the 4 bytes
  // already compared at s+1). R9 = &src[s], SI = &src[s - 16(SP)],
  // R8 = bytes remaining in src.
  12039. emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
  12040. ADDL $0x05, DX
  12041. MOVL DX, SI
  12042. SUBL 16(SP), SI
  12043. MOVQ src_len+32(FP), R8
  12044. SUBL DX, R8
  12045. LEAQ (BX)(DX*1), R9
  12046. LEAQ (BX)(SI*1), SI
  // R11 counts matching bytes. The loop compares 16 bytes per pass by XORing
  // 8-byte words; on a difference, the trailing-zero bit count >> 3 gives the
  // number of equal leading bytes in that word.
  12047. // matchLen
  12048. XORL R11, R11
  12049. matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
  12050. CMPL R8, $0x10
  12051. JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
  12052. MOVQ (R9)(R11*1), R10
  12053. MOVQ 8(R9)(R11*1), R12
  12054. XORQ (SI)(R11*1), R10
  12055. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
  12056. XORQ 8(SI)(R11*1), R12
  12057. JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
  12058. LEAL -16(R8), R8
  12059. LEAL 16(R11), R11
  12060. JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
  // Difference in the second 8-byte word: count = R11 + 8 + equal bytes.
  12061. matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
  12062. #ifdef GOAMD64_v3
  12063. TZCNTQ R12, R12
  12064. #else
  12065. BSFQ R12, R12
  12066. #endif
  12067. SARQ $0x03, R12
  12068. LEAL 8(R11)(R12*1), R11
  12069. JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
  // Fewer than 16 bytes left: try 8, then 4, then 2, then 1.
  12070. matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
  12071. CMPL R8, $0x08
  12072. JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
  12073. MOVQ (R9)(R11*1), R10
  12074. XORQ (SI)(R11*1), R10
  12075. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
  12076. LEAL -8(R8), R8
  12077. LEAL 8(R11), R11
  12078. JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
  // Difference in the first 8-byte word: count = R11 + equal bytes.
  12079. matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
  12080. #ifdef GOAMD64_v3
  12081. TZCNTQ R10, R10
  12082. #else
  12083. BSFQ R10, R10
  12084. #endif
  12085. SARQ $0x03, R10
  12086. LEAL (R11)(R10*1), R11
  12087. JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
  12088. matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
  12089. CMPL R8, $0x04
  12090. JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
  12091. MOVL (R9)(R11*1), R10
  12092. CMPL (SI)(R11*1), R10
  12093. JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
  12094. LEAL -4(R8), R8
  12095. LEAL 4(R11), R11
  // Remaining == 1 goes to the byte compare; remaining == 0 finishes.
  12096. matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
  12097. CMPL R8, $0x01
  12098. JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
  12099. JB repeat_extend_forward_end_encodeSnappyBlockAsm10B
  12100. MOVW (R9)(R11*1), R10
  12101. CMPW (SI)(R11*1), R10
  12102. JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
  12103. LEAL 2(R11), R11
  12104. SUBL $0x02, R8
  12105. JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B
  12106. matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
  12107. MOVB (R9)(R11*1), R10
  12108. CMPB (SI)(R11*1), R10
  12109. JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
  12110. LEAL 1(R11), R11
  // s += matched bytes. SI = copy length (s - DI, where DI is the match start
  // after backward extension); DI is then reloaded with the offset 16(SP).
  12111. repeat_extend_forward_end_encodeSnappyBlockAsm10B:
  12112. ADDL R11, DX
  12113. MOVL DX, SI
  12114. SUBL DI, SI
  12115. MOVL 16(SP), DI
  12116. // emitCopy
  // While length > 64: write 0xee plus the 16-bit offset (3 bytes) and reduce
  // the length by 60.
  12117. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
  12118. CMPL SI, $0x40
  12119. JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
  12120. MOVB $0xee, (CX)
  12121. MOVW DI, 1(CX)
  12122. LEAL -60(SI), SI
  12123. ADDQ $0x03, CX
  12124. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
  // Length <= 64. R8 = length << 2. The 2-byte form is used only when
  // length < 12 and offset < 2048:
  //   byte0 = (length << 2) - 15 | (offset >> 8) << 5, byte1 = low offset byte.
  12125. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
  12126. MOVL SI, R8
  12127. SHLL $0x02, R8
  12128. CMPL SI, $0x0c
  12129. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
  12130. CMPL DI, $0x00000800
  12131. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
  12132. LEAL -15(R8), R8
  12133. MOVB DI, 1(CX)
  12134. SHRL $0x08, DI
  12135. SHLL $0x05, DI
  12136. ORL DI, R8
  12137. MOVB R8, (CX)
  12138. ADDQ $0x02, CX
  12139. JMP repeat_end_emit_encodeSnappyBlockAsm10B
  // 3-byte form: byte0 = (length << 2) - 2, followed by the 16-bit offset.
  12140. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
  12141. LEAL -2(R8), R8
  12142. MOVB R8, (CX)
  12143. MOVW DI, 1(CX)
  12144. ADDQ $0x03, CX
  // Everything up to s is now emitted; resume searching.
  12145. repeat_end_emit_encodeSnappyBlockAsm10B:
  12146. MOVL DX, 12(SP)
  12147. JMP search_loop_encodeSnappyBlockAsm10B
  // No repeat. Test the table candidates in order: SI against the 4 bytes at s,
  // R8 against the 4 bytes at s+1, then the entry for hash(s+2) against the
  // 4 bytes at s+2. The table entry for hash(s+2) is set to s+2 on every path
  // that gets past the first test.
  12148. no_repeat_found_encodeSnappyBlockAsm10B:
  12149. CMPL (BX)(SI*1), DI
  12150. JEQ candidate_match_encodeSnappyBlockAsm10B
  12151. SHRQ $0x08, DI
  12152. MOVL (AX)(R10*4), SI
  12153. LEAL 2(DX), R9
  12154. CMPL (BX)(R8*1), DI
  12155. JEQ candidate2_match_encodeSnappyBlockAsm10B
  12156. MOVL R9, (AX)(R10*4)
  12157. SHRQ $0x08, DI
  12158. CMPL (BX)(SI*1), DI
  12159. JEQ candidate3_match_encodeSnappyBlockAsm10B
  // Nothing matched: jump to the position saved in 20(SP).
  12160. MOVL 20(SP), DX
  12161. JMP search_loop_encodeSnappyBlockAsm10B
  // Match at s+2: advance s by 2; SI already holds the candidate.
  12162. candidate3_match_encodeSnappyBlockAsm10B:
  12163. ADDL $0x02, DX
  12164. JMP candidate_match_encodeSnappyBlockAsm10B
  // Match at s+1: advance s by 1 and use R8 as the candidate.
  12165. candidate2_match_encodeSnappyBlockAsm10B:
  12166. MOVL R9, (AX)(R10*4)
  12167. INCL DX
  12168. MOVL R8, SI
  // DX = match position, SI = candidate position. Extend backwards while the
  // preceding bytes agree, DX stays above 12(SP) and SI has not reached 0.
  12169. candidate_match_encodeSnappyBlockAsm10B:
  12170. MOVL 12(SP), DI
  12171. TESTL SI, SI
  12172. JZ match_extend_back_end_encodeSnappyBlockAsm10B
  12173. match_extend_back_loop_encodeSnappyBlockAsm10B:
  12174. CMPL DX, DI
  12175. JBE match_extend_back_end_encodeSnappyBlockAsm10B
  12176. MOVB -1(BX)(SI*1), R8
  12177. MOVB -1(BX)(DX*1), R9
  12178. CMPB R8, R9
  12179. JNE match_extend_back_end_encodeSnappyBlockAsm10B
  12180. LEAL -1(DX), DX
  12181. DECL SI
  12182. JZ match_extend_back_end_encodeSnappyBlockAsm10B
  12183. JMP match_extend_back_loop_encodeSnappyBlockAsm10B
  // Return 0 if cursor + 3 + pending literal length reaches the dst limit.
  12184. match_extend_back_end_encodeSnappyBlockAsm10B:
  12185. MOVL DX, DI
  12186. SUBL 12(SP), DI
  12187. LEAQ 3(CX)(DI*1), DI
  12188. CMPQ DI, (SP)
  12189. JB match_dst_size_check_encodeSnappyBlockAsm10B
  12190. MOVQ $0x00000000, ret+56(FP)
  12191. RET
  // Emit src[12(SP):s] as a literal (skipped when empty).
  // DI = source pointer, R9 = length, R8 = length - 1; 12(SP) is advanced to s.
  12192. match_dst_size_check_encodeSnappyBlockAsm10B:
  12193. MOVL DX, DI
  12194. MOVL 12(SP), R8
  12195. CMPL R8, DI
  12196. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
  12197. MOVL DI, R9
  12198. MOVL DI, 12(SP)
  12199. LEAQ (BX)(R8*1), DI
  12200. SUBL R8, R9
  12201. LEAL -1(R9), R8
  // Header selection mirrors the repeat path: 1, 2 or 3 bytes by length - 1.
  // The second JB again targets the fall-through label.
  12202. CMPL R8, $0x3c
  12203. JB one_byte_match_emit_encodeSnappyBlockAsm10B
  12204. CMPL R8, $0x00000100
  12205. JB two_bytes_match_emit_encodeSnappyBlockAsm10B
  12206. JB three_bytes_match_emit_encodeSnappyBlockAsm10B
  12207. three_bytes_match_emit_encodeSnappyBlockAsm10B:
  12208. MOVB $0xf4, (CX)
  12209. MOVW R8, 1(CX)
  12210. ADDQ $0x03, CX
  12211. JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
  12212. two_bytes_match_emit_encodeSnappyBlockAsm10B:
  12213. MOVB $0xf0, (CX)
  12214. MOVB R8, 1(CX)
  12215. ADDQ $0x02, CX
  12216. CMPL R8, $0x40
  12217. JB memmove_match_emit_encodeSnappyBlockAsm10B
  12218. JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
  12219. one_byte_match_emit_encodeSnappyBlockAsm10B:
  12220. SHLB $0x02, R8
  12221. MOVB R8, (CX)
  12222. ADDQ $0x01, CX
  // Short copy of R9 bytes from (DI) to (CX); R8 = cursor after the literal.
  // As in the repeat path, lengths <= 8 move a full 8 bytes.
  12223. memmove_match_emit_encodeSnappyBlockAsm10B:
  12224. LEAQ (CX)(R9*1), R8
  12225. // genMemMoveShort
  12226. CMPQ R9, $0x08
  12227. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
  12228. CMPQ R9, $0x10
  12229. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
  12230. CMPQ R9, $0x20
  12231. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
  12232. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
  12233. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
  12234. MOVQ (DI), R10
  12235. MOVQ R10, (CX)
  12236. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  12237. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
  12238. MOVQ (DI), R10
  12239. MOVQ -8(DI)(R9*1), DI
  12240. MOVQ R10, (CX)
  12241. MOVQ DI, -8(CX)(R9*1)
  12242. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  12243. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
  12244. MOVOU (DI), X0
  12245. MOVOU -16(DI)(R9*1), X1
  12246. MOVOU X0, (CX)
  12247. MOVOU X1, -16(CX)(R9*1)
  12248. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
  12249. emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
  12250. MOVOU (DI), X0
  12251. MOVOU 16(DI), X1
  12252. MOVOU -32(DI)(R9*1), X2
  12253. MOVOU -16(DI)(R9*1), X3
  12254. MOVOU X0, (CX)
  12255. MOVOU X1, 16(CX)
  12256. MOVOU X2, -32(CX)(R9*1)
  12257. MOVOU X3, -16(CX)(R9*1)
  12258. memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
  12259. MOVQ R8, CX
  12260. JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B
  // Long copy of R9 bytes from (DI) to (CX); same scheme as the repeat path
  // (head/tail saved in X0-X3, 32-byte-aligned MOVOA stores in between).
  12261. memmove_long_match_emit_encodeSnappyBlockAsm10B:
  12262. LEAQ (CX)(R9*1), R8
  12263. // genMemMoveLong
  12264. MOVOU (DI), X0
  12265. MOVOU 16(DI), X1
  12266. MOVOU -32(DI)(R9*1), X2
  12267. MOVOU -16(DI)(R9*1), X3
  12268. MOVQ R9, R11
  12269. SHRQ $0x05, R11
  12270. MOVQ CX, R10
  12271. ANDL $0x0000001f, R10
  12272. MOVQ $0x00000040, R12
  12273. SUBQ R10, R12
  12274. DECQ R11
  12275. JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12276. LEAQ -32(DI)(R12*1), R10
  12277. LEAQ -32(CX)(R12*1), R13
  12278. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
  12279. MOVOU (R10), X4
  12280. MOVOU 16(R10), X5
  12281. MOVOA X4, (R13)
  12282. MOVOA X5, 16(R13)
  12283. ADDQ $0x20, R13
  12284. ADDQ $0x20, R10
  12285. ADDQ $0x20, R12
  12286. DECQ R11
  12287. JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
  12288. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
  12289. MOVOU -32(DI)(R12*1), X4
  12290. MOVOU -16(DI)(R12*1), X5
  12291. MOVOA X4, -32(CX)(R12*1)
  12292. MOVOA X5, -16(CX)(R12*1)
  12293. ADDQ $0x20, R12
  12294. CMPQ R9, R12
  12295. JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12296. MOVOU X0, (CX)
  12297. MOVOU X1, 16(CX)
  12298. MOVOU X2, -32(CX)(R9*1)
  12299. MOVOU X3, -16(CX)(R9*1)
  12300. MOVQ R8, CX
  // Match with no pending literal. Record offset = s - candidate in 16(SP),
  // skip the 4 bytes already known to match, and measure the rest.
  // R8 = &src[s+4], SI = &src[candidate+4], DI = bytes remaining in src.
  12301. emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
  12302. match_nolit_loop_encodeSnappyBlockAsm10B:
  12303. MOVL DX, DI
  12304. SUBL SI, DI
  12305. MOVL DI, 16(SP)
  12306. ADDL $0x04, DX
  12307. ADDL $0x04, SI
  12308. MOVQ src_len+32(FP), DI
  12309. SUBL DX, DI
  12310. LEAQ (BX)(DX*1), R8
  12311. LEAQ (BX)(SI*1), SI
  // R10 counts matching bytes; same 16/8/4/2/1 scheme as the repeat path.
  12312. // matchLen
  12313. XORL R10, R10
  12314. matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
  12315. CMPL DI, $0x10
  12316. JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
  12317. MOVQ (R8)(R10*1), R9
  12318. MOVQ 8(R8)(R10*1), R11
  12319. XORQ (SI)(R10*1), R9
  12320. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
  12321. XORQ 8(SI)(R10*1), R11
  12322. JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
  12323. LEAL -16(DI), DI
  12324. LEAL 16(R10), R10
  12325. JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
  12326. matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
  12327. #ifdef GOAMD64_v3
  12328. TZCNTQ R11, R11
  12329. #else
  12330. BSFQ R11, R11
  12331. #endif
  12332. SARQ $0x03, R11
  12333. LEAL 8(R10)(R11*1), R10
  12334. JMP match_nolit_end_encodeSnappyBlockAsm10B
  12335. matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
  12336. CMPL DI, $0x08
  12337. JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
  12338. MOVQ (R8)(R10*1), R9
  12339. XORQ (SI)(R10*1), R9
  12340. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
  12341. LEAL -8(DI), DI
  12342. LEAL 8(R10), R10
  12343. JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
  12344. matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
  12345. #ifdef GOAMD64_v3
  12346. TZCNTQ R9, R9
  12347. #else
  12348. BSFQ R9, R9
  12349. #endif
  12350. SARQ $0x03, R9
  12351. LEAL (R10)(R9*1), R10
  12352. JMP match_nolit_end_encodeSnappyBlockAsm10B
  12353. matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
  12354. CMPL DI, $0x04
  12355. JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
  12356. MOVL (R8)(R10*1), R9
  12357. CMPL (SI)(R10*1), R9
  12358. JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
  12359. LEAL -4(DI), DI
  12360. LEAL 4(R10), R10
  12361. matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
  12362. CMPL DI, $0x01
  12363. JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
  12364. JB match_nolit_end_encodeSnappyBlockAsm10B
  12365. MOVW (R8)(R10*1), R9
  12366. CMPW (SI)(R10*1), R9
  12367. JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
  12368. LEAL 2(R10), R10
  12369. SUBL $0x02, DI
  12370. JZ match_nolit_end_encodeSnappyBlockAsm10B
  12371. matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
  12372. MOVB (R8)(R10*1), R9
  12373. CMPB (SI)(R10*1), R9
  12374. JNE match_nolit_end_encodeSnappyBlockAsm10B
  12375. LEAL 1(R10), R10
  // s += extra matched bytes; SI = offset; R10 = total length (extra + 4);
  // 12(SP) = s because the whole match is about to be emitted as a copy.
  12376. match_nolit_end_encodeSnappyBlockAsm10B:
  12377. ADDL R10, DX
  12378. MOVL 16(SP), SI
  12379. ADDL $0x04, R10
  12380. MOVL DX, 12(SP)
  12381. // emitCopy
  // Same encoding as the repeat path, with length in R10 and offset in SI.
  12382. two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
  12383. CMPL R10, $0x40
  12384. JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
  12385. MOVB $0xee, (CX)
  12386. MOVW SI, 1(CX)
  12387. LEAL -60(R10), R10
  12388. ADDQ $0x03, CX
  12389. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
  12390. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
  12391. MOVL R10, DI
  12392. SHLL $0x02, DI
  12393. CMPL R10, $0x0c
  12394. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
  12395. CMPL SI, $0x00000800
  12396. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
  12397. LEAL -15(DI), DI
  12398. MOVB SI, 1(CX)
  12399. SHRL $0x08, SI
  12400. SHLL $0x05, SI
  12401. ORL SI, DI
  12402. MOVB DI, (CX)
  12403. ADDQ $0x02, CX
  12404. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
  12405. emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
  12406. LEAL -2(DI), DI
  12407. MOVB DI, (CX)
  12408. MOVW SI, 1(CX)
  12409. ADDQ $0x03, CX
  // After the copy: finish if s reached the search limit; return 0 if the
  // cursor reached the dst limit. DI = 8 bytes at src[s-2].
  12410. match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
  12411. CMPL DX, 8(SP)
  12412. JAE emit_remainder_encodeSnappyBlockAsm10B
  12413. MOVQ -2(BX)(DX*1), DI
  12414. CMPQ CX, (SP)
  12415. JB match_nolit_dst_ok_encodeSnappyBlockAsm10B
  12416. MOVQ $0x00000000, ret+56(FP)
  12417. RET
  // Store s-2 under hash(src[s-2:]) and s under hash(src[s:]). If the previous
  // entry for hash(src[s:]) points at the same 4 bytes, continue directly with
  // another match; otherwise resume searching at s+1.
  12418. match_nolit_dst_ok_encodeSnappyBlockAsm10B:
  12419. MOVQ $0x9e3779b1, R9
  12420. MOVQ DI, R8
  12421. SHRQ $0x10, DI
  12422. MOVQ DI, SI
  12423. SHLQ $0x20, R8
  12424. IMULQ R9, R8
  12425. SHRQ $0x36, R8
  12426. SHLQ $0x20, SI
  12427. IMULQ R9, SI
  12428. SHRQ $0x36, SI
  12429. LEAL -2(DX), R9
  12430. LEAQ (AX)(SI*4), R10
  12431. MOVL (R10), SI
  12432. MOVL R9, (AX)(R8*4)
  12433. MOVL DX, (R10)
  12434. CMPL (BX)(SI*1), DI
  12435. JEQ match_nolit_loop_encodeSnappyBlockAsm10B
  12436. INCL DX
  12437. JMP search_loop_encodeSnappyBlockAsm10B
  // Tail: emit src[12(SP):src_len] as a final literal. From here AX is reused
  // as scratch and no longer holds the table pointer.
  // Return 0 if cursor + 3 + remaining length reaches the dst limit.
  12438. emit_remainder_encodeSnappyBlockAsm10B:
  12439. MOVQ src_len+32(FP), AX
  12440. SUBL 12(SP), AX
  12441. LEAQ 3(CX)(AX*1), AX
  12442. CMPQ AX, (SP)
  12443. JB emit_remainder_ok_encodeSnappyBlockAsm10B
  12444. MOVQ $0x00000000, ret+56(FP)
  12445. RET
  // AX = source pointer, SI = length, DX = length - 1. Nothing is emitted when
  // 12(SP) already equals src_len.
  12446. emit_remainder_ok_encodeSnappyBlockAsm10B:
  12447. MOVQ src_len+32(FP), AX
  12448. MOVL 12(SP), DX
  12449. CMPL DX, AX
  12450. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
  12451. MOVL AX, SI
  12452. MOVL AX, 12(SP)
  12453. LEAQ (BX)(DX*1), AX
  12454. SUBL DX, SI
  12455. LEAL -1(SI), DX
  // Header selection as above; the second JB targets the fall-through label.
  12456. CMPL DX, $0x3c
  12457. JB one_byte_emit_remainder_encodeSnappyBlockAsm10B
  12458. CMPL DX, $0x00000100
  12459. JB two_bytes_emit_remainder_encodeSnappyBlockAsm10B
  12460. JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B
  12461. three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  12462. MOVB $0xf4, (CX)
  12463. MOVW DX, 1(CX)
  12464. ADDQ $0x03, CX
  12465. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
  12466. two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
  12467. MOVB $0xf0, (CX)
  12468. MOVB DL, 1(CX)
  12469. ADDQ $0x02, CX
  12470. CMPL DX, $0x40
  12471. JB memmove_emit_remainder_encodeSnappyBlockAsm10B
  12472. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
  12473. one_byte_emit_remainder_encodeSnappyBlockAsm10B:
  12474. SHLB $0x02, DL
  12475. MOVB DL, (CX)
  12476. ADDQ $0x01, CX
  // Short copy of BX bytes from (AX) to (CX); DX = cursor after the literal.
  // Unlike the in-loop copies, this variant has separate 1-2, 3 and 4-7 byte
  // cases, so it touches only bytes inside the requested length.
  12477. memmove_emit_remainder_encodeSnappyBlockAsm10B:
  12478. LEAQ (CX)(SI*1), DX
  12479. MOVL SI, BX
  12480. // genMemMoveShort
  12481. CMPQ BX, $0x03
  12482. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
  12483. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
  12484. CMPQ BX, $0x08
  12485. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
  12486. CMPQ BX, $0x10
  12487. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
  12488. CMPQ BX, $0x20
  12489. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
  12490. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
  // 1 or 2 bytes: first and last byte (the same byte when the length is 1).
  12491. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
  12492. MOVB (AX), SI
  12493. MOVB -1(AX)(BX*1), AL
  12494. MOVB SI, (CX)
  12495. MOVB AL, -1(CX)(BX*1)
  12496. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  12497. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
  12498. MOVW (AX), SI
  12499. MOVB 2(AX), AL
  12500. MOVW SI, (CX)
  12501. MOVB AL, 2(CX)
  12502. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  12503. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
  12504. MOVL (AX), SI
  12505. MOVL -4(AX)(BX*1), AX
  12506. MOVL SI, (CX)
  12507. MOVL AX, -4(CX)(BX*1)
  12508. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  12509. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
  12510. MOVQ (AX), SI
  12511. MOVQ -8(AX)(BX*1), AX
  12512. MOVQ SI, (CX)
  12513. MOVQ AX, -8(CX)(BX*1)
  12514. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  12515. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
  12516. MOVOU (AX), X0
  12517. MOVOU -16(AX)(BX*1), X1
  12518. MOVOU X0, (CX)
  12519. MOVOU X1, -16(CX)(BX*1)
  12520. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
  12521. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
  12522. MOVOU (AX), X0
  12523. MOVOU 16(AX), X1
  12524. MOVOU -32(AX)(BX*1), X2
  12525. MOVOU -16(AX)(BX*1), X3
  12526. MOVOU X0, (CX)
  12527. MOVOU X1, 16(CX)
  12528. MOVOU X2, -32(CX)(BX*1)
  12529. MOVOU X3, -16(CX)(BX*1)
  12530. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
  12531. MOVQ DX, CX
  12532. JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
  // Long copy of BX bytes from (AX) to (CX); same scheme as the in-loop long
  // copies (head/tail saved in X0-X3, 32-byte-aligned MOVOA stores in between).
  12533. memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
  12534. LEAQ (CX)(SI*1), DX
  12535. MOVL SI, BX
  12536. // genMemMoveLong
  12537. MOVOU (AX), X0
  12538. MOVOU 16(AX), X1
  12539. MOVOU -32(AX)(BX*1), X2
  12540. MOVOU -16(AX)(BX*1), X3
  12541. MOVQ BX, DI
  12542. SHRQ $0x05, DI
  12543. MOVQ CX, SI
  12544. ANDL $0x0000001f, SI
  12545. MOVQ $0x00000040, R8
  12546. SUBQ SI, R8
  12547. DECQ DI
  12548. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12549. LEAQ -32(AX)(R8*1), SI
  12550. LEAQ -32(CX)(R8*1), R9
  12551. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
  12552. MOVOU (SI), X4
  12553. MOVOU 16(SI), X5
  12554. MOVOA X4, (R9)
  12555. MOVOA X5, 16(R9)
  12556. ADDQ $0x20, R9
  12557. ADDQ $0x20, SI
  12558. ADDQ $0x20, R8
  12559. DECQ DI
  12560. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
  12561. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
  12562. MOVOU -32(AX)(R8*1), X4
  12563. MOVOU -16(AX)(R8*1), X5
  12564. MOVOA X4, -32(CX)(R8*1)
  12565. MOVOA X5, -16(CX)(R8*1)
  12566. ADDQ $0x20, R8
  12567. CMPQ BX, R8
  12568. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
  12569. MOVOU X0, (CX)
  12570. MOVOU X1, 16(CX)
  12571. MOVOU X2, -32(CX)(BX*1)
  12572. MOVOU X3, -16(CX)(BX*1)
  12573. MOVQ DX, CX
  // Return the number of bytes written: cursor - dst_base.
  12574. emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
  12575. MOVQ dst_base+0(FP), AX
  12576. SUBQ AX, CX
  12577. MOVQ CX, ret+56(FP)
  12578. RET
  12579. // func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
  12580. // Requires: BMI, SSE2
  //
  // Encodes src into dst (Snappy-style literal/copy tags) and stores the
  // number of bytes written in ret. Every output-limit check that fails
  // stores 0 in ret and returns immediately.
  // tmp is zeroed on entry and used as a table of 256 uint32 source positions
  // indexed by an 8-bit hash (see SHRQ $0x38 below).
  //
  // Register roles in the main loop:
  //   AX = tmp (hash table base)     BX = src base
  //   CX = dst write pointer         DX = s, current source position
  // Stack slots:
  //   0(SP)  dst limit    = dst_base + (src_len - 9 - (src_len >> 5))
  //   8(SP)  search limit = src_len - 8
  //   12(SP) nextEmit: source position where not-yet-emitted literals start
  //   16(SP) repeat: offset of the most recent match (initially 1)
  //   20(SP) nextS: position to continue at when nothing matches at s
  12581. TEXT ·encodeSnappyBlockAsm8B(SB), $24-64
  12582. MOVQ tmp+48(FP), AX
  12583. MOVQ dst_base+0(FP), CX
  // Clear the table: 8 iterations x 128 bytes = 1024 bytes.
  12584. MOVQ $0x00000008, DX
  12585. MOVQ AX, BX
  12586. PXOR X0, X0
  12587. zero_loop_encodeSnappyBlockAsm8B:
  12588. MOVOU X0, (BX)
  12589. MOVOU X0, 16(BX)
  12590. MOVOU X0, 32(BX)
  12591. MOVOU X0, 48(BX)
  12592. MOVOU X0, 64(BX)
  12593. MOVOU X0, 80(BX)
  12594. MOVOU X0, 96(BX)
  12595. MOVOU X0, 112(BX)
  12596. ADDQ $0x80, BX
  12597. DECQ DX
  12598. JNZ zero_loop_encodeSnappyBlockAsm8B
  // Initialise the stack slots: nextEmit = 0, search limit, dst limit,
  // s = 1, repeat = 1. BX becomes the src base pointer.
  12599. MOVL $0x00000000, 12(SP)
  12600. MOVQ src_len+32(FP), DX
  12601. LEAQ -9(DX), BX
  12602. LEAQ -8(DX), SI
  12603. MOVL SI, 8(SP)
  12604. SHRQ $0x05, DX
  12605. SUBL DX, BX
  12606. LEAQ (CX)(BX*1), BX
  12607. MOVQ BX, (SP)
  12608. MOVL $0x00000001, DX
  12609. MOVL DX, 16(SP)
  12610. MOVQ src_base+24(FP), BX
  12611. search_loop_encodeSnappyBlockAsm8B:
  // nextS = s + 4 + ((s - nextEmit) >> 4); once nextS reaches the search
  // limit, stop searching and emit the remainder.
  12612. MOVL DX, SI
  12613. SUBL 12(SP), SI
  12614. SHRL $0x04, SI
  12615. LEAL 4(DX)(SI*1), SI
  12616. CMPL SI, 8(SP)
  12617. JAE emit_remainder_encodeSnappyBlockAsm8B
  // DI = 8 source bytes at s. The hash of a 4-byte group x is
  // ((x << 32) * 0x9e3779b1) >> 56.
  // R10 = hash(bytes at s), R11 = hash(bytes at s+1).
  12618. MOVQ (BX)(DX*1), DI
  12619. MOVL SI, 20(SP)
  12620. MOVQ $0x9e3779b1, R9
  12621. MOVQ DI, R10
  12622. MOVQ DI, R11
  12623. SHRQ $0x08, R11
  12624. SHLQ $0x20, R10
  12625. IMULQ R9, R10
  12626. SHRQ $0x38, R10
  12627. SHLQ $0x20, R11
  12628. IMULQ R9, R11
  12629. SHRQ $0x38, R11
  // Fetch both candidates (SI, R8), then record s and s+1 in the table.
  12630. MOVL (AX)(R10*4), SI
  12631. MOVL (AX)(R11*4), R8
  12632. MOVL DX, (AX)(R10*4)
  12633. LEAL 1(DX), R10
  12634. MOVL R10, (AX)(R11*4)
  // R10 = hash(bytes at s+2); used in no_repeat_found.
  12635. MOVQ DI, R10
  12636. SHRQ $0x10, R10
  12637. SHLQ $0x20, R10
  12638. IMULQ R9, R10
  12639. SHRQ $0x38, R10
  // Repeat check: compare the 4 bytes at s+1 with the 4 bytes at s+1-repeat.
  12640. MOVL DX, R9
  12641. SUBL 16(SP), R9
  12642. MOVL 1(BX)(R9*1), R11
  12643. MOVQ DI, R9
  12644. SHRQ $0x08, R9
  12645. CMPL R9, R11
  12646. JNE no_repeat_found_encodeSnappyBlockAsm8B
  // Repeat matched. DI = s+1 (match start), SI = nextEmit,
  // R8 = s+1-repeat (candidate position). Extend both backwards while the
  // preceding bytes are equal, stopping at nextEmit or at position 0.
  12647. LEAL 1(DX), DI
  12648. MOVL 12(SP), SI
  12649. MOVL DI, R8
  12650. SUBL 16(SP), R8
  12651. JZ repeat_extend_back_end_encodeSnappyBlockAsm8B
  12652. repeat_extend_back_loop_encodeSnappyBlockAsm8B:
  12653. CMPL DI, SI
  12654. JBE repeat_extend_back_end_encodeSnappyBlockAsm8B
  12655. MOVB -1(BX)(R8*1), R9
  12656. MOVB -1(BX)(DI*1), R10
  12657. CMPB R9, R10
  12658. JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
  12659. LEAL -1(DI), DI
  12660. DECL R8
  12661. JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B
  12662. repeat_extend_back_end_encodeSnappyBlockAsm8B:
  // Return 0 if CX + 3 + (DI - nextEmit) would reach the dst limit.
  12663. MOVL DI, SI
  12664. SUBL 12(SP), SI
  12665. LEAQ 3(CX)(SI*1), SI
  12666. CMPQ SI, (SP)
  12667. JB repeat_dst_size_check_encodeSnappyBlockAsm8B
  12668. MOVQ $0x00000000, ret+56(FP)
  12669. RET
  12670. repeat_dst_size_check_encodeSnappyBlockAsm8B:
  // Emit src[nextEmit:DI] as a literal (skipped when empty) and set
  // nextEmit = DI. R8 = length, R9 = source pointer, SI = length-1.
  // Tag: length-1 < 60  -> 1 byte  ((length-1) << 2)
  //      length-1 < 256 -> 2 bytes (0xf0, length-1)
  //      otherwise      -> 3 bytes (0xf4, 16-bit length-1)
  12671. MOVL 12(SP), SI
  12672. CMPL SI, DI
  12673. JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
  12674. MOVL DI, R8
  12675. MOVL DI, 12(SP)
  12676. LEAQ (BX)(SI*1), R9
  12677. SUBL SI, R8
  12678. LEAL -1(R8), SI
  12679. CMPL SI, $0x3c
  12680. JB one_byte_repeat_emit_encodeSnappyBlockAsm8B
  12681. CMPL SI, $0x00000100
  12682. JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B
  // NOTE(review): flags are unchanged since the JB above, so the next JB can
  // never be taken; execution always falls through to the three-byte form.
  12683. JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B
  12684. three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  12685. MOVB $0xf4, (CX)
  12686. MOVW SI, 1(CX)
  12687. ADDQ $0x03, CX
  12688. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
  12689. two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
  12690. MOVB $0xf0, (CX)
  12691. MOVB SI, 1(CX)
  12692. ADDQ $0x02, CX
  // Lengths up to 64 use the short copy, longer ones the long copy.
  12693. CMPL SI, $0x40
  12694. JB memmove_repeat_emit_encodeSnappyBlockAsm8B
  12695. JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
  12696. one_byte_repeat_emit_encodeSnappyBlockAsm8B:
  12697. SHLB $0x02, SI
  12698. MOVB SI, (CX)
  12699. ADDQ $0x01, CX
  12700. memmove_repeat_emit_encodeSnappyBlockAsm8B:
  // Short copy of R8 (<= 64) bytes from R9 to CX; SI = write pointer after
  // the copy. The <= 8 case always moves a full 8 bytes; larger sizes use
  // two or four loads/stores anchored at the start and end of the range.
  12701. LEAQ (CX)(R8*1), SI
  12702. // genMemMoveShort
  12703. CMPQ R8, $0x08
  12704. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
  12705. CMPQ R8, $0x10
  12706. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
  12707. CMPQ R8, $0x20
  12708. JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
  12709. JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
  12710. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
  12711. MOVQ (R9), R10
  12712. MOVQ R10, (CX)
  12713. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  12714. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
  12715. MOVQ (R9), R10
  12716. MOVQ -8(R9)(R8*1), R9
  12717. MOVQ R10, (CX)
  12718. MOVQ R9, -8(CX)(R8*1)
  12719. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  12720. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
  12721. MOVOU (R9), X0
  12722. MOVOU -16(R9)(R8*1), X1
  12723. MOVOU X0, (CX)
  12724. MOVOU X1, -16(CX)(R8*1)
  12725. JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
  12726. emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
  12727. MOVOU (R9), X0
  12728. MOVOU 16(R9), X1
  12729. MOVOU -32(R9)(R8*1), X2
  12730. MOVOU -16(R9)(R8*1), X3
  12731. MOVOU X0, (CX)
  12732. MOVOU X1, 16(CX)
  12733. MOVOU X2, -32(CX)(R8*1)
  12734. MOVOU X3, -16(CX)(R8*1)
  12735. memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
  12736. MOVQ SI, CX
  12737. JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
  12738. memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
  // Long copy of R8 bytes from R9 to CX. The first and last 32 bytes are
  // loaded into X0-X3 up front and stored last; the middle is copied in
  // 32-byte steps. R12 = 64 - (CX & 31), so the MOVOA stores at
  // -32(CX)(R12*1) hit a 32-byte-aligned destination address.
  // NOTE(review): DECQ leaves CF untouched, so the JA/JNA below combine ZF
  // from DECQ with CF from the preceding SUBQ/ADDQ - confirm against the
  // generator before changing instruction order here.
  12739. LEAQ (CX)(R8*1), SI
  12740. // genMemMoveLong
  12741. MOVOU (R9), X0
  12742. MOVOU 16(R9), X1
  12743. MOVOU -32(R9)(R8*1), X2
  12744. MOVOU -16(R9)(R8*1), X3
  12745. MOVQ R8, R11
  12746. SHRQ $0x05, R11
  12747. MOVQ CX, R10
  12748. ANDL $0x0000001f, R10
  12749. MOVQ $0x00000040, R12
  12750. SUBQ R10, R12
  12751. DECQ R11
  12752. JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  12753. LEAQ -32(R9)(R12*1), R10
  12754. LEAQ -32(CX)(R12*1), R13
  12755. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
  12756. MOVOU (R10), X4
  12757. MOVOU 16(R10), X5
  12758. MOVOA X4, (R13)
  12759. MOVOA X5, 16(R13)
  12760. ADDQ $0x20, R13
  12761. ADDQ $0x20, R10
  12762. ADDQ $0x20, R12
  12763. DECQ R11
  12764. JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
  12765. emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
  12766. MOVOU -32(R9)(R12*1), X4
  12767. MOVOU -16(R9)(R12*1), X5
  12768. MOVOA X4, -32(CX)(R12*1)
  12769. MOVOA X5, -16(CX)(R12*1)
  12770. ADDQ $0x20, R12
  12771. CMPQ R8, R12
  12772. JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  // Finish with the saved head and tail, then advance the write pointer.
  12773. MOVOU X0, (CX)
  12774. MOVOU X1, 16(CX)
  12775. MOVOU X2, -32(CX)(R8*1)
  12776. MOVOU X3, -16(CX)(R8*1)
  12777. MOVQ SI, CX
  12778. emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
  // s += 5 (the byte at s plus the 4 bytes already compared at s+1), then
  // measure how much further the repeat match runs.
  // R8 = bytes left in src, R9 = src + s, SI = src + s - repeat.
  12779. ADDL $0x05, DX
  12780. MOVL DX, SI
  12781. SUBL 16(SP), SI
  12782. MOVQ src_len+32(FP), R8
  12783. SUBL DX, R8
  12784. LEAQ (BX)(DX*1), R9
  12785. LEAQ (BX)(SI*1), SI
  // R11 counts equal bytes. Compare 16, then 8, 4, 2 and 1 bytes at a time.
  // On a 64-bit mismatch, the index of the lowest set bit of the XOR shifted
  // right by 3 is the number of equal leading bytes.
  12786. // matchLen
  12787. XORL R11, R11
  12788. matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
  12789. CMPL R8, $0x10
  12790. JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
  12791. MOVQ (R9)(R11*1), R10
  12792. MOVQ 8(R9)(R11*1), R12
  12793. XORQ (SI)(R11*1), R10
  12794. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
  12795. XORQ 8(SI)(R11*1), R12
  12796. JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
  12797. LEAL -16(R8), R8
  12798. LEAL 16(R11), R11
  12799. JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
  12800. matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
  12801. #ifdef GOAMD64_v3
  12802. TZCNTQ R12, R12
  12803. #else
  12804. BSFQ R12, R12
  12805. #endif
  12806. SARQ $0x03, R12
  12807. LEAL 8(R11)(R12*1), R11
  12808. JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
  12809. matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
  12810. CMPL R8, $0x08
  12811. JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
  12812. MOVQ (R9)(R11*1), R10
  12813. XORQ (SI)(R11*1), R10
  12814. JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
  12815. LEAL -8(R8), R8
  12816. LEAL 8(R11), R11
  12817. JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
  12818. matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
  12819. #ifdef GOAMD64_v3
  12820. TZCNTQ R10, R10
  12821. #else
  12822. BSFQ R10, R10
  12823. #endif
  12824. SARQ $0x03, R10
  12825. LEAL (R11)(R10*1), R11
  12826. JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
  12827. matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
  12828. CMPL R8, $0x04
  12829. JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
  12830. MOVL (R9)(R11*1), R10
  12831. CMPL (SI)(R11*1), R10
  12832. JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
  12833. LEAL -4(R8), R8
  12834. LEAL 4(R11), R11
  12835. matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
  12836. CMPL R8, $0x01
  12837. JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
  12838. JB repeat_extend_forward_end_encodeSnappyBlockAsm8B
  12839. MOVW (R9)(R11*1), R10
  12840. CMPW (SI)(R11*1), R10
  12841. JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
  12842. LEAL 2(R11), R11
  12843. SUBL $0x02, R8
  12844. JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B
  12845. matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
  12846. MOVB (R9)(R11*1), R10
  12847. CMPB (SI)(R11*1), R10
  12848. JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
  12849. LEAL 1(R11), R11
  12850. repeat_extend_forward_end_encodeSnappyBlockAsm8B:
  // s += forward-matched bytes. SI = s - DI = total match length measured
  // from the backward-extended start; DI = repeat offset.
  12851. ADDL R11, DX
  12852. MOVL DX, SI
  12853. SUBL DI, SI
  12854. MOVL 16(SP), DI
  // While length > 64: write tag 0xee plus the 16-bit offset (3 bytes) and
  // subtract 60 from the length.
  12855. // emitCopy
  12856. two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
  12857. CMPL SI, $0x40
  12858. JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
  12859. MOVB $0xee, (CX)
  12860. MOVW DI, 1(CX)
  12861. LEAL -60(SI), SI
  12862. ADDQ $0x03, CX
  12863. JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
  12864. two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
  // R8 = length << 2. length >= 12 takes the 3-byte form below; otherwise a
  // 2-byte copy is written: tag = (length << 2) - 15 with
  // (offset >> 8) << 5 OR-ed in, followed by the low offset byte.
  12865. MOVL SI, R8
  12866. SHLL $0x02, R8
  12867. CMPL SI, $0x0c
  12868. JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
  12869. LEAL -15(R8), R8
  12870. MOVB DI, 1(CX)
  12871. SHRL $0x08, DI
  12872. SHLL $0x05, DI
  12873. ORL DI, R8
  12874. MOVB R8, (CX)
  12875. ADDQ $0x02, CX
  12876. JMP repeat_end_emit_encodeSnappyBlockAsm8B
  12877. emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
  // 3-byte copy: tag = (length << 2) - 2, then the 16-bit offset.
  12878. LEAL -2(R8), R8
  12879. MOVB R8, (CX)
  12880. MOVW DI, 1(CX)
  12881. ADDQ $0x03, CX
  12882. repeat_end_emit_encodeSnappyBlockAsm8B:
  // Everything up to s has been emitted: nextEmit = s.
  12883. MOVL DX, 12(SP)
  12884. JMP search_loop_encodeSnappyBlockAsm8B
  12885. no_repeat_found_encodeSnappyBlockAsm8B:
  // Try three candidates in turn: SI (from hash of bytes at s) against the
  // bytes at s, R8 (from hash of bytes at s+1) against the bytes at s+1, and
  // the table entry for hash(bytes at s+2) against the bytes at s+2.
  // table[hash(s+2)] is set to s+2 on every path except a hit at s.
  12886. CMPL (BX)(SI*1), DI
  12887. JEQ candidate_match_encodeSnappyBlockAsm8B
  12888. SHRQ $0x08, DI
  12889. MOVL (AX)(R10*4), SI
  12890. LEAL 2(DX), R9
  12891. CMPL (BX)(R8*1), DI
  12892. JEQ candidate2_match_encodeSnappyBlockAsm8B
  12893. MOVL R9, (AX)(R10*4)
  12894. SHRQ $0x08, DI
  12895. CMPL (BX)(SI*1), DI
  12896. JEQ candidate3_match_encodeSnappyBlockAsm8B
  // Nothing matched: continue at nextS.
  12897. MOVL 20(SP), DX
  12898. JMP search_loop_encodeSnappyBlockAsm8B
  12899. candidate3_match_encodeSnappyBlockAsm8B:
  // Match at s+2; SI already holds its candidate.
  12900. ADDL $0x02, DX
  12901. JMP candidate_match_encodeSnappyBlockAsm8B
  12902. candidate2_match_encodeSnappyBlockAsm8B:
  // Match at s+1: record s+2 in the table, advance s, candidate = R8.
  12903. MOVL R9, (AX)(R10*4)
  12904. INCL DX
  12905. MOVL R8, SI
  12906. candidate_match_encodeSnappyBlockAsm8B:
  // DX = match start, SI = candidate position, DI = nextEmit. Extend both
  // backwards while the preceding bytes are equal, stopping at nextEmit or
  // when the candidate reaches position 0.
  12907. MOVL 12(SP), DI
  12908. TESTL SI, SI
  12909. JZ match_extend_back_end_encodeSnappyBlockAsm8B
  12910. match_extend_back_loop_encodeSnappyBlockAsm8B:
  12911. CMPL DX, DI
  12912. JBE match_extend_back_end_encodeSnappyBlockAsm8B
  12913. MOVB -1(BX)(SI*1), R8
  12914. MOVB -1(BX)(DX*1), R9
  12915. CMPB R8, R9
  12916. JNE match_extend_back_end_encodeSnappyBlockAsm8B
  12917. LEAL -1(DX), DX
  12918. DECL SI
  12919. JZ match_extend_back_end_encodeSnappyBlockAsm8B
  12920. JMP match_extend_back_loop_encodeSnappyBlockAsm8B
  12921. match_extend_back_end_encodeSnappyBlockAsm8B:
  // Return 0 if CX + 3 + (s - nextEmit) would reach the dst limit.
  12922. MOVL DX, DI
  12923. SUBL 12(SP), DI
  12924. LEAQ 3(CX)(DI*1), DI
  12925. CMPQ DI, (SP)
  12926. JB match_dst_size_check_encodeSnappyBlockAsm8B
  12927. MOVQ $0x00000000, ret+56(FP)
  12928. RET
  12929. match_dst_size_check_encodeSnappyBlockAsm8B:
  // Emit src[nextEmit:s] as a literal (skipped when empty) and set
  // nextEmit = s. R9 = length, DI = source pointer, R8 = length-1.
  // Same tag selection as in the repeat path above.
  12930. MOVL DX, DI
  12931. MOVL 12(SP), R8
  12932. CMPL R8, DI
  12933. JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
  12934. MOVL DI, R9
  12935. MOVL DI, 12(SP)
  12936. LEAQ (BX)(R8*1), DI
  12937. SUBL R8, R9
  12938. LEAL -1(R9), R8
  12939. CMPL R8, $0x3c
  12940. JB one_byte_match_emit_encodeSnappyBlockAsm8B
  12941. CMPL R8, $0x00000100
  12942. JB two_bytes_match_emit_encodeSnappyBlockAsm8B
  // NOTE(review): flags are unchanged since the JB above, so the next JB can
  // never be taken; execution always falls through to the three-byte form.
  12943. JB three_bytes_match_emit_encodeSnappyBlockAsm8B
  12944. three_bytes_match_emit_encodeSnappyBlockAsm8B:
  12945. MOVB $0xf4, (CX)
  12946. MOVW R8, 1(CX)
  12947. ADDQ $0x03, CX
  12948. JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
  12949. two_bytes_match_emit_encodeSnappyBlockAsm8B:
  12950. MOVB $0xf0, (CX)
  12951. MOVB R8, 1(CX)
  12952. ADDQ $0x02, CX
  12953. CMPL R8, $0x40
  12954. JB memmove_match_emit_encodeSnappyBlockAsm8B
  12955. JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
  12956. one_byte_match_emit_encodeSnappyBlockAsm8B:
  12957. SHLB $0x02, R8
  12958. MOVB R8, (CX)
  12959. ADDQ $0x01, CX
  12960. memmove_match_emit_encodeSnappyBlockAsm8B:
  // Short copy of R9 (<= 64) bytes from DI to CX; R8 = write pointer after
  // the copy. The <= 8 case always moves a full 8 bytes.
  12961. LEAQ (CX)(R9*1), R8
  12962. // genMemMoveShort
  12963. CMPQ R9, $0x08
  12964. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
  12965. CMPQ R9, $0x10
  12966. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
  12967. CMPQ R9, $0x20
  12968. JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
  12969. JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
  12970. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
  12971. MOVQ (DI), R10
  12972. MOVQ R10, (CX)
  12973. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  12974. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
  12975. MOVQ (DI), R10
  12976. MOVQ -8(DI)(R9*1), DI
  12977. MOVQ R10, (CX)
  12978. MOVQ DI, -8(CX)(R9*1)
  12979. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  12980. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
  12981. MOVOU (DI), X0
  12982. MOVOU -16(DI)(R9*1), X1
  12983. MOVOU X0, (CX)
  12984. MOVOU X1, -16(CX)(R9*1)
  12985. JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
  12986. emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
  12987. MOVOU (DI), X0
  12988. MOVOU 16(DI), X1
  12989. MOVOU -32(DI)(R9*1), X2
  12990. MOVOU -16(DI)(R9*1), X3
  12991. MOVOU X0, (CX)
  12992. MOVOU X1, 16(CX)
  12993. MOVOU X2, -32(CX)(R9*1)
  12994. MOVOU X3, -16(CX)(R9*1)
  12995. memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
  12996. MOVQ R8, CX
  12997. JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B
  12998. memmove_long_match_emit_encodeSnappyBlockAsm8B:
  // Long copy of R9 bytes from DI to CX; same scheme as the long copy in the
  // repeat path: head and tail kept in X0-X3 and stored last, the middle
  // copied in 32-byte steps with aligned stores.
  // NOTE(review): JA/JNA below rely on DECQ leaving CF untouched - confirm
  // against the generator before changing instruction order here.
  12999. LEAQ (CX)(R9*1), R8
  13000. // genMemMoveLong
  13001. MOVOU (DI), X0
  13002. MOVOU 16(DI), X1
  13003. MOVOU -32(DI)(R9*1), X2
  13004. MOVOU -16(DI)(R9*1), X3
  13005. MOVQ R9, R11
  13006. SHRQ $0x05, R11
  13007. MOVQ CX, R10
  13008. ANDL $0x0000001f, R10
  13009. MOVQ $0x00000040, R12
  13010. SUBQ R10, R12
  13011. DECQ R11
  13012. JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  13013. LEAQ -32(DI)(R12*1), R10
  13014. LEAQ -32(CX)(R12*1), R13
  13015. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
  13016. MOVOU (R10), X4
  13017. MOVOU 16(R10), X5
  13018. MOVOA X4, (R13)
  13019. MOVOA X5, 16(R13)
  13020. ADDQ $0x20, R13
  13021. ADDQ $0x20, R10
  13022. ADDQ $0x20, R12
  13023. DECQ R11
  13024. JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
  13025. emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
  13026. MOVOU -32(DI)(R12*1), X4
  13027. MOVOU -16(DI)(R12*1), X5
  13028. MOVOA X4, -32(CX)(R12*1)
  13029. MOVOA X5, -16(CX)(R12*1)
  13030. ADDQ $0x20, R12
  13031. CMPQ R9, R12
  13032. JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  13033. MOVOU X0, (CX)
  13034. MOVOU X1, 16(CX)
  13035. MOVOU X2, -32(CX)(R9*1)
  13036. MOVOU X3, -16(CX)(R9*1)
  13037. MOVQ R8, CX
  13038. emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
  13039. match_nolit_loop_encodeSnappyBlockAsm8B:
  // DX = s, SI = candidate. Store repeat = s - candidate, skip the 4 bytes
  // already known to match, and measure the rest of the match.
  // DI = bytes left in src, R8 = src + s, SI = src + candidate.
  13040. MOVL DX, DI
  13041. SUBL SI, DI
  13042. MOVL DI, 16(SP)
  13043. ADDL $0x04, DX
  13044. ADDL $0x04, SI
  13045. MOVQ src_len+32(FP), DI
  13046. SUBL DX, DI
  13047. LEAQ (BX)(DX*1), R8
  13048. LEAQ (BX)(SI*1), SI
  // R10 counts equal bytes; same 16/8/4/2/1 scheme as in the repeat path.
  13049. // matchLen
  13050. XORL R10, R10
  13051. matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
  13052. CMPL DI, $0x10
  13053. JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
  13054. MOVQ (R8)(R10*1), R9
  13055. MOVQ 8(R8)(R10*1), R11
  13056. XORQ (SI)(R10*1), R9
  13057. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
  13058. XORQ 8(SI)(R10*1), R11
  13059. JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
  13060. LEAL -16(DI), DI
  13061. LEAL 16(R10), R10
  13062. JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
  13063. matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
  13064. #ifdef GOAMD64_v3
  13065. TZCNTQ R11, R11
  13066. #else
  13067. BSFQ R11, R11
  13068. #endif
  13069. SARQ $0x03, R11
  13070. LEAL 8(R10)(R11*1), R10
  13071. JMP match_nolit_end_encodeSnappyBlockAsm8B
  13072. matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
  13073. CMPL DI, $0x08
  13074. JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
  13075. MOVQ (R8)(R10*1), R9
  13076. XORQ (SI)(R10*1), R9
  13077. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
  13078. LEAL -8(DI), DI
  13079. LEAL 8(R10), R10
  13080. JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
  13081. matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
  13082. #ifdef GOAMD64_v3
  13083. TZCNTQ R9, R9
  13084. #else
  13085. BSFQ R9, R9
  13086. #endif
  13087. SARQ $0x03, R9
  13088. LEAL (R10)(R9*1), R10
  13089. JMP match_nolit_end_encodeSnappyBlockAsm8B
  13090. matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
  13091. CMPL DI, $0x04
  13092. JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
  13093. MOVL (R8)(R10*1), R9
  13094. CMPL (SI)(R10*1), R9
  13095. JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
  13096. LEAL -4(DI), DI
  13097. LEAL 4(R10), R10
  13098. matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
  13099. CMPL DI, $0x01
  13100. JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
  13101. JB match_nolit_end_encodeSnappyBlockAsm8B
  13102. MOVW (R8)(R10*1), R9
  13103. CMPW (SI)(R10*1), R9
  13104. JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
  13105. LEAL 2(R10), R10
  13106. SUBL $0x02, DI
  13107. JZ match_nolit_end_encodeSnappyBlockAsm8B
  13108. matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
  13109. MOVB (R8)(R10*1), R9
  13110. CMPB (SI)(R10*1), R9
  13111. JNE match_nolit_end_encodeSnappyBlockAsm8B
  13112. LEAL 1(R10), R10
  13113. match_nolit_end_encodeSnappyBlockAsm8B:
  // s += extra matched bytes; SI = offset; R10 = total match length
  // (extra + 4); nextEmit = s.
  13114. ADDL R10, DX
  13115. MOVL 16(SP), SI
  13116. ADDL $0x04, R10
  13117. MOVL DX, 12(SP)
  // Same copy encoding as in the repeat path, with R10 = length and
  // SI = offset: 0xee + offset per 60 bytes while length > 64, then a 2-byte
  // form for length < 12 or a 3-byte form otherwise.
  13118. // emitCopy
  13119. two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
  13120. CMPL R10, $0x40
  13121. JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
  13122. MOVB $0xee, (CX)
  13123. MOVW SI, 1(CX)
  13124. LEAL -60(R10), R10
  13125. ADDQ $0x03, CX
  13126. JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
  13127. two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
  13128. MOVL R10, DI
  13129. SHLL $0x02, DI
  13130. CMPL R10, $0x0c
  13131. JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
  13132. LEAL -15(DI), DI
  13133. MOVB SI, 1(CX)
  13134. SHRL $0x08, SI
  13135. SHLL $0x05, SI
  13136. ORL SI, DI
  13137. MOVB DI, (CX)
  13138. ADDQ $0x02, CX
  13139. JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
  13140. emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
  13141. LEAL -2(DI), DI
  13142. MOVB DI, (CX)
  13143. MOVW SI, 1(CX)
  13144. ADDQ $0x03, CX
  13145. match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
  // Stop searching once s reaches the search limit. Otherwise load 8 bytes
  // at s-2 and return 0 if the write pointer has reached the dst limit.
  13146. CMPL DX, 8(SP)
  13147. JAE emit_remainder_encodeSnappyBlockAsm8B
  13148. MOVQ -2(BX)(DX*1), DI
  13149. CMPQ CX, (SP)
  13150. JB match_nolit_dst_ok_encodeSnappyBlockAsm8B
  13151. MOVQ $0x00000000, ret+56(FP)
  13152. RET
  13153. match_nolit_dst_ok_encodeSnappyBlockAsm8B:
  // R8 = hash(bytes at s-2); DI is shifted to the bytes at s and
  // SI = hash(bytes at s). Read the old table entry for hash(s) as the next
  // candidate, then record s-2 and s in the table.
  13154. MOVQ $0x9e3779b1, R9
  13155. MOVQ DI, R8
  13156. SHRQ $0x10, DI
  13157. MOVQ DI, SI
  13158. SHLQ $0x20, R8
  13159. IMULQ R9, R8
  13160. SHRQ $0x38, R8
  13161. SHLQ $0x20, SI
  13162. IMULQ R9, SI
  13163. SHRQ $0x38, SI
  13164. LEAL -2(DX), R9
  13165. LEAQ (AX)(SI*4), R10
  13166. MOVL (R10), SI
  13167. MOVL R9, (AX)(R8*4)
  13168. MOVL DX, (R10)
  // If the candidate's 4 bytes equal those at s, continue with another match
  // right away (no literals in between); otherwise resume searching at s+1.
  13169. CMPL (BX)(SI*1), DI
  13170. JEQ match_nolit_loop_encodeSnappyBlockAsm8B
  13171. INCL DX
  13172. JMP search_loop_encodeSnappyBlockAsm8B
  13173. emit_remainder_encodeSnappyBlockAsm8B:
  // Emit src[nextEmit:src_len] as the final literal. AX (table base) is
  // reused as scratch from here on. Return 0 if
  // CX + 3 + (src_len - nextEmit) would reach the dst limit.
  13174. MOVQ src_len+32(FP), AX
  13175. SUBL 12(SP), AX
  13176. LEAQ 3(CX)(AX*1), AX
  13177. CMPQ AX, (SP)
  13178. JB emit_remainder_ok_encodeSnappyBlockAsm8B
  13179. MOVQ $0x00000000, ret+56(FP)
  13180. RET
  13181. emit_remainder_ok_encodeSnappyBlockAsm8B:
  // Nothing to emit when nextEmit == src_len. Otherwise SI = length,
  // AX = source pointer, DX = length-1; same tag selection as above.
  13182. MOVQ src_len+32(FP), AX
  13183. MOVL 12(SP), DX
  13184. CMPL DX, AX
  13185. JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
  13186. MOVL AX, SI
  13187. MOVL AX, 12(SP)
  13188. LEAQ (BX)(DX*1), AX
  13189. SUBL DX, SI
  13190. LEAL -1(SI), DX
  13191. CMPL DX, $0x3c
  13192. JB one_byte_emit_remainder_encodeSnappyBlockAsm8B
  13193. CMPL DX, $0x00000100
  13194. JB two_bytes_emit_remainder_encodeSnappyBlockAsm8B
  // NOTE(review): flags are unchanged since the JB above, so the next JB can
  // never be taken; execution always falls through to the three-byte form.
  13195. JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B
  13196. three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  13197. MOVB $0xf4, (CX)
  13198. MOVW DX, 1(CX)
  13199. ADDQ $0x03, CX
  13200. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
  13201. two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
  13202. MOVB $0xf0, (CX)
  13203. MOVB DL, 1(CX)
  13204. ADDQ $0x02, CX
  13205. CMPL DX, $0x40
  13206. JB memmove_emit_remainder_encodeSnappyBlockAsm8B
  13207. JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
  13208. one_byte_emit_remainder_encodeSnappyBlockAsm8B:
  13209. SHLB $0x02, DL
  13210. MOVB DL, (CX)
  13211. ADDQ $0x01, CX
  13212. memmove_emit_remainder_encodeSnappyBlockAsm8B:
  // Short copy of BX (<= 64) bytes from AX to CX; DX = write pointer after
  // the copy. BX is reused as the length (src base no longer needed).
  // Unlike the in-loop short copies, lengths 1-2, 3 and 4-7 have their own
  // cases that move only bytes inside the range - presumably because the
  // remainder ends at the end of src; confirm against the generator.
  13213. LEAQ (CX)(SI*1), DX
  13214. MOVL SI, BX
  13215. // genMemMoveShort
  13216. CMPQ BX, $0x03
  13217. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
  13218. JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
  13219. CMPQ BX, $0x08
  13220. JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
  13221. CMPQ BX, $0x10
  13222. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
  13223. CMPQ BX, $0x20
  13224. JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
  13225. JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
  13226. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
  13227. MOVB (AX), SI
  13228. MOVB -1(AX)(BX*1), AL
  13229. MOVB SI, (CX)
  13230. MOVB AL, -1(CX)(BX*1)
  13231. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  13232. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
  13233. MOVW (AX), SI
  13234. MOVB 2(AX), AL
  13235. MOVW SI, (CX)
  13236. MOVB AL, 2(CX)
  13237. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  13238. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
  13239. MOVL (AX), SI
  13240. MOVL -4(AX)(BX*1), AX
  13241. MOVL SI, (CX)
  13242. MOVL AX, -4(CX)(BX*1)
  13243. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  13244. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
  13245. MOVQ (AX), SI
  13246. MOVQ -8(AX)(BX*1), AX
  13247. MOVQ SI, (CX)
  13248. MOVQ AX, -8(CX)(BX*1)
  13249. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  13250. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
  13251. MOVOU (AX), X0
  13252. MOVOU -16(AX)(BX*1), X1
  13253. MOVOU X0, (CX)
  13254. MOVOU X1, -16(CX)(BX*1)
  13255. JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
  13256. emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
  13257. MOVOU (AX), X0
  13258. MOVOU 16(AX), X1
  13259. MOVOU -32(AX)(BX*1), X2
  13260. MOVOU -16(AX)(BX*1), X3
  13261. MOVOU X0, (CX)
  13262. MOVOU X1, 16(CX)
  13263. MOVOU X2, -32(CX)(BX*1)
  13264. MOVOU X3, -16(CX)(BX*1)
  13265. memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
  13266. MOVQ DX, CX
  13267. JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
  13268. memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
  // Long copy of BX bytes from AX to CX; same scheme as the other long
  // copies (head/tail in X0-X3 stored last, aligned 32-byte middle steps).
  // NOTE(review): JA/JNA below rely on DECQ leaving CF untouched - confirm
  // against the generator before changing instruction order here.
  13269. LEAQ (CX)(SI*1), DX
  13270. MOVL SI, BX
  13271. // genMemMoveLong
  13272. MOVOU (AX), X0
  13273. MOVOU 16(AX), X1
  13274. MOVOU -32(AX)(BX*1), X2
  13275. MOVOU -16(AX)(BX*1), X3
  13276. MOVQ BX, DI
  13277. SHRQ $0x05, DI
  13278. MOVQ CX, SI
  13279. ANDL $0x0000001f, SI
  13280. MOVQ $0x00000040, R8
  13281. SUBQ SI, R8
  13282. DECQ DI
  13283. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  13284. LEAQ -32(AX)(R8*1), SI
  13285. LEAQ -32(CX)(R8*1), R9
  13286. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
  13287. MOVOU (SI), X4
  13288. MOVOU 16(SI), X5
  13289. MOVOA X4, (R9)
  13290. MOVOA X5, 16(R9)
  13291. ADDQ $0x20, R9
  13292. ADDQ $0x20, SI
  13293. ADDQ $0x20, R8
  13294. DECQ DI
  13295. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
  13296. emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
  13297. MOVOU -32(AX)(R8*1), X4
  13298. MOVOU -16(AX)(R8*1), X5
  13299. MOVOA X4, -32(CX)(R8*1)
  13300. MOVOA X5, -16(CX)(R8*1)
  13301. ADDQ $0x20, R8
  13302. CMPQ BX, R8
  13303. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
  13304. MOVOU X0, (CX)
  13305. MOVOU X1, 16(CX)
  13306. MOVOU X2, -32(CX)(BX*1)
  13307. MOVOU X3, -16(CX)(BX*1)
  13308. MOVQ DX, CX
  13309. emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
  // Return the number of bytes written: CX - dst_base.
  13310. MOVQ dst_base+0(FP), AX
  13311. SUBQ AX, CX
  13312. MOVQ CX, ret+56(FP)
  13313. RET
  13314. // func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
  13315. // Requires: BMI, SSE2
  13316. TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64
  13317. MOVQ tmp+48(FP), AX
  13318. MOVQ dst_base+0(FP), CX
  13319. MOVQ $0x00001200, DX
  13320. MOVQ AX, BX
  13321. PXOR X0, X0
  13322. zero_loop_encodeSnappyBetterBlockAsm:
  13323. MOVOU X0, (BX)
  13324. MOVOU X0, 16(BX)
  13325. MOVOU X0, 32(BX)
  13326. MOVOU X0, 48(BX)
  13327. MOVOU X0, 64(BX)
  13328. MOVOU X0, 80(BX)
  13329. MOVOU X0, 96(BX)
  13330. MOVOU X0, 112(BX)
  13331. ADDQ $0x80, BX
  13332. DECQ DX
  13333. JNZ zero_loop_encodeSnappyBetterBlockAsm
  13334. MOVL $0x00000000, 12(SP)
  13335. MOVQ src_len+32(FP), DX
  13336. LEAQ -9(DX), BX
  13337. LEAQ -8(DX), SI
  13338. MOVL SI, 8(SP)
  13339. SHRQ $0x05, DX
  13340. SUBL DX, BX
  13341. LEAQ (CX)(BX*1), BX
  13342. MOVQ BX, (SP)
  13343. MOVL $0x00000001, DX
  13344. MOVL $0x00000000, 16(SP)
  13345. MOVQ src_base+24(FP), BX
  13346. search_loop_encodeSnappyBetterBlockAsm:
  13347. MOVL DX, SI
  13348. SUBL 12(SP), SI
  13349. SHRL $0x07, SI
  13350. CMPL SI, $0x63
  13351. JBE check_maxskip_ok_encodeSnappyBetterBlockAsm
  13352. LEAL 100(DX), SI
  13353. JMP check_maxskip_cont_encodeSnappyBetterBlockAsm
  13354. check_maxskip_ok_encodeSnappyBetterBlockAsm:
  13355. LEAL 1(DX)(SI*1), SI
  13356. check_maxskip_cont_encodeSnappyBetterBlockAsm:
  13357. CMPL SI, 8(SP)
  13358. JAE emit_remainder_encodeSnappyBetterBlockAsm
  13359. MOVQ (BX)(DX*1), DI
  13360. MOVL SI, 20(SP)
  13361. MOVQ $0x00cf1bbcdcbfa563, R9
  13362. MOVQ $0x9e3779b1, SI
  13363. MOVQ DI, R10
  13364. MOVQ DI, R11
  13365. SHLQ $0x08, R10
  13366. IMULQ R9, R10
  13367. SHRQ $0x2f, R10
  13368. SHLQ $0x20, R11
  13369. IMULQ SI, R11
  13370. SHRQ $0x32, R11
  13371. MOVL (AX)(R10*4), SI
  13372. MOVL 524288(AX)(R11*4), R8
  13373. MOVL DX, (AX)(R10*4)
  13374. MOVL DX, 524288(AX)(R11*4)
  13375. MOVQ (BX)(SI*1), R10
  13376. MOVQ (BX)(R8*1), R11
  13377. CMPQ R10, DI
  13378. JEQ candidate_match_encodeSnappyBetterBlockAsm
  13379. CMPQ R11, DI
  13380. JNE no_short_found_encodeSnappyBetterBlockAsm
  13381. MOVL R8, SI
  13382. JMP candidate_match_encodeSnappyBetterBlockAsm
  13383. no_short_found_encodeSnappyBetterBlockAsm:
  13384. CMPL R10, DI
  13385. JEQ candidate_match_encodeSnappyBetterBlockAsm
  13386. CMPL R11, DI
  13387. JEQ candidateS_match_encodeSnappyBetterBlockAsm
  13388. MOVL 20(SP), DX
  13389. JMP search_loop_encodeSnappyBetterBlockAsm
  13390. candidateS_match_encodeSnappyBetterBlockAsm:
  13391. SHRQ $0x08, DI
  13392. MOVQ DI, R10
  13393. SHLQ $0x08, R10
  13394. IMULQ R9, R10
  13395. SHRQ $0x2f, R10
  13396. MOVL (AX)(R10*4), SI
  13397. INCL DX
  13398. MOVL DX, (AX)(R10*4)
  13399. CMPL (BX)(SI*1), DI
  13400. JEQ candidate_match_encodeSnappyBetterBlockAsm
  13401. DECL DX
  13402. MOVL R8, SI
  13403. candidate_match_encodeSnappyBetterBlockAsm:
  13404. MOVL 12(SP), DI
  13405. TESTL SI, SI
  13406. JZ match_extend_back_end_encodeSnappyBetterBlockAsm
  13407. match_extend_back_loop_encodeSnappyBetterBlockAsm:
  13408. CMPL DX, DI
  13409. JBE match_extend_back_end_encodeSnappyBetterBlockAsm
  13410. MOVB -1(BX)(SI*1), R8
  13411. MOVB -1(BX)(DX*1), R9
  13412. CMPB R8, R9
  13413. JNE match_extend_back_end_encodeSnappyBetterBlockAsm
  13414. LEAL -1(DX), DX
  13415. DECL SI
  13416. JZ match_extend_back_end_encodeSnappyBetterBlockAsm
  13417. JMP match_extend_back_loop_encodeSnappyBetterBlockAsm
  13418. match_extend_back_end_encodeSnappyBetterBlockAsm:
  13419. MOVL DX, DI
  13420. SUBL 12(SP), DI
  13421. LEAQ 5(CX)(DI*1), DI
  13422. CMPQ DI, (SP)
  13423. JB match_dst_size_check_encodeSnappyBetterBlockAsm
  13424. MOVQ $0x00000000, ret+56(FP)
  13425. RET
  13426. match_dst_size_check_encodeSnappyBetterBlockAsm:
  13427. MOVL DX, DI
  13428. ADDL $0x04, DX
  13429. ADDL $0x04, SI
  13430. MOVQ src_len+32(FP), R8
  13431. SUBL DX, R8
  13432. LEAQ (BX)(DX*1), R9
  13433. LEAQ (BX)(SI*1), R10
  13434. // matchLen
  13435. XORL R12, R12
  13436. matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
  13437. CMPL R8, $0x10
  13438. JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
  13439. MOVQ (R9)(R12*1), R11
  13440. MOVQ 8(R9)(R12*1), R13
  13441. XORQ (R10)(R12*1), R11
  13442. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
  13443. XORQ 8(R10)(R12*1), R13
  13444. JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
  13445. LEAL -16(R8), R8
  13446. LEAL 16(R12), R12
  13447. JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
  13448. matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
  13449. #ifdef GOAMD64_v3
  13450. TZCNTQ R13, R13
  13451. #else
  13452. BSFQ R13, R13
  13453. #endif
  13454. SARQ $0x03, R13
  13455. LEAL 8(R12)(R13*1), R12
  13456. JMP match_nolit_end_encodeSnappyBetterBlockAsm
  13457. matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
  13458. CMPL R8, $0x08
  13459. JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
  13460. MOVQ (R9)(R12*1), R11
  13461. XORQ (R10)(R12*1), R11
  13462. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
  13463. LEAL -8(R8), R8
  13464. LEAL 8(R12), R12
  13465. JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
  13466. matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
  13467. #ifdef GOAMD64_v3
  13468. TZCNTQ R11, R11
  13469. #else
  13470. BSFQ R11, R11
  13471. #endif
  13472. SARQ $0x03, R11
  13473. LEAL (R12)(R11*1), R12
  13474. JMP match_nolit_end_encodeSnappyBetterBlockAsm
  13475. matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
  13476. CMPL R8, $0x04
  13477. JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
  13478. MOVL (R9)(R12*1), R11
  13479. CMPL (R10)(R12*1), R11
  13480. JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
  13481. LEAL -4(R8), R8
  13482. LEAL 4(R12), R12
  13483. matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
  13484. CMPL R8, $0x01
  13485. JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
  13486. JB match_nolit_end_encodeSnappyBetterBlockAsm
  13487. MOVW (R9)(R12*1), R11
  13488. CMPW (R10)(R12*1), R11
  13489. JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
  13490. LEAL 2(R12), R12
  13491. SUBL $0x02, R8
  13492. JZ match_nolit_end_encodeSnappyBetterBlockAsm
  13493. matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
  13494. MOVB (R9)(R12*1), R11
  13495. CMPB (R10)(R12*1), R11
  13496. JNE match_nolit_end_encodeSnappyBetterBlockAsm
  13497. LEAL 1(R12), R12
  13498. match_nolit_end_encodeSnappyBetterBlockAsm:
  13499. MOVL DX, R8
  13500. SUBL SI, R8
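  13501. // Not a repeat check: drop the match and resume searching when at most 1 byte matched beyond the initial 4 (R12 <= 1) and the offset (R8) exceeds 0xffff.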
  13502. CMPL R12, $0x01
  13503. JA match_length_ok_encodeSnappyBetterBlockAsm
  13504. CMPL R8, $0x0000ffff
  13505. JBE match_length_ok_encodeSnappyBetterBlockAsm
  13506. MOVL 20(SP), DX
  13507. INCL DX
  13508. JMP search_loop_encodeSnappyBetterBlockAsm
  13509. match_length_ok_encodeSnappyBetterBlockAsm:
  13510. MOVL R8, 16(SP)
  13511. MOVL 12(SP), SI
  13512. CMPL SI, DI
  13513. JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
  13514. MOVL DI, R9
  13515. MOVL DI, 12(SP)
  13516. LEAQ (BX)(SI*1), R10
  13517. SUBL SI, R9
  13518. LEAL -1(R9), SI
  13519. CMPL SI, $0x3c
  13520. JB one_byte_match_emit_encodeSnappyBetterBlockAsm
  13521. CMPL SI, $0x00000100
  13522. JB two_bytes_match_emit_encodeSnappyBetterBlockAsm
  13523. CMPL SI, $0x00010000
  13524. JB three_bytes_match_emit_encodeSnappyBetterBlockAsm
  13525. CMPL SI, $0x01000000
  13526. JB four_bytes_match_emit_encodeSnappyBetterBlockAsm
  13527. MOVB $0xfc, (CX)
  13528. MOVL SI, 1(CX)
  13529. ADDQ $0x05, CX
  13530. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
  13531. four_bytes_match_emit_encodeSnappyBetterBlockAsm:
  13532. MOVL SI, R11
  13533. SHRL $0x10, R11
  13534. MOVB $0xf8, (CX)
  13535. MOVW SI, 1(CX)
  13536. MOVB R11, 3(CX)
  13537. ADDQ $0x04, CX
  13538. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
  13539. three_bytes_match_emit_encodeSnappyBetterBlockAsm:
  13540. MOVB $0xf4, (CX)
  13541. MOVW SI, 1(CX)
  13542. ADDQ $0x03, CX
  13543. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
  13544. two_bytes_match_emit_encodeSnappyBetterBlockAsm:
  13545. MOVB $0xf0, (CX)
  13546. MOVB SI, 1(CX)
  13547. ADDQ $0x02, CX
  13548. CMPL SI, $0x40
  13549. JB memmove_match_emit_encodeSnappyBetterBlockAsm
  13550. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
  13551. one_byte_match_emit_encodeSnappyBetterBlockAsm:
  13552. SHLB $0x02, SI
  13553. MOVB SI, (CX)
  13554. ADDQ $0x01, CX
  13555. memmove_match_emit_encodeSnappyBetterBlockAsm:
  13556. LEAQ (CX)(R9*1), SI
  13557. // genMemMoveShort
  13558. CMPQ R9, $0x08
  13559. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
  13560. CMPQ R9, $0x10
  13561. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
  13562. CMPQ R9, $0x20
  13563. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
  13564. JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
  13565. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
  13566. MOVQ (R10), R11
  13567. MOVQ R11, (CX)
  13568. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
  13569. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
  13570. MOVQ (R10), R11
  13571. MOVQ -8(R10)(R9*1), R10
  13572. MOVQ R11, (CX)
  13573. MOVQ R10, -8(CX)(R9*1)
  13574. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
  13575. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
  13576. MOVOU (R10), X0
  13577. MOVOU -16(R10)(R9*1), X1
  13578. MOVOU X0, (CX)
  13579. MOVOU X1, -16(CX)(R9*1)
  13580. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
  13581. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
  13582. MOVOU (R10), X0
  13583. MOVOU 16(R10), X1
  13584. MOVOU -32(R10)(R9*1), X2
  13585. MOVOU -16(R10)(R9*1), X3
  13586. MOVOU X0, (CX)
  13587. MOVOU X1, 16(CX)
  13588. MOVOU X2, -32(CX)(R9*1)
  13589. MOVOU X3, -16(CX)(R9*1)
  13590. memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
  13591. MOVQ SI, CX
  13592. JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
  13593. memmove_long_match_emit_encodeSnappyBetterBlockAsm:
  13594. LEAQ (CX)(R9*1), SI
  13595. // genMemMoveLong
  13596. MOVOU (R10), X0
  13597. MOVOU 16(R10), X1
  13598. MOVOU -32(R10)(R9*1), X2
  13599. MOVOU -16(R10)(R9*1), X3
  13600. MOVQ R9, R13
  13601. SHRQ $0x05, R13
  13602. MOVQ CX, R11
  13603. ANDL $0x0000001f, R11
  13604. MOVQ $0x00000040, R14
  13605. SUBQ R11, R14
  13606. DECQ R13
  13607. JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  13608. LEAQ -32(R10)(R14*1), R11
  13609. LEAQ -32(CX)(R14*1), R15
  13610. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
  13611. MOVOU (R11), X4
  13612. MOVOU 16(R11), X5
  13613. MOVOA X4, (R15)
  13614. MOVOA X5, 16(R15)
  13615. ADDQ $0x20, R15
  13616. ADDQ $0x20, R11
  13617. ADDQ $0x20, R14
  13618. DECQ R13
  13619. JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
  13620. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
  13621. MOVOU -32(R10)(R14*1), X4
  13622. MOVOU -16(R10)(R14*1), X5
  13623. MOVOA X4, -32(CX)(R14*1)
  13624. MOVOA X5, -16(CX)(R14*1)
  13625. ADDQ $0x20, R14
  13626. CMPQ R9, R14
  13627. JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  13628. MOVOU X0, (CX)
  13629. MOVOU X1, 16(CX)
  13630. MOVOU X2, -32(CX)(R9*1)
  13631. MOVOU X3, -16(CX)(R9*1)
  13632. MOVQ SI, CX
  13633. emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
  13634. ADDL R12, DX
  13635. ADDL $0x04, R12
  13636. MOVL DX, 12(SP)
  13637. // emitCopy
  13638. CMPL R8, $0x00010000
  13639. JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
  13640. four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
  13641. CMPL R12, $0x40
  13642. JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
  13643. MOVB $0xff, (CX)
  13644. MOVL R8, 1(CX)
  13645. LEAL -64(R12), R12
  13646. ADDQ $0x05, CX
  13647. CMPL R12, $0x04
  13648. JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
  13649. JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
  13650. four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
  13651. TESTL R12, R12
  13652. JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
  13653. XORL SI, SI
  13654. LEAL -1(SI)(R12*4), R12
  13655. MOVB R12, (CX)
  13656. MOVL R8, 1(CX)
  13657. ADDQ $0x05, CX
  13658. JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
  13659. two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
  13660. CMPL R12, $0x40
  13661. JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
  13662. MOVB $0xee, (CX)
  13663. MOVW R8, 1(CX)
  13664. LEAL -60(R12), R12
  13665. ADDQ $0x03, CX
  13666. JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
  13667. two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
  13668. MOVL R12, SI
  13669. SHLL $0x02, SI
  13670. CMPL R12, $0x0c
  13671. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
  13672. CMPL R8, $0x00000800
  13673. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
  13674. LEAL -15(SI), SI
  13675. MOVB R8, 1(CX)
  13676. SHRL $0x08, R8
  13677. SHLL $0x05, R8
  13678. ORL R8, SI
  13679. MOVB SI, (CX)
  13680. ADDQ $0x02, CX
  13681. JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
  13682. emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
  13683. LEAL -2(SI), SI
  13684. MOVB SI, (CX)
  13685. MOVW R8, 1(CX)
  13686. ADDQ $0x03, CX
  13687. match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
  13688. CMPL DX, 8(SP)
  13689. JAE emit_remainder_encodeSnappyBetterBlockAsm
  13690. CMPQ CX, (SP)
  13691. JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm
  13692. MOVQ $0x00000000, ret+56(FP)
  13693. RET
  13694. match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
  13695. MOVQ $0x00cf1bbcdcbfa563, SI
  13696. MOVQ $0x9e3779b1, R8
  13697. LEAQ 1(DI), DI
  13698. LEAQ -2(DX), R9
  13699. MOVQ (BX)(DI*1), R10
  13700. MOVQ 1(BX)(DI*1), R11
  13701. MOVQ (BX)(R9*1), R12
  13702. MOVQ 1(BX)(R9*1), R13
  13703. SHLQ $0x08, R10
  13704. IMULQ SI, R10
  13705. SHRQ $0x2f, R10
  13706. SHLQ $0x20, R11
  13707. IMULQ R8, R11
  13708. SHRQ $0x32, R11
  13709. SHLQ $0x08, R12
  13710. IMULQ SI, R12
  13711. SHRQ $0x2f, R12
  13712. SHLQ $0x20, R13
  13713. IMULQ R8, R13
  13714. SHRQ $0x32, R13
  13715. LEAQ 1(DI), R8
  13716. LEAQ 1(R9), R14
  13717. MOVL DI, (AX)(R10*4)
  13718. MOVL R9, (AX)(R12*4)
  13719. MOVL R8, 524288(AX)(R11*4)
  13720. MOVL R14, 524288(AX)(R13*4)
  13721. LEAQ 1(R9)(DI*1), R8
  13722. SHRQ $0x01, R8
  13723. ADDQ $0x01, DI
  13724. SUBQ $0x01, R9
  13725. index_loop_encodeSnappyBetterBlockAsm:
  13726. CMPQ R8, R9
  13727. JAE search_loop_encodeSnappyBetterBlockAsm
  13728. MOVQ (BX)(DI*1), R10
  13729. MOVQ (BX)(R8*1), R11
  13730. SHLQ $0x08, R10
  13731. IMULQ SI, R10
  13732. SHRQ $0x2f, R10
  13733. SHLQ $0x08, R11
  13734. IMULQ SI, R11
  13735. SHRQ $0x2f, R11
  13736. MOVL DI, (AX)(R10*4)
  13737. MOVL R8, (AX)(R11*4)
  13738. ADDQ $0x02, DI
  13739. ADDQ $0x02, R8
  13740. JMP index_loop_encodeSnappyBetterBlockAsm
  13741. emit_remainder_encodeSnappyBetterBlockAsm:
  13742. MOVQ src_len+32(FP), AX
  13743. SUBL 12(SP), AX
  13744. LEAQ 5(CX)(AX*1), AX
  13745. CMPQ AX, (SP)
  13746. JB emit_remainder_ok_encodeSnappyBetterBlockAsm
  13747. MOVQ $0x00000000, ret+56(FP)
  13748. RET
  13749. emit_remainder_ok_encodeSnappyBetterBlockAsm:
  13750. MOVQ src_len+32(FP), AX
  13751. MOVL 12(SP), DX
  13752. CMPL DX, AX
  13753. JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
  13754. MOVL AX, SI
  13755. MOVL AX, 12(SP)
  13756. LEAQ (BX)(DX*1), AX
  13757. SUBL DX, SI
  13758. LEAL -1(SI), DX
  13759. CMPL DX, $0x3c
  13760. JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm
  13761. CMPL DX, $0x00000100
  13762. JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
  13763. CMPL DX, $0x00010000
  13764. JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
  13765. CMPL DX, $0x01000000
  13766. JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
  13767. MOVB $0xfc, (CX)
  13768. MOVL DX, 1(CX)
  13769. ADDQ $0x05, CX
  13770. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  13771. four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
  13772. MOVL DX, BX
  13773. SHRL $0x10, BX
  13774. MOVB $0xf8, (CX)
  13775. MOVW DX, 1(CX)
  13776. MOVB BL, 3(CX)
  13777. ADDQ $0x04, CX
  13778. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  13779. three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
  13780. MOVB $0xf4, (CX)
  13781. MOVW DX, 1(CX)
  13782. ADDQ $0x03, CX
  13783. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  13784. two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
  13785. MOVB $0xf0, (CX)
  13786. MOVB DL, 1(CX)
  13787. ADDQ $0x02, CX
  13788. CMPL DX, $0x40
  13789. JB memmove_emit_remainder_encodeSnappyBetterBlockAsm
  13790. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
  13791. one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
  13792. SHLB $0x02, DL
  13793. MOVB DL, (CX)
  13794. ADDQ $0x01, CX
  13795. memmove_emit_remainder_encodeSnappyBetterBlockAsm:
  13796. LEAQ (CX)(SI*1), DX
  13797. MOVL SI, BX
  13798. // genMemMoveShort
  13799. CMPQ BX, $0x03
  13800. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2
  13801. JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3
  13802. CMPQ BX, $0x08
  13803. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7
  13804. CMPQ BX, $0x10
  13805. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
  13806. CMPQ BX, $0x20
  13807. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
  13808. JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
  13809. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
  13810. MOVB (AX), SI
  13811. MOVB -1(AX)(BX*1), AL
  13812. MOVB SI, (CX)
  13813. MOVB AL, -1(CX)(BX*1)
  13814. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  13815. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
  13816. MOVW (AX), SI
  13817. MOVB 2(AX), AL
  13818. MOVW SI, (CX)
  13819. MOVB AL, 2(CX)
  13820. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  13821. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
  13822. MOVL (AX), SI
  13823. MOVL -4(AX)(BX*1), AX
  13824. MOVL SI, (CX)
  13825. MOVL AX, -4(CX)(BX*1)
  13826. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  13827. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
  13828. MOVQ (AX), SI
  13829. MOVQ -8(AX)(BX*1), AX
  13830. MOVQ SI, (CX)
  13831. MOVQ AX, -8(CX)(BX*1)
  13832. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  13833. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
  13834. MOVOU (AX), X0
  13835. MOVOU -16(AX)(BX*1), X1
  13836. MOVOU X0, (CX)
  13837. MOVOU X1, -16(CX)(BX*1)
  13838. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
  13839. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
  13840. MOVOU (AX), X0
  13841. MOVOU 16(AX), X1
  13842. MOVOU -32(AX)(BX*1), X2
  13843. MOVOU -16(AX)(BX*1), X3
  13844. MOVOU X0, (CX)
  13845. MOVOU X1, 16(CX)
  13846. MOVOU X2, -32(CX)(BX*1)
  13847. MOVOU X3, -16(CX)(BX*1)
  13848. memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
  13849. MOVQ DX, CX
  13850. JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
  13851. memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
  13852. LEAQ (CX)(SI*1), DX
  13853. MOVL SI, BX
  13854. // genMemMoveLong
  13855. MOVOU (AX), X0
  13856. MOVOU 16(AX), X1
  13857. MOVOU -32(AX)(BX*1), X2
  13858. MOVOU -16(AX)(BX*1), X3
  13859. MOVQ BX, DI
  13860. SHRQ $0x05, DI
  13861. MOVQ CX, SI
  13862. ANDL $0x0000001f, SI
  13863. MOVQ $0x00000040, R8
  13864. SUBQ SI, R8
  13865. DECQ DI
  13866. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  13867. LEAQ -32(AX)(R8*1), SI
  13868. LEAQ -32(CX)(R8*1), R9
  13869. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
  13870. MOVOU (SI), X4
  13871. MOVOU 16(SI), X5
  13872. MOVOA X4, (R9)
  13873. MOVOA X5, 16(R9)
  13874. ADDQ $0x20, R9
  13875. ADDQ $0x20, SI
  13876. ADDQ $0x20, R8
  13877. DECQ DI
  13878. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
  13879. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
  13880. MOVOU -32(AX)(R8*1), X4
  13881. MOVOU -16(AX)(R8*1), X5
  13882. MOVOA X4, -32(CX)(R8*1)
  13883. MOVOA X5, -16(CX)(R8*1)
  13884. ADDQ $0x20, R8
  13885. CMPQ BX, R8
  13886. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
  13887. MOVOU X0, (CX)
  13888. MOVOU X1, 16(CX)
  13889. MOVOU X2, -32(CX)(BX*1)
  13890. MOVOU X3, -16(CX)(BX*1)
  13891. MOVQ DX, CX
  13892. emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
  13893. MOVQ dst_base+0(FP), AX
  13894. SUBQ AX, CX
  13895. MOVQ CX, ret+56(FP)
  13896. RET
  13897. // func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
  13898. // Requires: BMI, SSE2
  13899. TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64
  13900. MOVQ tmp+48(FP), AX
  13901. MOVQ dst_base+0(FP), CX
  13902. MOVQ $0x00000900, DX
  13903. MOVQ AX, BX
  13904. PXOR X0, X0
  13905. zero_loop_encodeSnappyBetterBlockAsm64K:
  13906. MOVOU X0, (BX)
  13907. MOVOU X0, 16(BX)
  13908. MOVOU X0, 32(BX)
  13909. MOVOU X0, 48(BX)
  13910. MOVOU X0, 64(BX)
  13911. MOVOU X0, 80(BX)
  13912. MOVOU X0, 96(BX)
  13913. MOVOU X0, 112(BX)
  13914. ADDQ $0x80, BX
  13915. DECQ DX
  13916. JNZ zero_loop_encodeSnappyBetterBlockAsm64K
  13917. MOVL $0x00000000, 12(SP)
  13918. MOVQ src_len+32(FP), DX
  13919. LEAQ -9(DX), BX
  13920. LEAQ -8(DX), SI
  13921. MOVL SI, 8(SP)
  13922. SHRQ $0x05, DX
  13923. SUBL DX, BX
  13924. LEAQ (CX)(BX*1), BX
  13925. MOVQ BX, (SP)
  13926. MOVL $0x00000001, DX
  13927. MOVL $0x00000000, 16(SP)
  13928. MOVQ src_base+24(FP), BX
  13929. search_loop_encodeSnappyBetterBlockAsm64K:
  13930. MOVL DX, SI
  13931. SUBL 12(SP), SI
  13932. SHRL $0x07, SI
  13933. LEAL 1(DX)(SI*1), SI
  13934. CMPL SI, 8(SP)
  13935. JAE emit_remainder_encodeSnappyBetterBlockAsm64K
  13936. MOVQ (BX)(DX*1), DI
  13937. MOVL SI, 20(SP)
  13938. MOVQ $0x00cf1bbcdcbfa563, R9
  13939. MOVQ $0x9e3779b1, SI
  13940. MOVQ DI, R10
  13941. MOVQ DI, R11
  13942. SHLQ $0x08, R10
  13943. IMULQ R9, R10
  13944. SHRQ $0x30, R10
  13945. SHLQ $0x20, R11
  13946. IMULQ SI, R11
  13947. SHRQ $0x33, R11
  13948. MOVL (AX)(R10*4), SI
  13949. MOVL 262144(AX)(R11*4), R8
  13950. MOVL DX, (AX)(R10*4)
  13951. MOVL DX, 262144(AX)(R11*4)
  13952. MOVQ (BX)(SI*1), R10
  13953. MOVQ (BX)(R8*1), R11
  13954. CMPQ R10, DI
  13955. JEQ candidate_match_encodeSnappyBetterBlockAsm64K
  13956. CMPQ R11, DI
  13957. JNE no_short_found_encodeSnappyBetterBlockAsm64K
  13958. MOVL R8, SI
  13959. JMP candidate_match_encodeSnappyBetterBlockAsm64K
  13960. no_short_found_encodeSnappyBetterBlockAsm64K:
  13961. CMPL R10, DI
  13962. JEQ candidate_match_encodeSnappyBetterBlockAsm64K
  13963. CMPL R11, DI
  13964. JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
  13965. MOVL 20(SP), DX
  13966. JMP search_loop_encodeSnappyBetterBlockAsm64K
  13967. candidateS_match_encodeSnappyBetterBlockAsm64K:
  13968. SHRQ $0x08, DI
  13969. MOVQ DI, R10
  13970. SHLQ $0x08, R10
  13971. IMULQ R9, R10
  13972. SHRQ $0x30, R10
  13973. MOVL (AX)(R10*4), SI
  13974. INCL DX
  13975. MOVL DX, (AX)(R10*4)
  13976. CMPL (BX)(SI*1), DI
  13977. JEQ candidate_match_encodeSnappyBetterBlockAsm64K
  13978. DECL DX
  13979. MOVL R8, SI
  13980. candidate_match_encodeSnappyBetterBlockAsm64K:
  13981. MOVL 12(SP), DI
  13982. TESTL SI, SI
  13983. JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
  13984. match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
  13985. CMPL DX, DI
  13986. JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K
  13987. MOVB -1(BX)(SI*1), R8
  13988. MOVB -1(BX)(DX*1), R9
  13989. CMPB R8, R9
  13990. JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K
  13991. LEAL -1(DX), DX
  13992. DECL SI
  13993. JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
  13994. JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K
  13995. match_extend_back_end_encodeSnappyBetterBlockAsm64K:
  13996. MOVL DX, DI
  13997. SUBL 12(SP), DI
  13998. LEAQ 3(CX)(DI*1), DI
  13999. CMPQ DI, (SP)
  14000. JB match_dst_size_check_encodeSnappyBetterBlockAsm64K
  14001. MOVQ $0x00000000, ret+56(FP)
  14002. RET
  14003. match_dst_size_check_encodeSnappyBetterBlockAsm64K:
  14004. MOVL DX, DI
  14005. ADDL $0x04, DX
  14006. ADDL $0x04, SI
  14007. MOVQ src_len+32(FP), R8
  14008. SUBL DX, R8
  14009. LEAQ (BX)(DX*1), R9
  14010. LEAQ (BX)(SI*1), R10
  14011. // matchLen
  14012. XORL R12, R12
  14013. matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K:
  14014. CMPL R8, $0x10
  14015. JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
  14016. MOVQ (R9)(R12*1), R11
  14017. MOVQ 8(R9)(R12*1), R13
  14018. XORQ (R10)(R12*1), R11
  14019. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
  14020. XORQ 8(R10)(R12*1), R13
  14021. JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K
  14022. LEAL -16(R8), R8
  14023. LEAL 16(R12), R12
  14024. JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
  14025. matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K:
  14026. #ifdef GOAMD64_v3
  14027. TZCNTQ R13, R13
  14028. #else
  14029. BSFQ R13, R13
  14030. #endif
  14031. SARQ $0x03, R13
  14032. LEAL 8(R12)(R13*1), R12
  14033. JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
  14034. matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K:
  14035. CMPL R8, $0x08
  14036. JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
  14037. MOVQ (R9)(R12*1), R11
  14038. XORQ (R10)(R12*1), R11
  14039. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
  14040. LEAL -8(R8), R8
  14041. LEAL 8(R12), R12
  14042. JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
  14043. matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K:
  14044. #ifdef GOAMD64_v3
  14045. TZCNTQ R11, R11
  14046. #else
  14047. BSFQ R11, R11
  14048. #endif
  14049. SARQ $0x03, R11
  14050. LEAL (R12)(R11*1), R12
  14051. JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
  14052. matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
  14053. CMPL R8, $0x04
  14054. JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
  14055. MOVL (R9)(R12*1), R11
  14056. CMPL (R10)(R12*1), R11
  14057. JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
  14058. LEAL -4(R8), R8
  14059. LEAL 4(R12), R12
  14060. matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
  14061. CMPL R8, $0x01
  14062. JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
  14063. JB match_nolit_end_encodeSnappyBetterBlockAsm64K
  14064. MOVW (R9)(R12*1), R11
  14065. CMPW (R10)(R12*1), R11
  14066. JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
  14067. LEAL 2(R12), R12
  14068. SUBL $0x02, R8
  14069. JZ match_nolit_end_encodeSnappyBetterBlockAsm64K
  14070. matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
  14071. MOVB (R9)(R12*1), R11
  14072. CMPB (R10)(R12*1), R11
  14073. JNE match_nolit_end_encodeSnappyBetterBlockAsm64K
  14074. LEAL 1(R12), R12
  14075. match_nolit_end_encodeSnappyBetterBlockAsm64K:
  14076. MOVL DX, R8
  14077. SUBL SI, R8
  14078. // No repeat check is performed in this variant; the match offset (R8) is stored to 16(SP).
  14079. MOVL R8, 16(SP)
  14080. MOVL 12(SP), SI
  14081. CMPL SI, DI
  14082. JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
  14083. MOVL DI, R9
  14084. MOVL DI, 12(SP)
  14085. LEAQ (BX)(SI*1), R10
  14086. SUBL SI, R9
  14087. LEAL -1(R9), SI
  14088. CMPL SI, $0x3c
  14089. JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K
  14090. CMPL SI, $0x00000100
  14091. JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
  14092. JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K
  14093. three_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
  14094. MOVB $0xf4, (CX)
  14095. MOVW SI, 1(CX)
  14096. ADDQ $0x03, CX
  14097. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
  14098. two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
  14099. MOVB $0xf0, (CX)
  14100. MOVB SI, 1(CX)
  14101. ADDQ $0x02, CX
  14102. CMPL SI, $0x40
  14103. JB memmove_match_emit_encodeSnappyBetterBlockAsm64K
  14104. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
  14105. one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
  14106. SHLB $0x02, SI
  14107. MOVB SI, (CX)
  14108. ADDQ $0x01, CX
  14109. memmove_match_emit_encodeSnappyBetterBlockAsm64K:
  14110. LEAQ (CX)(R9*1), SI
  14111. // genMemMoveShort
  14112. CMPQ R9, $0x08
  14113. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
  14114. CMPQ R9, $0x10
  14115. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
  14116. CMPQ R9, $0x20
  14117. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
  14118. JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
  14119. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
  14120. MOVQ (R10), R11
  14121. MOVQ R11, (CX)
  14122. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
  14123. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
  14124. MOVQ (R10), R11
  14125. MOVQ -8(R10)(R9*1), R10
  14126. MOVQ R11, (CX)
  14127. MOVQ R10, -8(CX)(R9*1)
  14128. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
  14129. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
  14130. MOVOU (R10), X0
  14131. MOVOU -16(R10)(R9*1), X1
  14132. MOVOU X0, (CX)
  14133. MOVOU X1, -16(CX)(R9*1)
  14134. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
  14135. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
  14136. MOVOU (R10), X0
  14137. MOVOU 16(R10), X1
  14138. MOVOU -32(R10)(R9*1), X2
  14139. MOVOU -16(R10)(R9*1), X3
  14140. MOVOU X0, (CX)
  14141. MOVOU X1, 16(CX)
  14142. MOVOU X2, -32(CX)(R9*1)
  14143. MOVOU X3, -16(CX)(R9*1)
  14144. memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
  14145. MOVQ SI, CX
  14146. JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
  14147. memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
  14148. LEAQ (CX)(R9*1), SI
  14149. // genMemMoveLong
  14150. MOVOU (R10), X0
  14151. MOVOU 16(R10), X1
  14152. MOVOU -32(R10)(R9*1), X2
  14153. MOVOU -16(R10)(R9*1), X3
  14154. MOVQ R9, R13
  14155. SHRQ $0x05, R13
  14156. MOVQ CX, R11
  14157. ANDL $0x0000001f, R11
  14158. MOVQ $0x00000040, R14
  14159. SUBQ R11, R14
  14160. DECQ R13
  14161. JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  14162. LEAQ -32(R10)(R14*1), R11
  14163. LEAQ -32(CX)(R14*1), R15
  14164. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
  14165. MOVOU (R11), X4
  14166. MOVOU 16(R11), X5
  14167. MOVOA X4, (R15)
  14168. MOVOA X5, 16(R15)
  14169. ADDQ $0x20, R15
  14170. ADDQ $0x20, R11
  14171. ADDQ $0x20, R14
  14172. DECQ R13
  14173. JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
  14174. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
  14175. MOVOU -32(R10)(R14*1), X4
  14176. MOVOU -16(R10)(R14*1), X5
  14177. MOVOA X4, -32(CX)(R14*1)
  14178. MOVOA X5, -16(CX)(R14*1)
  14179. ADDQ $0x20, R14
  14180. CMPQ R9, R14
  14181. JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  14182. MOVOU X0, (CX)
  14183. MOVOU X1, 16(CX)
  14184. MOVOU X2, -32(CX)(R9*1)
  14185. MOVOU X3, -16(CX)(R9*1)
  14186. MOVQ SI, CX
  14187. emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
  14188. ADDL R12, DX
  14189. ADDL $0x04, R12
  14190. MOVL DX, 12(SP)
  14191. // emitCopy
  14192. two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
  14193. CMPL R12, $0x40
  14194. JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
  14195. MOVB $0xee, (CX)
  14196. MOVW R8, 1(CX)
  14197. LEAL -60(R12), R12
  14198. ADDQ $0x03, CX
  14199. JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
  14200. two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
  14201. MOVL R12, SI
  14202. SHLL $0x02, SI
  14203. CMPL R12, $0x0c
  14204. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
  14205. CMPL R8, $0x00000800
  14206. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
  14207. LEAL -15(SI), SI
  14208. MOVB R8, 1(CX)
  14209. SHRL $0x08, R8
  14210. SHLL $0x05, R8
  14211. ORL R8, SI
  14212. MOVB SI, (CX)
  14213. ADDQ $0x02, CX
  14214. JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
  14215. emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
  14216. LEAL -2(SI), SI
  14217. MOVB SI, (CX)
  14218. MOVW R8, 1(CX)
  14219. ADDQ $0x03, CX
  14220. match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
  14221. CMPL DX, 8(SP)
  14222. JAE emit_remainder_encodeSnappyBetterBlockAsm64K
  14223. CMPQ CX, (SP)
  14224. JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
  14225. MOVQ $0x00000000, ret+56(FP)
  14226. RET
  14227. match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
  14228. MOVQ $0x00cf1bbcdcbfa563, SI
  14229. MOVQ $0x9e3779b1, R8
  14230. LEAQ 1(DI), DI
  14231. LEAQ -2(DX), R9
  14232. MOVQ (BX)(DI*1), R10
  14233. MOVQ 1(BX)(DI*1), R11
  14234. MOVQ (BX)(R9*1), R12
  14235. MOVQ 1(BX)(R9*1), R13
  14236. SHLQ $0x08, R10
  14237. IMULQ SI, R10
  14238. SHRQ $0x30, R10
  14239. SHLQ $0x20, R11
  14240. IMULQ R8, R11
  14241. SHRQ $0x33, R11
  14242. SHLQ $0x08, R12
  14243. IMULQ SI, R12
  14244. SHRQ $0x30, R12
  14245. SHLQ $0x20, R13
  14246. IMULQ R8, R13
  14247. SHRQ $0x33, R13
  14248. LEAQ 1(DI), R8
  14249. LEAQ 1(R9), R14
  14250. MOVL DI, (AX)(R10*4)
  14251. MOVL R9, (AX)(R12*4)
  14252. MOVL R8, 262144(AX)(R11*4)
  14253. MOVL R14, 262144(AX)(R13*4)
  14254. LEAQ 1(R9)(DI*1), R8
  14255. SHRQ $0x01, R8
  14256. ADDQ $0x01, DI
  14257. SUBQ $0x01, R9
  14258. index_loop_encodeSnappyBetterBlockAsm64K:
  14259. CMPQ R8, R9
  14260. JAE search_loop_encodeSnappyBetterBlockAsm64K
  14261. MOVQ (BX)(DI*1), R10
  14262. MOVQ (BX)(R8*1), R11
  14263. SHLQ $0x08, R10
  14264. IMULQ SI, R10
  14265. SHRQ $0x30, R10
  14266. SHLQ $0x08, R11
  14267. IMULQ SI, R11
  14268. SHRQ $0x30, R11
  14269. MOVL DI, (AX)(R10*4)
  14270. MOVL R8, (AX)(R11*4)
  14271. ADDQ $0x02, DI
  14272. ADDQ $0x02, R8
  14273. JMP index_loop_encodeSnappyBetterBlockAsm64K
  14274. emit_remainder_encodeSnappyBetterBlockAsm64K:
  14275. MOVQ src_len+32(FP), AX
  14276. SUBL 12(SP), AX
  14277. LEAQ 3(CX)(AX*1), AX
  14278. CMPQ AX, (SP)
  14279. JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K
  14280. MOVQ $0x00000000, ret+56(FP)
  14281. RET
  14282. emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
  14283. MOVQ src_len+32(FP), AX
  14284. MOVL 12(SP), DX
  14285. CMPL DX, AX
  14286. JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
  14287. MOVL AX, SI
  14288. MOVL AX, 12(SP)
  14289. LEAQ (BX)(DX*1), AX
  14290. SUBL DX, SI
  14291. LEAL -1(SI), DX
  14292. CMPL DX, $0x3c
  14293. JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
  14294. CMPL DX, $0x00000100
  14295. JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
  14296. JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
  14297. three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14298. MOVB $0xf4, (CX)
  14299. MOVW DX, 1(CX)
  14300. ADDQ $0x03, CX
  14301. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
  14302. two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14303. MOVB $0xf0, (CX)
  14304. MOVB DL, 1(CX)
  14305. ADDQ $0x02, CX
  14306. CMPL DX, $0x40
  14307. JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
  14308. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
  14309. one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14310. SHLB $0x02, DL
  14311. MOVB DL, (CX)
  14312. ADDQ $0x01, CX
  14313. memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14314. LEAQ (CX)(SI*1), DX
  14315. MOVL SI, BX
  14316. // genMemMoveShort
  14317. CMPQ BX, $0x03
  14318. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2
  14319. JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3
  14320. CMPQ BX, $0x08
  14321. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7
  14322. CMPQ BX, $0x10
  14323. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
  14324. CMPQ BX, $0x20
  14325. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
  14326. JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
  14327. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
  14328. MOVB (AX), SI
  14329. MOVB -1(AX)(BX*1), AL
  14330. MOVB SI, (CX)
  14331. MOVB AL, -1(CX)(BX*1)
  14332. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  14333. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
  14334. MOVW (AX), SI
  14335. MOVB 2(AX), AL
  14336. MOVW SI, (CX)
  14337. MOVB AL, 2(CX)
  14338. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  14339. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
  14340. MOVL (AX), SI
  14341. MOVL -4(AX)(BX*1), AX
  14342. MOVL SI, (CX)
  14343. MOVL AX, -4(CX)(BX*1)
  14344. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  14345. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
  14346. MOVQ (AX), SI
  14347. MOVQ -8(AX)(BX*1), AX
  14348. MOVQ SI, (CX)
  14349. MOVQ AX, -8(CX)(BX*1)
  14350. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  14351. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
  14352. MOVOU (AX), X0
  14353. MOVOU -16(AX)(BX*1), X1
  14354. MOVOU X0, (CX)
  14355. MOVOU X1, -16(CX)(BX*1)
  14356. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
  14357. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
  14358. MOVOU (AX), X0
  14359. MOVOU 16(AX), X1
  14360. MOVOU -32(AX)(BX*1), X2
  14361. MOVOU -16(AX)(BX*1), X3
  14362. MOVOU X0, (CX)
  14363. MOVOU X1, 16(CX)
  14364. MOVOU X2, -32(CX)(BX*1)
  14365. MOVOU X3, -16(CX)(BX*1)
  14366. memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14367. MOVQ DX, CX
  14368. JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
  14369. memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14370. LEAQ (CX)(SI*1), DX
  14371. MOVL SI, BX
  14372. // genMemMoveLong
  14373. MOVOU (AX), X0
  14374. MOVOU 16(AX), X1
  14375. MOVOU -32(AX)(BX*1), X2
  14376. MOVOU -16(AX)(BX*1), X3
  14377. MOVQ BX, DI
  14378. SHRQ $0x05, DI
  14379. MOVQ CX, SI
  14380. ANDL $0x0000001f, SI
  14381. MOVQ $0x00000040, R8
  14382. SUBQ SI, R8
  14383. DECQ DI
  14384. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  14385. LEAQ -32(AX)(R8*1), SI
  14386. LEAQ -32(CX)(R8*1), R9
  14387. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
  14388. MOVOU (SI), X4
  14389. MOVOU 16(SI), X5
  14390. MOVOA X4, (R9)
  14391. MOVOA X5, 16(R9)
  14392. ADDQ $0x20, R9
  14393. ADDQ $0x20, SI
  14394. ADDQ $0x20, R8
  14395. DECQ DI
  14396. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
  14397. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
  14398. MOVOU -32(AX)(R8*1), X4
  14399. MOVOU -16(AX)(R8*1), X5
  14400. MOVOA X4, -32(CX)(R8*1)
  14401. MOVOA X5, -16(CX)(R8*1)
  14402. ADDQ $0x20, R8
  14403. CMPQ BX, R8
  14404. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
  14405. MOVOU X0, (CX)
  14406. MOVOU X1, 16(CX)
  14407. MOVOU X2, -32(CX)(BX*1)
  14408. MOVOU X3, -16(CX)(BX*1)
  14409. MOVQ DX, CX
  14410. emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
  14411. MOVQ dst_base+0(FP), AX
  14412. SUBQ AX, CX
  14413. MOVQ CX, ret+56(FP)
  14414. RET
  14415. // func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
  14416. // Requires: BMI, SSE2
         //
         // Encodes src into dst and returns the number of bytes written to dst.
         // Returns 0 whenever the output pointer would reach the dst limit stored
         // in (SP), i.e. dst_base + src_len - 9 - (src_len >> 5).
         //
         // tmp is scratch memory. It is fully zeroed on entry and then used as two
         // tables of 32-bit src positions:
         //   - "long" table at tmp+0, indexed by a 14-bit hash of 6 input bytes;
         //   - "short" table at tmp+65536, indexed by a 12-bit hash of 4 input bytes.
         //
         // Register roles in the main loop:
         //   AX = tmp, BX = src base, CX = dst write pointer, DX = current src position.
         // Stack slots:
         //   (SP)   = dst limit pointer
         //   8(SP)  = src_len - 8, the last position the search may reach
         //   12(SP) = src index of the first byte that has not been emitted yet
         //   16(SP) = offset of the most recent match (only written in this function)
         //   20(SP) = src position to try next if the current one yields no match
  14417. TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64
  14418. MOVQ tmp+48(FP), AX
  14419. MOVQ dst_base+0(FP), CX
         // Zero tmp: 0x280 (640) iterations of 128 bytes = 81920 bytes.
  14420. MOVQ $0x00000280, DX
  14421. MOVQ AX, BX
  14422. PXOR X0, X0
  14423. zero_loop_encodeSnappyBetterBlockAsm12B:
  14424. MOVOU X0, (BX)
  14425. MOVOU X0, 16(BX)
  14426. MOVOU X0, 32(BX)
  14427. MOVOU X0, 48(BX)
  14428. MOVOU X0, 64(BX)
  14429. MOVOU X0, 80(BX)
  14430. MOVOU X0, 96(BX)
  14431. MOVOU X0, 112(BX)
  14432. ADDQ $0x80, BX
  14433. DECQ DX
  14434. JNZ zero_loop_encodeSnappyBetterBlockAsm12B
         // Nothing emitted yet: pending literals start at src index 0.
  14435. MOVL $0x00000000, 12(SP)
         // 8(SP) = src_len - 8; (SP) = dst_base + (src_len - 9 - src_len>>5).
  14436. MOVQ src_len+32(FP), DX
  14437. LEAQ -9(DX), BX
  14438. LEAQ -8(DX), SI
  14439. MOVL SI, 8(SP)
  14440. SHRQ $0x05, DX
  14441. SUBL DX, BX
  14442. LEAQ (CX)(BX*1), BX
  14443. MOVQ BX, (SP)
         // Searching starts at src position 1.
  14444. MOVL $0x00000001, DX
  14445. MOVL $0x00000000, 16(SP)
  14446. MOVQ src_base+24(FP), BX
         // Main search loop. DX is the position being examined.
  14447. search_loop_encodeSnappyBetterBlockAsm12B:
         // Next position = DX + 1 + (DX - 12(SP)) >> 6, so the step grows with the
         // number of bytes scanned since the last emit. Stop searching once that
         // next position reaches 8(SP).
  14448. MOVL DX, SI
  14449. SUBL 12(SP), SI
  14450. SHRL $0x06, SI
  14451. LEAL 1(DX)(SI*1), SI
  14452. CMPL SI, 8(SP)
  14453. JAE emit_remainder_encodeSnappyBetterBlockAsm12B
         // DI = 8 bytes of src at the current position.
  14454. MOVQ (BX)(DX*1), DI
  14455. MOVL SI, 20(SP)
  14456. MOVQ $0x0000cf1bbcdcbf9b, R9
  14457. MOVQ $0x9e3779b1, SI
  14458. MOVQ DI, R10
  14459. MOVQ DI, R11
         // R10 = long hash: shift out the top 2 bytes (6 bytes remain), multiply,
         // keep the top 14 bits.
  14460. SHLQ $0x10, R10
  14461. IMULQ R9, R10
  14462. SHRQ $0x32, R10
         // R11 = short hash: shift out the top 4 bytes (4 bytes remain), multiply,
         // keep the top 12 bits.
  14463. SHLQ $0x20, R11
  14464. IMULQ SI, R11
  14465. SHRQ $0x34, R11
         // SI = candidate from the long table, R8 = candidate from the short table;
         // both table slots are then overwritten with the current position.
  14466. MOVL (AX)(R10*4), SI
  14467. MOVL 65536(AX)(R11*4), R8
  14468. MOVL DX, (AX)(R10*4)
  14469. MOVL DX, 65536(AX)(R11*4)
  14470. MOVQ (BX)(SI*1), R10
  14471. MOVQ (BX)(R8*1), R11
         // Prefer a candidate whose full 8 bytes match: long candidate first, then
         // the short one (moved into SI).
  14472. CMPQ R10, DI
  14473. JEQ candidate_match_encodeSnappyBetterBlockAsm12B
  14474. CMPQ R11, DI
  14475. JNE no_short_found_encodeSnappyBetterBlockAsm12B
  14476. MOVL R8, SI
  14477. JMP candidate_match_encodeSnappyBetterBlockAsm12B
  14478. no_short_found_encodeSnappyBetterBlockAsm12B:
         // Fall back to 4-byte (32-bit) comparisons of the same two candidates.
  14479. CMPL R10, DI
  14480. JEQ candidate_match_encodeSnappyBetterBlockAsm12B
  14481. CMPL R11, DI
  14482. JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
         // No match here: advance to the position saved in 20(SP).
  14483. MOVL 20(SP), DX
  14484. JMP search_loop_encodeSnappyBetterBlockAsm12B
  14485. candidateS_match_encodeSnappyBetterBlockAsm12B:
         // Only the short candidate matched on 4 bytes. Before using it, look up the
         // long table for position DX+1 (DI >> 8 drops the first input byte) and
         // take that candidate instead if its first 4 bytes match.
  14486. SHRQ $0x08, DI
  14487. MOVQ DI, R10
  14488. SHLQ $0x10, R10
  14489. IMULQ R9, R10
  14490. SHRQ $0x32, R10
  14491. MOVL (AX)(R10*4), SI
  14492. INCL DX
  14493. MOVL DX, (AX)(R10*4)
  14494. CMPL (BX)(SI*1), DI
  14495. JEQ candidate_match_encodeSnappyBetterBlockAsm12B
         // The DX+1 lookup failed: restore DX and use the short candidate.
  14496. DECL DX
  14497. MOVL R8, SI
  14498. candidate_match_encodeSnappyBetterBlockAsm12B:
         // Extend the match backwards while the preceding bytes are equal, without
         // moving DX below 12(SP) and without moving the candidate SI below 0.
  14499. MOVL 12(SP), DI
  14500. TESTL SI, SI
  14501. JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
  14502. match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
  14503. CMPL DX, DI
  14504. JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B
  14505. MOVB -1(BX)(SI*1), R8
  14506. MOVB -1(BX)(DX*1), R9
  14507. CMPB R8, R9
  14508. JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B
  14509. LEAL -1(DX), DX
  14510. DECL SI
  14511. JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
  14512. JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B
  14513. match_extend_back_end_encodeSnappyBetterBlockAsm12B:
         // Output space check: CX + pending literal length + 3 must stay below the
         // dst limit, otherwise return 0.
  14514. MOVL DX, DI
  14515. SUBL 12(SP), DI
  14516. LEAQ 3(CX)(DI*1), DI
  14517. CMPQ DI, (SP)
  14518. JB match_dst_size_check_encodeSnappyBetterBlockAsm12B
  14519. MOVQ $0x00000000, ret+56(FP)
  14520. RET
  14521. match_dst_size_check_encodeSnappyBetterBlockAsm12B:
         // DI = match start. Skip the first 4 bytes on both sides, then
         // R8 = bytes left in src after DX, R9 = &src[DX], R10 = &src[SI].
  14522. MOVL DX, DI
  14523. ADDL $0x04, DX
  14524. ADDL $0x04, SI
  14525. MOVQ src_len+32(FP), R8
  14526. SUBL DX, R8
  14527. LEAQ (BX)(DX*1), R9
  14528. LEAQ (BX)(SI*1), R10
  14529. // matchLen
         // R12 counts the equal bytes found at R9/R10; R8 counts the bytes left to
         // compare. Compare 16 bytes per iteration while at least 16 remain.
  14530. XORL R12, R12
  14531. matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B:
  14532. CMPL R8, $0x10
  14533. JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
  14534. MOVQ (R9)(R12*1), R11
  14535. MOVQ 8(R9)(R12*1), R13
  14536. XORQ (R10)(R12*1), R11
  14537. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
  14538. XORQ 8(R10)(R12*1), R13
  14539. JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B
  14540. LEAL -16(R8), R8
  14541. LEAL 16(R12), R12
  14542. JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
  14543. matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B:
         // Mismatch in the second 8-byte word: index of the lowest differing bit,
         // divided by 8, is the number of equal bytes in that word; add 8 for the
         // first word.
  14544. #ifdef GOAMD64_v3
  14545. TZCNTQ R13, R13
  14546. #else
  14547. BSFQ R13, R13
  14548. #endif
  14549. SARQ $0x03, R13
  14550. LEAL 8(R12)(R13*1), R12
  14551. JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
  14552. matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B:
         // Fewer than 16 bytes left: try one 8-byte word.
  14553. CMPL R8, $0x08
  14554. JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
  14555. MOVQ (R9)(R12*1), R11
  14556. XORQ (R10)(R12*1), R11
  14557. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
  14558. LEAL -8(R8), R8
  14559. LEAL 8(R12), R12
  14560. JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
  14561. matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B:
         // Mismatch inside an 8-byte word: lowest differing bit / 8 = equal bytes.
  14562. #ifdef GOAMD64_v3
  14563. TZCNTQ R11, R11
  14564. #else
  14565. BSFQ R11, R11
  14566. #endif
  14567. SARQ $0x03, R11
  14568. LEAL (R12)(R11*1), R12
  14569. JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
  14570. matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
         // Tail handling: compare 4, then 2, then 1 byte(s) as far as R8 allows.
  14571. CMPL R8, $0x04
  14572. JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
  14573. MOVL (R9)(R12*1), R11
  14574. CMPL (R10)(R12*1), R11
  14575. JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
  14576. LEAL -4(R8), R8
  14577. LEAL 4(R12), R12
  14578. matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
  14579. CMPL R8, $0x01
  14580. JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
  14581. JB match_nolit_end_encodeSnappyBetterBlockAsm12B
  14582. MOVW (R9)(R12*1), R11
  14583. CMPW (R10)(R12*1), R11
  14584. JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
  14585. LEAL 2(R12), R12
  14586. SUBL $0x02, R8
  14587. JZ match_nolit_end_encodeSnappyBetterBlockAsm12B
  14588. matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
  14589. MOVB (R9)(R12*1), R11
  14590. CMPB (R10)(R12*1), R11
  14591. JNE match_nolit_end_encodeSnappyBetterBlockAsm12B
  14592. LEAL 1(R12), R12
  14593. match_nolit_end_encodeSnappyBetterBlockAsm12B:
         // R8 = match offset (current position minus candidate position).
  14594. MOVL DX, R8
  14595. SUBL SI, R8
  14596. // Check if repeat
         // The offset is recorded in 16(SP); no instruction in this function reads
         // it back.
  14597. MOVL R8, 16(SP)
         // Emit the pending literals src[12(SP):DI] (DI = match start), if any.
  14598. MOVL 12(SP), SI
  14599. CMPL SI, DI
  14600. JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
         // R10 = literal source pointer, R9 = literal length, SI = length - 1.
  14601. MOVL DI, R9
  14602. MOVL DI, 12(SP)
  14603. LEAQ (BX)(SI*1), R10
  14604. SUBL SI, R9
  14605. LEAL -1(R9), SI
         // Choose the literal header size from length-1: below 60 uses one byte,
         // below 256 uses two bytes, everything else uses three bytes.
  14606. CMPL SI, $0x3c
  14607. JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B
  14608. CMPL SI, $0x00000100
  14609. JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
         // This JB is never taken (the identical JB above was not); execution falls
         // through to the same label either way.
  14610. JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B
  14611. three_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
         // Header: tag 0xf4 followed by the 16-bit value length-1.
  14612. MOVB $0xf4, (CX)
  14613. MOVW SI, 1(CX)
  14614. ADDQ $0x03, CX
  14615. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
  14616. two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
         // Header: tag 0xf0 followed by the 8-bit value length-1. Runs with
         // length-1 < 64 use the short copy, longer ones the long copy.
  14617. MOVB $0xf0, (CX)
  14618. MOVB SI, 1(CX)
  14619. ADDQ $0x02, CX
  14620. CMPL SI, $0x40
  14621. JB memmove_match_emit_encodeSnappyBetterBlockAsm12B
  14622. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
  14623. one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
         // Header: single byte (length-1) << 2.
  14624. SHLB $0x02, SI
  14625. MOVB SI, (CX)
  14626. ADDQ $0x01, CX
  14627. memmove_match_emit_encodeSnappyBetterBlockAsm12B:
         // SI = dst pointer after the literals; CX is set to it once the copy is done.
  14628. LEAQ (CX)(R9*1), SI
  14629. // genMemMoveShort
         // Copy R9 (at most 64) bytes from R10 to CX using overlapping loads and
         // stores selected by length class.
  14630. CMPQ R9, $0x08
  14631. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
  14632. CMPQ R9, $0x10
  14633. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
  14634. CMPQ R9, $0x20
  14635. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
  14636. JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
  14637. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
         // Lengths up to 8: a full 8 bytes are read and written regardless of R9.
  14638. MOVQ (R10), R11
  14639. MOVQ R11, (CX)
  14640. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
  14641. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
         // First 8 and last 8 bytes (the two ranges may overlap).
  14642. MOVQ (R10), R11
  14643. MOVQ -8(R10)(R9*1), R10
  14644. MOVQ R11, (CX)
  14645. MOVQ R10, -8(CX)(R9*1)
  14646. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
  14647. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
         // First 16 and last 16 bytes (the two ranges may overlap).
  14648. MOVOU (R10), X0
  14649. MOVOU -16(R10)(R9*1), X1
  14650. MOVOU X0, (CX)
  14651. MOVOU X1, -16(CX)(R9*1)
  14652. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
  14653. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
         // First 32 and last 32 bytes (the two ranges may overlap).
  14654. MOVOU (R10), X0
  14655. MOVOU 16(R10), X1
  14656. MOVOU -32(R10)(R9*1), X2
  14657. MOVOU -16(R10)(R9*1), X3
  14658. MOVOU X0, (CX)
  14659. MOVOU X1, 16(CX)
  14660. MOVOU X2, -32(CX)(R9*1)
  14661. MOVOU X3, -16(CX)(R9*1)
  14662. memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
  14663. MOVQ SI, CX
  14664. JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
  14665. memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
         // SI = dst pointer after the literals; CX is set to it once the copy is done.
  14666. LEAQ (CX)(R9*1), SI
  14667. // genMemMoveLong
         // X0-X3 keep the first 32 and last 32 source bytes; they are stored last
         // with unaligned stores, so the 32-byte loop does not handle the edges.
  14668. MOVOU (R10), X0
  14669. MOVOU 16(R10), X1
  14670. MOVOU -32(R10)(R9*1), X2
  14671. MOVOU -16(R10)(R9*1), X3
         // R13 = length / 32. R14 = 64 - (CX & 31), so CX + R14 - 32 is 32-byte
         // aligned; that is what allows the MOVOA (aligned) stores below.
  14672. MOVQ R9, R13
  14673. SHRQ $0x05, R13
  14674. MOVQ CX, R11
  14675. ANDL $0x0000001f, R11
  14676. MOVQ $0x00000040, R14
  14677. SUBQ R11, R14
         // NOTE(review): DECQ does not modify CF, so JA sees CF from the SUBQ above
         // and ZF from the DECQ.
  14678. DECQ R13
  14679. JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
  14680. LEAQ -32(R10)(R14*1), R11
  14681. LEAQ -32(CX)(R14*1), R15
  14682. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
  14683. MOVOU (R11), X4
  14684. MOVOU 16(R11), X5
  14685. MOVOA X4, (R15)
  14686. MOVOA X5, 16(R15)
  14687. ADDQ $0x20, R15
  14688. ADDQ $0x20, R11
  14689. ADDQ $0x20, R14
  14690. DECQ R13
  14691. JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
  14692. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
         // Copy 32 bytes per iteration at offset R14-32 while R14 <= length.
  14693. MOVOU -32(R10)(R14*1), X4
  14694. MOVOU -16(R10)(R14*1), X5
  14695. MOVOA X4, -32(CX)(R14*1)
  14696. MOVOA X5, -16(CX)(R14*1)
  14697. ADDQ $0x20, R14
  14698. CMPQ R9, R14
  14699. JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
         // Write the saved head and tail.
  14700. MOVOU X0, (CX)
  14701. MOVOU X1, 16(CX)
  14702. MOVOU X2, -32(CX)(R9*1)
  14703. MOVOU X3, -16(CX)(R9*1)
  14704. MOVQ SI, CX
  14705. emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
         // DX = position just past the match; R12 = full match length (the counted
         // bytes plus the 4 skipped before matchLen). The match end becomes the new
         // start of pending literals.
  14706. ADDL R12, DX
  14707. ADDL $0x04, R12
  14708. MOVL DX, 12(SP)
  14709. // emitCopy
         // While more than 64 bytes of match remain, emit a 3-byte copy (tag 0xee,
         // then the 16-bit offset) and reduce the remaining length by 60.
  14710. two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
  14711. CMPL R12, $0x40
  14712. JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
  14713. MOVB $0xee, (CX)
  14714. MOVW R8, 1(CX)
  14715. LEAL -60(R12), R12
  14716. ADDQ $0x03, CX
  14717. JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
  14718. two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
         // SI = length << 2. Lengths >= 12 or offsets >= 2048 take the 3-byte form.
  14719. MOVL R12, SI
  14720. SHLL $0x02, SI
  14721. CMPL R12, $0x0c
  14722. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
  14723. CMPL R8, $0x00000800
  14724. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
         // 2-byte form: first byte = (length<<2) - 15 OR'ed with (offset>>8) << 5,
         // second byte = low 8 bits of the offset.
  14725. LEAL -15(SI), SI
  14726. MOVB R8, 1(CX)
  14727. SHRL $0x08, R8
  14728. SHLL $0x05, R8
  14729. ORL R8, SI
  14730. MOVB SI, (CX)
  14731. ADDQ $0x02, CX
  14732. JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
  14733. emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
         // 3-byte form: first byte = (length<<2) - 2, then the 16-bit offset.
  14734. LEAL -2(SI), SI
  14735. MOVB SI, (CX)
  14736. MOVW R8, 1(CX)
  14737. ADDQ $0x03, CX
  14738. match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
         // Past the search limit: flush the tail. Otherwise re-check dst space and
         // return 0 if the write pointer has reached the limit.
  14739. CMPL DX, 8(SP)
  14740. JAE emit_remainder_encodeSnappyBetterBlockAsm12B
  14741. CMPQ CX, (SP)
  14742. JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
  14743. MOVQ $0x00000000, ret+56(FP)
  14744. RET
  14745. match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
         // Index positions inside the match that was just emitted.
         // DI = match start + 1 and R9 = DX - 2 go into the long table;
         // DI + 1 and R9 + 1 go into the short table.
  14746. MOVQ $0x0000cf1bbcdcbf9b, SI
  14747. MOVQ $0x9e3779b1, R8
  14748. LEAQ 1(DI), DI
  14749. LEAQ -2(DX), R9
  14750. MOVQ (BX)(DI*1), R10
  14751. MOVQ 1(BX)(DI*1), R11
  14752. MOVQ (BX)(R9*1), R12
  14753. MOVQ 1(BX)(R9*1), R13
  14754. SHLQ $0x10, R10
  14755. IMULQ SI, R10
  14756. SHRQ $0x32, R10
  14757. SHLQ $0x20, R11
  14758. IMULQ R8, R11
  14759. SHRQ $0x34, R11
  14760. SHLQ $0x10, R12
  14761. IMULQ SI, R12
  14762. SHRQ $0x32, R12
  14763. SHLQ $0x20, R13
  14764. IMULQ R8, R13
  14765. SHRQ $0x34, R13
  14766. LEAQ 1(DI), R8
  14767. LEAQ 1(R9), R14
  14768. MOVL DI, (AX)(R10*4)
  14769. MOVL R9, (AX)(R12*4)
  14770. MOVL R8, 65536(AX)(R11*4)
  14771. MOVL R14, 65536(AX)(R13*4)
         // R8 = (DI + R9 + 1) / 2, the midpoint; then DI moves one forward and R9
         // one back to bound the loop below.
  14772. LEAQ 1(R9)(DI*1), R8
  14773. SHRQ $0x01, R8
  14774. ADDQ $0x01, DI
  14775. SUBQ $0x01, R9
  14776. index_loop_encodeSnappyBetterBlockAsm12B:
         // While R8 < R9: add positions DI and R8 to the long table, stepping both
         // by 2. Then resume searching at DX.
  14777. CMPQ R8, R9
  14778. JAE search_loop_encodeSnappyBetterBlockAsm12B
  14779. MOVQ (BX)(DI*1), R10
  14780. MOVQ (BX)(R8*1), R11
  14781. SHLQ $0x10, R10
  14782. IMULQ SI, R10
  14783. SHRQ $0x32, R10
  14784. SHLQ $0x10, R11
  14785. IMULQ SI, R11
  14786. SHRQ $0x32, R11
  14787. MOVL DI, (AX)(R10*4)
  14788. MOVL R8, (AX)(R11*4)
  14789. ADDQ $0x02, DI
  14790. ADDQ $0x02, R8
  14791. JMP index_loop_encodeSnappyBetterBlockAsm12B
  14792. emit_remainder_encodeSnappyBetterBlockAsm12B:
         // Final dst check: CX + (src_len - 12(SP)) + 3 must stay below the dst
         // limit, otherwise return 0.
  14793. MOVQ src_len+32(FP), AX
  14794. SUBL 12(SP), AX
  14795. LEAQ 3(CX)(AX*1), AX
  14796. CMPQ AX, (SP)
  14797. JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B
  14798. MOVQ $0x00000000, ret+56(FP)
  14799. RET
  14800. emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
         // Emit src[12(SP):src_len] as one literal run, unless it is empty.
         // AX = literal source pointer, SI = length, DX = length - 1.
  14801. MOVQ src_len+32(FP), AX
  14802. MOVL 12(SP), DX
  14803. CMPL DX, AX
  14804. JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
  14805. MOVL AX, SI
  14806. MOVL AX, 12(SP)
  14807. LEAQ (BX)(DX*1), AX
  14808. SUBL DX, SI
  14809. LEAL -1(SI), DX
         // Same header selection as for literals emitted before a match.
  14810. CMPL DX, $0x3c
  14811. JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
  14812. CMPL DX, $0x00000100
  14813. JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
         // Never taken (same condition as the JB above); falls through regardless.
  14814. JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
  14815. three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
  14816. MOVB $0xf4, (CX)
  14817. MOVW DX, 1(CX)
  14818. ADDQ $0x03, CX
  14819. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
  14820. two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
  14821. MOVB $0xf0, (CX)
  14822. MOVB DL, 1(CX)
  14823. ADDQ $0x02, CX
  14824. CMPL DX, $0x40
  14825. JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B
  14826. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
  14827. one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
  14828. SHLB $0x02, DL
  14829. MOVB DL, (CX)
  14830. ADDQ $0x01, CX
  14831. memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
         // DX = dst pointer after the literals. BX (src base until now) is reused
         // as the byte count.
  14832. LEAQ (CX)(SI*1), DX
  14833. MOVL SI, BX
  14834. // genMemMoveShort
         // Unlike the copy used before a match, every length class here reads and
         // writes exactly BX bytes (1-2, 3, 4-7, 8-16, 17-32, 33-64).
  14835. CMPQ BX, $0x03
  14836. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2
  14837. JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3
  14838. CMPQ BX, $0x08
  14839. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7
  14840. CMPQ BX, $0x10
  14841. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
  14842. CMPQ BX, $0x20
  14843. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
  14844. JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
  14845. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2:
         // First byte and last byte (the same byte when BX == 1).
  14846. MOVB (AX), SI
  14847. MOVB -1(AX)(BX*1), AL
  14848. MOVB SI, (CX)
  14849. MOVB AL, -1(CX)(BX*1)
  14850. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  14851. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3:
  14852. MOVW (AX), SI
  14853. MOVB 2(AX), AL
  14854. MOVW SI, (CX)
  14855. MOVB AL, 2(CX)
  14856. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  14857. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7:
  14858. MOVL (AX), SI
  14859. MOVL -4(AX)(BX*1), AX
  14860. MOVL SI, (CX)
  14861. MOVL AX, -4(CX)(BX*1)
  14862. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  14863. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
  14864. MOVQ (AX), SI
  14865. MOVQ -8(AX)(BX*1), AX
  14866. MOVQ SI, (CX)
  14867. MOVQ AX, -8(CX)(BX*1)
  14868. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  14869. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
  14870. MOVOU (AX), X0
  14871. MOVOU -16(AX)(BX*1), X1
  14872. MOVOU X0, (CX)
  14873. MOVOU X1, -16(CX)(BX*1)
  14874. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
  14875. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
  14876. MOVOU (AX), X0
  14877. MOVOU 16(AX), X1
  14878. MOVOU -32(AX)(BX*1), X2
  14879. MOVOU -16(AX)(BX*1), X3
  14880. MOVOU X0, (CX)
  14881. MOVOU X1, 16(CX)
  14882. MOVOU X2, -32(CX)(BX*1)
  14883. MOVOU X3, -16(CX)(BX*1)
  14884. memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
  14885. MOVQ DX, CX
  14886. JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
  14887. memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
         // Same long copy as used before a match, with AX = source, BX = length,
         // DX = dst pointer after the literals.
  14888. LEAQ (CX)(SI*1), DX
  14889. MOVL SI, BX
  14890. // genMemMoveLong
         // X0-X3 keep the first 32 and last 32 source bytes for the final stores.
  14891. MOVOU (AX), X0
  14892. MOVOU 16(AX), X1
  14893. MOVOU -32(AX)(BX*1), X2
  14894. MOVOU -16(AX)(BX*1), X3
         // DI = length / 32; R8 = 64 - (CX & 31), so CX + R8 - 32 is 32-byte aligned.
  14895. MOVQ BX, DI
  14896. SHRQ $0x05, DI
  14897. MOVQ CX, SI
  14898. ANDL $0x0000001f, SI
  14899. MOVQ $0x00000040, R8
  14900. SUBQ SI, R8
         // NOTE(review): DECQ does not modify CF, so JA sees CF from the SUBQ above
         // and ZF from the DECQ.
  14901. DECQ DI
  14902. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
  14903. LEAQ -32(AX)(R8*1), SI
  14904. LEAQ -32(CX)(R8*1), R9
  14905. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
  14906. MOVOU (SI), X4
  14907. MOVOU 16(SI), X5
  14908. MOVOA X4, (R9)
  14909. MOVOA X5, 16(R9)
  14910. ADDQ $0x20, R9
  14911. ADDQ $0x20, SI
  14912. ADDQ $0x20, R8
  14913. DECQ DI
  14914. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
  14915. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
         // Copy 32 bytes per iteration at offset R8-32 while R8 <= length.
  14916. MOVOU -32(AX)(R8*1), X4
  14917. MOVOU -16(AX)(R8*1), X5
  14918. MOVOA X4, -32(CX)(R8*1)
  14919. MOVOA X5, -16(CX)(R8*1)
  14920. ADDQ $0x20, R8
  14921. CMPQ BX, R8
  14922. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
         // Write the saved head and tail.
  14923. MOVOU X0, (CX)
  14924. MOVOU X1, 16(CX)
  14925. MOVOU X2, -32(CX)(BX*1)
  14926. MOVOU X3, -16(CX)(BX*1)
  14927. MOVQ DX, CX
  14928. emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
         // Return the number of bytes written: CX - dst_base.
  14929. MOVQ dst_base+0(FP), AX
  14930. SUBQ AX, CX
  14931. MOVQ CX, ret+56(FP)
  14932. RET
  14933. // func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
  14934. // Requires: BMI, SSE2
  14935. TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64
  14936. MOVQ tmp+48(FP), AX
  14937. MOVQ dst_base+0(FP), CX
  14938. MOVQ $0x000000a0, DX
  14939. MOVQ AX, BX
  14940. PXOR X0, X0
  14941. zero_loop_encodeSnappyBetterBlockAsm10B:
  14942. MOVOU X0, (BX)
  14943. MOVOU X0, 16(BX)
  14944. MOVOU X0, 32(BX)
  14945. MOVOU X0, 48(BX)
  14946. MOVOU X0, 64(BX)
  14947. MOVOU X0, 80(BX)
  14948. MOVOU X0, 96(BX)
  14949. MOVOU X0, 112(BX)
  14950. ADDQ $0x80, BX
  14951. DECQ DX
  14952. JNZ zero_loop_encodeSnappyBetterBlockAsm10B
  14953. MOVL $0x00000000, 12(SP)
  14954. MOVQ src_len+32(FP), DX
  14955. LEAQ -9(DX), BX
  14956. LEAQ -8(DX), SI
  14957. MOVL SI, 8(SP)
  14958. SHRQ $0x05, DX
  14959. SUBL DX, BX
  14960. LEAQ (CX)(BX*1), BX
  14961. MOVQ BX, (SP)
  14962. MOVL $0x00000001, DX
  14963. MOVL $0x00000000, 16(SP)
  14964. MOVQ src_base+24(FP), BX
  14965. search_loop_encodeSnappyBetterBlockAsm10B:
  14966. MOVL DX, SI
  14967. SUBL 12(SP), SI
  14968. SHRL $0x05, SI
  14969. LEAL 1(DX)(SI*1), SI
  14970. CMPL SI, 8(SP)
  14971. JAE emit_remainder_encodeSnappyBetterBlockAsm10B
  14972. MOVQ (BX)(DX*1), DI
  14973. MOVL SI, 20(SP)
  14974. MOVQ $0x0000cf1bbcdcbf9b, R9
  14975. MOVQ $0x9e3779b1, SI
  14976. MOVQ DI, R10
  14977. MOVQ DI, R11
  14978. SHLQ $0x10, R10
  14979. IMULQ R9, R10
  14980. SHRQ $0x34, R10
  14981. SHLQ $0x20, R11
  14982. IMULQ SI, R11
  14983. SHRQ $0x36, R11
  14984. MOVL (AX)(R10*4), SI
  14985. MOVL 16384(AX)(R11*4), R8
  14986. MOVL DX, (AX)(R10*4)
  14987. MOVL DX, 16384(AX)(R11*4)
  14988. MOVQ (BX)(SI*1), R10
  14989. MOVQ (BX)(R8*1), R11
  14990. CMPQ R10, DI
  14991. JEQ candidate_match_encodeSnappyBetterBlockAsm10B
  14992. CMPQ R11, DI
  14993. JNE no_short_found_encodeSnappyBetterBlockAsm10B
  14994. MOVL R8, SI
  14995. JMP candidate_match_encodeSnappyBetterBlockAsm10B
  14996. no_short_found_encodeSnappyBetterBlockAsm10B:
  14997. CMPL R10, DI
  14998. JEQ candidate_match_encodeSnappyBetterBlockAsm10B
  14999. CMPL R11, DI
  15000. JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
  15001. MOVL 20(SP), DX
  15002. JMP search_loop_encodeSnappyBetterBlockAsm10B
  15003. candidateS_match_encodeSnappyBetterBlockAsm10B:
  15004. SHRQ $0x08, DI
  15005. MOVQ DI, R10
  15006. SHLQ $0x10, R10
  15007. IMULQ R9, R10
  15008. SHRQ $0x34, R10
  15009. MOVL (AX)(R10*4), SI
  15010. INCL DX
  15011. MOVL DX, (AX)(R10*4)
  15012. CMPL (BX)(SI*1), DI
  15013. JEQ candidate_match_encodeSnappyBetterBlockAsm10B
  15014. DECL DX
  15015. MOVL R8, SI
  15016. candidate_match_encodeSnappyBetterBlockAsm10B:
  15017. MOVL 12(SP), DI
  15018. TESTL SI, SI
  15019. JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
  15020. match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
  15021. CMPL DX, DI
  15022. JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B
  15023. MOVB -1(BX)(SI*1), R8
  15024. MOVB -1(BX)(DX*1), R9
  15025. CMPB R8, R9
  15026. JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B
  15027. LEAL -1(DX), DX
  15028. DECL SI
  15029. JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
  15030. JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B
  15031. match_extend_back_end_encodeSnappyBetterBlockAsm10B:
  15032. MOVL DX, DI
  15033. SUBL 12(SP), DI
  15034. LEAQ 3(CX)(DI*1), DI
  15035. CMPQ DI, (SP)
  15036. JB match_dst_size_check_encodeSnappyBetterBlockAsm10B
  15037. MOVQ $0x00000000, ret+56(FP)
  15038. RET
  15039. match_dst_size_check_encodeSnappyBetterBlockAsm10B:
  15040. MOVL DX, DI
  15041. ADDL $0x04, DX
  15042. ADDL $0x04, SI
  15043. MOVQ src_len+32(FP), R8
  15044. SUBL DX, R8
  15045. LEAQ (BX)(DX*1), R9
  15046. LEAQ (BX)(SI*1), R10
  15047. // matchLen
  15048. XORL R12, R12
  15049. matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B:
  15050. CMPL R8, $0x10
  15051. JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
  15052. MOVQ (R9)(R12*1), R11
  15053. MOVQ 8(R9)(R12*1), R13
  15054. XORQ (R10)(R12*1), R11
  15055. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
  15056. XORQ 8(R10)(R12*1), R13
  15057. JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
  15058. LEAL -16(R8), R8
  15059. LEAL 16(R12), R12
  15060. JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
  15061. matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B:
  15062. #ifdef GOAMD64_v3
  15063. TZCNTQ R13, R13
  15064. #else
  15065. BSFQ R13, R13
  15066. #endif
  15067. SARQ $0x03, R13
  15068. LEAL 8(R12)(R13*1), R12
  15069. JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
  15070. matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B:
  15071. CMPL R8, $0x08
  15072. JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
  15073. MOVQ (R9)(R12*1), R11
  15074. XORQ (R10)(R12*1), R11
  15075. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
  15076. LEAL -8(R8), R8
  15077. LEAL 8(R12), R12
  15078. JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
  15079. matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B:
  15080. #ifdef GOAMD64_v3
  15081. TZCNTQ R11, R11
  15082. #else
  15083. BSFQ R11, R11
  15084. #endif
  15085. SARQ $0x03, R11
  15086. LEAL (R12)(R11*1), R12
  15087. JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
  15088. matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
  15089. CMPL R8, $0x04
  15090. JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
  15091. MOVL (R9)(R12*1), R11
  15092. CMPL (R10)(R12*1), R11
  15093. JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
  15094. LEAL -4(R8), R8
  15095. LEAL 4(R12), R12
  15096. matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
  15097. CMPL R8, $0x01
  15098. JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
  15099. JB match_nolit_end_encodeSnappyBetterBlockAsm10B
  15100. MOVW (R9)(R12*1), R11
  15101. CMPW (R10)(R12*1), R11
  15102. JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
  15103. LEAL 2(R12), R12
  15104. SUBL $0x02, R8
  15105. JZ match_nolit_end_encodeSnappyBetterBlockAsm10B
  15106. matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
  15107. MOVB (R9)(R12*1), R11
  15108. CMPB (R10)(R12*1), R11
  15109. JNE match_nolit_end_encodeSnappyBetterBlockAsm10B
  15110. LEAL 1(R12), R12
  15111. match_nolit_end_encodeSnappyBetterBlockAsm10B:
  15112. MOVL DX, R8
  15113. SUBL SI, R8
  15114. // Check if repeat
  15115. MOVL R8, 16(SP)
  15116. MOVL 12(SP), SI
  15117. CMPL SI, DI
  15118. JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
  15119. MOVL DI, R9
  15120. MOVL DI, 12(SP)
  15121. LEAQ (BX)(SI*1), R10
  15122. SUBL SI, R9
  15123. LEAL -1(R9), SI
  15124. CMPL SI, $0x3c
  15125. JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B
  15126. CMPL SI, $0x00000100
  15127. JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
  15128. JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B
  15129. three_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
  15130. MOVB $0xf4, (CX)
  15131. MOVW SI, 1(CX)
  15132. ADDQ $0x03, CX
  15133. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
  15134. two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
  15135. MOVB $0xf0, (CX)
  15136. MOVB SI, 1(CX)
  15137. ADDQ $0x02, CX
  15138. CMPL SI, $0x40
  15139. JB memmove_match_emit_encodeSnappyBetterBlockAsm10B
  15140. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
  15141. one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
  15142. SHLB $0x02, SI
  15143. MOVB SI, (CX)
  15144. ADDQ $0x01, CX
  15145. memmove_match_emit_encodeSnappyBetterBlockAsm10B:
  15146. LEAQ (CX)(R9*1), SI
  15147. // genMemMoveShort
  15148. CMPQ R9, $0x08
  15149. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
  15150. CMPQ R9, $0x10
  15151. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
  15152. CMPQ R9, $0x20
  15153. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
  15154. JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
  15155. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
  15156. MOVQ (R10), R11
  15157. MOVQ R11, (CX)
  15158. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
  15159. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
  15160. MOVQ (R10), R11
  15161. MOVQ -8(R10)(R9*1), R10
  15162. MOVQ R11, (CX)
  15163. MOVQ R10, -8(CX)(R9*1)
  15164. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
  15165. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
  15166. MOVOU (R10), X0
  15167. MOVOU -16(R10)(R9*1), X1
  15168. MOVOU X0, (CX)
  15169. MOVOU X1, -16(CX)(R9*1)
  15170. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
  15171. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
  15172. MOVOU (R10), X0
  15173. MOVOU 16(R10), X1
  15174. MOVOU -32(R10)(R9*1), X2
  15175. MOVOU -16(R10)(R9*1), X3
  15176. MOVOU X0, (CX)
  15177. MOVOU X1, 16(CX)
  15178. MOVOU X2, -32(CX)(R9*1)
  15179. MOVOU X3, -16(CX)(R9*1)
  15180. memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
  15181. MOVQ SI, CX
  15182. JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
  15183. memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
  15184. LEAQ (CX)(R9*1), SI
  15185. // genMemMoveLong
  15186. MOVOU (R10), X0
  15187. MOVOU 16(R10), X1
  15188. MOVOU -32(R10)(R9*1), X2
  15189. MOVOU -16(R10)(R9*1), X3
  15190. MOVQ R9, R13
  15191. SHRQ $0x05, R13
  15192. MOVQ CX, R11
  15193. ANDL $0x0000001f, R11
  15194. MOVQ $0x00000040, R14
  15195. SUBQ R11, R14
  15196. DECQ R13
  15197. JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
  15198. LEAQ -32(R10)(R14*1), R11
  15199. LEAQ -32(CX)(R14*1), R15
  15200. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
  15201. MOVOU (R11), X4
  15202. MOVOU 16(R11), X5
  15203. MOVOA X4, (R15)
  15204. MOVOA X5, 16(R15)
  15205. ADDQ $0x20, R15
  15206. ADDQ $0x20, R11
  15207. ADDQ $0x20, R14
  15208. DECQ R13
  15209. JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
  15210. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
  15211. MOVOU -32(R10)(R14*1), X4
  15212. MOVOU -16(R10)(R14*1), X5
  15213. MOVOA X4, -32(CX)(R14*1)
  15214. MOVOA X5, -16(CX)(R14*1)
  15215. ADDQ $0x20, R14
  15216. CMPQ R9, R14
  15217. JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
  15218. MOVOU X0, (CX)
  15219. MOVOU X1, 16(CX)
  15220. MOVOU X2, -32(CX)(R9*1)
  15221. MOVOU X3, -16(CX)(R9*1)
  15222. MOVQ SI, CX
  15223. emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
  15224. ADDL R12, DX
  15225. ADDL $0x04, R12
  15226. MOVL DX, 12(SP)
  15227. // emitCopy
  15228. two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
  15229. CMPL R12, $0x40
  15230. JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
  15231. MOVB $0xee, (CX)
  15232. MOVW R8, 1(CX)
  15233. LEAL -60(R12), R12
  15234. ADDQ $0x03, CX
  15235. JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
  15236. two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
  15237. MOVL R12, SI
  15238. SHLL $0x02, SI
  15239. CMPL R12, $0x0c
  15240. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
  15241. CMPL R8, $0x00000800
  15242. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
  15243. LEAL -15(SI), SI
  15244. MOVB R8, 1(CX)
  15245. SHRL $0x08, R8
  15246. SHLL $0x05, R8
  15247. ORL R8, SI
  15248. MOVB SI, (CX)
  15249. ADDQ $0x02, CX
  15250. JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
  15251. emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
  15252. LEAL -2(SI), SI
  15253. MOVB SI, (CX)
  15254. MOVW R8, 1(CX)
  15255. ADDQ $0x03, CX
  15256. match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
  15257. CMPL DX, 8(SP)
  15258. JAE emit_remainder_encodeSnappyBetterBlockAsm10B
  15259. CMPQ CX, (SP)
  15260. JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
  15261. MOVQ $0x00000000, ret+56(FP)
  15262. RET
  15263. match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
  15264. MOVQ $0x0000cf1bbcdcbf9b, SI
  15265. MOVQ $0x9e3779b1, R8
  15266. LEAQ 1(DI), DI
  15267. LEAQ -2(DX), R9
  15268. MOVQ (BX)(DI*1), R10
  15269. MOVQ 1(BX)(DI*1), R11
  15270. MOVQ (BX)(R9*1), R12
  15271. MOVQ 1(BX)(R9*1), R13
  15272. SHLQ $0x10, R10
  15273. IMULQ SI, R10
  15274. SHRQ $0x34, R10
  15275. SHLQ $0x20, R11
  15276. IMULQ R8, R11
  15277. SHRQ $0x36, R11
  15278. SHLQ $0x10, R12
  15279. IMULQ SI, R12
  15280. SHRQ $0x34, R12
  15281. SHLQ $0x20, R13
  15282. IMULQ R8, R13
  15283. SHRQ $0x36, R13
  15284. LEAQ 1(DI), R8
  15285. LEAQ 1(R9), R14
  15286. MOVL DI, (AX)(R10*4)
  15287. MOVL R9, (AX)(R12*4)
  15288. MOVL R8, 16384(AX)(R11*4)
  15289. MOVL R14, 16384(AX)(R13*4)
  15290. LEAQ 1(R9)(DI*1), R8
  15291. SHRQ $0x01, R8
  15292. ADDQ $0x01, DI
  15293. SUBQ $0x01, R9
  15294. index_loop_encodeSnappyBetterBlockAsm10B:
  15295. CMPQ R8, R9
  15296. JAE search_loop_encodeSnappyBetterBlockAsm10B
  15297. MOVQ (BX)(DI*1), R10
  15298. MOVQ (BX)(R8*1), R11
  15299. SHLQ $0x10, R10
  15300. IMULQ SI, R10
  15301. SHRQ $0x34, R10
  15302. SHLQ $0x10, R11
  15303. IMULQ SI, R11
  15304. SHRQ $0x34, R11
  15305. MOVL DI, (AX)(R10*4)
  15306. MOVL R8, (AX)(R11*4)
  15307. ADDQ $0x02, DI
  15308. ADDQ $0x02, R8
  15309. JMP index_loop_encodeSnappyBetterBlockAsm10B
  15310. emit_remainder_encodeSnappyBetterBlockAsm10B:
  15311. MOVQ src_len+32(FP), AX
  15312. SUBL 12(SP), AX
  15313. LEAQ 3(CX)(AX*1), AX
  15314. CMPQ AX, (SP)
  15315. JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B
  15316. MOVQ $0x00000000, ret+56(FP)
  15317. RET
  15318. emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
  15319. MOVQ src_len+32(FP), AX
  15320. MOVL 12(SP), DX
  15321. CMPL DX, AX
  15322. JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
  15323. MOVL AX, SI
  15324. MOVL AX, 12(SP)
  15325. LEAQ (BX)(DX*1), AX
  15326. SUBL DX, SI
  15327. LEAL -1(SI), DX
  15328. CMPL DX, $0x3c
  15329. JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
  15330. CMPL DX, $0x00000100
  15331. JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
  15332. JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
  15333. three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15334. MOVB $0xf4, (CX)
  15335. MOVW DX, 1(CX)
  15336. ADDQ $0x03, CX
  15337. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
  15338. two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15339. MOVB $0xf0, (CX)
  15340. MOVB DL, 1(CX)
  15341. ADDQ $0x02, CX
  15342. CMPL DX, $0x40
  15343. JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
  15344. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
  15345. one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15346. SHLB $0x02, DL
  15347. MOVB DL, (CX)
  15348. ADDQ $0x01, CX
  15349. memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15350. LEAQ (CX)(SI*1), DX
  15351. MOVL SI, BX
  15352. // genMemMoveShort
  15353. CMPQ BX, $0x03
  15354. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2
  15355. JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3
  15356. CMPQ BX, $0x08
  15357. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7
  15358. CMPQ BX, $0x10
  15359. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
  15360. CMPQ BX, $0x20
  15361. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
  15362. JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
  15363. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2:
  15364. MOVB (AX), SI
  15365. MOVB -1(AX)(BX*1), AL
  15366. MOVB SI, (CX)
  15367. MOVB AL, -1(CX)(BX*1)
  15368. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  15369. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3:
  15370. MOVW (AX), SI
  15371. MOVB 2(AX), AL
  15372. MOVW SI, (CX)
  15373. MOVB AL, 2(CX)
  15374. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  15375. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7:
  15376. MOVL (AX), SI
  15377. MOVL -4(AX)(BX*1), AX
  15378. MOVL SI, (CX)
  15379. MOVL AX, -4(CX)(BX*1)
  15380. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  15381. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
  15382. MOVQ (AX), SI
  15383. MOVQ -8(AX)(BX*1), AX
  15384. MOVQ SI, (CX)
  15385. MOVQ AX, -8(CX)(BX*1)
  15386. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  15387. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
  15388. MOVOU (AX), X0
  15389. MOVOU -16(AX)(BX*1), X1
  15390. MOVOU X0, (CX)
  15391. MOVOU X1, -16(CX)(BX*1)
  15392. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
  15393. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
  15394. MOVOU (AX), X0
  15395. MOVOU 16(AX), X1
  15396. MOVOU -32(AX)(BX*1), X2
  15397. MOVOU -16(AX)(BX*1), X3
  15398. MOVOU X0, (CX)
  15399. MOVOU X1, 16(CX)
  15400. MOVOU X2, -32(CX)(BX*1)
  15401. MOVOU X3, -16(CX)(BX*1)
  15402. memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15403. MOVQ DX, CX
  15404. JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
  15405. memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15406. LEAQ (CX)(SI*1), DX
  15407. MOVL SI, BX
  15408. // genMemMoveLong
  15409. MOVOU (AX), X0
  15410. MOVOU 16(AX), X1
  15411. MOVOU -32(AX)(BX*1), X2
  15412. MOVOU -16(AX)(BX*1), X3
  15413. MOVQ BX, DI
  15414. SHRQ $0x05, DI
  15415. MOVQ CX, SI
  15416. ANDL $0x0000001f, SI
  15417. MOVQ $0x00000040, R8
  15418. SUBQ SI, R8
  15419. DECQ DI
  15420. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
  15421. LEAQ -32(AX)(R8*1), SI
  15422. LEAQ -32(CX)(R8*1), R9
  15423. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
  15424. MOVOU (SI), X4
  15425. MOVOU 16(SI), X5
  15426. MOVOA X4, (R9)
  15427. MOVOA X5, 16(R9)
  15428. ADDQ $0x20, R9
  15429. ADDQ $0x20, SI
  15430. ADDQ $0x20, R8
  15431. DECQ DI
  15432. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
  15433. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
  15434. MOVOU -32(AX)(R8*1), X4
  15435. MOVOU -16(AX)(R8*1), X5
  15436. MOVOA X4, -32(CX)(R8*1)
  15437. MOVOA X5, -16(CX)(R8*1)
  15438. ADDQ $0x20, R8
  15439. CMPQ BX, R8
  15440. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
  15441. MOVOU X0, (CX)
  15442. MOVOU X1, 16(CX)
  15443. MOVOU X2, -32(CX)(BX*1)
  15444. MOVOU X3, -16(CX)(BX*1)
  15445. MOVQ DX, CX
  15446. emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
  15447. MOVQ dst_base+0(FP), AX
  15448. SUBQ AX, CX
  15449. MOVQ CX, ret+56(FP)
  15450. RET
  15451. // func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
  15452. // Requires: BMI, SSE2
  //
  // Encodes src into dst using two lookup tables kept in tmp.
  // The value stored in ret+56(FP) is either 0 (every path on which an
  // output-limit check against (SP) fails) or CX - dst_base, i.e. how far the
  // write pointer advanced.
  //
  // Register roles in the main loop (as used by the code below):
  //   AX = tmp (table base)        BX = src_base
  //   CX = dst write pointer       DX = current src index
  //   SI = candidate src index     DI = 8 bytes loaded at src[DX], later scratch
  //
  // tmp layout (4-byte entries holding src indices):
  //   0(AX)    .. 4095 : table indexed by a 10-bit hash of 6 source bytes
  //   4096(AX) .. 5119 : table indexed by an 8-bit hash of 4 source bytes
  //
  // Stack frame (24 bytes):
  //    0(SP) 8 bytes  dst limit pointer: dst_base + (src_len - 9 - src_len>>5)
  //    8(SP) 4 bytes  src_len - 8, upper bound for search positions
  //   12(SP) 4 bytes  src index where the not-yet-emitted literal run starts
  //   16(SP) 4 bytes  offset of the most recent match (written, never read here)
  //   20(SP) 4 bytes  src index to resume searching from when no candidate matches
  15453. TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64
  15454. MOVQ tmp+48(FP), AX
  15455. MOVQ dst_base+0(FP), CX
  // Zero the whole tmp buffer: 0x28 (40) iterations x 128 bytes = 5120 bytes.
  15456. MOVQ $0x00000028, DX
  15457. MOVQ AX, BX
  15458. PXOR X0, X0
  15459. zero_loop_encodeSnappyBetterBlockAsm8B:
  15460. MOVOU X0, (BX)
  15461. MOVOU X0, 16(BX)
  15462. MOVOU X0, 32(BX)
  15463. MOVOU X0, 48(BX)
  15464. MOVOU X0, 64(BX)
  15465. MOVOU X0, 80(BX)
  15466. MOVOU X0, 96(BX)
  15467. MOVOU X0, 112(BX)
  15468. ADDQ $0x80, BX
  15469. DECQ DX
  15470. JNZ zero_loop_encodeSnappyBetterBlockAsm8B
  // Initialise the stack slots described in the header.
  15471. MOVL $0x00000000, 12(SP)
  15472. MOVQ src_len+32(FP), DX
  15473. LEAQ -9(DX), BX
  15474. LEAQ -8(DX), SI
  15475. MOVL SI, 8(SP)
  15476. SHRQ $0x05, DX
  15477. SUBL DX, BX
  15478. LEAQ (CX)(BX*1), BX
  15479. MOVQ BX, (SP)
  // Searching starts at src index 1.
  15480. MOVL $0x00000001, DX
  15481. MOVL $0x00000000, 16(SP)
  15482. MOVQ src_base+24(FP), BX
  // ---- Main search loop -------------------------------------------------
  // Next position = DX + 1 + ((DX - literal start) >> 4): the step grows as
  // the pending literal run gets longer. Leave the loop once that position
  // reaches the search limit in 8(SP).
  15483. search_loop_encodeSnappyBetterBlockAsm8B:
  15484. MOVL DX, SI
  15485. SUBL 12(SP), SI
  15486. SHRL $0x04, SI
  15487. LEAL 1(DX)(SI*1), SI
  15488. CMPL SI, 8(SP)
  15489. JAE emit_remainder_encodeSnappyBetterBlockAsm8B
  15490. MOVQ (BX)(DX*1), DI
  15491. MOVL SI, 20(SP)
  // R10 = hash of the low 6 bytes (<<16 drops the top two), top 10 bits kept.
  // R11 = hash of the low 4 bytes (<<32 drops the top four), top 8 bits kept.
  15492. MOVQ $0x0000cf1bbcdcbf9b, R9
  15493. MOVQ $0x9e3779b1, SI
  15494. MOVQ DI, R10
  15495. MOVQ DI, R11
  15496. SHLQ $0x10, R10
  15497. IMULQ R9, R10
  15498. SHRQ $0x36, R10
  15499. SHLQ $0x20, R11
  15500. IMULQ SI, R11
  15501. SHRQ $0x38, R11
  // Fetch both candidates (SI from the 6-byte table, R8 from the 4-byte
  // table), then record the current position in both tables.
  15502. MOVL (AX)(R10*4), SI
  15503. MOVL 4096(AX)(R11*4), R8
  15504. MOVL DX, (AX)(R10*4)
  15505. MOVL DX, 4096(AX)(R11*4)
  15506. MOVQ (BX)(SI*1), R10
  15507. MOVQ (BX)(R8*1), R11
  // Prefer a candidate whose full 8 bytes equal the current 8 bytes.
  15508. CMPQ R10, DI
  15509. JEQ candidate_match_encodeSnappyBetterBlockAsm8B
  15510. CMPQ R11, DI
  15511. JNE no_short_found_encodeSnappyBetterBlockAsm8B
  15512. MOVL R8, SI
  15513. JMP candidate_match_encodeSnappyBetterBlockAsm8B
  // Otherwise accept a candidate whose low 4 bytes are equal.
  15514. no_short_found_encodeSnappyBetterBlockAsm8B:
  15515. CMPL R10, DI
  15516. JEQ candidate_match_encodeSnappyBetterBlockAsm8B
  15517. CMPL R11, DI
  15518. JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
  // No candidate: resume at the position saved in 20(SP).
  15519. MOVL 20(SP), DX
  15520. JMP search_loop_encodeSnappyBetterBlockAsm8B
  // Only the 4-byte-table candidate (R8) matched. Before using it, probe the
  // 6-byte table for position DX+1 (DI >> 8 is the data starting one byte
  // later); if that entry matches 4 bytes, use it and keep DX advanced by one.
  // If not, undo the increment and fall back to R8.
  15521. candidateS_match_encodeSnappyBetterBlockAsm8B:
  15522. SHRQ $0x08, DI
  15523. MOVQ DI, R10
  15524. SHLQ $0x10, R10
  15525. IMULQ R9, R10
  15526. SHRQ $0x36, R10
  15527. MOVL (AX)(R10*4), SI
  15528. INCL DX
  15529. MOVL DX, (AX)(R10*4)
  15530. CMPL (BX)(SI*1), DI
  15531. JEQ candidate_match_encodeSnappyBetterBlockAsm8B
  15532. DECL DX
  15533. MOVL R8, SI
  // ---- Extend the match backwards ---------------------------------------
  // While DX is above the literal start (DI) and the bytes just before both
  // positions are equal, step DX and SI back by one. Stops when SI reaches 0.
  15534. candidate_match_encodeSnappyBetterBlockAsm8B:
  15535. MOVL 12(SP), DI
  15536. TESTL SI, SI
  15537. JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
  15538. match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
  15539. CMPL DX, DI
  15540. JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B
  15541. MOVB -1(BX)(SI*1), R8
  15542. MOVB -1(BX)(DX*1), R9
  15543. CMPB R8, R9
  15544. JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B
  15545. LEAL -1(DX), DX
  15546. DECL SI
  15547. JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
  15548. JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B
  // Output-space check: write pointer + pending literal length + 3 must stay
  // below the dst limit in (SP); otherwise return 0.
  15549. match_extend_back_end_encodeSnappyBetterBlockAsm8B:
  15550. MOVL DX, DI
  15551. SUBL 12(SP), DI
  15552. LEAQ 3(CX)(DI*1), DI
  15553. CMPQ DI, (SP)
  15554. JB match_dst_size_check_encodeSnappyBetterBlockAsm8B
  15555. MOVQ $0x00000000, ret+56(FP)
  15556. RET
  // DI = match start. Both positions skip 4 bytes (every path to
  // candidate_match compared at least 4 bytes equal before any back-extension).
  // R8 = bytes left in src after DX, R9 = &src[DX], R10 = &src[SI].
  15557. match_dst_size_check_encodeSnappyBetterBlockAsm8B:
  15558. MOVL DX, DI
  15559. ADDL $0x04, DX
  15560. ADDL $0x04, SI
  15561. MOVQ src_len+32(FP), R8
  15562. SUBL DX, R8
  15563. LEAQ (BX)(DX*1), R9
  15564. LEAQ (BX)(SI*1), R10
  // R12 counts the matching bytes found beyond the first 4.
  15565. // matchLen
  15566. XORL R12, R12
  // Compare 16 bytes per iteration while at least 16 remain.
  15567. matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
  15568. CMPL R8, $0x10
  15569. JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B
  15570. MOVQ (R9)(R12*1), R11
  15571. MOVQ 8(R9)(R12*1), R13
  15572. XORQ (R10)(R12*1), R11
  15573. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
  15574. XORQ 8(R10)(R12*1), R13
  15575. JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
  15576. LEAL -16(R8), R8
  15577. LEAL 16(R12), R12
  15578. JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
  // Mismatch in the second 8 bytes: index of the lowest set bit of the XOR,
  // divided by 8, is the count of equal bytes; add 8 for the first word.
  15579. matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
  15580. #ifdef GOAMD64_v3
  15581. TZCNTQ R13, R13
  15582. #else
  15583. BSFQ R13, R13
  15584. #endif
  15585. SARQ $0x03, R13
  15586. LEAL 8(R12)(R13*1), R12
  15587. JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
  // Fewer than 16 bytes left: try one 8-byte compare.
  15588. matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
  15589. CMPL R8, $0x08
  15590. JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
  15591. MOVQ (R9)(R12*1), R11
  15592. XORQ (R10)(R12*1), R11
  15593. JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
  15594. LEAL -8(R8), R8
  15595. LEAL 8(R12), R12
  15596. JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
  // Mismatch inside an 8-byte word: add the number of equal low-order bytes.
  15597. matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
  15598. #ifdef GOAMD64_v3
  15599. TZCNTQ R11, R11
  15600. #else
  15601. BSFQ R11, R11
  15602. #endif
  15603. SARQ $0x03, R11
  15604. LEAL (R12)(R11*1), R12
  15605. JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
  // Tail handling: 4-byte, then 2-byte, then 1-byte compares.
  15606. matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
  15607. CMPL R8, $0x04
  15608. JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
  15609. MOVL (R9)(R12*1), R11
  15610. CMPL (R10)(R12*1), R11
  15611. JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
  15612. LEAL -4(R8), R8
  15613. LEAL 4(R12), R12
  15614. matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
  // Exactly 1 byte left -> single-byte compare; 0 left -> done.
  15615. CMPL R8, $0x01
  15616. JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
  15617. JB match_nolit_end_encodeSnappyBetterBlockAsm8B
  15618. MOVW (R9)(R12*1), R11
  15619. CMPW (R10)(R12*1), R11
  15620. JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
  15621. LEAL 2(R12), R12
  15622. SUBL $0x02, R8
  15623. JZ match_nolit_end_encodeSnappyBetterBlockAsm8B
  15624. matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
  15625. MOVB (R9)(R12*1), R11
  15626. CMPB (R10)(R12*1), R11
  15627. JNE match_nolit_end_encodeSnappyBetterBlockAsm8B
  15628. LEAL 1(R12), R12
  // ---- Emit pending literals --------------------------------------------
  // R8 = DX - SI = match offset (both were advanced by 4, so the distance is
  // unchanged); it is saved to 16(SP).
  15629. match_nolit_end_encodeSnappyBetterBlockAsm8B:
  15630. MOVL DX, R8
  15631. SUBL SI, R8
  15632. // Check if repeat
  15633. MOVL R8, 16(SP)
  // If the literal start equals the match start there is nothing to emit.
  15634. MOVL 12(SP), SI
  15635. CMPL SI, DI
  15636. JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
  // R9 = literal length, R10 = &src[literal start], SI = length - 1.
  15637. MOVL DI, R9
  15638. MOVL DI, 12(SP)
  15639. LEAQ (BX)(SI*1), R10
  15640. SUBL SI, R9
  15641. LEAL -1(R9), SI
  // Header size depends on length-1: < 60 -> 1 byte, < 256 -> 2 bytes,
  // otherwise 3 bytes.
  15642. CMPL SI, $0x3c
  15643. JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B
  15644. CMPL SI, $0x00000100
  15645. JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
  // The flags are unchanged since the CMPL above, so this second JB can never
  // be taken; execution falls through to the three-byte header either way.
  15646. JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B
  // Three-byte header: 0xf4 (= 61<<2) followed by the 16-bit length-1.
  15647. three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
  15648. MOVB $0xf4, (CX)
  15649. MOVW SI, 1(CX)
  15650. ADDQ $0x03, CX
  15651. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
  // Two-byte header: 0xf0 (= 60<<2) followed by the 8-bit length-1.
  // Lengths up to 64 use the short copy, longer ones the long copy.
  15652. two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
  15653. MOVB $0xf0, (CX)
  15654. MOVB SI, 1(CX)
  15655. ADDQ $0x02, CX
  15656. CMPL SI, $0x40
  15657. JB memmove_match_emit_encodeSnappyBetterBlockAsm8B
  15658. JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
  // One-byte header: (length-1) << 2.
  15659. one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
  15660. SHLB $0x02, SI
  15661. MOVB SI, (CX)
  15662. ADDQ $0x01, CX
  // Short copy of R9 bytes from R10 to CX. SI = CX + R9 is the write pointer
  // after the copy.
  15663. memmove_match_emit_encodeSnappyBetterBlockAsm8B:
  15664. LEAQ (CX)(R9*1), SI
  15665. // genMemMoveShort
  15666. CMPQ R9, $0x08
  15667. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
  15668. CMPQ R9, $0x10
  15669. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
  15670. CMPQ R9, $0x20
  15671. JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
  15672. JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
  // Lengths <= 8 always move a full 8 bytes; only R9 of them count because CX
  // is set to SI afterwards.
  // NOTE(review): this reads/writes up to 7 bytes past the literal — presumably
  // covered by the margins set up in (SP) and 8(SP); confirm.
  15673. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
  15674. MOVQ (R10), R11
  15675. MOVQ R11, (CX)
  15676. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  // 9..16 bytes: first 8 and last 8 bytes (the two moves may overlap).
  15677. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
  15678. MOVQ (R10), R11
  15679. MOVQ -8(R10)(R9*1), R10
  15680. MOVQ R11, (CX)
  15681. MOVQ R10, -8(CX)(R9*1)
  15682. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  // 17..32 bytes: first 16 and last 16 bytes.
  15683. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
  15684. MOVOU (R10), X0
  15685. MOVOU -16(R10)(R9*1), X1
  15686. MOVOU X0, (CX)
  15687. MOVOU X1, -16(CX)(R9*1)
  15688. JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
  // 33..64 bytes: first 32 and last 32 bytes.
  15689. emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
  15690. MOVOU (R10), X0
  15691. MOVOU 16(R10), X1
  15692. MOVOU -32(R10)(R9*1), X2
  15693. MOVOU -16(R10)(R9*1), X3
  15694. MOVOU X0, (CX)
  15695. MOVOU X1, 16(CX)
  15696. MOVOU X2, -32(CX)(R9*1)
  15697. MOVOU X3, -16(CX)(R9*1)
  15698. memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
  15699. MOVQ SI, CX
  15700. JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
  // Long copy of R9 bytes from R10 to CX. The first and last 32 source bytes
  // are loaded into X0..X3 up front and stored last with unaligned moves; the
  // middle is copied in 32-byte steps with aligned (MOVOA) stores.
  15701. memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
  15702. LEAQ (CX)(R9*1), SI
  15703. // genMemMoveLong
  15704. MOVOU (R10), X0
  15705. MOVOU 16(R10), X1
  15706. MOVOU -32(R10)(R9*1), X2
  15707. MOVOU -16(R10)(R9*1), X3
  // R13 = length / 32. R14 = 64 - (CX & 31), so CX + R14 - 32 is 32-byte
  // aligned and the MOVOA stores below hit aligned addresses.
  15708. MOVQ R9, R13
  15709. SHRQ $0x05, R13
  15710. MOVQ CX, R11
  15711. ANDL $0x0000001f, R11
  15712. MOVQ $0x00000040, R14
  15713. SUBQ R11, R14
  // DECQ leaves CF as set by the SUBQ above (no borrow, since R11 <= 31), so
  // JA is taken whenever the decremented count is non-zero.
  15714. DECQ R13
  15715. JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  15716. LEAQ -32(R10)(R14*1), R11
  15717. LEAQ -32(CX)(R14*1), R15
  15718. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
  15719. MOVOU (R11), X4
  15720. MOVOU 16(R11), X5
  15721. MOVOA X4, (R15)
  15722. MOVOA X5, 16(R15)
  15723. ADDQ $0x20, R15
  15724. ADDQ $0x20, R11
  15725. ADDQ $0x20, R14
  15726. DECQ R13
  15727. JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
  // Copy 32 bytes at offset R14-32, advance R14 by 32, repeat while R9 >= R14.
  15728. emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
  15729. MOVOU -32(R10)(R14*1), X4
  15730. MOVOU -16(R10)(R14*1), X5
  15731. MOVOA X4, -32(CX)(R14*1)
  15732. MOVOA X5, -16(CX)(R14*1)
  15733. ADDQ $0x20, R14
  15734. CMPQ R9, R14
  15735. JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  // Store the saved head and tail, then move the write pointer past the copy.
  15736. MOVOU X0, (CX)
  15737. MOVOU X1, 16(CX)
  15738. MOVOU X2, -32(CX)(R9*1)
  15739. MOVOU X3, -16(CX)(R9*1)
  15740. MOVQ SI, CX
  // ---- Emit the copy ----------------------------------------------------
  // DX moves to the end of the match, R12 becomes the full match length
  // (including the 4 bytes skipped before matchLen), and the literal start
  // in 12(SP) is set to the end of the match.
  15741. emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
  15742. ADDL R12, DX
  15743. ADDL $0x04, R12
  15744. MOVL DX, 12(SP)
  15745. // emitCopy
  // While more than 64 bytes remain: write 0xee (= (60-1)<<2 | 2) plus the
  // 16-bit offset, and subtract 60 from the remaining length.
  15746. two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
  15747. CMPL R12, $0x40
  15748. JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
  15749. MOVB $0xee, (CX)
  15750. MOVW R8, 1(CX)
  15751. LEAL -60(R12), R12
  15752. ADDQ $0x03, CX
  15753. JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
  // Remaining length <= 64. Lengths >= 12 use the three-byte form; shorter
  // ones use the two-byte form below.
  // NOTE(review): unlike the 10B variant earlier in this file, there is no
  // offset >= 0x800 check before the two-byte form — presumably offsets always
  // fit in 11 bits for inputs given to this variant; confirm against caller.
  15754. two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
  15755. MOVL R12, SI
  15756. SHLL $0x02, SI
  15757. CMPL R12, $0x0c
  15758. JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
  // Two-byte form: first byte = (length<<2) - 15, i.e. ((length-4)<<2) | 1,
  // OR'ed with (offset>>8)<<5; second byte = low 8 bits of the offset.
  15759. LEAL -15(SI), SI
  15760. MOVB R8, 1(CX)
  15761. SHRL $0x08, R8
  15762. SHLL $0x05, R8
  15763. ORL R8, SI
  15764. MOVB SI, (CX)
  15765. ADDQ $0x02, CX
  15766. JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
  // Three-byte form: first byte = (length<<2) - 2, i.e. ((length-1)<<2) | 2,
  // followed by the 16-bit offset.
  15767. emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
  15768. LEAL -2(SI), SI
  15769. MOVB SI, (CX)
  15770. MOVW R8, 1(CX)
  15771. ADDQ $0x03, CX
  // Past the search limit -> emit the remainder. Write pointer at or beyond
  // the dst limit -> return 0.
  15772. match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
  15773. CMPL DX, 8(SP)
  15774. JAE emit_remainder_encodeSnappyBetterBlockAsm8B
  15775. CMPQ CX, (SP)
  15776. JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
  15777. MOVQ $0x00000000, ret+56(FP)
  15778. RET
  // ---- Index positions covered by the match -----------------------------
  // SI and R8 are reused for the two hash multipliers. DI = match start + 1,
  // R9 = match end - 2. The data at DI and R9 goes into the 6-byte table; the
  // data at DI+1 and R9+1 goes into the 4-byte table.
  15779. match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
  15780. MOVQ $0x0000cf1bbcdcbf9b, SI
  15781. MOVQ $0x9e3779b1, R8
  15782. LEAQ 1(DI), DI
  15783. LEAQ -2(DX), R9
  15784. MOVQ (BX)(DI*1), R10
  15785. MOVQ 1(BX)(DI*1), R11
  15786. MOVQ (BX)(R9*1), R12
  15787. MOVQ 1(BX)(R9*1), R13
  15788. SHLQ $0x10, R10
  15789. IMULQ SI, R10
  15790. SHRQ $0x36, R10
  15791. SHLQ $0x20, R11
  15792. IMULQ R8, R11
  15793. SHRQ $0x38, R11
  15794. SHLQ $0x10, R12
  15795. IMULQ SI, R12
  15796. SHRQ $0x36, R12
  15797. SHLQ $0x20, R13
  15798. IMULQ R8, R13
  15799. SHRQ $0x38, R13
  15800. LEAQ 1(DI), R8
  15801. LEAQ 1(R9), R14
  15802. MOVL DI, (AX)(R10*4)
  15803. MOVL R9, (AX)(R12*4)
  15804. MOVL R8, 4096(AX)(R11*4)
  15805. MOVL R14, 4096(AX)(R13*4)
  // R8 = (DI + R9 + 1) / 2, the midpoint. DI then moves forward one and R9
  // back one before the fill loop.
  15806. LEAQ 1(R9)(DI*1), R8
  15807. SHRQ $0x01, R8
  15808. ADDQ $0x01, DI
  15809. SUBQ $0x01, R9
  // While R8 < R9: insert positions DI and R8 into the 6-byte table and
  // advance both by 2. Then resume searching at DX.
  15810. index_loop_encodeSnappyBetterBlockAsm8B:
  15811. CMPQ R8, R9
  15812. JAE search_loop_encodeSnappyBetterBlockAsm8B
  15813. MOVQ (BX)(DI*1), R10
  15814. MOVQ (BX)(R8*1), R11
  15815. SHLQ $0x10, R10
  15816. IMULQ SI, R10
  15817. SHRQ $0x36, R10
  15818. SHLQ $0x10, R11
  15819. IMULQ SI, R11
  15820. SHRQ $0x36, R11
  15821. MOVL DI, (AX)(R10*4)
  15822. MOVL R8, (AX)(R11*4)
  15823. ADDQ $0x02, DI
  15824. ADDQ $0x02, R8
  15825. JMP index_loop_encodeSnappyBetterBlockAsm8B
  // ---- Emit the trailing literals ---------------------------------------
  // AX (the table base until now) is reused as scratch from here on.
  // Return 0 if write pointer + remaining length + 3 reaches the dst limit.
  15826. emit_remainder_encodeSnappyBetterBlockAsm8B:
  15827. MOVQ src_len+32(FP), AX
  15828. SUBL 12(SP), AX
  15829. LEAQ 3(CX)(AX*1), AX
  15830. CMPQ AX, (SP)
  15831. JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B
  15832. MOVQ $0x00000000, ret+56(FP)
  15833. RET
  // Nothing to emit if the literal start already equals src_len.
  // Otherwise AX = &src[literal start], SI = remaining length, DX = length-1.
  15834. emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
  15835. MOVQ src_len+32(FP), AX
  15836. MOVL 12(SP), DX
  15837. CMPL DX, AX
  15838. JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
  15839. MOVL AX, SI
  15840. MOVL AX, 12(SP)
  15841. LEAQ (BX)(DX*1), AX
  15842. SUBL DX, SI
  15843. LEAL -1(SI), DX
  // Same header selection as in the match path: 1, 2 or 3 header bytes.
  15844. CMPL DX, $0x3c
  15845. JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
  15846. CMPL DX, $0x00000100
  15847. JB two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
  // Never taken (flags unchanged since the CMPL); falls through regardless.
  15848. JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
  15849. three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15850. MOVB $0xf4, (CX)
  15851. MOVW DX, 1(CX)
  15852. ADDQ $0x03, CX
  15853. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
  15854. two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15855. MOVB $0xf0, (CX)
  15856. MOVB DL, 1(CX)
  15857. ADDQ $0x02, CX
  15858. CMPL DX, $0x40
  15859. JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
  15860. JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
  15861. one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15862. SHLB $0x02, DL
  15863. MOVB DL, (CX)
  15864. ADDQ $0x01, CX
  // Short copy of BX bytes from AX to CX (BX is reused as the length here).
  // Unlike the match path, each size class moves exactly the requested bytes,
  // so nothing is read or written past the end of the literal.
  15865. memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15866. LEAQ (CX)(SI*1), DX
  15867. MOVL SI, BX
  15868. // genMemMoveShort
  15869. CMPQ BX, $0x03
  15870. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
  15871. JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
  15872. CMPQ BX, $0x08
  15873. JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
  15874. CMPQ BX, $0x10
  15875. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
  15876. CMPQ BX, $0x20
  15877. JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
  15878. JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
  // 1 or 2 bytes: first byte and last byte (the same byte when length is 1).
  15879. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
  15880. MOVB (AX), SI
  15881. MOVB -1(AX)(BX*1), AL
  15882. MOVB SI, (CX)
  15883. MOVB AL, -1(CX)(BX*1)
  15884. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  // Exactly 3 bytes: one 2-byte move plus one 1-byte move.
  15885. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
  15886. MOVW (AX), SI
  15887. MOVB 2(AX), AL
  15888. MOVW SI, (CX)
  15889. MOVB AL, 2(CX)
  15890. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  // 4..7 bytes: first 4 and last 4 bytes (may overlap).
  15891. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
  15892. MOVL (AX), SI
  15893. MOVL -4(AX)(BX*1), AX
  15894. MOVL SI, (CX)
  15895. MOVL AX, -4(CX)(BX*1)
  15896. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  // 8..16 bytes: first 8 and last 8 bytes (may overlap).
  15897. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
  15898. MOVQ (AX), SI
  15899. MOVQ -8(AX)(BX*1), AX
  15900. MOVQ SI, (CX)
  15901. MOVQ AX, -8(CX)(BX*1)
  15902. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  // 17..32 bytes: first 16 and last 16 bytes.
  15903. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
  15904. MOVOU (AX), X0
  15905. MOVOU -16(AX)(BX*1), X1
  15906. MOVOU X0, (CX)
  15907. MOVOU X1, -16(CX)(BX*1)
  15908. JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
  // 33..64 bytes: first 32 and last 32 bytes.
  15909. emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
  15910. MOVOU (AX), X0
  15911. MOVOU 16(AX), X1
  15912. MOVOU -32(AX)(BX*1), X2
  15913. MOVOU -16(AX)(BX*1), X3
  15914. MOVOU X0, (CX)
  15915. MOVOU X1, 16(CX)
  15916. MOVOU X2, -32(CX)(BX*1)
  15917. MOVOU X3, -16(CX)(BX*1)
  15918. memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15919. MOVQ DX, CX
  15920. JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
  // Long copy of BX bytes from AX to CX; same scheme as the long copy in the
  // match path (head/tail saved in X0..X3, aligned 32-byte steps in between),
  // using DI as the block count and R8 as the running offset.
  15921. memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15922. LEAQ (CX)(SI*1), DX
  15923. MOVL SI, BX
  15924. // genMemMoveLong
  15925. MOVOU (AX), X0
  15926. MOVOU 16(AX), X1
  15927. MOVOU -32(AX)(BX*1), X2
  15928. MOVOU -16(AX)(BX*1), X3
  15929. MOVQ BX, DI
  15930. SHRQ $0x05, DI
  15931. MOVQ CX, SI
  15932. ANDL $0x0000001f, SI
  15933. MOVQ $0x00000040, R8
  15934. SUBQ SI, R8
  15935. DECQ DI
  15936. JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  15937. LEAQ -32(AX)(R8*1), SI
  15938. LEAQ -32(CX)(R8*1), R9
  15939. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
  15940. MOVOU (SI), X4
  15941. MOVOU 16(SI), X5
  15942. MOVOA X4, (R9)
  15943. MOVOA X5, 16(R9)
  15944. ADDQ $0x20, R9
  15945. ADDQ $0x20, SI
  15946. ADDQ $0x20, R8
  15947. DECQ DI
  15948. JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
  15949. emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
  15950. MOVOU -32(AX)(R8*1), X4
  15951. MOVOU -16(AX)(R8*1), X5
  15952. MOVOA X4, -32(CX)(R8*1)
  15953. MOVOA X5, -16(CX)(R8*1)
  15954. ADDQ $0x20, R8
  15955. CMPQ BX, R8
  15956. JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
  15957. MOVOU X0, (CX)
  15958. MOVOU X1, 16(CX)
  15959. MOVOU X2, -32(CX)(BX*1)
  15960. MOVOU X3, -16(CX)(BX*1)
  15961. MOVQ DX, CX
  // Return value: write pointer minus dst_base.
  15962. emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
  15963. MOVQ dst_base+0(FP), AX
  15964. SUBQ AX, CX
  15965. MOVQ CX, ret+56(FP)
  15966. RET
  15967. // func calcBlockSize(src []byte, tmp *[32768]byte) int
  15968. // Requires: BMI, SSE2
  //
  // Scans src for matches using tmp as a hash table and adds up a byte count
  // in CX (tag bytes + literal bytes + copy bytes). Nothing is written to a
  // destination buffer; the only stores are to tmp, the local frame and ret.
  // Returns the accumulated count, or 0 as soon as the count would reach the
  // limit kept at (SP).
  // NOTE(review): label names (emit literal / emitCopy / memmove) suggest this
  // mirrors a Snappy-style block encoder in size-only form - confirm against
  // the generator.
  //
  // Register roles:
  //   AX = tmp (table of 32-bit positions; clobbered in emit_remainder)
  //   BX = src base (after the zeroing loop)
  //   CX = running output size
  //   DX = current position s in src
  // Frame ($24 bytes of locals):
  //    0(SP) 64-bit: size limit = src_len - 9 - (src_len >> 5)
  //    8(SP) 32-bit: src_len - 8, last position the search may reach
  //   12(SP) 32-bit: start of the literal run not yet counted
  //   16(SP) 32-bit: offset of the most recent match (starts at 1)
  //   20(SP) 32-bit: position to resume at when no candidate matches
  15969. TEXT ·calcBlockSize(SB), $24-40
  15970. MOVQ tmp+24(FP), AX
  15971. XORQ CX, CX
  15972. MOVQ $0x00000100, DX
  15973. MOVQ AX, BX
  15974. PXOR X0, X0
  // Clear the table: 0x100 iterations x 0x80 bytes = 32768 bytes.
  15975. zero_loop_calcBlockSize:
  15976. MOVOU X0, (BX)
  15977. MOVOU X0, 16(BX)
  15978. MOVOU X0, 32(BX)
  15979. MOVOU X0, 48(BX)
  15980. MOVOU X0, 64(BX)
  15981. MOVOU X0, 80(BX)
  15982. MOVOU X0, 96(BX)
  15983. MOVOU X0, 112(BX)
  15984. ADDQ $0x80, BX
  15985. DECQ DX
  15986. JNZ zero_loop_calcBlockSize
  // Initialise the frame slots described in the header; CX is still 0 here,
  // so the LEAQ below just copies the limit into BX.
  15987. MOVL $0x00000000, 12(SP)
  15988. MOVQ src_len+8(FP), DX
  15989. LEAQ -9(DX), BX
  15990. LEAQ -8(DX), SI
  15991. MOVL SI, 8(SP)
  15992. SHRQ $0x05, DX
  15993. SUBL DX, BX
  15994. LEAQ (CX)(BX*1), BX
  15995. MOVQ BX, (SP)
  15996. MOVL $0x00000001, DX
  15997. MOVL DX, 16(SP)
  15998. MOVQ src_base+0(FP), BX
  // Main search loop. The resume position is s + 4 + ((s - 12(SP)) >> 5), so
  // the stride grows with the length of the pending literal run. Leave for
  // emit_remainder once that position reaches src_len - 8.
  15999. search_loop_calcBlockSize:
  16000. MOVL DX, SI
  16001. SUBL 12(SP), SI
  16002. SHRL $0x05, SI
  16003. LEAL 4(DX)(SI*1), SI
  16004. CMPL SI, 8(SP)
  16005. JAE emit_remainder_calcBlockSize
  // DI = 8 bytes at src[s]. Hash = ((v << 16) * 0xcf1bbcdcbf9b) >> 51, i.e. the
  // low 6 bytes of v reduced to a 13-bit index (8192 entries x 4 bytes).
  // R10 = hash at s, R11 = hash at s+1.
  16006. MOVQ (BX)(DX*1), DI
  16007. MOVL SI, 20(SP)
  16008. MOVQ $0x0000cf1bbcdcbf9b, R9
  16009. MOVQ DI, R10
  16010. MOVQ DI, R11
  16011. SHRQ $0x08, R11
  16012. SHLQ $0x10, R10
  16013. IMULQ R9, R10
  16014. SHRQ $0x33, R10
  16015. SHLQ $0x10, R11
  16016. IMULQ R9, R11
  16017. SHRQ $0x33, R11
  // Fetch the previous entries (SI for s, R8 for s+1), then record s and s+1.
  16018. MOVL (AX)(R10*4), SI
  16019. MOVL (AX)(R11*4), R8
  16020. MOVL DX, (AX)(R10*4)
  16021. LEAL 1(DX), R10
  16022. MOVL R10, (AX)(R11*4)
  // R10 = hash of the bytes at s+2 (used later in no_repeat_found).
  16023. MOVQ DI, R10
  16024. SHRQ $0x10, R10
  16025. SHLQ $0x10, R10
  16026. IMULQ R9, R10
  16027. SHRQ $0x33, R10
  // Repeat check: compare the 4 bytes at s+1 with the 4 bytes at
  // s+1 - 16(SP) (the previous match offset).
  16028. MOVL DX, R9
  16029. SUBL 16(SP), R9
  16030. MOVL 1(BX)(R9*1), R11
  16031. MOVQ DI, R9
  16032. SHRQ $0x08, R9
  16033. CMPL R9, R11
  16034. JNE no_repeat_found_calcBlockSize
  // Repeat found at s+1. DI = match start, R8 = its source (DI - offset).
  // Extend backwards while the preceding bytes are equal, DI stays above
  // 12(SP) and R8 has not reached 0.
  16035. LEAL 1(DX), DI
  16036. MOVL 12(SP), SI
  16037. MOVL DI, R8
  16038. SUBL 16(SP), R8
  16039. JZ repeat_extend_back_end_calcBlockSize
  16040. repeat_extend_back_loop_calcBlockSize:
  16041. CMPL DI, SI
  16042. JBE repeat_extend_back_end_calcBlockSize
  16043. MOVB -1(BX)(R8*1), R9
  16044. MOVB -1(BX)(DI*1), R10
  16045. CMPB R9, R10
  16046. JNE repeat_extend_back_end_calcBlockSize
  16047. LEAL -1(DI), DI
  16048. DECL R8
  16049. JNZ repeat_extend_back_loop_calcBlockSize
  // Limit check: continue only if CX + pending literal length + 5 is below
  // the limit at (SP); otherwise return 0.
  16050. repeat_extend_back_end_calcBlockSize:
  16051. MOVL DI, SI
  16052. SUBL 12(SP), SI
  16053. LEAQ 5(CX)(SI*1), SI
  16054. CMPQ SI, (SP)
  16055. JB repeat_dst_size_check_calcBlockSize
  16056. MOVQ $0x00000000, ret+32(FP)
  16057. RET
  // Count the literal run [12(SP), DI). R8 = its length, SI = length - 1.
  // Tag size by SI: < 60 -> 1 byte, < 2^8 -> 2, < 2^16 -> 3, < 2^24 -> 4,
  // otherwise 5. The address placed in R9 is not read before R9 is
  // overwritten below.
  16058. repeat_dst_size_check_calcBlockSize:
  16059. MOVL 12(SP), SI
  16060. CMPL SI, DI
  16061. JEQ emit_literal_done_repeat_emit_calcBlockSize
  16062. MOVL DI, R8
  16063. MOVL DI, 12(SP)
  16064. LEAQ (BX)(SI*1), R9
  16065. SUBL SI, R8
  16066. LEAL -1(R8), SI
  16067. CMPL SI, $0x3c
  16068. JB one_byte_repeat_emit_calcBlockSize
  16069. CMPL SI, $0x00000100
  16070. JB two_bytes_repeat_emit_calcBlockSize
  16071. CMPL SI, $0x00010000
  16072. JB three_bytes_repeat_emit_calcBlockSize
  16073. CMPL SI, $0x01000000
  16074. JB four_bytes_repeat_emit_calcBlockSize
  16075. ADDQ $0x05, CX
  16076. JMP memmove_long_repeat_emit_calcBlockSize
  16077. four_bytes_repeat_emit_calcBlockSize:
  16078. ADDQ $0x04, CX
  16079. JMP memmove_long_repeat_emit_calcBlockSize
  16080. three_bytes_repeat_emit_calcBlockSize:
  16081. ADDQ $0x03, CX
  16082. JMP memmove_long_repeat_emit_calcBlockSize
  16083. two_bytes_repeat_emit_calcBlockSize:
  16084. ADDQ $0x02, CX
  16085. CMPL SI, $0x40
  16086. JB memmove_repeat_emit_calcBlockSize
  16087. JMP memmove_long_repeat_emit_calcBlockSize
  16088. one_byte_repeat_emit_calcBlockSize:
  16089. ADDQ $0x01, CX
  // Both "memmove" labels only add the literal length to CX; no bytes move.
  16090. memmove_repeat_emit_calcBlockSize:
  16091. LEAQ (CX)(R8*1), CX
  16092. JMP emit_literal_done_repeat_emit_calcBlockSize
  16093. memmove_long_repeat_emit_calcBlockSize:
  16094. LEAQ (CX)(R8*1), CX
  // Step past the 4 bytes already known to match (s += 1 + 4), then extend
  // forwards. R8 = bytes left in src, R9 = &src[s], SI = &src[s - offset].
  16095. emit_literal_done_repeat_emit_calcBlockSize:
  16096. ADDL $0x05, DX
  16097. MOVL DX, SI
  16098. SUBL 16(SP), SI
  16099. MOVQ src_len+8(FP), R8
  16100. SUBL DX, R8
  16101. LEAQ (BX)(DX*1), R9
  16102. LEAQ (BX)(SI*1), SI
  // R11 = number of equal bytes, compared 16, 8, 4, 2 and 1 at a time.
  // On a differing 8-byte word, trailing-zero-count / 8 gives the number of
  // equal leading bytes in that word.
  16103. // matchLen
  16104. XORL R11, R11
  16105. matchlen_loopback_16_repeat_extend_calcBlockSize:
  16106. CMPL R8, $0x10
  16107. JB matchlen_match8_repeat_extend_calcBlockSize
  16108. MOVQ (R9)(R11*1), R10
  16109. MOVQ 8(R9)(R11*1), R12
  16110. XORQ (SI)(R11*1), R10
  16111. JNZ matchlen_bsf_8_repeat_extend_calcBlockSize
  16112. XORQ 8(SI)(R11*1), R12
  16113. JNZ matchlen_bsf_16repeat_extend_calcBlockSize
  16114. LEAL -16(R8), R8
  16115. LEAL 16(R11), R11
  16116. JMP matchlen_loopback_16_repeat_extend_calcBlockSize
  16117. matchlen_bsf_16repeat_extend_calcBlockSize:
  16118. #ifdef GOAMD64_v3
  16119. TZCNTQ R12, R12
  16120. #else
  16121. BSFQ R12, R12
  16122. #endif
  16123. SARQ $0x03, R12
  16124. LEAL 8(R11)(R12*1), R11
  16125. JMP repeat_extend_forward_end_calcBlockSize
  16126. matchlen_match8_repeat_extend_calcBlockSize:
  16127. CMPL R8, $0x08
  16128. JB matchlen_match4_repeat_extend_calcBlockSize
  16129. MOVQ (R9)(R11*1), R10
  16130. XORQ (SI)(R11*1), R10
  16131. JNZ matchlen_bsf_8_repeat_extend_calcBlockSize
  16132. LEAL -8(R8), R8
  16133. LEAL 8(R11), R11
  16134. JMP matchlen_match4_repeat_extend_calcBlockSize
  16135. matchlen_bsf_8_repeat_extend_calcBlockSize:
  16136. #ifdef GOAMD64_v3
  16137. TZCNTQ R10, R10
  16138. #else
  16139. BSFQ R10, R10
  16140. #endif
  16141. SARQ $0x03, R10
  16142. LEAL (R11)(R10*1), R11
  16143. JMP repeat_extend_forward_end_calcBlockSize
  16144. matchlen_match4_repeat_extend_calcBlockSize:
  16145. CMPL R8, $0x04
  16146. JB matchlen_match2_repeat_extend_calcBlockSize
  16147. MOVL (R9)(R11*1), R10
  16148. CMPL (SI)(R11*1), R10
  16149. JNE matchlen_match2_repeat_extend_calcBlockSize
  16150. LEAL -4(R8), R8
  16151. LEAL 4(R11), R11
  // Tail: exactly 1 byte left -> single-byte compare; 0 left -> done.
  16152. matchlen_match2_repeat_extend_calcBlockSize:
  16153. CMPL R8, $0x01
  16154. JE matchlen_match1_repeat_extend_calcBlockSize
  16155. JB repeat_extend_forward_end_calcBlockSize
  16156. MOVW (R9)(R11*1), R10
  16157. CMPW (SI)(R11*1), R10
  16158. JNE matchlen_match1_repeat_extend_calcBlockSize
  16159. LEAL 2(R11), R11
  16160. SUBL $0x02, R8
  16161. JZ repeat_extend_forward_end_calcBlockSize
  16162. matchlen_match1_repeat_extend_calcBlockSize:
  16163. MOVB (R9)(R11*1), R10
  16164. CMPB (SI)(R11*1), R10
  16165. JNE repeat_extend_forward_end_calcBlockSize
  16166. LEAL 1(R11), R11
  // s += matched bytes. SI = total match length (s - match start),
  // DI = offset.
  16167. repeat_extend_forward_end_calcBlockSize:
  16168. ADDL R11, DX
  16169. MOVL DX, SI
  16170. SUBL DI, SI
  16171. MOVL 16(SP), DI
  // Copy size accounting. Offset >= 65536: 5 bytes per chunk of up to 64.
  // Offset < 65536: 3 bytes per 60 while length > 64, then 2 bytes if
  // length < 12 and offset < 2048, otherwise 3 bytes.
  16172. // emitCopy
  16173. CMPL DI, $0x00010000
  16174. JB two_byte_offset_repeat_as_copy_calcBlockSize
  16175. four_bytes_loop_back_repeat_as_copy_calcBlockSize:
  16176. CMPL SI, $0x40
  16177. JBE four_bytes_remain_repeat_as_copy_calcBlockSize
  16178. LEAL -64(SI), SI
  16179. ADDQ $0x05, CX
  16180. CMPL SI, $0x04
  16181. JB four_bytes_remain_repeat_as_copy_calcBlockSize
  16182. JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize
  16183. four_bytes_remain_repeat_as_copy_calcBlockSize:
  16184. TESTL SI, SI
  16185. JZ repeat_end_emit_calcBlockSize
  16186. XORL SI, SI
  16187. ADDQ $0x05, CX
  16188. JMP repeat_end_emit_calcBlockSize
  16189. two_byte_offset_repeat_as_copy_calcBlockSize:
  16190. CMPL SI, $0x40
  16191. JBE two_byte_offset_short_repeat_as_copy_calcBlockSize
  16192. LEAL -60(SI), SI
  16193. ADDQ $0x03, CX
  16194. JMP two_byte_offset_repeat_as_copy_calcBlockSize
  // R8 = length << 2 is computed but not read afterwards in this routine.
  16195. two_byte_offset_short_repeat_as_copy_calcBlockSize:
  16196. MOVL SI, R8
  16197. SHLL $0x02, R8
  16198. CMPL SI, $0x0c
  16199. JAE emit_copy_three_repeat_as_copy_calcBlockSize
  16200. CMPL DI, $0x00000800
  16201. JAE emit_copy_three_repeat_as_copy_calcBlockSize
  16202. ADDQ $0x02, CX
  16203. JMP repeat_end_emit_calcBlockSize
  16204. emit_copy_three_repeat_as_copy_calcBlockSize:
  16205. ADDQ $0x03, CX
  // Everything up to s is now counted; restart the search from s.
  16206. repeat_end_emit_calcBlockSize:
  16207. MOVL DX, 12(SP)
  16208. JMP search_loop_calcBlockSize
  // No repeat. Test three candidates in turn by comparing 4 bytes:
  //   table entry for s (SI) against the bytes at s,
  //   table entry for s+1 (R8) against the bytes at s+1,
  //   table entry for s+2 (loaded here) against the bytes at s+2.
  // The table slot for s+2 is set to s+2 on either remaining path.
  // If none match, resume at the position saved in 20(SP).
  16209. no_repeat_found_calcBlockSize:
  16210. CMPL (BX)(SI*1), DI
  16211. JEQ candidate_match_calcBlockSize
  16212. SHRQ $0x08, DI
  16213. MOVL (AX)(R10*4), SI
  16214. LEAL 2(DX), R9
  16215. CMPL (BX)(R8*1), DI
  16216. JEQ candidate2_match_calcBlockSize
  16217. MOVL R9, (AX)(R10*4)
  16218. SHRQ $0x08, DI
  16219. CMPL (BX)(SI*1), DI
  16220. JEQ candidate3_match_calcBlockSize
  16221. MOVL 20(SP), DX
  16222. JMP search_loop_calcBlockSize
  16223. candidate3_match_calcBlockSize:
  16224. ADDL $0x02, DX
  16225. JMP candidate_match_calcBlockSize
  16226. candidate2_match_calcBlockSize:
  16227. MOVL R9, (AX)(R10*4)
  16228. INCL DX
  16229. MOVL R8, SI
  // Here DX = match position and SI = candidate position. Extend backwards
  // while the preceding bytes are equal, DX stays above 12(SP) and SI > 0.
  16230. candidate_match_calcBlockSize:
  16231. MOVL 12(SP), DI
  16232. TESTL SI, SI
  16233. JZ match_extend_back_end_calcBlockSize
  16234. match_extend_back_loop_calcBlockSize:
  16235. CMPL DX, DI
  16236. JBE match_extend_back_end_calcBlockSize
  16237. MOVB -1(BX)(SI*1), R8
  16238. MOVB -1(BX)(DX*1), R9
  16239. CMPB R8, R9
  16240. JNE match_extend_back_end_calcBlockSize
  16241. LEAL -1(DX), DX
  16242. DECL SI
  16243. JZ match_extend_back_end_calcBlockSize
  16244. JMP match_extend_back_loop_calcBlockSize
  // Limit check: CX + pending literal length + 5 must be below (SP),
  // otherwise return 0.
  16245. match_extend_back_end_calcBlockSize:
  16246. MOVL DX, DI
  16247. SUBL 12(SP), DI
  16248. LEAQ 5(CX)(DI*1), DI
  16249. CMPQ DI, (SP)
  16250. JB match_dst_size_check_calcBlockSize
  16251. MOVQ $0x00000000, ret+32(FP)
  16252. RET
  // Count the literal run [12(SP), DX). R9 = its length, DI = length - 1;
  // tag size thresholds are the same as in the repeat path. The address
  // first placed in DI is overwritten two instructions later without being
  // read.
  16253. match_dst_size_check_calcBlockSize:
  16254. MOVL DX, DI
  16255. MOVL 12(SP), R8
  16256. CMPL R8, DI
  16257. JEQ emit_literal_done_match_emit_calcBlockSize
  16258. MOVL DI, R9
  16259. MOVL DI, 12(SP)
  16260. LEAQ (BX)(R8*1), DI
  16261. SUBL R8, R9
  16262. LEAL -1(R9), DI
  16263. CMPL DI, $0x3c
  16264. JB one_byte_match_emit_calcBlockSize
  16265. CMPL DI, $0x00000100
  16266. JB two_bytes_match_emit_calcBlockSize
  16267. CMPL DI, $0x00010000
  16268. JB three_bytes_match_emit_calcBlockSize
  16269. CMPL DI, $0x01000000
  16270. JB four_bytes_match_emit_calcBlockSize
  16271. ADDQ $0x05, CX
  16272. JMP memmove_long_match_emit_calcBlockSize
  16273. four_bytes_match_emit_calcBlockSize:
  16274. ADDQ $0x04, CX
  16275. JMP memmove_long_match_emit_calcBlockSize
  16276. three_bytes_match_emit_calcBlockSize:
  16277. ADDQ $0x03, CX
  16278. JMP memmove_long_match_emit_calcBlockSize
  16279. two_bytes_match_emit_calcBlockSize:
  16280. ADDQ $0x02, CX
  16281. CMPL DI, $0x40
  16282. JB memmove_match_emit_calcBlockSize
  16283. JMP memmove_long_match_emit_calcBlockSize
  16284. one_byte_match_emit_calcBlockSize:
  16285. ADDQ $0x01, CX
  16286. memmove_match_emit_calcBlockSize:
  16287. LEAQ (CX)(R9*1), CX
  16288. JMP emit_literal_done_match_emit_calcBlockSize
  16289. memmove_long_match_emit_calcBlockSize:
  16290. LEAQ (CX)(R9*1), CX
  // Match with no pending literals. Save offset = DX - SI in 16(SP), skip the
  // 4 bytes already compared on both sides, then extend forwards.
  // DI = bytes left in src, R8 = &src[s], SI = &src[candidate].
  16291. emit_literal_done_match_emit_calcBlockSize:
  16292. match_nolit_loop_calcBlockSize:
  16293. MOVL DX, DI
  16294. SUBL SI, DI
  16295. MOVL DI, 16(SP)
  16296. ADDL $0x04, DX
  16297. ADDL $0x04, SI
  16298. MOVQ src_len+8(FP), DI
  16299. SUBL DX, DI
  16300. LEAQ (BX)(DX*1), R8
  16301. LEAQ (BX)(SI*1), SI
  // R10 = number of additional equal bytes (same scheme as the repeat path).
  16302. // matchLen
  16303. XORL R10, R10
  16304. matchlen_loopback_16_match_nolit_calcBlockSize:
  16305. CMPL DI, $0x10
  16306. JB matchlen_match8_match_nolit_calcBlockSize
  16307. MOVQ (R8)(R10*1), R9
  16308. MOVQ 8(R8)(R10*1), R11
  16309. XORQ (SI)(R10*1), R9
  16310. JNZ matchlen_bsf_8_match_nolit_calcBlockSize
  16311. XORQ 8(SI)(R10*1), R11
  16312. JNZ matchlen_bsf_16match_nolit_calcBlockSize
  16313. LEAL -16(DI), DI
  16314. LEAL 16(R10), R10
  16315. JMP matchlen_loopback_16_match_nolit_calcBlockSize
  16316. matchlen_bsf_16match_nolit_calcBlockSize:
  16317. #ifdef GOAMD64_v3
  16318. TZCNTQ R11, R11
  16319. #else
  16320. BSFQ R11, R11
  16321. #endif
  16322. SARQ $0x03, R11
  16323. LEAL 8(R10)(R11*1), R10
  16324. JMP match_nolit_end_calcBlockSize
  16325. matchlen_match8_match_nolit_calcBlockSize:
  16326. CMPL DI, $0x08
  16327. JB matchlen_match4_match_nolit_calcBlockSize
  16328. MOVQ (R8)(R10*1), R9
  16329. XORQ (SI)(R10*1), R9
  16330. JNZ matchlen_bsf_8_match_nolit_calcBlockSize
  16331. LEAL -8(DI), DI
  16332. LEAL 8(R10), R10
  16333. JMP matchlen_match4_match_nolit_calcBlockSize
  16334. matchlen_bsf_8_match_nolit_calcBlockSize:
  16335. #ifdef GOAMD64_v3
  16336. TZCNTQ R9, R9
  16337. #else
  16338. BSFQ R9, R9
  16339. #endif
  16340. SARQ $0x03, R9
  16341. LEAL (R10)(R9*1), R10
  16342. JMP match_nolit_end_calcBlockSize
  16343. matchlen_match4_match_nolit_calcBlockSize:
  16344. CMPL DI, $0x04
  16345. JB matchlen_match2_match_nolit_calcBlockSize
  16346. MOVL (R8)(R10*1), R9
  16347. CMPL (SI)(R10*1), R9
  16348. JNE matchlen_match2_match_nolit_calcBlockSize
  16349. LEAL -4(DI), DI
  16350. LEAL 4(R10), R10
  16351. matchlen_match2_match_nolit_calcBlockSize:
  16352. CMPL DI, $0x01
  16353. JE matchlen_match1_match_nolit_calcBlockSize
  16354. JB match_nolit_end_calcBlockSize
  16355. MOVW (R8)(R10*1), R9
  16356. CMPW (SI)(R10*1), R9
  16357. JNE matchlen_match1_match_nolit_calcBlockSize
  16358. LEAL 2(R10), R10
  16359. SUBL $0x02, DI
  16360. JZ match_nolit_end_calcBlockSize
  16361. matchlen_match1_match_nolit_calcBlockSize:
  16362. MOVB (R8)(R10*1), R9
  16363. CMPB (SI)(R10*1), R9
  16364. JNE match_nolit_end_calcBlockSize
  16365. LEAL 1(R10), R10
  // s += extension. R10 += 4 gives the full match length, SI = offset, and
  // 12(SP) is moved to s because everything before it is now counted.
  16366. match_nolit_end_calcBlockSize:
  16367. ADDL R10, DX
  16368. MOVL 16(SP), SI
  16369. ADDL $0x04, R10
  16370. MOVL DX, 12(SP)
  // Same copy size accounting as in the repeat path, with length in R10 and
  // offset in SI.
  16371. // emitCopy
  16372. CMPL SI, $0x00010000
  16373. JB two_byte_offset_match_nolit_calcBlockSize
  16374. four_bytes_loop_back_match_nolit_calcBlockSize:
  16375. CMPL R10, $0x40
  16376. JBE four_bytes_remain_match_nolit_calcBlockSize
  16377. LEAL -64(R10), R10
  16378. ADDQ $0x05, CX
  16379. CMPL R10, $0x04
  16380. JB four_bytes_remain_match_nolit_calcBlockSize
  16381. JMP four_bytes_loop_back_match_nolit_calcBlockSize
  16382. four_bytes_remain_match_nolit_calcBlockSize:
  16383. TESTL R10, R10
  16384. JZ match_nolit_emitcopy_end_calcBlockSize
  16385. XORL SI, SI
  16386. ADDQ $0x05, CX
  16387. JMP match_nolit_emitcopy_end_calcBlockSize
  16388. two_byte_offset_match_nolit_calcBlockSize:
  16389. CMPL R10, $0x40
  16390. JBE two_byte_offset_short_match_nolit_calcBlockSize
  16391. LEAL -60(R10), R10
  16392. ADDQ $0x03, CX
  16393. JMP two_byte_offset_match_nolit_calcBlockSize
  16394. two_byte_offset_short_match_nolit_calcBlockSize:
  16395. MOVL R10, DI
  16396. SHLL $0x02, DI
  16397. CMPL R10, $0x0c
  16398. JAE emit_copy_three_match_nolit_calcBlockSize
  16399. CMPL SI, $0x00000800
  16400. JAE emit_copy_three_match_nolit_calcBlockSize
  16401. ADDQ $0x02, CX
  16402. JMP match_nolit_emitcopy_end_calcBlockSize
  16403. emit_copy_three_match_nolit_calcBlockSize:
  16404. ADDQ $0x03, CX
  // Stop searching once s reaches src_len - 8. Otherwise load the 8 bytes at
  // s-2 and return 0 if CX has reached the limit.
  16405. match_nolit_emitcopy_end_calcBlockSize:
  16406. CMPL DX, 8(SP)
  16407. JAE emit_remainder_calcBlockSize
  16408. MOVQ -2(BX)(DX*1), DI
  16409. CMPQ CX, (SP)
  16410. JB match_nolit_dst_ok_calcBlockSize
  16411. MOVQ $0x00000000, ret+32(FP)
  16412. RET
  // Record s-2 and s in the table (R8 = hash at s-2, SI = hash at s). If the
  // previous entry for s points at the same 4 bytes as src[s], take it as an
  // immediate match; otherwise advance one byte and search again.
  16413. match_nolit_dst_ok_calcBlockSize:
  16414. MOVQ $0x0000cf1bbcdcbf9b, R9
  16415. MOVQ DI, R8
  16416. SHRQ $0x10, DI
  16417. MOVQ DI, SI
  16418. SHLQ $0x10, R8
  16419. IMULQ R9, R8
  16420. SHRQ $0x33, R8
  16421. SHLQ $0x10, SI
  16422. IMULQ R9, SI
  16423. SHRQ $0x33, SI
  16424. LEAL -2(DX), R9
  16425. LEAQ (AX)(SI*4), R10
  16426. MOVL (R10), SI
  16427. MOVL R9, (AX)(R8*4)
  16428. MOVL DX, (R10)
  16429. CMPL (BX)(SI*1), DI
  16430. JEQ match_nolit_loop_calcBlockSize
  16431. INCL DX
  16432. JMP search_loop_calcBlockSize
  // Trailing bytes [12(SP), src_len) are counted as one literal run. AX (the
  // table pointer) is reused as scratch from here on. Return 0 if
  // CX + remaining length + 5 is not below the limit.
  16433. emit_remainder_calcBlockSize:
  16434. MOVQ src_len+8(FP), AX
  16435. SUBL 12(SP), AX
  16436. LEAQ 5(CX)(AX*1), AX
  16437. CMPQ AX, (SP)
  16438. JB emit_remainder_ok_calcBlockSize
  16439. MOVQ $0x00000000, ret+32(FP)
  16440. RET
  // SI = remaining length, AX = length - 1; same tag size thresholds as
  // above. Skipped entirely when 12(SP) already equals src_len.
  16441. emit_remainder_ok_calcBlockSize:
  16442. MOVQ src_len+8(FP), AX
  16443. MOVL 12(SP), DX
  16444. CMPL DX, AX
  16445. JEQ emit_literal_done_emit_remainder_calcBlockSize
  16446. MOVL AX, SI
  16447. MOVL AX, 12(SP)
  16448. LEAQ (BX)(DX*1), AX
  16449. SUBL DX, SI
  16450. LEAL -1(SI), AX
  16451. CMPL AX, $0x3c
  16452. JB one_byte_emit_remainder_calcBlockSize
  16453. CMPL AX, $0x00000100
  16454. JB two_bytes_emit_remainder_calcBlockSize
  16455. CMPL AX, $0x00010000
  16456. JB three_bytes_emit_remainder_calcBlockSize
  16457. CMPL AX, $0x01000000
  16458. JB four_bytes_emit_remainder_calcBlockSize
  16459. ADDQ $0x05, CX
  16460. JMP memmove_long_emit_remainder_calcBlockSize
  16461. four_bytes_emit_remainder_calcBlockSize:
  16462. ADDQ $0x04, CX
  16463. JMP memmove_long_emit_remainder_calcBlockSize
  16464. three_bytes_emit_remainder_calcBlockSize:
  16465. ADDQ $0x03, CX
  16466. JMP memmove_long_emit_remainder_calcBlockSize
  16467. two_bytes_emit_remainder_calcBlockSize:
  16468. ADDQ $0x02, CX
  16469. CMPL AX, $0x40
  16470. JB memmove_emit_remainder_calcBlockSize
  16471. JMP memmove_long_emit_remainder_calcBlockSize
  16472. one_byte_emit_remainder_calcBlockSize:
  16473. ADDQ $0x01, CX
  16474. memmove_emit_remainder_calcBlockSize:
  16475. LEAQ (CX)(SI*1), AX
  16476. MOVQ AX, CX
  16477. JMP emit_literal_done_emit_remainder_calcBlockSize
  16478. memmove_long_emit_remainder_calcBlockSize:
  16479. LEAQ (CX)(SI*1), AX
  16480. MOVQ AX, CX
  // Return the accumulated size.
  16481. emit_literal_done_emit_remainder_calcBlockSize:
  16482. MOVQ CX, ret+32(FP)
  16483. RET
  16484. // func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
  16485. // Requires: BMI, SSE2
  16486. TEXT ·calcBlockSizeSmall(SB), $24-40
  16487. MOVQ tmp+24(FP), AX
  16488. XORQ CX, CX
  16489. MOVQ $0x00000010, DX
  16490. MOVQ AX, BX
  16491. PXOR X0, X0
  16492. zero_loop_calcBlockSizeSmall:
  16493. MOVOU X0, (BX)
  16494. MOVOU X0, 16(BX)
  16495. MOVOU X0, 32(BX)
  16496. MOVOU X0, 48(BX)
  16497. MOVOU X0, 64(BX)
  16498. MOVOU X0, 80(BX)
  16499. MOVOU X0, 96(BX)
  16500. MOVOU X0, 112(BX)
  16501. ADDQ $0x80, BX
  16502. DECQ DX
  16503. JNZ zero_loop_calcBlockSizeSmall
  16504. MOVL $0x00000000, 12(SP)
  16505. MOVQ src_len+8(FP), DX
  16506. LEAQ -9(DX), BX
  16507. LEAQ -8(DX), SI
  16508. MOVL SI, 8(SP)
  16509. SHRQ $0x05, DX
  16510. SUBL DX, BX
  16511. LEAQ (CX)(BX*1), BX
  16512. MOVQ BX, (SP)
  16513. MOVL $0x00000001, DX
  16514. MOVL DX, 16(SP)
  16515. MOVQ src_base+0(FP), BX
  16516. search_loop_calcBlockSizeSmall:
  16517. MOVL DX, SI
  16518. SUBL 12(SP), SI
  16519. SHRL $0x04, SI
  16520. LEAL 4(DX)(SI*1), SI
  16521. CMPL SI, 8(SP)
  16522. JAE emit_remainder_calcBlockSizeSmall
  16523. MOVQ (BX)(DX*1), DI
  16524. MOVL SI, 20(SP)
  16525. MOVQ $0x9e3779b1, R9
  16526. MOVQ DI, R10
  16527. MOVQ DI, R11
  16528. SHRQ $0x08, R11
  16529. SHLQ $0x20, R10
  16530. IMULQ R9, R10
  16531. SHRQ $0x37, R10
  16532. SHLQ $0x20, R11
  16533. IMULQ R9, R11
  16534. SHRQ $0x37, R11
  16535. MOVL (AX)(R10*4), SI
  16536. MOVL (AX)(R11*4), R8
  16537. MOVL DX, (AX)(R10*4)
  16538. LEAL 1(DX), R10
  16539. MOVL R10, (AX)(R11*4)
  16540. MOVQ DI, R10
  16541. SHRQ $0x10, R10
  16542. SHLQ $0x20, R10
  16543. IMULQ R9, R10
  16544. SHRQ $0x37, R10
  16545. MOVL DX, R9
  16546. SUBL 16(SP), R9
  16547. MOVL 1(BX)(R9*1), R11
  16548. MOVQ DI, R9
  16549. SHRQ $0x08, R9
  16550. CMPL R9, R11
  16551. JNE no_repeat_found_calcBlockSizeSmall
  16552. LEAL 1(DX), DI
  16553. MOVL 12(SP), SI
  16554. MOVL DI, R8
  16555. SUBL 16(SP), R8
  16556. JZ repeat_extend_back_end_calcBlockSizeSmall
  16557. repeat_extend_back_loop_calcBlockSizeSmall:
  16558. CMPL DI, SI
  16559. JBE repeat_extend_back_end_calcBlockSizeSmall
  16560. MOVB -1(BX)(R8*1), R9
  16561. MOVB -1(BX)(DI*1), R10
  16562. CMPB R9, R10
  16563. JNE repeat_extend_back_end_calcBlockSizeSmall
  16564. LEAL -1(DI), DI
  16565. DECL R8
  16566. JNZ repeat_extend_back_loop_calcBlockSizeSmall
  16567. repeat_extend_back_end_calcBlockSizeSmall:
  16568. MOVL DI, SI
  16569. SUBL 12(SP), SI
  16570. LEAQ 3(CX)(SI*1), SI
  16571. CMPQ SI, (SP)
  16572. JB repeat_dst_size_check_calcBlockSizeSmall
  16573. MOVQ $0x00000000, ret+32(FP)
  16574. RET
  16575. repeat_dst_size_check_calcBlockSizeSmall:
  16576. MOVL 12(SP), SI
  16577. CMPL SI, DI
  16578. JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall
  16579. MOVL DI, R8
  16580. MOVL DI, 12(SP)
  16581. LEAQ (BX)(SI*1), R9
  16582. SUBL SI, R8
  16583. LEAL -1(R8), SI
  16584. CMPL SI, $0x3c
  16585. JB one_byte_repeat_emit_calcBlockSizeSmall
  16586. CMPL SI, $0x00000100
  16587. JB two_bytes_repeat_emit_calcBlockSizeSmall
  16588. JB three_bytes_repeat_emit_calcBlockSizeSmall
  16589. three_bytes_repeat_emit_calcBlockSizeSmall:
  16590. ADDQ $0x03, CX
  16591. JMP memmove_long_repeat_emit_calcBlockSizeSmall
  16592. two_bytes_repeat_emit_calcBlockSizeSmall:
  16593. ADDQ $0x02, CX
  16594. CMPL SI, $0x40
  16595. JB memmove_repeat_emit_calcBlockSizeSmall
  16596. JMP memmove_long_repeat_emit_calcBlockSizeSmall
  16597. one_byte_repeat_emit_calcBlockSizeSmall:
  16598. ADDQ $0x01, CX
  16599. memmove_repeat_emit_calcBlockSizeSmall:
  16600. LEAQ (CX)(R8*1), CX
  16601. JMP emit_literal_done_repeat_emit_calcBlockSizeSmall
  16602. memmove_long_repeat_emit_calcBlockSizeSmall:
  16603. LEAQ (CX)(R8*1), CX
  16604. emit_literal_done_repeat_emit_calcBlockSizeSmall:
  16605. ADDL $0x05, DX
  16606. MOVL DX, SI
  16607. SUBL 16(SP), SI
  16608. MOVQ src_len+8(FP), R8
  16609. SUBL DX, R8
  16610. LEAQ (BX)(DX*1), R9
  16611. LEAQ (BX)(SI*1), SI
  16612. // matchLen
  16613. XORL R11, R11
  16614. matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
  16615. CMPL R8, $0x10
  16616. JB matchlen_match8_repeat_extend_calcBlockSizeSmall
  16617. MOVQ (R9)(R11*1), R10
  16618. MOVQ 8(R9)(R11*1), R12
  16619. XORQ (SI)(R11*1), R10
  16620. JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
  16621. XORQ 8(SI)(R11*1), R12
  16622. JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall
  16623. LEAL -16(R8), R8
  16624. LEAL 16(R11), R11
  16625. JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
  16626. matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
  16627. #ifdef GOAMD64_v3
  16628. TZCNTQ R12, R12
  16629. #else
  16630. BSFQ R12, R12
  16631. #endif
  16632. SARQ $0x03, R12
  16633. LEAL 8(R11)(R12*1), R11
  16634. JMP repeat_extend_forward_end_calcBlockSizeSmall
  16635. matchlen_match8_repeat_extend_calcBlockSizeSmall:
  16636. CMPL R8, $0x08
  16637. JB matchlen_match4_repeat_extend_calcBlockSizeSmall
  16638. MOVQ (R9)(R11*1), R10
  16639. XORQ (SI)(R11*1), R10
  16640. JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
  16641. LEAL -8(R8), R8
  16642. LEAL 8(R11), R11
  16643. JMP matchlen_match4_repeat_extend_calcBlockSizeSmall
  16644. matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
  16645. #ifdef GOAMD64_v3
  16646. TZCNTQ R10, R10
  16647. #else
  16648. BSFQ R10, R10
  16649. #endif
  16650. SARQ $0x03, R10
  16651. LEAL (R11)(R10*1), R11
  16652. JMP repeat_extend_forward_end_calcBlockSizeSmall
  16653. matchlen_match4_repeat_extend_calcBlockSizeSmall:
  16654. CMPL R8, $0x04
  16655. JB matchlen_match2_repeat_extend_calcBlockSizeSmall
  16656. MOVL (R9)(R11*1), R10
  16657. CMPL (SI)(R11*1), R10
  16658. JNE matchlen_match2_repeat_extend_calcBlockSizeSmall
  16659. LEAL -4(R8), R8
  16660. LEAL 4(R11), R11
  16661. matchlen_match2_repeat_extend_calcBlockSizeSmall:
  16662. CMPL R8, $0x01
  16663. JE matchlen_match1_repeat_extend_calcBlockSizeSmall
  16664. JB repeat_extend_forward_end_calcBlockSizeSmall
  16665. MOVW (R9)(R11*1), R10
  16666. CMPW (SI)(R11*1), R10
  16667. JNE matchlen_match1_repeat_extend_calcBlockSizeSmall
  16668. LEAL 2(R11), R11
  16669. SUBL $0x02, R8
  16670. JZ repeat_extend_forward_end_calcBlockSizeSmall
  16671. matchlen_match1_repeat_extend_calcBlockSizeSmall:
  16672. MOVB (R9)(R11*1), R10
  16673. CMPB (SI)(R11*1), R10
  16674. JNE repeat_extend_forward_end_calcBlockSizeSmall
  16675. LEAL 1(R11), R11
  16676. repeat_extend_forward_end_calcBlockSizeSmall:
  16677. ADDL R11, DX
  16678. MOVL DX, SI
  16679. SUBL DI, SI
  16680. MOVL 16(SP), DI
  16681. // emitCopy
  16682. two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
  16683. CMPL SI, $0x40
  16684. JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
  16685. LEAL -60(SI), SI
  16686. ADDQ $0x03, CX
  16687. JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall
  16688. two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
  16689. MOVL SI, DI
  16690. SHLL $0x02, DI
  16691. CMPL SI, $0x0c
  16692. JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall
  16693. ADDQ $0x02, CX
  16694. JMP repeat_end_emit_calcBlockSizeSmall
  16695. emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
  16696. ADDQ $0x03, CX
  16697. repeat_end_emit_calcBlockSizeSmall:
  16698. MOVL DX, 12(SP)
  16699. JMP search_loop_calcBlockSizeSmall
  16700. no_repeat_found_calcBlockSizeSmall:
  16701. CMPL (BX)(SI*1), DI
  16702. JEQ candidate_match_calcBlockSizeSmall
  16703. SHRQ $0x08, DI
  16704. MOVL (AX)(R10*4), SI
  16705. LEAL 2(DX), R9
  16706. CMPL (BX)(R8*1), DI
  16707. JEQ candidate2_match_calcBlockSizeSmall
  16708. MOVL R9, (AX)(R10*4)
  16709. SHRQ $0x08, DI
  16710. CMPL (BX)(SI*1), DI
  16711. JEQ candidate3_match_calcBlockSizeSmall
  16712. MOVL 20(SP), DX
  16713. JMP search_loop_calcBlockSizeSmall
  16714. candidate3_match_calcBlockSizeSmall:
  16715. ADDL $0x02, DX
  16716. JMP candidate_match_calcBlockSizeSmall
  16717. candidate2_match_calcBlockSizeSmall:
  16718. MOVL R9, (AX)(R10*4)
  16719. INCL DX
  16720. MOVL R8, SI
  16721. candidate_match_calcBlockSizeSmall:
  16722. MOVL 12(SP), DI
  16723. TESTL SI, SI
  16724. JZ match_extend_back_end_calcBlockSizeSmall
  16725. match_extend_back_loop_calcBlockSizeSmall:
  16726. CMPL DX, DI
  16727. JBE match_extend_back_end_calcBlockSizeSmall
  16728. MOVB -1(BX)(SI*1), R8
  16729. MOVB -1(BX)(DX*1), R9
  16730. CMPB R8, R9
  16731. JNE match_extend_back_end_calcBlockSizeSmall
  16732. LEAL -1(DX), DX
  16733. DECL SI
  16734. JZ match_extend_back_end_calcBlockSizeSmall
  16735. JMP match_extend_back_loop_calcBlockSizeSmall
  16736. match_extend_back_end_calcBlockSizeSmall:
  16737. MOVL DX, DI
  16738. SUBL 12(SP), DI
  16739. LEAQ 3(CX)(DI*1), DI
  16740. CMPQ DI, (SP)
  16741. JB match_dst_size_check_calcBlockSizeSmall
  16742. MOVQ $0x00000000, ret+32(FP)
  16743. RET
  16744. match_dst_size_check_calcBlockSizeSmall:
  16745. MOVL DX, DI
  16746. MOVL 12(SP), R8
  16747. CMPL R8, DI
  16748. JEQ emit_literal_done_match_emit_calcBlockSizeSmall
  16749. MOVL DI, R9
  16750. MOVL DI, 12(SP)
  16751. LEAQ (BX)(R8*1), DI
  16752. SUBL R8, R9
  16753. LEAL -1(R9), DI
  16754. CMPL DI, $0x3c
  16755. JB one_byte_match_emit_calcBlockSizeSmall
  16756. CMPL DI, $0x00000100
  16757. JB two_bytes_match_emit_calcBlockSizeSmall
  16758. JB three_bytes_match_emit_calcBlockSizeSmall
  16759. three_bytes_match_emit_calcBlockSizeSmall:
  16760. ADDQ $0x03, CX
  16761. JMP memmove_long_match_emit_calcBlockSizeSmall
  16762. two_bytes_match_emit_calcBlockSizeSmall:
  16763. ADDQ $0x02, CX
  16764. CMPL DI, $0x40
  16765. JB memmove_match_emit_calcBlockSizeSmall
  16766. JMP memmove_long_match_emit_calcBlockSizeSmall
  16767. one_byte_match_emit_calcBlockSizeSmall:
  16768. ADDQ $0x01, CX
  16769. memmove_match_emit_calcBlockSizeSmall:
  16770. LEAQ (CX)(R9*1), CX
  16771. JMP emit_literal_done_match_emit_calcBlockSizeSmall
  16772. memmove_long_match_emit_calcBlockSizeSmall:
  16773. LEAQ (CX)(R9*1), CX
  16774. emit_literal_done_match_emit_calcBlockSizeSmall:
  16775. match_nolit_loop_calcBlockSizeSmall:
  16776. MOVL DX, DI
  16777. SUBL SI, DI
  16778. MOVL DI, 16(SP)
  16779. ADDL $0x04, DX
  16780. ADDL $0x04, SI
  16781. MOVQ src_len+8(FP), DI
  16782. SUBL DX, DI
  16783. LEAQ (BX)(DX*1), R8
  16784. LEAQ (BX)(SI*1), SI
  16785. // matchLen
  16786. XORL R10, R10
  16787. matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
  16788. CMPL DI, $0x10
  16789. JB matchlen_match8_match_nolit_calcBlockSizeSmall
  16790. MOVQ (R8)(R10*1), R9
  16791. MOVQ 8(R8)(R10*1), R11
  16792. XORQ (SI)(R10*1), R9
  16793. JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall
  16794. XORQ 8(SI)(R10*1), R11
  16795. JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall
  16796. LEAL -16(DI), DI
  16797. LEAL 16(R10), R10
  16798. JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall
  16799. matchlen_bsf_16match_nolit_calcBlockSizeSmall:
  16800. #ifdef GOAMD64_v3
  16801. TZCNTQ R11, R11
  16802. #else
  16803. BSFQ R11, R11
  16804. #endif
  16805. SARQ $0x03, R11
  16806. LEAL 8(R10)(R11*1), R10
  16807. JMP match_nolit_end_calcBlockSizeSmall
  16808. matchlen_match8_match_nolit_calcBlockSizeSmall:
  16809. CMPL DI, $0x08
  16810. JB matchlen_match4_match_nolit_calcBlockSizeSmall
  16811. MOVQ (R8)(R10*1), R9
  16812. XORQ (SI)(R10*1), R9
  16813. JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall
  16814. LEAL -8(DI), DI
  16815. LEAL 8(R10), R10
  16816. JMP matchlen_match4_match_nolit_calcBlockSizeSmall
  16817. matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
  16818. #ifdef GOAMD64_v3
  16819. TZCNTQ R9, R9
  16820. #else
  16821. BSFQ R9, R9
  16822. #endif
  16823. SARQ $0x03, R9
  16824. LEAL (R10)(R9*1), R10
  16825. JMP match_nolit_end_calcBlockSizeSmall
  16826. matchlen_match4_match_nolit_calcBlockSizeSmall:
  16827. CMPL DI, $0x04
  16828. JB matchlen_match2_match_nolit_calcBlockSizeSmall
  16829. MOVL (R8)(R10*1), R9
  16830. CMPL (SI)(R10*1), R9
  16831. JNE matchlen_match2_match_nolit_calcBlockSizeSmall
  16832. LEAL -4(DI), DI
  16833. LEAL 4(R10), R10
  16834. matchlen_match2_match_nolit_calcBlockSizeSmall:
  16835. CMPL DI, $0x01
  16836. JE matchlen_match1_match_nolit_calcBlockSizeSmall
  16837. JB match_nolit_end_calcBlockSizeSmall
  16838. MOVW (R8)(R10*1), R9
  16839. CMPW (SI)(R10*1), R9
  16840. JNE matchlen_match1_match_nolit_calcBlockSizeSmall
  16841. LEAL 2(R10), R10
  16842. SUBL $0x02, DI
  16843. JZ match_nolit_end_calcBlockSizeSmall
  16844. matchlen_match1_match_nolit_calcBlockSizeSmall:
  16845. MOVB (R8)(R10*1), R9
  16846. CMPB (SI)(R10*1), R9
  16847. JNE match_nolit_end_calcBlockSizeSmall
  16848. LEAL 1(R10), R10
  16849. match_nolit_end_calcBlockSizeSmall:
  16850. ADDL R10, DX
  16851. MOVL 16(SP), SI
  16852. ADDL $0x04, R10
  16853. MOVL DX, 12(SP)
  16854. // emitCopy
  16855. two_byte_offset_match_nolit_calcBlockSizeSmall:
  16856. CMPL R10, $0x40
  16857. JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall
  16858. LEAL -60(R10), R10
  16859. ADDQ $0x03, CX
  16860. JMP two_byte_offset_match_nolit_calcBlockSizeSmall
  16861. two_byte_offset_short_match_nolit_calcBlockSizeSmall:
  16862. MOVL R10, SI
  16863. SHLL $0x02, SI
  16864. CMPL R10, $0x0c
  16865. JAE emit_copy_three_match_nolit_calcBlockSizeSmall
  16866. ADDQ $0x02, CX
  16867. JMP match_nolit_emitcopy_end_calcBlockSizeSmall
  16868. emit_copy_three_match_nolit_calcBlockSizeSmall:
  16869. ADDQ $0x03, CX
  16870. match_nolit_emitcopy_end_calcBlockSizeSmall:
  16871. CMPL DX, 8(SP)
  16872. JAE emit_remainder_calcBlockSizeSmall
  16873. MOVQ -2(BX)(DX*1), DI
  16874. CMPQ CX, (SP)
  16875. JB match_nolit_dst_ok_calcBlockSizeSmall
  16876. MOVQ $0x00000000, ret+32(FP)
  16877. RET
  16878. match_nolit_dst_ok_calcBlockSizeSmall:
  16879. MOVQ $0x9e3779b1, R9
  16880. MOVQ DI, R8
  16881. SHRQ $0x10, DI
  16882. MOVQ DI, SI
  16883. SHLQ $0x20, R8
  16884. IMULQ R9, R8
  16885. SHRQ $0x37, R8
  16886. SHLQ $0x20, SI
  16887. IMULQ R9, SI
  16888. SHRQ $0x37, SI
  16889. LEAL -2(DX), R9
  16890. LEAQ (AX)(SI*4), R10
  16891. MOVL (R10), SI
  16892. MOVL R9, (AX)(R8*4)
  16893. MOVL DX, (R10)
  16894. CMPL (BX)(SI*1), DI
  16895. JEQ match_nolit_loop_calcBlockSizeSmall
  16896. INCL DX
  16897. JMP search_loop_calcBlockSizeSmall
  16898. emit_remainder_calcBlockSizeSmall:
  16899. MOVQ src_len+8(FP), AX
  16900. SUBL 12(SP), AX
  16901. LEAQ 3(CX)(AX*1), AX
  16902. CMPQ AX, (SP)
  16903. JB emit_remainder_ok_calcBlockSizeSmall
  16904. MOVQ $0x00000000, ret+32(FP)
  16905. RET
  16906. emit_remainder_ok_calcBlockSizeSmall:
  16907. MOVQ src_len+8(FP), AX
  16908. MOVL 12(SP), DX
  16909. CMPL DX, AX
  16910. JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall
  16911. MOVL AX, SI
  16912. MOVL AX, 12(SP)
  16913. LEAQ (BX)(DX*1), AX
  16914. SUBL DX, SI
  16915. LEAL -1(SI), AX
  16916. CMPL AX, $0x3c
  16917. JB one_byte_emit_remainder_calcBlockSizeSmall
  16918. CMPL AX, $0x00000100
  16919. JB two_bytes_emit_remainder_calcBlockSizeSmall
  16920. JB three_bytes_emit_remainder_calcBlockSizeSmall
  16921. three_bytes_emit_remainder_calcBlockSizeSmall:
  16922. ADDQ $0x03, CX
  16923. JMP memmove_long_emit_remainder_calcBlockSizeSmall
  16924. two_bytes_emit_remainder_calcBlockSizeSmall:
  16925. ADDQ $0x02, CX
  16926. CMPL AX, $0x40
  16927. JB memmove_emit_remainder_calcBlockSizeSmall
  16928. JMP memmove_long_emit_remainder_calcBlockSizeSmall
  16929. one_byte_emit_remainder_calcBlockSizeSmall:
  16930. ADDQ $0x01, CX
  16931. memmove_emit_remainder_calcBlockSizeSmall:
  16932. LEAQ (CX)(SI*1), AX
  16933. MOVQ AX, CX
  16934. JMP emit_literal_done_emit_remainder_calcBlockSizeSmall
  16935. memmove_long_emit_remainder_calcBlockSizeSmall:
  16936. LEAQ (CX)(SI*1), AX
  16937. MOVQ AX, CX
  16938. emit_literal_done_emit_remainder_calcBlockSizeSmall:
  16939. MOVQ CX, ret+32(FP)
  16940. RET
  16941. // func emitLiteral(dst []byte, lit []byte) int
  16942. // Requires: SSE2
         //
         // Writes a literal header (1 to 5 bytes, chosen from len(lit)-1) to dst,
         // followed by a copy of lit, and returns header size + len(lit).
         // Returns 0 and writes nothing when lit is empty.
         //
         // Register roles:
         //   AX = write cursor in dst (advanced past the header)
         //   CX = read pointer into lit (clobbered by some short-copy paths)
         //   DX = len(lit)
         //   BX = return value (seeded with the low 32 bits of len(lit))
         //   SI = len(lit)-1, later scratch
         //
         // dst_len/dst_cap are never read: no bounds check is performed here,
         // so the caller must guarantee dst has room for header + literal.
  16943. TEXT ·emitLiteral(SB), NOSPLIT, $0-56
  16944. MOVQ lit_len+32(FP), DX // DX = len(lit)
  16945. MOVQ dst_base+0(FP), AX // AX = &dst[0]
  16946. MOVQ lit_base+24(FP), CX // CX = &lit[0]
  16947. TESTQ DX, DX
  16948. JZ emit_literal_end_standalone_skip // empty literal: return 0
  16949. MOVL DX, BX // BX = len(lit); header size is added below
  16950. LEAL -1(DX), SI // SI = len(lit)-1, the value stored in the header
         // Pick the header width from the magnitude of len(lit)-1.
  16951. CMPL SI, $0x3c
  16952. JB one_byte_standalone // < 60: length fits in the tag byte
  16953. CMPL SI, $0x00000100
  16954. JB two_bytes_standalone // < 1<<8: tag + 1 length byte
  16955. CMPL SI, $0x00010000
  16956. JB three_bytes_standalone // < 1<<16: tag + 2 length bytes
  16957. CMPL SI, $0x01000000
  16958. JB four_bytes_standalone // < 1<<24: tag + 3 length bytes
         // Five-byte header: tag 0xfc followed by len-1 as 32 bits.
  16959. MOVB $0xfc, (AX)
  16960. MOVL SI, 1(AX)
  16961. ADDQ $0x05, BX
  16962. ADDQ $0x05, AX
  16963. JMP memmove_long_standalone
  16964. four_bytes_standalone:
         // Four-byte header: tag 0xf8 followed by len-1 as 24 bits.
  16965. MOVL SI, DI
  16966. SHRL $0x10, DI // DI = bits 16..23 of len-1
  16967. MOVB $0xf8, (AX)
  16968. MOVW SI, 1(AX) // low 16 bits
  16969. MOVB DI, 3(AX) // high 8 bits
  16970. ADDQ $0x04, BX
  16971. ADDQ $0x04, AX
  16972. JMP memmove_long_standalone
  16973. three_bytes_standalone:
         // Three-byte header: tag 0xf4 followed by len-1 as 16 bits.
  16974. MOVB $0xf4, (AX)
  16975. MOVW SI, 1(AX)
  16976. ADDQ $0x03, BX
  16977. ADDQ $0x03, AX
  16978. JMP memmove_long_standalone
  16979. two_bytes_standalone:
         // Two-byte header: tag 0xf0 followed by len-1 as 8 bits.
  16980. MOVB $0xf0, (AX)
  16981. MOVB SI, 1(AX)
  16982. ADDQ $0x02, BX
  16983. ADDQ $0x02, AX
  16984. CMPL SI, $0x40
  16985. JB memmove_standalone // len-1 < 64: the short copy handles up to 64 bytes
  16986. JMP memmove_long_standalone
  16987. one_byte_standalone:
         // One-byte header: (len-1)<<2 is the whole tag.
  16988. SHLB $0x02, SI
  16989. MOVB SI, (AX)
  16990. ADDQ $0x01, BX
  16991. ADDQ $0x01, AX
  16992. memmove_standalone:
  16993. // genMemMoveShort
         // Copies DX bytes from CX to AX. Each size class loads the head and
         // the tail of the source (the two may overlap) before storing, so no
         // per-byte loop is needed.
  16994. CMPQ DX, $0x03
  16995. JB emit_lit_memmove_standalone_memmove_move_1or2
  16996. JE emit_lit_memmove_standalone_memmove_move_3
  16997. CMPQ DX, $0x08
  16998. JB emit_lit_memmove_standalone_memmove_move_4through7
  16999. CMPQ DX, $0x10
  17000. JBE emit_lit_memmove_standalone_memmove_move_8through16
  17001. CMPQ DX, $0x20
  17002. JBE emit_lit_memmove_standalone_memmove_move_17through32
  17003. JMP emit_lit_memmove_standalone_memmove_move_33through64
  17004. emit_lit_memmove_standalone_memmove_move_1or2:
  17005. MOVB (CX), SI // first byte
  17006. MOVB -1(CX)(DX*1), CL // last byte (same byte when DX == 1); overwrites CL
  17007. MOVB SI, (AX)
  17008. MOVB CL, -1(AX)(DX*1)
  17009. JMP emit_literal_end_standalone
  17010. emit_lit_memmove_standalone_memmove_move_3:
  17011. MOVW (CX), SI // bytes 0..1
  17012. MOVB 2(CX), CL // byte 2
  17013. MOVW SI, (AX)
  17014. MOVB CL, 2(AX)
  17015. JMP emit_literal_end_standalone
  17016. emit_lit_memmove_standalone_memmove_move_4through7:
  17017. MOVL (CX), SI // first 4 bytes
  17018. MOVL -4(CX)(DX*1), CX // last 4 bytes; CX is no longer a pointer after this
  17019. MOVL SI, (AX)
  17020. MOVL CX, -4(AX)(DX*1)
  17021. JMP emit_literal_end_standalone
  17022. emit_lit_memmove_standalone_memmove_move_8through16:
  17023. MOVQ (CX), SI // first 8 bytes
  17024. MOVQ -8(CX)(DX*1), CX // last 8 bytes; CX is no longer a pointer after this
  17025. MOVQ SI, (AX)
  17026. MOVQ CX, -8(AX)(DX*1)
  17027. JMP emit_literal_end_standalone
  17028. emit_lit_memmove_standalone_memmove_move_17through32:
  17029. MOVOU (CX), X0 // first 16 bytes
  17030. MOVOU -16(CX)(DX*1), X1 // last 16 bytes
  17031. MOVOU X0, (AX)
  17032. MOVOU X1, -16(AX)(DX*1)
  17033. JMP emit_literal_end_standalone
  17034. emit_lit_memmove_standalone_memmove_move_33through64:
  17035. MOVOU (CX), X0 // first 32 bytes in X0, X1
  17036. MOVOU 16(CX), X1
  17037. MOVOU -32(CX)(DX*1), X2 // last 32 bytes in X2, X3
  17038. MOVOU -16(CX)(DX*1), X3
  17039. MOVOU X0, (AX)
  17040. MOVOU X1, 16(AX)
  17041. MOVOU X2, -32(AX)(DX*1)
  17042. MOVOU X3, -16(AX)(DX*1)
  17043. JMP emit_literal_end_standalone
  17044. JMP emit_literal_end_standalone // unreachable: follows an unconditional JMP with no label in between
  17045. memmove_long_standalone:
  17046. // genMemMoveLong
         // Copies DX bytes from CX to AX. The first and last 32 source bytes
         // are captured in X0..X3 up front and stored last with unaligned
         // stores; the loops in between use aligned (MOVOA) stores.
  17047. MOVOU (CX), X0
  17048. MOVOU 16(CX), X1
  17049. MOVOU -32(CX)(DX*1), X2
  17050. MOVOU -16(CX)(DX*1), X3
  17051. MOVQ DX, DI
  17052. SHRQ $0x05, DI // DI = number of whole 32-byte chunks
  17053. MOVQ AX, SI
  17054. ANDL $0x0000001f, SI // SI = dst misalignment within 32 bytes
  17055. MOVQ $0x00000040, R8
  17056. SUBQ SI, R8 // R8 = 64 - misalignment, so AX+R8-32 is 32-byte aligned
  17057. DECQ DI
         // DECQ does not write CF, so JA here sees CF from the SUBQ above
         // (64 minus a value <= 31 cannot borrow) together with ZF from DECQ.
  17058. JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
  17059. LEAQ -32(CX)(R8*1), SI // SI = source address matching the aligned destination
  17060. LEAQ -32(AX)(R8*1), R9 // R9 = first 32-byte-aligned destination address
  17061. emit_lit_memmove_long_standalonelarge_big_loop_back:
         // 32 bytes per iteration: unaligned loads, aligned stores.
  17062. MOVOU (SI), X4
  17063. MOVOU 16(SI), X5
  17064. MOVOA X4, (R9)
  17065. MOVOA X5, 16(R9)
  17066. ADDQ $0x20, R9
  17067. ADDQ $0x20, SI
  17068. ADDQ $0x20, R8
  17069. DECQ DI
         // NOTE(review): JNA tests CF|ZF; CF comes from the ADDQ above because
         // DECQ leaves it untouched. Confirm against the generator before
         // changing instruction order in this loop.
  17070. JNA emit_lit_memmove_long_standalonelarge_big_loop_back
  17071. emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
         // Copy 32 bytes ending at offset R8, then advance while DX >= R8.
  17072. MOVOU -32(CX)(R8*1), X4
  17073. MOVOU -16(CX)(R8*1), X5
  17074. MOVOA X4, -32(AX)(R8*1)
  17075. MOVOA X5, -16(AX)(R8*1)
  17076. ADDQ $0x20, R8
  17077. CMPQ DX, R8
  17078. JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
         // Finish with the saved head and tail, which cover the unaligned
         // leading bytes and whatever the loop did not reach at the end.
  17079. MOVOU X0, (AX)
  17080. MOVOU X1, 16(AX)
  17081. MOVOU X2, -32(AX)(DX*1)
  17082. MOVOU X3, -16(AX)(DX*1)
  17083. JMP emit_literal_end_standalone
  17084. JMP emit_literal_end_standalone // unreachable: follows an unconditional JMP with no label in between
  17085. emit_literal_end_standalone_skip:
  17086. XORQ BX, BX // empty literal: nothing written
  17087. emit_literal_end_standalone:
  17088. MOVQ BX, ret+48(FP) // return header bytes + literal bytes
  17089. RET
  17090. // func emitRepeat(dst []byte, offset int, length int) int
         //
         // Writes one or more repeat codes for `length` bytes to dst and returns
         // the number of bytes written. `offset` is only consulted to decide
         // whether the two-byte form that embeds an offset can be used.
         //
         // Register roles:
         //   AX = write cursor in dst
         //   BX = bytes written so far (return value)
         //   CX = offset (compared and shifted as a 32-bit value)
         //   DX = remaining length, biased by -4 after the first two instructions
         //        of the loop
         //   SI = unbiased length for the current iteration
         //
         // dst_len/dst_cap are never read: the caller must guarantee capacity.
  17091. TEXT ·emitRepeat(SB), NOSPLIT, $0-48
  17092. XORQ BX, BX // bytes written = 0
  17093. MOVQ dst_base+0(FP), AX
  17094. MOVQ offset+24(FP), CX
  17095. MOVQ length+32(FP), DX
  17096. // emitRepeat
  17097. emit_repeat_again_standalone:
  17098. MOVL DX, SI // SI = length
  17099. LEAL -4(DX), DX // DX = length-4
  17100. CMPL SI, $0x08
  17101. JBE repeat_two_standalone // length <= 8: two-byte form without offset
  17102. CMPL SI, $0x0c
  17103. JAE cant_repeat_two_offset_standalone // length >= 12: too long for the offset form
  17104. CMPL CX, $0x00000800
  17105. JB repeat_two_offset_standalone // length 9..11 and offset < 2048
  17106. cant_repeat_two_offset_standalone:
         // Choose a 3-, 4- or 5-byte form from the biased length in DX.
  17107. CMPL DX, $0x00000104
  17108. JB repeat_three_standalone // < 260
  17109. CMPL DX, $0x00010100
  17110. JB repeat_four_standalone // < 65792
  17111. CMPL DX, $0x0100ffff
  17112. JB repeat_five_standalone // < 16842751
         // Length exceeds what one code can carry: emit a five-byte code with
         // all length bits at their maximum, subtract that chunk, and loop.
  17113. LEAL -16842747(DX), DX
  17114. MOVL $0xfffb001d, (AX) // bytes 1d 00 fb ff
  17115. MOVB $0xff, 4(AX) // fifth byte ff
  17116. ADDQ $0x05, AX
  17117. ADDQ $0x05, BX
  17118. JMP emit_repeat_again_standalone
  17119. repeat_five_standalone:
         // Five bytes: word 0x001d, then DX-65536 as 24 bits.
  17120. LEAL -65536(DX), DX
  17121. MOVL DX, CX // CX reused as scratch; offset is not needed after this
  17122. MOVW $0x001d, (AX)
  17123. MOVW DX, 2(AX) // low 16 bits
  17124. SARL $0x10, CX
  17125. MOVB CL, 4(AX) // bits 16..23
  17126. ADDQ $0x05, BX
  17127. ADDQ $0x05, AX
  17128. JMP gen_emit_repeat_end
  17129. repeat_four_standalone:
         // Four bytes: word 0x0019, then DX-256 as 16 bits.
  17130. LEAL -256(DX), DX
  17131. MOVW $0x0019, (AX)
  17132. MOVW DX, 2(AX)
  17133. ADDQ $0x04, BX
  17134. ADDQ $0x04, AX
  17135. JMP gen_emit_repeat_end
  17136. repeat_three_standalone:
         // Three bytes: word 0x0015, then DX-4 as 8 bits.
  17137. LEAL -4(DX), DX
  17138. MOVW $0x0015, (AX)
  17139. MOVB DL, 2(AX)
  17140. ADDQ $0x03, BX
  17141. ADDQ $0x03, AX
  17142. JMP gen_emit_repeat_end
  17143. repeat_two_standalone:
         // Two bytes: ((length-4)<<2)|1 stored as a 16-bit word.
  17144. SHLL $0x02, DX
  17145. ORL $0x01, DX
  17146. MOVW DX, (AX)
  17147. ADDQ $0x02, BX
  17148. ADDQ $0x02, AX
  17149. JMP gen_emit_repeat_end
  17150. repeat_two_offset_standalone:
         // Two bytes with embedded offset:
         //   byte 0 = ((length-4)<<2) | 1 | ((offset>>8)<<5)
         //   byte 1 = low 8 bits of offset
  17151. XORQ SI, SI
  17152. LEAL 1(SI)(DX*4), DX // DX = 1 + (length-4)*4
  17153. MOVB CL, 1(AX)
  17154. SARL $0x08, CX
  17155. SHLL $0x05, CX
  17156. ORL CX, DX
  17157. MOVB DL, (AX)
  17158. ADDQ $0x02, BX
  17159. ADDQ $0x02, AX
  17160. gen_emit_repeat_end:
  17161. MOVQ BX, ret+40(FP) // return bytes written
  17162. RET
  17163. // func emitCopy(dst []byte, offset int, length int) int
         //
         // Writes the encoding of a copy of `length` bytes at distance `offset`
         // to dst and returns the number of bytes written. When the length does
         // not fit a single copy code, a first copy code is written and the
         // remainder is emitted through an inlined copy of the repeat encoder
         // (three instances below, one per entry path).
         //
         // Register roles:
         //   AX = write cursor in dst
         //   BX = bytes written so far (return value)
         //   CX = offset (compared and stored as a 32-bit value)
         //   DX = remaining length
         //   SI, DI = scratch
         //
         // dst_len/dst_cap are never read: the caller must guarantee capacity.
  17164. TEXT ·emitCopy(SB), NOSPLIT, $0-48
  17165. XORQ BX, BX // bytes written = 0
  17166. MOVQ dst_base+0(FP), AX
  17167. MOVQ offset+24(FP), CX
  17168. MOVQ length+32(FP), DX
  17169. // emitCopy
  17170. CMPL CX, $0x00010000
  17171. JB two_byte_offset_standalone // offset < 65536: 1- or 2-byte offset forms
         // Offset needs 4 bytes.
  17172. CMPL DX, $0x40
  17173. JBE four_bytes_remain_standalone // length <= 64 fits one code
         // Emit a 64-byte copy (tag 0xff + 32-bit offset), then continue with
         // the remainder.
  17174. MOVB $0xff, (AX)
  17175. MOVL CX, 1(AX)
  17176. LEAL -64(DX), DX
  17177. ADDQ $0x05, BX
  17178. ADDQ $0x05, AX
  17179. CMPL DX, $0x04
  17180. JB four_bytes_remain_standalone // fewer than 4 left: finish with another copy code
  17181. // emitRepeat
         // Inlined repeat encoder, instance 1 (after a 4-byte-offset copy).
  17182. emit_repeat_again_standalone_emit_copy:
  17183. MOVL DX, SI // SI = length
  17184. LEAL -4(DX), DX // DX = length-4
  17185. CMPL SI, $0x08
  17186. JBE repeat_two_standalone_emit_copy
  17187. CMPL SI, $0x0c
  17188. JAE cant_repeat_two_offset_standalone_emit_copy
  17189. CMPL CX, $0x00000800
  17190. JB repeat_two_offset_standalone_emit_copy
  17191. cant_repeat_two_offset_standalone_emit_copy:
  17192. CMPL DX, $0x00000104
  17193. JB repeat_three_standalone_emit_copy
  17194. CMPL DX, $0x00010100
  17195. JB repeat_four_standalone_emit_copy
  17196. CMPL DX, $0x0100ffff
  17197. JB repeat_five_standalone_emit_copy
         // Maximum-length five-byte repeat, then loop for the rest.
  17198. LEAL -16842747(DX), DX
  17199. MOVL $0xfffb001d, (AX)
  17200. MOVB $0xff, 4(AX)
  17201. ADDQ $0x05, AX
  17202. ADDQ $0x05, BX
  17203. JMP emit_repeat_again_standalone_emit_copy
  17204. repeat_five_standalone_emit_copy:
         // Five bytes: word 0x001d, then DX-65536 as 24 bits.
  17205. LEAL -65536(DX), DX
  17206. MOVL DX, CX // CX reused as scratch
  17207. MOVW $0x001d, (AX)
  17208. MOVW DX, 2(AX)
  17209. SARL $0x10, CX
  17210. MOVB CL, 4(AX)
  17211. ADDQ $0x05, BX
  17212. ADDQ $0x05, AX
  17213. JMP gen_emit_copy_end
  17214. repeat_four_standalone_emit_copy:
         // Four bytes: word 0x0019, then DX-256 as 16 bits.
  17215. LEAL -256(DX), DX
  17216. MOVW $0x0019, (AX)
  17217. MOVW DX, 2(AX)
  17218. ADDQ $0x04, BX
  17219. ADDQ $0x04, AX
  17220. JMP gen_emit_copy_end
  17221. repeat_three_standalone_emit_copy:
         // Three bytes: word 0x0015, then DX-4 as 8 bits.
  17222. LEAL -4(DX), DX
  17223. MOVW $0x0015, (AX)
  17224. MOVB DL, 2(AX)
  17225. ADDQ $0x03, BX
  17226. ADDQ $0x03, AX
  17227. JMP gen_emit_copy_end
  17228. repeat_two_standalone_emit_copy:
         // Two bytes: ((length-4)<<2)|1 as a 16-bit word.
  17229. SHLL $0x02, DX
  17230. ORL $0x01, DX
  17231. MOVW DX, (AX)
  17232. ADDQ $0x02, BX
  17233. ADDQ $0x02, AX
  17234. JMP gen_emit_copy_end
  17235. repeat_two_offset_standalone_emit_copy:
         // Two bytes with embedded offset bits (offset>>8 in bits 5+ of byte 0,
         // low offset byte in byte 1).
  17236. XORQ SI, SI
  17237. LEAL 1(SI)(DX*4), DX
  17238. MOVB CL, 1(AX)
  17239. SARL $0x08, CX
  17240. SHLL $0x05, CX
  17241. ORL CX, DX
  17242. MOVB DL, (AX)
  17243. ADDQ $0x02, BX
  17244. ADDQ $0x02, AX
  17245. JMP gen_emit_copy_end
  17246. four_bytes_remain_standalone:
         // Final 4-byte-offset copy: tag = (length*4)-1, i.e. ((length-1)<<2)|3,
         // followed by the 32-bit offset. Nothing is written when length is 0.
  17247. TESTL DX, DX
  17248. JZ gen_emit_copy_end
  17249. XORL SI, SI
  17250. LEAL -1(SI)(DX*4), DX
  17251. MOVB DL, (AX)
  17252. MOVL CX, 1(AX)
  17253. ADDQ $0x05, BX
  17254. ADDQ $0x05, AX
  17255. JMP gen_emit_copy_end
  17256. two_byte_offset_standalone:
         // offset < 65536.
  17257. CMPL DX, $0x40
  17258. JBE two_byte_offset_short_standalone // length <= 64 fits one code
  17259. CMPL CX, $0x00000800
  17260. JAE long_offset_short_standalone // offset >= 2048 needs the 2-byte-offset form
         // offset < 2048 and length > 64: emit a two-byte copy that accounts
         // for 8 bytes (tag base 17 = 1 + 16, plus (offset>>8)<<5), then encode
         // the rest as a repeat.
  17261. MOVL $0x00000001, SI
  17262. LEAL 16(SI), SI // SI = 17
  17263. MOVB CL, 1(AX) // low 8 bits of offset
  17264. MOVL CX, DI
  17265. SHRL $0x08, DI
  17266. SHLL $0x05, DI
  17267. ORL DI, SI
  17268. MOVB SI, (AX)
  17269. ADDQ $0x02, BX
  17270. ADDQ $0x02, AX
  17271. SUBL $0x08, DX // 8 bytes consumed by the copy just written
  17272. // emitRepeat
         // Inlined repeat encoder, instance 2. Entry applies the -4 bias and
         // jumps past the two-byte forms straight to the 3/4/5-byte selection.
  17273. LEAL -4(DX), DX
  17274. JMP cant_repeat_two_offset_standalone_emit_copy_short_2b
  17275. emit_repeat_again_standalone_emit_copy_short_2b:
         // Reached only from the maximum-length loop below.
  17276. MOVL DX, SI
  17277. LEAL -4(DX), DX
  17278. CMPL SI, $0x08
  17279. JBE repeat_two_standalone_emit_copy_short_2b
  17280. CMPL SI, $0x0c
  17281. JAE cant_repeat_two_offset_standalone_emit_copy_short_2b
  17282. CMPL CX, $0x00000800
  17283. JB repeat_two_offset_standalone_emit_copy_short_2b
  17284. cant_repeat_two_offset_standalone_emit_copy_short_2b:
  17285. CMPL DX, $0x00000104
  17286. JB repeat_three_standalone_emit_copy_short_2b
  17287. CMPL DX, $0x00010100
  17288. JB repeat_four_standalone_emit_copy_short_2b
  17289. CMPL DX, $0x0100ffff
  17290. JB repeat_five_standalone_emit_copy_short_2b
         // Maximum-length five-byte repeat, then loop for the rest.
  17291. LEAL -16842747(DX), DX
  17292. MOVL $0xfffb001d, (AX)
  17293. MOVB $0xff, 4(AX)
  17294. ADDQ $0x05, AX
  17295. ADDQ $0x05, BX
  17296. JMP emit_repeat_again_standalone_emit_copy_short_2b
  17297. repeat_five_standalone_emit_copy_short_2b:
         // Five bytes: word 0x001d, then DX-65536 as 24 bits.
  17298. LEAL -65536(DX), DX
  17299. MOVL DX, CX // CX reused as scratch
  17300. MOVW $0x001d, (AX)
  17301. MOVW DX, 2(AX)
  17302. SARL $0x10, CX
  17303. MOVB CL, 4(AX)
  17304. ADDQ $0x05, BX
  17305. ADDQ $0x05, AX
  17306. JMP gen_emit_copy_end
  17307. repeat_four_standalone_emit_copy_short_2b:
         // Four bytes: word 0x0019, then DX-256 as 16 bits.
  17308. LEAL -256(DX), DX
  17309. MOVW $0x0019, (AX)
  17310. MOVW DX, 2(AX)
  17311. ADDQ $0x04, BX
  17312. ADDQ $0x04, AX
  17313. JMP gen_emit_copy_end
  17314. repeat_three_standalone_emit_copy_short_2b:
         // Three bytes: word 0x0015, then DX-4 as 8 bits.
  17315. LEAL -4(DX), DX
  17316. MOVW $0x0015, (AX)
  17317. MOVB DL, 2(AX)
  17318. ADDQ $0x03, BX
  17319. ADDQ $0x03, AX
  17320. JMP gen_emit_copy_end
  17321. repeat_two_standalone_emit_copy_short_2b:
         // Two bytes: ((length-4)<<2)|1 as a 16-bit word.
  17322. SHLL $0x02, DX
  17323. ORL $0x01, DX
  17324. MOVW DX, (AX)
  17325. ADDQ $0x02, BX
  17326. ADDQ $0x02, AX
  17327. JMP gen_emit_copy_end
  17328. repeat_two_offset_standalone_emit_copy_short_2b:
         // Two bytes with embedded offset bits.
  17329. XORQ SI, SI
  17330. LEAL 1(SI)(DX*4), DX
  17331. MOVB CL, 1(AX)
  17332. SARL $0x08, CX
  17333. SHLL $0x05, CX
  17334. ORL CX, DX
  17335. MOVB DL, (AX)
  17336. ADDQ $0x02, BX
  17337. ADDQ $0x02, AX
  17338. JMP gen_emit_copy_end
  17339. long_offset_short_standalone:
         // 2048 <= offset < 65536 and length > 64: emit a three-byte copy that
         // accounts for 60 bytes (tag 0xee + 16-bit offset), then encode the
         // rest as a repeat.
  17340. MOVB $0xee, (AX)
  17341. MOVW CX, 1(AX)
  17342. LEAL -60(DX), DX
  17343. ADDQ $0x03, AX
  17344. ADDQ $0x03, BX
  17345. // emitRepeat
         // Inlined repeat encoder, instance 3 (after a 2-byte-offset copy).
  17346. emit_repeat_again_standalone_emit_copy_short:
  17347. MOVL DX, SI // SI = length
  17348. LEAL -4(DX), DX // DX = length-4
  17349. CMPL SI, $0x08
  17350. JBE repeat_two_standalone_emit_copy_short
  17351. CMPL SI, $0x0c
  17352. JAE cant_repeat_two_offset_standalone_emit_copy_short
  17353. CMPL CX, $0x00000800
  17354. JB repeat_two_offset_standalone_emit_copy_short
  17355. cant_repeat_two_offset_standalone_emit_copy_short:
  17356. CMPL DX, $0x00000104
  17357. JB repeat_three_standalone_emit_copy_short
  17358. CMPL DX, $0x00010100
  17359. JB repeat_four_standalone_emit_copy_short
  17360. CMPL DX, $0x0100ffff
  17361. JB repeat_five_standalone_emit_copy_short
         // Maximum-length five-byte repeat, then loop for the rest.
  17362. LEAL -16842747(DX), DX
  17363. MOVL $0xfffb001d, (AX)
  17364. MOVB $0xff, 4(AX)
  17365. ADDQ $0x05, AX
  17366. ADDQ $0x05, BX
  17367. JMP emit_repeat_again_standalone_emit_copy_short
  17368. repeat_five_standalone_emit_copy_short:
         // Five bytes: word 0x001d, then DX-65536 as 24 bits.
  17369. LEAL -65536(DX), DX
  17370. MOVL DX, CX // CX reused as scratch
  17371. MOVW $0x001d, (AX)
  17372. MOVW DX, 2(AX)
  17373. SARL $0x10, CX
  17374. MOVB CL, 4(AX)
  17375. ADDQ $0x05, BX
  17376. ADDQ $0x05, AX
  17377. JMP gen_emit_copy_end
  17378. repeat_four_standalone_emit_copy_short:
         // Four bytes: word 0x0019, then DX-256 as 16 bits.
  17379. LEAL -256(DX), DX
  17380. MOVW $0x0019, (AX)
  17381. MOVW DX, 2(AX)
  17382. ADDQ $0x04, BX
  17383. ADDQ $0x04, AX
  17384. JMP gen_emit_copy_end
  17385. repeat_three_standalone_emit_copy_short:
         // Three bytes: word 0x0015, then DX-4 as 8 bits.
  17386. LEAL -4(DX), DX
  17387. MOVW $0x0015, (AX)
  17388. MOVB DL, 2(AX)
  17389. ADDQ $0x03, BX
  17390. ADDQ $0x03, AX
  17391. JMP gen_emit_copy_end
  17392. repeat_two_standalone_emit_copy_short:
         // Two bytes: ((length-4)<<2)|1 as a 16-bit word.
  17393. SHLL $0x02, DX
  17394. ORL $0x01, DX
  17395. MOVW DX, (AX)
  17396. ADDQ $0x02, BX
  17397. ADDQ $0x02, AX
  17398. JMP gen_emit_copy_end
  17399. repeat_two_offset_standalone_emit_copy_short:
         // Two bytes with embedded offset bits.
  17400. XORQ SI, SI
  17401. LEAL 1(SI)(DX*4), DX
  17402. MOVB CL, 1(AX)
  17403. SARL $0x08, CX
  17404. SHLL $0x05, CX
  17405. ORL CX, DX
  17406. MOVB DL, (AX)
  17407. ADDQ $0x02, BX
  17408. ADDQ $0x02, AX
  17409. JMP gen_emit_copy_end
  17410. two_byte_offset_short_standalone:
         // offset < 65536 and length <= 64: a single copy code suffices.
  17411. MOVL DX, SI
  17412. SHLL $0x02, SI // SI = length*4, base for either tag below
  17413. CMPL DX, $0x0c
  17414. JAE emit_copy_three_standalone // length >= 12: three-byte form
  17415. CMPL CX, $0x00000800
  17416. JAE emit_copy_three_standalone // offset >= 2048: three-byte form
         // Two-byte form: tag = length*4-15 = ((length-4)<<2)|1, OR'd with
         // (offset>>8)<<5; second byte is the low 8 bits of offset.
  17417. LEAL -15(SI), SI
  17418. MOVB CL, 1(AX)
  17419. SHRL $0x08, CX
  17420. SHLL $0x05, CX
  17421. ORL CX, SI
  17422. MOVB SI, (AX)
  17423. ADDQ $0x02, BX
  17424. ADDQ $0x02, AX
  17425. JMP gen_emit_copy_end
  17426. emit_copy_three_standalone:
         // Three-byte form: tag = length*4-2 = ((length-1)<<2)|2, then the
         // 16-bit offset.
  17427. LEAL -2(SI), SI
  17428. MOVB SI, (AX)
  17429. MOVW CX, 1(AX)
  17430. ADDQ $0x03, BX
  17431. ADDQ $0x03, AX
  17432. gen_emit_copy_end:
  17433. MOVQ BX, ret+40(FP) // return bytes written
  17434. RET
  17435. // func emitCopyNoRepeat(dst []byte, offset int, length int) int
         //
         // Writes the encoding of a copy of `length` bytes at distance `offset`
         // to dst using copy codes only: lengths that do not fit one code are
         // split into several copy codes by looping, and no repeat codes are
         // written. Returns the number of bytes written.
         //
         // Register roles:
         //   AX = write cursor in dst
         //   BX = bytes written so far (return value)
         //   CX = offset (compared and stored as a 32-bit value)
         //   DX = remaining length
         //   SI = scratch for the tag byte
         //
         // dst_len/dst_cap are never read: the caller must guarantee capacity.
  17436. TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
  17437. XORQ BX, BX // bytes written = 0
  17438. MOVQ dst_base+0(FP), AX
  17439. MOVQ offset+24(FP), CX
  17440. MOVQ length+32(FP), DX
  17441. // emitCopy
  17442. CMPL CX, $0x00010000
  17443. JB two_byte_offset_standalone_snappy // offset < 65536: 1- or 2-byte offset forms
  17444. four_bytes_loop_back_standalone_snappy:
         // Offset needs 4 bytes: peel off 64-byte copies while length > 64.
  17445. CMPL DX, $0x40
  17446. JBE four_bytes_remain_standalone_snappy
  17447. MOVB $0xff, (AX) // tag for a 64-byte copy with 32-bit offset
  17448. MOVL CX, 1(AX)
  17449. LEAL -64(DX), DX
  17450. ADDQ $0x05, BX
  17451. ADDQ $0x05, AX
  17452. CMPL DX, $0x04
  17453. JB four_bytes_remain_standalone_snappy // fewer than 4 left: emit them as the final code
  17454. JMP four_bytes_loop_back_standalone_snappy
  17455. four_bytes_remain_standalone_snappy:
         // Final 4-byte-offset copy: tag = (length*4)-1, i.e. ((length-1)<<2)|3,
         // followed by the 32-bit offset. Nothing is written when length is 0.
  17456. TESTL DX, DX
  17457. JZ gen_emit_copy_end_snappy
  17458. XORL SI, SI
  17459. LEAL -1(SI)(DX*4), DX
  17460. MOVB DL, (AX)
  17461. MOVL CX, 1(AX)
  17462. ADDQ $0x05, BX
  17463. ADDQ $0x05, AX
  17464. JMP gen_emit_copy_end_snappy
  17465. two_byte_offset_standalone_snappy:
         // offset < 65536: peel off 60-byte copies (tag 0xee + 16-bit offset)
         // while length > 64.
  17466. CMPL DX, $0x40
  17467. JBE two_byte_offset_short_standalone_snappy
  17468. MOVB $0xee, (AX)
  17469. MOVW CX, 1(AX)
  17470. LEAL -60(DX), DX
  17471. ADDQ $0x03, AX
  17472. ADDQ $0x03, BX
  17473. JMP two_byte_offset_standalone_snappy
  17474. two_byte_offset_short_standalone_snappy:
         // length <= 64: a single copy code suffices.
  17475. MOVL DX, SI
  17476. SHLL $0x02, SI // SI = length*4, base for either tag below
  17477. CMPL DX, $0x0c
  17478. JAE emit_copy_three_standalone_snappy // length >= 12: three-byte form
  17479. CMPL CX, $0x00000800
  17480. JAE emit_copy_three_standalone_snappy // offset >= 2048: three-byte form
         // Two-byte form: tag = length*4-15 = ((length-4)<<2)|1, OR'd with
         // (offset>>8)<<5; second byte is the low 8 bits of offset.
  17481. LEAL -15(SI), SI
  17482. MOVB CL, 1(AX)
  17483. SHRL $0x08, CX
  17484. SHLL $0x05, CX
  17485. ORL CX, SI
  17486. MOVB SI, (AX)
  17487. ADDQ $0x02, BX
  17488. ADDQ $0x02, AX
  17489. JMP gen_emit_copy_end_snappy
  17490. emit_copy_three_standalone_snappy:
         // Three-byte form: tag = length*4-2 = ((length-1)<<2)|2, then the
         // 16-bit offset.
  17491. LEAL -2(SI), SI
  17492. MOVB SI, (AX)
  17493. MOVW CX, 1(AX)
  17494. ADDQ $0x03, BX
  17495. ADDQ $0x03, AX
  17496. gen_emit_copy_end_snappy:
  17497. MOVQ BX, ret+40(FP) // return bytes written
  17498. RET
  17499. // func matchLen(a []byte, b []byte) int
  17500. // Requires: BMI
         //
         // Returns the number of leading bytes at which a and b are equal,
         // examining at most len(a) bytes. Only a_len is read; b's length is
         // never consulted, so the caller must ensure b has at least len(a)
         // readable bytes.
         //
         // Register roles:
         //   AX = &a[0], CX = &b[0]
         //   DX = bytes of a not yet compared (handled as a 32-bit value)
         //   SI = number of matching bytes found so far (return value)
         //   BX, DI = scratch for loaded/XOR'd data
  17501. TEXT ·matchLen(SB), NOSPLIT, $0-56
  17502. MOVQ a_base+0(FP), AX
  17503. MOVQ b_base+24(FP), CX
  17504. MOVQ a_len+8(FP), DX
  17505. // matchLen
  17506. XORL SI, SI // matched = 0
  17507. matchlen_loopback_16_standalone:
         // Compare 16 bytes per iteration as two 8-byte XORs; a non-zero XOR
         // result marks the word containing the first differing byte.
  17508. CMPL DX, $0x10
  17509. JB matchlen_match8_standalone
  17510. MOVQ (AX)(SI*1), BX
  17511. MOVQ 8(AX)(SI*1), DI
  17512. XORQ (CX)(SI*1), BX
  17513. JNZ matchlen_bsf_8_standalone // mismatch in the first 8 bytes
  17514. XORQ 8(CX)(SI*1), DI
  17515. JNZ matchlen_bsf_16standalone // mismatch in the second 8 bytes
  17516. LEAL -16(DX), DX
  17517. LEAL 16(SI), SI
  17518. JMP matchlen_loopback_16_standalone
  17519. matchlen_bsf_16standalone:
         // DI is non-zero here (reached via JNZ). Index of its lowest set bit,
         // divided by 8, is the count of equal bytes within the second word.
  17520. #ifdef GOAMD64_v3
  17521. TZCNTQ DI, DI
  17522. #else
  17523. BSFQ DI, DI
  17524. #endif
  17525. SARQ $0x03, DI // bit index -> byte index
  17526. LEAL 8(SI)(DI*1), SI // matched += 8 + byte index
  17527. JMP gen_match_len_end
  17528. matchlen_match8_standalone:
         // Fewer than 16 bytes left: try one 8-byte compare.
  17529. CMPL DX, $0x08
  17530. JB matchlen_match4_standalone
  17531. MOVQ (AX)(SI*1), BX
  17532. XORQ (CX)(SI*1), BX
  17533. JNZ matchlen_bsf_8_standalone
  17534. LEAL -8(DX), DX
  17535. LEAL 8(SI), SI
  17536. JMP matchlen_match4_standalone
  17537. matchlen_bsf_8_standalone:
         // BX is non-zero here (reached via JNZ); same bit-scan as above.
  17538. #ifdef GOAMD64_v3
  17539. TZCNTQ BX, BX
  17540. #else
  17541. BSFQ BX, BX
  17542. #endif
  17543. SARQ $0x03, BX // bit index -> byte index
  17544. LEAL (SI)(BX*1), SI // matched += byte index
  17545. JMP gen_match_len_end
  17546. matchlen_match4_standalone:
         // Try a 4-byte compare; on mismatch fall into the 2-byte step without
         // advancing so the narrower compares locate the differing byte.
  17547. CMPL DX, $0x04
  17548. JB matchlen_match2_standalone
  17549. MOVL (AX)(SI*1), BX
  17550. CMPL (CX)(SI*1), BX
  17551. JNE matchlen_match2_standalone
  17552. LEAL -4(DX), DX
  17553. LEAL 4(SI), SI
  17554. matchlen_match2_standalone:
  17555. CMPL DX, $0x01
  17556. JE matchlen_match1_standalone // exactly one byte left
  17557. JB gen_match_len_end // nothing left
  17558. MOVW (AX)(SI*1), BX
  17559. CMPW (CX)(SI*1), BX
  17560. JNE matchlen_match1_standalone // differ within these 2 bytes: check the first one
  17561. LEAL 2(SI), SI
  17562. SUBL $0x02, DX
  17563. JZ gen_match_len_end // input exhausted
  17564. matchlen_match1_standalone:
  17565. MOVB (AX)(SI*1), BL
  17566. CMPB (CX)(SI*1), BL
  17567. JNE gen_match_len_end
  17568. LEAL 1(SI), SI
  17569. gen_match_len_end:
  17570. MOVQ SI, ret+48(FP) // return matched byte count
  17571. RET
  17572. // func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  17573. // Requires: SSE2
  17574. TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64
  17575. XORQ SI, SI
  17576. MOVQ dst_base+0(FP), AX
  17577. MOVQ dst_len+8(FP), CX
  17578. MOVQ src_base+24(FP), DX
  17579. MOVQ src_len+32(FP), BX
  17580. LEAQ (DX)(BX*1), BX
  17581. LEAQ -8(AX)(CX*1), CX
  17582. XORQ DI, DI
  17583. lz4_s2_loop:
  17584. CMPQ DX, BX
  17585. JAE lz4_s2_corrupt
  17586. CMPQ AX, CX
  17587. JAE lz4_s2_dstfull
  17588. MOVBQZX (DX), R8
  17589. MOVQ R8, R9
  17590. MOVQ R8, R10
  17591. SHRQ $0x04, R9
  17592. ANDQ $0x0f, R10
  17593. CMPQ R8, $0xf0
  17594. JB lz4_s2_ll_end
  17595. lz4_s2_ll_loop:
  17596. INCQ DX
  17597. CMPQ DX, BX
  17598. JAE lz4_s2_corrupt
  17599. MOVBQZX (DX), R8
  17600. ADDQ R8, R9
  17601. CMPQ R8, $0xff
  17602. JEQ lz4_s2_ll_loop
  17603. lz4_s2_ll_end:
  17604. LEAQ (DX)(R9*1), R8
  17605. ADDQ $0x04, R10
  17606. CMPQ R8, BX
  17607. JAE lz4_s2_corrupt
  17608. INCQ DX
  17609. INCQ R8
  17610. TESTQ R9, R9
  17611. JZ lz4_s2_lits_done
  17612. LEAQ (AX)(R9*1), R11
  17613. CMPQ R11, CX
  17614. JAE lz4_s2_dstfull
  17615. ADDQ R9, SI
  17616. LEAL -1(R9), R11
  17617. CMPL R11, $0x3c
  17618. JB one_byte_lz4_s2
  17619. CMPL R11, $0x00000100
  17620. JB two_bytes_lz4_s2
  17621. CMPL R11, $0x00010000
  17622. JB three_bytes_lz4_s2
  17623. CMPL R11, $0x01000000
  17624. JB four_bytes_lz4_s2
  17625. MOVB $0xfc, (AX)
  17626. MOVL R11, 1(AX)
  17627. ADDQ $0x05, AX
  17628. JMP memmove_long_lz4_s2
  17629. four_bytes_lz4_s2:
  17630. MOVL R11, R12
  17631. SHRL $0x10, R12
  17632. MOVB $0xf8, (AX)
  17633. MOVW R11, 1(AX)
  17634. MOVB R12, 3(AX)
  17635. ADDQ $0x04, AX
  17636. JMP memmove_long_lz4_s2
  17637. three_bytes_lz4_s2:
  17638. MOVB $0xf4, (AX)
  17639. MOVW R11, 1(AX)
  17640. ADDQ $0x03, AX
  17641. JMP memmove_long_lz4_s2
  17642. two_bytes_lz4_s2:
  17643. MOVB $0xf0, (AX)
  17644. MOVB R11, 1(AX)
  17645. ADDQ $0x02, AX
  17646. CMPL R11, $0x40
  17647. JB memmove_lz4_s2
  17648. JMP memmove_long_lz4_s2
  17649. one_byte_lz4_s2:
  17650. SHLB $0x02, R11
  17651. MOVB R11, (AX)
  17652. ADDQ $0x01, AX
  17653. memmove_lz4_s2:
  17654. LEAQ (AX)(R9*1), R11
  17655. // genMemMoveShort
  17656. CMPQ R9, $0x08
  17657. JBE emit_lit_memmove_lz4_s2_memmove_move_8
  17658. CMPQ R9, $0x10
  17659. JBE emit_lit_memmove_lz4_s2_memmove_move_8through16
  17660. CMPQ R9, $0x20
  17661. JBE emit_lit_memmove_lz4_s2_memmove_move_17through32
  17662. JMP emit_lit_memmove_lz4_s2_memmove_move_33through64
  17663. emit_lit_memmove_lz4_s2_memmove_move_8:
  17664. MOVQ (DX), R12
  17665. MOVQ R12, (AX)
  17666. JMP memmove_end_copy_lz4_s2
  17667. emit_lit_memmove_lz4_s2_memmove_move_8through16:
  17668. MOVQ (DX), R12
  17669. MOVQ -8(DX)(R9*1), DX
  17670. MOVQ R12, (AX)
  17671. MOVQ DX, -8(AX)(R9*1)
  17672. JMP memmove_end_copy_lz4_s2
  17673. emit_lit_memmove_lz4_s2_memmove_move_17through32:
  17674. MOVOU (DX), X0
  17675. MOVOU -16(DX)(R9*1), X1
  17676. MOVOU X0, (AX)
  17677. MOVOU X1, -16(AX)(R9*1)
  17678. JMP memmove_end_copy_lz4_s2
  17679. emit_lit_memmove_lz4_s2_memmove_move_33through64:
  17680. MOVOU (DX), X0
  17681. MOVOU 16(DX), X1
  17682. MOVOU -32(DX)(R9*1), X2
  17683. MOVOU -16(DX)(R9*1), X3
  17684. MOVOU X0, (AX)
  17685. MOVOU X1, 16(AX)
  17686. MOVOU X2, -32(AX)(R9*1)
  17687. MOVOU X3, -16(AX)(R9*1)
  17688. memmove_end_copy_lz4_s2:
  17689. MOVQ R11, AX
  17690. JMP lz4_s2_lits_emit_done
  17691. memmove_long_lz4_s2:
  17692. LEAQ (AX)(R9*1), R11
  17693. // genMemMoveLong
  17694. MOVOU (DX), X0
  17695. MOVOU 16(DX), X1
  17696. MOVOU -32(DX)(R9*1), X2
  17697. MOVOU -16(DX)(R9*1), X3
  17698. MOVQ R9, R13
  17699. SHRQ $0x05, R13
  17700. MOVQ AX, R12
  17701. ANDL $0x0000001f, R12
  17702. MOVQ $0x00000040, R14
  17703. SUBQ R12, R14
  17704. DECQ R13
  17705. JA emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
  17706. LEAQ -32(DX)(R14*1), R12
  17707. LEAQ -32(AX)(R14*1), R15
  17708. emit_lit_memmove_long_lz4_s2large_big_loop_back:
  17709. MOVOU (R12), X4
  17710. MOVOU 16(R12), X5
  17711. MOVOA X4, (R15)
  17712. MOVOA X5, 16(R15)
  17713. ADDQ $0x20, R15
  17714. ADDQ $0x20, R12
  17715. ADDQ $0x20, R14
  17716. DECQ R13
  17717. JNA emit_lit_memmove_long_lz4_s2large_big_loop_back
  17718. emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32:
  17719. MOVOU -32(DX)(R14*1), X4
  17720. MOVOU -16(DX)(R14*1), X5
  17721. MOVOA X4, -32(AX)(R14*1)
  17722. MOVOA X5, -16(AX)(R14*1)
  17723. ADDQ $0x20, R14
  17724. CMPQ R9, R14
  17725. JAE emit_lit_memmove_long_lz4_s2large_forward_sse_loop_32
  17726. MOVOU X0, (AX)
  17727. MOVOU X1, 16(AX)
  17728. MOVOU X2, -32(AX)(R9*1)
  17729. MOVOU X3, -16(AX)(R9*1)
  17730. MOVQ R11, AX
  17731. lz4_s2_lits_emit_done:
  17732. MOVQ R8, DX
  17733. lz4_s2_lits_done:
  17734. CMPQ DX, BX
  17735. JNE lz4_s2_match
  17736. CMPQ R10, $0x04
  17737. JEQ lz4_s2_done
  17738. JMP lz4_s2_corrupt
  17739. lz4_s2_match:
  17740. LEAQ 2(DX), R8
  17741. CMPQ R8, BX
  17742. JAE lz4_s2_corrupt
  17743. MOVWQZX (DX), R9
  17744. MOVQ R8, DX
  17745. TESTQ R9, R9
  17746. JZ lz4_s2_corrupt
  17747. CMPQ R9, SI
  17748. JA lz4_s2_corrupt
  17749. CMPQ R10, $0x13
  17750. JNE lz4_s2_ml_done
  17751. lz4_s2_ml_loop:
  17752. MOVBQZX (DX), R8
  17753. INCQ DX
  17754. ADDQ R8, R10
  17755. CMPQ DX, BX
  17756. JAE lz4_s2_corrupt
  17757. CMPQ R8, $0xff
  17758. JEQ lz4_s2_ml_loop
  17759. lz4_s2_ml_done:
  17760. ADDQ R10, SI
  17761. CMPQ R9, DI
  17762. JNE lz4_s2_docopy
  17763. // emitRepeat
  17764. emit_repeat_again_lz4_s2:
  17765. MOVL R10, R8
  17766. LEAL -4(R10), R10
  17767. CMPL R8, $0x08
  17768. JBE repeat_two_lz4_s2
  17769. CMPL R8, $0x0c
  17770. JAE cant_repeat_two_offset_lz4_s2
  17771. CMPL R9, $0x00000800
  17772. JB repeat_two_offset_lz4_s2
  17773. cant_repeat_two_offset_lz4_s2:
  17774. CMPL R10, $0x00000104
  17775. JB repeat_three_lz4_s2
  17776. CMPL R10, $0x00010100
  17777. JB repeat_four_lz4_s2
  17778. CMPL R10, $0x0100ffff
  17779. JB repeat_five_lz4_s2
  17780. LEAL -16842747(R10), R10
  17781. MOVL $0xfffb001d, (AX)
  17782. MOVB $0xff, 4(AX)
  17783. ADDQ $0x05, AX
  17784. JMP emit_repeat_again_lz4_s2
  17785. repeat_five_lz4_s2:
  17786. LEAL -65536(R10), R10
  17787. MOVL R10, R9
  17788. MOVW $0x001d, (AX)
  17789. MOVW R10, 2(AX)
  17790. SARL $0x10, R9
  17791. MOVB R9, 4(AX)
  17792. ADDQ $0x05, AX
  17793. JMP lz4_s2_loop
  17794. repeat_four_lz4_s2:
  17795. LEAL -256(R10), R10
  17796. MOVW $0x0019, (AX)
  17797. MOVW R10, 2(AX)
  17798. ADDQ $0x04, AX
  17799. JMP lz4_s2_loop
  17800. repeat_three_lz4_s2:
  17801. LEAL -4(R10), R10
  17802. MOVW $0x0015, (AX)
  17803. MOVB R10, 2(AX)
  17804. ADDQ $0x03, AX
  17805. JMP lz4_s2_loop
  17806. repeat_two_lz4_s2:
  17807. SHLL $0x02, R10
  17808. ORL $0x01, R10
  17809. MOVW R10, (AX)
  17810. ADDQ $0x02, AX
  17811. JMP lz4_s2_loop
  17812. repeat_two_offset_lz4_s2:
  17813. XORQ R8, R8
  17814. LEAL 1(R8)(R10*4), R10
  17815. MOVB R9, 1(AX)
  17816. SARL $0x08, R9
  17817. SHLL $0x05, R9
  17818. ORL R9, R10
  17819. MOVB R10, (AX)
  17820. ADDQ $0x02, AX
  17821. JMP lz4_s2_loop
  17822. lz4_s2_docopy:
  17823. MOVQ R9, DI
  17824. // emitCopy
  17825. CMPL R10, $0x40
  17826. JBE two_byte_offset_short_lz4_s2
  17827. CMPL R9, $0x00000800
  17828. JAE long_offset_short_lz4_s2
  17829. MOVL $0x00000001, R8
  17830. LEAL 16(R8), R8
  17831. MOVB R9, 1(AX)
  17832. MOVL R9, R11
  17833. SHRL $0x08, R11
  17834. SHLL $0x05, R11
  17835. ORL R11, R8
  17836. MOVB R8, (AX)
  17837. ADDQ $0x02, AX
  17838. SUBL $0x08, R10
  17839. // emitRepeat
  17840. LEAL -4(R10), R10
  17841. JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  17842. emit_repeat_again_lz4_s2_emit_copy_short_2b:
  17843. MOVL R10, R8
  17844. LEAL -4(R10), R10
  17845. CMPL R8, $0x08
  17846. JBE repeat_two_lz4_s2_emit_copy_short_2b
  17847. CMPL R8, $0x0c
  17848. JAE cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  17849. CMPL R9, $0x00000800
  17850. JB repeat_two_offset_lz4_s2_emit_copy_short_2b
  17851. cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
  17852. CMPL R10, $0x00000104
  17853. JB repeat_three_lz4_s2_emit_copy_short_2b
  17854. CMPL R10, $0x00010100
  17855. JB repeat_four_lz4_s2_emit_copy_short_2b
  17856. CMPL R10, $0x0100ffff
  17857. JB repeat_five_lz4_s2_emit_copy_short_2b
  17858. LEAL -16842747(R10), R10
  17859. MOVL $0xfffb001d, (AX)
  17860. MOVB $0xff, 4(AX)
  17861. ADDQ $0x05, AX
  17862. JMP emit_repeat_again_lz4_s2_emit_copy_short_2b
  17863. repeat_five_lz4_s2_emit_copy_short_2b:
  17864. LEAL -65536(R10), R10
  17865. MOVL R10, R9
  17866. MOVW $0x001d, (AX)
  17867. MOVW R10, 2(AX)
  17868. SARL $0x10, R9
  17869. MOVB R9, 4(AX)
  17870. ADDQ $0x05, AX
  17871. JMP lz4_s2_loop
  17872. repeat_four_lz4_s2_emit_copy_short_2b:
  17873. LEAL -256(R10), R10
  17874. MOVW $0x0019, (AX)
  17875. MOVW R10, 2(AX)
  17876. ADDQ $0x04, AX
  17877. JMP lz4_s2_loop
  17878. repeat_three_lz4_s2_emit_copy_short_2b:
  17879. LEAL -4(R10), R10
  17880. MOVW $0x0015, (AX)
  17881. MOVB R10, 2(AX)
  17882. ADDQ $0x03, AX
  17883. JMP lz4_s2_loop
  17884. repeat_two_lz4_s2_emit_copy_short_2b:
  17885. SHLL $0x02, R10
  17886. ORL $0x01, R10
  17887. MOVW R10, (AX)
  17888. ADDQ $0x02, AX
  17889. JMP lz4_s2_loop
  17890. repeat_two_offset_lz4_s2_emit_copy_short_2b:
  17891. XORQ R8, R8
  17892. LEAL 1(R8)(R10*4), R10
  17893. MOVB R9, 1(AX)
  17894. SARL $0x08, R9
  17895. SHLL $0x05, R9
  17896. ORL R9, R10
  17897. MOVB R10, (AX)
  17898. ADDQ $0x02, AX
  17899. JMP lz4_s2_loop
  17900. long_offset_short_lz4_s2:
  17901. MOVB $0xee, (AX)
  17902. MOVW R9, 1(AX)
  17903. LEAL -60(R10), R10
  17904. ADDQ $0x03, AX
  17905. // emitRepeat
  17906. emit_repeat_again_lz4_s2_emit_copy_short:
  17907. MOVL R10, R8
  17908. LEAL -4(R10), R10
  17909. CMPL R8, $0x08
  17910. JBE repeat_two_lz4_s2_emit_copy_short
  17911. CMPL R8, $0x0c
  17912. JAE cant_repeat_two_offset_lz4_s2_emit_copy_short
  17913. CMPL R9, $0x00000800
  17914. JB repeat_two_offset_lz4_s2_emit_copy_short
  17915. cant_repeat_two_offset_lz4_s2_emit_copy_short:
  17916. CMPL R10, $0x00000104
  17917. JB repeat_three_lz4_s2_emit_copy_short
  17918. CMPL R10, $0x00010100
  17919. JB repeat_four_lz4_s2_emit_copy_short
  17920. CMPL R10, $0x0100ffff
  17921. JB repeat_five_lz4_s2_emit_copy_short
  17922. LEAL -16842747(R10), R10
  17923. MOVL $0xfffb001d, (AX)
  17924. MOVB $0xff, 4(AX)
  17925. ADDQ $0x05, AX
  17926. JMP emit_repeat_again_lz4_s2_emit_copy_short
  17927. repeat_five_lz4_s2_emit_copy_short:
  17928. LEAL -65536(R10), R10
  17929. MOVL R10, R9
  17930. MOVW $0x001d, (AX)
  17931. MOVW R10, 2(AX)
  17932. SARL $0x10, R9
  17933. MOVB R9, 4(AX)
  17934. ADDQ $0x05, AX
  17935. JMP lz4_s2_loop
  17936. repeat_four_lz4_s2_emit_copy_short:
  17937. LEAL -256(R10), R10
  17938. MOVW $0x0019, (AX)
  17939. MOVW R10, 2(AX)
  17940. ADDQ $0x04, AX
  17941. JMP lz4_s2_loop
  17942. repeat_three_lz4_s2_emit_copy_short:
  17943. LEAL -4(R10), R10
  17944. MOVW $0x0015, (AX)
  17945. MOVB R10, 2(AX)
  17946. ADDQ $0x03, AX
  17947. JMP lz4_s2_loop
  17948. repeat_two_lz4_s2_emit_copy_short:
  17949. SHLL $0x02, R10
  17950. ORL $0x01, R10
  17951. MOVW R10, (AX)
  17952. ADDQ $0x02, AX
  17953. JMP lz4_s2_loop
  17954. repeat_two_offset_lz4_s2_emit_copy_short:
  17955. XORQ R8, R8
  17956. LEAL 1(R8)(R10*4), R10
  17957. MOVB R9, 1(AX)
  17958. SARL $0x08, R9
  17959. SHLL $0x05, R9
  17960. ORL R9, R10
  17961. MOVB R10, (AX)
  17962. ADDQ $0x02, AX
  17963. JMP lz4_s2_loop
  17964. two_byte_offset_short_lz4_s2:
  17965. MOVL R10, R8
  17966. SHLL $0x02, R8
  17967. CMPL R10, $0x0c
  17968. JAE emit_copy_three_lz4_s2
  17969. CMPL R9, $0x00000800
  17970. JAE emit_copy_three_lz4_s2
  17971. LEAL -15(R8), R8
  17972. MOVB R9, 1(AX)
  17973. SHRL $0x08, R9
  17974. SHLL $0x05, R9
  17975. ORL R9, R8
  17976. MOVB R8, (AX)
  17977. ADDQ $0x02, AX
  17978. JMP lz4_s2_loop
  17979. emit_copy_three_lz4_s2:
  17980. LEAL -2(R8), R8
  17981. MOVB R8, (AX)
  17982. MOVW R9, 1(AX)
  17983. ADDQ $0x03, AX
  17984. JMP lz4_s2_loop
  17985. lz4_s2_done:
  17986. MOVQ dst_base+0(FP), CX
  17987. SUBQ CX, AX
  17988. MOVQ SI, uncompressed+48(FP)
  17989. MOVQ AX, dstUsed+56(FP)
  17990. RET
  17991. lz4_s2_corrupt:
  17992. XORQ AX, AX
  17993. LEAQ -1(AX), SI
  17994. MOVQ SI, uncompressed+48(FP)
  17995. RET
  17996. lz4_s2_dstfull:
  17997. XORQ AX, AX
  17998. LEAQ -2(AX), SI
  17999. MOVQ SI, uncompressed+48(FP)
  18000. RET
  18001. // func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  18002. // Requires: SSE2
  //
  // Walks the token stream in src and re-encodes every sequence into dst.
  // Each token byte carries a literal length (high nibble) and a match length
  // (low nibble, +3); either is extended by following bytes while they are 0xff.
  // Literals are copied behind a length tag; a match is written as a "repeat"
  // when its offset equals the previous match offset, otherwise as a "copy".
  // NOTE(review): going by the names this converts LZ4s blocks to S2 - confirm.
  //
  // Results:
  //   uncompressed = sum of all literal and match lengths, dstUsed = bytes written
  //   uncompressed = -1 when the input is rejected (lz4s_s2_corrupt)
  //   uncompressed = -2 when the dst limit is reached (lz4s_s2_dstfull)
  //   dstUsed is not stored on either error path.
  //
  // Register roles:
  //   AX  dst write cursor          CX  dst limit (dst_base + dst_len - 8)
  //   DX  src read cursor           BX  src end
  //   SI  uncompressed byte count   DI  offset of the previous match
  //   R9  literal length, later the match offset
  //   R10 match length              R8, R11-R15, X0-X5 scratch
  18003. TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
  18004. XORQ SI, SI // SI = 0 bytes decoded so far
  18005. MOVQ dst_base+0(FP), AX // AX = dst write cursor
  18006. MOVQ dst_len+8(FP), CX
  18007. MOVQ src_base+24(FP), DX // DX = src read cursor
  18008. MOVQ src_len+32(FP), BX
  18009. LEAQ (DX)(BX*1), BX // BX = src_base + src_len (end of input)
  18010. LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8 (write limit with 8 bytes of slack)
  18011. XORQ DI, DI // no previous match offset yet
  18012. lz4s_s2_loop: // head of the per-sequence loop
  18013. CMPQ DX, BX
  18014. JAE lz4s_s2_corrupt // no token byte left to read
  18015. CMPQ AX, CX
  18016. JAE lz4s_s2_dstfull // write cursor reached the dst limit
  18017. MOVBQZX (DX), R8 // R8 = token byte
  18018. MOVQ R8, R9
  18019. MOVQ R8, R10
  18020. SHRQ $0x04, R9 // R9 = literal length (high nibble)
  18021. ANDQ $0x0f, R10 // R10 = match length nibble (low nibble)
  18022. CMPQ R8, $0xf0
  18023. JB lz4s_s2_ll_end // high nibble < 15: no length extension bytes
  18024. lz4s_s2_ll_loop: // accumulate literal length extension bytes
  18025. INCQ DX
  18026. CMPQ DX, BX
  18027. JAE lz4s_s2_corrupt
  18028. MOVBQZX (DX), R8
  18029. ADDQ R8, R9
  18030. CMPQ R8, $0xff
  18031. JEQ lz4s_s2_ll_loop // a 0xff byte means another extension byte follows
  18032. lz4s_s2_ll_end:
  18033. LEAQ (DX)(R9*1), R8 // R8 = position of the last literal byte (DX is still on the token/length byte)
  18034. ADDQ $0x03, R10 // match length = nibble + 3
  18035. CMPQ R8, BX
  18036. JAE lz4s_s2_corrupt // literals would run past the end of src
  18037. INCQ DX // DX = first literal byte
  18038. INCQ R8 // R8 = first byte after the literals
  18039. TESTQ R9, R9
  18040. JZ lz4s_s2_lits_done // no literals in this sequence
  18041. LEAQ (AX)(R9*1), R11
  18042. CMPQ R11, CX
  18043. JAE lz4s_s2_dstfull // literals would cross the dst limit
  18044. ADDQ R9, SI // account for the literal bytes
  18045. LEAL -1(R9), R11 // R11 = length - 1, the value stored in the literal tag
  18046. CMPL R11, $0x3c
  18047. JB one_byte_lz4s_s2 // < 60: fits in a single tag byte
  18048. CMPL R11, $0x00000100
  18049. JB two_bytes_lz4s_s2 // < 1<<8: tag + 1 length byte
  18050. CMPL R11, $0x00010000
  18051. JB three_bytes_lz4s_s2 // < 1<<16: tag + 2 length bytes
  18052. CMPL R11, $0x01000000
  18053. JB four_bytes_lz4s_s2 // < 1<<24: tag + 3 length bytes
  18054. MOVB $0xfc, (AX) // tag 0xfc followed by a 4-byte length
  18055. MOVL R11, 1(AX)
  18056. ADDQ $0x05, AX
  18057. JMP memmove_long_lz4s_s2
  18058. four_bytes_lz4s_s2: // tag 0xf8 followed by a 3-byte length
  18059. MOVL R11, R12
  18060. SHRL $0x10, R12 // R12 = bits 16..23 of the length
  18061. MOVB $0xf8, (AX)
  18062. MOVW R11, 1(AX)
  18063. MOVB R12, 3(AX)
  18064. ADDQ $0x04, AX
  18065. JMP memmove_long_lz4s_s2
  18066. three_bytes_lz4s_s2: // tag 0xf4 followed by a 2-byte length
  18067. MOVB $0xf4, (AX)
  18068. MOVW R11, 1(AX)
  18069. ADDQ $0x03, AX
  18070. JMP memmove_long_lz4s_s2
  18071. two_bytes_lz4s_s2: // tag 0xf0 followed by a 1-byte length
  18072. MOVB $0xf0, (AX)
  18073. MOVB R11, 1(AX)
  18074. ADDQ $0x02, AX
  18075. CMPL R11, $0x40
  18076. JB memmove_lz4s_s2 // at most 64 literal bytes: use the short copy
  18077. JMP memmove_long_lz4s_s2
  18078. one_byte_lz4s_s2: // single tag byte: (length-1) << 2
  18079. SHLB $0x02, R11
  18080. MOVB R11, (AX)
  18081. ADDQ $0x01, AX
  18082. memmove_lz4s_s2: // short literal copy, R9 = length
  18083. LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
  18084. // genMemMoveShort
  18085. CMPQ R9, $0x08
  18086. JBE emit_lit_memmove_lz4s_s2_memmove_move_8
  18087. CMPQ R9, $0x10
  18088. JBE emit_lit_memmove_lz4s_s2_memmove_move_8through16
  18089. CMPQ R9, $0x20
  18090. JBE emit_lit_memmove_lz4s_s2_memmove_move_17through32
  18091. JMP emit_lit_memmove_lz4s_s2_memmove_move_33through64
  18092. emit_lit_memmove_lz4s_s2_memmove_move_8: // length <= 8: always moves a full 8 bytes
  18093. MOVQ (DX), R12
  18094. MOVQ R12, (AX)
  18095. JMP memmove_end_copy_lz4s_s2
  18096. emit_lit_memmove_lz4s_s2_memmove_move_8through16: // two overlapping 8-byte moves (head and tail)
  18097. MOVQ (DX), R12
  18098. MOVQ -8(DX)(R9*1), DX // DX now holds data; it is reloaded from R8 at lz4s_s2_lits_emit_done
  18099. MOVQ R12, (AX)
  18100. MOVQ DX, -8(AX)(R9*1)
  18101. JMP memmove_end_copy_lz4s_s2
  18102. emit_lit_memmove_lz4s_s2_memmove_move_17through32: // two overlapping 16-byte moves
  18103. MOVOU (DX), X0
  18104. MOVOU -16(DX)(R9*1), X1
  18105. MOVOU X0, (AX)
  18106. MOVOU X1, -16(AX)(R9*1)
  18107. JMP memmove_end_copy_lz4s_s2
  18108. emit_lit_memmove_lz4s_s2_memmove_move_33through64: // first 32 and last 32 bytes, overlapping
  18109. MOVOU (DX), X0
  18110. MOVOU 16(DX), X1
  18111. MOVOU -32(DX)(R9*1), X2
  18112. MOVOU -16(DX)(R9*1), X3
  18113. MOVOU X0, (AX)
  18114. MOVOU X1, 16(AX)
  18115. MOVOU X2, -32(AX)(R9*1)
  18116. MOVOU X3, -16(AX)(R9*1)
  18117. memmove_end_copy_lz4s_s2:
  18118. MOVQ R11, AX // advance dst by exactly the literal length
  18119. JMP lz4s_s2_lits_emit_done
  18120. memmove_long_lz4s_s2: // long literal copy, R9 = length
  18121. LEAQ (AX)(R9*1), R11 // R11 = dst cursor after the literals
  18122. // genMemMoveLong
  18123. MOVOU (DX), X0 // X0,X1 = first 32 source bytes
  18124. MOVOU 16(DX), X1
  18125. MOVOU -32(DX)(R9*1), X2 // X2,X3 = last 32 source bytes
  18126. MOVOU -16(DX)(R9*1), X3
  18127. MOVQ R9, R13
  18128. SHRQ $0x05, R13 // R13 = length / 32
  18129. MOVQ AX, R12
  18130. ANDL $0x0000001f, R12 // R12 = AX mod 32
  18131. MOVQ $0x00000040, R14
  18132. SUBQ R12, R14 // R14 = 64 - (AX mod 32), so AX+R14-32 is 32-byte aligned
  18133. DECQ R13
  18134. JA emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 // branches on the flags left by DECQ
  18135. LEAQ -32(DX)(R14*1), R12 // R12 = source cursor for the block loop
  18136. LEAQ -32(AX)(R14*1), R15 // R15 = aligned destination cursor
  18137. emit_lit_memmove_long_lz4s_s2large_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
  18138. MOVOU (R12), X4
  18139. MOVOU 16(R12), X5
  18140. MOVOA X4, (R15)
  18141. MOVOA X5, 16(R15)
  18142. ADDQ $0x20, R15
  18143. ADDQ $0x20, R12
  18144. ADDQ $0x20, R14
  18145. DECQ R13
  18146. JNA emit_lit_memmove_long_lz4s_s2large_big_loop_back
  18147. emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32: // same 32-byte step, indexed by R14
  18148. MOVOU -32(DX)(R14*1), X4
  18149. MOVOU -16(DX)(R14*1), X5
  18150. MOVOA X4, -32(AX)(R14*1)
  18151. MOVOA X5, -16(AX)(R14*1)
  18152. ADDQ $0x20, R14
  18153. CMPQ R9, R14
  18154. JAE emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 // continue while length >= R14
  18155. MOVOU X0, (AX) // finally store the saved head ...
  18156. MOVOU X1, 16(AX)
  18157. MOVOU X2, -32(AX)(R9*1) // ... and the saved tail with unaligned stores
  18158. MOVOU X3, -16(AX)(R9*1)
  18159. MOVQ R11, AX // advance dst by exactly the literal length
  18160. lz4s_s2_lits_emit_done:
  18161. MOVQ R8, DX // src cursor = first byte after the literals
  18162. lz4s_s2_lits_done:
  18163. CMPQ DX, BX
  18164. JNE lz4s_s2_match // more input: handle the match part of this sequence
  18165. CMPQ R10, $0x03
  18166. JEQ lz4s_s2_done // input ended and the match nibble was 0: finished
  18167. JMP lz4s_s2_corrupt // input ended while a match was still expected
  18168. lz4s_s2_match:
  18169. CMPQ R10, $0x03
  18170. JEQ lz4s_s2_loop // match nibble 0: this sequence has no match, read the next token
  18171. LEAQ 2(DX), R8
  18172. CMPQ R8, BX
  18173. JAE lz4s_s2_corrupt // need the 2 offset bytes plus at least one more input byte
  18174. MOVWQZX (DX), R9 // R9 = 16-bit match offset
  18175. MOVQ R8, DX
  18176. TESTQ R9, R9
  18177. JZ lz4s_s2_corrupt // offset 0 is rejected
  18178. CMPQ R9, SI
  18179. JA lz4s_s2_corrupt // offset reaches back further than the bytes produced so far
  18180. CMPQ R10, $0x12
  18181. JNE lz4s_s2_ml_done // nibble < 15 (15+3 = 0x12): no match length extension
  18182. lz4s_s2_ml_loop: // accumulate match length extension bytes
  18183. MOVBQZX (DX), R8
  18184. INCQ DX
  18185. ADDQ R8, R10
  18186. CMPQ DX, BX
  18187. JAE lz4s_s2_corrupt
  18188. CMPQ R8, $0xff
  18189. JEQ lz4s_s2_ml_loop // a 0xff byte means another extension byte follows
  18190. lz4s_s2_ml_done:
  18191. ADDQ R10, SI // account for the match bytes
  18192. CMPQ R9, DI
  18193. JNE lz4s_s2_docopy // offset differs from the previous match: emit a copy
  18194. // emitRepeat
  18195. emit_repeat_again_lz4_s2: // same offset as the previous match, R10 = length
  18196. MOVL R10, R8 // R8 = original length, used for the size-class tests
  18197. LEAL -4(R10), R10 // R10 = length - 4
  18198. CMPL R8, $0x08
  18199. JBE repeat_two_lz4_s2 // length <= 8: 2-byte form
  18200. CMPL R8, $0x0c
  18201. JAE cant_repeat_two_offset_lz4_s2
  18202. CMPL R9, $0x00000800
  18203. JB repeat_two_offset_lz4_s2 // length < 12 and offset < 2048: 2-byte form carrying the offset
  18204. cant_repeat_two_offset_lz4_s2:
  18205. CMPL R10, $0x00000104
  18206. JB repeat_three_lz4_s2 // length-4 < 260: 3-byte form
  18207. CMPL R10, $0x00010100
  18208. JB repeat_four_lz4_s2 // length-4 < 65792: 4-byte form
  18209. CMPL R10, $0x0100ffff
  18210. JB repeat_five_lz4_s2 // length-4 < 0x0100ffff: 5-byte form
  18211. LEAL -16842747(R10), R10 // too long for one op: take 0x0100fffb off the remaining length
  18212. MOVL $0xfffb001d, (AX) // bytes 1d 00 fb ff ...
  18213. MOVB $0xff, 4(AX) // ... ff: a 5-byte repeat with fixed length field
  18214. ADDQ $0x05, AX
  18215. JMP emit_repeat_again_lz4_s2 // encode the remainder
  18216. repeat_five_lz4_s2: // 5 bytes: 0x1d 0x00 + 24-bit (length-4-65536)
  18217. LEAL -65536(R10), R10
  18218. MOVL R10, R9
  18219. MOVW $0x001d, (AX)
  18220. MOVW R10, 2(AX)
  18221. SARL $0x10, R9 // R9 = bits 16 and up of the length field
  18222. MOVB R9, 4(AX)
  18223. ADDQ $0x05, AX
  18224. JMP lz4s_s2_loop
  18225. repeat_four_lz4_s2: // 4 bytes: 0x19 0x00 + 16-bit (length-4-256)
  18226. LEAL -256(R10), R10
  18227. MOVW $0x0019, (AX)
  18228. MOVW R10, 2(AX)
  18229. ADDQ $0x04, AX
  18230. JMP lz4s_s2_loop
  18231. repeat_three_lz4_s2: // 3 bytes: 0x15 0x00 + 8-bit (length-4-4)
  18232. LEAL -4(R10), R10
  18233. MOVW $0x0015, (AX)
  18234. MOVB R10, 2(AX)
  18235. ADDQ $0x03, AX
  18236. JMP lz4s_s2_loop
  18237. repeat_two_lz4_s2: // 2 bytes: 16-bit word ((length-4) << 2) | 1
  18238. SHLL $0x02, R10
  18239. ORL $0x01, R10
  18240. MOVW R10, (AX)
  18241. ADDQ $0x02, AX
  18242. JMP lz4s_s2_loop
  18243. repeat_two_offset_lz4_s2: // 2 bytes: tag with offset high bits, then offset low byte
  18244. XORQ R8, R8
  18245. LEAL 1(R8)(R10*4), R10 // R10 = ((length-4) << 2) + 1
  18246. MOVB R9, 1(AX) // low 8 bits of the offset
  18247. SARL $0x08, R9
  18248. SHLL $0x05, R9 // offset bits 8 and up moved to bit 5 and up of the tag
  18249. ORL R9, R10
  18250. MOVB R10, (AX)
  18251. ADDQ $0x02, AX
  18252. JMP lz4s_s2_loop
  18253. lz4s_s2_docopy: // new offset: R9 = offset, R10 = length
  18254. MOVQ R9, DI // remember the offset for repeat detection
  18255. // emitCopy
  18256. CMPL R10, $0x40
  18257. JBE two_byte_offset_short_lz4_s2 // length <= 64: a single copy op is enough
  18258. CMPL R9, $0x00000800
  18259. JAE long_offset_short_lz4_s2 // length > 64 and offset >= 2048
  18260. MOVL $0x00000001, R8 // length > 64, offset < 2048: start with a 2-byte copy ...
  18261. LEAL 16(R8), R8 // ... whose tag base is 17
  18262. MOVB R9, 1(AX) // low 8 bits of the offset
  18263. MOVL R9, R11
  18264. SHRL $0x08, R11
  18265. SHLL $0x05, R11 // offset bits 8 and up moved to bit 5 and up of the tag
  18266. ORL R11, R8
  18267. MOVB R8, (AX)
  18268. ADDQ $0x02, AX
  18269. SUBL $0x08, R10 // 8 bytes are covered by that copy op
  18270. // emitRepeat
  18271. LEAL -4(R10), R10 // R10 = remaining length - 4, as the size-class tests expect
  18272. JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b // remainder exceeds 56, so skip the 2-byte repeat forms
  18273. emit_repeat_again_lz4_s2_emit_copy_short_2b: // repeat encoder for the remainder, R10 = length
  18274. MOVL R10, R8
  18275. LEAL -4(R10), R10 // R10 = length - 4
  18276. CMPL R8, $0x08
  18277. JBE repeat_two_lz4_s2_emit_copy_short_2b
  18278. CMPL R8, $0x0c
  18279. JAE cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
  18280. CMPL R9, $0x00000800
  18281. JB repeat_two_offset_lz4_s2_emit_copy_short_2b
  18282. cant_repeat_two_offset_lz4_s2_emit_copy_short_2b: // pick the 3-, 4- or 5-byte repeat form
  18283. CMPL R10, $0x00000104
  18284. JB repeat_three_lz4_s2_emit_copy_short_2b
  18285. CMPL R10, $0x00010100
  18286. JB repeat_four_lz4_s2_emit_copy_short_2b
  18287. CMPL R10, $0x0100ffff
  18288. JB repeat_five_lz4_s2_emit_copy_short_2b
  18289. LEAL -16842747(R10), R10 // too long for one op: emit a fixed 5-byte repeat and retry
  18290. MOVL $0xfffb001d, (AX)
  18291. MOVB $0xff, 4(AX)
  18292. ADDQ $0x05, AX
  18293. JMP emit_repeat_again_lz4_s2_emit_copy_short_2b
  18294. repeat_five_lz4_s2_emit_copy_short_2b: // 5 bytes: 0x1d 0x00 + 24-bit (length-4-65536)
  18295. LEAL -65536(R10), R10
  18296. MOVL R10, R9
  18297. MOVW $0x001d, (AX)
  18298. MOVW R10, 2(AX)
  18299. SARL $0x10, R9
  18300. MOVB R9, 4(AX)
  18301. ADDQ $0x05, AX
  18302. JMP lz4s_s2_loop
  18303. repeat_four_lz4_s2_emit_copy_short_2b: // 4 bytes: 0x19 0x00 + 16-bit (length-4-256)
  18304. LEAL -256(R10), R10
  18305. MOVW $0x0019, (AX)
  18306. MOVW R10, 2(AX)
  18307. ADDQ $0x04, AX
  18308. JMP lz4s_s2_loop
  18309. repeat_three_lz4_s2_emit_copy_short_2b: // 3 bytes: 0x15 0x00 + 8-bit (length-4-4)
  18310. LEAL -4(R10), R10
  18311. MOVW $0x0015, (AX)
  18312. MOVB R10, 2(AX)
  18313. ADDQ $0x03, AX
  18314. JMP lz4s_s2_loop
  18315. repeat_two_lz4_s2_emit_copy_short_2b: // 2 bytes: 16-bit word ((length-4) << 2) | 1
  18316. SHLL $0x02, R10
  18317. ORL $0x01, R10
  18318. MOVW R10, (AX)
  18319. ADDQ $0x02, AX
  18320. JMP lz4s_s2_loop
  18321. repeat_two_offset_lz4_s2_emit_copy_short_2b: // 2 bytes: tag with offset high bits, then offset low byte
  18322. XORQ R8, R8
  18323. LEAL 1(R8)(R10*4), R10
  18324. MOVB R9, 1(AX)
  18325. SARL $0x08, R9
  18326. SHLL $0x05, R9
  18327. ORL R9, R10
  18328. MOVB R10, (AX)
  18329. ADDQ $0x02, AX
  18330. JMP lz4s_s2_loop
  18331. long_offset_short_lz4_s2: // length > 64, offset >= 2048
  18332. MOVB $0xee, (AX) // tag 0xee (59<<2 | 2) followed by the 16-bit offset
  18333. MOVW R9, 1(AX)
  18334. LEAL -60(R10), R10 // 60 bytes are covered by that copy op
  18335. ADDQ $0x03, AX
  18336. // emitRepeat
  18337. emit_repeat_again_lz4_s2_emit_copy_short: // repeat encoder for the remainder, R10 = length
  18338. MOVL R10, R8
  18339. LEAL -4(R10), R10 // R10 = length - 4
  18340. CMPL R8, $0x08
  18341. JBE repeat_two_lz4_s2_emit_copy_short
  18342. CMPL R8, $0x0c
  18343. JAE cant_repeat_two_offset_lz4_s2_emit_copy_short
  18344. CMPL R9, $0x00000800
  18345. JB repeat_two_offset_lz4_s2_emit_copy_short
  18346. cant_repeat_two_offset_lz4_s2_emit_copy_short: // pick the 3-, 4- or 5-byte repeat form
  18347. CMPL R10, $0x00000104
  18348. JB repeat_three_lz4_s2_emit_copy_short
  18349. CMPL R10, $0x00010100
  18350. JB repeat_four_lz4_s2_emit_copy_short
  18351. CMPL R10, $0x0100ffff
  18352. JB repeat_five_lz4_s2_emit_copy_short
  18353. LEAL -16842747(R10), R10 // too long for one op: emit a fixed 5-byte repeat and retry
  18354. MOVL $0xfffb001d, (AX)
  18355. MOVB $0xff, 4(AX)
  18356. ADDQ $0x05, AX
  18357. JMP emit_repeat_again_lz4_s2_emit_copy_short
  18358. repeat_five_lz4_s2_emit_copy_short: // 5 bytes: 0x1d 0x00 + 24-bit (length-4-65536)
  18359. LEAL -65536(R10), R10
  18360. MOVL R10, R9
  18361. MOVW $0x001d, (AX)
  18362. MOVW R10, 2(AX)
  18363. SARL $0x10, R9
  18364. MOVB R9, 4(AX)
  18365. ADDQ $0x05, AX
  18366. JMP lz4s_s2_loop
  18367. repeat_four_lz4_s2_emit_copy_short: // 4 bytes: 0x19 0x00 + 16-bit (length-4-256)
  18368. LEAL -256(R10), R10
  18369. MOVW $0x0019, (AX)
  18370. MOVW R10, 2(AX)
  18371. ADDQ $0x04, AX
  18372. JMP lz4s_s2_loop
  18373. repeat_three_lz4_s2_emit_copy_short: // 3 bytes: 0x15 0x00 + 8-bit (length-4-4)
  18374. LEAL -4(R10), R10
  18375. MOVW $0x0015, (AX)
  18376. MOVB R10, 2(AX)
  18377. ADDQ $0x03, AX
  18378. JMP lz4s_s2_loop
  18379. repeat_two_lz4_s2_emit_copy_short: // 2 bytes: 16-bit word ((length-4) << 2) | 1
  18380. SHLL $0x02, R10
  18381. ORL $0x01, R10
  18382. MOVW R10, (AX)
  18383. ADDQ $0x02, AX
  18384. JMP lz4s_s2_loop
  18385. repeat_two_offset_lz4_s2_emit_copy_short: // 2 bytes: tag with offset high bits, then offset low byte
  18386. XORQ R8, R8
  18387. LEAL 1(R8)(R10*4), R10
  18388. MOVB R9, 1(AX)
  18389. SARL $0x08, R9
  18390. SHLL $0x05, R9
  18391. ORL R9, R10
  18392. MOVB R10, (AX)
  18393. ADDQ $0x02, AX
  18394. JMP lz4s_s2_loop
  18395. two_byte_offset_short_lz4_s2: // length <= 64: one copy op
  18396. MOVL R10, R8
  18397. SHLL $0x02, R8 // R8 = length << 2
  18398. CMPL R10, $0x0c
  18399. JAE emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
  18400. CMPL R9, $0x00000800
  18401. JAE emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
  18402. LEAL -15(R8), R8 // 2-byte form: tag = ((length-4) << 2) + 1
  18403. MOVB R9, 1(AX) // low 8 bits of the offset
  18404. SHRL $0x08, R9
  18405. SHLL $0x05, R9 // offset bits 8 and up moved to bit 5 and up of the tag
  18406. ORL R9, R8
  18407. MOVB R8, (AX)
  18408. ADDQ $0x02, AX
  18409. JMP lz4s_s2_loop
  18410. emit_copy_three_lz4_s2: // 3-byte form: tag = ((length-1) << 2) + 2, then 16-bit offset
  18411. LEAL -2(R8), R8
  18412. MOVB R8, (AX)
  18413. MOVW R9, 1(AX)
  18414. ADDQ $0x03, AX
  18415. JMP lz4s_s2_loop
  18416. lz4s_s2_done: // success: store both results
  18417. MOVQ dst_base+0(FP), CX
  18418. SUBQ CX, AX // AX = bytes written to dst
  18419. MOVQ SI, uncompressed+48(FP)
  18420. MOVQ AX, dstUsed+56(FP)
  18421. RET
  18422. lz4s_s2_corrupt: // input rejected: uncompressed = -1, dstUsed is not stored
  18423. XORQ AX, AX
  18424. LEAQ -1(AX), SI
  18425. MOVQ SI, uncompressed+48(FP)
  18426. RET
  18427. lz4s_s2_dstfull: // dst limit reached: uncompressed = -2, dstUsed is not stored
  18428. XORQ AX, AX
  18429. LEAQ -2(AX), SI
  18430. MOVQ SI, uncompressed+48(FP)
  18431. RET
  18432. // func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  18433. // Requires: SSE2
  //
  // Walks the token stream in src and re-encodes every sequence into dst.
  // Each token byte carries a literal length (high nibble) and a match length
  // (low nibble, +4); either is extended by following bytes while they are 0xff.
  // Literals are copied behind a length tag; every match is written as one or
  // more copy ops (this variant never emits repeat ops and keeps no last offset).
  // NOTE(review): going by the names this converts LZ4 blocks to Snappy - confirm.
  //
  // Results:
  //   uncompressed = sum of all literal and match lengths, dstUsed = bytes written
  //   uncompressed = -1 when the input is rejected (lz4_snappy_corrupt)
  //   uncompressed = -2 when the dst limit is reached (lz4_snappy_dstfull)
  //   dstUsed is not stored on either error path.
  //
  // Register roles:
  //   AX  dst write cursor          CX  dst limit (dst_base + dst_len - 8)
  //   DX  src read cursor           BX  src end
  //   SI  uncompressed byte count
  //   R8  literal length, later the match offset
  //   R9  match length              DI, R10-R14, X0-X5 scratch
  18434. TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
  18435. XORQ SI, SI // SI = 0 bytes decoded so far
  18436. MOVQ dst_base+0(FP), AX // AX = dst write cursor
  18437. MOVQ dst_len+8(FP), CX
  18438. MOVQ src_base+24(FP), DX // DX = src read cursor
  18439. MOVQ src_len+32(FP), BX
  18440. LEAQ (DX)(BX*1), BX // BX = src_base + src_len (end of input)
  18441. LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8 (write limit with 8 bytes of slack)
  18442. lz4_snappy_loop: // head of the per-sequence loop
  18443. CMPQ DX, BX
  18444. JAE lz4_snappy_corrupt // no token byte left to read
  18445. CMPQ AX, CX
  18446. JAE lz4_snappy_dstfull // write cursor reached the dst limit
  18447. MOVBQZX (DX), DI // DI = token byte
  18448. MOVQ DI, R8
  18449. MOVQ DI, R9
  18450. SHRQ $0x04, R8 // R8 = literal length (high nibble)
  18451. ANDQ $0x0f, R9 // R9 = match length nibble (low nibble)
  18452. CMPQ DI, $0xf0
  18453. JB lz4_snappy_ll_end // high nibble < 15: no length extension bytes
  18454. lz4_snappy_ll_loop: // accumulate literal length extension bytes
  18455. INCQ DX
  18456. CMPQ DX, BX
  18457. JAE lz4_snappy_corrupt
  18458. MOVBQZX (DX), DI
  18459. ADDQ DI, R8
  18460. CMPQ DI, $0xff
  18461. JEQ lz4_snappy_ll_loop // a 0xff byte means another extension byte follows
  18462. lz4_snappy_ll_end:
  18463. LEAQ (DX)(R8*1), DI // DI = position of the last literal byte (DX is still on the token/length byte)
  18464. ADDQ $0x04, R9 // match length = nibble + 4
  18465. CMPQ DI, BX
  18466. JAE lz4_snappy_corrupt // literals would run past the end of src
  18467. INCQ DX // DX = first literal byte
  18468. INCQ DI // DI = first byte after the literals
  18469. TESTQ R8, R8
  18470. JZ lz4_snappy_lits_done // no literals in this sequence
  18471. LEAQ (AX)(R8*1), R10
  18472. CMPQ R10, CX
  18473. JAE lz4_snappy_dstfull // literals would cross the dst limit
  18474. ADDQ R8, SI // account for the literal bytes
  18475. LEAL -1(R8), R10 // R10 = length - 1, the value stored in the literal tag
  18476. CMPL R10, $0x3c
  18477. JB one_byte_lz4_snappy // < 60: fits in a single tag byte
  18478. CMPL R10, $0x00000100
  18479. JB two_bytes_lz4_snappy // < 1<<8: tag + 1 length byte
  18480. CMPL R10, $0x00010000
  18481. JB three_bytes_lz4_snappy // < 1<<16: tag + 2 length bytes
  18482. CMPL R10, $0x01000000
  18483. JB four_bytes_lz4_snappy // < 1<<24: tag + 3 length bytes
  18484. MOVB $0xfc, (AX) // tag 0xfc followed by a 4-byte length
  18485. MOVL R10, 1(AX)
  18486. ADDQ $0x05, AX
  18487. JMP memmove_long_lz4_snappy
  18488. four_bytes_lz4_snappy: // tag 0xf8 followed by a 3-byte length
  18489. MOVL R10, R11
  18490. SHRL $0x10, R11 // R11 = bits 16..23 of the length
  18491. MOVB $0xf8, (AX)
  18492. MOVW R10, 1(AX)
  18493. MOVB R11, 3(AX)
  18494. ADDQ $0x04, AX
  18495. JMP memmove_long_lz4_snappy
  18496. three_bytes_lz4_snappy: // tag 0xf4 followed by a 2-byte length
  18497. MOVB $0xf4, (AX)
  18498. MOVW R10, 1(AX)
  18499. ADDQ $0x03, AX
  18500. JMP memmove_long_lz4_snappy
  18501. two_bytes_lz4_snappy: // tag 0xf0 followed by a 1-byte length
  18502. MOVB $0xf0, (AX)
  18503. MOVB R10, 1(AX)
  18504. ADDQ $0x02, AX
  18505. CMPL R10, $0x40
  18506. JB memmove_lz4_snappy // at most 64 literal bytes: use the short copy
  18507. JMP memmove_long_lz4_snappy
  18508. one_byte_lz4_snappy: // single tag byte: (length-1) << 2
  18509. SHLB $0x02, R10
  18510. MOVB R10, (AX)
  18511. ADDQ $0x01, AX
  18512. memmove_lz4_snappy: // short literal copy, R8 = length
  18513. LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
  18514. // genMemMoveShort
  18515. CMPQ R8, $0x08
  18516. JBE emit_lit_memmove_lz4_snappy_memmove_move_8
  18517. CMPQ R8, $0x10
  18518. JBE emit_lit_memmove_lz4_snappy_memmove_move_8through16
  18519. CMPQ R8, $0x20
  18520. JBE emit_lit_memmove_lz4_snappy_memmove_move_17through32
  18521. JMP emit_lit_memmove_lz4_snappy_memmove_move_33through64
  18522. emit_lit_memmove_lz4_snappy_memmove_move_8: // length <= 8: always moves a full 8 bytes
  18523. MOVQ (DX), R11
  18524. MOVQ R11, (AX)
  18525. JMP memmove_end_copy_lz4_snappy
  18526. emit_lit_memmove_lz4_snappy_memmove_move_8through16: // two overlapping 8-byte moves (head and tail)
  18527. MOVQ (DX), R11
  18528. MOVQ -8(DX)(R8*1), DX // DX now holds data; it is reloaded from DI at lz4_snappy_lits_emit_done
  18529. MOVQ R11, (AX)
  18530. MOVQ DX, -8(AX)(R8*1)
  18531. JMP memmove_end_copy_lz4_snappy
  18532. emit_lit_memmove_lz4_snappy_memmove_move_17through32: // two overlapping 16-byte moves
  18533. MOVOU (DX), X0
  18534. MOVOU -16(DX)(R8*1), X1
  18535. MOVOU X0, (AX)
  18536. MOVOU X1, -16(AX)(R8*1)
  18537. JMP memmove_end_copy_lz4_snappy
  18538. emit_lit_memmove_lz4_snappy_memmove_move_33through64: // first 32 and last 32 bytes, overlapping
  18539. MOVOU (DX), X0
  18540. MOVOU 16(DX), X1
  18541. MOVOU -32(DX)(R8*1), X2
  18542. MOVOU -16(DX)(R8*1), X3
  18543. MOVOU X0, (AX)
  18544. MOVOU X1, 16(AX)
  18545. MOVOU X2, -32(AX)(R8*1)
  18546. MOVOU X3, -16(AX)(R8*1)
  18547. memmove_end_copy_lz4_snappy:
  18548. MOVQ R10, AX // advance dst by exactly the literal length
  18549. JMP lz4_snappy_lits_emit_done
  18550. memmove_long_lz4_snappy: // long literal copy, R8 = length
  18551. LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
  18552. // genMemMoveLong
  18553. MOVOU (DX), X0 // X0,X1 = first 32 source bytes
  18554. MOVOU 16(DX), X1
  18555. MOVOU -32(DX)(R8*1), X2 // X2,X3 = last 32 source bytes
  18556. MOVOU -16(DX)(R8*1), X3
  18557. MOVQ R8, R12
  18558. SHRQ $0x05, R12 // R12 = length / 32
  18559. MOVQ AX, R11
  18560. ANDL $0x0000001f, R11 // R11 = AX mod 32
  18561. MOVQ $0x00000040, R13
  18562. SUBQ R11, R13 // R13 = 64 - (AX mod 32), so AX+R13-32 is 32-byte aligned
  18563. DECQ R12
  18564. JA emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32 // branches on the flags left by DECQ
  18565. LEAQ -32(DX)(R13*1), R11 // R11 = source cursor for the block loop
  18566. LEAQ -32(AX)(R13*1), R14 // R14 = aligned destination cursor
  18567. emit_lit_memmove_long_lz4_snappylarge_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
  18568. MOVOU (R11), X4
  18569. MOVOU 16(R11), X5
  18570. MOVOA X4, (R14)
  18571. MOVOA X5, 16(R14)
  18572. ADDQ $0x20, R14
  18573. ADDQ $0x20, R11
  18574. ADDQ $0x20, R13
  18575. DECQ R12
  18576. JNA emit_lit_memmove_long_lz4_snappylarge_big_loop_back
  18577. emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32: // same 32-byte step, indexed by R13
  18578. MOVOU -32(DX)(R13*1), X4
  18579. MOVOU -16(DX)(R13*1), X5
  18580. MOVOA X4, -32(AX)(R13*1)
  18581. MOVOA X5, -16(AX)(R13*1)
  18582. ADDQ $0x20, R13
  18583. CMPQ R8, R13
  18584. JAE emit_lit_memmove_long_lz4_snappylarge_forward_sse_loop_32 // continue while length >= R13
  18585. MOVOU X0, (AX) // finally store the saved head ...
  18586. MOVOU X1, 16(AX)
  18587. MOVOU X2, -32(AX)(R8*1) // ... and the saved tail with unaligned stores
  18588. MOVOU X3, -16(AX)(R8*1)
  18589. MOVQ R10, AX // advance dst by exactly the literal length
  18590. lz4_snappy_lits_emit_done:
  18591. MOVQ DI, DX // src cursor = first byte after the literals
  18592. lz4_snappy_lits_done:
  18593. CMPQ DX, BX
  18594. JNE lz4_snappy_match // more input: handle the match part of this sequence
  18595. CMPQ R9, $0x04
  18596. JEQ lz4_snappy_done // input ended and the match nibble was 0: finished
  18597. JMP lz4_snappy_corrupt // input ended while a match was still expected
  18598. lz4_snappy_match:
  18599. LEAQ 2(DX), DI
  18600. CMPQ DI, BX
  18601. JAE lz4_snappy_corrupt // need the 2 offset bytes plus at least one more input byte
  18602. MOVWQZX (DX), R8 // R8 = 16-bit match offset
  18603. MOVQ DI, DX
  18604. TESTQ R8, R8
  18605. JZ lz4_snappy_corrupt // offset 0 is rejected
  18606. CMPQ R8, SI
  18607. JA lz4_snappy_corrupt // offset reaches back further than the bytes produced so far
  18608. CMPQ R9, $0x13
  18609. JNE lz4_snappy_ml_done // nibble < 15 (15+4 = 0x13): no match length extension
  18610. lz4_snappy_ml_loop: // accumulate match length extension bytes
  18611. MOVBQZX (DX), DI
  18612. INCQ DX
  18613. ADDQ DI, R9
  18614. CMPQ DX, BX
  18615. JAE lz4_snappy_corrupt
  18616. CMPQ DI, $0xff
  18617. JEQ lz4_snappy_ml_loop // a 0xff byte means another extension byte follows
  18618. lz4_snappy_ml_done:
  18619. ADDQ R9, SI // account for the match bytes
  18620. // emitCopy
  18621. two_byte_offset_lz4_s2: // R8 = offset, R9 = remaining length
  18622. CMPL R9, $0x40
  18623. JBE two_byte_offset_short_lz4_s2 // length <= 64: finish with a single copy op
  18624. MOVB $0xee, (AX) // tag 0xee (59<<2 | 2) followed by the 16-bit offset
  18625. MOVW R8, 1(AX)
  18626. LEAL -60(R9), R9 // 60 bytes are covered by that copy op
  18627. ADDQ $0x03, AX
  18628. CMPQ AX, CX
  18629. JAE lz4_snappy_loop // dst limit reached: go back to the loop head, whose bounds checks run next
  18630. JMP two_byte_offset_lz4_s2 // encode the remainder
  18631. two_byte_offset_short_lz4_s2:
  18632. MOVL R9, DI
  18633. SHLL $0x02, DI // DI = length << 2
  18634. CMPL R9, $0x0c
  18635. JAE emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
  18636. CMPL R8, $0x00000800
  18637. JAE emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
  18638. LEAL -15(DI), DI // 2-byte form: tag = ((length-4) << 2) + 1
  18639. MOVB R8, 1(AX) // low 8 bits of the offset
  18640. SHRL $0x08, R8
  18641. SHLL $0x05, R8 // offset bits 8 and up moved to bit 5 and up of the tag
  18642. ORL R8, DI
  18643. MOVB DI, (AX)
  18644. ADDQ $0x02, AX
  18645. JMP lz4_snappy_loop
  18646. emit_copy_three_lz4_s2: // 3-byte form: tag = ((length-1) << 2) + 2, then 16-bit offset
  18647. LEAL -2(DI), DI
  18648. MOVB DI, (AX)
  18649. MOVW R8, 1(AX)
  18650. ADDQ $0x03, AX
  18651. JMP lz4_snappy_loop
  18652. lz4_snappy_done: // success: store both results
  18653. MOVQ dst_base+0(FP), CX
  18654. SUBQ CX, AX // AX = bytes written to dst
  18655. MOVQ SI, uncompressed+48(FP)
  18656. MOVQ AX, dstUsed+56(FP)
  18657. RET
  18658. lz4_snappy_corrupt: // input rejected: uncompressed = -1, dstUsed is not stored
  18659. XORQ AX, AX
  18660. LEAQ -1(AX), SI
  18661. MOVQ SI, uncompressed+48(FP)
  18662. RET
  18663. lz4_snappy_dstfull: // dst limit reached: uncompressed = -2, dstUsed is not stored
  18664. XORQ AX, AX
  18665. LEAQ -2(AX), SI
  18666. MOVQ SI, uncompressed+48(FP)
  18667. RET
  18668. // func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
  18669. // Requires: SSE2
  //
  // Walks the token stream in src and re-encodes every sequence into dst.
  // Each token byte carries a literal length (high nibble) and a match length
  // (low nibble, +3); either is extended by following bytes while they are 0xff.
  // A match nibble of 0 means the sequence has no match part. Literals are
  // copied behind a length tag; every match is written as one or more copy ops
  // (this variant never emits repeat ops and keeps no last offset).
  // NOTE(review): going by the names this converts LZ4s blocks to Snappy - confirm.
  //
  // Results:
  //   uncompressed = sum of all literal and match lengths, dstUsed = bytes written
  //   uncompressed = -1 when the input is rejected (lz4s_snappy_corrupt)
  //   uncompressed = -2 when the dst limit is reached (lz4s_snappy_dstfull)
  //   dstUsed is not stored on either error path.
  //
  // Register roles:
  //   AX  dst write cursor          CX  dst limit (dst_base + dst_len - 8)
  //   DX  src read cursor           BX  src end
  //   SI  uncompressed byte count
  //   R8  literal length, later the match offset
  //   R9  match length              DI, R10-R14, X0-X5 scratch
  18670. TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
  18671. XORQ SI, SI // SI = 0 bytes decoded so far
  18672. MOVQ dst_base+0(FP), AX // AX = dst write cursor
  18673. MOVQ dst_len+8(FP), CX
  18674. MOVQ src_base+24(FP), DX // DX = src read cursor
  18675. MOVQ src_len+32(FP), BX
  18676. LEAQ (DX)(BX*1), BX // BX = src_base + src_len (end of input)
  18677. LEAQ -8(AX)(CX*1), CX // CX = dst_base + dst_len - 8 (write limit with 8 bytes of slack)
  18678. lz4s_snappy_loop: // head of the per-sequence loop
  18679. CMPQ DX, BX
  18680. JAE lz4s_snappy_corrupt // no token byte left to read
  18681. CMPQ AX, CX
  18682. JAE lz4s_snappy_dstfull // write cursor reached the dst limit
  18683. MOVBQZX (DX), DI // DI = token byte
  18684. MOVQ DI, R8
  18685. MOVQ DI, R9
  18686. SHRQ $0x04, R8 // R8 = literal length (high nibble)
  18687. ANDQ $0x0f, R9 // R9 = match length nibble (low nibble)
  18688. CMPQ DI, $0xf0
  18689. JB lz4s_snappy_ll_end // high nibble < 15: no length extension bytes
  18690. lz4s_snappy_ll_loop: // accumulate literal length extension bytes
  18691. INCQ DX
  18692. CMPQ DX, BX
  18693. JAE lz4s_snappy_corrupt
  18694. MOVBQZX (DX), DI
  18695. ADDQ DI, R8
  18696. CMPQ DI, $0xff
  18697. JEQ lz4s_snappy_ll_loop // a 0xff byte means another extension byte follows
  18698. lz4s_snappy_ll_end:
  18699. LEAQ (DX)(R8*1), DI // DI = position of the last literal byte (DX is still on the token/length byte)
  18700. ADDQ $0x03, R9 // match length = nibble + 3
  18701. CMPQ DI, BX
  18702. JAE lz4s_snappy_corrupt // literals would run past the end of src
  18703. INCQ DX // DX = first literal byte
  18704. INCQ DI // DI = first byte after the literals
  18705. TESTQ R8, R8
  18706. JZ lz4s_snappy_lits_done // no literals in this sequence
  18707. LEAQ (AX)(R8*1), R10
  18708. CMPQ R10, CX
  18709. JAE lz4s_snappy_dstfull // literals would cross the dst limit
  18710. ADDQ R8, SI // account for the literal bytes
  18711. LEAL -1(R8), R10 // R10 = length - 1, the value stored in the literal tag
  18712. CMPL R10, $0x3c
  18713. JB one_byte_lz4s_snappy // < 60: fits in a single tag byte
  18714. CMPL R10, $0x00000100
  18715. JB two_bytes_lz4s_snappy // < 1<<8: tag + 1 length byte
  18716. CMPL R10, $0x00010000
  18717. JB three_bytes_lz4s_snappy // < 1<<16: tag + 2 length bytes
  18718. CMPL R10, $0x01000000
  18719. JB four_bytes_lz4s_snappy // < 1<<24: tag + 3 length bytes
  18720. MOVB $0xfc, (AX) // tag 0xfc followed by a 4-byte length
  18721. MOVL R10, 1(AX)
  18722. ADDQ $0x05, AX
  18723. JMP memmove_long_lz4s_snappy
  18724. four_bytes_lz4s_snappy: // tag 0xf8 followed by a 3-byte length
  18725. MOVL R10, R11
  18726. SHRL $0x10, R11 // R11 = bits 16..23 of the length
  18727. MOVB $0xf8, (AX)
  18728. MOVW R10, 1(AX)
  18729. MOVB R11, 3(AX)
  18730. ADDQ $0x04, AX
  18731. JMP memmove_long_lz4s_snappy
  18732. three_bytes_lz4s_snappy: // tag 0xf4 followed by a 2-byte length
  18733. MOVB $0xf4, (AX)
  18734. MOVW R10, 1(AX)
  18735. ADDQ $0x03, AX
  18736. JMP memmove_long_lz4s_snappy
  18737. two_bytes_lz4s_snappy: // tag 0xf0 followed by a 1-byte length
  18738. MOVB $0xf0, (AX)
  18739. MOVB R10, 1(AX)
  18740. ADDQ $0x02, AX
  18741. CMPL R10, $0x40
  18742. JB memmove_lz4s_snappy // at most 64 literal bytes: use the short copy
  18743. JMP memmove_long_lz4s_snappy
  18744. one_byte_lz4s_snappy: // single tag byte: (length-1) << 2
  18745. SHLB $0x02, R10
  18746. MOVB R10, (AX)
  18747. ADDQ $0x01, AX
  18748. memmove_lz4s_snappy: // short literal copy, R8 = length
  18749. LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
  18750. // genMemMoveShort
  18751. CMPQ R8, $0x08
  18752. JBE emit_lit_memmove_lz4s_snappy_memmove_move_8
  18753. CMPQ R8, $0x10
  18754. JBE emit_lit_memmove_lz4s_snappy_memmove_move_8through16
  18755. CMPQ R8, $0x20
  18756. JBE emit_lit_memmove_lz4s_snappy_memmove_move_17through32
  18757. JMP emit_lit_memmove_lz4s_snappy_memmove_move_33through64
  18758. emit_lit_memmove_lz4s_snappy_memmove_move_8: // length <= 8: always moves a full 8 bytes
  18759. MOVQ (DX), R11
  18760. MOVQ R11, (AX)
  18761. JMP memmove_end_copy_lz4s_snappy
  18762. emit_lit_memmove_lz4s_snappy_memmove_move_8through16: // two overlapping 8-byte moves (head and tail)
  18763. MOVQ (DX), R11
  18764. MOVQ -8(DX)(R8*1), DX // DX now holds data; it is reloaded from DI at lz4s_snappy_lits_emit_done
  18765. MOVQ R11, (AX)
  18766. MOVQ DX, -8(AX)(R8*1)
  18767. JMP memmove_end_copy_lz4s_snappy
  18768. emit_lit_memmove_lz4s_snappy_memmove_move_17through32: // two overlapping 16-byte moves
  18769. MOVOU (DX), X0
  18770. MOVOU -16(DX)(R8*1), X1
  18771. MOVOU X0, (AX)
  18772. MOVOU X1, -16(AX)(R8*1)
  18773. JMP memmove_end_copy_lz4s_snappy
  18774. emit_lit_memmove_lz4s_snappy_memmove_move_33through64: // first 32 and last 32 bytes, overlapping
  18775. MOVOU (DX), X0
  18776. MOVOU 16(DX), X1
  18777. MOVOU -32(DX)(R8*1), X2
  18778. MOVOU -16(DX)(R8*1), X3
  18779. MOVOU X0, (AX)
  18780. MOVOU X1, 16(AX)
  18781. MOVOU X2, -32(AX)(R8*1)
  18782. MOVOU X3, -16(AX)(R8*1)
  18783. memmove_end_copy_lz4s_snappy:
  18784. MOVQ R10, AX // advance dst by exactly the literal length
  18785. JMP lz4s_snappy_lits_emit_done
  18786. memmove_long_lz4s_snappy: // long literal copy, R8 = length
  18787. LEAQ (AX)(R8*1), R10 // R10 = dst cursor after the literals
  18788. // genMemMoveLong
  18789. MOVOU (DX), X0 // X0,X1 = first 32 source bytes
  18790. MOVOU 16(DX), X1
  18791. MOVOU -32(DX)(R8*1), X2 // X2,X3 = last 32 source bytes
  18792. MOVOU -16(DX)(R8*1), X3
  18793. MOVQ R8, R12
  18794. SHRQ $0x05, R12 // R12 = length / 32
  18795. MOVQ AX, R11
  18796. ANDL $0x0000001f, R11 // R11 = AX mod 32
  18797. MOVQ $0x00000040, R13
  18798. SUBQ R11, R13 // R13 = 64 - (AX mod 32), so AX+R13-32 is 32-byte aligned
  18799. DECQ R12
  18800. JA emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 // branches on the flags left by DECQ
  18801. LEAQ -32(DX)(R13*1), R11 // R11 = source cursor for the block loop
  18802. LEAQ -32(AX)(R13*1), R14 // R14 = aligned destination cursor
  18803. emit_lit_memmove_long_lz4s_snappylarge_big_loop_back: // 32 bytes per pass: unaligned loads, aligned stores
  18804. MOVOU (R11), X4
  18805. MOVOU 16(R11), X5
  18806. MOVOA X4, (R14)
  18807. MOVOA X5, 16(R14)
  18808. ADDQ $0x20, R14
  18809. ADDQ $0x20, R11
  18810. ADDQ $0x20, R13
  18811. DECQ R12
  18812. JNA emit_lit_memmove_long_lz4s_snappylarge_big_loop_back
  18813. emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32: // same 32-byte step, indexed by R13
  18814. MOVOU -32(DX)(R13*1), X4
  18815. MOVOU -16(DX)(R13*1), X5
  18816. MOVOA X4, -32(AX)(R13*1)
  18817. MOVOA X5, -16(AX)(R13*1)
  18818. ADDQ $0x20, R13
  18819. CMPQ R8, R13
  18820. JAE emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 // continue while length >= R13
  18821. MOVOU X0, (AX) // finally store the saved head ...
  18822. MOVOU X1, 16(AX)
  18823. MOVOU X2, -32(AX)(R8*1) // ... and the saved tail with unaligned stores
  18824. MOVOU X3, -16(AX)(R8*1)
  18825. MOVQ R10, AX // advance dst by exactly the literal length
  18826. lz4s_snappy_lits_emit_done:
  18827. MOVQ DI, DX // src cursor = first byte after the literals
  18828. lz4s_snappy_lits_done:
  18829. CMPQ DX, BX
  18830. JNE lz4s_snappy_match // more input: handle the match part of this sequence
  18831. CMPQ R9, $0x03
  18832. JEQ lz4s_snappy_done // input ended and the match nibble was 0: finished
  18833. JMP lz4s_snappy_corrupt // input ended while a match was still expected
  18834. lz4s_snappy_match:
  18835. CMPQ R9, $0x03
  18836. JEQ lz4s_snappy_loop // match nibble 0: this sequence has no match, read the next token
  18837. LEAQ 2(DX), DI
  18838. CMPQ DI, BX
  18839. JAE lz4s_snappy_corrupt // need the 2 offset bytes plus at least one more input byte
  18840. MOVWQZX (DX), R8 // R8 = 16-bit match offset
  18841. MOVQ DI, DX
  18842. TESTQ R8, R8
  18843. JZ lz4s_snappy_corrupt // offset 0 is rejected
  18844. CMPQ R8, SI
  18845. JA lz4s_snappy_corrupt // offset reaches back further than the bytes produced so far
  18846. CMPQ R9, $0x12
  18847. JNE lz4s_snappy_ml_done // nibble < 15 (15+3 = 0x12): no match length extension
  18848. lz4s_snappy_ml_loop: // accumulate match length extension bytes
  18849. MOVBQZX (DX), DI
  18850. INCQ DX
  18851. ADDQ DI, R9
  18852. CMPQ DX, BX
  18853. JAE lz4s_snappy_corrupt
  18854. CMPQ DI, $0xff
  18855. JEQ lz4s_snappy_ml_loop // a 0xff byte means another extension byte follows
  18856. lz4s_snappy_ml_done:
  18857. ADDQ R9, SI // account for the match bytes
  18858. // emitCopy
  18859. two_byte_offset_lz4_s2: // R8 = offset, R9 = remaining length
  18860. CMPL R9, $0x40
  18861. JBE two_byte_offset_short_lz4_s2 // length <= 64: finish with a single copy op
  18862. MOVB $0xee, (AX) // tag 0xee (59<<2 | 2) followed by the 16-bit offset
  18863. MOVW R8, 1(AX)
  18864. LEAL -60(R9), R9 // 60 bytes are covered by that copy op
  18865. ADDQ $0x03, AX
  18866. CMPQ AX, CX
  18867. JAE lz4s_snappy_loop // dst limit reached: go back to the loop head, whose bounds checks run next
  18868. JMP two_byte_offset_lz4_s2 // encode the remainder
  18869. two_byte_offset_short_lz4_s2:
  18870. MOVL R9, DI
  18871. SHLL $0x02, DI // DI = length << 2
  18872. CMPL R9, $0x0c
  18873. JAE emit_copy_three_lz4_s2 // length >= 12 needs the 3-byte form
  18874. CMPL R8, $0x00000800
  18875. JAE emit_copy_three_lz4_s2 // offset >= 2048 needs the 3-byte form
  18876. LEAL -15(DI), DI // 2-byte form: tag = ((length-4) << 2) + 1
  18877. MOVB R8, 1(AX) // low 8 bits of the offset
  18878. SHRL $0x08, R8
  18879. SHLL $0x05, R8 // offset bits 8 and up moved to bit 5 and up of the tag
  18880. ORL R8, DI
  18881. MOVB DI, (AX)
  18882. ADDQ $0x02, AX
  18883. JMP lz4s_snappy_loop
  18884. emit_copy_three_lz4_s2: // 3-byte form: tag = ((length-1) << 2) + 2, then 16-bit offset
  18885. LEAL -2(DI), DI
  18886. MOVB DI, (AX)
  18887. MOVW R8, 1(AX)
  18888. ADDQ $0x03, AX
  18889. JMP lz4s_snappy_loop
  18890. lz4s_snappy_done: // success: store both results
  18891. MOVQ dst_base+0(FP), CX
  18892. SUBQ CX, AX // AX = bytes written to dst
  18893. MOVQ SI, uncompressed+48(FP)
  18894. MOVQ AX, dstUsed+56(FP)
  18895. RET
  18896. lz4s_snappy_corrupt: // input rejected: uncompressed = -1, dstUsed is not stored
  18897. XORQ AX, AX
  18898. LEAQ -1(AX), SI
  18899. MOVQ SI, uncompressed+48(FP)
  18900. RET
  18901. lz4s_snappy_dstfull: // dst limit reached: uncompressed = -2, dstUsed is not stored
  18902. XORQ AX, AX
  18903. LEAQ -2(AX), SI
  18904. MOVQ SI, uncompressed+48(FP)
  18905. RET