1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764777
/*******************************************************************************

    copyright:      Copyright (c) 2007-2008 Jascha Wetzel. All rights reserved.

    license:        BSD style: $(LICENSE)

    version:        Initial release: Jan 2008

    authors:        Jascha Wetzel

    This is a regular expression compiler and interpreter based on the Tagged NFA/DFA method.

    The Regex class is not thread safe.

    See <a href="http://en.wikipedia.org/wiki/Regular_expression">Wikpedia's article on regular expressions</a>
    for details on regular expressions in general.

    The used method implies, that the expressions are <i>regular</i>, in the way language theory defines it,
    as opposed to what &quot;regular expression&quot; means in most implementations
    (e.g. PCRE or those from the standard libraries of Perl, Java or Python).
    The advantage of this method is it's performance, it's disadvantage is the inability to realize some features
    that Perl-like regular expressions have (e.g. back-references).
    See <a href="http://swtch.com/~rsc/regexp/regexp1.html">&quot;Regular Expression Matching Can Be Simple And Fast&quot;</a>
    for details on the differences.

    The time for matching a regular expression against an input string of length N is in O(M*N), where M depends on the
    number of matching brackets and the complexity of the expression. That is, M is constant wrt. the input
    and therefore matching is a linear-time process.

    The syntax of a regular expressions is as follows.
    <i>X</i> and <i>Y</i> stand for an arbitrary regular expression.

    <table border=1 cellspacing=0 cellpadding=5>
    <caption>Operators</caption>
    $(TR $(TD X|Y) $(TD alternation, i.e. X or Y) )
    $(TR $(TD (X)) $(TD matching brackets - creates a sub-match) )
    $(TR $(TD (?X)) $(TD non-matching brackets - only groups X, no sub-match is created) )
    $(TR $(TD [Z]) $(TD character class specification, Z is a string of characters or character ranges, e.g. [a-zA-Z0-9_.\-]) )
    $(TR $(TD [^Z]) $(TD negated character class specification) )
    $(TR $(TD &lt;X) $(TD lookbehind, X may be a single character or a character class) )
    $(TR $(TD &gt;X) $(TD lookahead, X may be a single character or a character class) )
    $(TR $(TD ^) $(TD start of input or start of line) )
    $(TR $(TD $) $(TD end of input or end of line) )
    $(TR $(TD \b) $(TD start or end of word, equals (?&lt;\s&gt;\S|&lt;\S&gt;\s)) )
    $(TR $(TD \B) $(TD opposite of \b, equals (?&lt;\S&gt;\S|&lt;\s&gt;\s)) )
    </table>

    <table border=1 cellspacing=0 cellpadding=5>
    <caption>Quantifiers</caption>
    $(TR $(TD X?) $(TD zero or one) )
    $(TR $(TD X*) $(TD zero or more) )
    $(TR $(TD X+) $(TD one or more) )
    $(TR $(TD X{n,m}) $(TD at least n, at most m instances of X.<br>If n is missing, it's set to 0.<br>If m is missing, it is set to infinity.) )
    $(TR $(TD X??) $(TD non-greedy version of the above operators) )
    $(TR $(TD X*?) $(TD see above) )
    $(TR $(TD X+?) $(TD see above) )
    $(TR $(TD X{n,m}?) $(TD see above) )
    </table>

    <table border=1 cellspacing=0 cellpadding=5>
    <caption>Pre-defined character classes</caption>
    $(TR $(TD .) $(TD any printable character) )
    $(TR $(TD \s) $(TD whitespace) )
    $(TR $(TD \S) $(TD non-whitespace) )
    $(TR $(TD \w) $(TD alpha-numeric characters or underscore) )
    $(TR $(TD \W) $(TD opposite of \w) )
    $(TR $(TD \d) $(TD digits) )
    $(TR $(TD \D) $(TD non-digit) )
    </table>

    Note that "alphanumeric" only applies to Latin-1.
*******************************************************************************/
module tango.text.Regex;

debug(TangoRegex) import tango.io.Stdout;

/* *****************************************************************************
    A simple pair
*******************************************************************************/
private struct Pair(T)
{
    static Pair opCall(T a, T b)
    {
        Pair p;
        p.a = a;
        p.b = b;
        return p;
    }

    union
    {
        struct {
            T first, second;
        }
        struct {
            T a, b;
        }
    }
}

/* *****************************************************************************
    Double linked list
*******************************************************************************/
private class List(T)
{
    class Element
    {
        T value;
        Element prev,
                next;

        this(T v)
        {
            value = v;
        }
    }

    size_t  len;
    Element head,
            tail;

    List opCatAssign(T v)
    {
        if ( tail is null )
            head = tail = new Element(v);
        else {
            tail.next = new Element(v);
            tail.next.prev = tail;
            tail = tail.next;
        }
        ++len;
        return this;
    }

    List insertAfter(T w, T v)
    {
        foreach ( e; &this.elements )
        {
            if ( e.value is w )
                return insertAfter(e, v);
        }
        return null;
    }

    List insertAfter(Element e, T v)
    {
        auto tmp = new Element(v);
        tmp.prev = e;
        tmp.next = e.next;
        e.next.prev = tmp;
        e.next = tmp;
        if ( e is tail )
            tail = tmp;
        ++len;
        return this;
    }

    List opCatAssign(List l)
    {
        if ( l.empty )
            return this;
        if ( tail is null ) {
            head = l.head;
            tail = l.tail;
        }
        else {
            tail.next = l.head;
            tail.next.prev = tail;
            tail = l.tail;
        }
        len += l.len;
        return this;
    }

    List pushFront(T v)
    {
        if ( head is null )
            head = tail = new Element(v);
        else
        {
            head.prev = new Element(v);
            head.prev.next = head;
            head = head.prev;
        }
        ++len;
        return this;
    }

    List insertBefore(T w, T v)
    {
        foreach ( e; &this.elements )
        {
            if ( e.value is w )
                return insertBefore(e, v);
        }
        return null;
    }

    List insertBefore(Element e, T v)
    {
        auto tmp = new Element(v);
        tmp.prev = e.prev;
        tmp.next = e;
        e.prev.next = tmp;
        e.prev = tmp;
        if ( e is head )
            head = tmp;
        ++len;
        return this;
    }

    List pushFront(List l)
    {
        if ( l.empty )
            return this;
        if ( head is null ) {
            head = l.head;
            tail = l.tail;
        }
        else {
            head.prev = l.tail;
            head.prev.next = head;
            head = l.head;
        }
        len += l.len;
        return this;
    }

    size_t length()
    {
        return len;
    }

    bool empty()
    {
        return head is null;
    }

    void clear()
    {
        head = null;
        tail = null;
        len = 0;
    }

    void pop()
    {
        remove(tail);
    }

    void remove(Element e)
    {
        if ( e is null )
            return;
        if ( e.prev is null )
            head = e.next;
        else
            e.prev.next = e.next;
        if ( e.next is null )
            tail = e.prev;
        else
            e.next.prev = e.prev;
        --len;
    }

    int elements(int delegate(ref Element) dg)
    {
        for ( Element e=head; e !is null; e = e.next )
        {
            int ret = dg(e);
            if ( ret )
                return ret;
        }
        return 0;
    }

    int elements_reverse(int delegate(ref Element) dg)
    {
        for ( Element e=tail; e !is null; e = e.prev )
        {
            int ret = dg(e);
            if ( ret )
                return ret;
        }
        return 0;
    }

    int opApply(int delegate(ref T) dg)
    {
        for ( Element e=head; e !is null; e = e.next )
        {
            int ret = dg(e.value);
            if ( ret )
                return ret;
        }
        return 0;
    }

    int opApplyReverse(int delegate(ref T) dg)
    {
        for ( Element e=tail; e !is null; e = e.prev )
        {
            int ret = dg(e.value);
            if ( ret )
                return ret;
        }
        return 0;
    }
}

/* *****************************************************************************
    Stack based on dynamic array
*******************************************************************************/
private struct Stack(T)
{
    size_t  _top;
    T[]     stack;

    void push(T v)
    {
        if ( _top >= stack.length )
            stack.length = stack.length*2+1;
        stack[_top] = v;
        ++_top;
    }
    alias push opCatAssign;

    void opCatAssign(T[] vs)
    {
        size_t end = _top+vs.length;
        if ( end > stack.length )
            stack.length = end*2;
        stack[_top..end] = vs;
        _top = end;
    }

    void pop(size_t num)
    {
        assert(_top>=num);
        _top -= num;
    }

    T pop()
    {
        assert(_top>0);
        return stack[--_top];
    }

    T top()
    {
        assert(_top>0);
        return stack[_top-1];
    }

    T* topPtr()
    {
        assert(_top>0);
        return &stack[_top-1];
    }

    bool empty()
    {
        return _top == 0;
    }

    void clear()
    {
        _top = 0;
    }

    size_t length()
    {
        return _top;
    }

    T[] array()
    {
        return stack[0.._top];
    }

    T opIndex(size_t i)
    {
        return stack[i];
    }

    Stack dup()
    {
        Stack s;
        s._top = _top;
        s.stack = stack.dup;
        return s;
    }
}

/* ************************************************************************************************
    Set container based on assoc array
**************************************************************************************************/
private struct Set(T)
{
    bool[T] data;

    static Set opCall()
    {
        Set s;
        return s;
    }

    static Set opCall(T v)
    {
        Set s;
        s ~= v;
        return s;
    }

    void opAddAssign(T v)
    {
        data[v] = true;
    }

    void opAddAssign(Set s)
    {
        foreach ( v; s.elements )
            data[v] = true;
    }
    alias opAddAssign opCatAssign;

    size_t length()
    {
        return data.length;
    }

    T[] elements()
    {
        return data.keys;
    }

    bool remove(T v)
    {
        if ( (v in data) is null )
            return false;
        data.remove(v);
        return true;
    }

    bool contains(T v)
    {
        return (v in data) !is null;
    }

    bool contains(Set s)
    {
        Set tmp = s - *this;
        return tmp.empty;
    }

    bool empty()
    {
        return data.length==0;
    }

    Set opSub(Set s)
    {
        Set res = dup;
        foreach ( v; s.elements )
            res.remove(v);
        return res;
    }

    Set dup()
    {
        Set s;
        foreach ( v; data.keys )
            s.data[v] = true;
        return s;
    }
}

/* ************************************************************************************************

**************************************************************************************************/
void quickSort(T)(T[] a)
{
    quickSort(a,cast(size_t)0,a.length);
}

void quickSort(T)(T[] a, size_t l, size_t r)
{
    T t;
    auto i = r-l;
    if ( i < 3 )
    {
        if ( i < 2 )
            return;
        if ( a[l] < a[l+1] )
            return;
        t = a[l];
        a[l] = a[l+1];
        a[l+1] = t;
        return;
    }

    auto p = a[l];
    i = l;
    auto j = r;

    while ( true )
    {
        ++i;
        for ( ; i < j && a[i] < p; ++i ) {}
        --j;
        for ( ; i < j && a[j] >= p; --j ) {}
        if ( i >= j )
            break;
        t = a[i];
        a[i] = a[j];
        a[j] = t;
    }
    --i;
    a[l] = a[i];
    a[i] = p;

    quickSort(a, l, i);
    quickSort(a, i+1, r);
}
import tango.math.Math;

/* ************************************************************************************************
    A range of characters
**************************************************************************************************/
struct CharRange(char_t)
{
    char_t  l_, r_;

    static CharRange opCall(char_t c)
    {
        CharRange cr;
        cr.l_ = c;
        cr.r_ = c;
        return cr;
    }

    static CharRange opCall(char_t a, char_t b)
    {
        CharRange cr;
        cr.l_ = min(a,b);
        cr.r_ = max(a,b);
        return cr;
    }

    char_t l()
    {
        return l_;
    }

    char_t r()
    {
        return r_;
    }

    /* ********************************************************************************************
        Compares the ranges according to their beginning.
    **********************************************************************************************/
    int opCmp(CharRange cr)
    {
        if ( l_ == cr.l_ )
            return 0;
        if ( l_ < cr.l_ )
            return -1;
        return 1;
    }

    int opEquals(CharRange cr)
    {
        if ( l_ == cr.l_ && r_ == cr.r_ )
            return 1;
        return 0;
    }

    bool contains(char_t c)
    {
        return c >= l_ && c <= r_;
    }

    bool contains(CharRange cr)
    {
        return l_ <= cr.l_ && r_ >= cr.r_;
    }

    bool intersects(CharRange cr)
    {
        return r_ >= cr.l_ && l_ <= cr.r_;
    }

    CharRange intersect(CharRange cr)
    {
        assert(intersects(cr));
        CharRange ir;
        ir.l_ = max(l_, cr.l_);
        ir.r_ = min(r_, cr.r_);
        if ( ir.l_ > ir.r_ )
            ir.l_ = ir.r_ = char_t.min;
        return ir;
    }

    CharRange[] subtract(CharRange cr)
    {
        CharRange[] sr;
        if ( cr.contains(*this) )
            return sr;
        if ( !intersects(cr) )
            sr ~= *this;
        else
        {
            CharRange d;
            if ( contains(cr) )
            {
                d.l_ = l_;
                d.r_ = cr.l_-1;
                if ( d.l_ <= d.r_ )
                    sr ~= d;
                d.l_ = cr.r_+1;
                d.r_ = r_;
                if ( d.l_ <= d.r_ )
                    sr ~= d;
            }
            else if ( cr.r_ > l_ )
            {
                d.l_ = cr.r_+1;
                d.r_ = r_;
                if ( d.l_ <= d.r_ )
                    sr ~= d;
            }
            else if ( cr.l_ < r_ )
            {
                d.l_ = l_;
                d.r_ = cr.l_-1;
                if ( d.l_ <= d.r_ )
                    sr ~= d;
            }
        }
        return sr;
    }

    string toString()
    {
        string str;
        if ( l_ == r_ )
        {
            if ( l_ > 0x20 && l_ < 0x7f )
                str = Format.convert("'{}'", l_);
            else
                str = Format.convert("({:x})", cast(int)l_);
        }
        else
        {
            if ( l_ > 0x20 && l_ < 0x7f )
                str = Format.convert("'{}'", l_);
            else
                str = Format.convert("({:x})", cast(int)l_);
            str ~= "-";
            if ( r_ > 0x20 && r_ < 0x7f )
                str ~= Format.convert("'{}'", r_);
            else
                str ~= Format.convert("({:x})", cast(int)r_);
        }
        return str;
    }
}

/* ************************************************************************************************
    Represents a class of characters as used in regular expressions (e.g. [0-9a-z], etc.)
**************************************************************************************************/
struct CharClass(char_t)
{
    alias CharRange!(char_t) range_t;

    //---------------------------------------------------------------------------------------------
    // pre-defined character classes
    static const CharClass!(char_t)
        line_startend = {parts: [
            {l_:0x00, r_:0x00},
            {l_:0x0a, r_:0x0a},
            {l_:0x13, r_:0x13}
        ]},
        digit = {parts: [
            {l_:0x30, r_:0x39}
        ]},
        whitespace = {parts: [
            {l_:0x09, r_:0x09},
            {l_:0x0a, r_:0x0a},
            {l_:0x0b, r_:0x0b},
            {l_:0x13, r_:0x13},
            {l_:0x14, r_:0x14},
            {l_:0x20, r_:0x20}
        ]};

    // 8bit classes
    static if ( is(char_t == char) )
    {
        static const CharClass!(char_t)
            any_char = {parts: [
                {l_:0x01, r_:0xff}
            ]},
            dot_oper = {parts: [
                {l_:0x01, r_:0xff}
            ]},
            alphanum_ = {parts: [
                {l_:0x30, r_:0x39},
                {l_:0x41, r_:0x5a},
                {l_:0x5f, r_:0x5f},
                {l_:0x61, r_:0x7a}
            ]};
    }
    // 16bit and 32bit classes
    static if ( is(char_t == wchar) || is(char_t == dchar) )
    {
        static const CharClass!(char_t)
            any_char = {parts: [
                {l_:0x0001, r_:0xffff}
            ]},
            dot_oper = {parts: [
                {l_:0x0001, r_:0xffff}
            ]},
            alphanum_ = {parts: [
                {l_:0x30, r_:0x39},
                {l_:0x41, r_:0x5a},
                {l_:0x5f, r_:0x5f},
                {l_:0x61, r_:0x7a}
            ]};
    }

    //---------------------------------------------------------------------------------------------
    range_t[] parts;

    invariant()
    {
//        foreach ( i, p; parts )
//            assert(p.l_<=p.r_, Int.toString(i)~": "~Int.toString(p.l_)~" > "~Int.toString(p.r_));
    }

    static CharClass opCall(CharClass cc)
    {
        CharClass ncc;
        ncc.parts = cc.parts.dup;
        return ncc;
    }

    int opCmp(CharClass cc)
    {
        if ( parts.length < cc.parts.length )
            return -1;
        if ( parts.length > cc.parts.length )
            return 1;
        foreach ( i, p; cc.parts )
        {
            if ( p.l_ != parts[i].l_ || p.r_ != parts[i].r_ )
                return 1;
        }
        return 0;
    }

    bool empty()
    {
        return parts.length <= 0;
    }

    bool matches(char_t c)
    {
        foreach ( p; parts )
        {
            if ( p.contains(c) )
                return true;
        }
        return false;
    }

    CharClass intersect(CharClass cc)
    {
        CharClass ic;
        foreach ( p; parts )
        {
            foreach ( cp; cc.parts )
            {
                if ( p.intersects(cp) )
                    ic.parts ~= p.intersect(cp);
            }
        }
        return ic;
    }

    // requires the class to be optimized
    bool contains(range_t cr)
    {
        foreach ( p; parts )
        {
            if ( p.contains(cr) )
                return true;
        }
        return false;
    }

    // requires the class to be optimized
    bool contains(CharClass cc)
    {
        Louter: foreach ( p; cc.parts )
        {
            foreach ( p2; parts )
            {
                if ( p2.contains(p) )
                    continue Louter;
            }
            return false;
        }
        return true;
    }

    void subtract(CharClass cc)
    {
        negate;
        add(cc);
        negate;
    }

    void add(CharClass cc)
    {
        parts ~= cc.parts;
    }

    void add(range_t cr)
    {
        parts ~= cr;
    }

    void add(char_t c)
    {
        parts ~= CharRange!(char_t)(c);
    }

    /* ********************************************************************************************
        Requires the CharClass to be optimized.
    **********************************************************************************************/
    void negate()
    {
        optimize;
        char_t  start = char_t.min;

        // first part touches left boundary of value range
        if ( parts.length > 0 && parts[0].l_ == start )
        {
            start = parts[0].r_;
            if ( start < char_t.max )
                ++start;

            foreach ( i, ref cr; parts[0 .. $-1] )
            {
                cr.l_ = start;
                cr.r_ = parts[i+1].l_-1;
                start = parts[i+1].r_;
                if ( start < char_t.max )
                    ++start;
            }
            if ( start != char_t.max ) {
                parts[$-1].l_ = start;
                parts[$-1].r_ = char_t.max;
            }
            else
                parts.length = parts.length-1;
            return;
        }

        foreach ( i, ref cr; parts )
        {
            char_t tmp = cr.l_-1;
            cr.l_ = start;
            start = cr.r_;
            if ( start < char_t.max )
                ++start;
            cr.r_ = tmp;
        }

        // last part does not touch right boundary
        if ( start != char_t.max )
            parts ~= range_t(start, char_t.max);
    }

    void optimize()
    {
        if ( empty )
            return;

        parts.sort;

        size_t i = 0;
        foreach ( p; parts[1 .. $] )
        {
            if ( p.l_ > parts[i].r_+1 ) {
                ++i;
                parts[i].l_ = p.l_;
                parts[i].r_ = p.r_;
                continue;
            }
            parts[i].r_ = max(p.r_, parts[i].r_);
            if ( parts[i].r_ >= char_t.max )
                break;
        }
        parts.length = i+1;
    }

    char[] toString()
    {
        char[] str;
        str ~= "[";
        foreach ( p; parts )
            str ~= p.toString;
        str ~= "]";
        return str;
    }
}

debug(UnitTest)
{
unittest
{
    static CharClass!(char) cc = { parts: [{l_:0,r_:10},{l_:0,r_:6},{l_:5,r_:12},{l_:12,r_:17},{l_:20,r_:100}] };
    assert(cc.toString, "[(0)-(a)(0)-(6)(5)-(c)(c)-(11)(14)-'d']");
    cc.optimize;
    assert(cc.toString,  "[(0)-(11)(14)-'d']");
    cc.negate;
    assert(cc.toString,  " [(12)-(13)'e'-(ff)]");
    cc.optimize;
    assert(cc.toString,  "[(0)-(11)(14)-'d']");
    cc.negate;
    assert(cc.toString,  "[(12)-(13)'e'-(ff)]");

    static CharClass!(char) cc2 = { parts: [] };
    assert(cc.toString,  "[]");
    cc2.optimize;
    assert(cc.toString,  "[]");
    cc2.negate;
    assert(cc.toString,  "[(0)-(ff)]");
    cc2.optimize;
    assert(cc.toString,  "[(0)-(ff)]");
    cc2.negate;
    assert(cc.toString,  "[]");

    static CharClass!(char) cc3 = { parts: [{l_:0,r_:100},{l_:200,r_:0xff},] };
    assert(cc3.toString, "[(0)-'d'(c8)-(ff)]");
    cc3.negate;
    assert(cc.toString,  "['e'-(c7)]");
    cc3.negate;
    assert(cc.toString,  "[(0)-'d'(c8)-(ff)]");

    static CharClass!(char) cc4 = { parts: [{l_:0,r_:200},{l_:100,r_:0xff},] };
    assert(cc.toString,  "[(0)-(c8)'d'-(ff)]");
    cc4.optimize;
    assert(cc.toString,  "[(9)-(13)(20)-'~'(a0)-(ff)(100)-(17f)(180)-(24f)(20a3)-(20b5)]");

    static CharClass!(dchar) cc5 = { parts: [{l_:0x9,r_:0x13},{0x20,r_:'~'},{l_:0xa0,r_:0xff},{l_:0x100,r_:0x17f},{l_:0x180,r_:0x24f},{l_:0x20a3,r_:0x20b5}] };
    cc5.optimize;
    assert(cc.toString,  "[(9)-(13)(20)-'~'(a0)-(24f)(20a3)-(20b5)]");
    cc5.negate;
    assert(cc.toString,  "[(0)-(8)(14)-(1f)(7f)-(9f)(250)-(20a2)(20b6)-(10ffff)]");
    cc5.optimize;
    assert(cc.toString,  "[(0)-(8)(14)-(1f)(7f)-(9f)(250)-(20a2)(20b6)-(10ffff)]");
    cc5.negate;
    assert(cc.toString,  "[(9)-(13)(20)-'~'(a0)-(24f)(20a3)-(20b5)]");
}
}

/* ************************************************************************************************

**************************************************************************************************/
private struct Predicate(char_t)
{
    alias char_t[]              string_t;
    alias CharClass!(char_t)    cc_t;
    alias CharRange!(char_t)    cr_t;

    // generic data
    enum Type {
        consume, epsilon, lookahead, lookbehind
    }

    cc_t    input;
    Type    type;

    // data for compiled predicates
    const uint  MAX_BITMAP_LENGTH = 256,
                MAX_SEARCH_LENGTH = 256;
    enum MatchMode {
        generic, generic_l,
        single_char, bitmap, string_search,         // consume
        single_char_l, bitmap_l, string_search_l    // lookahead
    }

    MatchMode   mode;
    // data_chr had to be pulled out of the union due to
    // http://d.puremagic.com/issues/show_bug.cgi?id=2632 --- don't put it back
    // in until this is resolved!
    //
    // Keep in mind that data_str.length can't be modified directly unless the
    // new length is strictly greater than the old length. This is essentially
    // because ubyte.sizeof can be less than char_t.sizeof. If you set
    // data_str.length to anything less than or equal to data_bmp.length,
    // data_str will not be reallocated: only the length value will change but
    // nothing will realize that not that much space has actually been
    // allocated. data_str will be too small and you'll likely get segfaults or
    // such.
    //
    // In short: don't mess with data_str.length. If you have to, remove the
    // union entirely.
    //
    // -- Deewiant
    union {
        ubyte[]     data_bmp;
        string_t    data_str;
    };
    char_t      data_chr;


    void compile()
    {
        assert(input.parts.length > 0);

        // single char?
        if ( input.parts.length == 1 && input.parts[0].l_ == input.parts[0].r_ )
        {
            mode = type==Type.consume ? MatchMode.single_char : MatchMode.single_char_l;
            data_chr = input.parts[0].l_;
            return;
        }
        // check whether we can use a bitmap
        foreach ( p; input.parts )
        {
            if ( p.l_ > MAX_BITMAP_LENGTH || p.r_ > MAX_BITMAP_LENGTH )
                goto LnoBitmap;
        }

        // setup bitmap
        data_bmp.length = MAX_BITMAP_LENGTH/8;
        foreach ( p; input.parts )
        {
            for ( char_t c = p.l_; c <= p.r_; ++c )
                data_bmp[c/8] |= 1 << (c&7);
        }
        mode = type==Type.consume ? MatchMode.bitmap : MatchMode.bitmap_l;
        return;

    LnoBitmap:
/*
        // check whether the class is small enough to justify a string-search
        // TODO: consider inverse class for 8bit chars?
        uint class_size;
        foreach ( p; input.parts )
            class_size += cast(uint)p.r_+1-p.l_;
        if ( class_size > MAX_SEARCH_LENGTH )
            goto Lgeneric;
        data_str.length = class_size;
        size_t ind;
        foreach ( p; input.parts )
        {
            for ( char_t c = p.l_; c <= p.r_; ++c )
                data_str[ind++] = c;
        }
        mode = type==Type.consume ? MatchMode.string_search : MatchMode.string_search_l;
        return;
*/
    Lgeneric:
        data_str = cast(char_t[])input.parts;
        mode = type==Type.consume ? MatchMode.generic : MatchMode.generic_l;
    }

    bool matches(char_t c)
    {
        if ( type == Type.consume || type == Type.lookahead )
            return input.matches(c);
        assert(0);
    }

    Predicate intersect(Predicate p)
    {
        Predicate p2;
        if ( type != Type.epsilon && p.type != Type.epsilon )
            p2.input = input.intersect(p.input);
        return p2;
    }

    bool intersects(Predicate p)
    {
        if ( type != p.type )
            return false;
        foreach ( cr; input.parts )
        {
            foreach ( cr2; p.input.parts )
            {
                if ( cr.intersects(cr2) )
                    return true;
            }
        }
        return false;
    }

    bool exceedsMax(uint maxc)
    {
        foreach ( p; input.parts )
        {
            if ( p.l_ > maxc || p.r_ > maxc )
                return true;
        }

        return false;
    }

    bool empty()
    {
        return type != Type.epsilon && input.empty;
    }

    void subtract(Predicate p)
    {
        if ( type != Type.epsilon && p.type != Type.epsilon )
            input.subtract(p.input);
    }

    void negate()
    {
        assert(type != Type.epsilon);
        input.negate;
    }

    void optimize()
    {
        assert(type != Type.epsilon);
        input.optimize;
    }

    int opCmp(Predicate p)
    {
        return input.opCmp(p.input);
    }

    int opEquals(Predicate p)
    {
        if ( type != p.type )
            return 0;
        if ( input.opCmp(p.input) != 0 )
            return 0;
        return 1;
    }

    cc_t getInput()
    {
        return input;
    }

    void setInput(cc_t cc)
    {
        input = cc;
    }

    void appendInput(cr_t cr)
    {
        input.add(cr);
    }

    void appendInput(cc_t cc)
    {
        input.add(cc);
    }

    void appendInput(Predicate p)
    {
        input.add(p.input);
    }

    string toString()
    {
        string str;
        switch ( type )
        {
            case Type.consume:      str = input.toString;       break;
            case Type.epsilon:      str = "eps";                break;
            case Type.lookahead:    str = "la:"~input.toString; break;
            case Type.lookbehind:   str = "lb:"~input.toString; break;
            default:
                assert(0);
        }
        return str;
    }
}
import Utf = tango.text.convert.Utf;
import tango.text.convert.Format;

/* ************************************************************************************************

**************************************************************************************************/
class RegExpException : Exception
{
    this(string msg)
    {
        super("RegExp: "~msg);
    }
}

/* ************************************************************************************************
    TNFA state
**************************************************************************************************/
private class TNFAState(char_t)
{
    bool    accept = false,
            visited = false;
    uint    index;
    List!(TNFATransition!(char_t))  transitions;

    this()
    {
        transitions = new List!(TNFATransition!(char_t));
    }
}


/* ************************************************************************************************
    Priority classes used to linearize priorities after non-linear transition creation.
**************************************************************************************************/
private enum PriorityClass {
    greedy=0, normal=1, reluctant=2, extraReluctant=3
}

/* ********************************************************************************
    TNFA tagged transition
***********************************************************************************/
private class TNFATransition(char_t)
{
    TNFAState!(char_t)  target;
    Predicate!(char_t)  predicate;
    uint                priority,
                        tag;        /// one-based tag number, 0 = untagged
    PriorityClass       priorityClass;

    this(PriorityClass pc)
    {
        priorityClass = pc;
    }

    /******************************************************************************
        Move through states only going via epsilon transitions, and only choosing
        the one with highest priority. If the highest priority transition from a
        state isn't an epsilon transition, false is returned.
        If the accepting NFA state can be reached in this manner, true is returned.

        NOTE: This method does not look for cycles which should be kept in mind for
        later. larsivi 20090827
    *******************************************************************************/
    bool canFinish()
    {
        TNFAState!(char_t)  t = target;
        while (!t.accept) {
            TNFATransition!(char_t) highestPriTrans;
            foreach (trans; t.transitions) {
                if (!highestPriTrans || highestPriTrans.priority > trans.priority)
                    highestPriTrans = trans;
            }
            if (!(highestPriTrans.predicate.type == Predicate!(char_t).Type.epsilon))
                return false;

            t = highestPriTrans.target;
        }
        return true;
    }

}

/* ************************************************************************************************
    Fragments of TNFAs as used in the Thompson method
**************************************************************************************************/
private class TNFAFragment(char_t)
{
    alias TNFAState!(char_t)        state_t;
    alias TNFATransition!(char_t)   trans_t;

    List!(trans_t)  entries,        /// transitions to be added to the entry state
                    exits,          /// transitions to be added to the exit state
                    entry_state,    /// transitions to write the entry state to
                    exit_state;     /// transitions to write the exit state to

    bool swapMatchingBracketSyntax;

    this()
    {
        entries     = new List!(trans_t);
        exits       = new List!(trans_t);
        entry_state = new List!(trans_t);
        exit_state  = new List!(trans_t);
    }

    /* ********************************************************************************************
        Write the given state as entry state to this fragment.
    **********************************************************************************************/
    void setEntry(state_t state)
    {
        state.transitions ~= entries;
        foreach ( t; entry_state )
            t.target = state;
    }

    /* ********************************************************************************************
        Write the given state as exit state to this fragment.
    **********************************************************************************************/
    void setExit(state_t state)
    {
        state.transitions ~= exits;
        foreach ( t; exit_state )
            t.target = state;
    }
}

/* ************************************************************************************************
    Tagged NFA
**************************************************************************************************/
private final class TNFA(char_t)
{
    alias TNFATransition!(char_t)   trans_t;
    alias TNFAFragment!(char_t)     frag_t;
    alias TNFAState!(char_t)        state_t;
    alias Predicate!(char_t)        predicate_t;
    alias char_t[]                  string_t;
    alias CharRange!(char_t)        range_t;
    alias CharClass!(char_t)        cc_t;

    string_t    pattern;
    state_t[]   states;
    state_t     start;

    bool swapMatchingBracketSyntax; /// whether to make (?...) matching and (...) non-matching

    /* ********************************************************************************************
        Creates the TNFA from the given regex pattern
    **********************************************************************************************/
    this(string_t regex)
    {
        next_tag        = 1;
        transitions     = new List!(trans_t);

        pattern = regex;
    }

    /* ********************************************************************************************
        Print the TNFA (tabular representation of the delta function)
    **********************************************************************************************/
    debug(TangoRegex) void print()
    {
        foreach ( int i, s; states )
        {
            Stdout.format("{}{:d2}{}", s is start?">":" ", i, s.accept?"*":" ");

            bool first=true;
            Stdout(" {");
            foreach ( t; s.transitions )
            {
                Stdout.format("{}{}{}:{}->{}", first?"":", ", t.priority, "gnrx"[t.priorityClass], t.predicate.toString, t.target is null?-1:t.target.index);
                if ( t.tag > 0 ) {
                    Stdout.format(" t{}", t.tag);
                }
                first = false;
            }
            Stdout("}").newline;
        }
    }

    uint tagCount()
    {
        return next_tag-1;
    }

    /* ********************************************************************************************
        Constructs the TNFA using extended Thompson method.
        Uses a slightly extended version of Dijkstra's shunting yard algorithm to convert
        the regexp from infix notation.
    **********************************************************************************************/
    void parse(bool unanchored)
    {
        List!(frag_t)       frags       = new List!(frag_t);
        Stack!(Operator)    opStack;
        Stack!(uint)        tagStack;
        Stack!(Pair!(uint)) occurStack;
        opStack ~= Operator.eos;

        /* ****************************************************************************************
            Perform action on operator stack
        ******************************************************************************************/
        bool perform(Operator next_op, bool explicit_operator=true)
        {
            // calculate index in action matrix
            int index = cast(int)opStack.top*(Operator.max+1);
            index += cast(int)next_op;

            debug(tnfa) Stdout.formatln("\t{}:{} -> {}  {} frag(s)",
                operator_names[opStack.top], operator_names[next_op], action_names[action_lookup[index]], frags.length
            );
            switch ( action_lookup[index] )
            {
                case Act.pua:
                    opStack ~= next_op;
                    if ( next_op == Operator.open_par ) {
                        tagStack ~= next_tag;
                        next_tag += 2;
                    }
                    break;
                case Act.poc:
                    switch ( opStack.top )
                    {
                        case Operator.concat:       constructConcat(frags);                             break;
                        case Operator.altern:       constructAltern(frags);                             break;
                        case Operator.zero_one_g:   constructZeroOne(frags, PriorityClass.greedy);      break;
                        case Operator.zero_one_ng:  constructZeroOne(frags, PriorityClass.reluctant);   break;
                        case Operator.zero_one_xr:  constructZeroOne(frags, PriorityClass.extraReluctant);  break;
                        case Operator.zero_more_g:  constructZeroMore(frags, PriorityClass.greedy);     break;
                        case Operator.zero_more_ng: constructZeroMore(frags, PriorityClass.reluctant);  break;
                        case Operator.zero_more_xr: constructZeroMore(frags, PriorityClass.extraReluctant); break;
                        case Operator.one_more_g:   constructOneMore(frags, PriorityClass.greedy);      break;
                        case Operator.one_more_ng:  constructOneMore(frags, PriorityClass.reluctant);   break;
                        case Operator.one_more_xr:  constructOneMore(frags, PriorityClass.extraReluctant);  break;
                        case Operator.occur_g:
                            Pair!(uint) occur = occurStack.pop;
                            constructOccur(frags, occur.a, occur.b, PriorityClass.greedy);
                            break;
                        case Operator.occur_ng:
                            Pair!(uint) occur = occurStack.pop;
                            constructOccur(frags, occur.a, occur.b, PriorityClass.reluctant);
                            break;
                        default:
                            throw new RegExpException("cannot process operand at \""~Utf.toString(pattern[cursor..$])~"\"");
                    }
                    opStack.pop;

                    perform(next_op, false);
                    break;
                case Act.poa:
                    opStack.pop;
                    break;
                case Act.pca:
                    if ( opStack.top == Operator.open_par )
                    {
                        if ( tagStack.empty )
                            throw new RegExpException(Format.convert("Missing opening parentheses for closing parentheses at char {} \"{}\"", cursor, Utf.toString(pattern[cursor..$])));
                        constructBracket(frags, tagStack.top);
                        tagStack.pop;
                    }
                    else {
                        assert(opStack.top == Operator.open_par_nm);
                        constructBracket(frags);
                    }
                    opStack.pop;
                    break;
                case Act.don:
                    return true;
                case Act.err:
                default:
                    throw new RegExpException(Format.convert("Unexpected operand at char {} \"{}\" in \"{}\"", cursor, Utf.toString(pattern[cursor..$]), Utf.toString(pattern)));
            }

            return false;
        }

        // add implicit extra reluctant .* (with . == any_char) at the beginning for unanchored matches
        // and matching bracket for total match group
        if ( unanchored ) {
            frags ~= constructChars(cc_t.any_char, predicate_t.Type.consume);
            perform(Operator.zero_more_xr, false);
            perform(Operator.concat, false);
            perform(Operator.open_par, false);
        }

        // convert regex to postfix and create TNFA
        bool implicit_concat;
        predicate_t.Type pred_type;

        while ( !endOfPattern )
        {
            pred_type = predicate_t.Type.consume;

            dchar c = readPattern;
            switch ( c )
            {
                case '|':
                    perform(Operator.altern);
                    implicit_concat = false;
                    break;
                case '(':
                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = false;
                    if ( peekPattern == '?' ) {
                        readPattern;
                        perform(swapMatchingBracketSyntax?Operator.open_par:Operator.open_par_nm);
                    }
                    else
                        perform(swapMatchingBracketSyntax?Operator.open_par_nm:Operator.open_par);
                    break;
                case ')':
                    perform(Operator.close_par);
                    break;
                case '?':
                    if ( peekPattern == '?' ) {
                        readPattern;
                        perform(Operator.zero_one_ng);
                    }
                    else
                        perform(Operator.zero_one_g);
                    break;
                case '*':
                    if ( peekPattern == '?' ) {
                        readPattern;
                        perform(Operator.zero_more_ng);
                    }
                    else
                        perform(Operator.zero_more_g);
                    break;
                case '+':
                    if ( peekPattern == '?' ) {
                        readPattern;
                        perform(Operator.one_more_ng);
                    }
                    else
                        perform(Operator.one_more_g);
                    break;
                case '{':
                    Pair!(uint) occur;
                    parseOccurCount(occur.a, occur.b);
                    occurStack ~= occur;
                    if ( peekPattern == '?' ) {
                        readPattern;
                        perform(Operator.occur_ng);
                    }
                    else
                        perform(Operator.occur_g);
                    break;
                case '[':
                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = true;
                    frags ~= constructCharClass(pred_type);
                    break;
                case '.':
                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = true;
                    frags ~= constructChars(cc_t.dot_oper, pred_type);
                    break;
                case '$':
                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = true;

                    frags ~= constructChars(cc_t.line_startend, predicate_t.Type.lookahead);
                    break;
                case '^':
                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = true;

                    frags ~= constructChars(cc_t.line_startend, predicate_t.Type.lookbehind);
                    break;
                case '>':
                    c = readPattern;
                    pred_type = predicate_t.Type.lookahead;
                    if ( c == '[' )
                        goto case '[';
                    else if ( c == '\\' )
                        goto case '\\';
                    else if ( c == '.' )
                        goto case '.';
                    else
                        goto default;
                case '<':
                    c = readPattern;
                    pred_type = predicate_t.Type.lookbehind;
                    if ( c == '[' )
                        goto case '[';
                    else if ( c == '\\' )
                        goto case '\\';
                    else if ( c == '.' )
                        goto case '.';
                    else
                        goto default;
                case '\\':
                    c = readPattern;

                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = true;

                    switch ( c )
                    {
                        case 't':
                            frags ~= constructSingleChar('\t', pred_type);
                            break;
                        case 'n':
                            frags ~= constructSingleChar('\n', pred_type);
                            break;
                        case 'r':
                            frags ~= constructSingleChar('\r', pred_type);
                            break;
                        case 'w':   // alphanumeric and _
                            frags ~= constructChars(cc_t.alphanum_, pred_type);
                            break;
                        case 'W':   // non-(alphanum and _)
                            auto cc = cc_t(cc_t.alphanum_);
                            cc.negate;
                            frags ~= constructChars(cc, pred_type);
                            break;
                        case 's':   // whitespace
                            frags ~= constructChars(cc_t.whitespace, pred_type);
                            break;
                        case 'S':   // non-whitespace
                            auto cc = cc_t(cc_t.whitespace);
                            cc.negate;
                            frags ~= constructChars(cc, pred_type);
                            break;
                        case 'd':   // digit
                            frags ~= constructChars(cc_t.digit, pred_type);
                            break;
                        case 'D':   // non-digit
                            auto cc = cc_t(cc_t.digit);
                            cc.negate;
                            frags ~= constructChars(cc, pred_type);
                            break;
                        case 'b':   // either end of word
                            if ( pred_type != predicate_t.Type.consume )
                                throw new RegExpException("Escape sequence \\b not allowed in look-ahead or -behind");

                            // create (?<\S>\s|<\s>\S)
                            auto cc = cc_t(cc_t.whitespace);
                            cc.negate;

                            perform(Operator.open_par_nm);

                            frags ~= constructChars(cc, predicate_t.Type.lookbehind);
                            perform(Operator.concat, false);
                            frags ~= constructChars(cc_t.whitespace, predicate_t.Type.lookahead);
                            perform(Operator.altern, false);
                            frags ~= constructChars(cc_t.whitespace, predicate_t.Type.lookbehind);
                            perform(Operator.concat, false);
                            frags ~= constructChars(cc, predicate_t.Type.lookahead);

                            perform(Operator.close_par, false);
                            break;
                        case 'B':   // neither end of word
                            if ( pred_type != predicate_t.Type.consume )
                                throw new RegExpException("Escape sequence \\B not allowed in look-ahead or -behind");

                            // create (?<\S>\S|<\s>\s)
                            auto cc = cc_t(cc_t.whitespace);
                            cc.negate;

                            perform(Operator.open_par_nm);

                            frags ~= constructChars(cc, predicate_t.Type.lookbehind);
                            perform(Operator.concat, false);
                            frags ~= constructChars(cc, predicate_t.Type.lookahead);
                            perform(Operator.altern, false);
                            frags ~= constructChars(cc_t.whitespace, predicate_t.Type.lookbehind);
                            perform(Operator.concat, false);
                            frags ~= constructChars(cc_t.whitespace, predicate_t.Type.lookahead);

                            perform(Operator.close_par, false);
                            break;
                        case '(':
                        case ')':
                        case '[':
                        case ']':
                        case '{':
                        case '}':
                        case '*':
                        case '+':
                        case '?':
                        case '.':
                        case '\\':
                        case '^':
                        case '$':
                        case '|':
                        case '<':
                        case '>':
                        default:
                            frags ~= constructSingleChar(c, pred_type);
                            break;
//                            throw new RegExpException(Format.convert("Unknown escape sequence \\{}", c));
                    }
                    break;

                default:
                    if ( implicit_concat )
                        perform(Operator.concat, false);
                    implicit_concat = true;
                    frags ~= constructSingleChar(c, pred_type);
            }
        }

        // add implicit reluctant .* (with . == any_char) at the end for unanchored matches
        if ( unanchored )
        {
            perform(Operator.close_par, false);
            if ( implicit_concat )
                perform(Operator.concat, false);
            frags ~= constructChars(cc_t.any_char, predicate_t.Type.consume);
            perform(Operator.zero_more_ng, false);
        }

        // empty operator stack
        while ( !perform(Operator.eos) ) {}

        // set start and finish states
        start = addState;
        state_t finish = addState;
        finish.accept = true;

        foreach ( f; frags ) {
            f.setExit(finish);
            f.setEntry(start);
        }

        // set transition priorities
        List!(trans_t)[PriorityClass.max+1] trans;
        foreach ( ref t; trans )
            t = new List!(trans_t);

        Stack!(trans_t) todo;
        state_t state = start;

        while ( !todo.empty || !state.visited )
        {
            if ( !state.visited )
            {
                state.visited = true;
                foreach_reverse ( t; state.transitions )
                    todo ~= t;
            }

            if ( todo.empty )
                break;
            trans_t t = todo.top;
            todo.pop;
            assert(t.priorityClass<=PriorityClass.max);
            trans[t.priorityClass] ~= t;
            state = t.target;
        }

        uint nextPrio;
        foreach ( ts; trans )
        {
            foreach ( t; ts )
                t.priority = nextPrio++;
        }
    }

private:
    uint            next_tag;
    size_t          cursor,
                    next_cursor;
    List!(trans_t)  transitions;

    state_t[state_t]    clonedStates;
    trans_t[trans_t]    clonedTransitions;

    /// RegEx operators
    enum Operator {
        eos, concat, altern, open_par, close_par,
        zero_one_g, zero_more_g, one_more_g,        // greedy
        zero_one_ng, zero_more_ng, one_more_ng,     // non-greedy/reluctant
        zero_one_xr, zero_more_xr, one_more_xr,     // extra-reluctant
        open_par_nm, occur_g, occur_ng
    }
    const char[][] operator_names = ["EOS", "concat", "|", "(", ")", "?", "*", "+", "??", "*?", "+?", "??x", "*?x", "+?x", "(?", "{x,y}", "{x,y}?"];

    /// Actions for to-postfix transformation
    enum Act {
        pua, poc, poa, pca, don, err
    }
    const char[][] action_names = ["push+advance", "pop+copy", "pop+advance", "pop+copy+advance", "done", "error"];

    /// Action lookup for to-postfix transformation
    const Act[] action_lookup =
    [
    //  eos      concat   |        (        )        ?        *        +        ??       *?       +?       ??extra  *?extra  +?extra  (?       {x,y}    {x,y}?
        Act.don, Act.pua, Act.pua, Act.pua, Act.err, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua,
        Act.poc, Act.pua, Act.poc, Act.pua, Act.poc, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua,
        Act.err, Act.pua, Act.pua, Act.pua, Act.pca, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua,
        Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err, Act.err,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.err, Act.pua, Act.pua, Act.pua, Act.pca, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua, Act.pua,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc,
        Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.poc, Act.pua, Act.poc, Act.poc
    ];

    final dchar peekPattern()
    {
        auto tmp = next_cursor;
        if ( tmp < pattern.length )
            return decode(pattern, tmp);
        return 0;
    }

    final dchar readPattern()
    {
        cursor = next_cursor;
        if ( next_cursor < pattern.length )
            return decode(pattern, next_cursor);
        return 0;
    }

    final bool endOfPattern()
    {
        return next_cursor >= pattern.length;
    }

    state_t addState()
    {
        state_t s = new state_t;
        s.index = states.length;
        states ~= s;
        return s;
    }

    trans_t addTransition(PriorityClass pc = PriorityClass.normal)
    {
        trans_t trans = new trans_t(pc);
        transitions ~= trans;
        return trans;
    }

    uint parseNumber()
    {
        uint res;
        while ( !endOfPattern )
        {
            auto c = peekPattern;
            if ( c < '0' || c > '9' )
                break;
            res = res*10+(c-'0');
            readPattern;
        }
        return res;
    }

    void parseOccurCount(out uint minOccur, out uint maxOccur)
    {
        assert(pattern[cursor] == '{');