routines ( prelude postlude mark_regions RV R1 R2 standard_suffix i_verb_suffix verb_suffix residual_suffix un_double un_accent ) externals ( stem ) integers ( pV p1 p2 ) groupings ( v keep_with_s ) stringescapes {} /* special characters (in ISO Latin I) */ stringdef a^ hex 'E2' // a-circumflex stringdef a` hex 'E0' // a-grave stringdef c, hex 'E7' // c-cedilla stringdef e" hex 'EB' // e-diaeresis (rare) stringdef e' hex 'E9' // e-acute stringdef e^ hex 'EA' // e-circumflex stringdef e` hex 'E8' // e-grave stringdef i" hex 'EF' // i-diaeresis stringdef i^ hex 'EE' // i-circumflex stringdef o^ hex 'F4' // o-circumflex stringdef u^ hex 'FB' // u-circumflex stringdef u` hex 'F9' // u-grave define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}' define prelude as repeat goto ( ( v [ ('u' ] v <- 'U') or ('i' ] v <- 'I') or ('y' ] <- 'Y') ) or ( ['y'] v <- 'Y' ) or ( 'q' ['u'] <- 'U' ) ) define mark_regions as ( $pV = limit $p1 = limit $p2 = limit // defaults do ( ( v v next ) or among ( // this exception list begun Nov 2006 'par' // paris, parie, pari 'col' // colis 'tap' // tapis // extensions possible here ) or ( next gopast v ) setmark pV ) do ( gopast v gopast non-v setmark p1 gopast v gopast non-v setmark p2 ) ) define postlude as repeat ( [substring] among( 'I' (<- 'i') 'U' (<- 'u') 'Y' (<- 'y') '' (next) ) ) backwardmode ( define RV as $pV <= cursor define R1 as $p1 <= cursor define R2 as $p2 <= cursor define standard_suffix as ( [substring] among( 'ance' 'iqUe' 'isme' 'able' 'iste' 'eux' 'ances' 'iqUes' 'ismes' 'ables' 'istes' ( R2 delete ) 'atrice' 'ateur' 'ation' 'atrices' 'ateurs' 'ations' ( R2 delete try ( ['ic'] (R2 delete) or <-'iqU' ) ) 'logie' 'logies' ( R2 <- 'log' ) 'usion' 'ution' 'usions' 'utions' ( R2 <- 'u' ) 'ence' 'ences' ( R2 <- 'ent' ) 'ement' 'ements' ( RV delete try ( [substring] among( 'iv' (R2 delete ['at'] R2 delete) 'eus' ((R2 delete) or (R1<-'eux')) 'abl' 'iqU' (R2 delete) 'i{e`}r' 'I{e`}r' //) (RV <-'i') //)--new 2 Sept 02 ) ) ) 'it{e'}' 'it{e'}s' ( R2 delete try ( [substring] among( 'abil' ((R2 delete) or <-'abl') 'ic' ((R2 delete) or <-'iqU') 'iv' (R2 delete) ) ) ) 'if' 'ive' 'ifs' 'ives' ( R2 delete try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' ) ) 'eaux' (<- 'eau') 'aux' (R1 <- 'al') 'euse' 'euses'((R2 delete) or (R1<-'eux')) 'issement' 'issements'(R1 non-v delete) // verbal // fail(...) below forces entry to verb_suffix. -ment typically // follows the p.p., e.g 'confus{e'}ment'. 'amment' (RV fail(<- 'ant')) 'emment' (RV fail(<- 'ent')) 'ment' 'ments' (test(v RV) fail(delete)) // v is e,i,u,{e'},I or U ) ) define i_verb_suffix as setlimit tomark pV for ( [substring] among ( '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai' 'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez' 'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait' 'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses' 'issez' 'issiez' 'issions' 'issons' 'it' (non-v delete) ) ) define verb_suffix as setlimit tomark pV for ( [substring] among ( 'ions' (R2 delete) '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai' 'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions' 'erons' 'eront' 'ez' 'iez' // 'ons' //-best omitted (delete) '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant' 'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez' 'assions' (delete try(['e'] delete) ) ) ) define keep_with_s 'aiou{e`}s' define residual_suffix as ( try(['s'] test non-keep_with_s delete) setlimit tomark pV for ( [substring] among( 'ion' (R2 's' or 't' delete) 'ier' 'i{e`}re' 'Ier' 'I{e`}re' (<-'i') 'e' (delete) '{e"}' ('gu' delete) ) ) ) define un_double as ( test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete ) define un_accent as ( atleast 1 non-v [ '{e'}' or '{e`}' ] <-'e' ) ) define stem as ( do prelude do mark_regions backwards ( do ( ( ( standard_suffix or i_verb_suffix or verb_suffix ) and try( [ ('Y' ] <- 'i' ) or ('{c,}'] <- 'c' ) ) ) or residual_suffix ) // try(['ent'] RV delete) // is best omitted do un_double do un_accent ) do postlude )