An Armenian stemming algorithm


 

Links to resources

Snowball main page
Tar-gzipped file of the delivery from Astghik Mkrtchyan


A stemmer for Armenian was sent to us by Astghik Mkrtchyan with this accompanying email:
From: astghik mkrtchyan
Date: Sat, 12 Jun 2010 20:27:02 +0400
Subject: Armenian Stemmer

Hello,

I newbie here. Recently I've googled for Armenian stemmer. So I have found nothing, decided to write one. And now I'm sending Armenian stemmer (stem_Unicode.sbl) and generated java file.

Regards,
Astghik
Here is the script of the stemmer in Snowball:


// Armenian stemmer.
//
// The script strips suffixes in four passes (ending, verb, adjective, noun),
// each of which deletes the longest suffix it finds in its own list.
//
// NOTE: `//` comments run to the end of the line, so every stringdef below
// must stay on its own line; joining lines would comment out whatever
// follows the first `//`.

// Inside string literals, {name} is replaced by the string defined for name.
stringescapes {}

// One stringdef per letter, giving the hex code point used in the suffix
// lists. The trailing comment on each line is a second hex code, which
// appears to be the matching capital letter (TODO confirm).
stringdef a    hex '561' // 531
stringdef b    hex '562' // 532
stringdef g    hex '563' // 533
stringdef d    hex '564' // 534
stringdef ye   hex '565' // 535
stringdef z    hex '566' // 536
stringdef e    hex '567' // 537
stringdef y    hex '568' // 538
stringdef dt   hex '569' // 539
stringdef zh   hex '56A' // 53A
stringdef i    hex '56B' // 53B
stringdef l    hex '56C' // 53C
stringdef kh   hex '56D' // 53D
stringdef ts   hex '56E' // 53E
stringdef k    hex '56F' // 53F
stringdef h    hex '570' // 540
stringdef dz   hex '571' // 541
stringdef gh   hex '572' // 542
stringdef djch hex '573' // 543
stringdef m    hex '574' // 544
stringdef j    hex '575' // 545
stringdef n    hex '576' // 546
stringdef sh   hex '577' // 547
stringdef vo   hex '578' // 548
stringdef ch   hex '579' // 549
stringdef p    hex '57A' // 54A
stringdef dj   hex '57B' // 54B
stringdef r    hex '57C' // 54C
stringdef s    hex '57D' // 54D
stringdef v    hex '57E' // 54E
stringdef t    hex '57F' // 54F
stringdef r'   hex '580' // 550
stringdef c    hex '581' // 551
stringdef u    hex '582' // 552 //vjun
stringdef bp   hex '583' // 553
stringdef q    hex '584' // 554
stringdef ev   hex '587'
stringdef o    hex '585' // 555
stringdef f    hex '586' // 556

routines (
    mark_regions
    R2
    adjective
    verb
    noun
    ending
)

externals ( stem )

// pV and p2 are string positions computed by mark_regions.
integers ( pV p2 )

groupings ( v )

// The letters treated as vowels when marking regions.
define v '{a}{e}{i}{o}{u}{ye}{vo}{y}'

// Computes the marks pV and p2.
// Both start at the end of the word (limit). pV is then placed just after the
// first vowel, and p2 just after the first non-vowel that follows the next
// vowel once a non-vowel has been passed. If any step fails, the enclosing
// `do` absorbs the failure and the marks not yet set remain at limit.
define mark_regions as (

    $pV = limit
    $p2 = limit
    do (
        gopast v  setmark pV  gopast non-v
        gopast v  gopast non-v  setmark p2
    )
)

backwardmode (

    // Succeeds when the cursor is at or beyond p2.
    define R2 as $p2 <= cursor

    // Deletes the longest suffix from the adjective list, if one is present.
    define adjective as (
        [substring] among (
            '{b}{a}{r'}'
            '{p}{ye}{s}'
            '{vo}{r'}{e}{n}'
            '{vo}{v}{i}{n}'
            '{a}{k}{i}'
            '{l}{a}{j}{n}'
            '{r'}{vo}{r'}{d}'
            '{ye}{r'}{vo}{r'}{d}'
            '{a}{k}{a}{n}'
            '{a}{l}{i}'
            '{k}{vo}{t}'
            '{ye}{k}{ye}{n}'
            '{vo}{r'}{a}{k}'
            '{ye}{gh}'
            '{v}{vo}{u}{n}'
            '{ye}{r'}{ye}{n}'
            '{a}{r'}{a}{n}'
            '{ye}{n}'
            '{a}{v}{ye}{t}'
            '{g}{i}{n}'
            '{i}{v}'
            '{a}{t}'
            '{i}{n}'

            (delete)
        )
    )

    // Deletes the longest suffix from the verb list, if one is present.
    define verb as (
        [substring] among (
            '{vo}{u}{m}'
            '{v}{vo}{u}{m}'
            '{a}{l}{vo}{u}'
            '{ye}{l}{vo}{u}'
            '{v}{ye}{l}'
            '{a}{n}{a}{l}'
            '{ye}{l}{vo}{u}{c}'
            '{a}{l}{vo}{u}{c}'
            '{y}{a}{l}'
            '{y}{ye}{l}'
            '{a}{l}{vo}{v}'
            '{ye}{l}{vo}{v}'
            '{a}{l}{i}{s}'
            '{ye}{l}{i}{s}'
            '{ye}{n}{a}{l}'
            '{a}{c}{n}{a}{l}'
            '{ye}{c}{n}{ye}{l}'
            '{c}{n}{ye}{l}'
            '{n}{ye}{l}'
            '{a}{t}{ye}{l}'
            '{vo}{t}{ye}{l}'
            '{k}{vo}{t}{ye}{l}'
            '{t}{ye}{l}'
            '{v}{a}{ts}'
            '{ye}{c}{v}{ye}{l}'
            '{a}{c}{v}{ye}{l}'
            '{ye}{c}{i}{r'}'
            '{a}{c}{i}{r'}'
            '{ye}{c}{i}{n}{q}'
            '{a}{c}{i}{n}{q}'
            '{v}{ye}{c}{i}{r'}'
            '{v}{ye}{c}{i}{n}{q}'
            '{v}{ye}{c}{i}{q}'
            '{v}{ye}{c}{i}{n}'
            '{a}{c}{r'}{i}{r'}'
            '{a}{c}{r'}{ye}{c}'
            '{a}{c}{r'}{i}{n}{q}'
            '{a}{c}{r'}{i}{q}'
            '{a}{c}{r'}{i}{n}'
            '{ye}{c}{i}{q}'
            '{a}{c}{i}{q}'
            '{ye}{c}{i}{n}'
            '{a}{c}{i}{n}'
            '{a}{c}{a}{r'}'
            '{a}{c}{a}{v}'
            '{a}{c}{a}{n}{q}'
            '{a}{c}{a}{q}'
            '{a}{c}{a}{n}'
            '{v}{ye}{c}{i}'
            '{a}{c}{r'}{i}'
            '{ye}{c}{a}{r'}'
            '{ye}{c}{a}{v}'
            '{c}{a}{n}{q}'
            '{c}{a}{q}'
            '{c}{a}{n}'
            '{a}{c}{a}'
            '{a}{c}{i}'
            '{ye}{c}{a}'
            '{ch}{ye}{l}'
            '{ye}{c}{i}'
            '{a}{r'}'
            '{a}{v}'
            '{a}{n}{q}'
            '{a}{q}'
            '{a}{n}'
            '{a}{l}'
            '{ye}{l}'
            '{ye}{c}'
            '{a}{c}'
            '{v}{ye}'
            '{a}'

            (delete)
        )
    )

    // Deletes the longest suffix from the noun list, if one is present.
    define noun as (
        [substring] among (
            '{a}{ts}{vo}'
            '{a}{n}{a}{k}'
            '{a}{n}{o}{c}'
            '{a}{r'}{a}{n}'
            '{a}{r'}{q}'
            '{p}{a}{n}'
            '{s}{t}{a}{n}'
            '{ye}{gh}{e}{n}'
            '{ye}{n}{q}'
            '{i}{k}'
            '{i}{ch}'
            '{i}{q}'
            '{m}{vo}{u}{n}{q}'
            '{j}{a}{k}'
            '{j}{vo}{u}{n}'
            '{vo}{n}{q}'
            '{vo}{r'}{d}'
            '{vo}{c}'
            '{ch}{ye}{q}'
            '{v}{a}{ts}{q}'
            '{v}{vo}{r'}'
            '{a}{v}{vo}{r'}'
            '{vo}{u}{dt}{j}{vo}{u}{n}'
            '{vo}{u}{k}'
            '{vo}{u}{h}{i}'
            '{vo}{u}{j}{dt}'
            '{vo}{u}{j}{q}'
            '{vo}{u}{s}{t}'
            '{vo}{u}{s}'
            '{c}{i}'
            '{a}{l}{i}{q}'
            '{a}{n}{i}{q}'
            '{i}{l}'
            '{i}{ch}{q}'
            '{vo}{u}{n}{q}'
            '{g}{a}{r'}'
            '{vo}{u}'
            '{a}{k}'
            '{a}{n}'
            '{q}'

            (delete)
        )
    )

    // Deletes the longest suffix from the ending list, but only when the
    // R2 test succeeds at the start of that suffix.
    define ending as (
        [substring] R2 among (
            '{n}{ye}{r'}{y}'
            '{n}{ye}{r'}{n}'
            '{n}{ye}{r'}{i}'
            '{n}{ye}{r'}{d}'
            '{ye}{r'}{i}{c}'
            '{n}{ye}{r'}{i}{c}'
            '{ye}{r'}{i}'
            '{ye}{r'}{d}'
            '{ye}{r'}{n}'
            '{ye}{r'}{y}'
            '{n}{ye}{r'}{i}{n}'
            '{vo}{u}{dt}{j}{a}{n}{n}'
            '{vo}{u}{dt}{j}{a}{n}{y}'
            '{vo}{u}{dt}{j}{a}{n}{s}'
            '{vo}{u}{dt}{j}{a}{n}{d}'
            '{vo}{u}{dt}{j}{a}{n}'
            '{ye}{r'}{i}{n}'
            '{i}{n}'
            '{s}{a}'
            '{vo}{dj}'
            '{i}{c}'
            '{ye}{r'}{vo}{v}'
            '{n}{ye}{r'}{vo}{v}'
            '{ye}{r'}{vo}{u}{m}'
            '{n}{ye}{r'}{vo}{u}{m}'
            '{vo}{u}{n}'
            '{vo}{u}{d}'
            '{v}{a}{n}{s}'
            '{v}{a}{n}{y}'
            '{v}{a}{n}{d}'
            '{a}{n}{y}'
            '{a}{n}{d}'
            '{v}{a}{n}'
            '{vo}{dj}{y}'
            '{vo}{dj}{s}'
            '{vo}{dj}{d}'
            '{vo}{c}'
            '{vo}{u}{c}'
            '{vo}{dj}{i}{c}'
            '{c}{i}{c}'
            '{v}{i}{c}'
            '{v}{i}'
            '{v}{vo}{v}'
            '{vo}{v}'
            '{a}{n}{vo}{v}'
            '{a}{n}{vo}{u}{m}'
            '{v}{a}{n}{i}{c}'
            '{a}{m}{b}'
            '{a}{n}'
            '{n}{ye}{r'}'
            '{ye}{r'}'
            '{v}{a}'
            '{y}'
            '{n}'
            '{d}'
            '{c}'
            '{i}'

            (delete)
        )
    )
)

// Entry point.
// Marks the regions, then works backwards from the end of the word with the
// limit set at pV, so no suffix match can extend past pV. The four passes run
// in the order ending, verb, adjective, noun; each is wrapped in `do`, so a
// pass that finds nothing does not stop the later ones.
define stem as (

    do mark_regions
    backwards setlimit tomark pV for (
        do ending
        do verb
        do adjective
        do noun
    )
)