# Interface:
#   lex(definition) takes a lexer definition (regex -> symbol name) and builds a lexer
#   lexer.write(data) appends data to the lexer's input buffer
#   lexer.read() returns a list of (symbol, matched string) pairs, one per named match

import re

class lex:
    """Regex-driven lexer working on an accumulating text buffer.

    The lexer definition maps regular-expression strings to symbol names.
    A pattern mapped to a false value (e.g. ``None``) is still matched and
    consumed, but contributes nothing to the output of ``read``.
    """

    def __init__(self, dict=None):
        """Compile the lexer definition into a single alternation regex.

        dict -- mapping of pattern string to symbol name (or None).  The
                parameter name shadows the builtin, but it is kept so that
                callers passing it by keyword keep working.
        """
        # Create a fresh mapping per instance: a ``{}`` default would be
        # one object shared by every lexer built without arguments.
        if dict is None:
            dict = {}
        self.dict = dict
        self.data = ""
        reg = []
        for pattern, name in dict.items():
            if name:
                # Named group, so read() can report which symbol matched.
                reg.append("(?P<" + name + ">" + pattern + ")")
            else:
                reg.append(pattern)
        if reg:
            self.matcher = re.compile("|".join(reg))
        else:
            # Empty definition: an empty negative lookahead never matches,
            # so read() simply returns no symbols.
            self.matcher = re.compile("(?!)")

    def write(self, data):
        """Append ``data`` to the text waiting to be lexed."""
        self.data = self.data + data

    def read(self):
        """Return the symbols recognised in the buffered data.

        The result is a list of ``(symbol, text)`` tuples, one per
        non-empty named group of each match.  The first match must start
        at the beginning of the buffer; later ones are located with
        ``search``, so unrecognised text lying between two matches is
        skipped.  Everything up to the end of the last match is dropped
        from the buffer; whatever follows it stays for the next call.
        """
        tokens = []
        posn = 0
        m = self.matcher.match(self.data)
        while m is not None:
            if m.end() == m.start():
                # A zero-width match makes no progress; searching again
                # from the same position would repeat it forever.
                break
            for name, text in m.groupdict().items():
                if text:
                    tokens.append((name, text))
            posn = m.end()
            m = self.matcher.search(self.data, posn)
        self.data = self.data[posn:]
        return tokens


if __name__ == "__main__":
    # Demo / smoke test.  Each print() receives one pre-formatted string,
    # which produces the same output under Python 2 and Python 3.
    l = lex({'if': 'A',
             r'\d+': 'number',
             ';': 'terminator',
             r'\s+': None})

    l.write("if 12345        ;")
    print("%s \n" % (l.read(),))

    l = lex({r'\w+': 'A',
             r'\d+': 'number',
             # '-' sits last in the class so it is a literal hyphen; written
             # as '+-=' it forms a range that also matches the digits.
             '[!@#$%^&*()_+=`~?/.>,<\'";:-]': 'punc',
             r'\s+': None})

    l.write("if I were a rich man?")
    print("%s \n" % (l.read(),))
    l.write("then I'd be rich!")
    # Show the raw buffer before it is consumed by the next read().
    print("%s \n" % (l.data,))
    print("%s \n" % (l.read(),))
