A lexer for Debian changelog files

This is a (hopefully) useful example of an OO-style lexer. I use it when editing Debian changelog files, and it helps me quickly spot silly little details that would make the Debian packaging tools unhappy.

--- Debian changelog LPeg lexer.
-- @author [Pedro A. Aranda](https://github.com/paaguti)
-- copyright 2017-18
-- @license MIT (see LICENSE)
-- @module changelog

-- TODO: indentation
local lexer = require("lexer")
local token, word_match = lexer.token, lexer.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S

-- Create the lexer object. `lex_by_line` is the Scintillua option that asks
-- the framework to hand this lexer whole lines rather than arbitrary chunks.
local lexer_options = { lex_by_line = true }
local lex = lexer.new('changelog', lexer_options)

-- Maintainer name: one or more words made of ASCII letters and dots,
-- separated by runs of one or more spaces (e.g. "Pedro A. Aranda").
local name_char = R("AZ", "az") + P(".")
local name_word = name_char^1
local name = name_word * (P(" ")^1 * name_word)^0

-- Maintainer e-mail address enclosed in angle brackets.
-- NOTE(review): the boolean arguments are presumably
-- (single_line, no_escape, balanced) -- confirm against the Scintillua
-- version in use.
local email = lexer.delimited_range('<>', true, false, false)
-- Signature timestamp, e.g. "Sat, 17 Feb 2018 06:06:00 +0100":
-- abbreviated weekday, two-digit day, abbreviated month, four-digit year,
-- HH:MM:SS and a signed four-digit UTC offset.
-- NOTE(review): the day must be written with two digits here; Debian Policy
-- treats the leading zero as optional but conventional -- confirm this
-- strictness is intended.
local digit = R("09")
local date = lexer.word_match([[Mon Tue Wed Thu Fri Sat Sun]]) * P(", ") *
   R("03") * digit * P(" ") *                    -- day of month
   lexer.word_match([[Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec]]) * P(" ") *
   R("12") * digit * digit * digit * P(" ") *    -- year
   R("02") * digit * P(":") *                    -- hours
   -- Minutes: the second digit must accept 0-9. It was previously limited to
   -- 0-5, which rejected valid times such as 06:06:00.
   R("05") * digit * P(":") *
   R("05") * digit * P(" ") *                    -- seconds
   S("+-") * digit * digit * R("05") * digit     -- UTC offset (+hhmm / -hhmm)

-- Shared building blocks for the line-oriented tokens below.
local starts_line = lexer.starts_line
local rest_of_line = lexer.nonnewline^0

-- Release header: a line whose first character is alphanumeric.
local release_token = token('release', starts_line(lexer.alnum) * rest_of_line)

-- Change entry: a line beginning with the "  * " or "    - " bullet.
local bullet = starts_line('  * ') + starts_line('    - ')
local change_token = token('change', bullet * rest_of_line)

-- Signature line: " -- Name <email>  timestamp" (two spaces before the date).
local signature = starts_line(' -- ') * name * P(" ") * email * P("  ") * date
local date_token = token('timestamp', signature)

-- Line terminators.
local skip_token = token('skip', lexer.newline)

-- Fallback: any other single character is tagged as an error.
local error_token = token(lexer.ERROR, lexer.any)

-- Register the rules in this exact order; the catch-all error rule goes last.
local rules = {
  { 'release',   release_token },
  { 'change',    change_token },
  { 'timestamp', date_token },
  { 'skip',      skip_token },
  { 'error',     error_token },
}
for _, rule in ipairs(rules) do
  lex:add_rule(rule[1], rule[2])
end

-- Associate each token name with one of the lexer module's predefined
-- style constants, registering them in the same order as before.
local styles = {
  { 'release',   lexer.STYLE_PREPROCESSOR },
  { 'change',    lexer.STYLE_STRING },
  { 'timestamp', lexer.STYLE_EMBEDDED },
  { 'skip',      lexer.STYLE_DEFAULT },
  { 'error',     lexer.STYLE_ERROR },
}
for _, entry in ipairs(styles) do
  lex:add_style(entry[1], entry[2])
end

-- Hand the fully configured lexer object back to the loader.
return lex

Edited on Saturday, February 17th 2018 at 6:06 AM.