String Query

lua-users home
wiki

stringquery is a string pattern matching and transformation library inspired partly by [jQuery].

Description

Primary design qualities are

The resemblance to jQuery is this: we construct a query object containing a set of selections on the string being studied, then perform a series of chained method calls to refine and alter those selections (all at once, as a set), and finally perform an operation on the selections (e.g. replace them or return them). See the examples in the test suite below.

Status

The design of this library is preliminary; it was originally intended only as an experiment. Improvements are welcome.

Author

DavidManura

test_stringquery.lua

-- test_stringquery.lua

-- test of dmlib.stringquery.



local SQ = require "dmlib.stringquery"

local sq = SQ.sq



-- Raises an error showing both values when a and b are not equal
-- (compared with ==).  Level 2 attributes the error to the caller
-- rather than to this helper.
local function asserteq(a,b)
  if a == b then
    return
  end
  local shown_a, shown_b = tostring(a), tostring(b)
  error('[' .. shown_a .. '] ~= [' .. shown_b .. ']', 2)
end



-- These checks use asserteq (rather than a bare assert on an == expression)
-- so that a failure reports both the actual and the expected value.

-- Replace every word with a fixed string.
asserteq(
  sq("this is a test"):match("%w+"):replace('_'),
  '_ _ _ _'
)

-- Select the text outside of HTML tags, find URLs in it, keep only the
-- URLs containing 'user', and wrap those in anchor tags.
asserteq(
  sq('<p>this is a <a href="../index.html">test</a> http://lua-users.org http://lua.org </p>')
  :match("<[^>]*>")
  :invert()
  :match('http://[^ ]+')
  :filter('user')
  :replace(function(s) return '<a href="' .. s .. '">' .. s .. '</a>' end),
  '<p>this is a <a href="../index.html">test</a> <a href="http://lua-users.org">' ..
  'http://lua-users.org</a> http://lua.org </p>'
)

-- Upper-case only the "<word> book" selections that start with
-- "green " or "red".
asserteq(
  sq("the red book, the green book, and the blue book")
  :match("%w+ book")
  :filter(SQ.any("^green ", "^red"))
  :replace(string.upper),
  'the RED BOOK, the GREEN BOOK, and the blue book'
)

-- solution to problem from http://lua-users.org/wiki/FrontierPattern
-- (get_unpacked is not the last argument, so only its first result is
-- compared.)
asserteq(
  sq("the QUICK BROwn fox")
  :match("%w+")
  :filter("^[A-Z]*$")
  :get_unpacked(),
  'QUICK'
)



-- examples in docs
-- Each check below mirrors the example given in the comment above the
-- corresponding function in dmlib/stringquery.lua.

-- match applied to existing selections: uppercase runs inside each word.
asserteq(

  table.concat(sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get(), ','),

  'A,BETA,G,A' )

-- filter with a pattern: keep only the selections that are exactly
-- four characters long.
asserteq(

  table.concat(sq'this is a test':match'%w+':filter'^....$':get(), ','),

  'this,test' )

-- invert: select the text outside of the alphabetic run.
asserteq(

  table.concat(sq'123 abc 234':match'%a+':invert():get(), ','),

  '123 , 234' )

-- get_unpacked returns the selections as multiple values; the table
-- constructor collects them all.
asserteq(

  table.concat({sq'this is a test':match'%w+':get_unpacked()}, ','),

  'this,is,a,test' )

-- A fresh query selects the entire string.
asserteq(

  table.concat(sq'hello':get(), ','),

  'hello' )

-- any: true when at least one of the patterns matches.
asserteq(

  SQ.any('%a%d', '%d%a')(' a1 '), true )

-- all: true only when every pattern matches.
asserteq(

  SQ.all('%a%d', '%d%a')(' a1 2b '), true )



-- Reached only if none of the assertions above raised an error.
print 'DONE'

dmlib/stringquery.lua

-- dmlib/stringquery.lua (dmlib.stringquery)

--

-- String matching/replacing library inspired partly by jquery

--

-- Warning: preliminary design.

--

-- (c) 2009 David Manura, Licensed under the same terms as Lua (MIT license).



local M = {}





-- Overwrites the array part of table dst with the array part of table src.
-- Keys of dst outside its array part are left alone.  Returns dst.
local function tioverride(dst, src)
  -- Clear dst[1], dst[2], ... up to the first nil entry.
  local pos = 1
  while dst[pos] ~= nil do
    dst[pos] = nil
    pos = pos + 1
  end

  -- Copy src[1], src[2], ... up to the first nil entry.
  pos = 1
  local item = src[pos]
  while item ~= nil do
    dst[pos] = item
    pos = pos + 1
    item = src[pos]
  end

  return dst
end



-- Returns an array of substrings of s, partitioned by an array of
-- ranges (each range is {first, last}, 1-based inclusive indices).
-- Always returns an odd number of substrings: even-indexed substrings
-- are the text inside the ranges, odd-indexed substrings are the text
-- before, between and after them.
-- The ranges are walked in array order; each substring outside a range
-- starts right after the end of the previous range.
-- Example:
--   partition("abcdefg", {{1,2},{4,5}})
--   --> {'','ab', 'c','de', 'fg'}
local function partition(s, ranges)
  local result = {}
  local i = 1  -- index of the first character not yet emitted
  for _, range in ipairs(ranges) do
    -- Index the pair directly rather than calling the global `unpack`,
    -- which does not exist in Lua 5.2 and later.
    local ia, ib = range[1], range[2]
    result[#result + 1] = s:sub(i, ia - 1)  -- text before this range
    result[#result + 1] = s:sub(ia, ib)     -- text inside this range
    i = ib + 1
  end
  result[#result + 1] = s:sub(i)            -- text after the last range
  return result
end





-- Helper function.
-- Normalizes an argument that may be either a function or a string
-- pattern.  A string is turned into a function that matches its argument
-- against that pattern (returning whatever s:match returns); any other
-- value is returned unchanged.
local function getarg(o)
  if type(o) ~= 'string' then
    return o
  end
  return function(s) return s:match(o) end
end





-- Metatable for query objects (set by the sq constructor).  It is its
-- own __index table, so the methods defined on mt below are found when
-- looked up on a query object.
local mt = {}

mt.__index = mt





-- Defines new selections based on matches of
-- pattern pat inside the current selections.
-- Each current selection is searched independently, so a match never
-- extends beyond the selection it was found in.  An empty match is
-- recorded as an empty selection and the scan then moves on by one
-- character (without this step a pattern that can match the empty
-- string, such as '%a*', would loop forever).
-- Modifies the query in place and returns it, to allow chaining.
-- Example:
--   sq'Alpha BETA GammA':match'%w+':match'[A-Z]+':get()
--   --> {'A', 'BETA', 'G', 'A'}
function mt:match(pat)
  local results = {}
  for _, range in ipairs(self) do
    -- Index the pair directly: the global `unpack` is gone in Lua 5.2+.
    local ia0, ib0 = range[1], range[2]
    local stmp = self.s:sub(ia0, ib0)
    local last_init = #stmp + 1  -- an empty match may still occur here
    local init = 1
    while init <= last_init do
      local ia, ib = stmp:find(pat, init)
      if not ia then break end
      -- Translate offsets within stmp back to offsets within self.s.
      results[#results + 1] = {ia + ia0 - 1, ib + ia0 - 1}
      if ib < ia then
        init = ia + 1  -- empty match: force progress
      else
        init = ib + 1
      end
    end
  end
  tioverride(self, results)
  return self
end





-- Defines new selections consisting only of the current selections
-- that match object o.  o can be a function (s -> b) that
-- returns whether string s matches (any value other than nil/false
-- counts as a match).
-- Alternately o can be a string pattern.
-- Modifies the query in place and returns it, to allow chaining.
-- Example:
--   sq'this is a test':match'%w+':filter'^....$':get()
--   --> {'this', 'test'}
function mt:filter(o)
  local pred = getarg(o)

  local kept = {}
  for _, range in ipairs(self) do
    -- Index the pair directly: the global `unpack` is gone in Lua 5.2+.
    local ia, ib = range[1], range[2]
    if pred(self.s:sub(ia, ib)) then
      kept[#kept + 1] = {ia, ib}
    end
  end
  tioverride(self, kept)
  return self
end





-- Defines new selections that form the inverse (complement)
-- of the current selections: the stretches of the string that lie
-- before, between and after them.  Empty stretches are not included.
-- NOTE(review): the walk assumes the current selections are in ascending
-- order and do not overlap — confirm for any caller that builds
-- selections by other means than the methods in this file.
-- warning: inverting twice does not always restore the original
-- selections (e.g. empty selections are not reproduced).
-- Modifies the query in place and returns it, to allow chaining.
-- Example:
--   sq'123 abc 234':match'%a+':invert():get()
--   --> {'123 ', ' 234'}
function mt:invert()
  local result = {}
  local i = 1  -- first index not yet covered by a selection
  for _, range in ipairs(self) do
    -- Index the pair directly: the global `unpack` is gone in Lua 5.2+.
    local ia, ib = range[1], range[2]
    if ia > i then
      result[#result + 1] = {i, ia - 1}
    end
    i = ib + 1
  end
  -- `<=` (not `<`): a remainder of exactly one character at the end of
  -- the string must be selected too.
  if i <= #self.s then
    result[#result + 1] = {i, #self.s}
  end
  tioverride(self, result)
  return self
end





-- Replaces every selection using o and returns the resulting string.
-- o can be a function (s1 -> s2) giving the replacement s2 for the
-- selected string s1; a nil/false result is treated as ''.
-- Alternately, o can be a string that every selection is replaced with.
-- Text outside the selections is kept as it is.
function mt:replace(o)
  local transform = o
  if type(o) == 'string' then
    transform = function() return o end
  end

  -- Even-indexed pieces are the selections (see partition).
  local pieces = partition(self.s, self)
  local count = #pieces
  local idx = 2
  while idx <= count do
    pieces[idx] = transform(pieces[idx]) or ''
    idx = idx + 2
  end

  return table.concat(pieces)
end





-- Returns the currently selected substrings as a new array.
-- Example:
--   sq'this is a test':match'%w+':get()
--   --> {'this', 'is', 'a', 'test'}
function mt:get()
  local selections = {}
  -- partition alternates outside/inside text; the even-indexed pieces
  -- are the selections.
  for idx, piece in ipairs(partition(self.s, self)) do
    if idx % 2 == 0 then
      selections[#selections + 1] = piece
    end
  end
  return selections
end





-- Returns all string selections as an unpacked list (multiple return
-- values) rather than as an array.
-- Example:
--   sq'this is a test':match'%w+':get_unpacked()
--   --> 'this', 'is', 'a', 'test'
function mt:get_unpacked()
  -- `unpack` is a global only in Lua 5.1; later versions provide
  -- table.unpack instead.
  local unpack_fn = table.unpack or unpack
  return unpack_fn(self:get())
end





-- Prints a dump of the query object (its string and selection ranges).
-- For debugging.  Requires penlight 0.6.3
-- Returns the query itself so it can be used inside a method chain.
function mt:print_dump()
  local pretty = require "pl.pretty"
  local text = pretty.write(self)
  print(text)
  return self
end





-- Constructor: builds a string query object for string s.
-- The object keeps s in field `s` and its selections as {first, last}
-- index pairs in its array part.  Initially there is a single selection
-- spanning the entire string.
-- Example:
--   sq'hello':get() --> {'hello'}
local function sq(s)
  local query = { s = s }
  query[1] = { 1, #s }
  return setmetatable(query, mt)
end
M.sq = sq





-- Builds a predicate that succeeds when *any* of the given
-- predicates succeeds.
-- Each argument is either a predicate function or a string
-- pattern (converted with getarg).
-- The returned function yields true or false.
-- Useful for sq(s):filter.
-- Example:
--   any('%a%d', '%d%a')(' a1 ') --> true
local function any(...)
  local preds = {}
  for i, v in ipairs({...}) do
    preds[i] = getarg(v)
  end

  return function(s)
    for i = 1, #preds do
      if preds[i](s) then
        return true
      end
    end
    return false
  end
end
M.any = any





-- Builds a predicate that succeeds only when *all* of the given
-- predicates succeed (and therefore succeeds when none are given).
-- Each argument is either a predicate function or a string
-- pattern (converted with getarg).
-- The returned function yields true or false.
-- Useful for sq(s):filter.
-- Example:
--   all('%a%d', '%d%a')(' a1 2b ') --> true
local function all(...)
  local preds = {}
  for i, v in ipairs({...}) do
    preds[i] = getarg(v)
  end

  return function(s)
    for i = 1, #preds do
      if not preds[i](s) then
        return false
      end
    end
    return true
  end
end
M.all = all





return M

See Also


RecentChanges · preferences
edit · history
Last edited October 31, 2009 6:46 pm GMT (diff)