Interpolating Search

lua-users home
wiki

When searching for a value in a sorted array, the to preferring search algorithm is the binary search. In certain cases although we can be a lot faster using an interpolating search mechanism. tables optimal for a interpolating search are { 1,2,3,4,5,6,7,8,9,10 }, where as tables looking like { 1,1,1,1,1,1,1,1,1,2,10 } and searching it for the value 2 will use a alot more time than the binary search. So it depends on the scenario, but if used right one can be 3 times as fast as the binary search. I wrote an extra function for searching a reversed table, since it was the better option rather to changing all the variables and the < check.

--[[

   Interpolating Search v 0.2

   

   LUA 5.1 compatible

   

   Can only search for values of type number

   

   table.intsearch( table, value [, compval ] ), for searching a normal sorted table

   table.intsearchrev( table, value [, compval ] ), for searching a reversed sorted table

   

   If the  value is found:

      it returns a table holding all the matching indices (e.g. { startindice,endindice } )

      endindice may be the same as startindice if only one matching indice was found

   If compval is given:

      then it must be a function that takes one value and returns a second value2,

      to be compared with the searched value, e.g.:

      compvalue = function( value ) return value[1] end

   Return value:

      on success: a table holding matching indices (e.g. { startindice,endindice } )

      on failure: nil

]]--

do

   -- Avoid heap allocs for performance

   local default_fcompval = function( value ) return value end

   function table.intsearch( t,value,fcompval )

      -- Initialise functions

      local fcompval = fcompval or default_fcompval

      -- Inistialise numbers

      local ilow,ihigh = 1,#t

      -- return on empty table

      if not t[ilow] then return end

      -- get values of indices

      local _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )

      -- make sure slope cannot become 0

      while _ilow and _ilow < _ihigh do

         -- get interpolated position

         local pos = math.floor( (value-_ilow)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow

         -- check for out of range

         if pos < ilow or pos > ihigh then return end

         -- get compare value

         local compval = fcompval( t[pos] )

         if value == compval then

            -- insert found position

            local tfound,num = { pos,pos },pos-1

            -- get all values that match

            while value == fcompval( t[num] ) do

               tfound[1],num = num,num-1

            end

            num = pos+1

            while value == fcompval( t[num] ) do

               tfound[2],num = num,num+1

            end

            return tfound

         -- keep searching,

         -- left part of the table

         elseif value < compval then

            ihigh = pos-1

         else

            ilow = pos+1

         end

         _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )

      end

      if value == fcompval( t[ilow] ) then

         -- only add values --> of ilow

         local tfound,num = { ilow,ilow },ilow+1

         while value == fcompval( t[num] ) do

            tfound[2],num = num,num+1

         end

         return tfound         

      end

   end

   -- Interpolated search on a reversed table

   function table.intsearchrev( t,value,fcompval )

      -- Initialise functions

      local fcompval = fcompval or default_fcompval

      -- Inistialise numbers

      local ilow,ihigh = 1,#t

      if not t[ilow] then return end

      local _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )

      while _ilow and _ilow > _ihigh do

         -- get interpolated position

         local pos = math.floor( (_ihigh-value)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow

         -- check for out of range

         if pos < ilow or pos > ihigh then return end

         local compval = fcompval( t[pos] )

         if value == compval then

            -- insert found position

            local tfound,num = { pos,pos },pos-1

            -- get all values that match

            while value == fcompval( t[num] ) do

               tfound[1],num = num,num-1

            end

            num = pos+1

            while value == fcompval( t[num] ) do

               tfound[2],num = num,num+1

            end

            return tfound

            -- keep searching,

            -- left part of the table

         elseif value > compval then

            ihigh = pos-1

         else

            ilow = pos+1

         end

         _ilow,_ihigh = fcompval( t[ilow] ),fcompval( t[ihigh] )

      end

      if value == fcompval( t[ilow] ) then

         -- only add values --> of ilow

         local tfound,num = { ilow,ilow },ilow+1

         while value == fcompval( t[num] ) do

            tfound[2],num = num,num+1

         end

         return tfound         

      end

   end

end

-- CHILLCODEĀ™

Testsuit:
-- test: table size 0

t = {}

local v = table.intsearch(t, 5); assert(v == nil)



-- test: table size 1

t = {5}

local v = table.intsearch(t, 4); assert(v == nil)

local v = table.intsearch(t, 5); assert(v[1] == 1)

local v = table.intsearch(t, 6); assert(v == nil)



-- test: table size 2

t = {4,6}

local v = table.intsearch(t, 3); assert(v == nil)

local v = table.intsearch(t, 4); assert(v[1] == 1)

local v = table.intsearch(t, 5); assert(v == nil)

local v = table.intsearch(t, 6); assert(v[1] == 2)

local v = table.intsearch(t, 7); assert(v == nil)



-- test: typical, with duplicate

t = {0,2,4,4,6,8,10}

local v = table.intsearch(t, 0); assert(v[1] == 1)

local v = table.intsearch(t, 10); assert(v[1] == 7)

local v = table.intsearch(t, 4); assert(v[1] == 3 and v[2] == 4)

local v = table.intsearch(t, 5); assert(v == nil)

local v = table.intsearch(t, 11); assert(v == nil)

local v = table.intsearch(t, -1); assert(v == nil)



-- test: identical

t = {1,1,1,1,1,1,1,1,1,1}

local v = table.intsearch(t, 1); assert(v[1] == 1 and v[2] == 10)



-- test: fcomp

t = {10,52,34,44,86,38}

local v = table.intsearch(t, 6, function(v) return v % 10 end); assert(v[1] == 5)

local v = table.intsearch(t, 4, function(v) return v % 10 end); assert(v[1] == 3 and v[2] == 4)



-- test: reverse

t = {10,8,6,4,4,2,0}

local v = table.intsearchrev(t, 6); assert(v[1] == 3)

local v = table.intsearchrev(t, 4); assert(v[1] == 4 and v[2] == 5)



print "DONE"

Interpolating Search for a String

This combines the interpolating search with the binary search to be able to search tables holding strings. This was more a test if it is actually possible, and in some cases, this method could be faster than the binary search.

--[[

   Interpolating Search on a String

   

   LUA 5.1 compatible

   

   Can only search sorted tables with value string

   

   table.intsearchstr( table, value ), for searching a normal sorted table

   table.intsearchstrrev( table, value ), for searching a reversed sorted table

   

   If the  value is found:

      it returns a table holding all the matching indices (e.g. { startindice,endindice } )

      endindice may be the same as startindice if only one matching indice was found

   Return value:

      on success: a table holding matching indices (e.g. { startindice,endindice } )

      on failure: nil

]]--

do

   -- Avoid heap allocs for performance

   local getbytevalue = function( value )

      if value then

         local num,mul = 0,1

         -- set bytedept, 4 or 5 seems appropriate

         for i = 4,1,-1 do

            local byte = string.byte( string.sub( value,i,i ) ) or -1

            byte,num,mul = byte + 1,num + mul*byte,mul*257

         end

         return num

      end

   end   

   -- Load the optimised binary function

   -- Avoid heap allocs for performance

   local fcompf = function( a,b ) return a < b end

   local fcompr = function( a,b ) return a > b end

   local function tablebinsearch( t,value,reversed,iStart,iEnd )

      -- Initialise functions

      local fcomp = reversed and fcompr or fcompf

      --  Initialise numbers

      local iStart,iEnd,iMid = iStart or 1,iEnd or #t,0

      -- Binary Search

      while iStart <= iEnd do

         -- calculate middle

         iMid = math.floor( (iStart+iEnd)/2 )

         -- get compare value

         local value2 = t[iMid]

         -- get all values that match

         if value == value2 then

            local tfound,num = { iMid,iMid },iMid - 1

            while value == t[num] do

               tfound[1],num = num,num - 1

            end

            num = iMid + 1

            while value == t[num] do

               tfound[2],num = num,num + 1

            end

            return tfound

         -- keep searching

         elseif fcomp( value,value2 ) then

            iEnd = iMid - 1

         else

            iStart = iMid + 1

         end

      end

   end

   

   -- The interpolationg Search Function on a String

   function table.intsearchstr( t,value )

      -- Inistialise numbers

      local ilow,ihigh = 1,#t

      -- return on empty table

      if not t[ilow] then return end

      -- get byte values values of indices and searched value

      local _ilow,_ihigh,_value = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),getbytevalue(value)

      local oldpos = -1

      -- make sure slope cannot become 0

      while _ilow and _ilow < _ihigh do

         -- get interpolated position

         local pos = math.floor( (_value-_ilow)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow

         -- check for out of range

         if pos < ilow or pos > ihigh then return end

         -- check for real match

         if value == t[pos] then

            -- insert found position

            local tfound,num = { pos,pos },pos-1

            -- get all values that match

            while value == t[num] do

               tfound[1],num = num,num-1

            end

            num = pos+1

            while value == t[num] do

               tfound[2],num = num,num+1

            end

            return tfound

         -- keep searching,

         -- left part of the table,check for real lower

         elseif value < t[pos] then

            ihigh = pos-1

         else

            ilow = pos+1

         end

         -- check if new position is further than 1 away

         if oldpos+1 == pos or oldpos-1 == pos then

            -- start binary search

            return tablebinsearch( t,value,_,ilow,ihigh )

         end

         _ilow,_ihigh,oldpos = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),pos

      end

      -- initiate binary seach function here since we could be in a flat for the interpolating search

      return tablebinsearch( t,value,_,ilow,ihigh )

   end

   -- The interpolationg Search Function on a String

   function table.intsearchstrrev( t,value )

      -- Inistialise numbers

      local ilow,ihigh = 1,#t

      -- return on empty table

      if not t[ilow] then return end

      -- get byte values values of indices and searched value

      local _ilow,_ihigh,_value = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),getbytevalue(value)

      local oldpos = -1

      -- make sure slope cannot become 0

      while _ilow and _ilow > _ihigh do

         -- get interpolated position

         local pos = math.floor( (_ihigh-_value)*(ihigh-ilow)/(_ihigh-_ilow) ) + ilow

         -- check for out of range

         if pos < ilow or pos > ihigh then return end

         -- check for real match

         if value == t[pos] then

            -- insert found position

            local tfound,num = { pos,pos },pos-1

            -- get all values that match

            while value == t[num] do

               tfound[1],num = num,num-1

            end

            num = pos+1

            while value == t[num] do

               tfound[2],num = num,num+1

            end

            return tfound

         -- keep searching,

         -- left part of the table,check for real lower

         elseif value > t[pos] then

            ihigh = pos-1

         else

            ilow = pos+1

         end

         -- check if new position is further than 1 away

         if oldpos+1 == pos or oldpos-1 == pos then

            -- start binary search

            return tablebinsearch( t,value,1,ilow,ihigh )

         end

         _ilow,_ihigh,oldpos = getbytevalue(t[ilow]),getbytevalue(t[ihigh]),pos

      end

      -- initiate binary seach function here since we could be in a flat for the interpolating search

      return tablebinsearch( t,value,1,ilow,ihigh )

   end

end

-- CHILLCODEĀ™

Test suit:
-- test: table size 0

t = { "a","a","a","a","a","a","a","a","a","a","aa","z" }

local v = table.intsearchstr(t, "aa"); assert(v[1] == 11)



-- test flat for interpolatin search function

t = { "aabb","aabbcc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabc","aabca","aabcc" }

local v = table.intsearchstr(t, "aabca"); assert(v[1] == 14)



-- test: table size 1

t = { "a" }

local v = table.intsearchstr(t, "b"); assert(v == nil)

local v = table.intsearchstr(t, "a"); assert(v[1] == 1)

local v = table.intsearchstr(t, "c"); assert(v == nil)



-- test: table size 2

t = {"a","c"}

local v = table.intsearchstr(t, "0"); assert(v == nil)

local v = table.intsearchstr(t, "a"); assert(v[1] == 1)

local v = table.intsearchstr(t, "b"); assert(v == nil)

local v = table.intsearchstr(t, "c"); assert(v[1] == 2)

local v = table.intsearchstr(t, "d"); assert(v == nil)



-- test: typical, with duplicate

t = {"a","b","c","c","d","e","f"}

local v = table.intsearchstr(t, "a"); assert(v[1] == 1)

local v = table.intsearchstr(t, "f"); assert(v[1] == 7)

local v = table.intsearchstr(t, "c"); assert(v[1] == 3 and v[2] == 4)

local v = table.intsearchstr(t, "da"); assert(v == nil)

local v = table.intsearchstr(t, "fa"); assert(v == nil)

local v = table.intsearchstr(t, "0"); assert(v == nil)



-- test: identical

t = {"a","a","a","a","a","a","a","a","a","a"}

local v = table.intsearchstr(t, "a"); assert(v[1] == 1 and v[2] == 10)



-- test: reverse

t = {"f","e","d","c","c","b","a"}

local v = table.intsearchstrrev(t, "d"); assert(v[1] == 3)

local v = table.intsearchstrrev(t, "c"); assert(v[1] == 4 and v[2] == 5)



print "DONE"

See Also


RecentChanges · preferences
edit · history
Last edited June 10, 2007 1:59 pm GMT (diff)