Enhanced File Lines

lua-users home
wiki

Here are functions for portable line-by-line text processing (handling OS differences in '\r' and '\n' characters).

-- (c) 2008 David Manura. Licensed under the same terms as Lua (MIT).



-- file_lines(f [, chunk_size]) is similar to f:lines() for file f.
-- The main difference is that file_lines includes
-- new-line character sequences ("\n", "\r\n", "\r"),
-- if any, at the end of each line.  Embedded "\0" are also handled.
--
-- f          : object with a read(n) method that returns a string of
--              up to n bytes, or nil once the input is exhausted.
-- chunk_size : optional positive integer, the number of bytes requested
--              from f per read (defaults to 1024).
-- Returns an iterator function; each call returns the next line, and
-- nothing once all input has been consumed.
--
-- Caution: The newline behavior can depend on whether f is opened
-- in binary or ASCII mode.
--
local function file_lines(f, chunk_size)
  local CHUNK_SIZE = chunk_size or 1024
  -- A chunk size of 0 would make f:read return "" forever on a real file.
  assert(type(CHUNK_SIZE) == 'number' and CHUNK_SIZE >= 1
         and CHUNK_SIZE % 1 == 0,
         'chunk_size must be a positive integer')
  local buffer = ""
  local pos_beg = 1
  return function()
    local pos, chars
    while true do
      -- The pattern demands one character after the terminator, so a "\r"
      -- sitting at the very end of the buffer is not taken as a complete
      -- line before a following "\n" has had a chance to be read.
      pos, chars = buffer:match('()([\r\n].)', pos_beg)
      if pos or not f then
        break
      end
      local chunk = f:read(CHUNK_SIZE)
      if chunk then
        -- Drop the part already returned and append the new data.
        buffer = buffer:sub(pos_beg) .. chunk
        pos_beg = 1
      else
        -- End of input: stop reading; the loop re-matches once and exits.
        f = nil
      end
    end
    if not pos then
      -- Input exhausted: whatever remains (possibly ending in a lone
      -- terminator) is the final line.
      pos = #buffer
    elseif chars == '\r\n' then
      -- Keep the two-character terminator together.
      pos = pos + 1
    end
    local line = buffer:sub(pos_beg, pos)
    pos_beg = pos + 1
    if #line > 0 then
      return line
    end
  end
end





--

-- Breaks string s apart at its line terminators and returns the pieces
-- as an array.  Each piece keeps its terminator ("\n", "\r\n" or "\r");
-- a final piece with no terminator is included only if it is non-empty.
--
local function split_newlines(s)
  local NEWLINE = '()([\r\n].?)'
  local lines = {}
  local start = 1
  local nl_pos, nl_chars = s:match(NEWLINE, start)
  while nl_pos do
    local stop = nl_pos
    if nl_chars == '\r\n' then
      -- two-character terminator: keep both characters on this line
      stop = stop + 1
    end
    lines[#lines + 1] = s:sub(start, stop)
    start = stop + 1
    nl_pos, nl_chars = s:match(NEWLINE, start)
  end
  -- whatever follows the last terminator
  local tail = s:sub(start)
  if tail ~= '' then
    lines[#lines + 1] = tail
  end
  return lines
end





--[=[slower implementation

local function split_newlines(s)

  local ts = {}

  local lastc

  s:gsub('([^\r\n]*)([\r\n])', function(a,b)

    if a == '' and lastc == '\r' and b == '\n' then

      ts[#ts] = ts[#ts] .. b

      lastc = nil

    else

      ts[#ts+1] = a .. b

      lastc = b

    end

    return ''

  end)

  local line = s:match('([^\r\n]+)$')

  if line then ts[#ts+1] = line end

  return ts

end

--]=]





-- test suite





-- utility function for test suite.
-- Builds a stand-in file object over string s.  Its read(n) method hands
-- back successive pieces of s, at most n bytes each, and nil once nothing
-- is left.  Only a single numeric argument is supported; anything else
-- trips the 'NOT IMPL' assertion.
local function mock_file(s)
  local remaining = s
  local file = {}
  file.read = function(self, n, ...)
    assert(type(n)=='number' and select('#', ...) == 0, 'NOT IMPL')
    local head = remaining:sub(1, n)
    remaining = remaining:sub(n + 1)
    if head == '' then
      return nil
    end
    return head
  end
  return file
end





-- utility function for test suite.
-- Renders a value for error messages: strings are quoted with %q and any
-- literal newline in the quoted form is replaced by the letter n; every
-- other value goes through tostring.
local function mytostring(s)
  if type(s) ~= 'string' then
    return tostring(s)
  end
  -- keep only gsub's first result (the string), not the match count
  local quoted = string.format('%q', s):gsub('\n','n')
  return quoted
end





-- utility function for test suite.
-- Raises an error of the form <a>~=<b> when a and b differ.  level
-- (default 1) selects which caller the error is attributed to, counted
-- from the caller of asserteq.
local function asserteq(a,b,level)
  if a == b then
    return
  end
  local message = mytostring(a) .. '~=' .. mytostring(b)
  error(message, (level or 1) + 1)
end





-- utility function for test suite (wrap file_lines)
-- Feeds s through file_lines via a mock file and joins the resulting
-- lines with '|'.
local function wrap1(s)
  local pieces = {}
  local count = 0
  for line in file_lines(mock_file(s)) do
    count = count + 1
    pieces[count] = line
  end
  return table.concat(pieces, '|')
end





-- utility function for test suite (wrap split_newlines)
-- Splits s with split_newlines and joins the resulting lines with '|'.
local function wrap2(s)
  local lines = split_newlines(s)
  return table.concat(lines, '|')
end





local SZ = 1024 -- chunk size

-- test basics
-- Each case is {suffix, expected}: the suffix is appended to a run of
-- spaces and the '|'-joined result must equal the same run of spaces
-- followed by the expected text.
local basic_cases = {
  {'',     ''},
  {'\r',   '\r'},
  {'\n',   '\n'},
  {'a',    'a'},
  {'\r\n', '\r\n'},
  {'\n\r', '\n|\r'},
  {'\r\r', '\r|\r'},
  {'\n\n', '\n|\n'},
  {'a\n',  'a\n'},
  {'a\r',  'a\r'},
  {'\na',  '\n|a'},
  {'\ra',  '\r|a'},
}
for _, f in ipairs{wrap1, wrap2} do
  for _, i in ipairs{0,1,2,SZ-3,SZ-2,SZ-1,SZ,SZ+1,SZ+2,SZ+3} do
    local s = (' '):rep(i)
    for _, case in ipairs(basic_cases) do
      asserteq(f(s .. case[1]), s .. case[2])
    end
  end
end



-- check that two implementations are equivalent on a lot of data.
-- Every combination of five entries from cs is appended to a run of
-- spaces whose length sits around a multiple of the chunk size, so the
-- combination is split across reads in file_lines in different ways.
local cs = {'', 'a', '\r', '\n', ' '}
for _,i in ipairs{0,1,SZ-3,SZ-2,SZ-1,SZ,SZ+1,SZ+2,SZ+3} do
for j=0,1 do
  local prefix = (' '):rep(i + j * SZ)
  for _,c1 in ipairs(cs) do
  for _,c2 in ipairs(cs) do
  for _,c3 in ipairs(cs) do
  for _,c4 in ipairs(cs) do
  for _,c5 in ipairs(cs) do
    -- The padding must be part of the tested string; without it the
    -- outer loops only repeat the same short inputs.
    local s = prefix .. c1 .. c2 .. c3 .. c4 .. c5
    local t1 = wrap1(s)
    local t2 = wrap2(s)
    asserteq(t1, t2)
  end end end end end
end end

print 'DONE'

Note: these functions are used in LuaPatch.

--DavidManura

See Also


RecentChanges · preferences
edit · history
Last edited December 28, 2008 2:41 am GMT (diff)