Lua Gen Plus Plus

lua-users home
wiki

luagen++ is a C++ header for generating efficient sequences of Lua C API calls at compile-time via template metaprogramming techniques.

Description

Warning: see status message below for status of this project.

This might begin a long article describing the implementation of a C++ template metaprogramming technique for the purpose of generating at compile time sequences of Lua C API calls that execute efficiently. This will culminate in a single reusable C++ header file (luagen.hpp - referred to as "luagen++") that implements these techniques. You can use this to bind C++ and Lua code in a clean but still very efficient manner.

As an example, the following C++ code


#include <luagen.hpp>

using namespace luagen;

...

eval(L, global("print")(global("math")["sqrt"](lnumber(2.0))));

expands at compile time to this code:


lua_getglobal(L,"print")

lua_getglobal(L,"math")

lua_getfield(L,-1,"sqrt")

lua_remove(L,-2)

lua_pushnumber(L,2.0)

lua_call(L,1, -1)

lua_call(L,1, 0)

Now, there have been many techniques for binding Lua and C++ code together (BindingCodeToLua). The approach here has the following characteristics:

The closest related project I've found to this is object.hpp in Luabind [3]. That also uses C++ template metaprogramming techniques, but the code generation does not appear to have the optimal qualities of Point #1 above. Luabind includes the Boost headers [4] and has about 10K lines of header files containing complex template metaprogramming code that is not thoroughly documented, and it provides higher-level and more automatic definitions (e.g. module and class inheritance definitions) than desired here. Though luagen.hpp will likely be a few thousand lines for a fairly complete wrapping of C API calls, the core techniques themselves (which can be isolated) can be implemented in only a few hundred reasonably comprehensible lines, and this article will show you how.

So, the purpose of this article is two-fold: to demonstrate the "luagen++" header file (luagen.hpp) and to explain its implementation.

Description

TODO! - A long, very interesting discussion of the implementation might have gone here.

Disassembly

If you don't believe me, take a look at the disassembly (note: there are a few extra lua_gettop's that could probably be eliminated in luagen.hpp):


	.file	"test.cpp"

	.text

	.align 2

	.p2align 4,,15

	.def	__ZN6luagen11debugprintfEz;	.scl	3;	.type	32;	.endef

__ZN6luagen11debugprintfEz:

	pushl	%ebp

	movl	%esp, %ebp

	popl	%ebp

	ret

	.def	___main;	.scl	2;	.type	32;	.endef

	.section .rdata,"dr"

LC2:

	.ascii "sqrt\0"

LC0:

	.ascii "print\0"

LC1:

	.ascii "math\0"

	.text

	.align 2

	.p2align 4,,15

.globl _main

	.def	_main;	.scl	2;	.type	32;	.endef

_main:

	pushl	%ebp

	movl	$16, %eax

	movl	%esp, %ebp

	pushl	%edi

	pushl	%esi

	pushl	%ebx

	subl	$92, %esp

	andl	$-16, %esp

	call	__alloca

	call	___main

	call	_luaL_newstate

	movl	%eax, (%esp)

	movl	%eax, %ebx

	call	_luaL_openlibs

	movl	$LC2, -28(%ebp)

	leal	-24(%ebp), %eax

	movl	-28(%ebp), %edx

	movl	%eax, -32(%ebp)

	movl	-32(%ebp), %eax

	movl	%edx, -36(%ebp)

	movl	%ebx, (%esp)

	movl	%eax, -40(%ebp)

	leal	-40(%ebp), %eax

	movl	%eax, -56(%ebp)

	leal	-48(%ebp), %eax

	movl	%eax, -52(%ebp)

	movl	-56(%ebp), %eax

	movl	$LC0, -20(%ebp)

	movl	-52(%ebp), %edx

	movl	$LC1, -24(%ebp)

	movl	%eax, -64(%ebp)

	leal	-20(%ebp), %eax

	movl	%eax, -72(%ebp)

	leal	-64(%ebp), %eax

	movl	%eax, -68(%ebp)

	movl	-72(%ebp), %eax

	movl	%edx, -60(%ebp)

	movl	-68(%ebp), %edx

	movl	$0, -48(%ebp)

	movl	$1073741824, -44(%ebp)

	movl	%edx, -76(%ebp)

	movl	%eax, -80(%ebp)

	call	_lua_gettop

	movl	-80(%ebp), %eax

	movl	(%eax), %eax

	movl	%ebx, (%esp)

	movl	%eax, 8(%esp)

	movl	$-10002, %eax

	movl	%eax, 4(%esp)

	call	_lua_getfield

	movl	%ebx, (%esp)

	call	_lua_gettop

	movl	%eax, -84(%ebp)

	movl	-76(%ebp), %edi

	movl	%ebx, (%esp)

	call	_lua_gettop

	movl	(%edi), %esi

	movl	(%esi), %eax

	movl	(%eax), %eax

	movl	%ebx, (%esp)

	movl	%eax, 8(%esp)

	movl	$-10002, %eax

	movl	%eax, 4(%esp)

	call	_lua_getfield

	movl	4(%esi), %eax

	movl	%ebx, (%esp)

	movl	%eax, 8(%esp)

	movl	$-1, %eax

	movl	%eax, 4(%esp)

	call	_lua_getfield

	movl	%ebx, (%esp)

	movl	$-2, %eax

	movl	%eax, 4(%esp)

	call	_lua_remove

	movl	%ebx, (%esp)

	call	_lua_gettop

	movl	%eax, %esi

	movl	4(%edi), %eax

	fldl	(%eax)

	movl	%ebx, (%esp)

	fstpl	4(%esp)

	call	_lua_pushnumber

	movl	%ebx, (%esp)

	call	_lua_gettop

	movl	%ebx, (%esp)

	subl	%esi, %eax

	movl	$-1, %esi

	movl	%esi, 8(%esp)

	movl	%eax, 4(%esp)

	call	_lua_call

	movl	%ebx, (%esp)

	call	_lua_gettop

	movl	%ebx, (%esp)

	movl	-84(%ebp), %ecx

	xorl	%edx, %edx

	movl	%edx, 8(%esp)

	subl	%ecx, %eax

	movl	%eax, 4(%esp)

	call	_lua_call

	movl	%ebx, (%esp)

	call	_lua_close

	leal	-12(%ebp), %esp

	xorl	%eax, %eax

	popl	%ebx

	popl	%esi

	popl	%edi

	popl	%ebp

	ret

	.def	_lua_pushnumber;	.scl	3;	.type	32;	.endef

	.def	_lua_remove;	.scl	3;	.type	32;	.endef

	.def	_lua_getfield;	.scl	3;	.type	32;	.endef

	.def	_lua_call;	.scl	3;	.type	32;	.endef

	.def	_lua_gettop;	.scl	3;	.type	32;	.endef

	.def	_lua_close;	.scl	3;	.type	32;	.endef

	.def	_luaL_openlibs;	.scl	3;	.type	32;	.endef

	.def	_luaL_newstate;	.scl	3;	.type	32;	.endef

And here's the output of g++ 4.3 under the "-fdump-tree-optimized" flag ("test.cpp.126t.optimized") showing a C-like representation of the code following optimization:


;; Function int main() (main)



Analyzing Edge Insertions.

int main() ()

{

  int D.5767;

  int pos;

  int D.5766;

  int pos;

  const struct getfield_ * this.27;

  const struct call_ * this.24;

  struct lua_State * L;

  struct global D.4728;

  struct global D.4729;

  const struct getfield_ D.4820;

  struct lnumber D.4830;

  const struct call_ D.4917;



<bb 2>:

  L = luaL_newstate ();

  luaL_openlibs (L);

  D.4830.v_ = 2.0e+0;

  D.4729.name_ = &"math"[0];

  D.4820.k_ = &"sqrt"[0];

  D.4820.t_ = (struct val *) &D.4729;

  D.4917.p1_ = (const struct val *) &D.4830;

  D.4917.f_ = (const struct val *) &D.4820;

  D.4728.name_ = &"print"[0];

  lua_gettop (L);

  lua_getfield (L, -10002, ((const struct global *) (struct val *) &D.4728)->name_);

  pos = lua_gettop (L);

  this.24 = (const struct call_ *) (const struct val *) &D.4917;

  lua_gettop (L);

  this.27 = (const struct getfield_ *) this.24->f_;

  lua_getfield (L, -10002, ((const struct global *) this.27->t_)->name_);

  lua_getfield (L, -1, this.27->k_);

  lua_remove (L, -2);

  pos = lua_gettop (L);

  lua_pushnumber (L, ((const struct lnumber *) this.24->p1_)->v_);

  D.5766 = lua_gettop (L);

  lua_call (L, D.5766 - pos, -1);

  D.5767 = lua_gettop (L);

  lua_call (L, D.5767 - pos, 0);

  lua_close (L);

  return 0;

Note: the above code generation results are from an older version. In practice, producing optimal code for every input is difficult, and temporary objects may be constructed.

Download Source

WARNING: this code is still in development. There are likely bugs and missing API features, though it passes an initial test suite. Please post any problems in the comments section below.

Status

[2008-07-16]

I'm somewhat discouraged with this project. The approach is sound, but a difficulty is that goal #1 above relies on efficient template expansion and inlining by the compiler. That behavior is implementation-dependent. I find myself frequently disassembling, examining the g++ 4.x gimple output, examining the linker .map files, etc., just to make sure that the compiler isn't generating unnecessary template instantiations or failing to inline temporary objects. Sometimes the generation is quite efficient, particularly for simple expressions with a shallow AST; other times there is code bloat, and this depends on the compiler (e.g. g++ 3.x, g++ 4.x, MSVC++9) and compiler options. Fiddling with const vs. const & and inline vs. forceinline in the code can help, but the results don't seem as consistent as desired. Maybe someone else wants to figure out what works for popular compilers. Plus, complex templates increase build times and cause cryptic compiler errors. None of this seems to simplify things or improve productivity.

So...I started a new project: LuaToCee. Instead of relying on horrible C++ templates for the metaprogramming, I now use Lua for the metaprogramming. This is much more satisfying. It may not achieve all the same goals above, but it has its own uses.

The Lua C API is not that bad anyway. In fact, to avoid excessive use of the C API, write as much of the code as possible in Lua, compile it into bytecodes, run BinToCee, and call that function from C, passing it the remaining data from C. There are also techniques to avoid pitfalls in using the C API (e.g. stack corruption).

Author

DavidManura

User Comments

...

See Also


RecentChanges · preferences
edit · history
Last edited October 31, 2009 6:37 pm GMT (diff)