当前位置：首页 > article >正文

LuaJIT2.1 和 Lua5.4.8 性能对比

article 2025/8/23 20:51:07

在这里插入图片描述

说明

最近在学习 LuaJIT，想看看把它接入到项目中使用，会提高多大的性能。

今天抽时间，简单地测试了一下 LuaJIT 2.1 和 Lua 5.4.8 的性能。

测试平台：

系统：Windows 10 WSL
CPU：Intel® Core™ i7-8700 CPU @ 3.20GHz 3.19 GHz
内存：48.0 GB

下面测试结果只是我简单测试的结果，仅供参考。
相关代码在最后面。

综合性能对比分析

第一组测试（详细性能对比）

测试项目	Lua 5.4	LuaJIT	性能提升
Fibonacci(30) 递归	0.0515秒	0.0095秒	5.4倍
数学操作(10万次)	0.0125秒	0.0022秒	5.7倍
字符串操作	0.0033秒	0.0043秒	0.8倍
表操作(10万)	0.0797秒	0.0322秒	2.5倍

第二组测试（深度分析）

测试规模/类型	Lua 5.4	LuaJIT	性能提升
100万次循环	0.0041秒	0.0010秒	4.1倍
500万次循环	0.0204秒	0.0051秒	4.0倍
1000万次循环	0.0407秒	0.0102秒	4.0倍
浮点运算(100万)	0.0298秒	0.0056秒	5.3倍
整数操作	0.0062秒	0.0010秒	6.2倍
浮点操作	0.0069秒	0.0010秒	6.9倍
顺序访问	0.0020秒	0.0006秒	3.3倍
随机访问	0.0034秒	0.0010秒	3.4倍

关键说明

1. 稳定的性能提升

LuaJIT在所有数值计算任务上都展现了4-7倍的性能提升，这个倍数很稳定，说明JIT优化效果是可预测的。

2. 规模无关的优化效果

从100万到1000万次循环，性能提升倍数保持在4倍左右，说明LuaJIT的优化效果不受问题规模影响。

3. 内存使用效率

Lua 5.4: 1048.76 KB
LuaJIT: 563.17 KB

LuaJIT 的内存占用比 Lua 5.4 少约 **46%**（563.17 KB 对 1048.76 KB），这可能是因为：

更高效的对象表示
不同的垃圾回收策略
JIT编译后的代码更紧凑

4. 类型统一优化

在LuaJIT中，整数和浮点操作的性能几乎相同（都是0.0010秒），这说明JIT编译器成功地进行了类型特化优化。

5. 内存访问模式优化

LuaJIT对顺序访问和随机访问都有显著优化，两者的提升倍数相近（3.3倍和3.4倍）；从绝对耗时看，顺序访问仍然更快（0.0006秒对0.0010秒）。

6. JIT预热效果

有趣的是，这次测试中JIT预热效果不明显（1.01倍），这可能因为：

测试代码相对简单，很快就被优化了
测试规模足够大，预热时间相对较短

相关代码

测试代码1：

-- detailed_comparison.lua
print("=== Detailed Performance Comparison ===")
print("Lua Version:", _VERSION)
-- The global `jit` table is only present under LuaJIT; otherwise the fallback string is printed.
print("Engine:", jit and jit.version or "Standard Lua Interpreter")
print()

-- Runs func(...) once and prints how long it took.
--   name  label printed in front of the timing
--   func  function under test
--   ...   arguments forwarded to func
-- Returns the elapsed time in seconds as reported by os.clock().
local function benchmark(name, func, ...)
  -- Run a full GC cycle before timing so each test starts from a collected heap.
  collectgarbage("collect")
  local start = os.clock()
  func(...)
  local elapsed = os.clock() - start
  print(string.format("%-30s: %8.4f seconds", name, elapsed))
  return elapsed
end

-- Fibonacci test that avoids overflow
-- Iterative Fibonacci with an early exit.
-- Returns n itself for n <= 1. Otherwise it steps the pair (a, b) forward and
-- returns as soon as b exceeds 1e15. For large n the result is therefore the
-- first Fibonacci number above 1e15 rather than fib(n), and the loop stops
-- after a few dozen iterations no matter how large n is.
local function safe_fibonacci(n)
  if n <= 1 then
    return n
  end
  local a, b = 0, 1
  for _ = 2, n do
    a, b = b, a + b
    -- Return early once the value is about to become too large.
    if b > 1e15 then
      return b
    end
  end
  return b
end

-- Recursive test at different scales
-- Naive doubly-recursive Fibonacci; returns n itself when n <= 1.
-- No memoization, so the number of calls grows exponentially with n.
local function fib_recursive(n)
  if n <= 1 then
    return n
  end
  return fib_recursive(n - 1) + fib_recursive(n - 2)
end

-- Math-intensive computation
-- Sums sin(i) * cos(i) + sqrt(i) for i = 1..n and returns the total
-- (0 when n < 1).
local function math_intensive(n)
  local sum = 0
  for i = 1, n do
    sum = sum + math.sin(i) * math.cos(i) + math.sqrt(i)
  end
  return sum
end

-- String operations
-- Builds a string by appending tostring(i) for i = 1..n and returns its
-- length in bytes. The repeated `..` concatenation is kept as written because
-- it is the workload this benchmark times.
local function string_operations(n)
  local result = ""
  for i = 1, n do
    result = result .. tostring(i)
    -- Cap the string size to avoid memory problems.
    if #result > 100000 then
      break
    end
  end
  return #result
end

-- Table-intensive operations
-- Fills a table with n small records ({x, y, data}) and then sums the x and y
-- fields of every record. Returns that sum (0 when n < 1).
local function table_intensive(n)
  local t = {}
  for i = 1, n do
    t[i] = { x = i, y = i * 2, data = "item" .. i }
  end
  local sum = 0
  for i = 1, n do
    sum = sum + t[i].x + t[i].y
  end
  return sum
end

print("Running benchmarks...")

-- Moderate test sizes
benchmark("Fibonacci(30) recursive", fib_recursive, 30)
benchmark("Safe Fibonacci(100000)", safe_fibonacci, 100000)
benchmark("Math operations (100K)", math_intensive, 100000)
benchmark("String operations", string_operations, 5000)
benchmark("Table operations (100K)", table_intensive, 100000)

-- Show memory usage after a full collection; collectgarbage("count") reports KB.
collectgarbage("collect")
print(string.format("\nMemory usage: %.2f KB", collectgarbage("count")))

-- JIT-specific information (the `jit` global is only present under LuaJIT)
if jit then
  print("\nJIT Information:")
  print("Status:", jit.status())
  print("Architecture:", jit.arch)

  -- Count compiled traces by probing trace numbers 1..1000.
  -- The lookup is guarded: if jit.util or traceinfo is missing, nothing is
  -- counted and no trace line is printed.
  local traces = 0
  local traceinfo = jit.util and jit.util.traceinfo
  if traceinfo then
    for i = 1, 1000 do
      if traceinfo(i) then
        traces = traces + 1
      end
    end
  end
  if traces > 0 then
    print("Compiled traces:", traces)
  end
end

测试代码2：

-- deep_analysis.lua
print("=== Deep Performance Analysis ===")
-- The global `jit` table is only present under LuaJIT; otherwise report the Lua version.
print("Engine:", jit and jit.version or ("Standard " .. _VERSION))
print()

-- Times func(...) over several runs and prints the average, minimum and
-- maximum time.
--   name        label printed in front of the statistics
--   func        function under test
--   iterations  number of timed runs; must be at least 1
--   ...         arguments forwarded to func on every run
-- Returns the average time per run in seconds as reported by os.clock().
-- func is called iterations + 1 times in total (one untimed warm-up run).
local function benchmark_with_analysis(name, func, iterations, ...)
  -- With no timed runs there is no min/max to report; fail with a clear
  -- message instead of a string.format error on nil.
  if iterations < 1 then
    error("iterations must be at least 1", 2)
  end

  -- Warm-up run (not timed)
  func(...)

  -- Time several runs so an average can be taken
  local times = {}
  for i = 1, iterations do
    collectgarbage("collect")
    local start = os.clock()
    func(...)
    times[i] = os.clock() - start
  end

  -- Compute statistics
  local total = 0
  local min_time = times[1]
  local max_time = times[1]
  for i = 1, iterations do
    local t = times[i]
    total = total + t
    if t < min_time then min_time = t end
    if t > max_time then max_time = t end
  end
  local avg_time = total / iterations

  print(string.format("%-25s: avg=%.4fs, min=%.4fs, max=%.4fs", name, avg_time, min_time, max_time))
  return avg_time
end

-- Loop tests at different scales
-- Sums the integers 1..n with a plain numeric for loop and returns the total
-- (0 when n < 1).
local function loop_test(n)
  local sum = 0
  for i = 1, n do
    sum = sum + i
  end
  return sum
end

-- Floating-point intensive computation
-- Starting from x = 1.0, applies x = sqrt(x * 1.000001) n times and returns x.
local function float_intensive(n)
  local x = 1.0
  for _ = 1, n do
    x = x * 1.000001
    x = math.sqrt(x)
  end
  return x
end

-- Integer vs. floating-point operations
-- Sums i * 2 for i = 1..n using integer literals and returns the total.
-- On Lua 5.3+ this stays in the integer subtype; on Lua 5.1 / LuaJIT every
-- number is a double, so the difference from float_ops is only in the literals.
local function integer_ops(n)
  local sum = 0
  for i = 1, n do
    sum = sum + (i * 2) -- integer arithmetic
  end
  return sum
end

-- Same loop as integer_ops, written with float literals (0.0 and 2.0).
local function float_ops(n)
  local sum = 0.0
  for i = 1, n do
    sum = sum + (i * 2.0) -- floating-point arithmetic
  end
  return sum
end

-- Table access pattern tests
-- Fills t[1..n] with t[i] = i, then reads the entries back in index order and
-- returns their sum.
local function sequential_access(n)
  local t = {}
  for i = 1, n do
    t[i] = i
  end
  local sum = 0
  for i = 1, n do
    sum = sum + t[i]
  end
  return sum
end

-- Fills t[1..n] with t[i] = i, then reads n entries in a scattered order and
-- returns their sum. The index sequence is deterministic, not truly random.
local function random_access(n)
  local t = {}
  for i = 1, n do
    t[i] = i
  end
  local sum = 0
  for i = 1, n do
    local idx = (i * 17 + 31) % n + 1 -- pseudo-random access, always within 1..n
    sum = sum + t[idx]
  end
  return sum
end

print("Multiple runs for statistical accuracy:")
print()

-- Tests at different scales
local sizes = {1000000, 5000000, 10000000}
for _, size in ipairs(sizes) do
  print(string.format("=== Scale: %d operations ===", size))
  benchmark_with_analysis("Loop " .. size, loop_test, 3, size)
  if size <= 1000000 then -- keep the float test from taking too long
    benchmark_with_analysis("Float " .. size, float_intensive, 3, size)
  end
  print()
end

print("=== Data Type Comparison ===")
benchmark_with_analysis("Integer operations", integer_ops, 5, 1000000)
benchmark_with_analysis("Float operations", float_ops, 5, 1000000)
print()

print("=== Memory Access Patterns ===")
benchmark_with_analysis("Sequential access", sequential_access, 3, 100000)
benchmark_with_analysis("Random access", random_access, 3, 100000)
print()

-- JIT-specific analysis (the `jit` global is only present under LuaJIT)
if jit then
  print("=== JIT Warmup Analysis ===")

  -- Workload for the cold/warm comparison: sums sin(i) * cos(i) for i = 1..n.
  local function warmup_test(n)
    local sum = 0
    for i = 1, n do
      sum = sum + math.sin(i) * math.cos(i)
    end
    return sum
  end

  -- Cold start: first call of warmup_test
  local start = os.clock()
  warmup_test(100000)
  local cold_time = os.clock() - start

  -- After warm-up: second call of the same function
  local start2 = os.clock()
  warmup_test(100000)
  local warm_time = os.clock() - start2

  print(string.format("Cold start: %.4fs", cold_time))
  print(string.format("After warmup: %.4fs", warm_time))
  -- Skip the ratio when the warm run measured as zero, which would otherwise
  -- print inf or nan.
  if warm_time > 0 then
    print(string.format("Warmup speedup: %.2fx", cold_time / warm_time))
  else
    print("Warmup speedup: n/a (warm run below timer resolution)")
  end
end