Skip to content

Add jit_stack_size option #44

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 38 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
81039e3
Add ngx.re.opt with a "jit_stack_size" option
alubbe Jun 3, 2016
388a59c
Add error messages
alubbe Jun 15, 2016
1af05e8
Disallow the changing of jit_stack_size once regex cache is populated
alubbe Jul 1, 2016
eceb788
Update error message to be the same as non-ffi
alubbe Jul 7, 2016
b5bd59c
Add NYI tests
alubbe Jul 7, 2016
6bf7add
Update travis.yml
alubbe Jul 22, 2016
1c6c7ad
Tests: Move ngx.re.opt into init_by_lua
alubbe Jul 22, 2016
27d3064
Use *_by_lua_block in new tests
alubbe Nov 13, 2016
8338ad6
Extract magic numbers
alubbe Nov 15, 2016
3aa105b
Focus on success path
alubbe Nov 15, 2016
efd0adb
Move ngx.re.opt to ngx_re.opt
alubbe Dec 16, 2016
96f85ab
Fix tests
alubbe Dec 16, 2016
8ed4d22
Add http_config to the ngx.re tests
alubbe Dec 16, 2016
850e3e4
Merge HttpConfig and init_by_lua manually for test 2
alubbe Dec 16, 2016
5057830
Expand $pwd in test 2
alubbe Dec 16, 2016
11c1da5
Pass http_config as a string
alubbe Dec 16, 2016
6a49919
Another attempt to pass http_config as a string
alubbe Dec 16, 2016
22ff594
Maybe this helps?
alubbe Dec 16, 2016
7c833ab
Collectivism to the rescue!
alubbe Dec 16, 2016
2e62edb
Check for existing compiled regexs in lua instead of C
alubbe Dec 19, 2016
7d5e63e
Save error and next as local variables
alubbe Dec 19, 2016
d099691
Uncapitalize error messages
alubbe Jan 9, 2017
96564d5
Remove unnecessary branches
alubbe Jan 9, 2017
4ae9051
Re-insert empty last line
alubbe Jan 9, 2017
2ee5c72
Cache variable lookup
alubbe Jan 9, 2017
2762ef0
Use a private flag to determine whether the regex cache is empty
alubbe Jan 9, 2017
8da1c98
Update test to expect uncapitalized error message
alubbe Jan 9, 2017
236d832
update re.md
alubbe Jan 9, 2017
e919051
fix variable name
alubbe Jan 9, 2017
f68cbd0
Add spacing
alubbe Jan 12, 2017
a2a7303
Added handling of missing pcre jit support
alubbe Jan 12, 2017
cca774d
Minor improvements to the readme
alubbe Jan 14, 2017
047a97f
Document the minimum jit_stack_size
alubbe Jan 14, 2017
687a9ff
Read error messages from C
alubbe Apr 14, 2017
6e47027
Add missing pointer declaration
alubbe Apr 14, 2017
da1a14f
Add missing bracket
alubbe Apr 14, 2017
8053a23
Add missing imports
alubbe Apr 14, 2017
1e05ce7
Restore return value that got lost in rebasing
alubbe Apr 14, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ install:
- git clone https://github.com/openresty/openresty.git ../openresty
- git clone https://github.com/openresty/nginx-devel-utils.git
- git clone https://github.com/simpl/ngx_devel_kit.git ../ndk-nginx-module
- git clone https://github.com/openresty/lua-nginx-module.git ../lua-nginx-module
- git clone -b pcre_jit_stack_size https://github.com/alubbe/lua-nginx-module.git ../lua-nginx-module
- git clone https://github.com/openresty/no-pool-nginx.git ../no-pool-nginx
- git clone https://github.com/openresty/echo-nginx-module.git ../echo-nginx-module
- git clone https://github.com/openresty/lua-resty-lrucache.git
Expand Down
38 changes: 38 additions & 0 deletions lib/ngx/re.lua
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,24 @@ local core_regex = require "resty.core.regex"


local C = ffi.C
local ffi_str = ffi.string
local sub = string.sub
local error = error
local type = type
local band = bit.band
local new_tab = base.new_tab
local tostring = tostring
local math_max = math.max
local math_min = math.min
local is_regex_cache_empty = core_regex.is_regex_cache_empty
local re_match_compile = core_regex.re_match_compile
local destroy_compiled_regex = core_regex.destroy_compiled_regex
local get_string_buf = base.get_string_buf
local get_size_ptr = base.get_size_ptr
local FFI_OK = base.FFI_OK


local MAX_ERR_MSG_LEN = 128
local FLAG_DFA = 0x02
local PCRE_ERROR_NOMATCH = -1
local DEFAULT_SPLIT_RES_SIZE = 4
Expand All @@ -28,6 +35,12 @@ local DEFAULT_SPLIT_RES_SIZE = 4
local split_ctx = new_tab(0, 1)


ffi.cdef[[
int ngx_http_lua_ffi_set_jit_stack_size(int size, unsigned char *errstr,
size_t *errstr_size);
]]


local _M = { version = base.version }


Expand Down Expand Up @@ -219,4 +232,29 @@ function _M.split(subj, regex, opts, ctx, max, res)
end


function _M.opt(option, value)
if option == "jit_stack_size" then
if not is_regex_cache_empty() then
return error("changing jit stack size is not allowed when some " ..
"regexs have already been compiled and cached")
end

local errbuf = get_string_buf(MAX_ERR_MSG_LEN)
local sizep = get_size_ptr()
sizep[0] = MAX_ERR_MSG_LEN

local rc = C.ngx_http_lua_ffi_set_jit_stack_size(value, errbuf, sizep)

if rc == FFI_OK then
return

else
return error(ffi_str(errbuf, sizep[0]))
end
end

return error("unrecognized option name")
end


return _M
37 changes: 37 additions & 0 deletions lib/ngx/re.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ local ngx_re = require "ngx.re"
-- split
local res, err = ngx_re.split("a,b,c,d", ",")
--> res is now {"a", "b", "c", "d"}

-- opt
ngx_re.opt("jit_stack_size", 128 * 1024)
--> the PCRE jit stack can now handle more complex regular expressions
```

[Back to TOC](#table-of-contents)
Expand Down Expand Up @@ -141,6 +145,39 @@ local res, err = ngx_re.split("a,b", ",", nil, nil, nil, my_table)
When the trailing `nil` is not enough for your purpose, you should
clear the table yourself before feeding it into the `split` function.

opt
-----
**syntax:** *ngx_re.opt(option, value)*

Allows changing of regex settings. Currently, it can only change the
`jit_stack_size` of the PCRE engine, like so:

```nginx

init_by_lua_block { ngx.re.opt("jit_stack_size", 128 * 1024) }

server {
location /re {
content_by_lua_block {
-- full regex and string are taken from https://github.com/JuliaLang/julia/issues/8278
local very_long_string = [[71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] ...]]
local very_complicated_regex = [[([\d\.]+) ([\w.-]+) ([\w.-]+) (\[.+\]) ...]]
local from, to, err = ngx.re.find(very_long_string, very_complicated_regex, "jo")

-- with the regular jit_stack_size, we would get the error 'pcre_exec() failed: -27'
-- instead, we get a match
ngx.print(from .. "-" .. to) -- prints '1-1563'
}
}
}
```

The `jit_stack_size` cannot be set to a value lower than PCRE's default of 32K.

This method requires the PCRE library enabled in Nginx.

This feature was first introduced in the `XXX` release.

[Back to TOC](#table-of-contents)

Community
Expand Down
17 changes: 16 additions & 1 deletion lib/resty/core/regex.lua
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ local _M = {

local buf_grow_ratio = 2


function _M.set_buf_grow_ratio(ratio)
buf_grow_ratio = ratio
end
Expand All @@ -154,6 +155,20 @@ local function get_max_regex_cache_size()
end


local regex_cache_is_empty = true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite like this variable name. How about regex_cache_updated?


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style: two blank lines needed here.


function _M.is_regex_cache_empty()
return regex_cache_is_empty
end


local function lrucache_set_wrapper(...)
regex_cache_is_empty = false
lrucache_set(...)
end


local function parse_regex_opts(opts)
local t = cached_re_opts[opts]
if t then
Expand Down Expand Up @@ -341,7 +356,7 @@ local function re_match_compile(regex, opts)

if compile_once then
-- print("inserting compiled regex into cache")
lrucache_set(regex_match_cache, key, compiled)
lrucache_set_wrapper(regex_match_cache, key, compiled)
end
end

Expand Down
1 change: 1 addition & 0 deletions t/re-find.t
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,4 @@ matched: 234
NYI
--- error_log eval
qr/\[TRACE \d+ content_by_lua\(nginx\.conf:\d+\):4 loop\]/

175 changes: 175 additions & 0 deletions t/re-opt.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# vim:set ft= ts=4 sw=4 et fdm=marker:
use lib 'lib';
use Test::Nginx::Socket::Lua;
use Cwd qw(cwd);

#worker_connections(1014);
#master_process_enabled(1);
#log_level('warn');

repeat_each(2);

plan tests => repeat_each() * blocks() * 3;

our $pwd = cwd();

our $HttpConfig = <<_EOC_;
lua_package_path "$pwd/lib/?.lua;../lua-resty-lrucache/lib/?.lua;;";
init_by_lua_block {
-- local verbose = true
local verbose = false
local outfile = "$Test::Nginx::Util::ErrLogFile"
-- local outfile = "/tmp/v.log"
if verbose then
local dump = require "jit.dump"
dump.on(nil, outfile)
else
local v = require "jit.v"
v.on(outfile)
end

require "resty.core"
-- jit.opt.start("hotloop=1")
-- jit.opt.start("loopunroll=1000000")
-- jit.off()
}
_EOC_

no_diff();
no_long_string();
check_accum_error_log();
run_tests();

__DATA__

=== TEST 1: default jit_stack_size too small
--- http_config eval: $::HttpConfig
--- config
location /re {
content_by_lua_block {
-- regex is taken from https://github.com/JuliaLang/julia/issues/8278
local very_long_string = [[71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] "GET emptymind.org/thevacantwall/wp-content/uploads/2013/02/DSC_006421.jpg HTTP/1.1" 200 492513 "http://images.search.yahoo.com/images/view;_ylt=AwrB8py9gdlTGEwADcSjzbkF;_ylu=X3oDMTI2cGZrZTA5BHNlYwNmcC1leHAEc2xrA2V4cARvaWQDNTA3NTRiMzYzY2E5OTEwNjBiMjc2YWJhMjkxMTEzY2MEZ3BvcwM0BGl0A2Jpbmc-?back=http%3A%2F%2Fus.yhs4.search.yahoo.com%2Fyhs%2Fsearch%3Fei%3DUTF-8%26p%3Dapartheid%2Bwall%2Bin%2Bpalestine%26type%3Dgrvydef%26param1%3D1%26param2%3Dsid%253Db01676f9c26355f014f8a9db87545d61%2526b%253DChrome%2526ip%253D71.163.72.113%2526p%253Dgroovorio%2526x%253DAC811262A746D3CD%2526dt%253DS940%2526f%253D7%2526a%253Dgrv_tuto1_14_30%26hsimp%3Dyhs-fullyhosted_003%26hspart%3Dironsource&w=588&h=387&imgurl=occupiedpalestine.files.wordpress.com%2F2012%2F08%2F5-peeking-through-the-wall.jpg%3Fw%3D588%26h%3D387&rurl=http%3A%2F%2Fwww.stopdebezetting.com%2Fwereldpers%2Fcompare-the-berlin-wall-vs-israel-s-apartheid-wall-in-palestine.html&size=49.0KB&name=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&p=apartheid+wall+in+palestine&oid=50754b363ca991060b276aba291113cc&fr2=&fr=&tt=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&b=0&ni=21&no=4&ts=&tab=organic&sigr=13evdtqdq&sigb=19k7nsjvb&sigi=12o2la1db&sigt=12lia2m0j&sign=12lia2m0j&.crumb=.yUtKgFI6DE&hsimp=yhs-fullyhosted_003&hspart=ironsource" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36]]
local very_complicated_regex = [[([\d\.]+) ([\w.-]+) ([\w.-]+) (\[.+\]) "([^"\r\n]*|[^"\r\n\[]*\[.+\][^"]+|[^"\r\n]+.[^"]+)" (\d{3}) (\d+|-) ("(?:[^"]|\")+)"? ("(?:[^"]|\")+)"?]]
local from, to, err = ngx.re.find(very_long_string, very_complicated_regex, "jo")
if from or to then
ngx.say("from: ", from)
ngx.say("to: ", to)
else
if err then
ngx.say("error: ", err)
return
end
ngx.say("not matched!")
end
}
}
--- request
GET /re
--- response_body
error: pcre_exec() failed: -27
--- no_error_log
[error]



=== TEST 2: increase jit_stack_size
--- http_config eval
<<_EOC_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe most tests in this module (and elsewhere) use the qq operator as in:

--- http_config eval
qq{
    lua_package_path "...";
    init_by_lua_block {

    }
}

lua_package_path "$::pwd/lib/?.lua;../lua-resty-lrucache/lib/?.lua;;";
init_by_lua_block {
-- local verbose = true
local verbose = false
local outfile = "$Test::Nginx::Util::ErrLogFile"
-- local outfile = "/tmp/v.log"
if verbose then
local dump = require "jit.dump"
dump.on(nil, outfile)
else
local v = require "jit.v"
v.on(outfile)
end

require "resty.core"
-- jit.opt.start("hotloop=1")
-- jit.opt.start("loopunroll=1000000")
-- jit.off()

local ngx_re = require "ngx.re"
ngx_re.opt("jit_stack_size", 128 * 1024)
}
_EOC_
--- config
location /re {
content_by_lua_block {
-- regex is taken from https://github.com/JuliaLang/julia/issues/8278
local very_long_string = [[71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] "GET emptymind.org/thevacantwall/wp-content/uploads/2013/02/DSC_006421.jpg HTTP/1.1" 200 492513 "http://images.search.yahoo.com/images/view;_ylt=AwrB8py9gdlTGEwADcSjzbkF;_ylu=X3oDMTI2cGZrZTA5BHNlYwNmcC1leHAEc2xrA2V4cARvaWQDNTA3NTRiMzYzY2E5OTEwNjBiMjc2YWJhMjkxMTEzY2MEZ3BvcwM0BGl0A2Jpbmc-?back=http%3A%2F%2Fus.yhs4.search.yahoo.com%2Fyhs%2Fsearch%3Fei%3DUTF-8%26p%3Dapartheid%2Bwall%2Bin%2Bpalestine%26type%3Dgrvydef%26param1%3D1%26param2%3Dsid%253Db01676f9c26355f014f8a9db87545d61%2526b%253DChrome%2526ip%253D71.163.72.113%2526p%253Dgroovorio%2526x%253DAC811262A746D3CD%2526dt%253DS940%2526f%253D7%2526a%253Dgrv_tuto1_14_30%26hsimp%3Dyhs-fullyhosted_003%26hspart%3Dironsource&w=588&h=387&imgurl=occupiedpalestine.files.wordpress.com%2F2012%2F08%2F5-peeking-through-the-wall.jpg%3Fw%3D588%26h%3D387&rurl=http%3A%2F%2Fwww.stopdebezetting.com%2Fwereldpers%2Fcompare-the-berlin-wall-vs-israel-s-apartheid-wall-in-palestine.html&size=49.0KB&name=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&p=apartheid+wall+in+palestine&oid=50754b363ca991060b276aba291113cc&fr2=&fr=&tt=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&b=0&ni=21&no=4&ts=&tab=organic&sigr=13evdtqdq&sigb=19k7nsjvb&sigi=12o2la1db&sigt=12lia2m0j&sign=12lia2m0j&.crumb=.yUtKgFI6DE&hsimp=yhs-fullyhosted_003&hspart=ironsource" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36]]
local very_complicated_regex = [[([\d\.]+) ([\w.-]+) ([\w.-]+) (\[.+\]) "([^"\r\n]*|[^"\r\n\[]*\[.+\][^"]+|[^"\r\n]+.[^"]+)" (\d{3}) (\d+|-) ("(?:[^"]|\")+)"? ("(?:[^"]|\")+)"?]]
local from, to, err = ngx.re.find(very_long_string, very_complicated_regex, "jo")
if from or to then
ngx.say("from: ", from)
ngx.say("to: ", to)
else
if err then
ngx.say("error: ", err)
return
end
ngx.say("not matched!")
end
}
}
--- request
GET /re
--- response_body
from: 1
to: 1563
--- no_error_log
[error]



=== TEST 3: jit_stack_size change disallowed once regex cache is populated
--- http_config eval: $::HttpConfig
--- config
location /re {
content_by_lua_block {
local ngx_re = require "ngx.re"

local status, err = pcall(ngx_re.opt, "jit_stack_size", 128 * 1024)
if err then ngx.log(ngx.ERR, err) end
local s = "hello, 1234"
local from, to = ngx.re.find(s, "(hello world)|([0-9])", "jo")
ngx.say("from: ", from)
ngx.say("to: ", to)
}
}
--- request
GET /re
--- response_body
from: 8
to: 8

--- grep_error_log eval
qr/changing jit stack size is not allowed when some regexs have already been compiled and cached/

--- grep_error_log_out eval
["", "changing jit stack size is not allowed when some regexs have already been compiled and cached\n"]



=== TEST 4: passing unknown options to ngx_re.opt throws an error
--- http_config eval: $::HttpConfig
--- config
location /re {
content_by_lua_block {
local ngx_re = require "ngx.re"

local status, err = pcall(ngx_re.opt, "foo", 123)
ngx.say(err)
}
}
--- request
GET /re
--- response_body
unrecognized option name
--- no_error_log
[error]