Skip to content

Commit 473c899

Browse files
committed
select/pairs: don't ignore provided bucket_id
The bug is simple: crud ignores provided bucket_id, when unable to determine it itself. For example, when no conditions are given or when given condition involves a secondary index, which is not entirely in the primary index. It leads to incorrect select/pairs result: tuples are collected from all replicasets, while should be collected from one replicaset pointed by bucket_id. Second, it involves all replicasets into the request processing (performs map-reduce) that may dramatically drop performance. One existing test case was changed: 'test_opts_not_damaged' in ipairs_test.lua. The crud.pairs() request in this test case was affected by the problem and incorrect result was expected. The idea of the fix is suggested by Michael Filonenko in PR #221. Nice suggestions were given by Sergey Bronnikov (see PR #222). Fixes #220
1 parent a6e34e1 commit 473c899

File tree

5 files changed

+208
-3
lines changed

5 files changed

+208
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1919
### Fixed
2020

2121
* Damaging of opts table by CRUD methods.
22+
* Ignoring of `bucket_id` option in `crud.select()`/`crud.pairs()` (#220).
2223

2324
### Added
2425

crud/select/compat/select.lua

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,40 @@ local function build_select_iterator(space_name, user_conditions, opts)
6565
-- set replicasets to select from
6666
local replicasets_to_select = replicasets
6767

68-
if plan.sharding_key ~= nil and opts.force_map_call ~= true then
68+
-- Whether to call one storage replicaset or perform
69+
-- map-reduce?
70+
--
71+
-- If map-reduce is requested explicitly, ignore provided
72+
-- bucket_id and fetch data from all storage replicasets.
73+
--
74+
-- Otherwise:
75+
--
76+
-- 1. If particular replicaset is pointed by a caller (using
77+
-- the bucket_id option[^1]), crud MUST fetch data only
78+
-- from this storage replicaset: disregarding whether other
79+
-- storages have tuples that fit given condition.
80+
--
81+
-- 2. If a replicaset may be deduced from conditions
82+
-- (conditions -> sharding key -> bucket id -> replicaset),
83+
-- fetch data only from the replicaset. It does not change
84+
-- the result[^2], but significantly reduces network
85+
-- pressure.
86+
--
87+
-- 3. Fallback to map-reduce otherwise.
88+
--
89+
-- [^1]: We can change meaning of this option is a future,
90+
-- see gh-190. But now bucket_id points a storage
91+
-- replicaset, not a virtual bucket.
92+
--
93+
-- [^2]: It is correct statement only if we'll turn a blind
94+
-- eye to resharding. However, AFAIU, the optimization
95+
-- does not make the result less consistent (sounds
96+
-- weird, huh?).
97+
local perform_map_reduce = opts.force_map_call == true or
98+
(opts.bucket_id == nil and plan.sharding_key == nil)
99+
if not perform_map_reduce then
69100
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)
101+
assert(bucket_id ~= nil)
70102

71103
local err
72104
replicasets_to_select, err = common.get_replicasets_by_sharding_key(bucket_id)

crud/select/compat/select_old.lua

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,13 @@ local function build_select_iterator(space_name, user_conditions, opts)
118118
-- set replicasets to select from
119119
local replicasets_to_select = replicasets
120120

121-
if plan.sharding_key ~= nil and opts.force_map_call ~= true then
121+
-- See explanation of this logic in
122+
-- crud/select/compat/select.lua.
123+
local perform_map_reduce = opts.force_map_call == true or
124+
(opts.bucket_id == nil and plan.sharding_key == nil)
125+
if not perform_map_reduce then
122126
local bucket_id = sharding.key_get_bucket_id(plan.sharding_key, opts.bucket_id)
127+
assert(bucket_id ~= nil)
123128

124129
local err
125130
replicasets_to_select, err = common.get_replicasets_by_sharding_key(bucket_id)

test/integration/pairs_test.lua

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ local t = require('luatest')
55
local crud_utils = require('crud.common.utils')
66

77
local helpers = require('test.helper')
8+
local storage_stat = require('test.helpers.storage_stat')
89

910
local pgroup = t.group('pairs', {
1011
{engine = 'memtx'},
@@ -25,6 +26,13 @@ pgroup.before_all(function(g)
2526
g.cluster:start()
2627

2728
g.space_format = g.cluster.servers[2].net_box.space.customers:format()
29+
30+
helpers.call_on_storages(g.cluster, function(server)
31+
server.net_box:eval([[
32+
local storage_stat = require('test.helpers.storage_stat')
33+
storage_stat.init_on_storage()
34+
]])
35+
end)
2836
end)
2937

3038
pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end)
@@ -758,15 +766,19 @@ end
758766
pgroup.test_opts_not_damaged = function(g)
759767
local customers = helpers.insert_objects(g, 'customers', {
760768
{
769+
-- bucket_id is 477, storage is s-2
761770
id = 1, name = "Elizabeth", last_name = "Jackson",
762771
age = 12, city = "Los Angeles",
763772
}, {
773+
-- bucket_id is 401, storage is s-2
764774
id = 2, name = "Mary", last_name = "Brown",
765775
age = 46, city = "London",
766776
}, {
777+
-- bucket_id is 2804, storage is s-1
767778
id = 3, name = "David", last_name = "Smith",
768779
age = 33, city = "Los Angeles",
769780
}, {
781+
-- bucket_id is 1161, storage is s-2
770782
id = 4, name = "William", last_name = "White",
771783
age = 46, city = "Chicago",
772784
},
@@ -775,7 +787,6 @@ pgroup.test_opts_not_damaged = function(g)
775787
table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)
776788

777789
local expected_customers = {
778-
{id = 3, name = "David", age = 33},
779790
{id = 4, name = "William", age = 46},
780791
}
781792

@@ -805,3 +816,80 @@ pgroup.test_opts_not_damaged = function(g)
805816
t.assert_equals(objects, expected_customers)
806817
t.assert_equals(new_pairs_opts, pairs_opts)
807818
end
819+
820+
-- gh-220: bucket_id argument is ignored when it cannot be deduced
821+
-- from provided select/pairs conditions.
822+
pgroup.test_pairs_no_map_reduce = function(g)
823+
local customers = helpers.insert_objects(g, 'customers', {
824+
{
825+
-- bucket_id is 477, storage is s-2
826+
id = 1, name = 'Elizabeth', last_name = 'Jackson',
827+
age = 12, city = 'New York',
828+
}, {
829+
-- bucket_id is 401, storage is s-2
830+
id = 2, name = 'Mary', last_name = 'Brown',
831+
age = 46, city = 'Los Angeles',
832+
}, {
833+
-- bucket_id is 2804, storage is s-1
834+
id = 3, name = 'David', last_name = 'Smith',
835+
age = 33, city = 'Los Angeles',
836+
}, {
837+
-- bucket_id is 1161, storage is s-2
838+
id = 4, name = 'William', last_name = 'White',
839+
age = 81, city = 'Chicago',
840+
},
841+
})
842+
843+
table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)
844+
845+
local stat_a = storage_stat.collect(g.cluster)
846+
847+
-- Case: no conditions, just bucket id.
848+
local rows = g.cluster.main_server.net_box:eval([[
849+
local crud = require('crud')
850+
851+
return crud.pairs(...):totable()
852+
]], {
853+
'customers',
854+
nil,
855+
{bucket_id = 2804, timeout = 1},
856+
})
857+
t.assert_equals(rows, {
858+
{3, 2804, 'David', 'Smith', 33, 'Los Angeles'},
859+
})
860+
861+
local stat_b = storage_stat.collect(g.cluster)
862+
t.assert_equals(storage_stat.diff(stat_b, stat_a), {
863+
['s-1'] = {
864+
select_requests = 1,
865+
},
866+
['s-2'] = {
867+
select_requests = 0,
868+
},
869+
})
870+
871+
-- Case: EQ on secondary index, which is not in the sharding
872+
-- index (primary index in the case).
873+
local rows = g.cluster.main_server.net_box:eval([[
874+
local crud = require('crud')
875+
876+
return crud.pairs(...):totable()
877+
]], {
878+
'customers',
879+
{{'==', 'age', 81}},
880+
{bucket_id = 1161, timeout = 1},
881+
})
882+
t.assert_equals(rows, {
883+
{4, 1161, 'William', 'White', 81, 'Chicago'},
884+
})
885+
886+
local stat_c = storage_stat.collect(g.cluster)
887+
t.assert_equals(storage_stat.diff(stat_c, stat_b), {
888+
['s-1'] = {
889+
select_requests = 0,
890+
},
891+
['s-2'] = {
892+
select_requests = 1,
893+
},
894+
})
895+
end

test/integration/select_test.lua

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ local crud = require('crud')
66
local crud_utils = require('crud.common.utils')
77

88
local helpers = require('test.helper')
9+
local storage_stat = require('test.helpers.storage_stat')
910

1011
local pgroup = t.group('select', {
1112
{engine = 'memtx'},
@@ -26,6 +27,13 @@ pgroup.before_all(function(g)
2627
g.cluster:start()
2728

2829
g.space_format = g.cluster.servers[2].net_box.space.customers:format()
30+
31+
helpers.call_on_storages(g.cluster, function(server)
32+
server.net_box:eval([[
33+
local storage_stat = require('test.helpers.storage_stat')
34+
storage_stat.init_on_storage()
35+
]])
36+
end)
2937
end)
3038

3139
pgroup.after_all(function(g) helpers.stop_cluster(g.cluster) end)
@@ -1581,3 +1589,74 @@ pgroup.test_opts_not_damaged = function(g)
15811589
t.assert_equals(err, nil)
15821590
t.assert_equals(new_select_opts, select_opts)
15831591
end
1592+
1593+
-- gh-220: bucket_id argument is ignored when it cannot be deduced
1594+
-- from provided select/pairs conditions.
1595+
pgroup.test_select_no_map_reduce = function(g)
1596+
local customers = helpers.insert_objects(g, 'customers', {
1597+
{
1598+
-- bucket_id is 477, storage is s-2
1599+
id = 1, name = 'Elizabeth', last_name = 'Jackson',
1600+
age = 12, city = 'New York',
1601+
}, {
1602+
-- bucket_id is 401, storage is s-2
1603+
id = 2, name = 'Mary', last_name = 'Brown',
1604+
age = 46, city = 'Los Angeles',
1605+
}, {
1606+
-- bucket_id is 2804, storage is s-1
1607+
id = 3, name = 'David', last_name = 'Smith',
1608+
age = 33, city = 'Los Angeles',
1609+
}, {
1610+
-- bucket_id is 1161, storage is s-2
1611+
id = 4, name = 'William', last_name = 'White',
1612+
age = 81, city = 'Chicago',
1613+
},
1614+
})
1615+
1616+
table.sort(customers, function(obj1, obj2) return obj1.id < obj2.id end)
1617+
1618+
local stat_a = storage_stat.collect(g.cluster)
1619+
1620+
-- Case: no conditions, just bucket id.
1621+
local result, err = g.cluster.main_server.net_box:call('crud.select', {
1622+
'customers',
1623+
nil,
1624+
{bucket_id = 2804, timeout = 1},
1625+
})
1626+
t.assert_equals(err, nil)
1627+
t.assert_equals(result.rows, {
1628+
{3, 2804, 'David', 'Smith', 33, 'Los Angeles'},
1629+
})
1630+
1631+
local stat_b = storage_stat.collect(g.cluster)
1632+
t.assert_equals(storage_stat.diff(stat_b, stat_a), {
1633+
['s-1'] = {
1634+
select_requests = 1,
1635+
},
1636+
['s-2'] = {
1637+
select_requests = 0,
1638+
},
1639+
})
1640+
1641+
-- Case: EQ on secondary index, which is not in the sharding
1642+
-- index (primary index in the case).
1643+
local result, err = g.cluster.main_server.net_box:call('crud.select', {
1644+
'customers',
1645+
{{'==', 'age', 81}},
1646+
{bucket_id = 1161, timeout = 1},
1647+
})
1648+
t.assert_equals(err, nil)
1649+
t.assert_equals(result.rows, {
1650+
{4, 1161, 'William', 'White', 81, 'Chicago'},
1651+
})
1652+
1653+
local stat_c = storage_stat.collect(g.cluster)
1654+
t.assert_equals(storage_stat.diff(stat_c, stat_b), {
1655+
['s-1'] = {
1656+
select_requests = 0,
1657+
},
1658+
['s-2'] = {
1659+
select_requests = 1,
1660+
},
1661+
})
1662+
end

0 commit comments

Comments
 (0)