Skip to content

Commit ab86c7f

Browse files
committed
rebalancer: add logging of routes
This patch adds logging of the rebalancer routes. The log now includes the source storage, the number of buckets, and the destination storage to which the buckets will be moved. Since the rebalancer service now logs the sent routes with a different message, the `rebalancer/rebalancer.test.lua` and `rebalancer/stress_add_remove_several_rs.test.lua` tests are updated to wait for the new message. Part of #212. NO_DOC=bugfix
1 parent 93c964a commit ab86c7f

File tree

6 files changed

+32
-10
lines changed

6 files changed

+32
-10
lines changed

test/rebalancer/rebalancer.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ test_run:switch('box_1_a')
149149
vshard.storage.rebalancer_enable()
150150
---
151151
...
152-
wait_rebalancer_state("Rebalance routes are sent", test_run)
152+
wait_rebalancer_state("The following rebalancer routes were sent", test_run)
153153
---
154154
...
155155
wait_rebalancer_state('The cluster is balanced ok', test_run)
@@ -239,7 +239,7 @@ cfg.rebalancer_disbalance_threshold = 0.01
239239
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
240240
---
241241
...
242-
wait_rebalancer_state('Rebalance routes are sent', test_run)
242+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
243243
---
244244
...
245245
wait_rebalancer_state('The cluster is balanced ok', test_run)

test/rebalancer/rebalancer.test.lua

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ util.map_bucket_protection(test_run, {REPLICASET_1}, true)
7878

7979
test_run:switch('box_1_a')
8080
vshard.storage.rebalancer_enable()
81-
wait_rebalancer_state("Rebalance routes are sent", test_run)
81+
wait_rebalancer_state("The following rebalancer routes were sent", test_run)
8282

8383
wait_rebalancer_state('The cluster is balanced ok', test_run)
8484
_bucket.index.status:count({vshard.consts.BUCKET.ACTIVE})
@@ -118,7 +118,7 @@ _bucket.index.status:count({vshard.consts.BUCKET.ACTIVE})
118118
-- Return 1%.
119119
cfg.rebalancer_disbalance_threshold = 0.01
120120
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
121-
wait_rebalancer_state('Rebalance routes are sent', test_run)
121+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
122122
wait_rebalancer_state('The cluster is balanced ok', test_run)
123123
_bucket.index.status:count({vshard.consts.BUCKET.ACTIVE})
124124
_bucket.index.status:min({vshard.consts.BUCKET.ACTIVE})

test/rebalancer/stress_add_remove_several_rs.result

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ add_replicaset()
175175
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
176176
---
177177
...
178-
wait_rebalancer_state('Rebalance routes are sent', test_run)
178+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
179179
---
180180
...
181181
-- Now, add a second replicaset.
@@ -422,7 +422,7 @@ remove_second_replicaset_first_stage()
422422
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
423423
---
424424
...
425-
wait_rebalancer_state('Rebalance routes are sent', test_run)
425+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
426426
---
427427
...
428428
-- Rebalancing has been started - now remove second replicaset.

test/rebalancer/stress_add_remove_several_rs.test.lua

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ fiber.sleep(0.5)
7171
test_run:switch('box_1_a')
7272
add_replicaset()
7373
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
74-
wait_rebalancer_state('Rebalance routes are sent', test_run)
74+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
7575

7676
-- Now, add a second replicaset.
7777

@@ -153,7 +153,7 @@ fiber.sleep(0.5)
153153
test_run:switch('box_1_a')
154154
remove_second_replicaset_first_stage()
155155
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)
156-
wait_rebalancer_state('Rebalance routes are sent', test_run)
156+
wait_rebalancer_state('The following rebalancer routes were sent', test_run)
157157
-- Rebalancing has been started - now remove second replicaset.
158158
remove_replicaset_first_stage()
159159
vshard.storage.cfg(cfg, util.name_to_uuid.box_1_a)

test/storage-luatest/storage_1_1_1_test.lua

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,24 @@ rebalancer_recovery_group.test_no_logs_while_unsuccess_recovery = function(g)
199199
wait_for_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
200200
hanged_bucket_id_2)
201201
end
202+
203+
--
-- Check that the rebalancer logs the routes it has sent.
--
-- The test starts a bucket move from replica_2_a and from replica_3_a to
-- replica_1_a (via start_bucket_move), wakes the rebalancer on replica_1_a up
-- until it reports applying routes, and then looks in the replica_1_a log for
-- the JSON-encoded routes and for the 'balanced ok' message.
--
rebalancer_recovery_group.test_rebalancer_routes_logging = function(g)
    local moved_bucket_from_2 = vtest.storage_first_bucket(g.replica_2_a)
    start_bucket_move(g.replica_2_a, g.replica_1_a, moved_bucket_from_2)
    local moved_bucket_from_3 = vtest.storage_first_bucket(g.replica_3_a)
    start_bucket_move(g.replica_3_a, g.replica_1_a, moved_bucket_from_3)
    -- Keep waking the rebalancer up until the routes are reported as being
    -- applied; the assertion failure is what makes retrying() try again.
    t.helpers.retrying({timeout = 60}, function()
        g.replica_1_a:exec(function()
            ivshard.storage.rebalancer_wakeup()
        end)
        t.assert(g.replica_1_a:grep_log('Apply rebalancer routes with 1 ' ..
                                        'workers'))
    end)
    -- Expected routes: replicaset 1 sends one bucket to replicaset 3 and one
    -- to replicaset 2.
    -- NOTE(review): the expected string fixes the order of the JSON keys
    -- (replicaset 3 before replicaset 2) - confirm the encoder's key order is
    -- stable here.
    -- NOTE(review): grep_log() matching semantics are not visible from this
    -- test; if it treats its argument as a Lua pattern, the '-' characters of
    -- the UUIDs are magic and would need escaping - confirm.
    local rebalancer_routes_msg = string.format(
        "{\"%s\":{\"%s\":1,\"%s\":1}}", g.replica_1_a:replicaset_uuid(),
        g.replica_3_a:replicaset_uuid(), g.replica_2_a:replicaset_uuid())
    t.assert(g.replica_1_a:grep_log(rebalancer_routes_msg))
    -- Wait until the cluster is reported as balanced. The grep_log() result
    -- must be asserted: without a raised error retrying() returns after the
    -- very first call and nothing is actually waited for or checked.
    t.helpers.retrying({}, function()
        t.assert(g.replica_1_a:grep_log('The cluster is balanced ok.'))
    end)
end

vshard/storage/init.lua

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2903,8 +2903,9 @@ local function rebalancer_service_f(service)
29032903
goto continue
29042904
end
29052905
end
2906-
log.info('Rebalance routes are sent. Schedule next wakeup after '..
2907-
'%f seconds', consts.REBALANCER_WORK_INTERVAL)
2906+
log.info('The following rebalancer routes were sent: %s. ' ..
2907+
'Schedule next wakeup after %f seconds', json_encode(routes),
2908+
consts.REBALANCER_WORK_INTERVAL)
29082909
service:set_activity('idling')
29092910
lfiber.testcancel()
29102911
lfiber.sleep(consts.REBALANCER_WORK_INTERVAL)

0 commit comments

Comments
 (0)