From 32e58227cd10d9a91c349433fc20bd0cb36869f1 Mon Sep 17 00:00:00 2001 From: Tim Beale Date: Thu, 1 Nov 2018 09:42:33 +1300 Subject: [PATCH] traffic_replay: Write group memberships once per group Each user-group membership was being written to the DB in a single operation. With large numbers of users (e.g. 10,000 in average 15 groups each), this becomes a lot of operations (e.g. 150,000). This patch reworks the code so that we write the memberships for a group in one operation. E.g. instead of 150,000 DB operations, we might make 1,500. This makes writing the group memberships several times faster. Note that there is a performance vs memory tradeoff. When we hit 10,000+ members in a group, memory-usage in the underlying DB modify operation becomes very inefficient/costly. So we avoid potential memory usage problems by writing no more than 1,000 users to a group at once. Signed-off-by: Tim Beale Reviewed-by: Douglas Bagnall --- python/samba/emulate/traffic.py | 41 +++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/python/samba/emulate/traffic.py b/python/samba/emulate/traffic.py index 0087b03a379..ab23652a271 100644 --- a/python/samba/emulate/traffic.py +++ b/python/samba/emulate/traffic.py @@ -1944,24 +1944,41 @@ class GroupAssignments(object): def add_users_to_groups(db, instance_id, assignments): """Takes the assignments of users to groups and applies them to the DB.""" + for group in assignments.get_groups(): + users_in_group = assignments.users_in_group(group) + if len(users_in_group) == 0: + continue + + # Split up the users into chunks, so we write no more than 1K at a + # time.
(Minimizing the DB modifies is more efficient, but writing + # 10K+ users to a single group becomes inefficient memory-wise) + for chunk in range(0, len(users_in_group), 1000): + chunk_of_users = users_in_group[chunk:chunk + 1000] + add_group_members(db, instance_id, group, chunk_of_users) + + +def add_group_members(db, instance_id, group, users_in_group): + """Adds the given users to group specified.""" + + start = time.time() ou = ou_name(db, instance_id) def build_dn(name): return("cn=%s,%s" % (name, ou)) - for group in assignments.get_groups(): - for user in assignments.users_in_group(group): - user_dn = build_dn(user_name(instance_id, user)) - group_dn = build_dn(group_name(instance_id, group)) + group_dn = build_dn(group_name(instance_id, group)) + m = ldb.Message() + m.dn = ldb.Dn(db, group_dn) - m = ldb.Message() - m.dn = ldb.Dn(db, group_dn) - m["member"] = ldb.MessageElement(user_dn, ldb.FLAG_MOD_ADD, "member") - start = time.time() - db.modify(m) - end = time.time() - duration = end - start - LOGGER.info("%f\t0\tadd\tuser\t%f\tTrue\t" % (end, duration)) + for user in users_in_group: + user_dn = build_dn(user_name(instance_id, user)) + idx = "member-" + str(user) + m[idx] = ldb.MessageElement(user_dn, ldb.FLAG_MOD_ADD, "member") + + db.modify(m) + end = time.time() + duration = end - start + LOGGER.info("%f\t0\tadd\tuser(s)\t%f\tTrue\t" % (end, duration)) def generate_stats(statsdir, timing_file):