Python实战开发及案例分析(11)—— b树

发布于:2024-05-08 ⋅ 阅读:(24) ⋅ 点赞:(0)

        B树(B-Tree)是一种自平衡的树数据结构,它允许高效地进行查找、插入、删除等操作,广泛应用于数据库和文件系统。B树的每个节点可以包含多个键,并且有多个子节点。B树的关键特点包括:

  • 每个节点可以有多个子节点(度数m)。
  • 每个节点存储至少ceil(m/2) - 1个键,最多m - 1个键。
  • 根节点可以存储少于ceil(m/2) - 1个键。
  • 所有叶子节点处于同一层次上。

案例分析:使用 Python 实现 B 树

Python 实现 B 树

class BTreeNode:
    def __init__(self, t, leaf=False):
        self.t = t  # 最小度数
        self.leaf = leaf  # 是否是叶节点
        self.keys = []  # 节点的键
        self.children = []  # 节点的子节点

class BTree:
    def __init__(self, t):
        self.root = BTreeNode(t, True)
        self.t = t  # 最小度数

    def search(self, k, node=None):
        if node is None:
            node = self.root

        # 找到第一个大于或等于k的键
        i = 0
        while i < len(node.keys) and k > node.keys[i]:
            i += 1

        if i < len(node.keys) and node.keys[i] == k:
            return node, i

        if node.leaf:
            return None

        return self.search(k, node.children[i])

    def insert(self, k):
        root = self.root
        if len(root.keys) == (2 * self.t) - 1:
            new_root = BTreeNode(self.t, False)
            new_root.children.append(self.root)
            self.split_child(new_root, 0)
            self.root = new_root
            self.insert_non_full(new_root, k)
        else:
            self.insert_non_full(root, k)

    def insert_non_full(self, node, k):
        i = len(node.keys) - 1

        if node.leaf:
            node.keys.append(None)
            while i >= 0 and k < node.keys[i]:
                node.keys[i + 1] = node.keys[i]
                i -= 1
            node.keys[i + 1] = k
        else:
            while i >= 0 and k < node.keys[i]:
                i -= 1
            i += 1
            if len(node.children[i].keys) == (2 * self.t) - 1:
                self.split_child(node, i)
                if k > node.keys[i]:
                    i += 1
            self.insert_non_full(node.children[i], k)

    def split_child(self, parent, i):
        t = self.t
        new_node = BTreeNode(t, parent.children[i].leaf)
        node_to_split = parent.children[i]
        parent.keys.insert(i, node_to_split.keys[t - 1])
        parent.children.insert(i + 1, new_node)
        new_node.keys = node_to_split.keys[t:t + t - 1]
        node_to_split.keys = node_to_split.keys[0:t - 1]

        if not node_to_split.leaf:
            new_node.children = node_to_split.children[t:t + t]
            node_to_split.children = node_to_split.children[0:t]

    def traverse(self, node=None):
        if node is None:
            node = self.root

        i = 0
        for i in range(len(node.keys)):
            if not node.leaf:
                self.traverse(node.children[i])
            print(node.keys[i], end=" ")

        if not node.leaf:
            self.traverse(node.children[i + 1])

    def delete(self, k):
        self._delete(self.root, k)

        if len(self.root.keys) == 0:
            if not self.root.leaf:
                self.root = self.root.children[0]
            else:
                self.root = None

    def _delete(self, node, k):
        t = self.t
        i = 0
        while i < len(node.keys) and k > node.keys[i]:
            i += 1

        if i < len(node.keys) and node.keys[i] == k:
            if node.leaf:
                node.keys.pop(i)
            else:
                self._delete_non_leaf(node, i)
        else:
            if node.leaf:
                print(f"Key {k} not found in the tree")
                return

            flag = (i == len(node.keys))

            if len(node.children[i].keys) < t:
                self._fill(node, i)

            if flag and i > len(node.keys):
                self._delete(node.children[i - 1], k)
            else:
                self._delete(node.children[i], k)

    def _delete_non_leaf(self, node, i):
        k = node.keys[i]
        t = self.t

        if len(node.children[i].keys) >= t:
            pred = self._get_pred(node, i)
            node.keys[i] = pred
            self._delete(node.children[i], pred)

        elif len(node.children[i + 1].keys) >= t:
            succ = self._get_succ(node, i)
            node.keys[i] = succ
            self._delete(node.children[i + 1], succ)

        else:
            self._merge(node, i)
            self._delete(node.children[i], k)

    def _get_pred(self, node, i):
        curr = node.children[i]
        while not curr.leaf:
            curr = curr.children[len(curr.keys)]
        return curr.keys[len(curr.keys) - 1]

    def _get_succ(self, node, i):
        curr = node.children[i + 1]
        while not curr.leaf:
            curr = curr.children[0]
        return curr.keys[0]

    def _fill(self, node, i):
        t = self.t

        if i != 0 and len(node.children[i - 1].keys) >= t:
            self._borrow_from_prev(node, i)
        elif i != len(node.children) - 1 and len(node.children[i + 1].keys) >= t:
            self._borrow_from_next(node, i)
        else:
            if i != len(node.children) - 1:
                self._merge(node, i)
            else:
                self._merge(node, i - 1)

    def _borrow_from_prev(self, node, i):
        child = node.children[i]
        sibling = node.children[i - 1]

        child.keys.insert(0, node.keys[i - 1])

        if not child.leaf:
            child.children.insert(0, sibling.children.pop())

        node.keys[i - 1] = sibling.keys.pop()

    def _borrow_from_next(self, node, i):
        child = node.children[i]
        sibling = node.children[i + 1]

        child.keys.append(node.keys[i])

        if not child.leaf:
            child.children.append(sibling.children.pop(0))

        node.keys[i] = sibling.keys.pop(0)

    def _merge(self, node, i):
        t = self.t
        child = node.children[i]
        sibling = node.children[i + 1]

        child.keys.append(node.keys.pop(i))

        child.keys.extend(sibling.keys)

        if not child.leaf:
            child.children.extend(sibling.children)

        node.children.pop(i + 1)

# 示例使用
btree = BTree(3)

elements = [8, 9, 10, 11, 15, 20, 17, 25, 30, 35, 40, 45]
for elem in elements:
    btree.insert(elem)

print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nDeleting 15...")
btree.delete(15)
print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nDeleting 30...")
btree.delete(30)
print("Traversal of the B-Tree:")
btree.traverse()

解释与总结

        在以上代码中,我们实现了一个简单的B树,支持查找、插入和删除操作。以下是每个部分的解释:

  1. BTreeNode类

    • 表示B树的节点,包含keyschildren列表。
  2. BTree类

    • 管理整个B树结构。
    • search:查找键是否存在。
    • insert:插入键。
    • split_child:当节点满时拆分节点。
    • delete:删除键。
    • traverse:遍历树的所有节点。
  3. 示例代码

    • 初始化一个B树实例,并进行一系列的插入、删除和遍历操作,展示B树的完整功能。

结论

        通过这个简单的实现,我们可以看到B树在查找、插入和删除操作上具有高效性,并且能够保持平衡。它适用于需要频繁读写数据的场景,如数据库和文件系统。

        继续深入探讨 B 树的应用和功能增强,我们可以实现以下功能:

  1. 查找功能:添加在树中查找某个键的详细方法。
  2. 打印功能:打印 B 树结构,方便理解树的结构。
  3. 增强的删除功能:实现删除任意节点的完整逻辑。

增强版 B 树代码

class BTreeNode:
    def __init__(self, t, leaf=False):
        self.t = t  # 最小度数
        self.leaf = leaf  # 是否是叶节点
        self.keys = []  # 节点的键
        self.children = []  # 节点的子节点

    def is_full(self):
        return len(self.keys) == (2 * self.t) - 1

class BTree:
    def __init__(self, t):
        self.root = BTreeNode(t, True)
        self.t = t  # 最小度数

    def search(self, k, node=None):
        if node is None:
            node = self.root

        # 找到第一个大于或等于k的键
        i = 0
        while i < len(node.keys) and k > node.keys[i]:
            i += 1

        if i < len(node.keys) and node.keys[i] == k:
            return node, i

        if node.leaf:
            return None

        return self.search(k, node.children[i])

    def insert(self, k):
        root = self.root
        if root.is_full():
            new_root = BTreeNode(self.t, False)
            new_root.children.append(self.root)
            self.split_child(new_root, 0)
            self.root = new_root
            self.insert_non_full(new_root, k)
        else:
            self.insert_non_full(root, k)

    def insert_non_full(self, node, k):
        i = len(node.keys) - 1

        if node.leaf:
            node.keys.append(None)
            while i >= 0 and k < node.keys[i]:
                node.keys[i + 1] = node.keys[i]
                i -= 1
            node.keys[i + 1] = k
        else:
            while i >= 0 and k < node.keys[i]:
                i -= 1
            i += 1
            if node.children[i].is_full():
                self.split_child(node, i)
                if k > node.keys[i]:
                    i += 1
            self.insert_non_full(node.children[i], k)

    def split_child(self, parent, i):
        t = self.t
        new_node = BTreeNode(t, parent.children[i].leaf)
        node_to_split = parent.children[i]
        parent.keys.insert(i, node_to_split.keys[t - 1])
        parent.children.insert(i + 1, new_node)
        new_node.keys = node_to_split.keys[t:t + t - 1]
        node_to_split.keys = node_to_split.keys[0:t - 1]

        if not node_to_split.leaf:
            new_node.children = node_to_split.children[t:t + t]
            node_to_split.children = node_to_split.children[0:t]

    def traverse(self, node=None, depth=0):
        if node is None:
            node = self.root

        print(" " * 4 * depth + "Node:", node.keys)
        i = 0
        for i in range(len(node.keys)):
            if not node.leaf:
                self.traverse(node.children[i], depth + 1)
        if not node.leaf:
            self.traverse(node.children[i + 1], depth + 1)

    def delete(self, k):
        self._delete(self.root, k)

        if len(self.root.keys) == 0:
            if not self.root.leaf:
                self.root = self.root.children[0]
            else:
                self.root = None

    def _delete(self, node, k):
        t = self.t
        i = 0
        while i < len(node.keys) and k > node.keys[i]:
            i += 1

        if i < len(node.keys) and node.keys[i] == k:
            if node.leaf:
                node.keys.pop(i)
            else:
                self._delete_non_leaf(node, i)
        else:
            if node.leaf:
                print(f"Key {k} not found in the tree")
                return

            flag = (i == len(node.keys))

            if len(node.children[i].keys) < t:
                self._fill(node, i)

            if flag and i > len(node.keys):
                self._delete(node.children[i - 1], k)
            else:
                self._delete(node.children[i], k)

    def _delete_non_leaf(self, node, i):
        k = node.keys[i]
        t = self.t

        if len(node.children[i].keys) >= t:
            pred = self._get_pred(node, i)
            node.keys[i] = pred
            self._delete(node.children[i], pred)

        elif len(node.children[i + 1].keys) >= t:
            succ = self._get_succ(node, i)
            node.keys[i] = succ
            self._delete(node.children[i + 1], succ)

        else:
            self._merge(node, i)
            self._delete(node.children[i], k)

    def _get_pred(self, node, i):
        curr = node.children[i]
        while not curr.leaf:
            curr = curr.children[len(curr.keys)]
        return curr.keys[len(curr.keys) - 1]

    def _get_succ(self, node, i):
        curr = node.children[i + 1]
        while not curr.leaf:
            curr = curr.children[0]
        return curr.keys[0]

    def _fill(self, node, i):
        t = self.t

        if i != 0 and len(node.children[i - 1].keys) >= t:
            self._borrow_from_prev(node, i)
        elif i != len(node.children) - 1 and len(node.children[i + 1].keys) >= t:
            self._borrow_from_next(node, i)
        else:
            if i != len(node.children) - 1:
                self._merge(node, i)
            else:
                self._merge(node, i - 1)

    def _borrow_from_prev(self, node, i):
        child = node.children[i]
        sibling = node.children[i - 1]

        child.keys.insert(0, node.keys[i - 1])

        if not child.leaf:
            child.children.insert(0, sibling.children.pop())

        node.keys[i - 1] = sibling.keys.pop()

    def _borrow_from_next(self, node, i):
        child = node.children[i]
        sibling = node.children[i + 1]

        child.keys.append(node.keys[i])

        if not child.leaf:
            child.children.append(sibling.children.pop(0))

        node.keys[i] = sibling.keys.pop(0)

    def _merge(self, node, i):
        t = self.t
        child = node.children[i]
        sibling = node.children[i + 1]

        child.keys.append(node.keys.pop(i))

        child.keys.extend(sibling.keys)

        if not child.leaf:
            child.children.extend(sibling.children)

        node.children.pop(i + 1)

# 示例使用
btree = BTree(3)

elements = [8, 9, 10, 11, 15, 20, 17, 25, 30, 35, 40, 45]
for elem in elements:
    btree.insert(elem)

print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nDeleting 15...")
btree.delete(15)
print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nDeleting 30...")
btree.delete(30)
print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nSearching for 20...")
result = btree.search(20)
if result:
    node, idx = result
    print(f"Found key 20 in node with keys: {node.keys}")
else:
    print("Key 20 not found in the B-Tree.")

解释与总结

  1. 增强的打印功能

    • traverse:增强了打印逻辑,包含深度信息以可视化树结构。
  2. 搜索功能

    • search:用于在树中查找键的位置。
  3. 完整的删除功能

    • _delete_non_leaf:删除非叶节点的键。
    • _get_pred_get_succ:获取前驱和后继键。
    • _fill_borrow_from_prev_borrow_from_next_merge:在删除操作中保持 B 树平衡。

结论

通过完整的B树实现,包含搜索、插入、删除和遍历功能,使得 B 树在数据库和文件系统等高效查找领域具备广泛的应用。

        让我们继续增强和完善 B 树的功能,并增加更多的示例应用。以下是进一步的增强:

  1. 打印更详细的 B 树结构:可视化显示每个节点的层次和子节点的键。
  2. 修改树的最小度数(t:允许动态调整最小度数。
  3. 统计功能:包括计算树的高度和统计总节点数、总键数等。

增强版 B 树代码

class BTreeNode:
    def __init__(self, t, leaf=False):
        self.t = t  # 最小度数
        self.leaf = leaf  # 是否是叶节点
        self.keys = []  # 节点的键
        self.children = []  # 节点的子节点

    def is_full(self):
        return len(self.keys) == (2 * self.t) - 1

class BTree:
    def __init__(self, t):
        self.root = BTreeNode(t, True)
        self.t = t  # 最小度数

    def search(self, k, node=None):
        if node is None:
            node = self.root

        # 找到第一个大于或等于k的键
        i = 0
        while i < len(node.keys) and k > node.keys[i]:
            i += 1

        if i < len(node.keys) and node.keys[i] == k:
            return node, i

        if node.leaf:
            return None

        return self.search(k, node.children[i])

    def insert(self, k):
        root = self.root
        if root.is_full():
            new_root = BTreeNode(self.t, False)
            new_root.children.append(self.root)
            self.split_child(new_root, 0)
            self.root = new_root
            self.insert_non_full(new_root, k)
        else:
            self.insert_non_full(root, k)

    def insert_non_full(self, node, k):
        i = len(node.keys) - 1

        if node.leaf:
            node.keys.append(None)
            while i >= 0 and k < node.keys[i]:
                node.keys[i + 1] = node.keys[i]
                i -= 1
            node.keys[i + 1] = k
        else:
            while i >= 0 and k < node.keys[i]:
                i -= 1
            i += 1
            if node.children[i].is_full():
                self.split_child(node, i)
                if k > node.keys[i]:
                    i += 1
            self.insert_non_full(node.children[i], k)

    def split_child(self, parent, i):
        t = self.t
        new_node = BTreeNode(t, parent.children[i].leaf)
        node_to_split = parent.children[i]
        parent.keys.insert(i, node_to_split.keys[t - 1])
        parent.children.insert(i + 1, new_node)
        new_node.keys = node_to_split.keys[t:t + t - 1]
        node_to_split.keys = node_to_split.keys[0:t - 1]

        if not node_to_split.leaf:
            new_node.children = node_to_split.children[t:t + t]
            node_to_split.children = node_to_split.children[0:t]

    def traverse(self, node=None, depth=0):
        if node is None:
            node = self.root

        print(" " * 4 * depth + f"Level {depth}: {node.keys}")
        i = 0
        for i in range(len(node.keys)):
            if not node.leaf:
                self.traverse(node.children[i], depth + 1)
        if not node.leaf:
            self.traverse(node.children[i + 1], depth + 1)

    def delete(self, k):
        self._delete(self.root, k)

        if len(self.root.keys) == 0:
            if not self.root.leaf:
                self.root = self.root.children[0]
            else:
                self.root = None

    def _delete(self, node, k):
        t = self.t
        i = 0
        while i < len(node.keys) and k > node.keys[i]:
            i += 1

        if i < len(node.keys) and node.keys[i] == k:
            if node.leaf:
                node.keys.pop(i)
            else:
                self._delete_non_leaf(node, i)
        else:
            if node.leaf:
                print(f"Key {k} not found in the tree")
                return

            flag = (i == len(node.keys))

            if len(node.children[i].keys) < t:
                self._fill(node, i)

            if flag and i > len(node.keys):
                self._delete(node.children[i - 1], k)
            else:
                self._delete(node.children[i], k)

    def _delete_non_leaf(self, node, i):
        k = node.keys[i]
        t = self.t

        if len(node.children[i].keys) >= t:
            pred = self._get_pred(node, i)
            node.keys[i] = pred
            self._delete(node.children[i], pred)

        elif len(node.children[i + 1].keys) >= t:
            succ = self._get_succ(node, i)
            node.keys[i] = succ
            self._delete(node.children[i + 1], succ)

        else:
            self._merge(node, i)
            self._delete(node.children[i], k)

    def _get_pred(self, node, i):
        curr = node.children[i]
        while not curr.leaf:
            curr = curr.children[len(curr.keys)]
        return curr.keys[len(curr.keys) - 1]

    def _get_succ(self, node, i):
        curr = node.children[i + 1]
        while not curr.leaf:
            curr = curr.children[0]
        return curr.keys[0]

    def _fill(self, node, i):
        t = self.t

        if i != 0 and len(node.children[i - 1].keys) >= t:
            self._borrow_from_prev(node, i)
        elif i != len(node.children) - 1 and len(node.children[i + 1].keys) >= t:
            self._borrow_from_next(node, i)
        else:
            if i != len(node.children) - 1:
                self._merge(node, i)
            else:
                self._merge(node, i - 1)

    def _borrow_from_prev(self, node, i):
        child = node.children[i]
        sibling = node.children[i - 1]

        child.keys.insert(0, node.keys[i - 1])

        if not child.leaf:
            child.children.insert(0, sibling.children.pop())

        node.keys[i - 1] = sibling.keys.pop()

    def _borrow_from_next(self, node, i):
        child = node.children[i]
        sibling = node.children[i + 1]

        child.keys.append(node.keys[i])

        if not child.leaf:
            child.children.append(sibling.children.pop(0))

        node.keys[i] = sibling.keys.pop(0)

    def _merge(self, node, i):
        t = self.t
        child = node.children[i]
        sibling = node.children[i + 1]

        child.keys.append(node.keys.pop(i))

        child.keys.extend(sibling.keys)

        if not child.leaf:
            child.children.extend(sibling.children)

        node.children.pop(i + 1)

    def get_height(self, node=None):
        if node is None:
            node = self.root

        if node.leaf:
            return 1
        return 1 + self.get_height(node.children[0])

    def count_nodes_and_keys(self, node=None):
        if node is None:
            node = self.root

        node_count = 1
        key_count = len(node.keys)

        if not node.leaf:
            for child in node.children:
                child_node_count, child_key_count = self.count_nodes_and_keys(child)
                node_count += child_node_count
                key_count += child_key_count

        return node_count, key_count

# 示例使用
btree = BTree(3)

elements = [8, 9, 10, 11, 15, 20, 17, 25, 30, 35, 40, 45]
for elem in elements:
    btree.insert(elem)

print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nDeleting 15...")
btree.delete(15)
print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nDeleting 30...")
btree.delete(30)
print("Traversal of the B-Tree:")
btree.traverse()

print("\n\nSearching for 20...")
result = btree.search(20)
if result:
    node, idx = result
    print(f"Found key 20 in node with keys: {node.keys}")
else:
    print("Key 20 not found in the B-Tree.")

print("\n\nStatistics:")
height = btree.get_height()
nodes, keys = btree.count_nodes_and_keys()
print(f"Height of the B-Tree: {height}")
print(f"Total nodes in the B-Tree: {nodes}")
print(f"Total keys in the B-Tree: {keys}")

解释与总结

  1. B 树层次打印

    • traverse 方法打印每一层的键值。
  2. 统计功能

    • get_height:计算树的高度。
    • count_nodes_and_keys:计算节点和键的总数。
  3. 完整的插入与删除功能

    • insertdelete 操作保持树的平衡性。

结论

        通过实现增强版的 B 树功能,我们可以看到这是一种高效的多路搜索树结构,适合用于数据库索引和文件系统管理。利用这些功能增强,B 树的应用前景更加广阔。