Python数据结构性能优化

选择正确的数据结构是性能优化的基础，不同数据结构在不同操作上有显著性能差异。

时间复杂度对比

数据结构	查找	插入	删除	遍历
list	O(n)	O(1)尾部 / O(n)中间	O(n)	O(n)
dict	O(1)	O(1)	O(1)	O(n)
set	O(1)	O(1)	O(1)	O(n)
deque	O(n)	O(1)两端	O(1)两端	O(n)
堆(heapq)	O(n)	O(log n)	O(log n)	O(n)

列表优化

预分配大小

Python

# 性能对比
import time

def grow_list():
    lst = []
    for i in range(1000000):
        lst.append(i)
    return lst

def preallocate():
    lst = [None] * 1000000
    for i in range(1000000):
        lst[i] = i
    return lst

# 预分配避免了多次扩容，性能更优

列表推导式

Python

# 低效：循环append
result = []
for i in range(1000000):
    result.append(i * 2)

# 高效：列表推导式
result = [i * 2 for i in range(1000000)]  # 约2倍速度提升

避免频繁插入删除

Python

# 低效：列表中间插入
lst = list(range(1000000))
for i in range(1000):
    lst.insert(500000, i)  # 每次O(n)

# 高效：使用deque
from collections import deque
d = deque(range(1000000))
for i in range(1000):
    d.insert(500000, i)  # deque两端操作O(1)

批量操作

Python

# 低效：逐个添加
lst = []
for item in items:
    lst.append(item)

# 高效：extend批量添加
lst = []
lst.extend(items)

# 或直接创建
lst = list(items)

字典优化

直接查找优于线性搜索

Python

# 低效：列表查找
names = ['alice', 'bob', 'charlie', ...]  # 100万条
if 'bob' in names:  # O(n)

# 高效：字典查找
names_dict = {name: index for name, index in enumerate(names)}
if 'bob' in names_dict:  # O(1)

字典推导式

Python

# 低效
result = {}
for k, v in data:
    result[k] = v * 2

# 高效
result = {k: v * 2 for k, v in data}

defaultdict避免键检查

Python

from collections import defaultdict

# 低效：每次检查键是否存在
counts = {}
for word in words:
    if word not in counts:
        counts[word] = 0
    counts[word] += 1

# 高效：defaultdict自动初始化
counts = defaultdict(int)
for word in words:
    counts[word] += 1

字典视图避免复制

Python

d = {'a': 1, 'b': 2, 'c': 3}

# 视图：不创建新数据
keys = d.keys()    # 返回视图，O(1)
values = d.values()  # 返回视图，O(1)
items = d.items()   # 返回视图，O(1)

# 需要列表时才转换
key_list = list(d.keys())  # O(n)

集合优化

集合用于去重和成员检查

Python

# 低效：列表去重
unique = []
for item in items:
    if item not in unique:  # O(n)每次检查
        unique.append(item)

# 高效：集合去重
unique = list(set(items))  # O(n)整体

# 低效：列表成员检查
if item in large_list:  # O(n)

# 高效：集合成员检查
large_set = set(large_list)
if item in large_set:  # O(1)

集合操作

Python

a = {1, 2, 3, 4}
b = {3, 4, 5, 6}

# 交集
intersection = a & b  # {3, 4}

# 并集
union = a | b  # {1, 2, 3, 4, 5, 6}

# 差集
difference = a - b  # {1, 2}

# 对称差集（异或）
symmetric_diff = a ^ b  # {1, 2, 5, 6}

deque双端队列

两端高效操作

Python

from collections import deque

# 队列操作
q = deque()
q.append(1)      # 右端添加 O(1)
q.appendleft(0)  # 左端添加 O(1)
q.pop()          # 右端删除 O(1)
q.popleft()      # 左端删除 O(1)

# 旋转操作
q.rotate(2)   # 右移2位
q.rotate(-2)  # 左移2位

滑动窗口

Python

def sliding_max(nums, k):
    "滑动窗口最大值"
    from collections import deque
    dq = deque()  # 存储索引
    result = []

    for i, num in enumerate(nums):
        # 移除超出窗口的索引
        while dq and dq[0] <= i - k:
            dq.popleft()

        # 移除比当前小的元素
        while dq and nums[dq[-1]] < num:
            dq.pop()

        dq.append(i)

        if i >= k - 1:
            result.append(nums[dq[0]])

    return result

heapq堆

优先队列实现

Python

import heapq

# 最小堆
heap = []
heapq.heappush(heap, 3)
heapq.heappush(heap, 1)
heapq.heappush(heap, 2)

min_val = heapq.heappop(heap)  # 1（最小值）

# 从列表创建堆
lst = [3, 1, 4, 1, 5, 9]
heapq.heapify(lst)  # O(n)

# 获取前n个最小值
smallest = heapq.nsmallest(3, lst)  # [1, 1, 3]

# 最大堆（取负）
max_heap = [-x for x in lst]
heapq.heapify(max_heap)
max_val = -heapq.heappop(max_heap)

合并有序序列

Python

import heapq

def merge_sorted(*sequences):
    "合并多个有序序列"
    return heapq.merge(*sequences)

# 使用
result = list(heapq.merge([1, 3, 5], [2, 4, 6], [0, 7]))
# [0, 1, 2, 3, 4, 5, 6, 7]

bisect二分查找

维护有序列表

Python

import bisect

sorted_list = [1, 3, 5, 7, 9]

# 插入并保持有序
bisect.insort(sorted_list, 4)  # [1, 3, 4, 5, 7, 9]

# 查找插入位置
pos = bisect.bisect_left(sorted_list, 4)  # 2
pos = bisect.bisect_right(sorted_list, 4)  # 3

# 查找元素是否存在
pos = bisect.bisect_left(sorted_list, 5)
if pos < len(sorted_list) and sorted_list[pos] == 5:
    print("Found at", pos)

二分查找函数

Python

def binary_search(arr, target):
    "O(log n)查找"
    import bisect
    pos = bisect.bisect_left(arr, target)
    if pos < len(arr) and arr[pos] == target:
        return pos
    return -1

array数组

固定类型高效存储

Python

from array import array

# 整数数组（比list节省内存）
arr = array('i', [1, 2, 3, 4, 5])  # 'i'表示整数

# 类型码：
# 'i' - 整数
# 'f' - 浮点数
# 'd' - 双精度浮点数
# 'b' - 有符号字符

# 内存对比
import sys
lst = [i for i in range(1000000)]
arr = array('i', range(1000000))

print(sys.getsizeof(lst))  # ~8MB
print(sys.getsizeof(arr))  # ~4MB（约50%节省）

namedtuple命名元组

轻量级数据类

Python

from collections import namedtuple

# 定义命名元组
Point = namedtuple('Point', ['x', 'y'])
Person = namedtuple('Person', 'name age city')

# 使用
p = Point(3, 4)
print(p.x, p.y)  # 3 4

person = Person('Alice', 30, 'NYC')
print(person.name)  # Alice

# 内存对比（比普通类更小）
class PointClass:
    def __init__(self, x, y):
        self.x = x
        self.y = y

p1 = PointClass(3, 4)
p2 = Point(3, 4)
# namedtuple比普通类节省约30%内存

特殊场景优化

计数器Counter

Python

from collections import Counter

# 统计频率
words = ['apple', 'banana', 'apple', 'orange', 'banana', 'apple']
counts = Counter(words)
# Counter({'apple': 3, 'banana': 2, 'orange': 1})

# 最常见的n个元素
top3 = counts.most_common(3)

# 计数运算
c1 = Counter('aabbcc')
c2 = Counter('abcd')
print(c1 + c2)  # Counter({'a': 3, 'b': 3, 'c': 2, 'd': 1})
print(c1 - c2)  # Counter({'a': 1, 'b': 1, 'c': 1})

有序字典OrderedDict

Python

from collections import OrderedDict

# 保持插入顺序（Python 3.7+ dict已原生支持）
od = OrderedDict()
od['a'] = 1
od['b'] = 2
od['c'] = 3

# 移动到末尾
od.move_to_end('a')

# 弹出最早插入的
first = od.popitem(last=False)

选择决策表

场景	推荐数据结构
频繁查找	dict / set
两端增删	deque
保持有序	heapq / bisect
固定类型大批量数据	array
频率统计	Counter
滑动窗口	deque
优先队列	heapq
去重	set

注意：性能优化要基于实际测量，使用timeit或cProfile验证效果。

要点总结

list适合顺序访问，中间插入删除效率低，预分配减少扩容开销
dict/set查找O(1)，适合成员检查，使用defaultdict简化代码
deque两端操作O(1)，适合队列和滑动窗口场景
heapq实现优先队列，nsmallest/nlargest获取极值
bisect维护有序列表，二分查找O(log n)
array节省内存，适合固定类型大数据存储
根据操作频率选择数据结构，用timeit验证优化效果

存放路径：articles/PYTHON/专家/性能优化/数据结构性能优化.md

📝 发现内容有误？点击此处直接编辑