Python 生成器函数
生成器函数使用 yield 关键字,自动创建迭代器,实现惰性求值。
基本概念
Python
def simple_generator():
    """Yield the integers 1, 2 and 3, one per resumption."""
    for value in (1, 2, 3):
        yield value
gen = simple_generator()
print(type(gen))  # <class 'generator'>

# Pull the values out one at a time.
print(next(gen))  # 1
print(next(gen))  # 2
print(next(gen))  # 3

# The generator is now exhausted, so a fourth next() raises StopIteration.
# Catch it so the example runs to completion instead of ending in a traceback.
try:
    next(gen)
except StopIteration:
    print("StopIteration")
yield 关键字
yield 会暂停函数执行并保存当前状态,同时把值返回给调用方;下次调用 next() 时从暂停处继续执行:
Python
def counter(max_count):
    """Yield 0, 1, 2, ... for as long as the value is below *max_count*."""
    current = 0
    while True:
        # Stop as soon as the bound is reached.
        if not current < max_count:
            return
        yield current
        current += 1
gen = counter(5)
# The for loop resumes the generator repeatedly until it is exhausted.
for num in gen:
    print(num)  # 0, 1, 2, 3, 4
惰性求值
生成器只在需要时计算,节省内存:
Python
def lazy_range(n):
    """Generator version of ``range``: lazily yield 0, 1, ..., up to n (exclusive)."""
    position = 0
    while True:
        if not position < n:
            return
        yield position
        position += 1
# List approach: every element is created immediately.
big_list = list(range(1000000))  # uses a lot of memory

# Generator approach: values are produced lazily.
big_gen = lazy_range(1000000)  # uses almost no memory
print(next(big_gen))  # 0 (computed on demand)
内存对比
Python
import sys
# List: all 1000 elements are built before the size is measured.
lst = [x for x in range(1000)]
# NOTE: sys.getsizeof is shallow -- it measures the list object itself and
# does not follow references into the contained int objects.
print(sys.getsizeof(lst)) # about 8872 bytes
# Generator expression: only the iteration state is held, no elements yet.
gen = (x for x in range(1000))
print(sys.getsizeof(gen)) # about 112 bytes
函数状态保存
每次 yield 后函数状态被保存:
Python
def stateful_generator():
    """Demonstrate two-way communication with a paused generator.

    Prints a start marker and yields 1. The value passed to ``send()`` then
    becomes ``x``, is echoed, and ``x + 10`` is yielded. Resuming once more
    prints the end marker and finishes the generator.
    """
    print("开始")
    # The yield expression evaluates to whatever the caller passes to send().
    x = yield 1
    print(f"接收到: {x}")
    reply = x + 10
    yield reply
    print("结束")
gen = stateful_generator()
next(gen)  # prints 开始, returns 1
gen.send(5)  # prints 接收到: 5, returns 15
# Resuming once more prints 结束 and then the generator finishes, which
# raises StopIteration. Catch it so the example runs to completion.
try:
    next(gen)
except StopIteration:
    pass  # exhaustion is the expected outcome here
send() 方法
send() 向生成器发送一个值作为当前 yield 表达式的结果,使生成器恢复执行,并返回下一个 yield 产出的值(首次启动生成器必须先调用 next() 或 send(None)):
Python
def accumulator():
    """Keep a running total of the values sent in.

    Each resumption yields the current total. Receiving ``None`` (which is
    what a plain ``next()`` delivers) ends the generator.
    """
    running_total = 0
    received = yield running_total
    while received is not None:
        running_total += received
        received = yield running_total
acc = accumulator()
next(acc)  # prime the generator; returns the initial total 0
for amount in (10, 20, 5):
    print(acc.send(amount))  # 10, then 30, then 35
acc.close()  # shut the generator down
throw() 方法
throw() 在生成器暂停的 yield 处抛出指定异常;如果生成器捕获了该异常并继续 yield,throw() 会返回这个产出的值:
Python
def error_handler():
    """Echo every value sent in until a ValueError is thrown into the generator.

    The ValueError is caught and a message describing it is yielded; after
    that the generator finishes.
    """
    try:
        while True:
            value = yield  # yields None and waits for the next send()
            print(f"处理: {value}")
    except ValueError as e:
        message = f"捕获异常: {e}"
        yield message
gen = error_handler()
next(gen)  # prime the generator: run up to the first yield
gen.send(1)  # prints 处理: 1
# Pass an exception instance: the throw(type, value) call form is deprecated
# since Python 3.12.
print(gen.throw(ValueError("错误")))  # 捕获异常: 错误
close() 方法
close() 关闭生成器:在生成器暂停的 yield 处抛出 GeneratorExit 异常,此后生成器不能再产出值:
Python
def infinite_counter():
    """Yield 0, 1, 2, ... without end, printing a notice when closed."""
    current = 0
    try:
        while True:
            yield current
            current += 1
    except GeneratorExit:
        # close() raises GeneratorExit at the paused yield.
        print("生成器关闭")
gen = infinite_counter()
print(next(gen))  # 0
print(next(gen))  # 1
gen.close()  # prints 生成器关闭
# A closed generator is finished, so next() raises StopIteration. Catch it so
# the example runs to completion instead of ending in a traceback.
try:
    next(gen)
except StopIteration:
    print("StopIteration")
生成器 vs 普通函数
| 特性 | 普通函数 | 生成器函数 |
|---|---|---|
| 关键字 | return | yield |
| 执行 | 一次性执行完 | 可暂停恢复 |
| 返回值 | 单个值 | 多个值序列 |
| 内存 | 可能占用大量内存 | 惰性求值,节省内存 |
| 状态 | 不保存 | 自动保存 |
yield vs return
Python
def with_return(n):
    """Regular function: build the whole list of 0..n-1 and return it at once."""
    items = []
    for index in range(n):
        items.append(index)
    return items
def with_yield(n):
    """Generator function: produce 0..n-1 one value at a time."""
    for index in range(n):
        yield index
# return hands back the complete result at once.
lst = with_return(5)
print(lst)  # [0, 1, 2, 3, 4]

# yield produces the values lazily; list() drains the generator.
gen = with_yield(5)
print(list(gen))  # [0, 1, 2, 3, 4]
实际应用
读取大文件
Python
def read_large_file(filepath, chunk_size=1024, encoding=None):
    """Lazily read a text file in fixed-size chunks.

    Args:
        filepath: Path of the file to read.
        chunk_size: Maximum number of characters returned per chunk. The
            file is opened in text mode, so this counts characters, not
            bytes.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            uses the platform's default encoding; pass e.g. ``"utf-8"`` for
            results that do not depend on the machine's locale.

    Yields:
        Successive non-empty strings of at most ``chunk_size`` characters.
    """
    # The file stays open while the generator is suspended; the with block
    # closes it once the generator finishes or is closed.
    with open(filepath, 'r', encoding=encoding) as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                # An empty string signals end of file.
                break
            yield chunk
# Handle the file chunk by chunk instead of loading it all at once.
# NOTE: process() is not defined in this snippet; supply your own handler.
for chunk in read_large_file('large.txt'):
    process(chunk)
无限序列
Python
def fibonacci():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... without end."""
    current, following = 0, 1
    while True:
        yield current
        current, following = following, current + following
# The sequence is infinite, so cap it with itertools.islice.
from itertools import islice

fib = fibonacci()
first_ten = list(islice(fib, 10))
print(first_ten)  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
数据流处理
Python
def pipeline(data):
    """Data-processing pipeline: lazily yield twice each strictly positive item."""
    for item in data:
        # Filter: anything that is not greater than zero is dropped.
        if not item > 0:
            continue
        # Transform, then emit.
        yield item * 2
def filter_negative(gen):
    """Drop negative numbers: yield the items of *gen* that are >= 0."""
    for candidate in gen:
        if not candidate >= 0:
            continue
        yield candidate
data = [-1, 2, -3, 4, 5]
# Chain the two generators: drop negatives first, then double what is left.
non_negative = filter_negative(data)
result = pipeline(non_negative)
print(list(result))  # [4, 8, 10]
扁平化嵌套
Python
def flatten(nested):
    """Recursively flatten a nested list, yielding its non-list items in order.

    Only ``list`` instances are descended into; any other object is yielded
    as is.
    """
    for element in nested:
        if not isinstance(element, list):
            yield element
            continue
        # Delegate to a recursive generator for the sub-list.
        yield from flatten(element)
nested = [1, [2, [3, 4]], 5]
flat = list(flatten(nested))
print(flat)  # [1, 2, 3, 4, 5]
生成器属性
Python
def my_gen():
    """Yield 1 and then 2."""
    for value in (1, 2):
        yield value
gen = my_gen()

# Introspection attributes of a generator object.
print(gen.gi_code)  # the code object
print(gen.gi_frame)  # the execution frame
print(gen.gi_running)  # whether the generator is currently executing
print(gen.gi_yieldfrom)  # the object being delegated to by yield from
要点总结
| 特性 | 说明 |
|---|---|
| yield | 暂停函数,返回值,保存状态 |
| 惰性 | 只在需要时计算,节省内存 |
| send() | 向生成器发送值 |
| throw() | 向生成器抛出异常 |
| close() | 关闭生成器 |
生成器函数自动实现迭代器协议,是处理大数据和流式数据的高效工具。
D:\git2\jwdev\articles\PYTHON\进阶\迭代器与生成器\生成器函数.md
📝 发现内容有误?点击此处直接编辑