Python C扩展加速
Python调用C代码可突破性能瓶颈,适用于CPU密集型计算场景。
Cython扩展
安装Cython
Bash
pip install cython
基本Cython文件
Python
# mymodule.pyx
def sum_range(int n):
    """Return the sum of the integers 0..n-1 using C-typed locals.

    The accumulator is a 64-bit ``long long``: for n = 10_000_000 the
    result (about 5e13) does not fit in a 32-bit C ``int``.
    """
    cdef int i
    cdef long long total = 0
    for i in range(n):
        total += i
    return total
def python_sum(n):
    """Return the sum of the integers 0..n-1 with an untyped loop.

    Serves as the baseline for comparison with the typed ``sum_range``.
    The explicit loop is intentional; it mirrors the typed version.
    """
    total = 0
    for i in range(n):
        total += i
    return total
编译Cython
Python
# setup.py
from setuptools import setup
from Cython.Build import cythonize
# Translate mymodule.pyx to C and register the result as an extension module.
extensions = cythonize("mymodule.pyx")
setup(ext_modules=extensions)
Bash
# 编译
python setup.py build_ext --inplace
# 生成 mymodule.*.so(Windows 上为 mymodule.*.pyd)
使用编译后的模块
Python
import mymodule
# Typed Cython implementation (the article estimates roughly 100x faster;
# measure on your own machine).
result = mymodule.sum_range(10000000)
# Untyped implementation of the same loop; overwrites the previous result.
result = mymodule.python_sum(10000000)
类型声明优化
Python
# typed_module.pyx
def compute(int n):
    """Return the sum of the square roots of the integers 0..n-1.

    The loop index and the accumulator are declared as C variables, which
    is the optimisation this example demonstrates.
    """
    cdef int i
    cdef double result = 0.0
    for i in range(n):
        result += i ** 0.5
    return result
# cdef defines a C-level function: callable from Cython code,
# but not callable from Python.
cdef double _internal_func(double x):
    return x * x
# cpdef makes the function callable both from C-level code and from Python.
cpdef double hybrid_func(double x):
    return _internal_func(x) + 1.0
使用numpy数组
Python
# numpy_module.pyx
import numpy as np
cimport numpy as cnp
def array_sum(cnp.ndarray[cnp.double_t, ndim=1] arr):
    """Return the sum of a 1-D float64 NumPy array using a typed C loop."""
    # Py_ssize_t matches the type of arr.shape[0]; a C int would truncate
    # the length of arrays with more than 2**31 - 1 elements.
    cdef Py_ssize_t i, n = arr.shape[0]
    cdef double total = 0.0
    for i in range(n):
        total += arr[i]
    return total
释放GIL
Python
# nogil_module.pyx
cdef void _heavy_compute(int n) nogil:
    """Sum the integers 0..n-1 without touching any Python object.

    The ``nogil`` marker on the signature does not release the GIL by
    itself: it declares that the function may be called while the GIL is
    released. The caller releases it with a ``with nogil:`` block.
    """
    cdef int i
    # 64-bit accumulator: the sum outgrows a 32-bit int for large n.
    cdef long long total = 0
    for i in range(n):
        total += i
def run_nogil(int n):
    """Python entry point: run ``_heavy_compute(n)`` with the GIL released.

    Returns the string "Done" once the computation has finished.
    """
    # The GIL is released only for the duration of this block.
    with nogil:
        _heavy_compute(n)
    return "Done"
# 多线程可并行调用nogil函数
ctypes调用共享库
基本使用
Python
import ctypes
import ctypes.util
import sys

# Shared-library suffix by platform:
#   Linux: .so    Windows: .dll    macOS: .dylib
# Example: load the C runtime. Only one branch may run; loading the
# Windows runtime unconditionally raises OSError on other platforms.
if sys.platform == "win32":
    libc = ctypes.cdll.msvcrt
else:
    # find_library may return None; CDLL(None) then exposes the symbols
    # already loaded into the process, which include libc on POSIX.
    libc = ctypes.CDLL(ctypes.util.find_library("c"))

# Call a C function
libc.printf(b"Hello from C!\n")
定义函数签名
Python
import ctypes
# Load the shared library from the current directory.
mylib = ctypes.CDLL("./mylib.so")

# Prototype: int add(int a, int b)
mylib.add.argtypes = [ctypes.c_int] * 2
mylib.add.restype = ctypes.c_int
result = mylib.add(10, 20)
print(result)  # 30

# Prototype: double multiply(double a, double b)
mylib.multiply.argtypes = [ctypes.c_double] * 2
mylib.multiply.restype = ctypes.c_double
result = mylib.multiply(3.14, 2.0)
处理数组
Python
import ctypes
# Build a C int[5] initialised from Python values.
IntArray5 = ctypes.c_int * 5
arr = IntArray5(1, 2, 3, 4, 5)
# Hand the array and its element count to the C function.
mylib.process_array(arr, len(arr))
# 从C返回数组
def get_array(length=5):
    """Return the first `length` ints behind the pointer C ``get_array`` returns.

    Args:
        length: Number of elements to read. Defaults to 5, the size the
            original example hard-coded.

    Returns:
        A Python list of ints copied out of the C array.
    """
    mylib.get_array.restype = ctypes.POINTER(ctypes.c_int)
    ptr = mylib.get_array()
    # A bare pointer carries no length, so the caller must supply it.
    # Slicing a ctypes pointer copies the elements into a list.
    return ptr[:length]
处理结构体
Python
import ctypes
# 定义结构体
class Point(ctypes.Structure):
    """ctypes mirror of a C struct with two ``double`` members, x and y."""

    _fields_ = [
        ("x", ctypes.c_double),
        ("y", ctypes.c_double),
    ]
# Build an instance, naming the fields explicitly.
p = Point(x=3.0, y=4.0)

# Pass the struct to C by value.
mylib.process_point.argtypes = [Point]
mylib.process_point(p)

# Receive a struct returned by value from C.
mylib.create_point.restype = Point
new_point = mylib.create_point()
print(new_point.x, new_point.y)
字符串处理
Python
import ctypes
# Pass a byte string in and get a C string back.
mylib.process_string.argtypes = [ctypes.c_char_p]
mylib.process_string.restype = ctypes.c_char_p
result = mylib.process_string(b"input")
# A c_char_p restype yields bytes, or None when the C function returns
# NULL, so guard before decoding.
if result is not None:
    print(result.decode())

# Create a mutable buffer that C code can write into.
buffer = ctypes.create_string_buffer(100)
# NOTE(review): fill_buffer receives no size argument; confirm it never
# writes more than the 100 bytes allocated above.
mylib.fill_buffer(buffer)
print(buffer.value.decode())
C API直接扩展
最简C扩展
C
// simple_module.c
#include <Python.h>
/* Return the sum of the integers 0..n-1 as a Python int.
 *
 * Python signature: sum_range(n: int) -> int
 * Returns NULL with an exception set if the argument is not an int.
 */
static PyObject* sum_range(PyObject* self, PyObject* args) {
    int n;
    if (!PyArg_ParseTuple(args, "i", &n)) {
        return NULL;  /* PyArg_ParseTuple has already set the exception */
    }
    /* long long is at least 64 bits on every platform; plain long is
     * 32 bits on Windows and overflows for n around 10,000,000. */
    long long total = 0;
    for (int i = 0; i < n; i++) {
        total += i;
    }
    return PyLong_FromLongLong(total);
}
static PyMethodDef methods[] = {
{"sum_range", sum_range, METH_VARARGS, "Sum integers from 0 to n-1"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef module = {
PyModuleDef_HEAD_INIT,
"simple_module",
NULL,
-1,
methods
};
PyMODINIT_FUNC PyInit_simple_module(void) {
return PyModule_Create(&module);
}
编译C扩展
Python
# setup.py
from setuptools import setup, Extension
# Describe the extension: its import name plus the C sources to compile.
module = Extension(name='simple_module', sources=['simple_module.c'])

setup(name='simple_module', ext_modules=[module])
Bash
python setup.py build_ext --inplace
处理Python对象
C
// list_module.c
#include <Python.h>
/* Return the sum of a Python list of ints.
 *
 * Python signature: list_sum(values: list) -> int
 * Raises TypeError if the argument is not a list or contains a non-int,
 * and OverflowError if an element does not fit in a C long.
 * NOTE(review): the running total is a C long and can still overflow
 * when many large values are added.
 */
static PyObject* list_sum(PyObject* self, PyObject* args) {
    PyObject* list;
    if (!PyArg_ParseTuple(args, "O!", &PyList_Type, &list)) {
        return NULL;
    }
    long total = 0;
    Py_ssize_t size = PyList_Size(list);
    for (Py_ssize_t i = 0; i < size; i++) {
        /* Borrowed reference: must not be decref'ed. */
        PyObject* item = PyList_GetItem(list, i);
        if (!PyLong_Check(item)) {
            PyErr_SetString(PyExc_TypeError, "List must contain integers");
            return NULL;
        }
        long value = PyLong_AsLong(item);
        /* -1 is also a valid value, so PyErr_Occurred() disambiguates. */
        if (value == -1 && PyErr_Occurred()) {
            return NULL;
        }
        total += value;
    }
    return PyLong_FromLong(total);
}
性能对比
Cython vs Python
Python
# 性能测试
import timeit
import mymodule
n = 10000000


def python_sum(n):
    """Interpreted baseline: the same loop as mymodule.sum_range, untyped."""
    total = 0
    for i in range(n):
        total += i
    return total


# Cython version
time_cython = timeit.timeit(
    lambda: mymodule.sum_range(n),
    number=10
)
# Pure-Python version. The baseline must be an interpreted loop:
# sum(range(n)) runs inside the C-implemented builtin and would not
# measure interpreter overhead.
time_python = timeit.timeit(
    lambda: python_sum(n),
    number=10
)
print(f"Cython: {time_cython:.3f}s")
print(f"Python: {time_python:.3f}s")
print(f"Speedup: {time_python / time_cython:.1f}x")
# The article reports typical speedups of 50-100x for loops like this.
选择决策
| 场景 | 推荐方案 |
|---|---|
| 简单数学计算 | Cython类型声明 |
| 已有C库 | ctypes调用 |
| 复杂扩展 | C API |
| numpy密集计算 | Cython+numpy |
| 需释放GIL | Cython nogil |
最佳实践
Cython优化技巧
Python
# typed_optimized.pyx
# 1. Declare C variables with cdef
cdef int i, n = 1000000
cdef double result = 0.0
# 2. Use a typed memoryview instead of a NumPy array argument
def process_array(double[:] arr):
    """Return the sum of a 1-D buffer of doubles read through a memoryview."""
    # Py_ssize_t matches the type of arr.shape[0].
    cdef Py_ssize_t i
    cdef double total = 0.0
    for i in range(arr.shape[0]):
        total += arr[i]
    return total
# 3. Inline functions
cdef inline double square(double x):
    return x * x
# 4. Compiler directives
# File-wide form: these comment lines take effect only at the very top of
# the .pyx file, before any code; placed here they would be ignored:
#     # cython: boundscheck=False
#     # cython: wraparound=False
# Per-function form: decorators, which need the `cython` module cimported.
cimport cython


@cython.boundscheck(False)
@cython.wraparound(False)
def fast_array_op(int[:] arr):
    """Double every element of a 1-D int buffer in place.

    Bounds checking and negative-index wraparound are disabled, so the
    loop must only use indices in 0..n-1.
    """
    cdef Py_ssize_t i, n = arr.shape[0]
    for i in range(n):
        arr[i] *= 2
错误处理
Python
# ctypes错误处理
import ctypes
def _raise_on_failure(result, func, arguments):
    """errcheck hook: ctypes calls this with the raw result of each call."""
    # NOTE(review): assumes some_function returns an int status where
    # non-zero means failure -- confirm against the C header.
    if result != 0:
        raise OSError(f"{func.__name__} failed with status {result}")
    return result


# Install the error check once; it then runs after every call.
mylib.some_function.restype = ctypes.c_int
mylib.some_function.errcheck = _raise_on_failure

try:
    result = mylib.some_function()
except OSError as e:
    print(f"C function error: {e}")
# Cython错误处理
def safe_call():
    """Return ``cython_func()``; on RuntimeError return ``fallback()`` instead."""
    try:
        return cython_func()
    except RuntimeError:
        return fallback()
注意:C扩展失去Python安全性,需要手动处理内存和类型,确保正确性。
要点总结
- Cython将Python代码编译为C,配合类型声明(cdef)后,数值循环的性能通常可提升50-100倍
- 在`with nogil:`块中释放GIL(被调用的函数需声明为nogil),允许多线程真正并行执行C代码
- ctypes调用共享库,简单灵活但不如Cython高效
- C API直接扩展最底层,完全控制但开发复杂
- 使用numpy类型声明和内存视图优化数组操作
- boundscheck=False和wraparound=False进一步加速数组访问
存放路径:articles/PYTHON/专家/性能优化/C扩展加速.md
📝 发现内容有误?点击此处直接编辑