Python敏感数据处理
敏感数据(API密钥、密码、证书)的不当处理是安全漏洞的主要来源。掌握安全的存储和使用方法。
敏感数据识别
Python
# Common kinds of sensitive data, listed by the field names they usually go by.
SENSITIVE_DATA_TYPES = {
    # API and session credentials
    'access_token',
    'api_key',
    'secret_key',
    'token',
    # passwords
    'passwd',
    'password',
    'pwd',
    # key material and generic secrets
    'certificate',
    'credential',
    'private_key',
    'secret',
    # authentication fields
    'auth',
    'authorization',
}
# 正则匹配敏感字段名
import re
SENSITIVE_PATTERN = re.compile(
    r'(password|passwd|pwd|secret|key|token|auth|credential)',
    flags=re.IGNORECASE,
)


def is_sensitive_field(field_name: str) -> bool:
    """Report whether ``field_name`` looks like it holds sensitive data.

    The check is a case-insensitive substring search against
    ``SENSITIVE_PATTERN``, so a name that merely contains one of the
    keywords (``author`` contains ``auth``) is reported as sensitive too.

    Args:
        field_name: The field or key name to classify.

    Returns:
        True if any keyword occurs anywhere in the name, otherwise False.
    """
    hit = SENSITIVE_PATTERN.search(field_name)
    return hit is not None
环境变量存储
Python
import os
from dotenv import load_dotenv
# Load variables from a local .env file (the file itself must stay out of
# version control).
load_dotenv()

# Sensitive settings; each one is None when its variable is not set.
api_key = os.getenv('API_KEY')
db_password = os.getenv('DB_PASSWORD')

# Non-sensitive flag with a development default: only the text "true"
# (in any letter case) switches it on.
debug_mode = os.getenv('DEBUG', 'false').lower() == 'true'
# 验证必需配置
def get_required_env(key: str) -> str:
    """Return the value of the environment variable named ``key``.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's value. An empty value counts as set and is
        returned unchanged.

    Raises:
        EnvironmentError: If the variable is not set at all.
    """
    if key in os.environ:
        return os.environ[key]
    raise EnvironmentError(f"缺少必需的环境变量: {key}")
# Fail fast: get_required_env raises EnvironmentError when API_KEY is not set.
api_key = get_required_env('API_KEY')
Python
# .env 文件示例(不提交到 Git)
"""
# .env - 不要提交到版本控制
API_KEY=your_secret_api_key
DB_PASSWORD=your_db_password
SECRET_KEY=your_app_secret
DEBUG=false
"""
# .gitignore 添加:
"""
.env
*.env
.env.local
"""
配置管理类
Python
import os
import json
from typing import Any, Optional
class SecureConfig:
    """Configuration holder that keeps secrets apart from ordinary settings.

    Secrets come from environment variables; ordinary settings come from a
    JSON file. Loading is lazy: the first ``get`` / ``get_secret`` call
    runs ``load`` if it has not been called yet.
    """

    # secret name -> environment variable that supplies it
    _SECRET_ENV_VARS = {
        'api_key': 'API_KEY',
        'db_password': 'DB_PASSWORD',
        'secret_key': 'SECRET_KEY',
    }

    def __init__(self):
        self._config = {}
        self._secrets = {}
        self._loaded = False

    def load(self):
        """Read secrets from the environment and settings from the config file."""
        # Keep only the secrets that are actually set. Storing None for an
        # unset variable would make ``get(key, default)`` return None instead
        # of the caller's default, and would make ``__repr__`` count secrets
        # that do not exist.
        found = {}
        for name, env_var in self._SECRET_ENV_VARS.items():
            value = os.environ.get(env_var)
            if value is not None:
                found[name] = value
        self._secrets = found

        self._load_non_sensitive()
        self._loaded = True

    def _load_non_sensitive(self):
        """Load ordinary settings from the JSON file named by ``CONFIG_FILE``.

        The path defaults to ``config.json``. A missing file is not an
        error; the current settings are simply left as they are.

        Raises:
            ValueError: If the file's top-level JSON value is not an object
                (``json.JSONDecodeError`` for malformed JSON is a subclass).
        """
        config_file = os.environ.get('CONFIG_FILE', 'config.json')
        try:
            # Explicit encoding: the platform default is not UTF-8 everywhere.
            with open(config_file, encoding='utf-8') as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return
        if not isinstance(loaded, dict):
            # ``get`` relies on dict lookups; reject other JSON roots here
            # rather than failing later with an unrelated AttributeError.
            raise ValueError(f"配置文件顶层必须是 JSON 对象: {config_file}")
        self._config = loaded

    def get(self, key: str, default: Any = None) -> Any:
        """Return the value stored under ``key``, or ``default`` if absent.

        Keys that ``is_sensitive_field`` classifies as sensitive are looked
        up among the secrets; all other keys among the ordinary settings.
        """
        if not self._loaded:
            self.load()
        source = self._secrets if is_sensitive_field(key) else self._config
        return source.get(key, default)

    def get_secret(self, key: str) -> str:
        """Return the secret stored under ``key``.

        Raises:
            KeyError: If no such secret was loaded.
        """
        if not self._loaded:
            self.load()
        value = self._secrets.get(key)
        if value is None:
            raise KeyError(f"未找到敏感配置: {key}")
        return value

    def __repr__(self):
        """Return a representation that shows only counts, never values."""
        return f"SecureConfig(config={len(self._config)}, secrets={len(self._secrets)})"
# Module-level instance; nothing is read until the first lookup.
config = SecureConfig()
# get_secret loads on first use and raises KeyError when the secret is missing.
api_key = config.get_secret('api_key')
内存中的敏感数据
Python
# 安全的字符串类
class SecureString:
    """Wrapper that keeps a sensitive string out of accidental output.

    ``str()`` and ``repr()`` never return the full value; callers have to
    ask for it explicitly with ``get_value``.
    """

    def __init__(self, value: str):
        self._value = value

    def __str__(self) -> str:
        """Return the masked form (see ``mask``)."""
        return self.mask()

    def __repr__(self) -> str:
        return "SecureString(masked)"

    def mask(self, show_length: int = 4) -> str:
        """Return the value with all but a short prefix replaced by ``*``.

        Args:
            show_length: Maximum number of leading characters to leave
                visible. Negative values are treated as 0.

        Returns:
            A string of the same length as the value. Values no longer than
            ``show_length`` are masked completely; otherwise at most
            ``show_length`` characters, and never more than half of the
            value, stay visible.
        """
        total = len(self._value)
        # A negative count would turn the slice below into "everything except
        # the last N characters" and leak most of the secret.
        show_length = max(show_length, 0)
        if total <= show_length:
            return '*' * total
        # Cap the visible prefix at half the value so that a short secret
        # (5 characters with the default of 4) is not almost fully revealed.
        visible = min(show_length, total // 2)
        return self._value[:visible] + '*' * (total - visible)

    def get_value(self) -> str:
        """Return the real, unmasked value."""
        return self._value

    def clear(self):
        """Drop this object's reference to the value.

        A ``str`` is immutable, so the characters are not overwritten; this
        only stops the wrapper from handing the value out again.
        """
        self._value = ''
secret = SecureString("my_secret_api_key_12345")
print(secret)  # my_s******************* (first 4 characters, then 19 asterisks)
print(secret.get_value())  # my_secret_api_key_12345
# The f-string below formats the object through __str__, so only the masked
# form ends up in the log message.
import logging
logging.info(f"API配置: {secret}")  # message text carries the masked value
Python
# 安全上下文管理器
class SecretContext:
    """Context manager that hands out a secret for the span of a ``with`` block.

    On exit the manager forgets the secret. If the secret was supplied as a
    ``bytearray``, its contents are also overwritten with zero bytes in
    place. A ``str`` is immutable and cannot be wiped: the manager only
    drops its own references, and any name the caller still holds keeps
    the value.
    """

    def __init__(self, secret: 'str | bytearray'):
        self._secret = secret
        self._original = None

    def __enter__(self):
        """Return the secret for use inside the ``with`` block."""
        self._original = self._secret
        return self._secret

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Forget the secret; exceptions from the block are not suppressed."""
        if isinstance(self._secret, bytearray):
            # Mutable buffer: zero it in place so the caller's object is wiped too.
            self._secret[:] = bytes(len(self._secret))
        self._secret = ''
        # ``_original`` was bound to the secret in __enter__; leaving it set
        # would keep the secret reachable through this object after exit.
        self._original = None
        return False
# Usage
with SecretContext(api_key) as key:
    result = api_call(key)
# On exit the context manager resets its stored secret, but the local name
# ``key`` still refers to the secret value; drop that reference explicitly.
del key
日志中的敏感数据
Python
import logging
import re
class SensitiveDataFilter(logging.Filter):
    """Logging filter that redacts ``field=value`` / ``field: value`` pairs.

    The record is rendered to its final text first and then scrubbed, so
    values supplied through %-style arguments are redacted as well.
    """

    SENSITIVE_FIELDS = ['password', 'api_key', 'token', 'secret']

    def filter(self, record):
        """Redact sensitive values in ``record`` in place.

        Returns:
            Always True: records are rewritten, never dropped.
        """
        args = getattr(record, 'args', None)
        if args:
            record.args = self._sanitize_args(args)

        # Render before scrubbing. Scrubbing the template first would replace
        # a placeholder such as "password=%s" with "password=[REDACTED]", and
        # the later "msg % args" would then fail because the argument count
        # no longer matches.
        try:
            rendered = record.getMessage()
        except (TypeError, ValueError, KeyError):
            # Template and arguments do not fit together. Keep the arguments
            # for the handler's own error report and scrub only the template.
            record.msg = self._sanitize(str(record.msg))
            return True

        record.msg = self._sanitize(rendered)
        # The message is final text now; it must not be %-formatted again.
        record.args = ()
        return True

    def _sanitize(self, message: str) -> str:
        """Return ``message`` with the value after each sensitive field redacted.

        A value is the run of non-whitespace characters following
        ``field=`` or ``field:``; field names match case-insensitively and
        are written back in the lowercase form listed in SENSITIVE_FIELDS.
        """
        for field in self.SENSITIVE_FIELDS:
            pattern = rf'{field}[=:]\s*\S+'
            message = re.sub(
                pattern,
                f'{field}=[REDACTED]',
                message,
                flags=re.IGNORECASE
            )
        return message

    def _sanitize_args(self, args):
        """Redact values of sensitive keys when ``args`` is a dict.

        Any other kind of ``args`` is returned unchanged.
        """
        if isinstance(args, dict):
            return {
                # Non-string keys cannot name a field; skipping them also keeps
                # is_sensitive_field from raising on them.
                k: '[REDACTED]' if isinstance(k, str) and is_sensitive_field(k) else v
                for k, v in args.items()
            }
        return args
# Configure logging
logger = logging.getLogger()
redactor = SensitiveDataFilter()
logger.addFilter(redactor)
# A filter attached to a logger is consulted only for records logged through
# that logger itself; records propagated from child loggers skip it. Attach
# the same filter to the root handlers that exist at this point so those
# records are scrubbed too.
for handler in logger.handlers:
    handler.addFilter(redactor)

# Safe logging
logger.info(f"API调用: api_key={api_key}")  # expected text: api_key=[REDACTED]
文件存储安全
Python
import os
import json
from pathlib import Path
class SecureFileStorage:
    """Store secrets as individual files readable only by their owner."""

    def __init__(self, base_path: str):
        self.base_path = Path(base_path)
        self._ensure_secure_permissions()

    def _ensure_secure_permissions(self):
        """Restrict the base directory to its owner (mode 0o700) if it exists."""
        if self.base_path.exists():
            os.chmod(self.base_path, 0o700)

    @staticmethod
    def _open_owner_only(path, flags):
        """``opener`` for ``open()``: create new files with mode 0o600."""
        return os.open(path, flags, 0o600)

    def save_secret(self, name: str, value: str):
        """Write ``value`` to the file ``name`` under the base directory.

        The file is created with mode 0o600 from the start, so the secret is
        never on disk with wider permissions.
        """
        file_path = self.base_path / name
        with open(file_path, 'w', encoding='utf-8',
                  opener=self._open_owner_only) as f:
            # A file that already existed keeps its old mode when reopened;
            # tighten it before any secret data is written.
            os.chmod(file_path, 0o600)
            f.write(value)

    def load_secret(self, name: str) -> str:
        """Return the content of the file ``name`` under the base directory.

        Raises:
            FileNotFoundError: If no such file exists.
        """
        file_path = self.base_path / name
        if not file_path.exists():
            raise FileNotFoundError(f"未找到: {name}")
        with open(file_path, encoding='utf-8') as f:
            return f.read()

    def delete_secret(self, name: str):
        """Overwrite the file ``name`` with zero bytes, then remove it.

        A missing file is ignored.
        NOTE(review): overwriting is best-effort; whether the old bytes are
        physically replaced depends on the filesystem and storage device.
        """
        file_path = self.base_path / name
        if not file_path.exists():
            return

        remaining = file_path.stat().st_size
        zeros = b'\x00' * min(remaining, 64 * 1024)
        # 'r+b' opens without truncating. Opening with 'w' would discard the
        # old content first, so nothing would actually be overwritten.
        with open(file_path, 'r+b') as f:
            while remaining > 0:
                remaining -= f.write(zeros[:remaining])
            f.flush()
            os.fsync(f.fileno())
        file_path.unlink()
# NOTE(review): '/secure/config' must already exist; the constructor only
# tightens permissions on an existing directory and save_secret does not
# create it.
storage = SecureFileStorage('/secure/config')
# Demo value only; a real secret should not appear as a literal in source.
storage.save_secret('api_key', 'my_secret_key')
密钥管理服务集成
Python
# AWS Secrets Manager 示例(需要 boto3)
import boto3
from botocore.exceptions import ClientError
class AWSSecretsManager:
    """Thin wrapper around the AWS Secrets Manager client."""

    def __init__(self, region='us-east-1'):
        self.client = boto3.client('secretsmanager', region_name=region)

    def get_secret(self, secret_name: str) -> dict:
        """Fetch the secret ``secret_name`` and parse its value as JSON.

        Args:
            secret_name: Identifier passed to the service as ``SecretId``.

        Returns:
            The parsed JSON value of the secret.

        Raises:
            RuntimeError: If the service call fails with a ``ClientError``
                (the original error is attached as ``__cause__``).
            json.JSONDecodeError: If the secret value is not valid JSON.
        """
        # Only the service call sits inside the try: parsing problems are not
        # ClientErrors and should surface as what they are.
        try:
            response = self.client.get_secret_value(SecretId=secret_name)
        except ClientError as e:
            raise RuntimeError(f"获取秘密失败: {e}") from e

        # A secret stored as binary arrives under 'SecretBinary' instead of
        # 'SecretString'; json.loads accepts both str and bytes.
        payload = response.get('SecretString')
        if payload is None:
            payload = response['SecretBinary']
        return json.loads(payload)
# Usage (no region given, so the constructor default 'us-east-1' applies)
secrets_manager = AWSSecretsManager()
db_credentials = secrets_manager.get_secret('prod/db/credentials')
# Similar services: Azure Key Vault, HashiCorp Vault, etc.
代码中的敏感数据检测
Python
# 检测代码中的硬编码敏感数据
import re
# Number of leading characters of a match that a finding echoes back.
_MATCH_PREVIEW_CHARS = 8

HARD_CODED_PATTERNS = [
    # API key assignment: a quoted value of 20+ characters. "_" and "-" are
    # allowed because real keys such as "sk_live_..." contain them.
    (r'api[_-]?key\s*[=:]\s*["\'][a-zA-Z0-9_\-]{20,}["\']', 'API密钥'),
    # Password assignment: a quoted value of 6+ characters.
    (r'password\s*[=:]\s*["\'][^"\']{6,}["\']', '密码'),
    # AWS access key ID
    (r'AKIA[0-9A-Z]{16}', 'AWS访问密钥'),
    # PEM header of an RSA private key
    (r'BEGIN\s+RSA\s+PRIVATE\s+KEY', 'RSA私钥'),
]


def scan_for_secrets(code: str) -> list:
    """Scan ``code`` for hard-coded secrets.

    Every pattern in ``HARD_CODED_PATTERNS`` is applied case-insensitively.

    Args:
        code: Source text to scan.

    Returns:
        One dict per match, grouped by pattern in list order, with keys
        ``'type'`` (the pattern's description), ``'line'`` (1-based line of
        the match start) and ``'match'`` (a short preview of the matched
        text followed by ``'...'``).
    """
    findings = []
    for pattern, description in HARD_CODED_PATTERNS:
        for match in re.finditer(pattern, code, re.IGNORECASE):
            findings.append({
                'type': description,
                'line': code[:match.start()].count('\n') + 1,
                # Short prefix only: a 20-character slice would reproduce an
                # entire AWS access key ID in the report.
                'match': match.group()[:_MATCH_PREVIEW_CHARS] + '...',
            })
    return findings
# Usage
code_sample = """
api_key = "sk_live_abc123def456ghi789"
password = "my_secret_password"
"""
findings = scan_for_secrets(code_sample)
for f in findings:
    print(f"发现 {f['type']} 于第 {f['line']} 行")
要点总结
- 环境变量存储敏感配置,不硬编码在代码中
- `.env` 文件不入库,添加到 `.gitignore`
- 日志过滤遮蔽敏感数据输出
- 文件权限设置为 `0o600`(仅所有者可读写)
- 密钥管理服务是生产环境的最佳方案
📝 发现内容有误?点击此处直接编辑