今天冉冉博客带来一篇实用技术教程,主题是Python 生成器入门到进阶。这些技巧经过实际项目验证,直接可用。
一、Python 环境配置与虚拟环境
Python 环境管理是每个开发者的必修课。合理使用虚拟环境可以隔离项目依赖,避免版本冲突。冉冉博客的所有脚本项目都使用虚拟环境管理。
1.1 venv 虚拟环境详解
# Create the virtual environment
python -m venv myproject/venv
# Activate the virtual environment
# Linux/Mac:
source myproject/venv/bin/activate
# Windows:
myproject\venv\Scripts\activate
# Install the dependencies listed in requirements.txt
pip install -r requirements.txt
# Export the currently installed packages back to requirements.txt
pip freeze > requirements.txt
# Leave the virtual environment
deactivate
1.2 Poetry 现代化依赖管理
# Install Poetry
pip install poetry
# Initialise the project (interactive prompt)
poetry init
# Add dependencies; the second command adds them to the "dev" group
poetry add requests pandas
poetry add --group dev black pytest
# Install the dependencies and run a script with them
poetry install
poetry run python script.py
# Build and publish the package
poetry build
poetry publish
二、Python 数据类与类型提示
Python 3.7 引入的 dataclasses 模块,配合自 Python 3.5 起提供的类型提示(typing),让代码更清晰、更易维护。
2.1 dataclass 实战应用
from dataclasses import dataclass, field
from typing import List, Optional
@dataclass
class Article:
    """A blog article together with basic engagement metadata.

    ``__init__``, ``__repr__`` and ``__eq__`` are generated by ``@dataclass``
    from the fields declared below.
    """

    title: str
    content: str
    author: str
    views: int = 0
    # default_factory gives every instance its own list instead of sharing one.
    tags: List[str] = field(default_factory=list)

    @property
    def word_count(self) -> int:
        """Return the number of whitespace-separated tokens in ``content``."""
        return len(self.content.split())

    def is_popular(self) -> bool:
        """Return True when the article has more than 1000 views."""
        return self.views > 1000
# Example instance: only ``views`` is omitted here.
article = Article(
    title="Python 教程", content="详细内容...", author="冉冉", tags=["Python", "编程"]
)
2.2 类型提示完整指南
from typing import Union, Callable, Generator
# Union type: an identifier may be either an int or a str.
Id = Union[int, str]
# Callable type: takes (int, str) and returns bool.
Callback = Callable[[int, str], bool]
# Basic annotated variables.
name: str = "冉冉博客"
count: int = 42
active: bool = True
# 生成器类型
def process_items(items: list[int]) -> Generator[int, None, None]:
    """Lazily yield every element of ``items`` multiplied by two.

    Args:
        items: The integers to process.

    Yields:
        ``item * 2`` for each item, in input order.
    """
    for item in items:
        yield item * 2
三、Python 装饰器与上下文管理器
装饰器和上下文管理器是 Python 最优雅的两种编程模式。合理使用可以让代码更简洁、更易读。
3.1 装饰器深入解析
import functools
import time
def retry(max_attempts=3, delay=1.0):
    """Build a decorator that retries the wrapped callable when it raises.

    Args:
        max_attempts: Total number of calls to make before giving up; must
            be at least 1.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        A decorator. The decorated function returns the first successful
        result, or re-raises the exception raised by the final attempt.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
    """
    if max_attempts < 1:
        # With zero attempts the loop below would never call the function and
        # the wrapper would silently return None.
        raise ValueError("max_attempts must be at least 1")

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    # Out of attempts: let the last error reach the caller.
                    if attempt == max_attempts:
                        raise
                    time.sleep(delay)

        return wrapper

    return decorator
def timer(func):
    """Decorate ``func`` so each successful call prints its elapsed time.

    The duration is measured with ``time.perf_counter`` and printed after the
    call returns; the wrapped function's return value is passed through
    unchanged. Nothing is printed if ``func`` raises.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"{func.__name__} 耗时: {elapsed:.4f}s")
        return result

    return wrapper
3.2 上下文管理器
from contextlib import contextmanager
import sqlite3
@contextmanager
def db_transaction(db_path):
    """Open a SQLite connection and treat the ``with`` body as one transaction.

    Args:
        db_path: Path of the SQLite database file to connect to.

    Yields:
        The open ``sqlite3.Connection``.

    The transaction is committed when the body finishes normally. If the body
    raises an ``Exception`` the transaction is rolled back and the exception
    is re-raised. The connection is closed in every case.
    """
    conn = sqlite3.connect(db_path)
    try:
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
# Usage: increment the view counter of post 1 inside a single transaction.
with db_transaction('blog.db') as conn:
    conn.execute("UPDATE posts SET views = views + 1 WHERE id = ?", (1,))
四、Python 异步编程与并发处理
Python asyncio 是处理 IO 密集型任务的神器。在网络请求、文件操作等场景中,异步编程能带来数倍的性能提升。
4.1 asyncio 异步函数
import asyncio
import aiohttp
async def fetch_all(urls: list[str]) -> list[str]:
    """Fetch every URL concurrently and return the response bodies.

    All requests share one ``aiohttp.ClientSession``. One ``fetch_one``
    coroutine is created per URL and they are awaited together with
    ``asyncio.gather``, which returns the results in the same order as
    ``urls``.

    Args:
        urls: The URLs to request.

    Returns:
        The body text of each response, in input order.
    """
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_one(session, url) for url in urls]
        return await asyncio.gather(*tasks)
async def fetch_one(session: "aiohttp.ClientSession", url: str) -> str:
    """Issue a GET request for ``url`` on ``session`` and return the body text.

    Args:
        session: The client session to send the request through.
        url: The address to request.

    Returns:
        The decoded response body.
    """
    async with session.get(url) as response:
        return await response.text()
# Run: build 100 item URLs, then drive fetch_all to completion with asyncio.run.
urls = [f"https://api.example.com/item/{i}" for i in range(100)]
results = asyncio.run(fetch_all(urls))
4.2 线程池与进程池
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
# IO 密集型用线程池
def fetch_data(url, timeout=10):
    """Request ``url`` and return its body parsed as JSON.

    Args:
        url: The address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block its worker thread forever.

    Returns:
        The decoded JSON payload.
    """
    import requests

    return requests.get(url, timeout=timeout).json()
# Up to 10 worker threads fetch the URLs; the results are collected into a list.
with ThreadPoolExecutor(max_workers=10) as executor:
    results = list(executor.map(fetch_data, urls))
# CPU 密集型用进程池
def process_image(image_path):
    """Open the image at ``image_path`` and return its ``size`` attribute.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        The value of ``Image.size`` for the opened image.
    """
    from PIL import Image

    # The context manager closes the underlying file once the size is read,
    # instead of leaving the handle open until garbage collection.
    with Image.open(image_path) as img:
        return img.size
# Guard the pool: worker processes started with the "spawn" method re-import
# this module, and unguarded pool creation would then run again in every worker.
if __name__ == '__main__':
    # NOTE(review): image_paths is not defined in this snippet; assign the
    # list of image file paths before running.
    with ProcessPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(process_image, image_paths))
五、Python 脚本实战案例
综合以上知识,冉冉博客分享几个实用的 Python 自动化脚本。
5.1 网站数据采集脚本
import requests
from bs4 import BeautifulSoup
import csv
from dataclasses import dataclass
from typing import List
@dataclass
class Article:
    """One scraped article entry: title, link and view count."""

    title: str
    url: str
    views: int = 0
def _parse_views(node) -> int:
    """Return the integer in ``node``'s text, or 0 if it is absent or not a number."""
    if node is None:
        return 0
    try:
        # Tolerate surrounding whitespace and thousands separators ("1,234").
        return int(node.text.replace(',', '').strip())
    except ValueError:
        return 0


def crawl_articles(base_url: str, pages: int = 10) -> List["Article"]:
    """Scrape article listings from ``{base_url}/page/1`` up to ``/page/{pages}``.

    Args:
        base_url: Site root; the page number is appended as ``/page/<n>``.
        pages: Number of listing pages to request, starting from page 1.

    Returns:
        One ``Article`` per ``.article-item`` element, in page order. An item
        whose ``.views`` element is missing or non-numeric gets ``views=0``.

    Raises:
        requests.HTTPError: If a listing page responds with an error status.
    """
    # Local import: the import list of this snippet does not include ``time``.
    import time

    articles = []
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    for page in range(1, pages + 1):
        url = f"{base_url}/page/{page}"
        response = requests.get(url, headers=headers, timeout=10)
        # Fail loudly rather than parsing an error page as an empty listing.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.select('.article-item'):
            articles.append(
                Article(
                    title=item.select_one('h3').text.strip(),
                    url=item.select_one('a')['href'],
                    views=_parse_views(item.select_one('.views')),
                )
            )
        # Pause between pages to keep the request rate low.
        time.sleep(1)
    return articles
5.2 自动化备份脚本
import os, shutil, hashlib
from datetime import datetime
from dataclasses import dataclass
@dataclass
class BackupConfig:
    """Settings consumed by ``incremental_backup``."""

    source_dir: str  # root of the directory tree to copy
    backup_dir: str  # directory in which the timestamped backup folders are created
    keep_versions: int = 5  # intended number of backup versions to retain
def _prune_old_backups(backup_dir: str, keep_versions: int) -> None:
    """Delete the oldest ``backup_<timestamp>`` folders beyond ``keep_versions``."""
    if keep_versions < 1:
        # Never read zero or a negative value as "delete every backup".
        return
    prefix = 'backup_'
    versions = []
    for name in os.listdir(backup_dir):
        stamp = name[len(prefix):]
        if not name.startswith(prefix) or len(stamp) != 15:
            continue
        if not os.path.isdir(os.path.join(backup_dir, name)):
            continue
        try:
            # Only folders whose suffix is a real timestamp are treated as backups.
            datetime.strptime(stamp, '%Y%m%d_%H%M%S')
        except ValueError:
            continue
        versions.append(name)
    # Fixed-width timestamps sort chronologically, so the oldest come first.
    versions.sort()
    for name in versions[:-keep_versions]:
        shutil.rmtree(os.path.join(backup_dir, name))


def incremental_backup(config: "BackupConfig"):
    """Copy ``config.source_dir`` into a new timestamped folder and prune old ones.

    Despite the name, every non-hidden file is copied on each run. Files and
    directories whose name starts with ``.`` are skipped. After copying, only
    the newest ``config.keep_versions`` ``backup_<timestamp>`` folders in
    ``config.backup_dir`` are kept.

    Args:
        config: Object providing ``source_dir``, ``backup_dir`` and
            ``keep_versions``.

    Returns:
        The path of the backup folder created by this run.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = os.path.join(config.backup_dir, 'backup_' + timestamp)
    os.makedirs(backup_path, exist_ok=True)
    # If the backup directory lives inside the source tree it must not be
    # walked, otherwise earlier backups are copied into the new one.
    backup_root = os.path.abspath(config.backup_dir)
    changes = 0
    for root, dirs, files in os.walk(config.source_dir):
        # In-place assignment stops os.walk from descending into pruned dirs.
        dirs[:] = [
            d for d in dirs
            if not d.startswith('.')
            and os.path.abspath(os.path.join(root, d)) != backup_root
        ]
        for filename in files:
            if filename.startswith('.'):
                continue
            src_path = os.path.join(root, filename)
            rel_path = os.path.relpath(src_path, config.source_dir)
            dst_path = os.path.join(backup_path, rel_path)
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            shutil.copy2(src_path, dst_path)
            changes += 1
    print(f"备份完成: {changes} 个文件")
    _prune_old_backups(config.backup_dir, config.keep_versions)
    return backup_path
if __name__ == '__main__':
    # Raw strings are required for these Windows paths: in a normal literal
    # "\U" starts a Unicode escape (a SyntaxError here) and "\B" is an
    # invalid escape sequence.
    config = BackupConfig(
        source_dir=r'C:\Users\冉冉\Documents',
        backup_dir=r'D:\Backups',
        keep_versions=7
    )
    incremental_backup(config)
希望这篇关于Python 生成器入门到进阶的教程对你有帮助。冉冉博客会持续更新更多高质量技术文章,敬请期待!
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END
















暂无评论内容