Chain-of-Thought：让 LLM 学会逐步推理

什么是 Chain-of-Thought？

Chain-of-Thought（CoT，思维链）是一种提示技术，通过让大语言模型在给出最终答案之前，先生成中间的推理步骤，从而显著提升模型在复杂任务上的表现。

┌─────────────────────────────────────────────────────────────┐
│                  Chain-of-Thought 原理                      │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│   标准 Prompt（直接回答）：                                   │
│   Q: 一个农场有鸡和兔，头共35个，脚共94只。                  │
│      鸡和兔各有多少只？                                      │
│   A: 鸡23只，兔12只                                        │
│                                                             │
│   Chain-of-Thought（展示推理）：                             │
│   Q: 一个农场有鸡和兔，头共35个，脚共94只。                  │
│      鸡和兔各有多少只？                                      │
│   A: 让我逐步思考：                                         │
│      1. 假设全是鸡，应该有 35×2=70 只脚                      │
│      2. 实际多了 94-70=24 只脚                               │
│      3. 每只兔比鸡多2只脚                                   │
│      4. 所以有 24÷2=12 只兔                                  │
│      5. 鸡有 35-12=23 只                                     │
│      答案：鸡23只，兔12只                                    │
│                                                             │
│   核心思想：把"隐式推理"变成"显式推理"                        │
│                                                             │
└─────────────────────────────────────────────────────────────┘

CoT 的优势：

提高准确性：复杂任务准确率提升 2-3 倍
可解释性：能看到模型的推理过程
调试友好：容易发现错误步骤
泛化性强：适用于多种任务类型

Zero-shot CoT

最简单的 CoT 方法，只需在 Prompt 末尾添加“让我们逐步思考”这样的触发语即可，无需提供任何示例。

1. 基础实现

# zero_shot_cot.py
class ZeroShotCoT:
    """Zero-shot Chain-of-Thought helpers.

    Builds a prompt that ends with a "think step by step" trigger phrase and
    pulls the final answer back out of the model's free-form response.
    """

    # Trigger phrases that can be appended to a question; the first entry is
    # the default used by create_prompt().
    TRIGGER_PHRASES = [
        "让我们逐步思考",
        "Let's think step by step",
        "请详细说明你的推理过程",
        "请展示你的思考过程",
        "请逐步分析",
    ]

    @staticmethod
    def create_prompt(question: str, trigger: str = None) -> str:
        """Return a zero-shot CoT prompt for *question*.

        Args:
            question: The problem statement to ask the model.
            trigger: Phrase that invites step-by-step reasoning. When None,
                the first entry of TRIGGER_PHRASES is used.

        Returns:
            The question, a blank line, then the trigger followed by a
            full-width colon.
        """
        if trigger is None:
            trigger = ZeroShotCoT.TRIGGER_PHRASES[0]

        return f"""问题：{question}

{trigger}："""

    @staticmethod
    def extract_answer(response: str) -> str:
        """Extract the final answer from a CoT response.

        Marker patterns are tried in priority order. For the first pattern
        that occurs at all, the LAST occurrence is used: words such as
        "所以" / "因此" routinely appear in intermediate steps, and the
        conclusion is the one nearest the end of the response.

        Args:
            response: Raw model output.

        Returns:
            The text following the chosen marker (up to end of line), or the
            last non-empty line when no marker is present, or *response*
            itself when it contains no non-empty line.
        """
        import re

        patterns = [
            r'答案[是：]\s*([^\n]+)',
            r'所以[，]?\s*([^\n]+)',
            r'因此[，]?\s*([^\n]+)',
            # "(?:答案)?" makes the whole word optional; a character class
            # "[答案]?" would only match a single one of the two characters.
            r'最终(?:答案)?[：是]\s*([^\n]+)',
        ]

        for pattern in patterns:
            found = re.findall(pattern, response)
            if found:
                return found[-1].strip()

        # No marker found: fall back to the last non-empty line.
        lines = [line.strip() for line in response.split('\n') if line.strip()]
        return lines[-1] if lines else response

# Usage example: build a zero-shot CoT prompt with the default trigger
# phrase and print it.
question = "一个篮子有苹果和橘子共50个，苹果比橘子多10个。各有多少个？"
prompt = ZeroShotCoT.create_prompt(question)
print(prompt)

2. 自动 CoT（Auto-CoT）

# auto_cot.py
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

class AutoCoT:
    """Automatically build few-shot CoT demonstrations (Auto-CoT).

    Questions are clustered on their TF-IDF vectors with k-means; the
    questions closest to each cluster centre are sent to the LLM with a
    zero-shot CoT prompt and the returned text becomes the demonstration's
    reasoning.
    """

    def __init__(self, llm):
        # `llm` must provide generate(prompt); its return value is stored
        # verbatim as the reasoning of a demonstration.
        self.llm = llm

    def generate_demos(
        self,
        questions: list,
        num_clusters: int = 8,
        samples_per_cluster: int = 1
    ) -> list:
        """Generate demonstration examples from a pool of questions.

        Args:
            questions: Candidate question strings.
            num_clusters: Requested number of k-means clusters. Capped at
                len(questions), because k-means cannot fit more clusters
                than samples.
            samples_per_cluster: How many of the questions nearest to each
                cluster centre become demonstrations. Values below 1 are
                treated as 1.

        Returns:
            A list of {"question", "reasoning"} dicts, grouped by cluster and
            ordered nearest-to-centre first inside each cluster. Empty when
            *questions* is empty.
        """
        if not questions:
            return []

        per_cluster = max(1, samples_per_cluster)
        n_clusters = min(num_clusters, len(questions))

        # Step 1: cluster the questions.
        # NOTE(review): the default TfidfVectorizer tokenizer splits on word
        # boundaries, which is presumably coarse for unsegmented Chinese
        # text - confirm whether a custom tokenizer is wanted.
        vectorizer = TfidfVectorizer(max_features=100)
        question_vectors = vectorizer.fit_transform(questions)

        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        labels = kmeans.fit_predict(question_vectors)

        # Step 2: pick the representative question(s) of every cluster.
        demos = []
        for cluster_id in range(n_clusters):
            member_indices = np.flatnonzero(labels == cluster_id)
            if member_indices.size == 0:
                continue

            # Distances of all members to the centre in one vectorised call;
            # rows are addressed by position, so duplicated question texts
            # need no list.index() lookup.
            center = kmeans.cluster_centers_[cluster_id]
            member_vectors = question_vectors[member_indices].toarray()
            distances = np.linalg.norm(member_vectors - center, axis=1)

            # A stable sort keeps the earliest question on ties, which is
            # what argmin would pick for a single sample.
            nearest = np.argsort(distances, kind="stable")[:per_cluster]

            for position in nearest:
                representative = questions[int(member_indices[position])]

                # Step 3: let the model produce the reasoning (zero-shot CoT).
                demos.append({
                    "question": representative,
                    "reasoning": self._generate_reasoning(representative)
                })

        return demos

    def _generate_reasoning(self, question: str) -> str:
        """Ask the LLM for step-by-step reasoning on *question*."""
        prompt = f"""问题：{question}

让我们逐步思考并解决这个问题："""

        return self.llm.generate(prompt)

    def create_prompt(self, test_question: str, demos: list) -> str:
        """Assemble the final prompt from demonstrations and the test question.

        Args:
            test_question: The question the model should answer.
            demos: Dicts with "question" and "reasoning" keys, e.g. the
                output of generate_demos().

        Returns:
            Numbered Q/A pairs separated by blank lines, followed by the
            test question and the step-by-step trigger.
        """
        prompt_parts = []

        # Demonstrations first, numbered from 1.
        for i, demo in enumerate(demos, 1):
            prompt_parts.append(f"Q{i}: {demo['question']}")
            prompt_parts.append(f"A{i}: {demo['reasoning']}")
            prompt_parts.append("")

        # Then the question to be solved.
        prompt_parts.append(f"Q: {test_question}")
        prompt_parts.append("A: Let's think step by step")

        return "\n".join(prompt_parts)

Few-shot CoT

通过提供包含推理过程的示例，引导模型学习如何逐步推理。

1. 手动设计示例

# few_shot_cot.py
class FewShotCoT:
    """Few-shot Chain-of-Thought prompting with hand-written demonstrations.

    Three built-in demonstration sets are provided (math, logic and
    commonsense); each entry pairs a question with a worked, numbered
    reasoning trace that ends in an explicit answer line.
    """

    # Worked examples for arithmetic / algebra word problems.
    MATH_EXAMPLES = [
        {
            "question": "一个长方形的长是宽的2倍，周长是36厘米。求长和宽。",
            "reasoning": """让我逐步解决这个问题：
1. 设宽为x厘米，则长为2x厘米
2. 周长公式：2×(长+宽) = 36
3. 代入：2×(2x+x) = 36
4. 简化：2×3x = 36 → 6x = 36
5. 解得：x = 6
6. 所以宽是6厘米，长是12厘米
答案：长12厘米，宽6厘米"""
        },
        {
            "question": "商店里有苹果和梨共120千克，苹果是梨的3倍。各有多少千克？",
            "reasoning": """让我逐步解决这个问题：
1. 设梨有x千克，则苹果有3x千克
2. 总数：x + 3x = 120
3. 简化：4x = 120
4. 解得：x = 30
5. 所以梨30千克，苹果90千克
答案：苹果90千克，梨30千克"""
        }
    ]

    # Worked example for deductive logic puzzles.
    LOGIC_EXAMPLES = [
        {
            "question": "甲、乙、丙三人中，一人是医生，一人是教师，一人是工程师。已知甲不是医生，乙不是教师，丙是工程师。请问各自的职业？",
            "reasoning": """让我逐步推理：
1. 已知丙是工程师
2. 剩下医生和教师给甲和乙
3. 甲不是医生，所以甲是教师
4. 那么乙就是医生
答案：甲是教师，乙是医生，丙是工程师"""
        }
    ]

    # Worked example for everyday commonsense explanations.
    COMMONSENSE_EXAMPLES = [
        {
            "question": "为什么冰箱门打开后，里面的灯会亮？",
            "reasoning": """让我逐步分析：
1. 冰箱门上有开关装置
2. 当门关闭时，开关被压住，电路断开
3. 当门打开时，开关释放，电路接通
4. 电路接通后，灯就亮了
5. 这样设计是为了方便在黑暗中取物
答案：因为冰箱门上有门控开关，开门时开关闭合，灯就亮了。"""
        }
    ]

    @staticmethod
    def create_prompt(
        question: str,
        examples: list = None,
        example_type: str = "math"
    ) -> str:
        """Build a few-shot CoT prompt for *question*.

        Args:
            question: The question the model should answer.
            examples: Demonstrations with "question" and "reasoning" keys.
                When None, a built-in set is chosen by *example_type*.
            example_type: "math" or "logic" select the matching built-in
                set; any other value selects the commonsense set.

        Returns:
            Numbered Q/A demonstrations, each followed by a blank line, then
            the test question and the step-by-step trigger.
        """
        if examples is None:
            builtin_sets = {
                "math": FewShotCoT.MATH_EXAMPLES,
                "logic": FewShotCoT.LOGIC_EXAMPLES,
            }
            # Anything that is not a known key falls back to commonsense.
            examples = builtin_sets.get(
                example_type, FewShotCoT.COMMONSENSE_EXAMPLES
            )

        lines = []
        for number, shot in enumerate(examples, start=1):
            lines += [
                f"Q{number}: {shot['question']}",
                f"A{number}: {shot['reasoning']}",
                "",
            ]

        # The unanswered test question always comes last.
        lines += [f"Q: {question}", "A: Let's think step by step"]
        return "\n".join(lines)

# Usage example: build a few-shot prompt that uses the built-in math
# demonstrations.
question = "小明有5个苹果，给了小红2个，又买了3个。现在有几个苹果？"
prompt = FewShotCoT.create_prompt(question, example_type="math")

2. 动态示例选择

# dynamic_few_shot_cot.py
from sentence_transformers import SentenceTransformer
import numpy as np

class DynamicFewShotCoT:
    """Few-shot CoT that picks the demonstrations most similar to the question.

    All example questions are embedded once at construction time; at query
    time the question is embedded and the examples with the highest
    dot-product similarity are used as demonstrations.
    """

    def __init__(self, examples: list, model_name: str = "all-MiniLM-L6-v2"):
        """Store the example pool and pre-compute its embeddings.

        Args:
            examples: Dicts with at least "question" and "reasoning" keys.
            model_name: Name passed to SentenceTransformer.
        """
        self.examples = examples
        self.encoder = SentenceTransformer(model_name)

        # Encode every example question once so queries only embed one text.
        self.example_embeddings = self.encoder.encode(
            [ex['question'] for ex in examples]
        )

    def get_relevant_examples(self, question: str, k: int = 3) -> list:
        """Return the *k* examples most similar to *question*.

        Args:
            question: The query text.
            k: Maximum number of examples to return. Non-positive values
                yield an empty list; values above the pool size return the
                whole pool.

        Returns:
            Examples ordered from most to least similar.
        """
        # Guard first: with k == 0 the slice [-k:] below would be [-0:], i.e.
        # the WHOLE array, and a negative k would select an arbitrary tail.
        if k <= 0 or not self.examples:
            return []

        question_embedding = self.encoder.encode([question])

        # Raw dot product between each example and the question.
        # NOTE(review): this equals cosine similarity only if the encoder
        # emits unit-length vectors - confirm for the chosen model.
        similarities = np.dot(
            self.example_embeddings,
            question_embedding.T
        ).flatten()

        k = min(k, len(self.examples))

        # argsort is ascending: take the last k and reverse for best-first.
        top_k_indices = np.argsort(similarities)[-k:][::-1]

        return [self.examples[i] for i in top_k_indices]

    def create_prompt(self, question: str, k: int = 3) -> str:
        """Build a few-shot CoT prompt from the *k* most relevant examples.

        Returns:
            Numbered Q/A demonstrations, each followed by a blank line, then
            the question and the step-by-step trigger.
        """
        relevant_examples = self.get_relevant_examples(question, k)

        prompt_parts = []

        for i, ex in enumerate(relevant_examples, 1):
            prompt_parts.append(f"Q{i}: {ex['question']}")
            prompt_parts.append(f"A{i}: {ex['reasoning']}")
            prompt_parts.append("")

        prompt_parts.append(f"Q: {question}")
        prompt_parts.append("A: Let's think step by step")

        return "\n".join(prompt_parts)

# Usage example: a mixed pool of placeholder examples; the selector embeds
# them once and picks the closest ones for each new question.
all_examples = [
    # Math example
    {"question": "...", "reasoning": "...", "category": "math"},
    # Logic example
    {"question": "...", "reasoning": "...", "category": "logic"},
    # Commonsense example
    {"question": "...", "reasoning": "...", "category": "commonsense"},
]

dynamic_cot = DynamicFewShotCoT(all_examples)
prompt = dynamic_cot.create_prompt("新的问题...")

Self-Consistency

Self-Consistency 通过多次采样并选择最一致的答案来提升准确性。

# self_consistency.py
from collections import Counter
import re

class SelfConsistencyCoT:
    """Self-Consistency decoding on top of zero-shot CoT.

    The same prompt is sampled several times at a non-zero temperature, the
    final answer is extracted from every response, and the most frequent
    answer wins.
    """

    def __init__(self, llm, num_samples: int = 10, temperature: float = 0.7):
        # `llm` must provide generate(prompt, temperature=...).
        self.llm = llm
        self.num_samples = num_samples
        self.temperature = temperature

    def solve(self, question: str) -> dict:
        """Answer *question* by majority vote over sampled reasoning paths.

        Returns:
            A dict with:
              "answer": the most frequent extracted answer (None when no
                  sample was drawn),
              "confidence": share of samples that agree with it,
              "reasoning": the first response that produced that answer,
              "all_answers": the extracted answer of every sample,
              "answer_distribution": answer -> number of votes.
        """
        # The prompt does not change between samples, so build it once.
        prompt = self._create_prompt(question)
        responses = [
            self.llm.generate(prompt, temperature=self.temperature)
            for _ in range(self.num_samples)
        ]

        # With num_samples <= 0 there is nothing to vote on.
        if not responses:
            return {
                "answer": None,
                "confidence": 0.0,
                "reasoning": None,
                "all_answers": [],
                "answer_distribution": {}
            }

        answers = [self._extract_answer(r) for r in responses]
        answer_counts = Counter(answers)

        best_answer, votes = answer_counts.most_common(1)[0]

        return {
            "answer": best_answer,
            "confidence": votes / len(answers),
            # First response whose extracted answer is the winner.
            "reasoning": responses[answers.index(best_answer)],
            "all_answers": answers,
            "answer_distribution": dict(answer_counts)
        }

    def _create_prompt(self, question: str) -> str:
        """Return the zero-shot CoT prompt for *question*."""
        return f"""Q: {question}
A: Let's think step by step"""

    def _extract_answer(self, response: str) -> str:
        """Extract the final answer from one sampled response.

        Order of preference:
          1. text after the last "答案" marker (optionally followed by "是"
             and/or a colon), up to the end of that line;
          2. the last number in the response;
          3. the whole response, stripped.
        """
        marked = re.findall(r'答案是?[：:]?\s*([^\n]+)', response)
        if marked:
            return marked[-1].strip()

        # Numeric fallback. findall returns the matched text itself here, so
        # no capture group is required.
        numbers = re.findall(r'\d+(?:\.\d+)?', response)
        if numbers:
            return numbers[-1]

        return response.strip()

# 使用示例
# solver = SelfConsistencyCoT(llm, num_samples=10)
# result = solver.solve("复杂问题...")
# print(f"答案：{result['answer']} (置信度: {result['confidence']:.2%})")

Tree of Thoughts (ToT)

ToT 将推理过程组织成树形结构，探索多条推理路径。

# tree_of_thoughts.py
from typing import List, Dict, Optional
import random

class ThoughtNode:
    """One node of the thought tree: a reasoning step plus search statistics."""

    def __init__(self, thought: str, parent: Optional['ThoughtNode'] = None):
        # Tree links
        self.parent = parent
        self.children: List['ThoughtNode'] = []
        # Payload
        self.thought = thought
        # Search statistics; value stays None until something assigns it.
        self.visits: int = 0
        self.value: Optional[float] = None

    def add_child(self, thought: str) -> 'ThoughtNode':
        """Create a child holding *thought*, attach it and return it."""
        self.children.append(ThoughtNode(thought, parent=self))
        return self.children[-1]

    def get_path(self) -> List[str]:
        """Return the thoughts from the root down to this node, root first."""
        trail = []
        cursor = self
        # Walk up through the parent links, then flip the collected order.
        while cursor:
            trail.append(cursor.thought)
            cursor = cursor.parent
        trail.reverse()
        return trail

class TreeOfThoughts:
    """Tree of Thoughts search driven by an MCTS-style loop.

    Every iteration selects a node with UCT, expands it with LLM-generated
    next thoughts, scores one path with the LLM and propagates the score
    back towards the root. The answer is read off the most-visited path.
    """

    def __init__(
        self,
        llm,
        num_branches: int = 3,
        max_depth: int = 5,
        exploration_constant: float = 1.414
    ):
        """
        Args:
            llm: Object providing generate(prompt).
            num_branches: Number of child thoughts requested per expansion.
            max_depth: Maximum path length (root included) that may still
                be expanded.
            exploration_constant: Weight of the UCT exploration term.
        """
        self.llm = llm
        self.num_branches = num_branches
        self.max_depth = max_depth
        self.exploration_constant = exploration_constant

    def solve(self, problem: str) -> Dict:
        """Search for a reasoning path that solves *problem*.

        Returns:
            A dict with "path" (thoughts from root to the chosen leaf),
            "answer" (last thought of that path, or None) and "tree" (the
            root node of the explored tree).
        """
        root = ThoughtNode(f"问题：{problem}")

        for _ in range(self.max_depth * self.num_branches):
            # Selection
            node = self._select(root)

            # Expansion
            if not node.children and self._should_expand(node):
                self._expand(node, problem)

            # Simulation: score a freshly created child when there is one.
            # Back-propagation then starts at the node that was actually
            # scored, so that child's own visits/value are recorded too.
            if node.children:
                node = random.choice(node.children)
                value = self._simulate(node, problem)
            else:
                value = self._evaluate(node, problem)

            self._backpropagate(node, value)

        best_path = self._get_best_path(root)
        return {
            "path": best_path,
            "answer": best_path[-1] if best_path else None,
            "tree": root
        }

    def _select(self, node: 'ThoughtNode') -> 'ThoughtNode':
        """Descend from *node* to a leaf or to the first unvisited child.

        Visited children are ranked by UCT:
            mean_value + c * sqrt(ln(parent_visits) / child_visits)
        """
        import math

        while node.children:
            # An unvisited child is always tried first.
            unvisited = next(
                (child for child in node.children if child.visits == 0),
                None
            )
            if unvisited is not None:
                return unvisited

            # max(..., 1) keeps log() defined if the parent was never updated.
            log_parent = math.log(max(node.visits, 1))

            def uct(child):
                exploit = child.value / child.visits
                explore = math.sqrt(log_parent / child.visits)
                return exploit + self.exploration_constant * explore

            # max() keeps the first child on ties.
            node = max(node.children, key=uct)

        return node

    def _expand(self, node: 'ThoughtNode', problem: str):
        """Ask the LLM for next thoughts and attach them as children of *node*."""
        current_path = node.get_path()

        # One numbered slot per requested branch (was fixed at three slots).
        slots = "\n".join(f"{i}." for i in range(1, self.num_branches + 1))

        prompt = f"""问题：{problem}

当前思考路径：
{' -> '.join(current_path)}

请生成{self.num_branches}个不同的下一步思考，每个思考用数字编号：
{slots}"""

        response = self.llm.generate(prompt)

        thoughts = self._parse_thoughts(response)

        for thought in thoughts[:self.num_branches]:
            node.add_child(thought)

    def _simulate(self, node: 'ThoughtNode', problem: str) -> float:
        """Have the LLM rate the path ending at *node*.

        Returns:
            The first number found in the LLM reply, clamped to 0-10 and
            scaled to 0-1; 0.5 when the reply contains no number.
        """
        # Local import so the method does not depend on a module-level
        # `import re`.
        import re

        path = node.get_path()

        prompt = f"""问题：{problem}

思考路径：
{' -> '.join(path)}

请评估这个思考路径的质量（0-10分），并给出理由："""

        response = self.llm.generate(prompt)

        match = re.search(r'(\d+(?:\.\d+)?)', response)
        if match is None:
            return 0.5  # neutral score when no number can be parsed

        score = float(match.group(1))
        return min(max(score, 0), 10) / 10  # normalise to 0-1

    def _evaluate(self, node: 'ThoughtNode', problem: str) -> float:
        """Score a leaf node; currently identical to _simulate()."""
        return self._simulate(node, problem)

    def _backpropagate(self, node: 'ThoughtNode', value: float):
        """Add one visit and *value* to *node* and each of its ancestors."""
        while node is not None:
            node.visits += 1
            if node.value is None:
                node.value = value
            else:
                node.value += value
            node = node.parent

    def _get_best_path(self, root: 'ThoughtNode') -> List[str]:
        """Follow the most-visited child from *root* down to a leaf."""
        path = []
        node = root

        while node is not None:
            path.append(node.thought)

            if not node.children:
                break

            node = max(node.children, key=lambda c: c.visits)

        return path

    def _should_expand(self, node: 'ThoughtNode') -> bool:
        """Return True while the path to *node* is shorter than max_depth."""
        return len(node.get_path()) < self.max_depth

    def _parse_thoughts(self, response: str) -> List[str]:
        """Parse a numbered or bulleted list out of an LLM reply.

        Recognised item prefixes: "1." / "1)" / "1、" / "1:" (ASCII or
        full-width), a bare number followed by whitespace, or a "-", "*",
        "•" bullet. Only the prefix is removed, so thoughts that themselves
        start with digits (e.g. "35×2=70") stay intact. Lines without such a
        prefix and empty items are skipped.
        """
        import re

        item = re.compile(
            r'^(?:\d+\s*[\.．、\)）:：]\s*|\d+\s+|[-*•]\s*)(.*\S)'
        )

        thoughts = []
        for line in response.strip().split('\n'):
            match = item.match(line.strip())
            if match:
                thoughts.append(match.group(1))

        return thoughts

CoT 最佳实践

1. 示例选择原则

# cot_best_practices.py
class CoTExampleSelector:
    """Choose which demonstrations to put into a few-shot CoT prompt."""

    # Strategy name -> human-readable description. "coverage" has no
    # dedicated implementation in select_examples() and uses the fallback.
    SELECTION_STRATEGIES = {
        "diversity": "选择不同类型的示例",
        "similarity": "选择与问题相似的示例",
        "complexity": "从简单到复杂排列",
        "coverage": "覆盖所有可能的推理模式"
    }

    @staticmethod
    def select_examples(
        all_examples: list,
        test_question: str,
        strategy: str = "diversity",
        k: int = 3
    ) -> list:
        """Select up to *k* examples according to *strategy*.

        Args:
            all_examples: Example dicts. "category", "question" and
                "complexity" keys are read depending on the strategy.
            test_question: The question being asked; used by "similarity".
            strategy:
                "diversity"  - the first example of each category, in order
                               of first appearance (missing category counts
                               as "default");
                "similarity" - examples whose "question" text is closest to
                               *test_question* (difflib ratio), best first;
                "complexity" - examples sorted by ascending "complexity"
                               (missing counts as 0);
                anything else - the first *k* examples as given.
            k: Maximum number of examples; non-positive values give [].

        Returns:
            A new list with at most *k* examples.
        """
        # A negative k would otherwise slice from the end ([:-1] etc.).
        if k <= 0:
            return []

        if strategy == "diversity":
            seen_categories = set()
            selected = []
            for ex in all_examples:
                category = ex.get("category", "default")
                if category in seen_categories:
                    continue
                seen_categories.add(category)
                selected.append(ex)
                if len(selected) == k:
                    break
            return selected

        if strategy == "similarity":
            # Character-level similarity from the standard library; no
            # embedding model is required.
            from difflib import SequenceMatcher

            def closeness(ex):
                return SequenceMatcher(
                    None, test_question, ex.get("question", "")
                ).ratio()

            # sorted() is stable, so equally similar examples keep their
            # original relative order.
            return sorted(all_examples, key=closeness, reverse=True)[:k]

        if strategy == "complexity":
            return sorted(
                all_examples,
                key=lambda x: x.get("complexity", 0)
            )[:k]

        return all_examples[:k]

2. 提示词模板

# cot_prompt_templates.py
# Building blocks for each supported CoT prompt style. Only "prefix" (all
# styles) and "suffix" (the "structured" style) are read by
# create_structured_cot_prompt(); the remaining keys are not used there.
COT_TEMPLATES = {
    "step_by_step": {
        "prefix": "让我们逐步思考：",
        "connector": "\n",
        "suffix": "因此，答案是："
    },
    "structured": {
        "prefix": "我将按以下步骤解决：",
        "format": "步骤{step}: {reasoning}",
        "suffix": "最终答案："
    },
    "explanatory": {
        "prefix": "让我解释一下我的推理过程：",
        "style": "首先...然后...接着...最后...",
        "suffix": "所以答案是："
    }
}


def create_structured_cot_prompt(
    question: str,
    template: str = "step_by_step"
) -> str:
    """Render a CoT prompt for *question* in the chosen template style.

    Args:
        question: The problem statement.
        template: A key of COT_TEMPLATES.

    Returns:
        The question line, a blank line and the template prefix. For the
        "structured" template three empty numbered slots, a blank line and
        the template suffix are appended.

    Raises:
        KeyError: If *template* is not a key of COT_TEMPLATES.
    """
    chosen = COT_TEMPLATES[template]

    # Every style opens the same way: question, blank line, prefix.
    lines = [f"问题：{question}", "", chosen['prefix']]

    if template == "structured":
        # Each slot is "<n>. " including the trailing space.
        lines += ["1. ", "2. ", "3. ", "", chosen['suffix']]

    return "\n".join(lines)

应用场景

场景	CoT 类型	效果提升	示例
数学问题	Few-shot CoT	40-70%	应用题、方程求解
逻辑推理	Zero-shot CoT	30-50%	逻辑谜题、因果分析
代码生成	Few-shot CoT	25-40%	算法实现、调试
文本分析	Self-Consistency	20-35%	情感分析、主题分类
多步决策	Tree of Thoughts	35-60%	规划问题、游戏策略

总结

CoT 的核心要点：

显式推理：让模型展示思考过程，而非直接给答案
示例引导：Few-shot 比 Zero-shot 更稳定
多样采样：Self-Consistency 提升可靠性
树形探索：复杂问题用 ToT 多路径搜索

选择指南：

简单任务：Zero-shot CoT
标准任务：Few-shot CoT（3-5个示例）
高可靠性：Self-Consistency
复杂决策：Tree of Thoughts

相关资源：