DSPy: 宣言的言語モデルプログラミング

このスキルを使用する場合

次のような場合にDSPyを使用します:

複数のコンポーネントとワークフローを持つ複雑なAIシステムを構築する
手動プロンプトエンジニアリングの代わりに、言語モデルを宣言的にプログラミングする
データ駆動型の方法を使用してプロンプトを自動的に最適化する
保守性と移植性の高いモジュール化されたAIパイプラインを作成する
オプティマイザを使用してモデル出力を体系的に改善する
信頼性の高いRAGシステム、エージェント、分類器を構築する

GitHubスター: 22,000以上 | 作成者: Stanford NLP

インストール

# 安定版リリース
pip install dspy

# 最新の開発版
pip install git+https://github.com/stanfordnlp/dspy.git

# 特定のLMプロバイダ付き
pip install dspy[openai]        # OpenAI
pip install dspy[anthropic]     # Anthropic Claude
pip install dspy[all]           # すべてのプロバイダ

クイックスタート

基本的な例: 質問応答

import dspy

# 言語モデルを設定
lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
dspy.settings.configure(lm=lm)

# シグネチャを定義 (入力 → 出力)
# Signature with one input field (`question`) and one output field (`answer`).
# NOTE: the class docstring and the `desc` string are left untranslated on
# purpose: DSPy signatures presumably pass them to the model as instructions,
# which makes them runtime text rather than documentation (confirm in DSPy docs).
class QA(dspy.Signature):
    """簡潔な事実的回答で質問に答える。"""
    question = dspy.InputField()  # the question to answer
    answer = dspy.OutputField(desc="通常1〜5語")  # desc hints at the expected answer length

# モジュールを作成
qa = dspy.Predict(QA)

# 使用
response = qa(question="フランスの首都は?")
print(response.answer)  # "Paris"

思考の連鎖推論

import dspy

lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
dspy.settings.configure(lm=lm)

# より良い推論のためにChainOfThoughtを使用
# Signature mapping a word problem (`problem`) to a numeric `answer`.
# NOTE: docstring and `desc` are kept verbatim; they are presumably sent to the
# model as part of the prompt, so they are runtime text.
class MathProblem(dspy.Signature):
    """数学の文章問題を解く。"""
    problem = dspy.InputField()  # the word problem text
    answer = dspy.OutputField(desc="数値回答")  # desc asks for a numeric answer

# ChainOfThoughtは推論ステップを自動生成
cot = dspy.ChainOfThought(MathProblem)

response = cot(problem="ジョンが5個のりんごを持っていて、メアリーに2個あげた場合、彼はいくつ持っていますか?")
print(response.rationale)  # 推論ステップを表示
print(response.answer)     # "3"

コア概念

1. シグネチャ

シグネチャはAIタスクの構造を定義します (入力 → 出力):

# インラインシグネチャ (シンプル)
qa = dspy.Predict("question -> answer")

# クラスシグネチャ (詳細)
# Signature mapping an input `text` to a bullet-point `summary`.
# NOTE: docstring and `desc` are kept verbatim; they are presumably sent to the
# model as part of the prompt, so they are runtime text.
class Summarize(dspy.Signature):
    """テキストを要点に要約する。"""
    text = dspy.InputField()  # the text to summarize
    summary = dspy.OutputField(desc="箇条書き、3～5項目")  # desc requests 3-5 bullet points

summarizer = dspy.ChainOfThought(Summarize)

それぞれを使用する場合:

インライン: クイックプロトタイピング、シンプルなタスク
クラス: 複雑なタスク、型ヒント、より良いドキュメント

2. モジュール

モジュールは入力を出力に変換する再利用可能なコンポーネントです:

dspy.Predict

基本的な予測モジュール:

predictor = dspy.Predict("context, question -> answer")
result = predictor(context="パリはフランスの首都です",
                   question="首都は?")

dspy.ChainOfThought

回答の前に推論ステップを生成:

cot = dspy.ChainOfThought("question -> answer")
result = cot(question="なぜ空は青いのですか?")
print(result.rationale)  # 推論ステップ
print(result.answer)     # 最終回答

dspy.ReAct

ツールを使用したエージェント的な推論:

from dspy.predict import ReAct

# Signature for search-backed question answering: `question` in, `answer` out.
# It is handed to ReAct together with the tool list in this example.
# NOTE: the docstring is kept verbatim; it is presumably used as the task
# instruction in the prompt.
class SearchQA(dspy.Signature):
    """検索を使用して質問に答える。"""
    question = dspy.InputField()  # the question to answer
    answer = dspy.OutputField()  # the final answer

def search_tool(query: str) -> str:
    """Wikipediaを検索。"""
    # NOTE: the docstring above is kept verbatim because tool docstrings may be
    # surfaced to the model as the tool description (confirm in DSPy docs).
    #
    # Placeholder: the original snippet returned an undefined name `results`,
    # which raises NameError as soon as the agent calls the tool. Fail with an
    # explicit message instead until a real search backend is plugged in, e.g.
    #     results = my_search_client.search(query)
    #     return results
    raise NotImplementedError(
        f"search_tool is a placeholder; implement the search for query {query!r}"
    )

react = ReAct(SearchQA, tools=[search_tool])
result = react(question="Pythonはいつ作成されましたか?")

dspy.ProgramOfThought

推論のためにコードを生成して実行:

pot = dspy.ProgramOfThought("question -> answer")
result = pot(question="240の15%は?")
# 生成: answer = 240 * 0.15

3. オプティマイザ

オプティマイザはトレーニングデータを使用してモジュールを自動的に改善します:

BootstrapFewShot

例から学習:

from dspy.teleprompt import BootstrapFewShot

# トレーニングデータ
trainset = [
    dspy.Example(question="2+2は?", answer="4").with_inputs("question"),
    dspy.Example(question="3+5は?", answer="8").with_inputs("question"),
]

# メトリクスを定義
def validate_answer(example, pred, trace=None):
    """Return True when the predicted answer equals the gold answer exactly.

    Args:
        example: object exposing the gold ``answer`` attribute.
        pred: object exposing the predicted ``answer`` attribute.
        trace: accepted for the metric signature; not used here.
    """
    expected = example.answer
    predicted = pred.answer
    return expected == predicted

# 最適化
optimizer = BootstrapFewShot(metric=validate_answer, max_bootstrapped_demos=3)
optimized_qa = optimizer.compile(qa, trainset=trainset)

# これでoptimized_qaのパフォーマンスが向上!

MIPRO (Multiprompt Instruction PRoposal Optimizer)

プロンプトを反復的に改善:

from dspy.teleprompt import MIPRO

optimizer = MIPRO(
    metric=validate_answer,
    num_candidates=10,
    init_temperature=1.0
)

optimized_cot = optimizer.compile(
    cot,
    trainset=trainset,
    num_trials=100
)

BootstrapFinetune

モデル微調整用のデータセットを作成:

from dspy.teleprompt import BootstrapFinetune

optimizer = BootstrapFinetune(metric=validate_answer)
optimized_module = optimizer.compile(qa, trainset=trainset)

# 微調整用のトレーニングデータをエクスポート

4. 複雑なシステムの構築

マルチステージパイプライン

import dspy

class MultiHopQA(dspy.Module):
    """Three-stage pipeline: write a search query, retrieve passages, answer."""

    def __init__(self):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=3)
        self.generate_query = dspy.ChainOfThought("question -> search_query")
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer `question` and return a Prediction with `answer` and `context`."""
        # Stage 1: turn the question into a search query.
        query_pred = self.generate_query(question=question)

        # Stage 2: retrieve passages and join them into one newline-separated context.
        retrieved = self.retrieve(query_pred.search_query)
        context = "\n".join(retrieved.passages)

        # Stage 3: generate the answer from the retrieved context.
        answer_pred = self.generate_answer(context=context, question=question)
        return dspy.Prediction(answer=answer_pred.answer, context=context)

# パイプラインを使用
qa_system = MultiHopQA()
result = qa_system(question="映画ブレードランナーの着想となった本を書いたのは誰ですか?")

最適化されたRAGシステム

import dspy
from dspy.retrieve.chromadb_rm import ChromadbRM

# Set up the retriever
retriever = ChromadbRM(
    collection_name="documents",
    persist_directory="./chroma_db"
)
# Register it as the default retrieval model. Without this call the
# `dspy.Retrieve` modules in this example never see `retriever`, which was
# otherwise created but unused.
dspy.settings.configure(rm=retriever)

class RAG(dspy.Module):
    """Retrieve-then-generate question answering module."""

    def __init__(self, num_passages=3):
        super().__init__()
        # Retrieve `num_passages` passages per query.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Retrieve passages for `question` and generate an answer from them."""
        retrieved = self.retrieve(question)
        return self.generate(context=retrieved.passages, question=question)

# 作成と最適化
rag = RAG()

# トレーニングデータで最適化
from dspy.teleprompt import BootstrapFewShot

optimizer = BootstrapFewShot(metric=validate_answer)
optimized_rag = optimizer.compile(rag, trainset=trainset)

LMプロバイダの設定

Anthropic Claude

import dspy

lm = dspy.Claude(
    model="claude-sonnet-4-5-20250929",
    api_key="your-api-key",  # または ANTHROPIC_API_KEY 環境変数を設定
    max_tokens=1000,
    temperature=0.7
)
dspy.settings.configure(lm=lm)

OpenAI

lm = dspy.OpenAI(
    model="gpt-4",
    api_key="your-api-key",
    max_tokens=1000
)
dspy.settings.configure(lm=lm)

ローカルモデル (Ollama)

lm = dspy.OllamaLocal(
    model="llama3.1",
    base_url="http://localhost:11434"
)
dspy.settings.configure(lm=lm)

複数モデル

# タスクごとに異なるモデルを使用
cheap_lm = dspy.OpenAI(model="gpt-3.5-turbo")
strong_lm = dspy.Claude(model="claude-sonnet-4-5-20250929")

# 取得には安いモデル、推論には強力なモデルを使用
with dspy.settings.context(lm=cheap_lm):
    context = retriever(question)

with dspy.settings.context(lm=strong_lm):
    answer = generator(context=context, question=question)

一般的なパターン

パターン1: 構造化出力

from pydantic import BaseModel, Field

# Pydantic model describing the structured output: a name, an age and an
# occupation. The `description` strings are runtime schema text and are kept
# verbatim. No class docstring is added, since pydantic may include it in the
# generated schema.
class PersonInfo(BaseModel):
    name: str = Field(description="フルネーム")  # full name
    age: int = Field(description="年齢")  # age
    occupation: str = Field(description="現在の職業")  # current occupation

# Signature mapping free text to a typed `PersonInfo` output.
# NOTE: the docstring is kept verbatim; it is presumably used as the task
# instruction in the prompt.
class ExtractPerson(dspy.Signature):
    """テキストから人物情報を抽出。"""
    text = dspy.InputField()  # source text to extract from
    person: PersonInfo = dspy.OutputField()  # output annotated with the pydantic model

extractor = dspy.TypedPredictor(ExtractPerson)
result = extractor(text="ジョン・ドゥは35歳のソフトウェアエンジニアです。")
print(result.person.name)  # "ジョン・ドゥ"
print(result.person.age)   # 35

パターン2: アサーション駆動の最適化

import dspy
from dspy.primitives.assertions import assert_transform_module, backtrack_handler

class MathQA(dspy.Module):
    """Solve a math problem and assert that the solution parses as a number.

    The assertion only triggers retries when the module is wrapped, e.g.
    ``assert_transform_module(MathQA(), backtrack_handler)``; the handler is
    supplied there, not as an argument to ``dspy.Assert``.
    """

    def __init__(self):
        super().__init__()
        self.solve = dspy.ChainOfThought("problem -> solution: float")

    def forward(self, problem):
        """Return a Prediction carrying the model's `solution` for `problem`."""
        solution = self.solve(problem=problem).solution

        # Check numeric-ness without letting float() raise: the original
        # `isinstance(float(solution), float)` was either True or raised
        # ValueError before the assertion could run.
        try:
            float(solution)
            is_numeric = True
        except (TypeError, ValueError):
            is_numeric = False

        # Assert that the solution is numeric.
        dspy.Assert(
            is_numeric,
            "ソリューションは数値である必要があります",
        )

        return dspy.Prediction(solution=solution)

パターン3: 自己一貫性

import dspy
from collections import Counter

class ConsistentQA(dspy.Module):
    """Self-consistency: sample several answers and return the most frequent one."""

    def __init__(self, num_samples=5):
        super().__init__()
        self.qa = dspy.ChainOfThought("question -> answer")
        self.num_samples = num_samples

    def forward(self, question):
        """Ask `question` `num_samples` times and return the majority answer."""
        # NOTE(review): if the LM client caches identical requests, every sample
        # may come back the same - confirm sampling/caching settings.
        sampled = [
            self.qa(question=question).answer
            for _ in range(self.num_samples)
        ]

        # Pick the single most common answer.
        top = Counter(sampled).most_common(1)
        winner = top[0][0]
        return dspy.Prediction(answer=winner)

パターン4: リランキング付きレトリーバル

class RerankedRAG(dspy.Module):
    """Retrieve 10 candidates, score each with the LM, answer from the top 3."""

    def __init__(self):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=10)
        self.rerank = dspy.Predict("question, passage -> relevance_score: float")
        self.answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Return the answer prediction generated from the reranked context."""
        # Fetch the candidate passages.
        candidates = self.retrieve(question).passages

        # Score each candidate; pairs are (score, passage) so they sort by score first.
        scored = [
            (float(self.rerank(question=question, passage=candidate).relevance_score), candidate)
            for candidate in candidates
        ]

        # Keep the three highest-scoring passages, separated by blank lines.
        ranked = sorted(scored, reverse=True)
        context = "\n\n".join(passage for _, passage in ranked[:3])

        # Generate the answer.
        return self.answer(context=context, question=question)

評価とメトリクス

カスタムメトリクス

def exact_match(example, pred, trace=None):
    """Return True when gold and predicted answers match, ignoring case.

    ``trace`` is accepted for the metric signature and is not used.
    """
    gold = example.answer.lower()
    guess = pred.answer.lower()
    return gold == guess

def f1_score(example, pred, trace=None):
    """Return the token-overlap F1 between the predicted and gold answers.

    Both answers are lower-cased and split on whitespace into token sets.

    Args:
        example: object exposing the gold ``answer`` string.
        pred: object exposing the predicted ``answer`` string.
        trace: accepted for the metric signature; not used here.

    Returns:
        A float in [0.0, 1.0]; 0.0 when either answer has no tokens or the
        two token sets do not overlap.
    """
    pred_tokens = set(pred.answer.lower().split())
    gold_tokens = set(example.answer.lower().split())

    # An empty gold answer used to raise ZeroDivisionError in the recall
    # computation; treat either side being empty as no overlap.
    if not pred_tokens or not gold_tokens:
        return 0.0

    overlap = len(pred_tokens & gold_tokens)
    if overlap == 0:
        return 0.0

    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * (precision * recall) / (precision + recall)

評価

from dspy.evaluate import Evaluate

# 評価器を作成
evaluator = Evaluate(
    devset=testset,
    metric=exact_match,
    num_threads=4,
    display_progress=True
)

# モデルを評価
score = evaluator(qa_system)
print(f"正確性: {score}")

# Compare before and after optimization.
# NOTE: Evaluate is assumed to return the average metric as a percentage
# (0-100), so the difference is already in percentage points; formatting it
# with ":.2%" would multiply it by 100 a second time.
score_before = evaluator(qa)
score_after = evaluator(optimized_qa)
print(f"改善: {score_after - score_before:.2f} ポイント")

ベストプラクティス

1. シンプルに始めて、反復

# Predictから始める
qa = dspy.Predict("question -> answer")

# 必要に応じて推論を追加
qa = dspy.ChainOfThought("question -> answer")

# データを取得したら最適化
optimized_qa = optimizer.compile(qa, trainset=data)

2. 説明的なシグネチャを使用

# ❌ 不適切: 曖昧
# Deliberately vague signature (the anti-pattern in this comparison): it has
# no docstring and its generic field names say nothing about the task.
class Task(dspy.Signature):
    input = dspy.InputField()
    output = dspy.OutputField()

# ✅ 良好: 説明的
# Descriptive signature: a task docstring plus a `desc` on every field.
# NOTE: docstring and `desc` strings are kept verbatim; they are presumably
# sent to the model as part of the prompt, so they are runtime text.
class SummarizeArticle(dspy.Signature):
    """ニュース記事を3〜5つの重要ポイントに要約。"""
    article = dspy.InputField(desc="記事全文")  # full article text
    summary = dspy.OutputField(desc="箇条書き、3～5項目")  # 3-5 bullet points

3. 代表的なデータで最適化

# 多様なトレーニング例を作成
trainset = [
    dspy.Example(question="事実型", answer="...").with_inputs("question"),
    dspy.Example(question="推論型", answer="...").with_inputs("question"),
    dspy.Example(question="計算型", answer="...").with_inputs("question"),
]

# 検証セットをメトリクスに使用
def metric(example, pred, trace=None):
    """Return True when the gold answer occurs inside the predicted answer.

    ``trace`` is accepted for the metric signature and is not used.
    """
    needle = example.answer
    haystack = pred.answer
    return needle in haystack

4. 最適化されたモデルを保存・読み込み

# 保存
optimized_qa.save("models/qa_v1.json")

# 読み込み
loaded_qa = dspy.ChainOfThought("question -> answer")
loaded_qa.load("models/qa_v1.json")

5. 監視とデバッグ

# Enable tracing
dspy.settings.configure(lm=lm, trace=[])

# Run a prediction
result = qa(question="...")

# Inspect the trace. Each entry is a (predictor, inputs, outputs) tuple, not a
# dict, so indexing it with 'prompt' / 'response' keys fails.
for predictor, inputs, outputs in dspy.settings.trace:
    print(f"モジュール: {type(predictor).__name__}")
    print(f"入力: {inputs}")
    print(f"出力: {outputs}")

# Print the raw prompt and response of the most recent LM call
lm.inspect_history(n=1)

他のアプローチとの比較

機能	手動プロンプティング	LangChain	DSPy
プロンプトエンジニアリング	手動	手動	自動
最適化	試行錯誤	なし	データ駆動
モジュール性	低い	中程度	高い
型安全性	いいえ	限定的	はい (シグネチャ)
移植性	低い	中程度	高い
学習曲線	低い	中程度	中程度～高い

DSPyを選ぶ場合:

トレーニングデータがあるか、生成できる
体系的なプロンプト改善が必要
複雑なマルチステージシステムを構築している
複数の異なるLMにまたがって最適化したい

他の選択肢を選ぶ場合:

クイックプロトタイプ (手動プロンプティング)
既存ツールを使用したシンプルなチェーン (LangChain)
カスタム最適化ロジックが必要

リソース

ドキュメント: https://dspy.ai
GitHub: https://github.com/stanfordnlp/dspy (22k以上のスター)
Discord: https://discord.gg/XCGy2WDCQB
Twitter: @DSPyOSS
論文: "DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines"

SKILL.md 本文