[
  {
    "title": "Fable 的判断力",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jul/3/judgement/",
    "date": "2026-07-03",
    "type": "agent-workflow",
    "tags": [
      "Claude Code",
      "agent 判断",
      "工作流"
    ],
    "why_it_matters": "Agent 省钱不只靠更便宜的 token，也靠减少错误循环。",
    "hook_angle": "Agent 省钱不只是少 token，而是少干预、少返工、少错误 loop。",
    "intro": "Claude Code 团队分享的一个重点是：真正的节省来自更好的判断、更少返工和更少人工干预。",
    "locale": "zh"
  },
  {
    "title": "发布：llm-coding-agent 0.1a0",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jul/2/llm-coding-agent/",
    "date": "2026-07-02",
    "type": "coding-agent",
    "tags": [
      "coding agent",
      "LLM",
      "Python"
    ],
    "why_it_matters": "一个最小 coding agent 可以拆出 token 花在哪里。",
    "hook_angle": "一个 coding agent 到底由哪些 token-consuming steps 组成？",
    "intro": "这个 LLM 驱动的 coding agent 展示了任务规划、文件读取、代码修改和验证各自消耗 token 的位置。",
    "locale": "zh"
  },
  {
    "title": "用 DSPy 评估并改进 Datasette Agent 的 SQL 系统提示词",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jul/2/dspy-datasette-agent-prompts/",
    "date": "2026-07-02",
    "type": "prompt-evaluation",
    "tags": [
      "DSPy",
      "系统提示词",
      "评测"
    ],
    "why_it_matters": "提示词优化应该用评测框架验证，而不是凭感觉。",
    "hook_angle": "少花 token 的第一步：知道你的 system prompt 是否真的有效。",
    "intro": "项目用 DSPy 评估和改进 Datasette Agent 的只读 SQL 问答系统提示词，把 prompt 质量放进可测试流程。",
    "locale": "zh"
  },
  {
    "title": "Vercel 的 Andrew Qu：为什么 agents 是一种新软件",
    "source": "Latent Space",
    "url": "https://www.latent.space/p/vercel-agents-new-software",
    "date": "2026-07-03",
    "type": "agent-infrastructure",
    "tags": [
      "agents",
      "Vercel",
      "agent-readable 网站"
    ],
    "why_it_matters": "面向 agent 可读的网站正在成为产品表面的一部分。",
    "hook_angle": "未来网站不只是给人看，也要给 agent 读。",
    "intro": "Vercel 解释其 agent 框架 eve，以及 skills、沙箱和 agent-readable websites 为什么重要。",
    "locale": "zh"
  },
  {
    "title": "Cursor 如何在企业内部部署 AI",
    "source": "Latent Space",
    "url": "https://www.latent.space/p/cursor-forward-deployed-engineers",
    "date": "2026-07-01",
    "type": "enterprise-agent-workflow",
    "tags": [
      "Cursor",
      "软件工厂",
      "企业 AI"
    ],
    "why_it_matters": "当工作流规模化，vibe coding 会变成团队预算问题。",
    "hook_angle": "Cursor 的真正成本不是订阅费，而是整个软件工厂的 token burn。",
    "intro": "Cursor 的 Forward Deployed Engineers 帮企业搭建 agent 工作流，本质上是在搭建新的软件工厂。",
    "locale": "zh"
  },
  {
    "title": "Claude Sonnet 5 有哪些新变化",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jun/30/claude-sonnet-5/",
    "date": "2026-06-30",
    "type": "model-update",
    "tags": [
      "Claude",
      "Sonnet",
      "模型更新"
    ],
    "why_it_matters": "新模型发布会影响默认选择、agent 成本和失败率。",
    "hook_angle": "模型升级后，是否应该切默认模型？看 docs，不看营销。",
    "intro": "模型升级后应该先看开发者文档，而不是营销稿，判断是否值得切换默认模型。",
    "locale": "zh"
  },
  {
    "title": "ScarfBench：企业 Java 迁移的 AI Agent 基准",
    "source": "Hugging Face / IBM Research",
    "url": "https://huggingface.co/blog/ibm-research/scarfbench",
    "date": "2026-06-30",
    "type": "benchmark",
    "tags": [
      "基准",
      "AI agents",
      "Java 迁移"
    ],
    "why_it_matters": "Agent benchmark 正在走向真实企业迁移任务。",
    "hook_angle": "真正有用的 agent benchmark 应该衡量任务完成成本。",
    "intro": "IBM Research 的 ScarfBench 把 agent 放到企业 Java 框架迁移场景里评测。",
    "locale": "zh"
  },
  {
    "title": "Ornith-1.0：用于 Agentic Coding 的自搭脚手架 LLM",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jun/29/ornith/",
    "date": "2026-06-29",
    "type": "open-model",
    "tags": [
      "开放权重",
      "agentic coding",
      "coding 模型"
    ],
    "why_it_matters": "开放权重 coding 模型可能改变 API 成本结构。",
    "hook_angle": "如果 open models 足够会写代码，个人 agent 成本结构会变。",
    "intro": "DeepReinforce 发布的 MIT 许可开放权重模型，提供多种 dense 和 MoE 规模，用于 agentic coding。",
    "locale": "zh"
  },
  {
    "title": "OpenAI 内部 Codex 输出 token 中位数大幅增长",
    "source": "Latent Space / AINews",
    "url": "https://www.latent.space/p/ainews-openai-reports-median-internal",
    "date": "2026-06-26",
    "type": "token-usage-signal",
    "tags": [
      "Codex",
      "输出 token",
      "agent 使用"
    ],
    "why_it_matters": "输出 token 增长是 agent 工作流里的隐性大成本。",
    "hook_angle": "Agent 时代，贵的不一定是 input，可能是 output 和 loop。",
    "intro": "Agent 时代，真正昂贵的可能不是输入，而是输出、重试和循环。",
    "locale": "zh"
  },
  {
    "title": "GLM-5.2 是开放 agent 的跃迁点",
    "source": "Interconnects",
    "url": "https://www.interconnects.ai/p/glm-52-is-the-step-change-for-open",
    "date": "2026-06-22",
    "type": "open-model-analysis",
    "tags": [
      "GLM",
      "开放 agents",
      "中国模型"
    ],
    "why_it_matters": "中国和开放模型是全球 agent 成本/性能比较的一部分。",
    "hook_angle": "中国模型不是边缘信息，而是全球 agent cost/performance 版图的一部分。",
    "intro": "开放模型能力阈值正在变化，GLM-5.2 是值得纳入 agent 成本地图的信号。",
    "locale": "zh"
  },
  {
    "title": "够不够 agentic？在自己的工具链上评测开放模型",
    "source": "Hugging Face",
    "url": "https://huggingface.co/blog/is-it-agentic-enough",
    "date": "2026-06-18",
    "type": "benchmark",
    "tags": [
      "agent 基准",
      "开放模型",
      "工具链"
    ],
    "why_it_matters": "你的工具链可能比公开排行榜排名更重要。",
    "hook_angle": "别问哪个模型最好，问哪个模型在你的 agent stack 上最便宜地成功。",
    "intro": "不要只问哪个模型最好，要问哪个模型能在你的 agent stack 上以最低成本成功。",
    "locale": "zh"
  },
  {
    "title": "Qwen3 基准结果",
    "source": "Aider",
    "url": "https://aider.chat/2025/05/08/qwen3.html",
    "date": "2025-05-08",
    "type": "coding-benchmark",
    "tags": [
      "Qwen",
      "Aider",
      "coding 基准"
    ],
    "why_it_matters": "这是连接中国模型和 coding-agent 评测的耐久桥梁。",
    "hook_angle": "中文/中国模型在 coding agent 里的位置，应该用实际 coding benchmark 讨论。",
    "intro": "Aider polyglot coding benchmark 给 Qwen3 模型提供了可讨论的 coding-agent 证据。",
    "locale": "zh"
  },
  {
    "title": "Claude Code 如何使用 prompt caching",
    "source": "Claude Code Docs",
    "url": "https://code.claude.com/docs/en/prompt-caching",
    "date": "2026-07-04",
    "type": "official-docs",
    "tags": [
      "Claude Code",
      "prompt caching",
      "token 节省"
    ],
    "why_it_matters": "Prompt caching 直接影响速度和 token 成本。",
    "hook_angle": "你以为 Claude Code 慢/贵，其实可能是 cache miss。",
    "intro": "Claude Code 自动管理提示缓存；模型切换、/compact、CLAUDE.md 修改和 cache hit rate 都会影响体验。",
    "locale": "zh"
  },
  {
    "title": "从 Claude 转向 OpenCode 和 OpenRouter",
    "source": "Ian Wootten",
    "url": "https://www.ianwootten.co.uk/2026/07/01/ditching-claude-for-opencode-and-openrouter/",
    "date": "2026-07-01",
    "type": "field-report",
    "tags": [
      "OpenCode",
      "OpenRouter",
      "成本"
    ],
    "why_it_matters": "这是从默认工具切到开放 router/模型工作流的真实案例。",
    "hook_angle": "什么时候值得离开默认工具，改用开放 router 和开源模型？",
    "intro": "一个用户在六月放弃 Claude Code，改用 OpenCode、OpenRouter 和开放权重模型，提供了实际迁移线索。",
    "locale": "zh"
  },
  {
    "title": "Contextify：Claude Code 和 Codex 的可搜索历史",
    "source": "Contextify",
    "url": "https://contextify.sh/",
    "date": "2026-07-04",
    "type": "tool",
    "tags": [
      "Claude Code",
      "Codex",
      "agent memory"
    ],
    "why_it_matters": "Agent 历史和可复用上下文能减少重复 token 消耗。",
    "hook_angle": "如果每次 agent 都忘记上下文，你就在重复烧 token。",
    "intro": "如果 agent 每次都忘记上下文，你就在重复烧 token；Contextify 把历史保存为可搜索数据库。",
    "locale": "zh"
  }
]
