[ { "title": "Fable 的判断力", "source": "Simon Willison", "url": "https://simonwillison.net/2026/Jul/3/judgement/", "date": "2026-07-03", "type": "agent-workflow", "tags": [ "Claude Code", "agent 判断", "工作流" ], "why_it_matters": "Agent 省钱不只靠更便宜的 token，也靠减少错误循环。", "hook_angle": "Agent 省钱不只是少 token，而是少干预、少返工、少错误 loop。", "intro": "Claude Code 团队分享的一个重点是：真正的节省来自更好的判断、更少返工和更少人工干预。", "locale": "zh" }, { "title": "发布：llm-coding-agent 0.1a0", "source": "Simon Willison", "url": "https://simonwillison.net/2026/Jul/2/llm-coding-agent/", "date": "2026-07-02", "type": "coding-agent", "tags": [ "coding agent", "LLM", "Python" ], "why_it_matters": "一个最小 coding agent 可以拆出 token 花在哪里。", "hook_angle": "一个 coding agent 到底由哪些 token-consuming steps 组成？", "intro": "这个 LLM 驱动的 coding agent 展示了任务规划、文件读取、代码修改和验证各自消耗 token 的位置。", "locale": "zh" }, { "title": "用 DSPy 评估并改进 Datasette Agent 的 SQL 系统提示词", "source": "Simon Willison", "url": "https://simonwillison.net/2026/Jul/2/dspy-datasette-agent-prompts/", "date": "2026-07-02", "type": "prompt-evaluation", "tags": [ "DSPy", "系统提示词", "评测" ], "why_it_matters": "提示词优化应该用评测框架验证，而不是凭感觉。", "hook_angle": "少花 token 的第一步：知道你的 system prompt 是否真的有效。", "intro": "项目用 DSPy 评估和改进 Datasette Agent 的只读 SQL 问答系统提示词，把 prompt 质量放进可测试流程。", "locale": "zh" }, { "title": "Vercel 的 Andrew Qu：为什么 agents 是一种新软件", "source": "Latent Space", "url": "https://www.latent.space/p/vercel-agents-new-software", "date": "2026-07-03", "type": "agent-infrastructure", "tags": [ "agents", "Vercel", "agent-readable 网站" ], "why_it_matters": "面向 agent 可读的网站正在成为产品表面的一部分。", "hook_angle": "未来网站不只是给人看，也要给 agent 读。", "intro": "Vercel 解释其 agent 框架 eve，以及 skills、沙箱和 agent-readable websites 为什么重要。", "locale": "zh" }, { "title": "Cursor 如何在企业内部部署 AI", "source": "Latent Space", "url": "https://www.latent.space/p/cursor-forward-deployed-engineers", "date": "2026-07-01", "type": "enterprise-agent-workflow", "tags": [ "Cursor", "软件工厂", "企业 AI" ], "why_it_matters": "当工作流规模化，vibe coding 会变成团队预算问题。", "hook_angle": "Cursor 的真正成本不是订阅费，而是整个软件工厂的 token burn。", "intro": "Cursor 的 Forward Deployed Engineers 帮企业搭建 agent 工作流，本质上是在搭建新的软件工厂。", "locale": "zh" }, { "title": "Claude Sonnet 5 有哪些新变化", "source": "Simon Willison", "url": "https://simonwillison.net/2026/Jun/30/claude-sonnet-5/", "date": "2026-06-30", "type": "model-update", "tags": [ "Claude", "Sonnet", "模型更新" ], "why_it_matters": "新模型发布会影响默认选择、agent 成本和失败率。", "hook_angle": "模型升级后，是否应该切默认模型？看 docs，不看营销。", "intro": "模型升级后应该先看开发者文档，而不是营销稿，判断是否值得切换默认模型。", "locale": "zh" }, { "title": "ScarfBench：企业 Java 迁移的 AI Agent 基准", "source": "Hugging Face / IBM Research", "url": "https://huggingface.co/blog/ibm-research/scarfbench", "date": "2026-06-30", "type": "benchmark", "tags": [ "基准", "AI agents", "Java 迁移" ], "why_it_matters": "Agent benchmark 正在走向真实企业迁移任务。", "hook_angle": "真正有用的 agent benchmark 应该衡量任务完成成本。", "intro": "IBM Research 的 ScarfBench 把 agent 放到企业 Java 框架迁移场景里评测。", "locale": "zh" }, { "title": "Ornith-1.0：用于 Agentic Coding 的自搭脚手架 LLM", "source": "Simon Willison", "url": "https://simonwillison.net/2026/Jun/29/ornith/", "date": "2026-06-29", "type": "open-model", "tags": [ "开放权重", "agentic coding", "coding 模型" ], "why_it_matters": "开放权重 coding 模型可能改变 API 成本结构。", "hook_angle": "如果 open models 足够会写代码，个人 agent 成本结构会变。", "intro": "DeepReinforce 发布的 MIT 许可开放权重模型，提供多种 dense 和 MoE 规模，用于 agentic coding。", "locale": "zh" }, { "title": "OpenAI 内部 Codex 输出 token 中位数大幅增长", "source": "Latent Space / AINews", "url": "https://www.latent.space/p/ainews-openai-reports-median-internal", "date": "2026-06-26", "type": "token-usage-signal", "tags": [ "Codex", "输出 token", "agent 使用" ], "why_it_matters": "输出 token 增长是 agent 工作流里的隐性大成本。", "hook_angle": "Agent 时代，贵的不一定是 input，可能是 output 和 loop。", "intro": "Agent 时代，真正昂贵的可能不是输入，而是输出、重试和循环。", "locale": "zh" }, { "title": "GLM-5.2 是开放 agent 的跃迁点", "source": "Interconnects", "url": "https://www.interconnects.ai/p/glm-52-is-the-step-change-for-open", "date": "2026-06-22", "type": "open-model-analysis", "tags": [ "GLM", "开放 agents", "中国模型" ], "why_it_matters": "中国和开放模型是全球 agent 成本/性能比较的一部分。", "hook_angle": "中国模型不是边缘信息，而是全球 agent cost/performance 版图的一部分。", "intro": "开放模型能力阈值正在变化，GLM-5.2 是值得纳入 agent 成本地图的信号。", "locale": "zh" }, { "title": "够不够 agentic？在自己的工具链上评测开放模型", "source": "Hugging Face", "url": "https://huggingface.co/blog/is-it-agentic-enough", "date": "2026-06-18", "type": "benchmark", "tags": [ "agent 基准", "开放模型", "工具链" ], "why_it_matters": "你的工具链可能比公开排行榜排名更重要。", "hook_angle": "别问哪个模型最好，问哪个模型在你的 agent stack 上最便宜地成功。", "intro": "不要只问哪个模型最好，要问哪个模型能在你的 agent stack 上以最低成本成功。", "locale": "zh" }, { "title": "Qwen3 基准结果", "source": "Aider", "url": "https://aider.chat/2025/05/08/qwen3.html", "date": "2025-05-08", "type": "coding-benchmark", "tags": [ "Qwen", "Aider", "coding 基准" ], "why_it_matters": "这是连接中国模型和 coding-agent 评测的耐久桥梁。", "hook_angle": "中文/中国模型在 coding agent 里的位置，应该用实际 coding benchmark 讨论。", "intro": "Aider polyglot coding benchmark 给 Qwen3 模型提供了可讨论的 coding-agent 证据。", "locale": "zh" }, { "title": "Claude Code 如何使用 prompt caching", "source": "Claude Code Docs", "url": "https://code.claude.com/docs/en/prompt-caching", "date": "2026-07-04", "type": "official-docs", "tags": [ "Claude Code", "prompt caching", "token 节省" ], "why_it_matters": "Prompt caching 直接影响速度和 token 成本。", "hook_angle": "你以为 Claude Code 慢/贵，其实可能是 cache miss。", "intro": "Claude Code 自动管理提示缓存；模型切换、/compact、CLAUDE.md 修改和 cache hit rate 都会影响体验。", "locale": "zh" }, { "title": "从 Claude 转向 OpenCode 和 OpenRouter", "source": "Ian Wootten", "url": "https://www.ianwootten.co.uk/2026/07/01/ditching-claude-for-opencode-and-openrouter/", "date": "2026-07-01", "type": "field-report", "tags": [ "OpenCode", "OpenRouter", "成本" ], "why_it_matters": "这是从默认工具切到开放 router/模型工作流的真实案例。", "hook_angle": "什么时候值得离开默认工具，改用开放 router 和开源模型？", "intro": "一个用户在六月放弃 Claude Code，改用 OpenCode、OpenRouter 和开放权重模型，提供了实际迁移线索。", "locale": "zh" }, { "title": "Contextify：Claude Code 和 Codex 的可搜索历史", "source": "Contextify", "url": "https://contextify.sh/", "date": "2026-07-04", "type": "tool", "tags": [ "Claude Code", "Codex", "agent memory" ], "why_it_matters": "Agent 历史和可复用上下文能减少重复 token 消耗。", "hook_angle": "如果每次 agent 都忘记上下文，你就在重复烧 token。", "intro": "如果 agent 每次都忘记上下文，你就在重复烧 token；Contextify 把历史保存为可搜索数据库。", "locale": "zh" } ]