[
  {
    "title": "Fable's judgement",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jul/3/judgement/",
    "date": "2026-07-03",
    "type": "agent-workflow",
    "tags": [
      "Claude Code",
      "agent judgment",
      "workflow"
    ],
    "why_it_matters": "Agent savings can come from fewer bad loops, not only cheaper tokens.",
    "hook_angle": "Agent 省钱不只是少 token，而是少干预、少返工、少错误 loop。",
    "intro": "One of the most interesting tips I got from the Fireside Chat I hosted with Cat Wu and Thariq Shihipar from the Claude Code team at AIE on Wednesday was …",
    "locale": "en"
  },
  {
    "title": "Release: llm-coding-agent 0.1a0",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jul/2/llm-coding-agent/",
    "date": "2026-07-02",
    "type": "coding-agent",
    "tags": [
      "coding agent",
      "LLM",
      "Python"
    ],
    "why_it_matters": "A minimal coding agent maps where token spend happens.",
    "hook_angle": "一个 coding agent 到底由哪些 token-consuming steps 组成？",
    "intro": "A coding agent built on LLM",
    "locale": "en"
  },
  {
    "title": "Using DSPy to evaluate and improve Datasette Agent's SQL system prompts",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jul/2/dspy-datasette-agent-prompts/",
    "date": "2026-07-02",
    "type": "prompt-evaluation",
    "tags": [
      "DSPy",
      "system prompt",
      "evaluation",
      "SQL agent"
    ],
    "why_it_matters": "Prompt optimization can be evaluated with harnesses instead of vibes.",
    "hook_angle": "少花 token 的第一步：知道你的 system prompt 是否真的有效。",
    "intro": "Leveraging the DSPy framework, this project evaluates and refines the core production system prompts used by Datasette Agent’s read-only SQL question answerer. The methodology involves a harness where DSPy agents …",
    "locale": "en"
  },
  {
    "title": "Vercel's Andrew Qu on why agents are a new kind of software",
    "source": "Latent Space",
    "url": "https://www.latent.space/p/vercel-agents-new-software",
    "date": "2026-07-03",
    "type": "agent-infrastructure",
    "tags": [
      "agents",
      "Vercel",
      "sandboxes",
      "agent-readable websites"
    ],
    "why_it_matters": "Agent-readable websites are becoming part of the product surface.",
    "hook_angle": "未来网站不只是给人看，也要给 agent 读。",
    "intro": "The Vercel Chief of Software explains how its agent framework, eve, was created — and why skills, sandboxes and agent-readable websites now matter.",
    "locale": "en"
  },
  {
    "title": "How Cursor deploys AI inside the enterprise",
    "source": "Latent Space",
    "url": "https://www.latent.space/p/cursor-forward-deployed-engineers",
    "date": "2026-07-01",
    "type": "enterprise-agent-workflow",
    "tags": [
      "Cursor",
      "software factory",
      "enterprise AI",
      "agents"
    ],
    "why_it_matters": "Vibe coding becomes a team budget problem when workflows scale.",
    "hook_angle": "Cursor 的真正成本不是订阅费，而是整个软件工厂的 token burn。",
    "intro": "Cursor's Pauline Brunet explains how her team of Forward Deployed Engineers help organizations implement agents — essentially setting up software factories.",
    "locale": "en"
  },
  {
    "title": "What’s new in Claude Sonnet 5",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jun/30/claude-sonnet-5/",
    "date": "2026-06-30",
    "type": "model-update",
    "tags": [
      "Claude",
      "Sonnet",
      "model update",
      "developer docs"
    ],
    "why_it_matters": "New model releases affect defaults, agent costs, and failure rates.",
    "hook_angle": "模型升级后，是否应该切默认模型？看 docs，不看营销。",
    "intro": "Claude Sonnet 5 came out this morning. I always head straight for the \"what's new\" developer docs because they tend to have more actionable information than the official announcement post. …",
    "locale": "en"
  },
  {
    "title": "ScarfBench: Benchmarking AI Agents for Enterprise Java Framework Migration",
    "source": "Hugging Face / IBM Research",
    "url": "https://huggingface.co/blog/ibm-research/scarfbench",
    "date": "2026-06-30",
    "type": "benchmark",
    "tags": [
      "benchmark",
      "AI agents",
      "Java migration",
      "enterprise"
    ],
    "why_it_matters": "Agent benchmarks are moving toward real enterprise migration tasks.",
    "hook_angle": "真正有用的 agent benchmark 应该衡量任务完成成本。",
    "intro": "A Blog post by IBM Research on Hugging Face",
    "locale": "en"
  },
  {
    "title": "Ornith-1.0: Self-Scaffolding LLMs for Agentic Coding",
    "source": "Simon Willison",
    "url": "https://simonwillison.net/2026/Jun/29/ornith/",
    "date": "2026-06-29",
    "type": "open-model",
    "tags": [
      "open weights",
      "agentic coding",
      "coding model"
    ],
    "why_it_matters": "Open-weight coding models can change the API cost equation.",
    "hook_angle": "如果 open models 足够会写代码，个人 agent 成本结构会变。",
    "intro": "This is an interesting new open weights (MIT licensed) model, the first model release from DeepReinforce. [...] with variants including 9B Dense, 31B Dense, 35B MoE, and 397B MoE. Built …",
    "locale": "en"
  },
  {
    "title": "OpenAI reports median internal Codex output tokens grew dramatically",
    "source": "Latent Space / AINews",
    "url": "https://www.latent.space/p/ainews-openai-reports-median-internal",
    "date": "2026-06-26",
    "type": "token-usage-signal",
    "tags": [
      "Codex",
      "output tokens",
      "agent usage"
    ],
    "why_it_matters": "Output token growth is a major hidden cost in agent workflows.",
    "hook_angle": "Agent 时代，贵的不一定是 input，可能是 output 和 loop。",
    "intro": "It's happening.",
    "locale": "en"
  },
  {
    "title": "GLM-5.2 is the step change for open agents",
    "source": "Interconnects",
    "url": "https://www.interconnects.ai/p/glm-52-is-the-step-change-for-open",
    "date": "2026-06-22",
    "type": "open-model-analysis",
    "tags": [
      "GLM",
      "open agents",
      "China models",
      "open models"
    ],
    "why_it_matters": "China/open models are part of global agent cost/performance comparisons.",
    "hook_angle": "中国模型不是边缘信息，而是全球 agent cost/performance 版图的一部分。",
    "intro": "A capability threshold I've been carefully monitoring.",
    "locale": "en"
  },
  {
    "title": "Is it agentic enough? Benchmarking open models on your own tooling",
    "source": "Hugging Face",
    "url": "https://huggingface.co/blog/is-it-agentic-enough",
    "date": "2026-06-18",
    "type": "benchmark",
    "tags": [
      "agent benchmark",
      "open models",
      "tooling",
      "evaluation"
    ],
    "why_it_matters": "Your own tooling may matter more than public leaderboard rank.",
    "hook_angle": "别问哪个模型最好，问哪个模型在你的 agent stack 上最便宜地成功。",
    "intro": "We’re on a journey to advance and democratize artificial intelligence through open source and open science.",
    "locale": "en"
  },
  {
    "title": "Qwen3 benchmark results",
    "source": "Aider",
    "url": "https://aider.chat/2025/05/08/qwen3.html",
    "date": "2025-05-08",
    "type": "coding-benchmark",
    "tags": [
      "Qwen",
      "Aider",
      "coding benchmark",
      "China models"
    ],
    "why_it_matters": "A durable bridge between China models and coding-agent evaluation.",
    "hook_angle": "中文/中国模型在 coding agent 里的位置，应该用实际 coding benchmark 讨论。",
    "intro": "Benchmark results for Qwen3 models using the Aider polyglot coding benchmark.",
    "locale": "en"
  },
  {
    "title": "How Claude Code uses prompt caching",
    "source": "Claude Code Docs",
    "url": "https://code.claude.com/docs/en/prompt-caching",
    "date": "2026-07-04",
    "type": "official-docs",
    "tags": [
      "Claude Code",
      "prompt caching",
      "cache hit rate",
      "token saving"
    ],
    "why_it_matters": "Prompt caching directly changes speed and token cost.",
    "hook_angle": "你以为 Claude Code 慢/贵，其实可能是 cache miss。",
    "intro": "Claude Code manages prompt caching automatically. See why a model switch triggers a slow uncached turn, what /compact costs, why CLAUDE.md edits don't apply mid-session, and how to check your cache hit rate.",
    "locale": "en"
  },
  {
    "title": "Ditching Claude for OpenCode and OpenRouter",
    "source": "Ian Wootten",
    "url": "https://www.ianwootten.co.uk/2026/07/01/ditching-claude-for-opencode-and-openrouter/",
    "date": "2026-07-01",
    "type": "field-report",
    "tags": [
      "OpenCode",
      "OpenRouter",
      "Claude",
      "open weights",
      "cost"
    ],
    "why_it_matters": "A real switching case from default tools to open router/model workflows.",
    "hook_angle": "什么时候值得离开默认工具，改用开放 router 和开源模型？",
    "intro": "For the entirety of June I ditched Claude Code and have been using open weight models with Opencode and openrouter.ai. Here",
    "locale": "en"
  },
  {
    "title": "Contextify - Searchable History for Claude Code and Codex",
    "source": "Contextify",
    "url": "https://contextify.sh/",
    "date": "2026-07-04",
    "type": "tool",
    "tags": [
      "Claude Code",
      "Codex",
      "history",
      "agent memory",
      "developer tooling"
    ],
    "why_it_matters": "Agent history and reusable context can reduce repeated token spend.",
    "hook_angle": "如果每次 agent 都忘记上下文，你就在重复烧 token。",
    "intro": "Your Claude Code and Codex history auto-deletes. Contextify keeps it forever in a searchable database, syncs it across every machine, and runs on macOS and Linux.",
    "locale": "en"
  }
]
