<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!--
  RSS 2.0 feed listing the posts tagged "MLX" on haodaohang.top.
  NOTE(review): the generator element below indicates this file is build
  output, so the corrections made here presumably also need to be applied in
  the RSS template that produces it, or the next build will revert them.
-->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <title>MLX on 赛博工具站</title>
    <link>https://haodaohang.top/tags/mlx/</link>
    <description>Recent content in MLX on 赛博工具站</description>
    <image>
      <title>赛博工具站</title>
      <url>https://haodaohang.top/images/cover.png</url>
      <!-- Per RSS 2.0 the image link is the page the image leads to when
           clicked (same value as the channel link), not the image file. -->
      <link>https://haodaohang.top/tags/mlx/</link>
    </image>
    <generator>Hugo -- 0.152.2</generator>
    <language>zh-cn</language>
    <lastBuildDate>Thu, 19 Mar 2026 14:00:00 +0800</lastBuildDate>
    <!-- Self-reference to this feed's own URL. -->
    <atom:link href="https://haodaohang.top/tags/mlx/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>笔记本跑 397B 参数大模型？Apple LLM in a Flash 技术实战</title>
      <link>https://haodaohang.top/posts/2026-03-19-llm-in-a-flash/</link>
      <pubDate>Thu, 19 Mar 2026 14:00:00 +0800</pubDate>
      <guid>https://haodaohang.top/posts/2026-03-19-llm-in-a-flash/</guid>
      <!-- The apostrophes here had been escaped twice, which made plain-text
           readers display the raw character reference; they are now literal. -->
      <description>研究者成功在 48GB MacBook Pro M3 Max 上运行 Qwen3.5-397B-A17B 模型，达到 5.5+ tokens/秒。本文深度解析 Apple 'LLM in a Flash' 技术原理、MoE 架构优势，以及如何在自己的设备上复现这一突破。</description>
    </item>
  </channel>
</rss>
