Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ texts/
finetune/outputs/
finetune/data/train/
.claude/
.direnv/
.envrc
152 changes: 42 additions & 110 deletions bun.lock

Large diffs are not rendered by default.

193 changes: 155 additions & 38 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
lib = pkgs.lib;
isLinux = pkgs.stdenv.isLinux;
cudaPkgs = if isLinux then pkgs.cudaPackages else null;

# SQLite with loadable extension support for sqlite-vec
sqliteWithExtensions = pkgs.sqlite.overrideAttrs (old: {
Expand All @@ -18,65 +21,179 @@
];
});

qmd = pkgs.stdenv.mkDerivation {
pname = "qmd";
version = "1.0.0";
mkQmd = {
nameSuffix ? "",
extraRuntimeLibs ? [],
extraWrapperBins ? [],
extraCmakeIncludeDirs ? [],
extraCmakeLibraryDirs ? [],
extraWrapperEnv ? {}
}:
pkgs.stdenv.mkDerivation {
pname = "qmd${nameSuffix}";
version = "1.0.0";

src = ./.;
src = ./.;

nativeBuildInputs = [ pkgs.bun pkgs.makeWrapper ];
nativeBuildInputs = [
pkgs.bun
pkgs.cmake
pkgs.makeWrapper
pkgs.nodejs_22
];

buildInputs = [ pkgs.sqlite ];
buildInputs = [ pkgs.sqlite ] ++ extraRuntimeLibs;

buildPhase = ''
export HOME=$(mktemp -d)
bun install --frozen-lockfile
'';
buildPhase = ''
export HOME=$(mktemp -d)
bun install --frozen-lockfile
'';

installPhase = ''
mkdir -p $out/lib/qmd
mkdir -p $out/bin
installPhase = let
wrapperArgs =
[
"--add-flags \"$out/lib/qmd/src/qmd.ts\""
"--prefix PATH : ${lib.makeBinPath ([ pkgs.cmake pkgs.nodejs_22 ] ++ extraWrapperBins)}"
"--set DYLD_LIBRARY_PATH \"${lib.makeLibraryPath ([ pkgs.sqlite ] ++ extraRuntimeLibs)}\""
"--set LD_LIBRARY_PATH \"${lib.makeLibraryPath ([ pkgs.sqlite ] ++ extraRuntimeLibs)}\""
]
++ lib.optional (extraCmakeIncludeDirs != [])
"--prefix CMAKE_INCLUDE_PATH : ${lib.concatStringsSep ":" extraCmakeIncludeDirs}"
++ lib.optional (extraCmakeLibraryDirs != [])
"--prefix CMAKE_LIBRARY_PATH : ${lib.concatStringsSep ":" extraCmakeLibraryDirs}"
++ (lib.mapAttrsToList (k: v: "--set ${k} \"${v}\"") extraWrapperEnv);
in ''
mkdir -p $out/lib/qmd
mkdir -p $out/bin

cp -r node_modules $out/lib/qmd/
cp -r src $out/lib/qmd/
cp package.json $out/lib/qmd/
cp -r node_modules $out/lib/qmd/
cp -r src $out/lib/qmd/
cp package.json $out/lib/qmd/

makeWrapper ${pkgs.bun}/bin/bun $out/bin/qmd \
--add-flags "$out/lib/qmd/src/qmd.ts" \
--set DYLD_LIBRARY_PATH "${pkgs.sqlite.out}/lib" \
--set LD_LIBRARY_PATH "${pkgs.sqlite.out}/lib"
'';
makeWrapper ${pkgs.bun}/bin/bun $out/bin/qmd \
${lib.concatStringsSep " \\\n " wrapperArgs}
'';

meta = with pkgs.lib; {
description = "On-device search engine for markdown notes, meeting transcripts, and knowledge bases";
homepage = "https://github.com/tobi/qmd";
license = licenses.mit;
platforms = platforms.unix;
meta = with pkgs.lib; {
description = "On-device search engine for markdown notes, meeting transcripts, and knowledge bases";
homepage = "https://github.com/tobi/qmd";
license = licenses.mit;
platforms = platforms.unix;
};
};

# Base package: mkQmd with every optional argument left at its default
# (no name suffix, no extra runtime libraries, wrapper binaries, CMake
# search paths or wrapper environment variables).
qmd = mkQmd { };
# Vulkan variant of the package. The Vulkan-specific arguments are supplied
# only on Linux; on other systems mkQmd falls back to its empty defaults, so
# the result differs from the base package only by the "-vulkan" name suffix.
qmdVulkan = mkQmd ({
  nameSuffix = "-vulkan";
} // lib.optionalAttrs isLinux {
  # Added to buildInputs and to the wrapper's (DY)LD_LIBRARY_PATH.
  extraRuntimeLibs = [
    pkgs.vulkan-loader
    pkgs.vulkan-headers
    pkgs.shaderc
  ];
  # Added to the PATH prefix set up by the wrapper.
  extraWrapperBins = [ pkgs.shaderc ];
  # Prefixed onto CMAKE_INCLUDE_PATH / CMAKE_LIBRARY_PATH by the wrapper.
  extraCmakeIncludeDirs = [ "${pkgs.vulkan-headers}/include" ];
  extraCmakeLibraryDirs = [ "${pkgs.vulkan-loader}/lib" ];
  # Both variables carry "-include cstdint" into the wrapped environment.
  # NOTE(review): presumably works around a missing <cstdint> include when
  # the Vulkan backend is compiled at runtime; confirm.
  extraWrapperEnv = {
    NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_CXX_FLAGS = "-include cstdint";
    CXXFLAGS = "-include cstdint";
  };
});
# CUDA variant of the package. The CUDA toolkit is only referenced on Linux
# (cudaPkgs is null elsewhere, and the attribute set below is never forced
# there); on other systems only the "-cuda" name suffix applies.
qmdCuda = mkQmd ({
  nameSuffix = "-cuda";
} // lib.optionalAttrs isLinux {
  # The toolkit is exposed both as a runtime library and on the wrapper PATH.
  extraRuntimeLibs = [ cudaPkgs.cudatoolkit ];
  extraWrapperBins = [ cudaPkgs.cudatoolkit ];
});

# Packages present in every development shell, before any per-shell extras.
# The SQLite entry is the locally overridden build, not pkgs.sqlite.
baseShellInputs =
  (with pkgs; [ bun cmake makeWrapper nodejs_22 ])
  ++ [ sqliteWithExtensions ];

# Builds one development shell on top of pkgs.mkShell.
#
# The shell's buildInputs are baseShellInputs followed by extraInputs and
# extraRuntimeLibs. The generated shellHook, in order:
#   1. exports BREW_PREFIX, defaulting to the sqliteWithExtensions output
#      when the variable is not already set;
#   2. prepends the library paths of sqliteWithExtensions and
#      extraRuntimeLibs to LD_LIBRARY_PATH and DYLD_LIBRARY_PATH, keeping any
#      existing value after them;
#   3. prepends extraCmakeIncludeDirs / extraCmakeLibraryDirs to
#      CMAKE_INCLUDE_PATH / CMAKE_LIBRARY_PATH, only when the lists are
#      non-empty;
#   4. exports every name/value pair of extraEnv;
#   5. runs extraShellHook;
#   6. prints a banner that includes the shell name.
mkShell = {
# Label shown in the banner printed when the shell starts.
name,
# Extra packages for buildInputs only.
extraInputs ? [],
# Extra packages for buildInputs that are also put on the library paths.
extraRuntimeLibs ? [],
# Directories joined with ":" and prepended to CMAKE_INCLUDE_PATH.
extraCmakeIncludeDirs ? [],
# Directories joined with ":" and prepended to CMAKE_LIBRARY_PATH.
extraCmakeLibraryDirs ? [],
# Attribute set of environment variables to export (values are placed
# inside double quotes in the generated shell code).
extraEnv ? {},
# Raw shell code appended after all of the exports above.
extraShellHook ? ""
}:
pkgs.mkShell {
buildInputs = baseShellInputs ++ extraInputs ++ extraRuntimeLibs;
# In the string below, ''${...} is the Nix escape for a literal shell
# ${...} expansion; plain ${...} is Nix interpolation.
shellHook = ''
export BREW_PREFIX="''${BREW_PREFIX:-${sqliteWithExtensions.out}}"
export LD_LIBRARY_PATH="${lib.makeLibraryPath ([ sqliteWithExtensions ] ++ extraRuntimeLibs)}''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="${lib.makeLibraryPath ([ sqliteWithExtensions ] ++ extraRuntimeLibs)}''${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
${lib.optionalString (extraCmakeIncludeDirs != []) ''
export CMAKE_INCLUDE_PATH="${lib.concatStringsSep ":" extraCmakeIncludeDirs}''${CMAKE_INCLUDE_PATH:+:$CMAKE_INCLUDE_PATH}"
''}
${lib.optionalString (extraCmakeLibraryDirs != []) ''
export CMAKE_LIBRARY_PATH="${lib.concatStringsSep ":" extraCmakeLibraryDirs}''${CMAKE_LIBRARY_PATH:+:$CMAKE_LIBRARY_PATH}"
''}
${lib.concatStringsSep "\n" (lib.mapAttrsToList (k: v: "export ${k}=\"${v}\"") extraEnv)}
${extraShellHook}
echo "QMD development shell (${name})"
echo "Run: bun src/qmd.ts <command>"
'';
};
in
{
packages = {
default = qmd;
qmd = qmd;
};
}
// lib.optionalAttrs isLinux { "qmd-vulkan" = qmdVulkan; }
// lib.optionalAttrs isLinux { "qmd-cuda" = qmdCuda; };

apps.default = {
type = "app";
program = "${qmd}/bin/qmd";
};

devShells.default = pkgs.mkShell {
buildInputs = [
pkgs.bun
sqliteWithExtensions
];

shellHook = ''
export BREW_PREFIX="''${BREW_PREFIX:-${sqliteWithExtensions.out}}"
echo "QMD development shell"
echo "Run: bun src/qmd.ts <command>"
'';
# Development shells. "default" and "cpu" are the same shell (name "cpu",
# no extras); "vulkan" and "cuda" are only defined on Linux.
devShells =
  let
    cpuShell = mkShell { name = "cpu"; };
  in
  {
    default = cpuShell;
    cpu = cpuShell;
  }
  // lib.optionalAttrs isLinux {
    vulkan = mkShell {
      name = "vulkan";
      extraInputs = [ pkgs.shaderc ];
      extraRuntimeLibs = [
        pkgs.vulkan-loader
        pkgs.vulkan-headers
        pkgs.shaderc
      ];
      extraCmakeIncludeDirs = [ "${pkgs.vulkan-headers}/include" ];
      extraCmakeLibraryDirs = [ "${pkgs.vulkan-loader}/lib" ];
      extraEnv = {
        NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_CXX_FLAGS = "-include cstdint";
        CXXFLAGS = "-include cstdint";
      };
      # When ICD manifests exist under /run/opengl-driver, join their paths
      # with ":" and export them as VK_ICD_FILENAMES.
      extraShellHook = ''
        if [ -d /run/opengl-driver/share/vulkan/icd.d ]; then
        icd_files=$(ls /run/opengl-driver/share/vulkan/icd.d/*.json 2>/dev/null | paste -sd ":" -)
        if [ -n "$icd_files" ]; then
        export VK_ICD_FILENAMES="$icd_files"
        fi
        fi
      '';
    };

    cuda = mkShell {
      name = "cuda";
      extraInputs = [ cudaPkgs.cudatoolkit ];
      extraRuntimeLibs = [ cudaPkgs.cudatoolkit ];
      # Runs after mkShell has exported LD_LIBRARY_PATH, so the driver
      # directory (when present) ends up first on the search path.
      extraShellHook = ''
        export CUDA_PATH="${cudaPkgs.cudatoolkit}"
        if [ -d /run/opengl-driver/lib ]; then
        export LD_LIBRARY_PATH="/run/opengl-driver/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        fi
      '';
    };
  };
}
);
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"@modelcontextprotocol/sdk": "^1.25.1",
"better-sqlite3": "^11.0.0",
"fast-glob": "^3.3.0",
"node-llama-cpp": "^3.14.5",
"node-llama-cpp": "^3.16.0",
"picomatch": "^4.0.0",
"sqlite-vec": "^0.1.7-alpha.2",
"yaml": "^2.8.2",
Expand Down
26 changes: 16 additions & 10 deletions src/bench-rerank.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,22 +193,28 @@ async function main() {
console.log("═══════════════════════════════════════════════════════════════\n");

// Detect GPU
const gpuTypes = await getLlamaGpuTypes();
const preferred = (["cuda", "metal", "vulkan"] as const).find(g => gpuTypes.includes(g));
const gpuTypes = await getLlamaGpuTypes("supported");
const preferredOrder = ["cuda", "metal", "vulkan"] as const;
const available = preferredOrder.filter(g => gpuTypes.includes(g));

let llama: Llama;
let llama: Llama | null = null;
let gpuLabel: string;
if (preferred) {
const failed: string[] = [];
for (const gpu of available) {
try {
llama = await getLlama({ gpu: preferred, logLevel: LlamaLogLevel.error });
gpuLabel = `${preferred}`;
llama = await getLlama({ gpu, logLevel: LlamaLogLevel.error });
gpuLabel = gpu;
break;
} catch {
llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
gpuLabel = "cpu (gpu init failed)";
failed.push(gpu);
}
} else {
}
if (!llama) {
llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
gpuLabel = "cpu";
gpuLabel = failed.length > 0 ? "cpu (gpu init failed)" : "cpu";
}
if (!llama) {
throw new Error("Failed to initialize llama backend");
}

// System info
Expand Down
31 changes: 19 additions & 12 deletions src/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -497,26 +497,33 @@ export class LlamaCpp implements LLM {
*/
private async ensureLlama(): Promise<Llama> {
if (!this.llama) {
// Detect available GPU types and use the best one.
// Detect available GPU types and use the best supported one.
// We can't rely on gpu:"auto" — it returns false even when CUDA is available
// (likely a binary/build config issue in node-llama-cpp).
// @ts-expect-error node-llama-cpp API compat
const gpuTypes = await getLlamaGpuTypes();
// Prefer CUDA > Metal > Vulkan > CPU
const preferred = (["cuda", "metal", "vulkan"] as const).find(g => gpuTypes.includes(g));
const gpuTypes = await getLlamaGpuTypes("supported");
const preferredOrder = ["cuda", "metal", "vulkan"] as const;
const available = preferredOrder.filter(g => gpuTypes.includes(g));

let llama: Llama;
if (preferred) {
let llama: Llama | null = null;
const failed: string[] = [];
for (const gpu of available) {
try {
llama = await getLlama({ gpu: preferred, logLevel: LlamaLogLevel.error });
llama = await getLlama({ gpu, logLevel: LlamaLogLevel.error });
break;
} catch {
llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
failed.push(gpu);
}
}
if (!llama) {
llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
if (failed.length > 0) {
process.stderr.write(
`QMD Warning: ${preferred} reported available but failed to initialize. Falling back to CPU.\n`
`QMD Warning: GPU init failed for ${failed.join(", ")}. Falling back to CPU.\n`
);
}
} else {
llama = await getLlama({ gpu: false, logLevel: LlamaLogLevel.error });
}
if (!llama) {
throw new Error("Failed to initialize llama backend");
}

if (!llama.gpu) {
Expand Down