# Compilation llama.cpp Turboquant
git clone git@github.com:TheTom/turboquant_plus.git
cd llama-cpp-turboquant/
git checkout feature/turboquant-kv-cache
cmake -B build -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON -DCMAKE_BUILD_TYPE=Release
cmake --build build -j
cd ~/Documents/15_informatique/installation/llama-cpp-turboquant/
build/bin/llama-server -m ~/Downloads/llm-models/Qwen3.5-27B.Q5_K_M.gguf --alias "Qwen35-27B" --jinja -ngl 99 -c 262144 -ctk turbo3 -ctv turbo3 -fa on -np 1 --metrics --host 0.0.0.0 --port 8082
# Aller sur http://127.0.0.1:8082/
unset CC
unset CXX
unset CFLAGS
unset CXXFLAGS
cmake -B build \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache
cmake --build build -j
# Documentation et autres informations importantes : https://github.com/TheTom/turboquant_plus