diff --git a/.claude/rules/code-coverage.md b/.claude/rules/code-coverage.md new file mode 100644 index 00000000..26739d6a --- /dev/null +++ b/.claude/rules/code-coverage.md @@ -0,0 +1,3 @@ +# Code Coverage Measurement + +- Use `make coverage` to measure code coverage. This is the authoritative source. diff --git a/.claude/skills/coverage/SKILL.md b/.claude/skills/coverage/SKILL.md deleted file mode 100644 index 62176858..00000000 --- a/.claude/skills/coverage/SKILL.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -name: coverage -description: Find uncovered lines in the llvm-cov coverage report. Use when checking test coverage, finding gaps, or working toward 100% coverage. -allowed-tools: Bash, Read, Edit, Write, Grep, Glob ---- - -Find uncovered lines in the project coverage report. - -## Steps - -1. Run `make test.qwen3.5_0.8B.coverage.json` to collect coverage data and export JSON -2. Run `python3 scripts/coverage-uncovered-lines.py target/coverage.json` to list uncovered lines -3. Fix each uncovered line by adding tests or restructuring code -4. Repeat until `make test.qwen3.5_0.8B.coverage` passes (enforces 100% line coverage) - -## Output format - -The script prints a summary matching `--fail-under-lines` exactly, then every line with any uncovered region: - -``` -Lines: 7072 Covered: 7060 Missed: 12 (99.83%) - -context/session.rs:201: check_session_load_length(n_out, max_tokens)?; -model.rs:184: c_int::try_from(c_string.as_bytes().len())?, -``` - -## Key concepts - -- **Missed Lines** in the summary is what `--fail-under-lines 100` checks -- **Lines with uncovered regions** (the detailed list) shows every line that has any uncovered code path — fixing all of these is what reaches 100% -- The `?` operator on a covered line creates an uncovered region (the error branch) even though the line itself executed diff --git a/.github/actions/install-build-dependencies/action.yml b/.github/actions/install-build-dependencies/action.yml new file mode 100644 index 00000000..dd5bd6d4 --- /dev/null +++ b/.github/actions/install-build-dependencies/action.yml @@ -0,0 +1,20 @@ +name: install-build-dependencies +description: Install OS-specific system packages needed to build llama-cpp-bindings (CMake, libclang, GNU make). + +runs: + using: composite + steps: + - name: install linux build dependencies + if: runner.os == 'Linux' + shell: bash + run: sudo apt-get update && sudo apt-get install -y cmake libclang-dev + + - name: install windows build dependencies + if: runner.os == 'Windows' + shell: bash + run: choco install -y make + + - name: set windows libclang path + if: runner.os == 'Windows' + shell: bash + run: echo "LIBCLANG_PATH=C:\\Program Files\\LLVM\\bin" >> $GITHUB_ENV diff --git a/.github/actions/install-rust-toolchain/action.yml b/.github/actions/install-rust-toolchain/action.yml new file mode 100644 index 00000000..124e13b4 --- /dev/null +++ b/.github/actions/install-rust-toolchain/action.yml @@ -0,0 +1,11 @@ +name: install-rust-toolchain +description: Install the pinned stable Rust toolchain (with rustfmt and clippy) and configure the cargo build cache. + +runs: + using: composite + steps: + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + with: + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9d241530..95deb1c8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -9,33 +9,40 @@ env: CARGO_TERM_COLOR: always jobs: - fmt: + formatting: name: formatting runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@v4 with: submodules: recursive - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable - - - uses: Swatinem/rust-cache@v2 + - uses: ./.github/actions/install-rust-toolchain - run: make fmt.check test: - name: tests - runs-on: ubuntu-latest + name: tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + defaults: + run: + shell: bash + env: + LLAMA_DISABLE_CCACHE: '1' + RUST_BACKTRACE: '1' steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: install system dependencies - run: sudo apt-get update && sudo apt-get install -y cmake libclang-dev - - - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable + - uses: ./.github/actions/install-build-dependencies - - uses: Swatinem/rust-cache@v2 + - uses: ./.github/actions/install-rust-toolchain - run: make test.unit diff --git a/.gitignore b/.gitignore index 0851c32e..6e05da5b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,10 @@ target/ # Mac stuff .DS_Store + +# Node modules (rust-coverage-check npm dependency) +node_modules/ + +# LLVM source-based coverage raw and merged profile artifacts +*.profraw +*.profdata diff --git a/Makefile b/Makefile index f6830ed4..5004e9ce 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,11 @@ TEST_DEVICE ?= QWEN_CAPABLE_FEATURES = multimodal_capable,mrope_model DEVICE_FEATURE = $(if $(TEST_DEVICE),--features $(TEST_DEVICE),) -LLM_BASE_FEATURE_FLAGS = $(DEVICE_FEATURE) LLM_QWEN_CAPABLE_FEATURE_FLAGS = $(DEVICE_FEATURE) --features $(QWEN_CAPABLE_FEATURES) -CARGO_TEST_LLM_FLAGS = --no-fail-fast -p llama-cpp-bindings-tests $(LLM_BASE_FEATURE_FLAGS) -- --test-threads=1 -CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE = --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1 +CARGO_TEST_LLM_FLAGS = --release --no-fail-fast -p llama-cpp-bindings-tests $(DEVICE_FEATURE) -- --test-threads=1 +CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE = --release --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1 + QWEN3_5_0_8B_ENV = \ LLAMA_TEST_HF_REPO=unsloth/Qwen3.5-0.8B-GGUF \ @@ -42,26 +42,56 @@ DEEPSEEK_R1_DISTILL_LLAMA_8B_ENV = \ LLAMA_TEST_HF_ENCODER_REPO=Xiaojian9992024/t5-small-GGUF \ LLAMA_TEST_HF_ENCODER_MODEL=t5-small.bf16.gguf +node_modules: package-lock.json + npm ci + touch node_modules + +package-lock.json: package.json + npm install --package-lock-only + .PHONY: clean.cmake clean.cmake: rm -rf target/llama-cpp-cmake-build .PHONY: clippy -clippy: clippy.core clippy.tests.base clippy.tests.qwen_capable - -.PHONY: clippy.core -clippy.core: +clippy: + cargo clippy --all-targets -p llama-cpp-bindings-types -- -D warnings cargo clippy --all-targets -p llama-cpp-log-decoder -- -D warnings + cargo clippy --all-targets -p llama-cpp-bindings-build -- -D warnings + cargo clippy --all-targets -p llama-cpp-bindings-sys $(DEVICE_FEATURE) -- -D warnings cargo clippy --all-targets -p llama-cpp-bindings $(DEVICE_FEATURE) -- -D warnings - -.PHONY: clippy.tests.base -clippy.tests.base: - cargo clippy --all-targets -p llama-cpp-bindings-tests $(LLM_BASE_FEATURE_FLAGS) -- -D warnings - -.PHONY: clippy.tests.qwen_capable -clippy.tests.qwen_capable: + cargo clippy --all-targets -p llama-cpp-bindings-tests $(DEVICE_FEATURE) -- -D warnings cargo clippy --all-targets -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- -D warnings +.PHONY: coverage +coverage: node_modules + cargo llvm-cov clean --workspace + cargo llvm-cov --no-report -p llama-cpp-log-decoder + cargo llvm-cov --no-report -p llama-cpp-bindings-types + cargo llvm-cov --no-report -p llama-cpp-bindings --lib $(DEVICE_FEATURE) + $(DEEPSEEK_R1_DISTILL_LLAMA_8B_ENV) cargo llvm-cov --no-report --no-fail-fast -p llama-cpp-bindings-tests $(DEVICE_FEATURE) -- --test-threads=1 + $(GLM4_7_FLASH_ENV) cargo llvm-cov --no-report --no-fail-fast -p llama-cpp-bindings-tests $(DEVICE_FEATURE) -- --test-threads=1 + $(QWEN3_5_0_8B_ENV) cargo llvm-cov --no-report --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1 + $(QWEN3_6_35B_A3B_ENV) cargo llvm-cov --no-report --no-fail-fast -p llama-cpp-bindings-tests $(LLM_QWEN_CAPABLE_FEATURE_FLAGS) -- --test-threads=1 + cargo llvm-cov report --json --output-path target/llvm-cov.json + cargo llvm-cov report --lcov --output-path target/lcov.info + cargo llvm-cov report + npx rust-coverage-check target/llvm-cov.json \ + --workspace-root $(CURDIR) \ + --gated llama-cpp-bindings=95 \ + --gated llama-cpp-log-decoder=99 \ + --gated llama-cpp-bindings-types=99 + +.PHONY: coverage-clean +coverage-clean: + cargo llvm-cov clean --workspace + rm -rf target/llvm-cov-target + rm -f target/llvm-cov.json target/lcov.info + +.PHONY: coverage-report +coverage-report: + cargo llvm-cov report --html + .PHONY: fmt fmt: cargo fmt --all @@ -74,11 +104,11 @@ fmt.check: test: test.unit test.llms .PHONY: test.deepseek_r1_distill_llama_8b -test.deepseek_r1_distill_llama_8b: clippy.core clippy.tests.base +test.deepseek_r1_distill_llama_8b: clippy $(DEEPSEEK_R1_DISTILL_LLAMA_8B_ENV) cargo test $(CARGO_TEST_LLM_FLAGS) .PHONY: test.glm4_7_flash -test.glm4_7_flash: clippy.core clippy.tests.base +test.glm4_7_flash: clippy $(GLM4_7_FLASH_ENV) cargo test $(CARGO_TEST_LLM_FLAGS) .PHONY: test.llms @@ -89,14 +119,14 @@ test.llms: \ test.qwen3.6_35b_a3b .PHONY: test.qwen3.5_0.8B -test.qwen3.5_0.8B: clippy.core clippy.tests.qwen_capable +test.qwen3.5_0.8B: clippy $(QWEN3_5_0_8B_ENV) cargo test $(CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE) .PHONY: test.qwen3.6_35b_a3b -test.qwen3.6_35b_a3b: clippy.core clippy.tests.qwen_capable +test.qwen3.6_35b_a3b: clippy $(QWEN3_6_35B_A3B_ENV) cargo test $(CARGO_TEST_LLM_FLAGS_QWEN_CAPABLE) .PHONY: test.unit -test.unit: clippy.core +test.unit: clippy cargo test -p llama-cpp-log-decoder cargo test -p llama-cpp-bindings $(DEVICE_FEATURE) diff --git a/llama-cpp-bindings-build/src/cmake_config.rs b/llama-cpp-bindings-build/src/cmake_config.rs index a52521e3..90b608d4 100644 --- a/llama-cpp-bindings-build/src/cmake_config.rs +++ b/llama-cpp-bindings-build/src/cmake_config.rs @@ -205,6 +205,7 @@ fn configure_platform_specific( TargetOs::Windows(WindowsVariant::Msvc) => { config.cflag("/w"); config.cxxflag("/w"); + config.cxxflag("/EHsc"); configure_msvc_release_workaround(config, profile); } TargetOs::Android => { diff --git a/llama-cpp-bindings-build/src/cpp_wrapper.rs b/llama-cpp-bindings-build/src/cpp_wrapper.rs index 722c7e41..fdd8ab37 100644 --- a/llama-cpp-bindings-build/src/cpp_wrapper.rs +++ b/llama-cpp-bindings-build/src/cpp_wrapper.rs @@ -33,6 +33,7 @@ pub fn compile_cpp_wrappers(llama_src: &Path, target_os: &TargetOs) { if target_os.is_msvc() { build.flag("/std:c++17"); + build.flag("/EHsc"); } if target_os.is_android() && cfg!(feature = "static-stdcxx") { diff --git a/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs b/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs index 7af3f01a..92f561e6 100644 --- a/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs +++ b/llama-cpp-bindings-build/src/cpp_wrapper_mtmd.rs @@ -23,6 +23,7 @@ pub fn compile_mtmd(llama_src: &Path, target_os: &TargetOs) { if target_os.is_msvc() { build.flag("/std:c++17"); + build.flag("/EHsc"); } if target_os.is_android() && cfg!(feature = "static-stdcxx") { diff --git a/llama-cpp-bindings-sys/llama.cpp b/llama-cpp-bindings-sys/llama.cpp index 846262d7..59778f01 160000 --- a/llama-cpp-bindings-sys/llama.cpp +++ b/llama-cpp-bindings-sys/llama.cpp @@ -1 +1 @@ -Subproject commit 846262d7875dcabf502a150fa3d7b9c770dde7eb +Subproject commit 59778f0196a82db32580bb649d5d839355d6d7bf diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp index f60cada6..0bf59aee 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_parse.cpp +++ b/llama-cpp-bindings-sys/wrapper_chat_parse.cpp @@ -7,6 +7,7 @@ #include "marker_probes/marker_probe.h" #include +#include #include #include @@ -16,7 +17,16 @@ struct llama_rs_parsed_chat { common_chat_msg message; }; -extern "C" llama_rs_status llama_rs_parse_chat_message( +static char * dup_or_set_alloc_flag(const std::string & source, bool * out_alloc_failed) { + *out_alloc_failed = false; + char * dup = llama_rs_dup_string(source); + if (!dup) { + *out_alloc_failed = true; + } + return dup; +} + +extern "C" llama_rs_parse_chat_message_status llama_rs_parse_chat_message( const struct llama_model * model, const char * tools_json, const char * input, @@ -29,20 +39,28 @@ extern "C" llama_rs_status llama_rs_parse_chat_message( if (out_error) { *out_error = nullptr; } - - if (!model || !input || !out_handle || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG; + } + if (!input) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG; + } + if (!out_handle) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG; + } + if (!out_error) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + return LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -91,63 +109,323 @@ extern "C" llama_rs_status llama_rs_parse_chat_message( *out_handle = handle; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_PARSE_CHAT_MESSAGE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" void llama_rs_parsed_chat_free(llama_rs_parsed_chat_handle handle) { - delete handle; +extern "C" llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( + llama_rs_parsed_chat_handle handle, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + try { + delete handle; + return LLAMA_RS_PARSED_CHAT_FREE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION; + } } -extern "C" size_t llama_rs_parsed_chat_tool_call_count(llama_rs_parsed_chat_handle handle) { +extern "C" llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( + llama_rs_parsed_chat_handle handle, + size_t * out_count, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_count) { + *out_count = 0; + } if (!handle) { - return 0; + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG; + } + if (!out_count) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG; + } + try { + *out_count = handle->message.tool_calls.size(); + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION; } - return handle->message.tool_calls.size(); } -extern "C" char * llama_rs_parsed_chat_tool_call_id( - llama_rs_parsed_chat_handle handle, size_t index) { - if (!handle || index >= handle->message.tool_calls.size()) { - return nullptr; +extern "C" llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } + if (!handle) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG; + } + try { + if (index >= handle->message.tool_calls.size()) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS; + } + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].id, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.tool_calls[index].id); } -extern "C" char * llama_rs_parsed_chat_tool_call_name( - llama_rs_parsed_chat_handle handle, size_t index) { - if (!handle || index >= handle->message.tool_calls.size()) { - return nullptr; +extern "C" llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } + if (!handle) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG; + } + try { + if (index >= handle->message.tool_calls.size()) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS; + } + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.tool_calls[index].name, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.tool_calls[index].name); } -extern "C" char * llama_rs_parsed_chat_tool_call_arguments( - llama_rs_parsed_chat_handle handle, size_t index) { - if (!handle || index >= handle->message.tool_calls.size()) { - return nullptr; +extern "C" llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } + if (!handle) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG; + } + try { + if (index >= handle->message.tool_calls.size()) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS; + } + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag( + handle->message.tool_calls[index].arguments, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.tool_calls[index].arguments); } -extern "C" char * llama_rs_parsed_chat_content(llama_rs_parsed_chat_handle handle) { +extern "C" llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } if (!handle) { - return nullptr; + return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG; + } + try { + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.content, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_CONTENT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.content); } -extern "C" char * llama_rs_parsed_chat_reasoning_content(llama_rs_parsed_chat_handle handle) { +extern "C" llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_string) { + *out_string = nullptr; + } if (!handle) { - return nullptr; + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG; + } + if (!out_string) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG; + } + try { + bool alloc_failed = false; + *out_string = dup_or_set_alloc_flag(handle->message.reasoning_content, &alloc_failed); + if (alloc_failed) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION; } - return llama_rs_dup_string(handle->message.reasoning_content); } diff --git a/llama-cpp-bindings-sys/wrapper_chat_parse.h b/llama-cpp-bindings-sys/wrapper_chat_parse.h index 12fed5d9..e235673c 100644 --- a/llama-cpp-bindings-sys/wrapper_chat_parse.h +++ b/llama-cpp-bindings-sys/wrapper_chat_parse.h @@ -12,20 +12,19 @@ extern "C" { struct llama_rs_parsed_chat; typedef struct llama_rs_parsed_chat * llama_rs_parsed_chat_handle; -/** - * Parse a chat-completion turn from raw assistant output using llama.cpp's - * `common_chat_parse`, driven by the model's autoparser-built peg parser. - * - * `tools_json` is a serialized JSON array of OpenAI-style tool definitions - * (or empty / null when the request had no tools). `is_partial` switches - * between mid-stream parses (partial accepts incomplete payloads) and final - * parses (rejects malformed input). - * - * On success, `*out_handle` owns the parsed message; free via - * `llama_rs_parsed_chat_free`. On failure, `*out_error` carries an - * exception message; free via `llama_rs_string_free`. - */ -llama_rs_status llama_rs_parse_chat_message( +typedef enum llama_rs_parse_chat_message_status { + LLAMA_RS_PARSE_CHAT_MESSAGE_OK = 0, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG, + LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE, + LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB, + LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parse_chat_message_status; + +llama_rs_parse_chat_message_status llama_rs_parse_chat_message( const struct llama_model * model, const char * tools_json, const char * input, @@ -33,25 +32,99 @@ llama_rs_status llama_rs_parse_chat_message( llama_rs_parsed_chat_handle * out_handle, char ** out_error); -void llama_rs_parsed_chat_free(llama_rs_parsed_chat_handle handle); +typedef enum llama_rs_parsed_chat_free_status { + LLAMA_RS_PARSED_CHAT_FREE_OK = 0, + LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_free_status; + +llama_rs_parsed_chat_free_status llama_rs_parsed_chat_free( + llama_rs_parsed_chat_handle handle, + char ** out_error); + +typedef enum llama_rs_parsed_chat_tool_call_count_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_count_status; + +llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_count( + llama_rs_parsed_chat_handle handle, + size_t * out_count, + char ** out_error); + +typedef enum llama_rs_parsed_chat_tool_call_id_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_id_status; + +llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_id( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error); + +typedef enum llama_rs_parsed_chat_tool_call_name_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_name_status; + +llama_rs_parsed_chat_tool_call_name_status llama_rs_parsed_chat_tool_call_name( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error); -size_t llama_rs_parsed_chat_tool_call_count(llama_rs_parsed_chat_handle handle); +typedef enum llama_rs_parsed_chat_tool_call_arguments_status { + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK = 0, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_tool_call_arguments_status; + +llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_arguments( + llama_rs_parsed_chat_handle handle, + size_t index, + char ** out_string, + char ** out_error); -/** - * Returns a heap-allocated UTF-8 string for the i-th tool call's `id`, - * `name`, or `arguments` field. Free with `llama_rs_string_free`. Returns - * nullptr if `handle` is null or `index` is out of bounds. - * - * `arguments` is the raw JSON string emitted by the parser — the caller is - * expected to feed it into a schema validator or hand it back to clients - * verbatim. - */ -char * llama_rs_parsed_chat_tool_call_id(llama_rs_parsed_chat_handle handle, size_t index); -char * llama_rs_parsed_chat_tool_call_name(llama_rs_parsed_chat_handle handle, size_t index); -char * llama_rs_parsed_chat_tool_call_arguments(llama_rs_parsed_chat_handle handle, size_t index); +typedef enum llama_rs_parsed_chat_content_status { + LLAMA_RS_PARSED_CHAT_CONTENT_OK = 0, + LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_content_status; -char * llama_rs_parsed_chat_content(llama_rs_parsed_chat_handle handle); -char * llama_rs_parsed_chat_reasoning_content(llama_rs_parsed_chat_handle handle); +llama_rs_parsed_chat_content_status llama_rs_parsed_chat_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error); + +typedef enum llama_rs_parsed_chat_reasoning_content_status { + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK = 0, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_parsed_chat_reasoning_content_status; + +llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_reasoning_content( + llama_rs_parsed_chat_handle handle, + char ** out_string, + char ** out_error); #ifdef __cplusplus } diff --git a/llama-cpp-bindings-sys/wrapper_common.cpp b/llama-cpp-bindings-sys/wrapper_common.cpp index e195d5d7..50f8f5e8 100644 --- a/llama-cpp-bindings-sys/wrapper_common.cpp +++ b/llama-cpp-bindings-sys/wrapper_common.cpp @@ -1,9 +1,11 @@ #include "wrapper_common.h" -#include #include #include #include +#include +#include +#include #include #include @@ -14,34 +16,55 @@ #include -extern "C" llama_rs_status llama_rs_json_schema_to_grammar( +extern "C" llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar( const char * schema_json, bool force_gbnf, char ** out_grammar, char ** out_error) { - if (!schema_json || !out_grammar || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_grammar) { + *out_grammar = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!schema_json) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG; + } + if (!out_grammar) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG; + } + if (!out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG; } - - *out_grammar = nullptr; - *out_error = nullptr; try { const auto schema = nlohmann::ordered_json::parse(schema_json); const auto grammar = json_schema_to_grammar(schema, force_gbnf); *out_grammar = llama_rs_dup_string(grammar); - - return *out_grammar ? LLAMA_RS_STATUS_OK : LLAMA_RS_STATUS_ALLOCATION_FAILED; + if (!*out_grammar) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::invalid_argument & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_INVALID_SCHEMA; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return LLAMA_RS_STATUS_EXCEPTION; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } } @@ -51,33 +74,48 @@ extern "C" void llama_rs_string_free(char * ptr) { } } -extern "C" struct llama_sampler * llama_rs_sampler_init_grammar( +extern "C" llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, + struct llama_sampler ** out_sampler, char ** out_error) { + if (out_sampler) { + *out_sampler = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG; + } if (!out_error) { - return nullptr; + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { - return llama_sampler_init_grammar(vocab, grammar_str, grammar_root); + *out_sampler = llama_sampler_init_grammar(vocab, grammar_str, grammar_root); + if (!*out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return nullptr; + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return nullptr; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( +extern "C" llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -85,13 +123,20 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( size_t num_trigger_words, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error) { + if (out_sampler) { + *out_sampler = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG; + } if (!out_error) { - return nullptr; + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { std::vector trigger_patterns; trigger_patterns.reserve(num_trigger_words); @@ -107,7 +152,7 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( trigger_patterns_c.push_back(pattern.c_str()); } - return llama_sampler_init_grammar_lazy_patterns( + *out_sampler = llama_sampler_init_grammar_lazy_patterns( vocab, grammar_str, grammar_root, @@ -115,20 +160,28 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy( trigger_patterns_c.size(), trigger_tokens, num_trigger_tokens); + if (!*out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return nullptr; + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return nullptr; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( +extern "C" llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -136,15 +189,22 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( size_t num_trigger_patterns, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error) { + if (out_sampler) { + *out_sampler = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG; + } if (!out_error) { - return nullptr; + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { - return llama_sampler_init_grammar_lazy_patterns( + *out_sampler = llama_sampler_init_grammar_lazy_patterns( vocab, grammar_str, grammar_root, @@ -152,16 +212,30 @@ extern "C" struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( num_trigger_patterns, trigger_tokens, num_trigger_tokens); + if (!*out_sampler) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::regex_error & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_INVALID_TRIGGER_PATTERN; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return nullptr; + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return nullptr; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION; } } @@ -171,134 +245,459 @@ extern "C" llama_pos llama_rs_memory_seq_pos_max( if (!ctx) { return -1; } - auto * mem = llama_get_memory(ctx); - if (!mem) { - return -1; - } - uint32_t n_seq_max = llama_n_seq_max(ctx); - if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) { + try { + auto * mem = llama_get_memory(ctx); + if (!mem) { + return -1; + } + uint32_t n_seq_max = llama_n_seq_max(ctx); + if (seq_id < 0 || (uint32_t) seq_id >= n_seq_max) { + return -1; + } + + return llama_memory_seq_pos_max(mem, seq_id); + } catch (...) { return -1; } - - return llama_memory_seq_pos_max(mem, seq_id); } -extern "C" llama_rs_status llama_rs_encode( +extern "C" llama_rs_encode_status llama_rs_encode( struct llama_context * ctx, - struct llama_batch batch) { - if (!ctx) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; } - const auto * model = llama_get_model(ctx); - if (!llama_model_has_encoder(model)) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_vendored_return_code) { + *out_vendored_return_code = 0; } - int32_t result = llama_encode(ctx, batch); - if (result != 0) { - return LLAMA_RS_STATUS_EXCEPTION; + if (!ctx) { + return LLAMA_RS_ENCODE_NULL_CTX_ARG; + } + try { + const auto * model = llama_get_model(ctx); + if (!llama_model_has_encoder(model)) { + return LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER; + } + int32_t result = llama_encode(ctx, batch); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + if (result == -2) { + return LLAMA_RS_ENCODE_OUT_OF_MEMORY; + } + if (result == -3) { + return LLAMA_RS_ENCODE_COMPUTE_FAILED; + } + return LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_ENCODE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION; } - - return LLAMA_RS_STATUS_OK; } -extern "C" llama_rs_status llama_rs_memory_seq_add( +extern "C" llama_rs_memory_seq_add_status llama_rs_memory_seq_add( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - llama_pos shift) { - if (!ctx) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + llama_pos shift, + char ** out_error) { + if (out_error) { + *out_error = nullptr; } - const auto * model = llama_get_model(ctx); - const auto rope = llama_model_rope_type(model); - if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!ctx) { + return LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG; } - auto * mem = llama_get_memory(ctx); - if (!mem) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + try { + const auto * model = llama_get_model(ctx); + const auto rope = llama_model_rope_type(model); + if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { + return LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE; + } + auto * mem = llama_get_memory(ctx); + if (!mem) { + return LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM; + } + llama_memory_seq_add(mem, seq_id, p0, p1, shift); + return LLAMA_RS_MEMORY_SEQ_ADD_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION; } - llama_memory_seq_add(mem, seq_id, p0, p1, shift); - - return LLAMA_RS_STATUS_OK; } -extern "C" llama_rs_status llama_rs_memory_seq_div( +extern "C" llama_rs_memory_seq_div_status llama_rs_memory_seq_div( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - int d) { - if (!ctx) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + int d, + char ** out_error) { + if (out_error) { + *out_error = nullptr; } - const auto * model = llama_get_model(ctx); - const auto rope = llama_model_rope_type(model); - if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!ctx) { + return LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG; } - auto * mem = llama_get_memory(ctx); - if (!mem) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + try { + const auto * model = llama_get_model(ctx); + const auto rope = llama_model_rope_type(model); + if (rope == LLAMA_ROPE_TYPE_MROPE || rope == LLAMA_ROPE_TYPE_VISION || rope == LLAMA_ROPE_TYPE_IMROPE) { + return LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE; + } + auto * mem = llama_get_memory(ctx); + if (!mem) { + return LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM; + } + llama_memory_seq_div(mem, seq_id, p0, p1, d); + return LLAMA_RS_MEMORY_SEQ_DIV_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION; } - llama_memory_seq_div(mem, seq_id, p0, p1, d); - - return LLAMA_RS_STATUS_OK; } -extern "C" llama_rs_status llama_rs_sampler_sample( +extern "C" llama_rs_sampler_sample_status llama_rs_sampler_sample( struct llama_sampler * sampler, struct llama_context * ctx, int32_t idx, llama_token * out_token, char ** out_error) { - if (!sampler || !ctx || !out_token || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_error) { + *out_error = nullptr; + } + if (!sampler) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG; + } + if (!ctx) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG; + } + if (!out_token) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG; + } + if (!out_error) { + return LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG; } - - *out_error = nullptr; - try { *out_token = llama_sampler_sample(sampler, ctx, idx); - - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_SAMPLER_SAMPLE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); - - return LLAMA_RS_STATUS_EXCEPTION; + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_status llama_rs_sampler_accept( +extern "C" llama_rs_sampler_accept_status llama_rs_sampler_accept( struct llama_sampler * sampler, llama_token token, char ** out_error) { - if (!sampler || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (out_error) { + *out_error = nullptr; + } + if (!sampler) { + return LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG; } + if (!out_error) { + return LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG; + } + try { + llama_sampler_accept(sampler, token); + return LLAMA_RS_SAMPLER_ACCEPT_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION; + } +} - *out_error = nullptr; +extern "C" llama_rs_load_model_from_file_status llama_rs_load_model_from_file( + const char * path, + struct llama_model_params params, + struct llama_model ** out_model, + char ** out_error) { + if (out_model) { + *out_model = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!path) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG; + } + if (!out_model) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG; + } + if (!out_error) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG; + } + try { + *out_model = llama_load_model_from_file(path, params); + if (!*out_model) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_LOAD_MODEL_FROM_FILE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } +} +extern "C" llama_rs_new_context_with_model_status llama_rs_new_context_with_model( + struct llama_model * model, + struct llama_context_params params, + struct llama_context ** out_ctx, + char ** out_error) { + if (out_ctx) { + *out_ctx = nullptr; + } + if (out_error) { + *out_error = nullptr; + } + if (!model) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG; + } + if (!out_ctx) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG; + } + if (!out_error) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG; + } try { - llama_sampler_accept(sampler, token); + *out_ctx = llama_new_context_with_model(model, params); + if (!*out_ctx) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL; + } + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION; + } +} - return LLAMA_RS_STATUS_OK; +extern "C" llama_rs_decode_status llama_rs_decode( + struct llama_context * ctx, + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_vendored_return_code) { + *out_vendored_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_DECODE_NULL_CTX_ARG; + } + if (!out_error) { + return LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG; + } + try { + int32_t result = llama_decode(ctx, batch); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + if (result == -2) { + return LLAMA_RS_DECODE_OUT_OF_MEMORY; + } + if (result == -3) { + return LLAMA_RS_DECODE_COMPUTE_FAILED; + } + return LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_DECODE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & err) { - fprintf(stderr, "%s: C++ exception: %s\n", __func__, err.what()); *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION; + } +} - return LLAMA_RS_STATUS_EXCEPTION; +extern "C" llama_rs_tokenize_status llama_rs_tokenize( + const struct llama_vocab * vocab, + const char * text, + int32_t text_len, + llama_token * tokens, + int32_t n_tokens_max, + bool add_special, + bool parse_special, + int32_t * out_returned_count, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_returned_count) { + *out_returned_count = 0; + } + if (!vocab) { + return LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG; + } + if (!text) { + return LLAMA_RS_TOKENIZE_NULL_TEXT_ARG; + } + if (!out_returned_count) { + return LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG; + } + if (!out_error) { + return LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG; + } + try { + int32_t count = llama_tokenize( + vocab, text, text_len, tokens, n_tokens_max, add_special, parse_special); + *out_returned_count = count; + return LLAMA_RS_TOKENIZE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { - fprintf(stderr, "%s: unknown C++ exception\n", __func__); - *out_error = llama_rs_dup_string("unknown C++ exception"); + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } +} - return LLAMA_RS_STATUS_EXCEPTION; +extern "C" llama_rs_sampler_apply_status llama_rs_sampler_apply( + struct llama_sampler * sampler, + struct llama_token_data_array * data_array, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (!sampler) { + return LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG; + } + if (!data_array) { + return LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG; + } + if (!out_error) { + return LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG; + } + try { + llama_sampler_apply(sampler, data_array); + return LLAMA_RS_SAMPLER_APPLY_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_common.h b/llama-cpp-bindings-sys/wrapper_common.h index 3d990abf..f790408a 100644 --- a/llama-cpp-bindings-sys/wrapper_common.h +++ b/llama-cpp-bindings-sys/wrapper_common.h @@ -4,6 +4,7 @@ #include #include +#include struct llama_model; struct llama_sampler; @@ -15,19 +16,48 @@ struct llama_vocab; extern "C" { #endif -llama_rs_status llama_rs_json_schema_to_grammar( +typedef enum llama_rs_json_schema_to_grammar_status { + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK = 0, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_INVALID_SCHEMA, + LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_json_schema_to_grammar_status; + +llama_rs_json_schema_to_grammar_status llama_rs_json_schema_to_grammar( const char * schema_json, bool force_gbnf, char ** out_grammar, char ** out_error); -struct llama_sampler * llama_rs_sampler_init_grammar( +typedef enum llama_rs_sampler_init_grammar_status { + LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK = 0, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_init_grammar_status; + +llama_rs_sampler_init_grammar_status llama_rs_sampler_init_grammar( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, + struct llama_sampler ** out_sampler, char ** out_error); -struct llama_sampler * llama_rs_sampler_init_grammar_lazy( +typedef enum llama_rs_sampler_init_grammar_lazy_status { + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK = 0, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_init_grammar_lazy_status; + +llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_lazy( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -35,9 +65,20 @@ struct llama_sampler * llama_rs_sampler_init_grammar_lazy( size_t num_trigger_words, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error); -struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( +typedef enum llama_rs_sampler_init_grammar_lazy_patterns_status { + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK = 0, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_INVALID_TRIGGER_PATTERN, + LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_init_grammar_lazy_patterns_status; + +llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_patterns( const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root, @@ -45,14 +86,33 @@ struct llama_sampler * llama_rs_sampler_init_grammar_lazy_patterns( size_t num_trigger_patterns, const llama_token * trigger_tokens, size_t num_trigger_tokens, + struct llama_sampler ** out_sampler, char ** out_error); -llama_rs_status llama_rs_sampler_accept( +typedef enum llama_rs_sampler_accept_status { + LLAMA_RS_SAMPLER_ACCEPT_OK = 0, + LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG, + LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_accept_status; + +llama_rs_sampler_accept_status llama_rs_sampler_accept( struct llama_sampler * sampler, llama_token token, char ** out_error); -llama_rs_status llama_rs_sampler_sample( +typedef enum llama_rs_sampler_sample_status { + LLAMA_RS_SAMPLER_SAMPLE_OK = 0, + LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG, + LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG, + LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG, + LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_sample_status; + +llama_rs_sampler_sample_status llama_rs_sampler_sample( struct llama_sampler * sampler, struct llama_context * ctx, int32_t idx, @@ -65,23 +125,140 @@ llama_pos llama_rs_memory_seq_pos_max( struct llama_context * ctx, llama_seq_id seq_id); -llama_rs_status llama_rs_encode( +typedef enum llama_rs_encode_status { + LLAMA_RS_ENCODE_OK = 0, + LLAMA_RS_ENCODE_NULL_CTX_ARG, + LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER, + LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_ENCODE_OUT_OF_MEMORY, + LLAMA_RS_ENCODE_COMPUTE_FAILED, + LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_encode_status; + +llama_rs_encode_status llama_rs_encode( struct llama_context * ctx, - struct llama_batch batch); + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error); -llama_rs_status llama_rs_memory_seq_add( +typedef enum llama_rs_memory_seq_add_status { + LLAMA_RS_MEMORY_SEQ_ADD_OK = 0, + LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG, + LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE, + LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM, + LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_memory_seq_add_status; + +llama_rs_memory_seq_add_status llama_rs_memory_seq_add( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - llama_pos shift); + llama_pos shift, + char ** out_error); -llama_rs_status llama_rs_memory_seq_div( +typedef enum llama_rs_memory_seq_div_status { + LLAMA_RS_MEMORY_SEQ_DIV_OK = 0, + LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG, + LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE, + LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM, + LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_memory_seq_div_status; + +llama_rs_memory_seq_div_status llama_rs_memory_seq_div( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, llama_pos p1, - int d); + int d, + char ** out_error); + +typedef enum llama_rs_load_model_from_file_status { + LLAMA_RS_LOAD_MODEL_FROM_FILE_OK = 0, + LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG, + LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG, + LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG, + LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL, + LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_load_model_from_file_status; + +llama_rs_load_model_from_file_status llama_rs_load_model_from_file( + const char * path, + struct llama_model_params params, + struct llama_model ** out_model, + char ** out_error); + +typedef enum llama_rs_new_context_with_model_status { + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK = 0, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_new_context_with_model_status; + +llama_rs_new_context_with_model_status llama_rs_new_context_with_model( + struct llama_model * model, + struct llama_context_params params, + struct llama_context ** out_ctx, + char ** out_error); + +typedef enum llama_rs_decode_status { + LLAMA_RS_DECODE_OK = 0, + LLAMA_RS_DECODE_NULL_CTX_ARG, + LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG, + LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_DECODE_OUT_OF_MEMORY, + LLAMA_RS_DECODE_COMPUTE_FAILED, + LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_decode_status; + +llama_rs_decode_status llama_rs_decode( + struct llama_context * ctx, + struct llama_batch batch, + int32_t * out_vendored_return_code, + char ** out_error); + +typedef enum llama_rs_tokenize_status { + LLAMA_RS_TOKENIZE_OK = 0, + LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG, + LLAMA_RS_TOKENIZE_NULL_TEXT_ARG, + LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG, + LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG, + LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_tokenize_status; + +llama_rs_tokenize_status llama_rs_tokenize( + const struct llama_vocab * vocab, + const char * text, + int32_t text_len, + llama_token * tokens, + int32_t n_tokens_max, + bool add_special, + bool parse_special, + int32_t * out_returned_count, + char ** out_error); + +typedef enum llama_rs_sampler_apply_status { + LLAMA_RS_SAMPLER_APPLY_OK = 0, + LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG, + LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG, + LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG, + LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_sampler_apply_status; + +llama_rs_sampler_apply_status llama_rs_sampler_apply( + struct llama_sampler * sampler, + struct llama_token_data_array * data_array, + char ** out_error); #ifdef __cplusplus } diff --git a/llama-cpp-bindings-sys/wrapper_fit.cpp b/llama-cpp-bindings-sys/wrapper_fit.cpp index 1ec7d169..02eee839 100644 --- a/llama-cpp-bindings-sys/wrapper_fit.cpp +++ b/llama-cpp-bindings-sys/wrapper_fit.cpp @@ -1,10 +1,12 @@ #include "wrapper_fit.h" +#include "wrapper_utils.h" #include +#include #include "llama.cpp/common/fit.h" -extern "C" llama_rs_fit_status llama_rs_fit_params( +extern "C" llama_rs_fit_params_status llama_rs_fit_params( const char * path_model, struct llama_model_params * mparams, struct llama_context_params * cparams, @@ -12,21 +14,49 @@ extern "C" llama_rs_fit_status llama_rs_fit_params( struct llama_model_tensor_buft_override * tensor_buft_overrides, size_t * margins, uint32_t n_ctx_min, - enum ggml_log_level log_level) { + enum ggml_log_level log_level, + int32_t * out_unrecognized_status_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_unrecognized_status_code) { + *out_unrecognized_status_code = 0; + } + try { const common_params_fit_status status = common_fit_params( path_model, mparams, cparams, tensor_split, tensor_buft_overrides, margins, n_ctx_min, log_level); switch (status) { case COMMON_PARAMS_FIT_STATUS_SUCCESS: - return LLAMA_RS_FIT_STATUS_SUCCESS; + return LLAMA_RS_FIT_PARAMS_OK; case COMMON_PARAMS_FIT_STATUS_FAILURE: - return LLAMA_RS_FIT_STATUS_FAILURE; + return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE; case COMMON_PARAMS_FIT_STATUS_ERROR: - return LLAMA_RS_FIT_STATUS_ERROR; + return LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR; + } + if (out_unrecognized_status_code) { + *out_unrecognized_status_code = static_cast(status); + } + return LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE; + } catch (const std::bad_alloc &) { + return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED; + } } - return LLAMA_RS_FIT_STATUS_ERROR; - } catch (const std::exception &) { - return LLAMA_RS_FIT_STATUS_ERROR; + return LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_fit.h b/llama-cpp-bindings-sys/wrapper_fit.h index c00a2620..9a6ac6ef 100644 --- a/llama-cpp-bindings-sys/wrapper_fit.h +++ b/llama-cpp-bindings-sys/wrapper_fit.h @@ -10,13 +10,16 @@ extern "C" { #endif -typedef enum llama_rs_fit_status { - LLAMA_RS_FIT_STATUS_SUCCESS = 0, - LLAMA_RS_FIT_STATUS_FAILURE = 1, - LLAMA_RS_FIT_STATUS_ERROR = 2, -} llama_rs_fit_status; +typedef enum llama_rs_fit_params_status { + LLAMA_RS_FIT_PARAMS_OK = 0, + LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE, + LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR, + LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE, + LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_fit_params_status; -llama_rs_fit_status llama_rs_fit_params( +llama_rs_fit_params_status llama_rs_fit_params( const char * path_model, struct llama_model_params * mparams, struct llama_context_params * cparams, @@ -24,7 +27,9 @@ llama_rs_fit_status llama_rs_fit_params( struct llama_model_tensor_buft_override * tensor_buft_overrides, size_t * margins, uint32_t n_ctx_min, - enum ggml_log_level log_level); + enum ggml_log_level log_level, + int32_t * out_unrecognized_status_code, + char ** out_error); #ifdef __cplusplus } diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.cpp b/llama-cpp-bindings-sys/wrapper_mtmd.cpp new file mode 100644 index 00000000..bff5b958 --- /dev/null +++ b/llama-cpp-bindings-sys/wrapper_mtmd.cpp @@ -0,0 +1,273 @@ +#include "wrapper_mtmd.h" +#include "wrapper_utils.h" + +#include +#include +#include + +extern "C" llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( + const char * mmproj_path, + const struct llama_model * text_model, + struct mtmd_context_params ctx_params, + struct mtmd_context ** out_ctx, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (!out_ctx) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG; + } + *out_ctx = nullptr; + if (!mmproj_path) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG; + } + if (!text_model) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG; + } + + try { + struct mtmd_context * ctx = mtmd_init_from_file(mmproj_path, text_model, ctx_params); + if (!ctx) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL; + } + *out_ctx = ctx; + return LLAMA_RS_MTMD_INIT_FROM_FILE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file( + struct mtmd_context * ctx, + const char * fname, + struct mtmd_bitmap ** out_bitmap, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (!out_bitmap) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG; + } + *out_bitmap = nullptr; + if (!ctx) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG; + } + if (!fname) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG; + } + + try { + struct mtmd_bitmap * bitmap = mtmd_helper_bitmap_init_from_file(ctx, fname); + if (!bitmap) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL; + } + *out_bitmap = bitmap; + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( + struct mtmd_context * ctx, + struct mtmd_input_chunks * output, + const struct mtmd_input_text * text, + const struct mtmd_bitmap ** bitmaps, + size_t num_bitmaps, + int32_t * out_undocumented_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_undocumented_return_code) { + *out_undocumented_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG; + } + if (!output) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG; + } + if (!text) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG; + } + if (num_bitmaps > 0 && !bitmaps) { + return LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO; + } + + try { + int32_t result = mtmd_tokenize(ctx, output, text, bitmaps, num_bitmaps); + switch (result) { + case 0: + return LLAMA_RS_MTMD_TOKENIZE_OK; + case 1: + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT; + case 2: + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR; + default: + if (out_undocumented_return_code) { + *out_undocumented_return_code = result; + } + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE; + } + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( + struct mtmd_context * ctx, + const struct mtmd_input_chunk * chunk, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_vendored_return_code) { + *out_vendored_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG; + } + if (!chunk) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG; + } + + try { + int32_t result = mtmd_encode_chunk(ctx, chunk); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION; + } +} + +extern "C" llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single( + struct mtmd_context * ctx, + struct llama_context * lctx, + const struct mtmd_input_chunk * chunk, + llama_pos n_past, + llama_seq_id seq_id, + int32_t n_batch, + bool logits_last, + llama_pos * out_new_n_past, + int32_t * out_vendored_return_code, + char ** out_error) { + if (out_error) { + *out_error = nullptr; + } + if (out_vendored_return_code) { + *out_vendored_return_code = 0; + } + if (!ctx) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG; + } + if (!lctx) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG; + } + if (!chunk) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG; + } + if (!out_new_n_past) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG; + } + + try { + int32_t result = mtmd_helper_eval_chunk_single( + ctx, lctx, chunk, n_past, seq_id, n_batch, logits_last, out_new_n_past); + if (result != 0) { + if (out_vendored_return_code) { + *out_vendored_return_code = result; + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE; + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; + } catch (const std::exception & err) { + if (out_error) { + *out_error = llama_rs_dup_string(err.what()); + if (!*out_error) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION; + } catch (...) { + if (out_error) { + *out_error = llama_rs_dup_string("unknown c++ exception"); + if (!*out_error) { + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED; + } + } + return LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION; + } +} diff --git a/llama-cpp-bindings-sys/wrapper_mtmd.h b/llama-cpp-bindings-sys/wrapper_mtmd.h index 72fb2111..ac2f5314 100644 --- a/llama-cpp-bindings-sys/wrapper_mtmd.h +++ b/llama-cpp-bindings-sys/wrapper_mtmd.h @@ -1,2 +1,109 @@ +#pragma once + +#include "llama.cpp/include/llama.h" #include "llama.cpp/tools/mtmd/mtmd.h" #include "llama.cpp/tools/mtmd/mtmd-helper.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum llama_rs_mtmd_init_from_file_status { + LLAMA_RS_MTMD_INIT_FROM_FILE_OK = 0, + LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG, + LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG, + LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG, + LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL, + LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_init_from_file_status; + +llama_rs_mtmd_init_from_file_status llama_rs_mtmd_init_from_file( + const char * mmproj_path, + const struct llama_model * text_model, + struct mtmd_context_params ctx_params, + struct mtmd_context ** out_ctx, + char ** out_error); + +typedef enum llama_rs_mtmd_bitmap_init_from_file_status { + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK = 0, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_bitmap_init_from_file_status; + +llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_bitmap_init_from_file( + struct mtmd_context * ctx, + const char * fname, + struct mtmd_bitmap ** out_bitmap, + char ** out_error); + +typedef enum llama_rs_mtmd_tokenize_status { + LLAMA_RS_MTMD_TOKENIZE_OK = 0, + LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG, + LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG, + LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG, + LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE, + LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_tokenize_status; + +llama_rs_mtmd_tokenize_status llama_rs_mtmd_tokenize( + struct mtmd_context * ctx, + struct mtmd_input_chunks * output, + const struct mtmd_input_text * text, + const struct mtmd_bitmap ** bitmaps, + size_t num_bitmaps, + int32_t * out_undocumented_return_code, + char ** out_error); + +typedef enum llama_rs_mtmd_encode_chunk_status { + LLAMA_RS_MTMD_ENCODE_CHUNK_OK = 0, + LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG, + LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG, + LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_encode_chunk_status; + +llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_encode_chunk( + struct mtmd_context * ctx, + const struct mtmd_input_chunk * chunk, + int32_t * out_vendored_return_code, + char ** out_error); + +typedef enum llama_rs_mtmd_eval_chunk_single_status { + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK = 0, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_mtmd_eval_chunk_single_status; + +llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_eval_chunk_single( + struct mtmd_context * ctx, + struct llama_context * lctx, + const struct mtmd_input_chunk * chunk, + llama_pos n_past, + llama_seq_id seq_id, + int32_t n_batch, + bool logits_last, + llama_pos * out_new_n_past, + int32_t * out_vendored_return_code, + char ** out_error); + +#ifdef __cplusplus +} +#endif diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.cpp b/llama-cpp-bindings-sys/wrapper_reasoning.cpp index 36b0763e..7970b4ee 100644 --- a/llama-cpp-bindings-sys/wrapper_reasoning.cpp +++ b/llama-cpp-bindings-sys/wrapper_reasoning.cpp @@ -6,6 +6,7 @@ #include "marker_probes/marker_probe.h" #include +#include #include #include @@ -26,7 +27,7 @@ std::string token_text_or_empty(const llama_vocab * vocab, llama_token token) { } // namespace -extern "C" llama_rs_status llama_rs_detect_reasoning_markers( +extern "C" llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers( const struct llama_model * model, char ** out_open, char ** out_close, @@ -40,20 +41,28 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers( if (out_error) { *out_error = nullptr; } - - if (!model || !out_open || !out_close || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG; + } + if (!out_open) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG; + } + if (!out_close) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG; + } + if (!out_error) { + return LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -112,7 +121,7 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers( } if (!detected) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; } char * open_dup = llama_rs_dup_string(detected_start); @@ -122,21 +131,27 @@ extern "C" llama_rs_status llama_rs_detect_reasoning_markers( std::free(open_dup); std::free(close_dup); - return LLAMA_RS_STATUS_ALLOCATION_FAILED; + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } *out_open = open_dup; *out_close = close_dup; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DETECT_REASONING_MARKERS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_reasoning.h b/llama-cpp-bindings-sys/wrapper_reasoning.h index eca91ea8..a22f79ba 100644 --- a/llama-cpp-bindings-sys/wrapper_reasoning.h +++ b/llama-cpp-bindings-sys/wrapper_reasoning.h @@ -7,21 +7,17 @@ extern "C" { #endif -/** - * Detect the reasoning open/close marker strings for a model by analyzing its - * Jinja chat template via llama.cpp's autoparser. - * - * On success (LLAMA_RS_STATUS_OK): - * - If the model has detected reasoning markers, *out_open and *out_close are - * set to heap-allocated null-terminated strings owned by the caller. Free - * each via llama_rs_string_free. - * - If no reasoning markers were detected, *out_open and *out_close are left - * as nullptr. - * - * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set to a heap-allocated message; - * free via llama_rs_string_free. - */ -llama_rs_status llama_rs_detect_reasoning_markers( +typedef enum llama_rs_detect_reasoning_markers_status { + LLAMA_RS_DETECT_REASONING_MARKERS_OK = 0, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG, + LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_detect_reasoning_markers_status; + +llama_rs_detect_reasoning_markers_status llama_rs_detect_reasoning_markers( const struct llama_model * model, char ** out_open, char ** out_close, diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp index eb869201..54b3a999 100644 --- a/llama-cpp-bindings-sys/wrapper_tool_calls.cpp +++ b/llama-cpp-bindings-sys/wrapper_tool_calls.cpp @@ -7,6 +7,7 @@ #include "llama.cpp/include/llama.h" #include +#include #include #include @@ -107,7 +108,7 @@ std::string detect_tool_call_haystack( } // namespace -extern "C" llama_rs_status llama_rs_compute_tool_call_haystack( +extern "C" llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack( const struct llama_model * model, char ** out_haystack, char ** out_error) { @@ -117,20 +118,25 @@ extern "C" llama_rs_status llama_rs_compute_tool_call_haystack( if (out_error) { *out_error = nullptr; } - - if (!model || !out_haystack || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG; + } + if (!out_haystack) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG; + } + if (!out_error) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -142,29 +148,35 @@ extern "C" llama_rs_status llama_rs_compute_tool_call_haystack( std::string haystack = detect_tool_call_haystack(tmpl, reasoning); if (haystack.empty()) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; } char * haystack_dup = llama_rs_dup_string(haystack); if (!haystack_dup) { - return LLAMA_RS_STATUS_ALLOCATION_FAILED; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } *out_haystack = haystack_dup; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION; } } -extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( +extern "C" llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders( const struct llama_model * model, char ** out_no_tools, char ** out_with_tools, @@ -178,20 +190,28 @@ extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( if (out_error) { *out_error = nullptr; } - - if (!model || !out_no_tools || !out_with_tools || !out_error) { - return LLAMA_RS_STATUS_INVALID_ARGUMENT; + if (!model) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG; + } + if (!out_no_tools) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG; + } + if (!out_with_tools) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG; + } + if (!out_error) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG; } try { const char * tmpl_src = llama_model_chat_template(model, nullptr); if (!tmpl_src) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } const llama_vocab * vocab = llama_model_get_vocab(model); if (!vocab) { - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; } std::string bos_token = token_text_or_empty(vocab, llama_vocab_bos(vocab)); @@ -259,20 +279,26 @@ extern "C" llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( std::free(a_dup); std::free(b_dup); - return LLAMA_RS_STATUS_ALLOCATION_FAILED; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } *out_no_tools = a_dup; *out_with_tools = b_dup; - return LLAMA_RS_STATUS_OK; + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK; + } catch (const std::bad_alloc &) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; } catch (const std::exception & ex) { *out_error = llama_rs_dup_string(std::string(ex.what())); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION; } catch (...) { *out_error = llama_rs_dup_string(std::string("unknown c++ exception")); - - return LLAMA_RS_STATUS_EXCEPTION; + if (!*out_error) { + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED; + } + return LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION; } } diff --git a/llama-cpp-bindings-sys/wrapper_tool_calls.h b/llama-cpp-bindings-sys/wrapper_tool_calls.h index e6a59e20..7c96c20b 100644 --- a/llama-cpp-bindings-sys/wrapper_tool_calls.h +++ b/llama-cpp-bindings-sys/wrapper_tool_calls.h @@ -7,40 +7,31 @@ extern "C" { #endif -/** - * Render the model's chat template with the autoparser's standard tool-call - * vs. plain-assistant synthetic turns and return the diff slice that surrounds - * the tool-call payload. The returned haystack is the text that lives between - * the model's tool-call open/close markers (with any reasoning prelude - * stripped). Marker extraction from the haystack is performed in Rust. - * - * On success (LLAMA_RS_STATUS_OK): - * - If the model declares no tool-call markers (or an empty haystack), - * *out_haystack is left as nullptr. - * - Otherwise *out_haystack is a heap-allocated null-terminated string owned - * by the caller. Free via llama_rs_string_free. - * - * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set to a heap-allocated message; - * free via llama_rs_string_free. - */ -llama_rs_status llama_rs_compute_tool_call_haystack( +typedef enum llama_rs_compute_tool_call_haystack_status { + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK = 0, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_compute_tool_call_haystack_status; + +llama_rs_compute_tool_call_haystack_status llama_rs_compute_tool_call_haystack( const struct llama_model * model, char ** out_haystack, char ** out_error); -/** - * Render the model's chat template with the autoparser's standard synthetic - * inputs (assistant_no_tools vs assistant_with_tools). Useful for diagnosing - * why marker detection fails. - * - * On success (LLAMA_RS_STATUS_OK): - * - *out_no_tools and *out_with_tools point to heap-allocated rendered - * outputs (free via llama_rs_string_free). Either can be empty when the - * template throws during rendering. - * - * On LLAMA_RS_STATUS_EXCEPTION, *out_error is set. - */ -llama_rs_status llama_rs_diagnose_tool_call_synthetic_renders( +typedef enum llama_rs_diagnose_tool_call_synthetic_renders_status { + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK = 0, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED, + LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION, +} llama_rs_diagnose_tool_call_synthetic_renders_status; + +llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_diagnose_tool_call_synthetic_renders( const struct llama_model * model, char ** out_no_tools, char ** out_with_tools, diff --git a/llama-cpp-bindings-tests/tests/context_kv_cache.rs b/llama-cpp-bindings-tests/tests/context_kv_cache.rs index 0095bff6..39ee2714 100644 --- a/llama-cpp-bindings-tests/tests/context_kv_cache.rs +++ b/llama-cpp-bindings-tests/tests/context_kv_cache.rs @@ -5,6 +5,7 @@ use anyhow::Result; use llama_cpp_bindings::context::LlamaContext; use llama_cpp_bindings::context::kv_cache::KvCacheConversionError; use llama_cpp_bindings::context::params::LlamaContextParams; +use llama_cpp_bindings::error::{KvCacheSeqAddError, KvCacheSeqDivError}; use llama_cpp_bindings::llama_batch::LlamaBatch; use llama_cpp_bindings::model::AddBos; use llama_cpp_bindings_tests::FixtureSession; @@ -126,7 +127,10 @@ fn kv_cache_seq_add_returns_error_for_mrope_model() -> Result<()> { let result = context.kv_cache_seq_add(0, Some(0), None, 1); - assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + KvCacheSeqAddError::IncompatibleRopeType, + )); Ok(()) } @@ -149,7 +153,10 @@ fn kv_cache_seq_div_returns_error_for_mrope_model() -> Result<()> { let divisor = NonZeroU8::new(2).ok_or_else(|| anyhow::anyhow!("2 is non-zero"))?; let result = context.kv_cache_seq_div(0, Some(0), None, divisor); - assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + KvCacheSeqDivError::IncompatibleRopeType, + )); Ok(()) } @@ -363,7 +370,7 @@ fn kv_cache_seq_add_rejects_p0_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P0TooLarge(_), + KvCacheSeqAddError::P0TooLarge(_), )); Ok(()) @@ -382,7 +389,7 @@ fn kv_cache_seq_add_rejects_p1_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P1TooLarge(_), + KvCacheSeqAddError::P1TooLarge(_), )); Ok(()) @@ -402,7 +409,7 @@ fn kv_cache_seq_div_rejects_p0_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P0TooLarge(_), + KvCacheSeqDivError::P0TooLarge(_), )); Ok(()) @@ -422,7 +429,7 @@ fn kv_cache_seq_div_rejects_p1_exceeding_i32_max() -> Result<()> { assert!(matches!( result.unwrap_err(), - KvCacheConversionError::P1TooLarge(_), + KvCacheSeqDivError::P1TooLarge(_), )); Ok(()) diff --git a/llama-cpp-bindings-tests/tests/model.rs b/llama-cpp-bindings-tests/tests/model.rs index b69f0bd9..47e27e30 100644 --- a/llama-cpp-bindings-tests/tests/model.rs +++ b/llama-cpp-bindings-tests/tests/model.rs @@ -1,6 +1,6 @@ use std::num::NonZeroU16; use std::num::NonZeroU32; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::Result; use llama_cpp_bindings::ChatTemplateError; @@ -261,15 +261,15 @@ fn load_model_with_invalid_path_returns_error() { let model_params = LlamaModelParams::default(); let result = LlamaModel::load_from_file(backend, "/nonexistent/model.gguf", &model_params); - assert_eq!( + assert!(matches!( result.unwrap_err(), - LlamaModelLoadError::FileNotFound(PathBuf::from("/nonexistent/model.gguf")) - ); + LlamaModelLoadError::FileNotFound(path) if path == Path::new("/nonexistent/model.gguf"), + )); } #[test] #[serial] -fn load_model_with_invalid_file_content_returns_null_result() -> Result<()> { +fn load_model_with_invalid_file_content_returns_unloadable_or_reported() -> Result<()> { let fixture = FixtureSession::open()?; let backend = fixture.backend(); let model_params = LlamaModelParams::default(); @@ -278,7 +278,10 @@ fn load_model_with_invalid_file_content_returns_null_result() -> Result<()> { let result = LlamaModel::load_from_file(backend, &dummy_path, &model_params); - assert_eq!(result.unwrap_err(), LlamaModelLoadError::NullResult); + assert!(matches!( + result.unwrap_err(), + LlamaModelLoadError::Unloadable | LlamaModelLoadError::Reported { .. }, + )); let _ = std::fs::remove_file(&dummy_path); Ok(()) @@ -294,14 +297,14 @@ fn load_model_with_non_utf8_path_returns_path_to_str_error() { let fixture = FixtureSession::open().expect("open fixture"); let backend = fixture.backend(); let model_params = LlamaModelParams::default(); - let non_utf8_path = std::path::Path::new(OsStr::from_bytes(b"/tmp/\xff\xfe.gguf")); + let non_utf8_path = Path::new(OsStr::from_bytes(b"/tmp/\xff\xfe.gguf")); let result = LlamaModel::load_from_file(backend, non_utf8_path, &model_params); - assert_eq!( + assert!(matches!( result.unwrap_err(), - LlamaModelLoadError::PathToStrError(non_utf8_path.to_path_buf()) - ); + LlamaModelLoadError::PathToStrError(path) if path == non_utf8_path.to_path_buf() + )); } #[cfg(unix)] @@ -313,7 +316,7 @@ fn lora_adapter_init_with_non_utf8_path_returns_error() { let fixture = FixtureSession::open().expect("open fixture"); let model = fixture.default_model(); - let non_utf8_path = std::path::Path::new(OsStr::from_bytes(b"/tmp/\xff\xfe.gguf")); + let non_utf8_path = Path::new(OsStr::from_bytes(b"/tmp/\xff\xfe.gguf")); let result = model.lora_adapter_init(non_utf8_path); @@ -625,7 +628,7 @@ fn chat_template_with_nonexistent_name_returns_error() { #[test] #[serial] -fn lora_adapter_init_with_invalid_gguf_returns_null_result() -> Result<()> { +fn lora_adapter_init_with_invalid_gguf_returns_unloadable() -> Result<()> { let fixture = FixtureSession::open()?; let model = fixture.default_model(); let dummy_path = std::env::temp_dir().join("llama_test_dummy_lora.gguf"); @@ -633,7 +636,7 @@ fn lora_adapter_init_with_invalid_gguf_returns_null_result() -> Result<()> { let result = model.lora_adapter_init(&dummy_path); - assert_eq!(result.unwrap_err(), LlamaLoraAdapterInitError::NullResult); + assert_eq!(result.unwrap_err(), LlamaLoraAdapterInitError::Unloadable); let _ = std::fs::remove_file(&dummy_path); Ok(()) diff --git a/llama-cpp-bindings-tests/tests/sampling.rs b/llama-cpp-bindings-tests/tests/sampling.rs index 8033ccfc..5c1120fe 100644 --- a/llama-cpp-bindings-tests/tests/sampling.rs +++ b/llama-cpp-bindings-tests/tests/sampling.rs @@ -131,6 +131,23 @@ fn grammar_lazy_patterns_with_null_byte_in_pattern_returns_error() -> Result<()> Ok(()) } +#[test] +#[serial] +fn grammar_lazy_patterns_with_malformed_regex_returns_invalid_trigger_pattern() -> Result<()> { + let fixture = FixtureSession::open()?; + let model = fixture.default_model(); + let patterns = vec!["[".to_string()]; + let result = + LlamaSampler::grammar_lazy_patterns(model, "root ::= \"hello\"", "root", &patterns, &[]); + + assert!(matches!( + result, + Err(GrammarError::InvalidTriggerPattern { .. }), + )); + + Ok(()) +} + #[test] #[serial] fn llguidance_method_creates_sampler() -> Result<()> { diff --git a/llama-cpp-bindings/src/context.rs b/llama-cpp-bindings/src/context.rs index 410ade82..b980f831 100644 --- a/llama-cpp-bindings/src/context.rs +++ b/llama-cpp-bindings/src/context.rs @@ -111,15 +111,36 @@ impl<'model> LlamaContext<'model> { params: LlamaContextParams, ) -> Result { let context_params = params.context_params; - let context = unsafe { - llama_cpp_bindings_sys::llama_new_context_with_model( + let mut out_ctx: *mut llama_cpp_bindings_sys::llama_context = std::ptr::null_mut(); + let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_new_context_with_model( model.model.as_ptr(), context_params, + &raw mut out_ctx, + &raw mut out_error, ) }; - let context = NonNull::new(context).ok_or(LlamaContextLoadError::NullReturn)?; - - Ok(Self::new(model, context, params.embeddings())) + match status { + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK => { + let context = NonNull::new(out_ctx) + .ok_or(LlamaContextLoadError::Unconstructible)?; + Ok(Self::new(model, context, params.embeddings())) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL => { + Err(LlamaContextLoadError::Unconstructible) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED => { + Err(LlamaContextLoadError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(LlamaContextLoadError::Reported { message }) + } + other => unreachable!( + "llama_rs_new_context_with_model returned unrecognized status {other}" + ), + } } /// Gets the max number of logical tokens that can be submitted to decode. Must be greater than or equal to [`Self::n_ubatch`]. @@ -203,22 +224,46 @@ impl<'model> LlamaContext<'model> { /// # Errors /// /// - `DecodeError` if the decoding failed. - /// - /// # Panics - /// - /// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems) pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError> { - let result = unsafe { - llama_cpp_bindings_sys::llama_decode(self.context.as_ptr(), batch.llama_batch) + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_decode( + self.context.as_ptr(), + batch.llama_batch, + &raw mut out_vendored_return_code, + &raw mut out_error, + ) }; - - match NonZeroI32::new(result) { - None => { + match status { + llama_cpp_bindings_sys::LLAMA_RS_DECODE_OK => { self.initialized_logits .clone_from(&batch.initialized_logits); Ok(()) } - Some(error) => Err(DecodeError::from(error)), + llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE => { + let code = NonZeroI32::new(out_vendored_return_code).unwrap_or_else(|| { + unreachable!( + "llama_rs_decode reported a nonzero return code but the value was zero" + ) + }); + Err(DecodeError::from(code)) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_OUT_OF_MEMORY => { + Err(DecodeError::DecodeOutOfMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_COMPUTE_FAILED => { + Err(DecodeError::ComputeFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED => { + Err(DecodeError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(DecodeError::Reported { message }) + } + other => unreachable!("llama_rs_decode returned unrecognized status {other}"), } } @@ -226,34 +271,50 @@ impl<'model> LlamaContext<'model> { /// /// # Errors /// - /// - `EncodeError` if the decoding failed. - /// - /// # Panics - /// - /// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems) + /// - `EncodeError` if the encoding failed. pub fn encode(&mut self, batch: &mut LlamaBatch) -> Result<(), EncodeError> { + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut std::os::raw::c_char = std::ptr::null_mut(); let status = unsafe { - llama_cpp_bindings_sys::llama_rs_encode(self.context.as_ptr(), batch.llama_batch) + llama_cpp_bindings_sys::llama_rs_encode( + self.context.as_ptr(), + batch.llama_batch, + &raw mut out_vendored_return_code, + &raw mut out_error, + ) }; - - self.handle_encode_result(status, batch) - } - - fn handle_encode_result( - &mut self, - status: llama_cpp_bindings_sys::llama_rs_status, - batch: &mut LlamaBatch, - ) -> Result<(), EncodeError> { - if crate::status_is_ok(status) { - self.initialized_logits - .clone_from(&batch.initialized_logits); - - Ok(()) - } else { - Err(EncodeError::from( - NonZeroI32::new(crate::status_to_i32(status)) - .unwrap_or(NonZeroI32::new(1).expect("1 is non-zero")), - )) + match status { + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_OK => { + self.initialized_logits + .clone_from(&batch.initialized_logits); + Ok(()) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER => { + Err(EncodeError::ModelHasNoEncoder) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE => { + let code = NonZeroI32::new(out_vendored_return_code).unwrap_or_else(|| { + unreachable!( + "llama_rs_encode reported a nonzero return code but the value was zero" + ) + }); + Err(EncodeError::from(code)) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_OUT_OF_MEMORY => { + Err(EncodeError::EncodeOutOfMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_COMPUTE_FAILED => { + Err(EncodeError::ComputeFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED => { + Err(EncodeError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(EncodeError::Reported { message }) + } + other => unreachable!("llama_rs_encode returned unrecognized status {other}"), } } diff --git a/llama-cpp-bindings/src/context/kv_cache.rs b/llama-cpp-bindings/src/context/kv_cache.rs index 4250fe94..dff5e2aa 100644 --- a/llama-cpp-bindings/src/context/kv_cache.rs +++ b/llama-cpp-bindings/src/context/kv_cache.rs @@ -1,8 +1,13 @@ //! utilities for working with the kv cache -use crate::context::LlamaContext; use std::ffi::c_int; use std::num::{NonZeroU8, TryFromIntError}; +use std::os::raw::c_char; +use std::ptr; + +use crate::context::LlamaContext; +use crate::error::{KvCacheSeqAddError, KvCacheSeqDivError}; +use crate::ffi_error_reader::read_and_free_cpp_error; /// Errors that can occur when attempting to prepare values for the kv cache #[derive(Debug, Eq, PartialEq, thiserror::Error)] @@ -16,9 +21,6 @@ pub enum KvCacheConversionError { /// Position 1 conversion to i32 failed #[error("Provided end position is too large for a i32")] P1TooLarge(#[source] TryFromIntError), - /// The operation is not supported by the current model/context configuration. - #[error("operation not supported by this model: {0}")] - UnsupportedOperation(String), } impl LlamaContext<'_> { @@ -131,20 +133,21 @@ impl LlamaContext<'_> { /// * `delta` - The relative position to add to the tokens /// /// # Errors - /// If either position exceeds [`i32::MAX`]. + /// If either position exceeds [`i32::MAX`], or the underlying memory operation reports a failure. pub fn kv_cache_seq_add( &mut self, seq_id: i32, p0: Option, p1: Option, delta: i32, - ) -> Result<(), KvCacheConversionError> { + ) -> Result<(), KvCacheSeqAddError> { let p0 = p0 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P0TooLarge)?; + .map_err(KvCacheSeqAddError::P0TooLarge)?; let p1 = p1 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P1TooLarge)?; + .map_err(KvCacheSeqAddError::P1TooLarge)?; + let mut out_error: *mut c_char = ptr::null_mut(); let status = unsafe { llama_cpp_bindings_sys::llama_rs_memory_seq_add( self.context.as_ptr(), @@ -152,16 +155,25 @@ impl LlamaContext<'_> { p0, p1, delta, + &raw mut out_error, ) }; - - if crate::status_is_ok(status) { - Ok(()) - } else { - Err(KvCacheConversionError::UnsupportedOperation(format!( - "kv_cache_seq_add failed (status {})", - crate::status_to_i32(status) - ))) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE => { + Err(KvCacheSeqAddError::IncompatibleRopeType) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM => { + Err(KvCacheSeqAddError::MemoryHandleUnavailable) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED => { + Err(KvCacheSeqAddError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(KvCacheSeqAddError::Reported { message }) + } + other => unreachable!("llama_rs_memory_seq_add returned unrecognized status {other}"), } } @@ -181,21 +193,22 @@ impl LlamaContext<'_> { /// * `d` - The factor to divide the positions by /// /// # Errors - /// If either position exceeds [`i32::MAX`]. + /// If either position exceeds [`i32::MAX`], or the underlying memory operation reports a failure. pub fn kv_cache_seq_div( &mut self, seq_id: i32, p0: Option, p1: Option, d: NonZeroU8, - ) -> Result<(), KvCacheConversionError> { + ) -> Result<(), KvCacheSeqDivError> { let p0 = p0 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P0TooLarge)?; + .map_err(KvCacheSeqDivError::P0TooLarge)?; let p1 = p1 .map_or(Ok(-1), i32::try_from) - .map_err(KvCacheConversionError::P1TooLarge)?; + .map_err(KvCacheSeqDivError::P1TooLarge)?; let d = c_int::from(d.get()); + let mut out_error: *mut c_char = ptr::null_mut(); let status = unsafe { llama_cpp_bindings_sys::llama_rs_memory_seq_div( self.context.as_ptr(), @@ -203,16 +216,25 @@ impl LlamaContext<'_> { p0, p1, d, + &raw mut out_error, ) }; - - if crate::status_is_ok(status) { - Ok(()) - } else { - Err(KvCacheConversionError::UnsupportedOperation(format!( - "kv_cache_seq_div failed (status {})", - crate::status_to_i32(status) - ))) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE => { + Err(KvCacheSeqDivError::IncompatibleRopeType) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM => { + Err(KvCacheSeqDivError::MemoryHandleUnavailable) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED => { + Err(KvCacheSeqDivError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(KvCacheSeqDivError::Reported { message }) + } + other => unreachable!("llama_rs_memory_seq_div returned unrecognized status {other}"), } } diff --git a/llama-cpp-bindings/src/error.rs b/llama-cpp-bindings/src/error.rs index 2314452f..ba684109 100644 --- a/llama-cpp-bindings/src/error.rs +++ b/llama-cpp-bindings/src/error.rs @@ -8,7 +8,10 @@ pub mod eval_multimodal_chunks_error; pub mod fit_error; pub mod grammar_error; pub mod json_object_failure; +pub mod json_schema_to_grammar_error; pub mod key_value_xml_tags_failure; +pub mod kv_cache_seq_add_error; +pub mod kv_cache_seq_div_error; pub mod llama_context_load_error; pub mod llama_cpp_error; pub mod llama_lora_adapter_init_error; @@ -41,7 +44,10 @@ pub use eval_multimodal_chunks_error::EvalMultimodalChunksError; pub use fit_error::FitError; pub use grammar_error::GrammarError; pub use json_object_failure::JsonObjectFailure; +pub use json_schema_to_grammar_error::JsonSchemaToGrammarError; pub use key_value_xml_tags_failure::KeyValueXmlTagsFailure; +pub use kv_cache_seq_add_error::KvCacheSeqAddError; +pub use kv_cache_seq_div_error::KvCacheSeqDivError; pub use llama_context_load_error::LlamaContextLoadError; pub use llama_cpp_error::LlamaCppError; pub use llama_lora_adapter_init_error::LlamaLoraAdapterInitError; diff --git a/llama-cpp-bindings/src/error/decode_error.rs b/llama-cpp-bindings/src/error/decode_error.rs index 1a404605..74121bc0 100644 --- a/llama-cpp-bindings/src/error/decode_error.rs +++ b/llama-cpp-bindings/src/error/decode_error.rs @@ -1,31 +1,33 @@ use std::num::NonZeroI32; use std::os::raw::c_int; -/// Failed to decode a batch. #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum DecodeError { - /// No kv cache slot was available. - #[error("Decode Error 1: NoKvCacheSlot")] + #[error("no KV cache slot was available")] NoKvCacheSlot, - /// The computation was aborted by the abort callback. - #[error("Decode Error 2: Aborted")] + #[error("decode aborted by callback")] Aborted, - /// The number of tokens in the batch was 0. - #[error("Decode Error -1: n_tokens == 0")] - NTokensZero, - /// An unknown error occurred. - #[error("Decode Error {0}: unknown")] - Unknown(c_int), + #[error("decode batch is invalid (empty, output mismatch, or initialization failure)")] + BatchInvalid, + #[error("decode ran out of memory")] + DecodeOutOfMemory, + #[error("backend compute failed during decode")] + ComputeFailed, + #[error("decode returned an unknown status code: {code}")] + UnknownStatus { code: c_int }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } -/// Decode a error from llama.cpp into a [`DecodeError`]. impl From for DecodeError { fn from(value: NonZeroI32) -> Self { match value.get() { 1 => Self::NoKvCacheSlot, 2 => Self::Aborted, - -1 => Self::NTokensZero, - error_code => Self::Unknown(error_code), + -1 => Self::BatchInvalid, + error_code => Self::UnknownStatus { code: error_code }, } } } @@ -37,34 +39,30 @@ mod tests { use super::DecodeError; #[test] - fn decode_error_no_kv_cache_slot() { + fn no_kv_cache_slot_maps_from_code_one() { let error = DecodeError::from(NonZeroI32::new(1).expect("1 is non-zero")); assert_eq!(error, DecodeError::NoKvCacheSlot); - assert_eq!(error.to_string(), "Decode Error 1: NoKvCacheSlot"); } #[test] - fn decode_error_n_tokens_zero() { - let error = DecodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero")); + fn aborted_maps_from_code_two() { + let error = DecodeError::from(NonZeroI32::new(2).expect("2 is non-zero")); - assert_eq!(error, DecodeError::NTokensZero); - assert_eq!(error.to_string(), "Decode Error -1: n_tokens == 0"); + assert_eq!(error, DecodeError::Aborted); } #[test] - fn decode_error_aborted() { - let error = DecodeError::from(NonZeroI32::new(2).expect("2 is non-zero")); + fn batch_invalid_maps_from_code_negative_one() { + let error = DecodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero")); - assert_eq!(error, DecodeError::Aborted); - assert_eq!(error.to_string(), "Decode Error 2: Aborted"); + assert_eq!(error, DecodeError::BatchInvalid); } #[test] - fn decode_error_unknown() { + fn unrecognized_code_falls_through_to_unknown_status() { let error = DecodeError::from(NonZeroI32::new(42).expect("42 is non-zero")); - assert_eq!(error, DecodeError::Unknown(42)); - assert_eq!(error.to_string(), "Decode Error 42: unknown"); + assert_eq!(error, DecodeError::UnknownStatus { code: 42 }); } } diff --git a/llama-cpp-bindings/src/error/encode_error.rs b/llama-cpp-bindings/src/error/encode_error.rs index 33999d61..022ac6d7 100644 --- a/llama-cpp-bindings/src/error/encode_error.rs +++ b/llama-cpp-bindings/src/error/encode_error.rs @@ -1,27 +1,32 @@ use std::num::NonZeroI32; use std::os::raw::c_int; -/// Failed to decode a batch. #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum EncodeError { - /// No kv cache slot was available. - #[error("Encode Error 1: NoKvCacheSlot")] + #[error("model has no encoder")] + ModelHasNoEncoder, + #[error("no KV cache slot was available")] NoKvCacheSlot, - /// The number of tokens in the batch was 0. - #[error("Encode Error -1: n_tokens == 0")] - NTokensZero, - /// An unknown error occurred. - #[error("Encode Error {0}: unknown")] - Unknown(c_int), + #[error("encode batch is invalid (empty or initialization failure)")] + BatchInvalid, + #[error("encode ran out of memory")] + EncodeOutOfMemory, + #[error("backend compute failed during encode")] + ComputeFailed, + #[error("encode returned an unknown status code: {code}")] + UnknownStatus { code: c_int }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } -/// Encode a error from llama.cpp into a [`EncodeError`]. impl From for EncodeError { fn from(value: NonZeroI32) -> Self { match value.get() { 1 => Self::NoKvCacheSlot, - -1 => Self::NTokensZero, - error_code => Self::Unknown(error_code), + -1 => Self::BatchInvalid, + error_code => Self::UnknownStatus { code: error_code }, } } } @@ -33,26 +38,23 @@ mod tests { use super::EncodeError; #[test] - fn encode_error_no_kv_cache_slot() { + fn no_kv_cache_slot_maps_from_code_one() { let error = EncodeError::from(NonZeroI32::new(1).expect("1 is non-zero")); assert_eq!(error, EncodeError::NoKvCacheSlot); - assert_eq!(error.to_string(), "Encode Error 1: NoKvCacheSlot"); } #[test] - fn encode_error_n_tokens_zero() { + fn batch_invalid_maps_from_code_negative_one() { let error = EncodeError::from(NonZeroI32::new(-1).expect("-1 is non-zero")); - assert_eq!(error, EncodeError::NTokensZero); - assert_eq!(error.to_string(), "Encode Error -1: n_tokens == 0"); + assert_eq!(error, EncodeError::BatchInvalid); } #[test] - fn encode_error_unknown() { + fn unrecognized_code_falls_through_to_unknown_status() { let error = EncodeError::from(NonZeroI32::new(99).expect("99 is non-zero")); - assert_eq!(error, EncodeError::Unknown(99)); - assert_eq!(error.to_string(), "Encode Error 99: unknown"); + assert_eq!(error, EncodeError::UnknownStatus { code: 99 }); } } diff --git a/llama-cpp-bindings/src/error/fit_error.rs b/llama-cpp-bindings/src/error/fit_error.rs index 7585530d..2d6fe6b5 100644 --- a/llama-cpp-bindings/src/error/fit_error.rs +++ b/llama-cpp-bindings/src/error/fit_error.rs @@ -1,11 +1,20 @@ /// Returned by [`crate::model::params::LlamaModelParams::fit_params`]. -#[derive(Debug, Clone, Copy, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] pub enum FitError { - /// Could not find allocations that fit available memory. - #[error("could not find allocations that fit available memory")] - Failure, - /// A hard error occurred during fitting (e.g. model not found at the specified path, - /// or the C++ wrapper threw an exception). - #[error("hard error during parameter fitting")] - Error, + /// No combination of model parameters fits the available device memory. + #[error("no parameter combination fits available memory")] + NoFittingMemoryLayout, + /// Parameter fitting was aborted by a hard error reported by the underlying library + /// (e.g., model file missing, backend initialization failed). + #[error("parameter fitting aborted")] + Aborted, + /// The fitting helper returned a status code the wrapper does not recognise. + #[error("parameter fitting returned an unknown status code: {code}")] + UnknownStatus { code: i32 }, + /// Wrapper could not allocate memory for an error message. + #[error("not enough memory")] + NotEnoughMemory, + /// Generic exception caught at the wrapper boundary, with the underlying message. + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/error/grammar_error.rs b/llama-cpp-bindings/src/error/grammar_error.rs index 58216b8c..1910476e 100644 --- a/llama-cpp-bindings/src/error/grammar_error.rs +++ b/llama-cpp-bindings/src/error/grammar_error.rs @@ -1,27 +1,31 @@ use std::ffi::NulError; -/// Errors that can occur when initializing a grammar sampler -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum GrammarError { - /// The grammar root was not found in the grammar string - #[error("Grammar root not found in grammar string")] + #[error("grammar root not found in grammar string")] RootNotFound, - /// The trigger word contains null bytes - #[error("Trigger word contains null bytes: {0}")] + #[error("trigger word contains null bytes: {0}")] TriggerWordNullBytes(NulError), - /// The grammar string or root contains null bytes - #[error("Grammar string or root contains null bytes: {0}")] + #[error("grammar string or root contains null bytes: {0}")] GrammarNullBytes(NulError), - /// A string contains null bytes - #[error("String contains null bytes: {0}")] + #[error("string contains null bytes: {0}")] NulError(#[from] NulError), - /// The grammar call returned null - #[error("Grammar initialization failed: {0}")] - NullGrammar(String), - /// An integer value exceeded the allowed range - #[error("Integer overflow: {0}")] + #[error("integer overflow: {0}")] IntegerOverflow(String), - /// An error from the llguidance library #[error("llguidance error: {0}")] LlguidanceError(String), + #[error("grammar is malformed")] + GrammarMalformed, + #[error("lazy grammar is malformed")] + LazyGrammarMalformed, + #[error("lazy-patterns grammar is malformed")] + LazyPatternsGrammarMalformed, + #[error("trigger pattern is not a valid regex: {message}")] + InvalidTriggerPattern { message: String }, + #[error("llguidance sampler could not be created")] + LlguidanceSamplerUnavailable, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs b/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs new file mode 100644 index 00000000..d09f041d --- /dev/null +++ b/llama-cpp-bindings/src/error/json_schema_to_grammar_error.rs @@ -0,0 +1,16 @@ +use std::ffi::NulError; +use std::string::FromUtf8Error; + +#[derive(Debug, thiserror::Error)] +pub enum JsonSchemaToGrammarError { + #[error("schema string contains an interior NUL byte: {0}")] + SchemaContainsNulByte(#[from] NulError), + #[error("JSON schema is invalid: {message}")] + InvalidSchema { message: String }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, + #[error("grammar returned by json_schema_to_grammar is not valid UTF-8")] + GrammarNotUtf8(#[from] FromUtf8Error), +} diff --git a/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs b/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs new file mode 100644 index 00000000..c3a3248b --- /dev/null +++ b/llama-cpp-bindings/src/error/kv_cache_seq_add_error.rs @@ -0,0 +1,17 @@ +use std::num::TryFromIntError; + +#[derive(Debug, thiserror::Error)] +pub enum KvCacheSeqAddError { + #[error("provided start position is too large for an i32")] + P0TooLarge(#[source] TryFromIntError), + #[error("provided end position is too large for an i32")] + P1TooLarge(#[source] TryFromIntError), + #[error("model rope type is incompatible with sequence position arithmetic")] + IncompatibleRopeType, + #[error("context has no memory module available")] + MemoryHandleUnavailable, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, +} diff --git a/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs b/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs new file mode 100644 index 00000000..c6ac0ca4 --- /dev/null +++ b/llama-cpp-bindings/src/error/kv_cache_seq_div_error.rs @@ -0,0 +1,17 @@ +use std::num::TryFromIntError; + +#[derive(Debug, thiserror::Error)] +pub enum KvCacheSeqDivError { + #[error("provided start position is too large for an i32")] + P0TooLarge(#[source] TryFromIntError), + #[error("provided end position is too large for an i32")] + P1TooLarge(#[source] TryFromIntError), + #[error("model rope type is incompatible with sequence position arithmetic")] + IncompatibleRopeType, + #[error("context has no memory module available")] + MemoryHandleUnavailable, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, +} diff --git a/llama-cpp-bindings/src/error/llama_context_load_error.rs b/llama-cpp-bindings/src/error/llama_context_load_error.rs index 752c88af..ffbf746f 100644 --- a/llama-cpp-bindings/src/error/llama_context_load_error.rs +++ b/llama-cpp-bindings/src/error/llama_context_load_error.rs @@ -1,7 +1,9 @@ -/// Failed to Load context -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum LlamaContextLoadError { - /// llama.cpp returned null - #[error("null reference from llama.cpp")] - NullReturn, + #[error("context could not be constructed")] + Unconstructible, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/error/llama_cpp_error.rs b/llama-cpp-bindings/src/error/llama_cpp_error.rs index b99fefdd..e40664a8 100644 --- a/llama-cpp-bindings/src/error/llama_cpp_error.rs +++ b/llama-cpp-bindings/src/error/llama_cpp_error.rs @@ -4,47 +4,34 @@ use crate::error::decode_error::DecodeError; use crate::error::embeddings_error::EmbeddingsError; use crate::error::encode_error::EncodeError; use crate::error::fit_error::FitError; +use crate::error::json_schema_to_grammar_error::JsonSchemaToGrammarError; use crate::error::llama_context_load_error::LlamaContextLoadError; use crate::error::llama_model_load_error::LlamaModelLoadError; -/// All errors that can occur in the llama-cpp crate. -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum LlamaCppError { - /// The backend was already initialized. This can generally be ignored as initializing the backend - /// is idempotent. #[error("BackendAlreadyInitialized")] BackendAlreadyInitialized, - /// There was an error while get the chat template from model. - #[error("{0}")] + #[error(transparent)] ChatTemplateError(#[from] ChatTemplateError), - /// There was an error while decoding a batch. - #[error("{0}")] + #[error(transparent)] DecodeError(#[from] DecodeError), - /// There was an error while encoding a batch. - #[error("{0}")] + #[error(transparent)] EncodeError(#[from] EncodeError), - /// There was an error loading a model. - #[error("{0}")] + #[error(transparent)] LlamaModelLoadError(#[from] LlamaModelLoadError), - /// There was an error creating a new model context. - #[error("{0}")] + #[error(transparent)] LlamaContextLoadError(#[from] LlamaContextLoadError), - /// There was an error adding a token to a batch. - #[error["{0}"]] + #[error(transparent)] BatchAddError(#[from] BatchAddError), - /// see [`EmbeddingsError`] #[error(transparent)] EmbeddingError(#[from] EmbeddingsError), - /// Backend device not found #[error("Backend device {0} not found")] BackendDeviceNotFound(usize), - /// Max devices exceeded #[error("Max devices exceeded. Max devices is {0}")] MaxDevicesExceeded(usize), - /// Failed to convert JSON schema to grammar. - #[error("JsonSchemaToGrammarError: {0}")] - JsonSchemaToGrammarError(String), - /// see [`FitError`] + #[error(transparent)] + JsonSchemaToGrammarError(#[from] JsonSchemaToGrammarError), #[error(transparent)] FitError(#[from] FitError), } diff --git a/llama-cpp-bindings/src/error/llama_lora_adapter_init_error.rs b/llama-cpp-bindings/src/error/llama_lora_adapter_init_error.rs index 9a294994..d8d6bafb 100644 --- a/llama-cpp-bindings/src/error/llama_lora_adapter_init_error.rs +++ b/llama-cpp-bindings/src/error/llama_lora_adapter_init_error.rs @@ -1,19 +1,14 @@ use std::ffi::NulError; use std::path::PathBuf; -/// An error that can occur when loading a model. #[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum LlamaLoraAdapterInitError { - /// There was a null byte in a provided string and thus it could not be converted to a C string. #[error("null byte in string {0}")] NullError(#[from] NulError), - /// llama.cpp returned a nullptr - this could be many different causes. - #[error("null result from llama cpp")] - NullResult, - /// Failed to convert the path to a rust str. This means the path was not valid unicode + #[error("adapter could not be loaded")] + Unloadable, #[error("failed to convert path {0} to str")] PathToStrError(PathBuf), - /// The adapter file does not exist at the given path. #[error("adapter file not found: {0}")] FileNotFound(PathBuf), } diff --git a/llama-cpp-bindings/src/error/llama_model_load_error.rs b/llama-cpp-bindings/src/error/llama_model_load_error.rs index a7b24012..4385aaff 100644 --- a/llama-cpp-bindings/src/error/llama_model_load_error.rs +++ b/llama-cpp-bindings/src/error/llama_model_load_error.rs @@ -1,19 +1,18 @@ use std::ffi::NulError; use std::path::PathBuf; -/// An error that can occur when loading a model. -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum LlamaModelLoadError { - /// There was a null byte in a provided string and thus it could not be converted to a C string. #[error("null byte in string {0}")] NullError(#[from] NulError), - /// llama.cpp returned a nullptr - this could be many different causes. - #[error("null result from llama cpp")] - NullResult, - /// Failed to convert the path to a rust str. This means the path was not valid unicode #[error("failed to convert path {0} to str")] PathToStrError(PathBuf), - /// The model file does not exist at the given path. #[error("model file not found: {0}")] FileNotFound(PathBuf), + #[error("model could not be loaded")] + Unloadable, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/error/marker_detection_error.rs b/llama-cpp-bindings/src/error/marker_detection_error.rs index aa755878..d2c4361b 100644 --- a/llama-cpp-bindings/src/error/marker_detection_error.rs +++ b/llama-cpp-bindings/src/error/marker_detection_error.rs @@ -1,15 +1,15 @@ use std::string::FromUtf8Error; -/// Failed to detect tool-call diagnostic markers for a model. #[derive(Debug, thiserror::Error)] pub enum MarkerDetectionError { - /// llama.cpp returned an error code from the marker detection FFI call. - #[error("ffi error {0}")] - FfiError(i32), - /// The C++ side threw an exception during template analysis. - #[error("c++ exception during template analysis: {0}")] - AnalyzeException(String), - /// llama.cpp returned a marker string but its bytes were not valid UTF-8. #[error("ffi returned non-utf8 marker bytes: {0}")] MarkerUtf8Error(#[from] FromUtf8Error), + #[error("not enough memory")] + NotEnoughMemory, + #[error("reasoning-marker detection failed: {message}")] + ReasoningMarkerDetectionFailed { message: String }, + #[error("tool-call haystack computation failed: {message}")] + ToolCallHaystackComputationFailed { message: String }, + #[error("tool-call synthetic-render diagnosis failed: {message}")] + ToolCallSyntheticRenderDiagnosisFailed { message: String }, } diff --git a/llama-cpp-bindings/src/error/parse_chat_message_error.rs b/llama-cpp-bindings/src/error/parse_chat_message_error.rs index 75460ed4..f70ac2ab 100644 --- a/llama-cpp-bindings/src/error/parse_chat_message_error.rs +++ b/llama-cpp-bindings/src/error/parse_chat_message_error.rs @@ -2,31 +2,34 @@ use std::string::FromUtf8Error; use crate::error::tool_call_format_failure::ToolCallFormatFailure; -/// Failed to parse a chat message via [`crate::Model::parse_chat_message`]. #[derive(Debug, thiserror::Error)] pub enum ParseChatMessageError { - /// llama.cpp returned an error code from the parse FFI call. - #[error("ffi error {0}")] - FfiError(i32), - /// The C++ side threw an exception while parsing. - #[error("c++ exception during chat parse: {0}")] - ParseException(String), - /// An accessor returned bytes that were not valid UTF-8. + #[error("model has no chat template")] + NoChatTemplate, + #[error("model has no vocab")] + NoVocab, + #[error("not enough memory")] + NotEnoughMemory, + #[error("chat-template parse failed: {message}")] + ParseFailed { message: String }, + #[error("parsed-chat destructor failed: {message}")] + DestructorFailed { message: String }, + #[error("tool-call id index {index} out of bounds")] + ToolCallIdIndexOutOfBounds { index: usize }, + #[error("tool-call name index {index} out of bounds")] + ToolCallNameIndexOutOfBounds { index: usize }, + #[error("tool-call arguments index {index} out of bounds")] + ToolCallArgumentsIndexOutOfBounds { index: usize }, #[error("ffi returned non-utf8 string: {0}")] StringUtf8Error(#[from] FromUtf8Error), - /// The caller passed a `tools_json` argument that is not valid JSON. #[error("tools_json is not valid JSON: {0}")] ToolsJsonInvalid(#[source] serde_json::Error), - /// The caller passed a `tools_json` argument that parses as JSON but is not an array. #[error("tools_json must be a JSON array")] ToolsJsonNotArray, - /// Failed to serialize the tools array for the FFI call. #[error("could not serialize tools to JSON: {0}")] ToolsSerialization(String), - /// The model has no usable chat template, so the parser cannot be built. - #[error("model has no chat template")] - NoChatTemplate, - /// The wrapper-side fallback parser detected a structural issue while parsing the body. #[error("template-override fallback parser failed: {0}")] TemplateOverrideFailed(#[from] ToolCallFormatFailure), + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/error/sample_error.rs b/llama-cpp-bindings/src/error/sample_error.rs index a7bbf4e8..176cc6cb 100644 --- a/llama-cpp-bindings/src/error/sample_error.rs +++ b/llama-cpp-bindings/src/error/sample_error.rs @@ -1,11 +1,7 @@ -/// Errors that can occur when sampling a token. -#[derive(Debug, Eq, PartialEq, thiserror::Error)] +#[derive(Debug, thiserror::Error)] pub enum SampleError { - /// A C++ exception was thrown during sampling - #[error("C++ exception during sampling: {0}")] - CppException(String), - - /// An invalid argument was passed to the sampler - #[error("Invalid argument passed to sampler")] - InvalidArgument, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/error/sampler_accept_error.rs b/llama-cpp-bindings/src/error/sampler_accept_error.rs index afa32a61..b89ea406 100644 --- a/llama-cpp-bindings/src/error/sampler_accept_error.rs +++ b/llama-cpp-bindings/src/error/sampler_accept_error.rs @@ -1,11 +1,7 @@ -/// Failed to accept a token in a sampler. #[derive(Debug, thiserror::Error)] pub enum SamplerAcceptError { - /// A C++ exception was thrown during accept - #[error("C++ exception during sampler accept: {0}")] - CppException(String), - - /// An invalid argument was passed (null sampler or null error pointer) - #[error("Invalid argument passed to sampler accept")] - InvalidArgument, + #[error("not enough memory")] + NotEnoughMemory, + #[error("grammar state corrupted during accept: {message}")] + GrammarStateCorrupted { message: String }, } diff --git a/llama-cpp-bindings/src/error/string_to_token_error.rs b/llama-cpp-bindings/src/error/string_to_token_error.rs index dc00b484..d0dff449 100644 --- a/llama-cpp-bindings/src/error/string_to_token_error.rs +++ b/llama-cpp-bindings/src/error/string_to_token_error.rs @@ -1,12 +1,13 @@ use std::ffi::NulError; -/// Failed to convert a string to a token sequence. #[derive(Debug, thiserror::Error)] pub enum StringToTokenError { - /// the string contained a null byte and thus could not be converted to a c string. #[error("{0}")] NulError(#[from] NulError), #[error("{0}")] - /// Failed to convert a provided integer to a [`c_int`]. CIntConversionError(#[from] std::num::TryFromIntError), + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/ggml_time_us.rs b/llama-cpp-bindings/src/ggml_time_us.rs index 06f61dfc..4db4b490 100644 --- a/llama-cpp-bindings/src/ggml_time_us.rs +++ b/llama-cpp-bindings/src/ggml_time_us.rs @@ -22,10 +22,15 @@ pub fn ggml_time_us() -> i64 { #[cfg(test)] mod tests { + use serial_test::serial; + use super::ggml_time_us; + use crate::llama_backend::LlamaBackend; #[test] + #[serial] fn returns_positive_value() { + let _backend = LlamaBackend::init().unwrap(); let time_microseconds = ggml_time_us(); assert!(time_microseconds > 0); diff --git a/llama-cpp-bindings/src/gguf_type.rs b/llama-cpp-bindings/src/gguf_type.rs index c1060782..33de25cd 100644 --- a/llama-cpp-bindings/src/gguf_type.rs +++ b/llama-cpp-bindings/src/gguf_type.rs @@ -85,7 +85,10 @@ mod tests { #[test] fn from_raw_returns_none_for_unknown() { assert_eq!(GgufType::from_raw(99), None); - assert_eq!(GgufType::from_raw(u32::MAX), None); + assert_eq!( + GgufType::from_raw(llama_cpp_bindings_sys::gguf_type::MAX), + None, + ); } #[test] diff --git a/llama-cpp-bindings/src/json_schema_to_grammar.rs b/llama-cpp-bindings/src/json_schema_to_grammar.rs index 34590a82..6949e549 100644 --- a/llama-cpp-bindings/src/json_schema_to_grammar.rs +++ b/llama-cpp-bindings/src/json_schema_to_grammar.rs @@ -1,15 +1,14 @@ use std::ffi::{CStr, CString, c_char}; -use crate::error::{LlamaCppError, Result}; -use crate::ffi_status_is_ok::status_is_ok; +use crate::error::JsonSchemaToGrammarError; +use crate::ffi_error_reader::read_and_free_cpp_error; -/// Convert a JSON schema string into a llama.cpp grammar string. -/// /// # Errors -/// Returns an error if the schema contains null bytes or the conversion fails. -pub fn json_schema_to_grammar(schema_json: &str) -> Result { - let schema_cstr = CString::new(schema_json) - .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?; +/// +/// Returns [`JsonSchemaToGrammarError`] if the schema string contains a NUL byte, +/// the wrapper reports any non-OK status, or the returned grammar is not valid UTF-8. +pub fn json_schema_to_grammar(schema_json: &str) -> Result { + let schema_cstr = CString::new(schema_json)?; let mut out: *mut c_char = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); @@ -22,63 +21,80 @@ pub fn json_schema_to_grammar(schema_json: &str) -> Result { ) }; - if !status_is_ok(status) || out.is_null() { - let message = if error_ptr.is_null() { - "unknown error".to_owned() - } else { - let message = unsafe { CStr::from_ptr(error_ptr) } - .to_string_lossy() - .into_owned(); - - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(error_ptr) }; - - message - }; - - return Err(LlamaCppError::JsonSchemaToGrammarError(message)); + match status { + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK => { + let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec(); + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out) }; + Ok(String::from_utf8(grammar_bytes)?) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED => { + Err(JsonSchemaToGrammarError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_INVALID_SCHEMA => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(JsonSchemaToGrammarError::InvalidSchema { message }) + } + llama_cpp_bindings_sys::LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(JsonSchemaToGrammarError::Reported { message }) + } + other => { + unreachable!("llama_rs_json_schema_to_grammar returned unrecognized status {other}") + } } - - let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec(); - - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out) }; - - String::from_utf8(grammar_bytes) - .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string())) } #[cfg(test)] mod tests { use super::json_schema_to_grammar; + use crate::error::JsonSchemaToGrammarError; #[test] fn simple_object() { let schema = r#"{"type": "object", "properties": {"name": {"type": "string"}}}"#; - let grammar = json_schema_to_grammar(schema).unwrap(); + let grammar = json_schema_to_grammar(schema).expect("schema converts to grammar"); assert!(!grammar.is_empty()); } #[test] - fn null_byte_returns_error() { + fn null_byte_returns_schema_contains_nul_byte_error() { let schema = "{\x00}"; let result = json_schema_to_grammar(schema); - assert!(result.is_err()); + assert!(matches!( + result, + Err(JsonSchemaToGrammarError::SchemaContainsNulByte(_)), + )); } #[test] fn simple_string() { let schema = r#"{"type": "string"}"#; - let grammar = json_schema_to_grammar(schema).unwrap(); + let grammar = json_schema_to_grammar(schema).expect("schema converts to grammar"); assert!(!grammar.is_empty()); } #[test] - fn invalid_json_returns_ffi_error() { + fn invalid_json_returns_reported() { let schema = "not valid json at all"; let result = json_schema_to_grammar(schema); - assert!(result.is_err()); + assert!(matches!( + result, + Err(JsonSchemaToGrammarError::Reported { .. }), + )); + } + + #[test] + fn unresolved_ref_returns_invalid_schema() { + let schema = r##"{"$ref": "#/$defs/Missing"}"##; + let result = json_schema_to_grammar(schema); + + assert!( + matches!(result, Err(JsonSchemaToGrammarError::InvalidSchema { .. })), + "expected InvalidSchema, got {result:?}", + ); } } diff --git a/llama-cpp-bindings/src/lib.rs b/llama-cpp-bindings/src/lib.rs index b77d14a4..9bed927b 100644 --- a/llama-cpp-bindings/src/lib.rs +++ b/llama-cpp-bindings/src/lib.rs @@ -64,11 +64,12 @@ pub mod tool_call_template_overrides; pub use error::{ ApplyChatTemplateError, ChatTemplateError, DecodeError, EmbeddingsError, EncodeError, - EvalMultimodalChunksError, GrammarError, LlamaContextLoadError, LlamaCppError, - LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError, - LlamaModelLoadError, LogitsError, MarkerDetectionError, MetaValError, ModelParamsError, - NewLlamaChatMessageError, ParseChatMessageError, Result, SampleError, SamplerAcceptError, - SamplingError, StringToTokenError, TokenSamplingError, TokenToStringError, + EvalMultimodalChunksError, GrammarError, JsonSchemaToGrammarError, KvCacheSeqAddError, + KvCacheSeqDivError, LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError, + LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError, + MarkerDetectionError, MetaValError, ModelParamsError, NewLlamaChatMessageError, + ParseChatMessageError, Result, SampleError, SamplerAcceptError, SamplingError, + StringToTokenError, TokenSamplingError, TokenToStringError, }; pub use chat_message_parse_outcome::ChatMessageParseOutcome; diff --git a/llama-cpp-bindings/src/llama_backend.rs b/llama-cpp-bindings/src/llama_backend.rs index 20ad3ac3..ff6b09f9 100644 --- a/llama-cpp-bindings/src/llama_backend.rs +++ b/llama-cpp-bindings/src/llama_backend.rs @@ -38,7 +38,7 @@ impl LlamaBackend { /// /// let backend = LlamaBackend::init()?; /// // the llama backend can only be initialized once - /// assert_eq!(Err(LlamaCppError::BackendAlreadyInitialized), LlamaBackend::init()); + /// assert!(matches!(LlamaBackend::init(), Err(LlamaCppError::BackendAlreadyInitialized))); /// ///# Ok(()) ///# } @@ -161,10 +161,10 @@ mod tests { fn double_init_returns_error() { let _backend = LlamaBackend::init().unwrap(); let second = LlamaBackend::init(); - assert_eq!( + assert!(matches!( second.unwrap_err(), LlamaCppError::BackendAlreadyInitialized - ); + )); } #[test] diff --git a/llama-cpp-bindings/src/llama_token_attrs.rs b/llama-cpp-bindings/src/llama_token_attrs.rs index 37d46651..688d228f 100644 --- a/llama-cpp-bindings/src/llama_token_attrs.rs +++ b/llama-cpp-bindings/src/llama_token_attrs.rs @@ -5,6 +5,16 @@ use enumflags2::BitFlags; use crate::llama_token_attr::LlamaTokenAttr; use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError; +#[cfg(target_env = "msvc")] +const fn llama_token_type_to_u32(value: llama_cpp_bindings_sys::llama_token_type) -> u32 { + value.cast_unsigned() +} + +#[cfg(not(target_env = "msvc"))] +const fn llama_token_type_to_u32(value: llama_cpp_bindings_sys::llama_token_type) -> u32 { + value +} + /// A set of [`LlamaTokenAttr`] flags. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct LlamaTokenAttrs(pub BitFlags); @@ -27,11 +37,11 @@ impl TryFrom for LlamaTokenAttrs { type Error = LlamaTokenAttrsFromIntError; fn try_from(value: llama_cpp_bindings_sys::llama_vocab_type) -> Result { - Ok(Self(BitFlags::from_bits(value as _).map_err( - |bit_flag_error| { + Ok(Self( + BitFlags::from_bits(llama_token_type_to_u32(value)).map_err(|bit_flag_error| { LlamaTokenAttrsFromIntError::UnknownValue(bit_flag_error.invalid_bits()) - }, - )?)) + })?, + )) } } @@ -55,7 +65,7 @@ mod tests { #[test] fn try_from_zero_produces_empty_flags() { - let attrs = LlamaTokenAttrs::try_from(0u32); + let attrs = LlamaTokenAttrs::try_from(0); assert!(attrs.is_ok()); assert!(attrs.expect("valid attribute").is_empty()); @@ -63,14 +73,13 @@ mod tests { #[test] fn try_from_invalid_bits_returns_error() { - let invalid_value = 0xFFFF_FFFFu32; - let result = LlamaTokenAttrs::try_from(invalid_value); + let result = LlamaTokenAttrs::try_from(!0); assert!(result.is_err()); - matches!( + assert!(matches!( result.expect_err("should fail"), - LlamaTokenAttrsFromIntError::UnknownValue(_) - ); + LlamaTokenAttrsFromIntError::UnknownValue(_), + )); } #[test] diff --git a/llama-cpp-bindings/src/llguidance_sampler.rs b/llama-cpp-bindings/src/llguidance_sampler.rs index ffd51d75..76a987a9 100644 --- a/llama-cpp-bindings/src/llguidance_sampler.rs +++ b/llama-cpp-bindings/src/llguidance_sampler.rs @@ -153,9 +153,7 @@ pub fn create_llg_sampler( }; if sampler.is_null() { - Err(GrammarError::NullGrammar( - "llguidance sampler returned null".to_owned(), - )) + Err(GrammarError::LlguidanceSamplerUnavailable) } else { Ok(LlamaSampler { sampler }) } diff --git a/llama-cpp-bindings/src/model.rs b/llama-cpp-bindings/src/model.rs index de22549d..d55ee679 100644 --- a/llama-cpp-bindings/src/model.rs +++ b/llama-cpp-bindings/src/model.rs @@ -31,7 +31,6 @@ use llama_cpp_bindings_types::ToolCallArguments; use llama_cpp_bindings_types::ToolCallMarkers; use crate::chat_message_parse_outcome::ChatMessageParseOutcome; -use crate::ffi_status_to_i32::status_to_i32; use crate::llama_backend::LlamaBackend; use crate::llama_token_attrs::LlamaTokenAttrs; use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError; @@ -219,35 +218,29 @@ impl LlamaModel { let (c_string, c_string_len) = cstring_with_validated_len(str)?; let buffer_capacity = c_int::try_from(buffer.capacity())?; - let size = unsafe { - llama_cpp_bindings_sys::llama_tokenize( + let size = invoke_rs_tokenize( + self.vocab_ptr(), + c_string.as_ptr(), + c_string_len, + buffer + .as_mut_ptr() + .cast::(), + buffer_capacity, + add_bos, + )?; + + let size = if size.is_negative() { + buffer.reserve_exact(usize::try_from(-size)?); + invoke_rs_tokenize( self.vocab_ptr(), c_string.as_ptr(), c_string_len, buffer .as_mut_ptr() .cast::(), - buffer_capacity, + -size, add_bos, - true, - ) - }; - - let size = if size.is_negative() { - buffer.reserve_exact(usize::try_from(-size)?); - unsafe { - llama_cpp_bindings_sys::llama_tokenize( - self.vocab_ptr(), - c_string.as_ptr(), - c_string_len, - buffer - .as_mut_ptr() - .cast::(), - -size, - add_bos, - true, - ) - } + )? } else { size }; @@ -578,22 +571,43 @@ impl LlamaModel { } let cstr = CString::new(path_str)?; - let llama_model = unsafe { - llama_cpp_bindings_sys::llama_load_model_from_file(cstr.as_ptr(), params.params) + let mut out_model: *mut llama_cpp_bindings_sys::llama_model = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_load_model_from_file( + cstr.as_ptr(), + params.params, + &raw mut out_model, + &raw mut out_error, + ) }; - - let model = match NonNull::new(llama_model) { - Some(ptr) => ptr, - None if !path.exists() => { - return Err(LlamaModelLoadError::FileNotFound(path.to_path_buf())); + match status { + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_OK => { + let model = NonNull::new(out_model) + .ok_or(LlamaModelLoadError::Unloadable)?; + Ok(Self { + model, + tok_env: OnceLock::new(), + }) } - None => return Err(LlamaModelLoadError::NullResult), - }; - - Ok(Self { - model, - tok_env: OnceLock::new(), - }) + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL => { + if path.exists() { + Err(LlamaModelLoadError::Unloadable) + } else { + Err(LlamaModelLoadError::FileNotFound(path.to_path_buf())) + } + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => { + Err(LlamaModelLoadError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(LlamaModelLoadError::Reported { message }) + } + other => unreachable!( + "llama_rs_load_model_from_file returned unrecognized status {other}" + ), + } } /// Initializes a lora adapter from a file. @@ -621,7 +635,7 @@ impl LlamaModel { }; let Some(adapter) = NonNull::new(raw_adapter) else { - return Err(LlamaLoraAdapterInitError::NullResult); + return Err(LlamaLoraAdapterInitError::Unloadable); }; Ok(LlamaLoraAdapter { @@ -741,22 +755,9 @@ impl LlamaModel { /// Returns [`MarkerDetectionError`] when any underlying FFI call fails. pub fn streaming_markers(&self) -> Result { let (reasoning_open_str, reasoning_close_str) = - invoke_ffi_string_pair_detector(|first, second, error| unsafe { - llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers( - self.model.as_ptr(), - first, - second, - error, - ) - })?; + invoke_detect_reasoning_markers(self.model.as_ptr())?; - let tool_call_haystack = invoke_ffi_single_string_detector(|haystack, error| unsafe { - llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack( - self.model.as_ptr(), - haystack, - error, - ) - })?; + let tool_call_haystack = invoke_compute_tool_call_haystack(self.model.as_ptr())?; let autoparser_pair = tool_call_haystack.as_deref().and_then( crate::extract_tool_call_markers_from_haystack::extract_tool_call_markers_from_haystack, @@ -817,14 +818,7 @@ impl LlamaModel { /// # Errors /// Returns [`MarkerDetectionError`] when the underlying FFI call fails. pub fn reasoning_markers(&self) -> Result, MarkerDetectionError> { - let (open, close) = invoke_ffi_string_pair_detector(|first, second, error| unsafe { - llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers( - self.model.as_ptr(), - first, - second, - error, - ) - })?; + let (open, close) = invoke_detect_reasoning_markers(self.model.as_ptr())?; match (open, close) { (Some(open), Some(close)) if !open.is_empty() && !close.is_empty() => { @@ -936,12 +930,12 @@ impl LlamaModel { synthesize_missing_tool_call_ids(&mut parsed.tool_calls); Ok(ChatMessageParseOutcome::Recognized(parsed)) } - Err(ParseChatMessageError::ParseException(ffi_error_message)) => { + Err(ParseChatMessageError::ParseFailed { message }) => { Ok(ChatMessageParseOutcome::Unrecognized(RawChatMessage { tools_json: tools_json.to_owned(), text: input.to_owned(), is_partial, - ffi_error_message, + ffi_error_message: message, })) } Err(other) => Err(other), @@ -974,18 +968,65 @@ impl LlamaModel { }; let parsed = match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => collect_parsed_chat_message(handle), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => { - let message = read_optional_owned_cstr_lossy(out_error); - Err(ParseChatMessageError::ParseException(message)) + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_OK => { + collect_parsed_chat_message(handle) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE => { + Err(ParseChatMessageError::NoChatTemplate) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB => { + Err(ParseChatMessageError::NoVocab) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED => { + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + out_error = ptr::null_mut(); + Err(ParseChatMessageError::ParseFailed { message }) + } + other => { + unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}") } - other => Err(ParseChatMessageError::FfiError(status_to_i32(other))), }; - unsafe { llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle) }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; - - parsed + let mut free_error: *mut c_char = ptr::null_mut(); + let free_status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle, &raw mut free_error) + }; + match (parsed, free_status) { + (Ok(value), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_OK) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Ok(value) + } + ( + Ok(_), + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION, + ) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(free_error) }; + Err(ParseChatMessageError::DestructorFailed { message }) + } + ( + Ok(_), + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED, + ) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + (Ok(_), other) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) }; + unreachable!("llama_rs_parsed_chat_free returned unrecognized status {other}") + } + (Err(parse_err), _) => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) }; + Err(parse_err) + } + } } /// Render the model's chat template with the autoparser's synthetic @@ -1001,14 +1042,7 @@ impl LlamaModel { &self, ) -> Result<(String, String), MarkerDetectionError> { let (no_tools, with_tools) = - invoke_ffi_string_pair_detector(|first, second, error| unsafe { - llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders( - self.model.as_ptr(), - first, - second, - error, - ) - })?; + invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?; Ok((no_tools.unwrap_or_default(), with_tools.unwrap_or_default())) } @@ -1071,26 +1105,15 @@ fn collect_parsed_chat_message( return Ok(ParsedChatMessage::default()); } - let content = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_content(handle) - })?; - let reasoning_content = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content(handle) - })?; - - let count = unsafe { llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count(handle) }; + let content = read_parsed_chat_content(handle)?; + let reasoning_content = read_parsed_chat_reasoning_content(handle)?; + let count = read_parsed_chat_tool_call_count(handle)?; let mut tool_calls = Vec::with_capacity(count); for index in 0..count { - let id = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id(handle, index) - })?; - let name = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name(handle, index) - })?; - let arguments_json = read_owned_cstr_for_parse(unsafe { - llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments(handle, index) - })?; + let id = read_parsed_chat_tool_call_id(handle, index)?; + let name = read_parsed_chat_tool_call_name(handle, index)?; + let arguments_json = read_parsed_chat_tool_call_arguments(handle, index)?; let arguments = ToolCallArguments::from_string(arguments_json); tool_calls.push(ParsedToolCall::new(id, name, arguments)); @@ -1103,6 +1126,211 @@ fn collect_parsed_chat_message( )) } +fn read_parsed_chat_content( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_content( + handle, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::Reported { message }) + } + other => unreachable!("llama_rs_parsed_chat_content returned unrecognized status {other}"), + } +} + +fn read_parsed_chat_reasoning_content( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content( + handle, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::Reported { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_reasoning_content returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_count( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, +) -> Result { + let mut out_count: usize = 0; + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count( + handle, + &raw mut out_count, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK => Ok(out_count), + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::Reported { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_count returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_id( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, + index: usize, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id( + handle, + index, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS => { + Err(ParseChatMessageError::ToolCallIdIndexOutOfBounds { index }) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::Reported { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_id returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_name( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, + index: usize, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name( + handle, + index, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS => { + Err(ParseChatMessageError::ToolCallNameIndexOutOfBounds { index }) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::Reported { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_name returned unrecognized status {other}" + ), + } +} + +fn read_parsed_chat_tool_call_arguments( + handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat, + index: usize, +) -> Result { + let mut out_string: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments( + handle, + index, + &raw mut out_string, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK => { + consume_accessor_string(out_string) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS => { + Err(ParseChatMessageError::ToolCallArgumentsIndexOutOfBounds { index }) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED => { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; + Err(ParseChatMessageError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(ParseChatMessageError::Reported { message }) + } + other => unreachable!( + "llama_rs_parsed_chat_tool_call_arguments returned unrecognized status {other}" + ), + } +} + +fn consume_accessor_string(ptr: *mut c_char) -> Result { + if ptr.is_null() { + return Ok(String::new()); + } + let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec(); + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) }; + Ok(String::from_utf8(bytes)?) +} + struct ReasoningSplit { reasoning: String, content: String, @@ -1152,93 +1380,133 @@ fn synthesize_missing_tool_call_ids(tool_calls: &mut [ParsedToolCall]) { } } -fn parse_single_string_status( - status: llama_cpp_bindings_sys::llama_rs_status, - out_value: *mut c_char, - out_error: *mut c_char, -) -> Result, MarkerDetectionError> { - match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => read_optional_owned_cstr(out_value), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => { - let message = read_optional_owned_cstr_lossy(out_error); +fn invoke_detect_reasoning_markers( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result<(Option, Option), MarkerDetectionError> { + let mut out_open: *mut c_char = ptr::null_mut(); + let mut out_close: *mut c_char = ptr::null_mut(); + let mut out_error: *mut c_char = ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers( + model, + &raw mut out_open, + &raw mut out_close, + &raw mut out_error, + ) + }; - Err(MarkerDetectionError::AnalyzeException(message)) + let parsed = match status { + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_OK => { + collect_optional_cstr_pair(out_open, out_close) + } + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::NotEnoughMemory) } - other => Err(MarkerDetectionError::FfiError(status_to_i32(other))), + llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::ReasoningMarkerDetectionFailed { message }) + } + other => unreachable!( + "llama_rs_detect_reasoning_markers returned unrecognized status {other}" + ), + }; + + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_open) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_close) }; + if !matches!( + parsed, + Err(MarkerDetectionError::ReasoningMarkerDetectionFailed { .. }) + ) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; } + + parsed } -fn invoke_ffi_single_string_detector( - invoke: TInvoke, -) -> Result, MarkerDetectionError> -where - TInvoke: FnOnce(*mut *mut c_char, *mut *mut c_char) -> llama_cpp_bindings_sys::llama_rs_status, -{ - let mut out_value: *mut c_char = ptr::null_mut(); +fn invoke_compute_tool_call_haystack( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result, MarkerDetectionError> { + let mut out_haystack: *mut c_char = ptr::null_mut(); let mut out_error: *mut c_char = ptr::null_mut(); - let status = invoke(&raw mut out_value, &raw mut out_error); - let parsed = parse_single_string_status(status, out_value, out_error); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack( + model, + &raw mut out_haystack, + &raw mut out_error, + ) + }; - unsafe { - if !out_value.is_null() { - llama_cpp_bindings_sys::llama_rs_string_free(out_value); + let parsed = match status { + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK => { + read_optional_owned_cstr(out_haystack) } - if !out_error.is_null() { - llama_cpp_bindings_sys::llama_rs_string_free(out_error); + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::NotEnoughMemory) } + llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::ToolCallHaystackComputationFailed { message }) + } + other => unreachable!( + "llama_rs_compute_tool_call_haystack returned unrecognized status {other}" + ), + }; + + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_haystack) }; + if !matches!( + parsed, + Err(MarkerDetectionError::ToolCallHaystackComputationFailed { .. }) + ) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; } parsed } -fn invoke_ffi_string_pair_detector( - invoke: TInvoke, -) -> Result<(Option, Option), MarkerDetectionError> -where - TInvoke: FnOnce( - *mut *mut c_char, - *mut *mut c_char, - *mut *mut c_char, - ) -> llama_cpp_bindings_sys::llama_rs_status, -{ - let mut out_first: *mut c_char = ptr::null_mut(); - let mut out_second: *mut c_char = ptr::null_mut(); +fn invoke_diagnose_tool_call_synthetic_renders( + model: *const llama_cpp_bindings_sys::llama_model, +) -> Result<(Option, Option), MarkerDetectionError> { + let mut out_no_tools: *mut c_char = ptr::null_mut(); + let mut out_with_tools: *mut c_char = ptr::null_mut(); let mut out_error: *mut c_char = ptr::null_mut(); - let status = invoke(&raw mut out_first, &raw mut out_second, &raw mut out_error); - - let parsed = (|| match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => { - let first = read_optional_owned_cstr(out_first)?; - let second = read_optional_owned_cstr(out_second)?; + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders( + model, + &raw mut out_no_tools, + &raw mut out_with_tools, + &raw mut out_error, + ) + }; - Ok((first, second)) + let parsed = match status { + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK => { + collect_optional_cstr_pair(out_no_tools, out_with_tools) } - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION => { - let message = read_optional_owned_cstr_lossy(out_error); - - Err(MarkerDetectionError::AnalyzeException(message)) + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED => { + Err(MarkerDetectionError::NotEnoughMemory) } - other => Err(MarkerDetectionError::FfiError(status_to_i32(other))), - })(); - - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_first) }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_second) }; - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; - - parsed -} + llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(MarkerDetectionError::ToolCallSyntheticRenderDiagnosisFailed { message }) + } + other => unreachable!( + "llama_rs_diagnose_tool_call_synthetic_renders returned unrecognized status {other}" + ), + }; -fn read_owned_cstr_for_parse(ptr: *mut c_char) -> Result { - if ptr.is_null() { - return Ok(String::new()); + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_no_tools) }; + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_with_tools) }; + if !matches!( + parsed, + Err(MarkerDetectionError::ToolCallSyntheticRenderDiagnosisFailed { .. }) + ) { + unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) }; } - let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec(); - unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) }; - - Ok(String::from_utf8(bytes)?) + parsed } fn read_optional_owned_cstr(ptr: *const c_char) -> Result, MarkerDetectionError> { @@ -1251,14 +1519,49 @@ fn read_optional_owned_cstr(ptr: *const c_char) -> Result, Marker Ok(Some(String::from_utf8(bytes)?)) } -fn read_optional_owned_cstr_lossy(ptr: *const c_char) -> String { - if ptr.is_null() { - return String::new(); +fn invoke_rs_tokenize( + vocab: *const llama_cpp_bindings_sys::llama_vocab, + text: *const c_char, + text_len: c_int, + tokens: *mut llama_cpp_bindings_sys::llama_token, + n_tokens_max: c_int, + add_bos: bool, +) -> Result { + let mut out_count: i32 = 0; + let mut out_error: *mut c_char = ptr::null_mut(); + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_tokenize( + vocab, + text, + text_len, + tokens, + n_tokens_max, + add_bos, + true, + &raw mut out_count, + &raw mut out_error, + ) + }; + match status { + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_OK => Ok(out_count), + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => { + Err(StringToTokenError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + Err(StringToTokenError::Reported { message }) + } + other => unreachable!("llama_rs_tokenize returned unrecognized status {other}"), } +} - unsafe { CStr::from_ptr(ptr) } - .to_string_lossy() - .into_owned() +fn collect_optional_cstr_pair( + first_ptr: *const c_char, + second_ptr: *const c_char, +) -> Result<(Option, Option), MarkerDetectionError> { + let first = read_optional_owned_cstr(first_ptr)?; + let second = read_optional_owned_cstr(second_ptr)?; + Ok((first, second)) } fn extract_meta_string( @@ -1392,152 +1695,3 @@ mod extract_meta_string_tests { assert!(result.is_err()); } } - -#[cfg(test)] -mod ffi_helper_tests { - use std::ffi::CString; - use std::ptr; - - use super::invoke_ffi_single_string_detector; - use super::invoke_ffi_string_pair_detector; - use super::parse_single_string_status; - use super::read_optional_owned_cstr_lossy; - use crate::MarkerDetectionError; - - #[test] - fn read_optional_owned_cstr_lossy_returns_empty_for_null() { - let result = read_optional_owned_cstr_lossy(ptr::null()); - - assert!(result.is_empty()); - } - - #[test] - fn read_optional_owned_cstr_lossy_returns_string_for_valid_pointer() { - let owned = CString::new("hello").expect("static literal has no nuls"); - let result = read_optional_owned_cstr_lossy(owned.as_ptr()); - - assert_eq!(result, "hello"); - } - - #[test] - fn read_optional_owned_cstr_lossy_handles_invalid_utf8_via_replacement() { - let owned = CString::new(vec![b'a', 0xFF, b'b']).expect("no interior nul"); - let result = read_optional_owned_cstr_lossy(owned.as_ptr()); - - assert!(result.starts_with('a')); - assert!(result.ends_with('b')); - } - - #[test] - fn parse_single_string_status_returns_none_for_ok_with_null() { - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK, - ptr::null_mut(), - ptr::null_mut(), - ); - - assert_eq!(result.expect("OK + null returns Ok(None)"), None); - } - - #[test] - fn parse_single_string_status_returns_some_for_ok_with_value() { - let owned = CString::new("present").expect("no nul"); - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK, - owned.as_ptr().cast_mut(), - ptr::null_mut(), - ); - - assert_eq!( - result.expect("OK + value returns Ok(Some)"), - Some("present".to_owned()) - ); - } - - #[test] - fn parse_single_string_status_returns_analyze_exception() { - let owned = CString::new("boom").expect("no nul"); - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION, - ptr::null_mut(), - owned.as_ptr().cast_mut(), - ); - - match result.expect_err("EXCEPTION must yield Err") { - MarkerDetectionError::AnalyzeException(message) => assert_eq!(message, "boom"), - other => panic!("expected AnalyzeException, got {other:?}"), - } - } - - #[test] - fn parse_single_string_status_returns_ffi_error_for_other_status() { - let result = parse_single_string_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT, - ptr::null_mut(), - ptr::null_mut(), - ); - - match result.expect_err("invalid status must yield Err") { - MarkerDetectionError::FfiError(_) => {} - other => panic!("expected FfiError, got {other:?}"), - } - } - - #[test] - fn invoke_ffi_single_string_detector_propagates_invalid_argument_status() { - let result = invoke_ffi_single_string_detector(|_value, _error| { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT - }); - - assert!(matches!(result, Err(MarkerDetectionError::FfiError(_)))); - } - - #[test] - fn invoke_ffi_single_string_detector_returns_none_for_ok_with_null() { - let result = invoke_ffi_single_string_detector(|value, _error| { - unsafe { - *value = ptr::null_mut(); - } - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK - }); - - assert_eq!(result.expect("OK + null returns Ok(None)"), None); - } - - #[test] - fn invoke_ffi_string_pair_detector_propagates_invalid_argument_status() { - let result = invoke_ffi_string_pair_detector(|_first, _second, _error| { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT - }); - - assert!(matches!(result, Err(MarkerDetectionError::FfiError(_)))); - } - - #[test] - fn invoke_ffi_string_pair_detector_returns_pair_of_none_for_ok_with_nulls() { - let result = invoke_ffi_string_pair_detector(|first, second, _error| { - unsafe { - *first = ptr::null_mut(); - *second = ptr::null_mut(); - } - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK - }); - - assert_eq!( - result.expect("OK with both null returns Ok((None, None))"), - (None, None) - ); - } - - #[test] - fn invoke_ffi_string_pair_detector_propagates_invalid_status_codes() { - let result = invoke_ffi_string_pair_detector(|_first, _second, _error| { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_ALLOCATION_FAILED - }); - - match result.expect_err("non-OK status yields Err") { - MarkerDetectionError::FfiError(code) => assert!(code != 0), - other => panic!("expected FfiError, got {other:?}"), - } - } -} diff --git a/llama-cpp-bindings/src/model/params.rs b/llama-cpp-bindings/src/model/params.rs index ebd7edd7..3b5bb2aa 100644 --- a/llama-cpp-bindings/src/model/params.rs +++ b/llama-cpp-bindings/src/model/params.rs @@ -421,9 +421,7 @@ impl LlamaModelParams { /// /// # Errors /// - /// Returns [`FitError::Failure`] if no fitting allocation could be found, or - /// [`FitError::Error`] on a hard error (e.g. the model file could not be read or the C++ - /// implementation threw an exception). + /// Returns one of the [`FitError`] variants matching the vendored wrapper's status code. pub fn fit_params( mut self: Pin<&mut Self>, model_path: &CStr, @@ -450,6 +448,9 @@ impl LlamaModelParams { self.params.tensor_split = null::(); self.params.tensor_buft_overrides = null(); + let mut out_unrecognized_status_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); + let status = unsafe { llama_cpp_bindings_sys::llama_rs_fit_params( model_path.as_ptr(), @@ -460,13 +461,33 @@ impl LlamaModelParams { margins.as_mut_ptr(), n_ctx_min, log_level, + &raw mut out_unrecognized_status_code, + &raw mut out_error, ) }; match status { - llama_cpp_bindings_sys::LLAMA_RS_FIT_STATUS_SUCCESS => {} - llama_cpp_bindings_sys::LLAMA_RS_FIT_STATUS_FAILURE => return Err(FitError::Failure), - _ => return Err(FitError::Error), + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_OK => {} + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE => { + return Err(FitError::NoFittingMemoryLayout); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR => { + return Err(FitError::Aborted); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE => { + return Err(FitError::UnknownStatus { + code: out_unrecognized_status_code, + }); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED => { + return Err(FitError::NotEnoughMemory); + } + llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION => { + let message = + unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) }; + return Err(FitError::Reported { message }); + } + other => unreachable!("llama_rs_fit_params returned unrecognized wrapper status: {other}"), } self.params.tensor_split = self.tensor_split.as_ptr(); @@ -681,10 +702,10 @@ mod tests { fn with_devices_invalid_index_returns_error() { let result = LlamaModelParams::default().with_devices(&[999_999]); - assert_eq!( + assert!(matches!( result.unwrap_err(), crate::LlamaCppError::BackendDeviceNotFound(999_999) - ); + )); } #[test] @@ -829,6 +850,9 @@ mod tests { llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE, ); - assert_eq!(result, Err(FitError::Error)); + assert!( + matches!(result, Err(FitError::Aborted | FitError::Reported { .. })), + "expected Aborted or Reported, got {result:?}" + ); } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs index 08e2ce6c..14ab3664 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap.rs @@ -1,7 +1,10 @@ use std::ffi::{CStr, CString, c_char}; +use std::path::PathBuf; use std::ptr::NonNull; use std::slice; +use crate::ffi_error_reader::read_and_free_cpp_error; + use super::mtmd_bitmap_error::MtmdBitmapError; use super::mtmd_context::MtmdContext; @@ -62,7 +65,7 @@ impl MtmdBitmap { let bitmap = unsafe { llama_cpp_bindings_sys::mtmd_bitmap_init(nx, ny, data.as_ptr()) }; - let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::NullResult)?; + let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::BitmapDecodeFailed)?; Ok(Self { bitmap }) } @@ -91,7 +94,7 @@ impl MtmdBitmap { llama_cpp_bindings_sys::mtmd_bitmap_init_from_audio(data.len(), data.as_ptr()) }; - let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::NullResult)?; + let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::BitmapDecodeFailed)?; Ok(Self { bitmap }) } @@ -104,20 +107,46 @@ impl MtmdBitmap { /// /// # Errors /// - /// * `CStringError` - Path contains null bytes - /// * `NullResult` - File could not be loaded or processed + /// Returns an [`MtmdBitmapError`] variant matching the wrapper's status code. pub fn from_file(ctx: &MtmdContext, path: &str) -> Result { let path_cstr = CString::new(path)?; - let bitmap = unsafe { - llama_cpp_bindings_sys::mtmd_helper_bitmap_init_from_file( + let mut out_bitmap: *mut llama_cpp_bindings_sys::mtmd_bitmap = std::ptr::null_mut(); + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_bitmap_init_from_file( ctx.context.as_ptr(), path_cstr.as_ptr(), + &raw mut out_bitmap, + &raw mut out_error, ) }; - let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::NullResult)?; - - Ok(Self { bitmap }) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK => { + let bitmap = NonNull::new(out_bitmap).ok_or_else(|| { + MtmdBitmapError::FileUnreadable { + path: PathBuf::from(path), + } + })?; + Ok(Self { bitmap }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL => { + Err(MtmdBitmapError::FileUnreadable { + path: PathBuf::from(path), + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdBitmapError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdBitmapError::Reported { message }) + } + other => unreachable!( + "llama_rs_mtmd_bitmap_init_from_file returned unrecognized status: {other}" + ), + } } /// Create a bitmap from a buffer containing file data. @@ -138,7 +167,7 @@ impl MtmdBitmap { ) }; - let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::NullResult)?; + let bitmap = NonNull::new(bitmap).ok_or(MtmdBitmapError::BitmapDecodeFailed)?; Ok(Self { bitmap }) } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs index c0ad849c..0ffa58ca 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_bitmap_error.rs @@ -1,16 +1,21 @@ -/// Errors that can occur when working with MTMD bitmaps +use std::path::PathBuf; + #[derive(thiserror::Error, Debug)] pub enum MtmdBitmapError { - /// Failed to create `CString` from input - #[error("Failed to create CString: {0}")] + #[error("Failed to create CString from bitmap-source path: {0}")] CStringError(#[from] std::ffi::NulError), - /// Invalid data size for bitmap + #[error("Bitmap-source path is not valid UTF-8: {0:?}")] + PathToStrError(PathBuf), #[error("Invalid data size for bitmap")] InvalidDataSize, - /// Image dimensions too small for processing (minimum 2x2) #[error("Image dimensions too small: {0}x{1} (minimum 2x2)")] ImageDimensionsTooSmall(u32, u32), - /// Bitmap creation returned null - #[error("Bitmap creation returned null")] - NullResult, + #[error("bitmap data could not be decoded")] + BitmapDecodeFailed, + #[error("bitmap file is unreadable: {path:?}")] + FileUnreadable { path: PathBuf }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_context.rs b/llama-cpp-bindings/src/mtmd/mtmd_context.rs index 4445a6ad..21ab2c11 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_context.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_context.rs @@ -1,6 +1,8 @@ use std::ffi::CString; +use std::ffi::c_char; use std::ptr::NonNull; +use crate::ffi_error_reader::read_and_free_cpp_error; use crate::model::LlamaModel; use super::mtmd_bitmap::MtmdBitmap; @@ -12,19 +14,56 @@ use super::mtmd_input_chunks::MtmdInputChunks; use super::mtmd_input_text::MtmdInputText; use super::mtmd_tokenize_error::MtmdTokenizeError; -const fn tokenize_result_to_error(result: i32) -> MtmdTokenizeError { - match result { - 1 => MtmdTokenizeError::BitmapCountMismatch, - 2 => MtmdTokenizeError::ImagePreprocessingError, - _ => MtmdTokenizeError::UnknownError(result), +fn map_tokenize_status( + status: llama_cpp_bindings_sys::llama_rs_mtmd_tokenize_status, + undocumented_return_code: i32, + out_error: *mut c_char, +) -> Result<(), MtmdTokenizeError> { + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT => { + Err(MtmdTokenizeError::BitmapCountDoesNotMatchMarkerCount) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR => { + Err(MtmdTokenizeError::MediaPreprocessingFailed) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE => { + Err(MtmdTokenizeError::UnknownStatus { + code: undocumented_return_code, + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdTokenizeError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdTokenizeError::Reported { message }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO => unreachable!("llama_rs_mtmd_tokenize NULL_BITMAPS_ARG: Rust always passes a non-null bitmaps pointer when count > 0"), + other => unreachable!("llama_rs_mtmd_tokenize returned unrecognized status: {other}"), } } -const fn check_encode_result(result: i32) -> Result<(), MtmdEncodeError> { - if result == 0 { - Ok(()) - } else { - Err(MtmdEncodeError::EncodeFailure(result)) +fn map_encode_chunk_status( + status: llama_cpp_bindings_sys::llama_rs_mtmd_encode_chunk_status, + vendored_return_code: i32, + out_error: *mut c_char, +) -> Result<(), MtmdEncodeError> { + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE => { + Err(MtmdEncodeError::EncodingFailed { + code: vendored_return_code, + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdEncodeError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdEncodeError::Reported { message }) + } + other => unreachable!("llama_rs_mtmd_encode_chunk returned unrecognized status: {other}"), } } @@ -46,9 +85,7 @@ impl MtmdContext { /// /// # Errors /// - /// This function will return an error if: - /// - The path cannot be converted to a C string - /// - The underlying C function returns null (indicating initialization failure) + /// Returns an [`MtmdInitError`] variant matching the wrapper's status code. pub fn init_from_file( mmproj_path: &str, text_model: &LlamaModel, @@ -57,17 +94,42 @@ impl MtmdContext { let path_cstr = CString::new(mmproj_path)?; let ctx_params = llama_cpp_bindings_sys::mtmd_context_params::from(params); - let context = unsafe { - llama_cpp_bindings_sys::mtmd_init_from_file( + let mut out_ctx: *mut llama_cpp_bindings_sys::mtmd_context = std::ptr::null_mut(); + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_init_from_file( path_cstr.as_ptr(), text_model.model.as_ptr(), ctx_params, + &raw mut out_ctx, + &raw mut out_error, ) }; - let context = NonNull::new(context).ok_or(MtmdInitError::NullResult)?; - - Ok(Self { context }) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_OK => { + let context = NonNull::new(out_ctx).ok_or_else(|| MtmdInitError::Unloadable { + path: std::path::PathBuf::from(mmproj_path), + })?; + Ok(Self { context }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL => { + Err(MtmdInitError::Unloadable { + path: std::path::PathBuf::from(mmproj_path), + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdInitError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdInitError::Reported { message }) + } + other => { + unreachable!("llama_rs_mtmd_init_from_file returned unrecognized status: {other}") + } + } } /// Check whether non-causal attention mask is needed before `llama_decode` @@ -117,24 +179,7 @@ impl MtmdContext { /// /// # Errors /// - /// * `BitmapCountMismatch` - Number of bitmaps doesn't match number of markers - /// * `ImagePreprocessingError` - Error occurred during image preprocessing - /// * `UnknownError` - Other tokenization error occurred - /// - /// # Example - /// - /// ```no_run - /// # use llama_cpp_bindings::mtmd::*; - /// # fn example(ctx: &MtmdContext, bitmap: &MtmdBitmap) -> Result<(), Box> { - /// let text = MtmdInputText { - /// text: "Here is an image: <__media__>\nDescribe it.".to_string(), - /// add_special: true, - /// parse_special: true, - /// }; - /// let chunks = ctx.tokenize(text, &[bitmap])?; - /// # Ok(()) - /// # } - /// ``` + /// Returns an [`MtmdTokenizeError`] variant matching the wrapper's status code. pub fn tokenize( &self, text: MtmdInputText, @@ -153,34 +198,44 @@ impl MtmdContext { .map(|bitmap| bitmap.bitmap.as_ptr().cast_const()) .collect(); - let result = unsafe { - llama_cpp_bindings_sys::mtmd_tokenize( + let mut out_undocumented_return_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_tokenize( self.context.as_ptr(), chunks.chunks.as_ptr(), &raw const input_text, bitmap_ptrs.as_ptr().cast_mut(), bitmaps.len(), + &raw mut out_undocumented_return_code, + &raw mut out_error, ) }; - if result == 0 { - Ok(chunks) - } else { - Err(tokenize_result_to_error(result)) - } + map_tokenize_status(status, out_undocumented_return_code, out_error)?; + Ok(chunks) } /// Encode a chunk for image/audio processing. /// /// # Errors /// - /// Returns `MtmdEncodeError::EncodeFailure` if encoding fails. + /// Returns an [`MtmdEncodeError`] variant matching the wrapper's status code. pub fn encode_chunk(&self, chunk: &MtmdInputChunk) -> Result<(), MtmdEncodeError> { - let result = unsafe { - llama_cpp_bindings_sys::mtmd_encode_chunk(self.context.as_ptr(), chunk.chunk.as_ptr()) + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); + + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_encode_chunk( + self.context.as_ptr(), + chunk.chunk.as_ptr(), + &raw mut out_vendored_return_code, + &raw mut out_error, + ) }; - check_encode_result(result) + map_encode_chunk_status(status, out_vendored_return_code, out_error) } } @@ -192,44 +247,86 @@ impl Drop for MtmdContext { #[cfg(test)] mod unit_tests { - use super::check_encode_result; - use super::tokenize_result_to_error; + use super::map_encode_chunk_status; + use super::map_tokenize_status; + use crate::mtmd::mtmd_encode_error::MtmdEncodeError; + use crate::mtmd::mtmd_tokenize_error::MtmdTokenizeError; #[test] - fn tokenize_result_bitmap_count_mismatch() { - let error = tokenize_result_to_error(1); + fn tokenize_status_maps_bitmap_count_mismatch() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT, + 0, + std::ptr::null_mut(), + ); - assert!(error.to_string().contains("does not match")); + assert!(matches!( + result, + Err(MtmdTokenizeError::BitmapCountDoesNotMatchMarkerCount) + )); } #[test] - fn tokenize_result_image_preprocessing_error() { - let error = tokenize_result_to_error(2); + fn tokenize_status_maps_media_preprocessing_failed() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR, + 0, + std::ptr::null_mut(), + ); - assert!(error.to_string().contains("Image preprocessing")); + assert!(matches!( + result, + Err(MtmdTokenizeError::MediaPreprocessingFailed) + )); } #[test] - fn tokenize_result_unknown_error() { - let error = tokenize_result_to_error(42); + fn tokenize_status_maps_unknown_status_with_value() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE, + 42, + std::ptr::null_mut(), + ); - assert!(error.to_string().contains("Unknown error: 42")); + assert!(matches!( + result, + Err(MtmdTokenizeError::UnknownStatus { code: 42 }) + )); } #[test] - fn check_encode_result_ok_for_zero() { - assert!(check_encode_result(0).is_ok()); + fn tokenize_status_maps_ok_to_unit() { + let result = map_tokenize_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_TOKENIZE_OK, + 0, + std::ptr::null_mut(), + ); + + assert!(matches!(result, Ok(()))); } #[test] - fn check_encode_result_error_for_nonzero() { - let result = check_encode_result(5); - - assert!( - result - .unwrap_err() - .to_string() - .contains("Encode failed with code: 5") + fn encode_chunk_status_maps_ok_to_unit() { + let result = map_encode_chunk_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_OK, + 0, + std::ptr::null_mut(), ); + + assert!(matches!(result, Ok(()))); + } + + #[test] + fn encode_chunk_status_maps_encoding_failed_with_code() { + let result = map_encode_chunk_status( + llama_cpp_bindings_sys::LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE, + 5, + std::ptr::null_mut(), + ); + + assert!(matches!( + result, + Err(MtmdEncodeError::EncodingFailed { code: 5 }) + )); } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs index fabd3311..ecc2aa9d 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_encode_error.rs @@ -1,7 +1,9 @@ -/// Errors that can occur during encoding #[derive(thiserror::Error, Debug)] pub enum MtmdEncodeError { - /// Encode operation failed - #[error("Encode failed with code: {0}")] - EncodeFailure(i32), + #[error("multimodal chunk encoding failed with code: {code}")] + EncodingFailed { code: i32 }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs index c4efa643..938711f4 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_eval_error.rs @@ -1,25 +1,19 @@ use crate::mtmd::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch; -/// Errors that can occur during evaluation #[derive(thiserror::Error, Debug)] pub enum MtmdEvalError { - /// Requested batch size exceeds the context's maximum batch size #[error("batch size {requested} exceeds context batch size {context_max}")] - BatchSizeExceedsContextLimit { - /// The batch size requested in `eval_chunks` - requested: i32, - /// The maximum batch size configured on the context - context_max: u32, - }, - /// An image chunk's token count exceeds the per-decode `n_batch` budget, - /// so handing it to `llama_decode` would trip the `GGML_ASSERT`. + BatchSizeExceedsContextLimit { requested: i32, context_max: u32 }, #[error( "image chunk has {} tokens but n_batch is {}", .0.image_tokens, .0.n_batch, )] ImageChunkExceedsBatchSize(ImageChunkBatchSizeMismatch), - /// Evaluation operation failed - #[error("Eval failed with code: {0}")] - EvalFailure(i32), + #[error("multimodal chunk eval failed with code: {code}")] + EvalFailed { code: i32 }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs index 755d6a55..db944126 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_init_error.rs @@ -1,10 +1,15 @@ -/// Errors that can occur when initializing MTMD context +use std::path::PathBuf; + #[derive(thiserror::Error, Debug)] pub enum MtmdInitError { - /// Failed to create `CString` from input - #[error("Failed to create CString: {0}")] + #[error("Failed to create CString from mmproj path: {0}")] CStringError(#[from] std::ffi::NulError), - /// MTMD context initialization returned null - #[error("MTMD context initialization returned null")] - NullResult, + #[error("Mmproj path is not valid UTF-8: {0:?}")] + PathToStrError(PathBuf), + #[error("mmproj could not be loaded: {path:?}")] + Unloadable { path: PathBuf }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs index 50643547..88d1358c 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk.rs @@ -1,8 +1,10 @@ use std::ffi::CStr; +use std::ffi::c_char; use std::ptr::NonNull; use std::slice; use crate::context::LlamaContext; +use crate::ffi_error_reader::read_and_free_cpp_error; use crate::token::LlamaToken; use super::image_chunk_batch_size_mismatch::ImageChunkBatchSizeMismatch; @@ -107,10 +109,10 @@ impl MtmdInputChunk { /// /// # Errors /// - /// Returns `MtmdInputChunkError::NullResult` if copying fails. + /// Returns `MtmdInputChunkError::ChunkOperationFailed` if copying fails. pub fn copy(&self) -> Result { let chunk = unsafe { llama_cpp_bindings_sys::mtmd_input_chunk_copy(self.chunk.as_ptr()) }; - let chunk = NonNull::new(chunk).ok_or(MtmdInputChunkError::NullResult)?; + let chunk = NonNull::new(chunk).ok_or(MtmdInputChunkError::ChunkOperationFailed)?; Ok(Self { chunk, owned: true }) } @@ -162,9 +164,11 @@ impl MtmdInputChunk { } let mut final_position: llama_cpp_bindings_sys::llama_pos = start_position; + let mut out_vendored_return_code: i32 = 0; + let mut out_error: *mut c_char = std::ptr::null_mut(); - let result = unsafe { - llama_cpp_bindings_sys::mtmd_helper_eval_chunk_single( + let status = unsafe { + llama_cpp_bindings_sys::llama_rs_mtmd_eval_chunk_single( mtmd_ctx.context.as_ptr(), llama_ctx.context.as_ptr(), self.chunk.as_ptr(), @@ -173,13 +177,28 @@ impl MtmdInputChunk { n_batch, logits_last, &raw mut final_position, + &raw mut out_vendored_return_code, + &raw mut out_error, ) }; - if result == 0 { - Ok(final_position) - } else { - Err(MtmdEvalError::EvalFailure(result)) + match status { + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK => Ok(final_position), + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE => { + Err(MtmdEvalError::EvalFailed { + code: out_vendored_return_code, + }) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED => { + Err(MtmdEvalError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(out_error) }; + Err(MtmdEvalError::Reported { message }) + } + other => unreachable!( + "llama_rs_mtmd_eval_chunk_single returned unrecognized status: {other}" + ), } } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk_error.rs index e44e1c30..ab94f14b 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunk_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunk_error.rs @@ -1,7 +1,5 @@ -/// Errors that can occur when working with individual MTMD input chunks #[derive(thiserror::Error, Debug)] pub enum MtmdInputChunkError { - /// Input chunk operation returned null - #[error("Input chunk operation returned null")] - NullResult, + #[error("input chunk operation failed")] + ChunkOperationFailed, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs index a74eb296..aee5d5b0 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks.rs @@ -11,7 +11,7 @@ const fn check_eval_result(result: i32) -> Result<(), MtmdEvalError> { if result == 0 { Ok(()) } else { - Err(MtmdEvalError::EvalFailure(result)) + Err(MtmdEvalError::EvalFailed { code: result }) } } @@ -31,7 +31,7 @@ impl MtmdInputChunks { /// /// # Errors /// - /// Returns `MtmdInputChunksError::NullResult` if the underlying llama.cpp function + /// Returns `MtmdInputChunksError::ChunksCreationFailed` if the underlying llama.cpp function /// returns null. /// /// # Examples @@ -45,7 +45,7 @@ impl MtmdInputChunks { /// ``` pub fn new() -> Result { let chunks = unsafe { llama_cpp_bindings_sys::mtmd_input_chunks_init() }; - let chunks = NonNull::new(chunks).ok_or(MtmdInputChunksError::NullResult)?; + let chunks = NonNull::new(chunks).ok_or(MtmdInputChunksError::ChunksCreationFailed)?; Ok(Self { chunks }) } @@ -174,15 +174,11 @@ mod tests { #[test] fn check_eval_result_error_for_nonzero() { + use super::MtmdEvalError; use super::check_eval_result; let result = check_eval_result(7); - assert!( - result - .unwrap_err() - .to_string() - .contains("Eval failed with code: 7") - ); + assert!(matches!(result, Err(MtmdEvalError::EvalFailed { code: 7 }))); } } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks_error.rs index 10a251d1..bdb29ca9 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_input_chunks_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_input_chunks_error.rs @@ -1,7 +1,5 @@ -/// Errors that can occur when working with MTMD input chunks collections #[derive(thiserror::Error, Debug)] pub enum MtmdInputChunksError { - /// Input chunks creation returned null - #[error("Input chunks creation returned null")] - NullResult, + #[error("input chunks collection could not be created")] + ChunksCreationFailed, } diff --git a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs index 8886bc19..28eaef1f 100644 --- a/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs +++ b/llama-cpp-bindings/src/mtmd/mtmd_tokenize_error.rs @@ -1,21 +1,19 @@ use crate::mtmd::mtmd_input_chunks_error::MtmdInputChunksError; -/// Errors that can occur during tokenization #[derive(thiserror::Error, Debug)] pub enum MtmdTokenizeError { - /// Number of bitmaps does not match number of markers in text - #[error("Number of bitmaps does not match number of markers")] - BitmapCountMismatch, - /// Image preprocessing error occurred - #[error("Image preprocessing error")] - ImagePreprocessingError, - /// Failed to create input chunks collection + #[error("Failed to create CString from input text: {0}")] + CStringError(#[from] std::ffi::NulError), #[error("{0}")] InputChunksError(#[from] MtmdInputChunksError), - /// Text contains characters that cannot be converted to C string - #[error("Failed to create CString from text: {0}")] - CStringError(#[from] std::ffi::NulError), - /// Unknown error occurred during tokenization - #[error("Unknown error: {0}")] - UnknownError(i32), + #[error("number of bitmaps does not match number of markers in the text")] + BitmapCountDoesNotMatchMarkerCount, + #[error("media preprocessing failed (image or audio)")] + MediaPreprocessingFailed, + #[error("mtmd_tokenize returned an unknown status code: {code}")] + UnknownStatus { code: i32 }, + #[error("not enough memory")] + NotEnoughMemory, + #[error("{message}")] + Reported { message: String }, } diff --git a/llama-cpp-bindings/src/sampling.rs b/llama-cpp-bindings/src/sampling.rs index e9aadb21..a390d6b7 100644 --- a/llama-cpp-bindings/src/sampling.rs +++ b/llama-cpp-bindings/src/sampling.rs @@ -13,30 +13,19 @@ use crate::token::logit_bias::LlamaLogitBias; use crate::{GrammarError, SampleError, SamplerAcceptError, SamplingError}; fn check_sampler_accept_status( - status: llama_cpp_bindings_sys::llama_rs_status, + status: llama_cpp_bindings_sys::llama_rs_sampler_accept_status, error_ptr: *mut c_char, ) -> Result<(), SamplerAcceptError> { match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => Ok(()), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT => { - Err(SamplerAcceptError::InvalidArgument) + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_OK => Ok(()), + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED => { + Err(SamplerAcceptError::NotEnoughMemory) } - _ => Err(SamplerAcceptError::CppException(unsafe { - read_and_free_cpp_error(error_ptr) - })), - } -} - -fn check_sampler_not_null( - sampler: *mut llama_cpp_bindings_sys::llama_sampler, - error_ptr: *mut c_char, -) -> Result { - if sampler.is_null() { - Err(GrammarError::NullGrammar(unsafe { - read_and_free_cpp_error(error_ptr) - })) - } else { - Ok(LlamaSampler { sampler }) + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(SamplerAcceptError::GrammarStateCorrupted { message }) + } + other => unreachable!("llama_rs_sampler_accept returned unrecognized status {other}"), } } @@ -85,13 +74,15 @@ impl LlamaSampler { }; match status { - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK => Ok(LlamaToken(token)), - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT => { - Err(SampleError::InvalidArgument) + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_OK => Ok(LlamaToken(token)), + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED => { + Err(SampleError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(SampleError::Reported { message }) } - _ => Err(SampleError::CppException(unsafe { - read_and_free_cpp_error(error_ptr) - })), + other => unreachable!("llama_rs_sampler_sample returned unrecognized status {other}"), } } @@ -377,18 +368,37 @@ impl LlamaSampler { ) -> Result { let (grammar_str, grammar_root) = Self::sanitize_grammar_strings(grammar_str, grammar_root)?; + let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); - let sampler = unsafe { + let status = unsafe { llama_cpp_bindings_sys::llama_rs_sampler_init_grammar( model.vocab_ptr(), grammar_str.as_ptr(), grammar_root.as_ptr(), + &raw mut sampler, &raw mut error_ptr, ) }; - check_sampler_not_null(sampler, error_ptr) + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK => { + Ok(Self { sampler }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL => { + Err(GrammarError::GrammarMalformed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED => { + Err(GrammarError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::Reported { message }) + } + other => unreachable!( + "llama_rs_sampler_init_grammar returned unrecognized status {other}" + ), + } } /// Lazy grammar sampler, introduced in @@ -407,12 +417,13 @@ impl LlamaSampler { let (grammar_str, grammar_root) = Self::sanitize_grammar_strings(grammar_str, grammar_root)?; let trigger_words = Self::sanitize_trigger_words(trigger_words)?; + let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); let mut trigger_word_ptrs: Vec<*const c_char> = trigger_words.iter().map(|cs| cs.as_ptr()).collect(); - let sampler = unsafe { + let status = unsafe { llama_cpp_bindings_sys::llama_rs_sampler_init_grammar_lazy( model.vocab_ptr(), grammar_str.as_ptr(), @@ -421,11 +432,29 @@ impl LlamaSampler { trigger_word_ptrs.len(), trigger_tokens.as_ptr().cast(), trigger_tokens.len(), + &raw mut sampler, &raw mut error_ptr, ) }; - check_sampler_not_null(sampler, error_ptr) + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK => { + Ok(Self { sampler }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL => { + Err(GrammarError::LazyGrammarMalformed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED => { + Err(GrammarError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::Reported { message }) + } + other => unreachable!( + "llama_rs_sampler_init_grammar_lazy returned unrecognized status {other}" + ), + } } /// Lazy grammar sampler using regex trigger patterns. @@ -446,12 +475,13 @@ impl LlamaSampler { let (grammar_str, grammar_root) = Self::sanitize_grammar_strings(grammar_str, grammar_root)?; let trigger_patterns = Self::sanitize_trigger_patterns(trigger_patterns)?; + let mut sampler: *mut llama_cpp_bindings_sys::llama_sampler = std::ptr::null_mut(); let mut error_ptr: *mut c_char = std::ptr::null_mut(); let mut trigger_pattern_ptrs: Vec<*const c_char> = trigger_patterns.iter().map(|cs| cs.as_ptr()).collect(); - let sampler = unsafe { + let status = unsafe { llama_cpp_bindings_sys::llama_rs_sampler_init_grammar_lazy_patterns( model.vocab_ptr(), grammar_str.as_ptr(), @@ -460,11 +490,33 @@ impl LlamaSampler { trigger_pattern_ptrs.len(), trigger_tokens.as_ptr().cast(), trigger_tokens.len(), + &raw mut sampler, &raw mut error_ptr, ) }; - check_sampler_not_null(sampler, error_ptr) + match status { + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK => { + Ok(Self { sampler }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL => { + Err(GrammarError::LazyPatternsGrammarMalformed) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED => { + Err(GrammarError::NotEnoughMemory) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_INVALID_TRIGGER_PATTERN => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::InvalidTriggerPattern { message }) + } + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION => { + let message = unsafe { read_and_free_cpp_error(error_ptr) }; + Err(GrammarError::Reported { message }) + } + other => unreachable!( + "llama_rs_sampler_init_grammar_lazy_patterns returned unrecognized status {other}" + ), + } } /// `LLGuidance` sampler for constrained decoding. @@ -719,7 +771,7 @@ mod tests { fn sanitize_grammar_strings_root_not_found() { let result = LlamaSampler::sanitize_grammar_strings("expr ::= \"hello\"", "root"); - assert_eq!(result.err(), Some(GrammarError::RootNotFound)); + assert!(matches!(result.err(), Some(GrammarError::RootNotFound))); } #[test] @@ -920,7 +972,7 @@ mod tests { #[test] fn check_sampler_accept_status_ok() { let result = super::check_sampler_accept_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_OK, + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_OK, std::ptr::null_mut(), ); @@ -928,35 +980,15 @@ mod tests { } #[test] - fn check_sampler_accept_status_invalid_argument() { + fn check_sampler_accept_status_exception_maps_to_typed_variant() { let result = super::check_sampler_accept_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_INVALID_ARGUMENT, + llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION, std::ptr::null_mut(), ); assert!(matches!( result, - Err(crate::SamplerAcceptError::InvalidArgument) + Err(crate::SamplerAcceptError::GrammarStateCorrupted { .. }) )); } - - #[test] - fn check_sampler_accept_status_exception() { - let result = super::check_sampler_accept_status( - llama_cpp_bindings_sys::LLAMA_RS_STATUS_EXCEPTION, - std::ptr::null_mut(), - ); - - assert!(matches!( - result, - Err(crate::SamplerAcceptError::CppException(_)) - )); - } - - #[test] - fn check_sampler_not_null_returns_error() { - let result = super::check_sampler_not_null(std::ptr::null_mut(), std::ptr::null_mut()); - - assert!(result.is_err()); - } } diff --git a/llama-cpp-bindings/src/send_logs_to_log.rs b/llama-cpp-bindings/src/send_logs_to_log.rs index 6bd8fbb7..4fa50e91 100644 --- a/llama-cpp-bindings/src/send_logs_to_log.rs +++ b/llama-cpp-bindings/src/send_logs_to_log.rs @@ -33,6 +33,16 @@ impl LogSource { static LLAMA_SOURCE: OnceLock = OnceLock::new(); static GGML_SOURCE: OnceLock = OnceLock::new(); +#[cfg(target_env = "msvc")] +const fn ggml_level_to_u32(level: llama_cpp_bindings_sys::ggml_log_level) -> u32 { + level.cast_unsigned() +} + +#[cfg(not(target_env = "msvc"))] +const fn ggml_level_to_u32(level: llama_cpp_bindings_sys::ggml_log_level) -> u32 { + level +} + const fn ggml_level_to_incoming(raw: llama_cpp_bindings_sys::ggml_log_level) -> IncomingLogLevel { match raw { llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE => IncomingLogLevel::None, @@ -41,7 +51,7 @@ const fn ggml_level_to_incoming(raw: llama_cpp_bindings_sys::ggml_log_level) -> llama_cpp_bindings_sys::GGML_LOG_LEVEL_WARN => IncomingLogLevel::Warn, llama_cpp_bindings_sys::GGML_LOG_LEVEL_ERROR => IncomingLogLevel::Error, llama_cpp_bindings_sys::GGML_LOG_LEVEL_CONT => IncomingLogLevel::Cont, - other => IncomingLogLevel::Unknown(other), + other => IncomingLogLevel::Unknown(ggml_level_to_u32(other)), } } diff --git a/llama-cpp-bindings/src/token/data_array.rs b/llama-cpp-bindings/src/token/data_array.rs index af2134df..40933d7a 100644 --- a/llama-cpp-bindings/src/token/data_array.rs +++ b/llama-cpp-bindings/src/token/data_array.rs @@ -125,14 +125,26 @@ impl LlamaTokenDataArray { result } - /// Modifies the data array by applying a sampler to it + /// Modifies the data array by applying a sampler to it. + /// + /// # Panics + /// + /// Panics if the vendored sampler throws a C++ exception. `llama_sampler_apply` is + /// documented to be a pure logit transform and is not expected to throw; if it does + /// the failure is propagated as a panic per the crash-fast invariant. pub fn apply_sampler(&mut self, sampler: &LlamaSampler) { unsafe { self.modify_as_c_llama_token_data_array(|c_llama_token_data_array| { - llama_cpp_bindings_sys::llama_sampler_apply( + let mut out_error: *mut std::os::raw::c_char = ptr::null_mut(); + let status = llama_cpp_bindings_sys::llama_rs_sampler_apply( sampler.sampler, c_llama_token_data_array, + &raw mut out_error, ); + if status != llama_cpp_bindings_sys::LLAMA_RS_SAMPLER_APPLY_OK { + let message = crate::ffi_error_reader::read_and_free_cpp_error(out_error); + panic!("llama_rs_sampler_apply returned status {status}: {message}"); + } }); } } diff --git a/llama-cpp-bindings/src/tool_call_format/key_value_xml_tags.rs b/llama-cpp-bindings/src/tool_call_format/key_value_xml_tags.rs index 0ea21787..f617e38e 100644 --- a/llama-cpp-bindings/src/tool_call_format/key_value_xml_tags.rs +++ b/llama-cpp-bindings/src/tool_call_format/key_value_xml_tags.rs @@ -312,6 +312,38 @@ mod tests { } } + #[test] + fn rejects_empty_key_with_typed_failure() { + let body = "fParis"; + let result = parse(body, &glm47_markers(), &glm47_shape()); + + match result.expect_err("must error") { + KeyValueXmlTagsFailure::EmptyKey { function_name } => { + assert_eq!(function_name, "f"); + } + other => panic!("expected EmptyKey, got {other:?}"), + } + } + + #[test] + fn rejects_unclosed_value_tag_with_typed_failure() { + let body = "flocationParis"; + let result = parse(body, &glm47_markers(), &glm47_shape()); + + match result.expect_err("must error") { + KeyValueXmlTagsFailure::UnclosedValueTag { + function_name, + key, + expected_close, + } => { + assert_eq!(function_name, "f"); + assert_eq!(key, "location"); + assert_eq!(expected_close, ""); + } + other => panic!("expected UnclosedValueTag, got {other:?}"), + } + } + #[test] fn returns_empty_for_body_without_open_marker() { let parsed = diff --git a/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs b/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs index eba1b87e..074fc3c3 100644 --- a/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs +++ b/llama-cpp-bindings/src/tool_call_format/paired_quote_args.rs @@ -430,4 +430,21 @@ mod tests { other => panic!("expected EmptyKey, got {other:?}"), } } + + #[test] + fn rejects_args_body_without_key_colon_with_typed_failure() { + let result = parse( + "<|tool_call>call:f{noColonHere", + &gemma4_markers(), + &gemma4_shape(), + ); + + match result.expect_err("args body without colon must produce a typed failure") { + PairedQuoteFailure::UnclosedArgumentBlock { tool_name, state } => { + assert_eq!(tool_name, "f"); + assert_eq!(state, "key"); + } + other => panic!("expected UnclosedArgumentBlock, got {other:?}"), + } + } } diff --git a/llama-cpp-bindings/src/tool_call_format/xml_function_tags.rs b/llama-cpp-bindings/src/tool_call_format/xml_function_tags.rs index 0e1cb0af..8f7bdede 100644 --- a/llama-cpp-bindings/src/tool_call_format/xml_function_tags.rs +++ b/llama-cpp-bindings/src/tool_call_format/xml_function_tags.rs @@ -273,6 +273,25 @@ mod tests { } } + #[test] + fn rejects_parameter_tag_missing_closing_angle_with_typed_failure() { + let body = ""; + let result = parse(body, &xml_shape()); + + match result.expect_err("must error") { + XmlFunctionTagsFailure::UnclosedParameterBlock { + function_name, + parameter_name, + expected_close, + } => { + assert_eq!(function_name, "f"); + assert_eq!(parameter_name, ""); + assert_eq!(expected_close, ""); + } + other => panic!("expected UnclosedParameterBlock, got {other:?}"), + } + } + #[test] fn rejects_parameter_block_missing_close_tag_with_typed_failure() { let body = "Paris"; diff --git a/llama-cpp-log-decoder/src/log_decoder.rs b/llama-cpp-log-decoder/src/log_decoder.rs index 221aba42..395e0e05 100644 --- a/llama-cpp-log-decoder/src/log_decoder.rs +++ b/llama-cpp-log-decoder/src/log_decoder.rs @@ -324,4 +324,73 @@ mod tests { } ); } + + #[test] + fn feed_debug_level() { + let mut decoder = LogDecoder::new(); + let result = decoder.feed(IncomingLogLevel::Debug, "trace\n"); + + assert_eq!( + result, + DecodeResult { + output: DecodeOutput::Line(LogLine { + level: LogLevel::Debug, + text: "trace".to_owned(), + }), + anomaly: None, + } + ); + } + + #[test] + fn feed_error_level() { + let mut decoder = LogDecoder::new(); + let result = decoder.feed(IncomingLogLevel::Error, "boom\n"); + + assert_eq!( + result, + DecodeResult { + output: DecodeOutput::Line(LogLine { + level: LogLevel::Error, + text: "boom".to_owned(), + }), + anomaly: None, + } + ); + } + + #[test] + fn feed_orphan_cont_without_newline_buffers_text() { + let mut decoder = LogDecoder::new(); + let result = decoder.feed(IncomingLogLevel::Cont, "fragment"); + + assert_eq!( + result, + DecodeResult { + output: DecodeOutput::None, + anomaly: Some(DecodeAnomaly::OrphanCont), + } + ); + + let follow_up = decoder.feed(IncomingLogLevel::Cont, " rest\n"); + assert_eq!( + follow_up, + DecodeResult { + output: DecodeOutput::Line(LogLine { + level: LogLevel::None, + text: "fragment rest".to_owned(), + }), + anomaly: None, + } + ); + } + + #[test] + fn default_construction() { + let mut default_decoder = LogDecoder::default(); + let new_decoder_result = LogDecoder::new().feed(IncomingLogLevel::Info, "compare\n"); + let default_result = default_decoder.feed(IncomingLogLevel::Info, "compare\n"); + + assert_eq!(default_result, new_decoder_result); + } } diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..ae07c85e --- /dev/null +++ b/package-lock.json @@ -0,0 +1,26 @@ +{ + "name": "llama-cpp-bindings-coverage-check", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "llama-cpp-bindings-coverage-check", + "devDependencies": { + "@intentee/rust-coverage-check": "^0.2.0" + } + }, + "node_modules/@intentee/rust-coverage-check": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/@intentee/rust-coverage-check/-/rust-coverage-check-0.2.0.tgz", + "integrity": "sha512-RRCHxYdYLk5SjmBw4YAOsaE3GwjJYuzhpY/Gaz8frW0dLqXZNBz5Xtd2k79dI8januHjLDjH12/vYNHOoHNsSw==", + "dev": true, + "license": "MIT", + "bin": { + "rust-coverage-check": "src/main.mjs" + }, + "engines": { + "node": ">=24.0.0" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 00000000..ecd78fc6 --- /dev/null +++ b/package.json @@ -0,0 +1,8 @@ +{ + "name": "llama-cpp-bindings-coverage-check", + "private": true, + "type": "module", + "devDependencies": { + "@intentee/rust-coverage-check": "^0.2.0" + } +} diff --git a/scripts/coverage-uncovered-lines.py b/scripts/coverage-uncovered-lines.py deleted file mode 100755 index 95a6ee65..00000000 --- a/scripts/coverage-uncovered-lines.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -"""Find uncovered lines from cargo-llvm-cov JSON export. - -Usage: - python3 scripts/coverage-uncovered-lines.py target/coverage.json - -Matches the "Lines" metric used by `cargo llvm-cov --fail-under-lines`. - -LLVM segments with has_count=True define code regions with an execution count. -Each such segment applies from (line, col) until the next has_count=True segment. -A line is "executable" if any such segment overlaps it. -A line is "covered" if the maximum count of all overlapping segments is > 0. -""" - -import json -import os -import sys - -SRC_MARKER = "llama-cpp-bindings/src/" - - -def compute_line_coverage(segments): - """Walk segments and compute per-line max execution count.""" - line_max_count = {} - - # Filter to only segments with actual count data - counted_segments = [ - (line, col, count) - for line, col, count, has_count, _is_region, is_gap in segments - if has_count and not is_gap - ] - - for i, (seg_line, seg_col, count) in enumerate(counted_segments): - # This segment's count applies from seg_line until the next counted segment - if i + 1 < len(counted_segments): - end_line = counted_segments[i + 1][0] - end_col = counted_segments[i + 1][1] - - # If next segment starts at col 1, it owns that line - if end_col <= 1: - end_line -= 1 - else: - end_line = seg_line - - for line in range(seg_line, max(end_line, seg_line) + 1): - current_max = line_max_count.get(line, 0) - line_max_count[line] = max(current_max, count) - - return line_max_count - - -def find_uncovered_lines(json_path): - with open(json_path) as json_file: - data = json.load(json_file) - - totals = data["data"][0]["totals"]["lines"] - total_lines = totals["count"] - covered_lines = totals["covered"] - missed_lines = total_lines - covered_lines - percent = totals["percent"] - - print( - f"Lines: {total_lines} Covered: {covered_lines} " - f"Missed: {missed_lines} ({percent:.2f}%)" - ) - print() - - uncovered_count = 0 - - for file_data in data["data"][0]["files"]: - filename = file_data["filename"] - - if SRC_MARKER not in filename: - continue - - short_name = filename.split(SRC_MARKER, 1)[1] - summary_lines = file_data["summary"]["lines"] - - if summary_lines["count"] == summary_lines["covered"]: - continue - - segments = file_data.get("segments", []) - - if not segments: - continue - - line_max_count = compute_line_coverage(segments) - - source_lines = {} - - if os.path.isfile(filename): - with open(filename) as source_file: - for line_number, line_text in enumerate(source_file, 1): - if line_number in line_max_count and line_max_count[line_number] == 0: - source_lines[line_number] = line_text.rstrip() - - for line_number in sorted(source_lines): - code = source_lines[line_number] - print(f"{short_name}:{line_number}: {code}") - uncovered_count += 1 - - print() - print(f"Total uncovered lines: {uncovered_count}") - - -def main(): - if len(sys.argv) < 2: - print( - "Usage: python3 scripts/coverage-uncovered-lines.py ", - file=sys.stderr, - ) - sys.exit(1) - - find_uncovered_lines(sys.argv[1]) - - -if __name__ == "__main__": - main()