| // 'ggmf' in hex. | |
| extern "C" { | |
| struct rwkv_v2_context; | |
| // Loads the model from a file and prepares it for inference. | |
| // Returns NULL on any error. Error messages would be printed to stderr. | |
| // - model_file_path: path to model file in ggml format. | |
| // - n_threads: count of threads to use, must be positive. | |
| RWKV_V2_API struct rwkv_v2_context * rwkv_v2_init_from_file(const char * model_file_path, uint32_t n_threads); | |
| // Evaluates the model for a single token. | |
| // Returns false on any error. Error messages would be printed to stderr. | |
| // - token: next token index, in range 0 <= token < n_vocab. | |
| // - state_in: FP32 buffer of size rwkv_v2_get_state_buffer_element_count; or NULL, if this is a first pass. | |
| // - state_out: FP32 buffer of size rwkv_v2_get_state_buffer_element_count. This buffer will be written to. | |
| // - logits_out: FP32 buffer of size rwkv_v2_get_logits_buffer_element_count. This buffer will be written to. | |
| RWKV_V2_API bool rwkv_v2_eval(struct rwkv_v2_context * ctx, int32_t token, float * state_in, float * state_out, float * logits_out); | |
| // Returns count of FP32 elements in state buffer. | |
| RWKV_V2_API uint32_t rwkv_v2_get_state_buffer_element_count(struct rwkv_v2_context * ctx); | |
| // Returns count of FP32 elements in logits buffer. | |
| RWKV_V2_API uint32_t rwkv_v2_get_logits_buffer_element_count(struct rwkv_v2_context * ctx); | |
| // Frees all allocated memory and the context. | |
| RWKV_V2_API void rwkv_v2_free(struct rwkv_v2_context * ctx); | |
| // Quantizes FP32 or FP16 model to one of quantized formats. | |
| // Returns false on any error. Error messages would be printed to stderr. | |
| // - model_file_path_in: path to model file in ggml format, must be either FP32 or FP16. | |
| // - model_file_path_out: quantized model will be written here. | |
| // - format_name: must be one of available format names below. | |
| // Available format names: | |
| // - Q4_0 | |
| // - Q4_1 | |
| // - Q4_2 | |
| // - Q5_0 | |
| // - Q5_1 | |
| // - Q8_0 | |
| RWKV_V2_API bool rwkv_v2_quantize_model_file(const char * model_file_path_in, const char * model_file_path_out, const char * format_name); | |
| // Returns system information string. | |
| RWKV_V2_API const char * rwkv_v2_get_system_info_string(void); | |
| } | |