diff --git a/Makefile b/Makefile
index 7150dbaf6..a6d2c2ec0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
-	speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -594,6 +594,9 @@ infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
index 17166836a..4ff8e3fa7 100644
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -26,7 +26,7 @@ int main(int argc, char ** argv) {
     llama_context_params ctx_params = llama_context_default_params();
     llama_context * ctx = llama_new_context_with_model(model, ctx_params);
 
-    const bool add_bos = true;
+    const bool add_bos = llama_should_add_bos_token(model);
 
     std::vector<llama_token> tokens;
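
The functional change here is that the tokenize example no longer hardcodes a leading BOS token; it asks the model, so GGUF files whose vocab metadata disables BOS insertion are tokenized without a spurious leading token. Below is a minimal standalone sketch of that usage pattern against the llama.cpp C API of this era, not the example's full source; the model path, prompt, and the `vocab_only` shortcut are illustrative assumptions, not part of this patch:

```cpp
// Hedged sketch: tokenize a prompt while letting the model's metadata
// decide whether a BOS token is prepended (the behavior this patch adopts).
#include "llama.h"

#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    llama_backend_init(false); // numa = false

    llama_model_params mparams = llama_model_default_params();
    mparams.vocab_only = true; // tokenization only needs the vocab

    // "model.gguf" is a placeholder path
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // the change in this patch: consult the model instead of hardcoding `true`
    const bool add_bos = llama_should_add_bos_token(model);

    const char * prompt   = "Hello, world!";
    const int    text_len = (int) strlen(prompt);

    // upper bound on the token count: one token per byte, plus the optional BOS
    std::vector<llama_token> tokens(text_len + (add_bos ? 1 : 0));
    const int n = llama_tokenize(model, prompt, text_len,
                                 tokens.data(), (int) tokens.size(),
                                 add_bos, /*special =*/ true);
    tokens.resize(n > 0 ? n : 0);

    for (const llama_token tok : tokens) {
        printf("%d\n", tok);
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```

With `add_bos` derived from the model, the printed token IDs start with the BOS token only when the model's metadata requests it, rather than unconditionally as before.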