llama : add reranking support #9510

Merged: 25 commits, Sep 28, 2024

Changes from 1 commit. Commits:
3453e62
py : add XLMRobertaForSequenceClassification [no ci]
ggerganov Sep 16, 2024
77723ed
py : fix scalar-tensor conversion [no ci]
ggerganov Sep 17, 2024
49f90de
py : fix position embeddings chop [no ci]
ggerganov Sep 17, 2024
dc0cdd8
llama : read new cls tensors [no ci]
ggerganov Sep 17, 2024
d0a7bf9
llama : add classification head (wip) [no ci]
ggerganov Sep 18, 2024
125a067
llama : add "rank" pooling type
ggerganov Sep 19, 2024
6235c62
server : add rerank endpoint
ggerganov Sep 19, 2024
6916ed1
llama : avoid ggml_repeat during classification
ggerganov Sep 23, 2024
62a45d1
rerank : cleanup + comments
ggerganov Sep 25, 2024
7bde9a0
server : accept /rerank endpoint in addition to /v1/rerank [no ci]
ggerganov Sep 25, 2024
c62a39d
embedding : parse special tokens
ggerganov Sep 25, 2024
866c011
jina : support v1 reranker
ggerganov Sep 25, 2024
84f56f3
vocab : minor style
ggerganov Sep 25, 2024
00b3376
server : initiate tests for later
ggerganov Sep 26, 2024
877a04c
server : add docs
ggerganov Sep 26, 2024
4d45775
llama : add comment [no ci]
ggerganov Sep 26, 2024
ca99a6c
llama : fix uninitialized tensors
ggerganov Sep 26, 2024
f19554f
ci : add rerank tests
ggerganov Sep 26, 2024
f27dd69
add reranking test
ngxson Sep 26, 2024
1ae8376
change test data
ngxson Sep 26, 2024
84b0af8
Update examples/server/server.cpp
ggerganov Sep 27, 2024
0d6f6a7
add `--reranking` argument
ngxson Sep 27, 2024
a4ac45f
update server docs
ngxson Sep 27, 2024
39167b6
llama : fix comment [no ci]
ggerganov Sep 28, 2024
aeac876
Merge branch 'master' into gg/rerank
ggerganov Sep 28, 2024
llama : read new cls tensors [no ci]
ggerganov committed Sep 25, 2024
commit dc0cdd8760547b041bb206541e3c9cf9bb879777
src/llama.cpp (16 additions, 0 deletions)
@@ -602,6 +602,8 @@ enum llm_tensor {
     LLM_TENSOR_ENC_FFN_DOWN,
     LLM_TENSOR_ENC_FFN_UP,
     LLM_TENSOR_ENC_OUTPUT_NORM,
+    LLM_TENSOR_CLS,
+    LLM_TENSOR_CLS_OUT,
 };
 
 static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
@@ -789,6 +791,8 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
             { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+            { LLM_TENSOR_CLS,            "cls" },
+            { LLM_TENSOR_CLS_OUT,        "cls.output" },
         },
     },
     {
@@ -2882,6 +2886,12 @@ struct llama_model {
     struct ggml_tensor * output_b;
     struct ggml_tensor * output_norm_enc;
 
+    // classifier
+    struct ggml_tensor * cls;
+    struct ggml_tensor * cls_b;
+    struct ggml_tensor * cls_out;
+    struct ggml_tensor * cls_out_b;
+
     std::vector<llama_layer> layers;
 
     llama_split_mode split_mode;
@@ -7351,6 +7361,12 @@ static bool llm_load_tensors(
 
                 if (model.arch == LLM_ARCH_BERT) {
                     model.pos_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train});
+
+                    model.cls   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    model.cls_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS, "bias"),   {n_embd},         llama_model_loader::TENSOR_NOT_REQUIRED);
+
+                    model.cls_out   = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, 1}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    model.cls_out_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_CLS_OUT, "bias"),   {1},         llama_model_loader::TENSOR_NOT_REQUIRED);
                 }
 
                 model.tok_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd});
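
This commit only loads the tensors; the classification head itself is wired into the graph later in this PR (the "add classification head (wip)" and "add \"rank\" pooling type" commits). As a rough orientation, and not code from this PR, the sketch below shows in plain C++ how a BERT/XLMRoberta-style sequence-classification head typically turns the embedding of the CLS token into a single relevance score: a dense projection with tanh (the "cls" weight and bias), followed by a projection down to one logit (the "cls.output" weight and bias). The function name, signature, and plain-vector math are illustrative assumptions standing in for the ggml graph.

```cpp
// Illustrative sketch only (not code from this commit): a typical BERT-style
// sequence-classification head using the four tensors loaded above.
#include <cmath>
#include <vector>

// score = cls_out . tanh(cls * h_cls + cls_b) + cls_out_b
float rank_score(
        const std::vector<std::vector<float>> & cls,       // "cls" weight,        [n_embd x n_embd]
        const std::vector<float>              & cls_b,     // "cls" bias,          [n_embd]
        const std::vector<float>              & cls_out,   // "cls.output" weight, [n_embd]
        float                                   cls_out_b, // "cls.output" bias,   scalar
        const std::vector<float>              & h_cls) {   // embedding of the CLS token, [n_embd]
    const size_t n_embd = h_cls.size();

    // dense pooling projection with tanh: pooled = tanh(cls * h_cls + cls_b)
    std::vector<float> pooled(n_embd);
    for (size_t i = 0; i < n_embd; ++i) {
        float acc = cls_b[i];
        for (size_t j = 0; j < n_embd; ++j) {
            acc += cls[i][j] * h_cls[j];
        }
        pooled[i] = std::tanh(acc);
    }

    // projection to a single relevance logit: score = cls_out . pooled + cls_out_b
    float score = cls_out_b;
    for (size_t i = 0; i < n_embd; ++i) {
        score += cls_out[i] * pooled[i];
    }
    return score;
}
```

Since every classifier tensor above is created with llama_model_loader::TENSOR_NOT_REQUIRED, embedding-only BERT models that ship without a classifier continue to load unchanged; presumably the head is only evaluated when these tensors are present.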