pj-mathematician committed
Commit 13d4c6a · verified · 1 parent: 523c335

Add files using upload-large-folder tool

Files changed (43)
  1. checkpoint-4000/1_Pooling/config.json +10 -0
  2. checkpoint-4000/config.json +27 -0
  3. checkpoint-4000/config_sentence_transformers.json +10 -0
  4. checkpoint-4000/model.safetensors +3 -0
  5. checkpoint-4000/optimizer.pt +3 -0
  6. checkpoint-4000/rng_state.pth +3 -0
  7. checkpoint-4000/scaler.pt +3 -0
  8. checkpoint-4000/scheduler.pt +3 -0
  9. checkpoint-4000/sentence_bert_config.json +4 -0
  10. checkpoint-4000/special_tokens_map.json +51 -0
  11. checkpoint-4000/tokenizer_config.json +56 -0
  12. checkpoint-4000/trainer_state.json +0 -0
  13. checkpoint-4000/training_args.bin +3 -0
  14. checkpoint-4200/1_Pooling/config.json +10 -0
  15. checkpoint-4200/README.md +1438 -0
  16. checkpoint-4200/model.safetensors +3 -0
  17. checkpoint-4200/modules.json +20 -0
  18. checkpoint-4200/optimizer.pt +3 -0
  19. checkpoint-4200/rng_state.pth +3 -0
  20. checkpoint-4200/scaler.pt +3 -0
  21. checkpoint-4200/scheduler.pt +3 -0
  22. checkpoint-4200/sentence_bert_config.json +4 -0
  23. checkpoint-4200/sentencepiece.bpe.model +3 -0
  24. checkpoint-4200/training_args.bin +3 -0
  25. checkpoint-4400/config.json +27 -0
  26. checkpoint-4400/model.safetensors +3 -0
  27. checkpoint-4400/modules.json +20 -0
  28. checkpoint-4400/optimizer.pt +3 -0
  29. checkpoint-4400/scaler.pt +3 -0
  30. checkpoint-4400/scheduler.pt +3 -0
  31. checkpoint-4400/trainer_state.json +0 -0
  32. checkpoint-4400/training_args.bin +3 -0
  33. checkpoint-4600/model.safetensors +3 -0
  34. checkpoint-4600/optimizer.pt +3 -0
  35. checkpoint-4600/sentencepiece.bpe.model +3 -0
  36. checkpoint-4600/special_tokens_map.json +51 -0
  37. checkpoint-4800/1_Pooling/config.json +10 -0
  38. checkpoint-4800/model.safetensors +3 -0
  39. checkpoint-4800/optimizer.pt +3 -0
  40. checkpoint-4800/rng_state.pth +3 -0
  41. checkpoint-4800/scaler.pt +3 -0
  42. checkpoint-4800/sentencepiece.bpe.model +3 -0
  43. checkpoint-4800/training_args.bin +3 -0
checkpoint-4000/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
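This pooling configuration selects CLS-token pooling: the hidden state of the first token (1024 dimensions for this XLM-RoBERTa-based encoder) becomes the sentence embedding, with all other pooling modes disabled. A minimal sketch of an equivalent module using the public `sentence_transformers.models.Pooling` API (this config.json is the file that class serializes):

```python
from sentence_transformers.models import Pooling

# Equivalent to the config above: use the CLS token's hidden state
# as the sentence embedding; mean/max/last-token pooling stay off.
pooling = Pooling(word_embedding_dimension=1024, pooling_mode="cls")
print(pooling.get_pooling_mode_str())  # -> "cls"
```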
checkpoint-4000/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "XLMRobertaModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 8194,
+   "model_type": "xlm-roberta",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 250002
+ }
checkpoint-4000/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "4.1.0",
+     "transformers": "4.51.2",
+     "pytorch": "2.6.0+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e396a37e5acc679024a9fc8c8cddae538f7cf082d5682d004c44cf494e226f69
+ size 2271064456
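Note that `model.safetensors` and the other binary files in this commit (`optimizer.pt`, `rng_state.pth`, `scaler.pt`, `scheduler.pt`, `training_args.bin`, `sentencepiece.bpe.model`) are checked in as Git LFS pointer files: each three-line entry records only the LFS spec version, the SHA-256 object id, and the payload size in bytes (here roughly 2.27 GB of float32 weights), while the actual binaries live in LFS storage.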
checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99efc690de9364ab3597a63d4ecd7e827d9b9ac9541a5cd91510bccd5027f02
+ size 4533972937
checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3efbf048b90dbf88fbe7c3e5343d8f5b231ce2f762e746e6dc52ad13d65a600
+ size 15958
checkpoint-4000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60386f0725c4fe780fb345fd957860c14c74f3535df8dbe1242c6c681a5d255b
+ size 988
checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:067bf8b23f4d6fd97b4d2f83930a64c8500ad6e476ffd0437ac9124a04eee854
+ size 1064
checkpoint-4000/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 512,
+   "do_lower_case": false
+ }
checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "250001": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "model_max_length": 8192,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "XLMRobertaTokenizer",
+   "unk_token": "<unk>"
+ }
checkpoint-4000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624
checkpoint-4200/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
checkpoint-4200/README.md ADDED
@@ -0,0 +1,1438 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:124788
+ - loss:GISTEmbedLoss
+ base_model: BAAI/bge-m3
+ widget:
+ - source_sentence: 其他机械、设备和有形货物租赁服务代表
+   sentences:
+   - 其他机械和设备租赁服务工作人员
+   - 电子和电信设备及零部件物流经理
+   - 工业主厨
+ - source_sentence: 公交车司机
+   sentences:
+   - 表演灯光设计师
+   - 乙烯基地板安装工
+   - 国际巴士司机
+ - source_sentence: online communication manager
+   sentences:
+   - trades union official
+   - social media manager
+   - budget manager
+ - source_sentence: Projektmanagerin
+   sentences:
+   - Projektmanager/Projektmanagerin
+   - Category-Manager
+   - Infanterist
+ - source_sentence: Volksvertreter
+   sentences:
+   - Parlamentarier
+   - Oberbürgermeister
+   - Konsul
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ metrics:
+ - cosine_accuracy@1
+ - cosine_accuracy@20
+ - cosine_accuracy@50
+ - cosine_accuracy@100
+ - cosine_accuracy@150
+ - cosine_accuracy@200
+ - cosine_precision@1
+ - cosine_precision@20
+ - cosine_precision@50
+ - cosine_precision@100
+ - cosine_precision@150
+ - cosine_precision@200
+ - cosine_recall@1
+ - cosine_recall@20
+ - cosine_recall@50
+ - cosine_recall@100
+ - cosine_recall@150
+ - cosine_recall@200
+ - cosine_ndcg@1
+ - cosine_ndcg@20
+ - cosine_ndcg@50
+ - cosine_ndcg@100
+ - cosine_ndcg@150
+ - cosine_ndcg@200
+ - cosine_mrr@1
+ - cosine_mrr@20
+ - cosine_mrr@50
+ - cosine_mrr@100
+ - cosine_mrr@150
+ - cosine_mrr@200
+ - cosine_map@1
+ - cosine_map@20
+ - cosine_map@50
+ - cosine_map@100
+ - cosine_map@150
+ - cosine_map@200
+ - cosine_map@500
+ model-index:
+ - name: SentenceTransformer based on BAAI/bge-m3
+   results:
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full en
+       type: full_en
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.6571428571428571
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9904761904761905
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.9904761904761905
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9904761904761905
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9904761904761905
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9904761904761905
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.6571428571428571
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.5042857142857142
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.30342857142857144
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.18485714285714283
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.13161904761904764
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.1020952380952381
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.06749696615971254
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.5373072040835736
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.7066915041490871
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.8223255763807351
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.8681298207585033
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.8939381871513931
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.6571428571428571
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.6828242233504754
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.6934957075565445
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.7508237653332346
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.7708996755918012
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.7810547976165594
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.6571428571428571
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.8050793650793651
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.8050793650793651
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.8050793650793651
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.8050793650793651
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.8050793650793651
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.6571428571428571
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.5403780248322398
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.5246924299662313
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.5574701928996357
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.5657362210212612
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.5689495406824301
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.5740394717933254
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full es
+       type: full_es
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.11351351351351352
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 1.0
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 1.0
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 1.0
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 1.0
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 1.0
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.11351351351351352
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.5678378378378378
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.38616216216216215
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.24956756756756757
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.18836036036036036
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.14981081081081082
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.0035155918996302815
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.37836142042267473
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.5571586783455559
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.6675392853403386
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.7304539075934318
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.762368065923207
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.11351351351351352
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.6138712223781554
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.5860105244597086
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.612222606218991
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.6445206608822607
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.6607643472995034
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.11351351351351352
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.5536036036036036
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.5536036036036036
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.5536036036036036
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.5536036036036036
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.5536036036036036
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.11351351351351352
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.48205571119205054
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.426066001253444
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.4286297248227863
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.44367730975701125
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.45055470203697434
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.4632014183024849
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full de
+       type: full_de
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.2955665024630542
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9802955665024631
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.9852216748768473
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9852216748768473
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9901477832512315
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9901477832512315
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.2955665024630542
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.5391625615763547
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.3801970443349754
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.2476847290640394
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.18568144499178982
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.14891625615763546
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.01108543831680986
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.3399387209539555
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.5308580040187325
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.6430327898382845
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.7043523082318627
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.7435945575564449
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.2955665024630542
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.5620736680453444
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.5486209217219633
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.5742560822304251
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.6059775924816383
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.6254063201510274
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.2955665024630542
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.5138789918346562
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.5140086262655611
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.5140086262655611
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.5140546646615833
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.5140546646615833
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.2955665024630542
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.42010224651188977
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.37517744419195703
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.3784520844424068
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.3928983602214202
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.40049621656562834
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.4142041780241764
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full zh
+       type: full_zh
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.6601941747572816
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9902912621359223
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.9902912621359223
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9902912621359223
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9902912621359223
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9902912621359223
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.6601941747572816
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.46990291262135936
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.2766990291262136
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.17145631067961165
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.12381877022653723
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.09747572815533984
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.06391645269201905
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.5028687618433456
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.6651242597088418
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.7783273755437382
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.8334866166756513
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.8666706510858552
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.6601941747572816
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.6467729312304265
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.6531754449097694
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.7091690247935931
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.7326072552384693
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.7462718534326636
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.6601941747572816
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.8101941747572816
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.8101941747572816
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.8101941747572816
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.8101941747572816
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.8101941747572816
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.6601941747572816
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.5008318658399892
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.47687535367801903
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.506399482523297
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.515344178164581
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.5196266745217748
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.5245537410408139
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: mix es
+       type: mix_es
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.733749349973999
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9604784191367655
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.982839313572543
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9916796671866874
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9947997919916797
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9953198127925117
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.733749349973999
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.12433697347893914
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.0516588663546542
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.026229849193967765
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.017635638758883684
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.013273530941237652
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.28340762201916647
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.9186774137632172
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.9536314785924771
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.968538741549662
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.9768070722828913
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.9806205581556595
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.733749349973999
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.8074696494514497
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.8170488841773651
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.8203516409516334
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.8219710202163846
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.8226411885850343
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.733749349973999
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.8015837695391573
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.8023398853791036
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.8024787052722444
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.8025062574128484
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.8025096562416121
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.733749349973999
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.7389285820519963
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.7414939322506505
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.7419568857454747
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.7421153780150582
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.742164620684282
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.7422579374234903
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: mix de
+       type: mix_de
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.6859074362974519
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9661986479459178
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.982839313572543
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9927197087883516
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9932397295891836
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9937597503900156
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.6859074362974519
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.12732709308372334
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.05308372334893397
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.027025481019240776
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.018103657479632513
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.013606344253770154
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.2577396429190501
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.9241896342520368
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.9614317906049575
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.9787224822326227
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.983359334373375
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.9854394175767031
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.6859074362974519
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.7894367570955271
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.7998923204035095
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.8037683941688618
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.8046891228048068
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.8050715563658618
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.6859074362974519
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.7703397211809108
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.7708870204854694
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.7710242509181896
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.7710286578741289
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.7710319701085292
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.6859074362974519
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.711359959198991
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.7143436554485498
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.7149332520404413
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.7150312982701879
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.7150609466134881
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.715115635794944
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: mix zh
+       type: mix_zh
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.1814872594903796
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 1.0
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 1.0
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 1.0
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 1.0
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 1.0
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.1814872594903796
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.15439417576703063
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.0617576703068123
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.03087883515340615
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.020585890102270757
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.015439417576703075
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.05822499566649332
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 1.0
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 1.0
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 1.0
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 1.0
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 1.0
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.1814872594903796
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.5442006309834599
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.5442006309834599
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.5442006309834599
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.5442006309834599
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.5442006309834599
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.1814872594903796
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.4016099489578433
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.4016099489578433
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.4016099489578433
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.4016099489578433
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.4016099489578433
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.1814872594903796
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.32662137894847204
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.32662137894847204
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.32662137894847204
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.32662137894847204
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.32662137894847204
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.32662137894847204
+       name: Cosine Map@500
+ ---
+ 
+ # SentenceTransformer based on BAAI/bge-m3
+ 
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) on the full_en, full_de, full_es, full_zh and mix datasets. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+ 
+ ## Model Details
+ 
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) <!-- at revision 5617a9f61b028005a4858fdac845db406aefb181 -->
+ - **Maximum Sequence Length:** 512 tokens
+ - **Output Dimensionality:** 1024 dimensions
+ - **Similarity Function:** Cosine Similarity
+ - **Training Datasets:**
+   - full_en
+   - full_de
+   - full_es
+   - full_zh
+   - mix
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+ 
+ ### Model Sources
+ 
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+ 
+ ### Full Model Architecture
+ 
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
+   (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+   (2): Normalize()
+ )
+ ```
+ 
+ ## Usage
+ 
+ ### Direct Usage (Sentence Transformers)
+ 
+ First install the Sentence Transformers library:
+ 
+ ```bash
+ pip install -U sentence-transformers
+ ```
+ 
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+ 
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+     'Volksvertreter',
+     'Parlamentarier',
+     'Oberbürgermeister',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 1024]
+ 
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
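Since this commit uploads raw training checkpoints rather than a published model id, the same API can also point at a checkpoint directory. A minimal sketch, assuming a local clone in which the chosen checkpoint folder contains the full set of module files:

```python
from sentence_transformers import SentenceTransformer

# Hypothetical local path into a clone of this repository.
model = SentenceTransformer("checkpoint-4200")
embeddings = model.encode(["Volksvertreter", "Parlamentarier"])
print(model.similarity(embeddings, embeddings))  # 2x2 cosine-similarity matrix
```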
+ 
+ <!--
+ ### Direct Usage (Transformers)
+ 
+ <details><summary>Click to see the direct usage in Transformers</summary>
+ 
+ </details>
+ -->
+ 
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+ 
+ You can finetune this model on your own dataset.
+ 
+ <details><summary>Click to expand</summary>
+ 
+ </details>
+ -->
+ 
+ <!--
+ ### Out-of-Scope Use
+ 
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+ 
+ ## Evaluation
+ 
+ ### Metrics
+ 
+ #### Information Retrieval
+ 
+ * Datasets: `full_en`, `full_es`, `full_de`, `full_zh`, `mix_es`, `mix_de` and `mix_zh`
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
+ 
+ | Metric | full_en | full_es | full_de | full_zh | mix_es | mix_de | mix_zh |
+ |:---------------------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------|
+ | cosine_accuracy@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_accuracy@20 | 0.9905 | 1.0 | 0.9803 | 0.9903 | 0.9605 | 0.9662 | 1.0 |
+ | cosine_accuracy@50 | 0.9905 | 1.0 | 0.9852 | 0.9903 | 0.9828 | 0.9828 | 1.0 |
+ | cosine_accuracy@100 | 0.9905 | 1.0 | 0.9852 | 0.9903 | 0.9917 | 0.9927 | 1.0 |
+ | cosine_accuracy@150 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.9948 | 0.9932 | 1.0 |
+ | cosine_accuracy@200 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.9953 | 0.9938 | 1.0 |
+ | cosine_precision@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_precision@20 | 0.5043 | 0.5678 | 0.5392 | 0.4699 | 0.1243 | 0.1273 | 0.1544 |
+ | cosine_precision@50 | 0.3034 | 0.3862 | 0.3802 | 0.2767 | 0.0517 | 0.0531 | 0.0618 |
+ | cosine_precision@100 | 0.1849 | 0.2496 | 0.2477 | 0.1715 | 0.0262 | 0.027 | 0.0309 |
+ | cosine_precision@150 | 0.1316 | 0.1884 | 0.1857 | 0.1238 | 0.0176 | 0.0181 | 0.0206 |
+ | cosine_precision@200 | 0.1021 | 0.1498 | 0.1489 | 0.0975 | 0.0133 | 0.0136 | 0.0154 |
+ | cosine_recall@1 | 0.0675 | 0.0035 | 0.0111 | 0.0639 | 0.2834 | 0.2577 | 0.0582 |
+ | cosine_recall@20 | 0.5373 | 0.3784 | 0.3399 | 0.5029 | 0.9187 | 0.9242 | 1.0 |
+ | cosine_recall@50 | 0.7067 | 0.5572 | 0.5309 | 0.6651 | 0.9536 | 0.9614 | 1.0 |
+ | cosine_recall@100 | 0.8223 | 0.6675 | 0.643 | 0.7783 | 0.9685 | 0.9787 | 1.0 |
+ | cosine_recall@150 | 0.8681 | 0.7305 | 0.7044 | 0.8335 | 0.9768 | 0.9834 | 1.0 |
+ | cosine_recall@200 | 0.8939 | 0.7624 | 0.7436 | 0.8667 | 0.9806 | 0.9854 | 1.0 |
+ | cosine_ndcg@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_ndcg@20 | 0.6828 | 0.6139 | 0.5621 | 0.6468 | 0.8075 | 0.7894 | 0.5442 |
+ | cosine_ndcg@50 | 0.6935 | 0.586 | 0.5486 | 0.6532 | 0.817 | 0.7999 | 0.5442 |
+ | cosine_ndcg@100 | 0.7508 | 0.6122 | 0.5743 | 0.7092 | 0.8204 | 0.8038 | 0.5442 |
+ | cosine_ndcg@150 | 0.7709 | 0.6445 | 0.606 | 0.7326 | 0.822 | 0.8047 | 0.5442 |
+ | **cosine_ndcg@200** | **0.7811** | **0.6608** | **0.6254** | **0.7463** | **0.8226** | **0.8051** | **0.5442** |
+ | cosine_mrr@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_mrr@20 | 0.8051 | 0.5536 | 0.5139 | 0.8102 | 0.8016 | 0.7703 | 0.4016 |
+ | cosine_mrr@50 | 0.8051 | 0.5536 | 0.514 | 0.8102 | 0.8023 | 0.7709 | 0.4016 |
+ | cosine_mrr@100 | 0.8051 | 0.5536 | 0.514 | 0.8102 | 0.8025 | 0.771 | 0.4016 |
+ | cosine_mrr@150 | 0.8051 | 0.5536 | 0.5141 | 0.8102 | 0.8025 | 0.771 | 0.4016 |
+ | cosine_mrr@200 | 0.8051 | 0.5536 | 0.5141 | 0.8102 | 0.8025 | 0.771 | 0.4016 |
+ | cosine_map@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_map@20 | 0.5404 | 0.4821 | 0.4201 | 0.5008 | 0.7389 | 0.7114 | 0.3266 |
+ | cosine_map@50 | 0.5247 | 0.4261 | 0.3752 | 0.4769 | 0.7415 | 0.7143 | 0.3266 |
+ | cosine_map@100 | 0.5575 | 0.4286 | 0.3785 | 0.5064 | 0.742 | 0.7149 | 0.3266 |
+ | cosine_map@150 | 0.5657 | 0.4437 | 0.3929 | 0.5153 | 0.7421 | 0.715 | 0.3266 |
+ | cosine_map@200 | 0.5689 | 0.4506 | 0.4005 | 0.5196 | 0.7422 | 0.7151 | 0.3266 |
+ | cosine_map@500 | 0.574 | 0.4632 | 0.4142 | 0.5246 | 0.7423 | 0.7151 | 0.3266 |
+ 
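For reference, a minimal sketch of the `InformationRetrievalEvaluator` setup with the cutoffs used in the table above; the query/corpus/qrels mappings shown here are illustrative placeholders, not the actual evaluation data:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator

model = SentenceTransformer("checkpoint-4200")  # hypothetical local path

# Toy stand-ins for the real query/corpus/relevance mappings.
queries = {"q1": "technical manager"}
corpus = {"d1": "head of technical department", "d2": "budget manager"}
relevant_docs = {"q1": {"d1"}}

evaluator = InformationRetrievalEvaluator(
    queries=queries,
    corpus=corpus,
    relevant_docs=relevant_docs,
    name="full_en",
    accuracy_at_k=[1, 20, 50, 100, 150, 200],
    precision_recall_at_k=[1, 20, 50, 100, 150, 200],
    ndcg_at_k=[1, 20, 50, 100, 150, 200],
    mrr_at_k=[1, 20, 50, 100, 150, 200],
    map_at_k=[1, 20, 50, 100, 150, 200, 500],
)
print(evaluator(model))  # dict of cosine_accuracy@k, cosine_ndcg@k, ...
```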
+ 
+ <!--
+ ## Bias, Risks and Limitations
+ 
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+ 
+ <!--
+ ### Recommendations
+ 
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+ 
+ ## Training Details
+ 
+ ### Training Datasets
+ <details><summary>full_en</summary>
+ 
+ #### full_en
+ 
+ * Dataset: full_en
+ * Size: 28,880 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 5.68 tokens</li><li>max: 11 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 5.76 tokens</li><li>max: 12 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>air commodore</code> | <code>flight lieutenant</code> |
+   | <code>command and control officer</code> | <code>flight officer</code> |
+   | <code>air commodore</code> | <code>command and control officer</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
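The `guide` printed in these parameters, which is shared by all five training datasets below, is a 384-dimensional BERT encoder with mean pooling; the card records only its architecture, not its checkpoint name. A minimal sketch of constructing the same loss, with the guide model id as an assumption:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import GISTEmbedLoss

model = SentenceTransformer("BAAI/bge-m3")
# Assumed guide checkpoint: any 384-dim mean-pooled BERT encoder
# matching the printed architecture would fit here.
guide = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

loss = GISTEmbedLoss(
    model=model,
    guide=guide,
    temperature=0.01,
    margin_strategy="absolute",
    margin=0.0,
)
```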
+ <details><summary>full_de</summary>
+ 
+ #### full_de
+ 
+ * Dataset: full_de
+ * Size: 23,023 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 7.99 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 8.19 tokens</li><li>max: 30 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>Staffelkommandantin</code> | <code>Kommodore</code> |
+   | <code>Luftwaffenoffizierin</code> | <code>Luftwaffenoffizier/Luftwaffenoffizierin</code> |
+   | <code>Staffelkommandantin</code> | <code>Luftwaffenoffizierin</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ <details><summary>full_es</summary>
+ 
+ #### full_es
+ 
+ * Dataset: full_es
+ * Size: 20,724 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 9.13 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 8.84 tokens</li><li>max: 32 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>jefe de escuadrón</code> | <code>instructor</code> |
+   | <code>comandante de aeronave</code> | <code>instructor de simulador</code> |
+   | <code>instructor</code> | <code>oficial del Ejército del Aire</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ <details><summary>full_zh</summary>
+ 
+ #### full_zh
+ 
+ * Dataset: full_zh
+ * Size: 30,401 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 5 tokens</li><li>mean: 7.15 tokens</li><li>max: 14 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 7.46 tokens</li><li>max: 21 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>技术总监</code> | <code>技术和运营总监</code> |
+   | <code>技术总监</code> | <code>技术主管</code> |
+   | <code>技术总监</code> | <code>技术艺术总监</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ <details><summary>mix</summary>
+ 
+ #### mix
+ 
+ * Dataset: mix
+ * Size: 21,760 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 2 tokens</li><li>mean: 6.71 tokens</li><li>max: 19 tokens</li></ul> | <ul><li>min: 2 tokens</li><li>mean: 7.69 tokens</li><li>max: 19 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>technical manager</code> | <code>Technischer Direktor für Bühne, Film und Fernsehen</code> |
+   | <code>head of technical</code> | <code>directora técnica</code> |
+   | <code>head of technical department</code> | <code>技术艺术总监</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ 
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+ 
+ - `eval_strategy`: steps
+ - `per_device_train_batch_size`: 64
+ - `per_device_eval_batch_size`: 128
+ - `gradient_accumulation_steps`: 2
+ - `num_train_epochs`: 5
+ - `warmup_ratio`: 0.05
+ - `log_on_each_node`: False
+ - `fp16`: True
+ - `dataloader_num_workers`: 4
+ - `ddp_find_unused_parameters`: True
+ - `batch_sampler`: no_duplicates
+ 
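A minimal sketch of expressing the non-default hyperparameters above through the sentence-transformers trainer API (the `output_dir` is a placeholder, not taken from this repository):

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="output",  # placeholder
    eval_strategy="steps",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=128,
    gradient_accumulation_steps=2,
    num_train_epochs=5,
    warmup_ratio=0.05,
    log_on_each_node=False,
    fp16=True,
    dataloader_num_workers=4,
    ddp_find_unused_parameters=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # "no_duplicates" above
)
```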
1216
+ <details><summary>Click to expand</summary>
1217
+
1218
+ - `overwrite_output_dir`: False
1219
+ - `do_predict`: False
1220
+ - `eval_strategy`: steps
1221
+ - `prediction_loss_only`: True
1222
+ - `per_device_train_batch_size`: 64
1223
+ - `per_device_eval_batch_size`: 128
1224
+ - `per_gpu_train_batch_size`: None
1225
+ - `per_gpu_eval_batch_size`: None
1226
+ - `gradient_accumulation_steps`: 2
1227
+ - `eval_accumulation_steps`: None
1228
+ - `torch_empty_cache_steps`: None
1229
+ - `learning_rate`: 5e-05
1230
+ - `weight_decay`: 0.0
1231
+ - `adam_beta1`: 0.9
1232
+ - `adam_beta2`: 0.999
1233
+ - `adam_epsilon`: 1e-08
1234
+ - `max_grad_norm`: 1.0
1235
+ - `num_train_epochs`: 5
1236
+ - `max_steps`: -1
1237
+ - `lr_scheduler_type`: linear
1238
+ - `lr_scheduler_kwargs`: {}
1239
+ - `warmup_ratio`: 0.05
1240
+ - `warmup_steps`: 0
1241
+ - `log_level`: passive
1242
+ - `log_level_replica`: warning
1243
+ - `log_on_each_node`: False
1244
+ - `logging_nan_inf_filter`: True
1245
+ - `save_safetensors`: True
1246
+ - `save_on_each_node`: False
1247
+ - `save_only_model`: False
1248
+ - `restore_callback_states_from_checkpoint`: False
1249
+ - `no_cuda`: False
1250
+ - `use_cpu`: False
1251
+ - `use_mps_device`: False
1252
+ - `seed`: 42
1253
+ - `data_seed`: None
1254
+ - `jit_mode_eval`: False
1255
+ - `use_ipex`: False
1256
+ - `bf16`: False
1257
+ - `fp16`: True
1258
+ - `fp16_opt_level`: O1
1259
+ - `half_precision_backend`: auto
1260
+ - `bf16_full_eval`: False
1261
+ - `fp16_full_eval`: False
1262
+ - `tf32`: None
1263
+ - `local_rank`: 0
1264
+ - `ddp_backend`: None
1265
+ - `tpu_num_cores`: None
1266
+ - `tpu_metrics_debug`: False
1267
+ - `debug`: []
1268
+ - `dataloader_drop_last`: True
1269
+ - `dataloader_num_workers`: 4
1270
+ - `dataloader_prefetch_factor`: None
1271
+ - `past_index`: -1
1272
+ - `disable_tqdm`: False
1273
+ - `remove_unused_columns`: True
1274
+ - `label_names`: None
1275
+ - `load_best_model_at_end`: False
1276
+ - `ignore_data_skip`: False
1277
+ - `fsdp`: []
1278
+ - `fsdp_min_num_params`: 0
1279
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
1280
+ - `tp_size`: 0
1281
+ - `fsdp_transformer_layer_cls_to_wrap`: None
1282
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
1283
+ - `deepspeed`: None
1284
+ - `label_smoothing_factor`: 0.0
1285
+ - `optim`: adamw_torch
1286
+ - `optim_args`: None
1287
+ - `adafactor`: False
1288
+ - `group_by_length`: False
1289
+ - `length_column_name`: length
1290
+ - `ddp_find_unused_parameters`: True
1291
+ - `ddp_bucket_cap_mb`: None
1292
+ - `ddp_broadcast_buffers`: False
1293
+ - `dataloader_pin_memory`: True
1294
+ - `dataloader_persistent_workers`: False
1295
+ - `skip_memory_metrics`: True
1296
+ - `use_legacy_prediction_loop`: False
1297
+ - `push_to_hub`: False
1298
+ - `resume_from_checkpoint`: None
1299
+ - `hub_model_id`: None
1300
+ - `hub_strategy`: every_save
1301
+ - `hub_private_repo`: None
1302
+ - `hub_always_push`: False
1303
+ - `gradient_checkpointing`: False
1304
+ - `gradient_checkpointing_kwargs`: None
1305
+ - `include_inputs_for_metrics`: False
1306
+ - `include_for_metrics`: []
1307
+ - `eval_do_concat_batches`: True
1308
+ - `fp16_backend`: auto
1309
+ - `push_to_hub_model_id`: None
1310
+ - `push_to_hub_organization`: None
1311
+ - `mp_parameters`:
1312
+ - `auto_find_batch_size`: False
1313
+ - `full_determinism`: False
1314
+ - `torchdynamo`: None
1315
+ - `ray_scope`: last
1316
+ - `ddp_timeout`: 1800
1317
+ - `torch_compile`: False
1318
+ - `torch_compile_backend`: None
1319
+ - `torch_compile_mode`: None
1320
+ - `include_tokens_per_second`: False
1321
+ - `include_num_input_tokens_seen`: False
1322
+ - `neftune_noise_alpha`: None
1323
+ - `optim_target_modules`: None
1324
+ - `batch_eval_metrics`: False
1325
+ - `eval_on_start`: False
1326
+ - `use_liger_kernel`: False
1327
+ - `eval_use_gather_object`: False
1328
+ - `average_tokens_across_devices`: False
1329
+ - `prompts`: None
1330
+ - `batch_sampler`: no_duplicates
1331
+ - `multi_dataset_batch_sampler`: proportional
1332
+
1333
+ </details>
1334
+
+ ### Training Logs
+ | Epoch | Step | Training Loss | full_en_cosine_ndcg@200 | full_es_cosine_ndcg@200 | full_de_cosine_ndcg@200 | full_zh_cosine_ndcg@200 | mix_es_cosine_ndcg@200 | mix_de_cosine_ndcg@200 | mix_zh_cosine_ndcg@200 |
+ |:------:|:----:|:-------------:|:-----------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|
+ | -1 | -1 | - | 0.6856 | 0.5207 | 0.4655 | 0.6713 | 0.6224 | 0.5604 | 0.5548 |
+ | 0.0010 | 1 | 5.3354 | - | - | - | - | - | - | - |
+ | 0.1027 | 100 | 2.665 | - | - | - | - | - | - | - |
+ | 0.2053 | 200 | 1.3375 | 0.7691 | 0.6530 | 0.6298 | 0.7517 | 0.7513 | 0.7393 | 0.5490 |
+ | 0.3080 | 300 | 1.1101 | - | - | - | - | - | - | - |
+ | 0.4107 | 400 | 0.9453 | 0.7802 | 0.6643 | 0.6246 | 0.7531 | 0.7610 | 0.7441 | 0.5493 |
+ | 0.5133 | 500 | 0.9202 | - | - | - | - | - | - | - |
+ | 0.6160 | 600 | 0.7887 | 0.7741 | 0.6549 | 0.6171 | 0.7542 | 0.7672 | 0.7540 | 0.5482 |
+ | 0.7187 | 700 | 0.7604 | - | - | - | - | - | - | - |
+ | 0.8214 | 800 | 0.7219 | 0.7846 | 0.6674 | 0.6244 | 0.7648 | 0.7741 | 0.7592 | 0.5497 |
+ | 0.9240 | 900 | 0.6965 | - | - | - | - | - | - | - |
+ | 1.0267 | 1000 | 0.6253 | 0.7646 | 0.6391 | 0.6122 | 0.7503 | 0.7825 | 0.7704 | 0.5463 |
+ | 1.1294 | 1100 | 0.4737 | - | - | - | - | - | - | - |
+ | 1.2320 | 1200 | 0.5055 | 0.7758 | 0.6582 | 0.6178 | 0.7514 | 0.7857 | 0.7764 | 0.5501 |
+ | 1.3347 | 1300 | 0.5042 | - | - | - | - | - | - | - |
+ | 1.4374 | 1400 | 0.5073 | 0.7613 | 0.6578 | 0.6178 | 0.7505 | 0.7829 | 0.7762 | 0.5452 |
+ | 1.5400 | 1500 | 0.4975 | - | - | - | - | - | - | - |
+ | 1.6427 | 1600 | 0.5242 | 0.7736 | 0.6673 | 0.6279 | 0.7555 | 0.7940 | 0.7859 | 0.5477 |
+ | 1.7454 | 1700 | 0.4713 | - | - | - | - | - | - | - |
+ | 1.8480 | 1800 | 0.4814 | 0.7845 | 0.6733 | 0.6285 | 0.7642 | 0.7992 | 0.7904 | 0.5449 |
+ | 1.9507 | 1900 | 0.4526 | - | - | - | - | - | - | - |
+ | 2.0544 | 2000 | 0.36 | 0.7790 | 0.6639 | 0.6252 | 0.7500 | 0.8032 | 0.7888 | 0.5499 |
+ | 2.1571 | 2100 | 0.3744 | - | - | - | - | - | - | - |
+ | 2.2598 | 2200 | 0.3031 | 0.7787 | 0.6614 | 0.6190 | 0.7537 | 0.7993 | 0.7811 | 0.5476 |
+ | 2.3624 | 2300 | 0.3638 | - | - | - | - | - | - | - |
+ | 2.4651 | 2400 | 0.358 | 0.7798 | 0.6615 | 0.6258 | 0.7497 | 0.8018 | 0.7828 | 0.5481 |
+ | 2.5678 | 2500 | 0.3247 | - | - | - | - | - | - | - |
+ | 2.6704 | 2600 | 0.3247 | 0.7854 | 0.6663 | 0.6248 | 0.7560 | 0.8081 | 0.7835 | 0.5452 |
+ | 2.7731 | 2700 | 0.3263 | - | - | - | - | - | - | - |
+ | 2.8758 | 2800 | 0.3212 | 0.7761 | 0.6681 | 0.6250 | 0.7517 | 0.8121 | 0.7927 | 0.5458 |
+ | 2.9784 | 2900 | 0.3291 | - | - | - | - | - | - | - |
+ | 3.0821 | 3000 | 0.2816 | 0.7727 | 0.6604 | 0.6163 | 0.7370 | 0.8163 | 0.7985 | 0.5473 |
+ | 3.1848 | 3100 | 0.2698 | - | - | - | - | - | - | - |
+ | 3.2875 | 3200 | 0.2657 | 0.7757 | 0.6615 | 0.6247 | 0.7417 | 0.8117 | 0.8004 | 0.5436 |
+ | 3.3901 | 3300 | 0.2724 | - | - | - | - | - | - | - |
+ | 3.4928 | 3400 | 0.2584 | 0.7850 | 0.6583 | 0.6320 | 0.7458 | 0.8120 | 0.7980 | 0.5454 |
+ | 3.5955 | 3500 | 0.2573 | - | - | - | - | - | - | - |
+ | 3.6982 | 3600 | 0.2744 | 0.7796 | 0.6552 | 0.6237 | 0.7409 | 0.8193 | 0.8018 | 0.5466 |
+ | 3.8008 | 3700 | 0.3054 | - | - | - | - | - | - | - |
+ | 3.9035 | 3800 | 0.2727 | 0.7825 | 0.6642 | 0.6293 | 0.7504 | 0.8213 | 0.8058 | 0.5463 |
+ | 4.0062 | 3900 | 0.2353 | - | - | - | - | - | - | - |
+ | 4.1088 | 4000 | 0.2353 | 0.7747 | 0.6628 | 0.6263 | 0.7384 | 0.8239 | 0.8065 | 0.5447 |
+ | 4.2115 | 4100 | 0.2385 | - | - | - | - | - | - | - |
+ | 4.3142 | 4200 | 0.231 | 0.7811 | 0.6608 | 0.6254 | 0.7463 | 0.8226 | 0.8051 | 0.5442 |
+ 
+ 
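+ The `cosine_ndcg@200` columns above come from information-retrieval evaluators; a hedged sketch of computing such a metric with `InformationRetrievalEvaluator`, using toy stand-ins for the real eval splits:
+ 
+ ```python
+ # Hedged sketch: queries/corpus below are illustrative, not the actual eval data.
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers.evaluation import InformationRetrievalEvaluator
+ 
+ model = SentenceTransformer("path/to/checkpoint")  # placeholder checkpoint path
+ 
+ queries = {"q1": "technical manager"}                                   # query id -> text
+ corpus = {"d1": "Technischer Direktor für Bühne, Film und Fernsehen"}   # doc id -> text
+ relevant_docs = {"q1": {"d1"}}                                          # query id -> relevant doc ids
+ 
+ evaluator = InformationRetrievalEvaluator(
+     queries, corpus, relevant_docs,
+     ndcg_at_k=[200],   # matches the NDCG@200 reported in the table above
+     name="full_en",
+ )
+ print(evaluator(model))
+ ```
+ 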
+ ### Framework Versions
+ - Python: 3.11.11
+ - Sentence Transformers: 4.1.0
+ - Transformers: 4.51.2
+ - PyTorch: 2.6.0+cu124
+ - Accelerate: 1.6.0
+ - Datasets: 3.5.0
+ - Tokenizers: 0.21.1
+ 
+ ## Citation
+ 
+ ### BibTeX
+ 
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+ 
+ #### GISTEmbedLoss
+ ```bibtex
+ @misc{solatorio2024gistembed,
+     title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
+     author={Aivin V. Solatorio},
+     year={2024},
+     eprint={2402.16829},
+     archivePrefix={arXiv},
+     primaryClass={cs.LG}
+ }
+ ```
+ 
+ <!--
+ ## Glossary
+ 
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+ 
+ <!--
+ ## Model Card Authors
+ 
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+ 
+ <!--
+ ## Model Card Contact
+ 
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-4200/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b78a4739eadca247b8b2e2c00baf6260120f87a52a561be923312b94cba8232a
+ size 2271064456
checkpoint-4200/modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
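
This `modules.json` describes the standard three-stage Sentence Transformers pipeline (Transformer → Pooling → Normalize). A hedged sketch of assembling the same pipeline by hand; the backbone name is an assumption based on the checkpoint's `config.json` (an XLM-RoBERTa-large-shaped encoder), and `max_seq_length=512` and CLS pooling follow the `sentence_bert_config.json` and pooling configs in this commit:

```python
# Sketch only: the backbone identifier is an assumption, not stated in this card.
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer("FacebookAI/xlm-roberta-large", max_seq_length=512)
pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="cls")
normalize = models.Normalize()

# Modules are applied in order, mirroring idx 0/1/2 in modules.json.
model = SentenceTransformer(modules=[transformer, pooling, normalize])
```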
checkpoint-4200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5999173a8d03d315a6eeef8cf9fb69d1d387f37263ccc42a43a215f5e76d8f22
+ size 4533972937
checkpoint-4200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58d907753d591cf08f2e02a54be246122575c6702ce42ce38fd9f774562be3d4
+ size 15958
checkpoint-4200/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b186621336b0abea983113004ecd9b3118c0bc11e75a3da16b0bb6e48f4f1d5
+ size 988
checkpoint-4200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36387b84c88d119079837fce0adc18fc34124598fb149b8eb92af5bb4134e761
+ size 1064
checkpoint-4200/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 512,
+   "do_lower_case": false
+ }
checkpoint-4200/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
checkpoint-4200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624
checkpoint-4400/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "XLMRobertaModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 8194,
+   "model_type": "xlm-roberta",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 250002
+ }
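
This config defines a 24-layer, 1024-hidden, 8194-position XLM-RoBERTa encoder. A hedged sketch of instantiating the architecture from the checkpoint directory with Transformers (the trained weights themselves live in `model.safetensors`):

```python
# Sketch: builds the architecture from config.json alone; use from_pretrained
# on the checkpoint directory to also load the weights.
from transformers import XLMRobertaConfig, XLMRobertaModel

config = XLMRobertaConfig.from_pretrained("checkpoint-4400")
model = XLMRobertaModel(config)  # randomly initialized from config alone
```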
checkpoint-4400/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b1f51d5c393dbe3441f3e6303002dfa4d493abbc24118e5f4d0a03ee025a9bd
+ size 2271064456
checkpoint-4400/modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
checkpoint-4400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a9e6288ee5586d0ffcde2044dc04da2224eade48e12128ba7e97e6946e6e525
+ size 4533972937
checkpoint-4400/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba81f3b1771a8b460f12027c4271da4d35cf373c809e35a113805049901d939b
+ size 988
checkpoint-4400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0828827d515ca1d2148a70c185e3b6c96109604210f6fb69f993d1e2b7882ffb
+ size 1064
checkpoint-4400/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-4400/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624
checkpoint-4600/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f1c9c9e2b9e3d67f10744473f5e1d1d314b24f390c90d03e669760e0af5c9c4
+ size 2271064456
checkpoint-4600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05e104089212061a9d12b0d2f8e82d52a0e257172d6f6a345d45abaebaf0d3bb
+ size 4533972937
checkpoint-4600/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
checkpoint-4600/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
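
These are the standard XLM-RoBERTa special tokens. A quick illustrative check, assuming the checkpoint directory contains the full tokenizer files alongside this map:

```python
# Sketch: the path is the checkpoint folder holding special_tokens_map.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-4600")
print(tok.cls_token, tok.sep_token, tok.mask_token, tok.pad_token)  # <s> </s> <mask> <pad>
```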
checkpoint-4800/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
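
This pooling config selects CLS-token pooling over 1024-dimensional token embeddings; the equivalent module, as a sketch:

```python
# Sketch mirroring 1_Pooling/config.json: only the CLS vector is used
# as the sentence embedding.
from sentence_transformers import models

pooling = models.Pooling(
    word_embedding_dimension=1024,
    pooling_mode_cls_token=True,
    pooling_mode_mean_tokens=False,
)
```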
checkpoint-4800/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d2ac2c0c92b9ef57cc468b9281831355dd814fe1281d4477c4f70a583159129
+ size 2271064456
checkpoint-4800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:881841f70dc9d4f24a46057ced1d2e32ca60e5ff2f9355c3d66879ea2de63e56
+ size 4533972937
checkpoint-4800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0483b24401bb5c934be17d3fb87b418062454f4034967bcd424997391ddc532
+ size 15958
checkpoint-4800/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdf548d9246c1d9e15e20b2ddca08d24f314024d3fae6a40e3553c10631d480a
+ size 988
checkpoint-4800/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
checkpoint-4800/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624