pj-mathematician committed
Commit 13d4c6a · verified · 1 parent: 523c335

Add files using upload-large-folder tool

Files changed (43)
  1. checkpoint-4000/1_Pooling/config.json +10 -0
  2. checkpoint-4000/config.json +27 -0
  3. checkpoint-4000/config_sentence_transformers.json +10 -0
  4. checkpoint-4000/model.safetensors +3 -0
  5. checkpoint-4000/optimizer.pt +3 -0
  6. checkpoint-4000/rng_state.pth +3 -0
  7. checkpoint-4000/scaler.pt +3 -0
  8. checkpoint-4000/scheduler.pt +3 -0
  9. checkpoint-4000/sentence_bert_config.json +4 -0
  10. checkpoint-4000/special_tokens_map.json +51 -0
  11. checkpoint-4000/tokenizer_config.json +56 -0
  12. checkpoint-4000/trainer_state.json +0 -0
  13. checkpoint-4000/training_args.bin +3 -0
  14. checkpoint-4200/1_Pooling/config.json +10 -0
  15. checkpoint-4200/README.md +1438 -0
  16. checkpoint-4200/model.safetensors +3 -0
  17. checkpoint-4200/modules.json +20 -0
  18. checkpoint-4200/optimizer.pt +3 -0
  19. checkpoint-4200/rng_state.pth +3 -0
  20. checkpoint-4200/scaler.pt +3 -0
  21. checkpoint-4200/scheduler.pt +3 -0
  22. checkpoint-4200/sentence_bert_config.json +4 -0
  23. checkpoint-4200/sentencepiece.bpe.model +3 -0
  24. checkpoint-4200/training_args.bin +3 -0
  25. checkpoint-4400/config.json +27 -0
  26. checkpoint-4400/model.safetensors +3 -0
  27. checkpoint-4400/modules.json +20 -0
  28. checkpoint-4400/optimizer.pt +3 -0
  29. checkpoint-4400/scaler.pt +3 -0
  30. checkpoint-4400/scheduler.pt +3 -0
  31. checkpoint-4400/trainer_state.json +0 -0
  32. checkpoint-4400/training_args.bin +3 -0
  33. checkpoint-4600/model.safetensors +3 -0
  34. checkpoint-4600/optimizer.pt +3 -0
  35. checkpoint-4600/sentencepiece.bpe.model +3 -0
  36. checkpoint-4600/special_tokens_map.json +51 -0
  37. checkpoint-4800/1_Pooling/config.json +10 -0
  38. checkpoint-4800/model.safetensors +3 -0
  39. checkpoint-4800/optimizer.pt +3 -0
  40. checkpoint-4800/rng_state.pth +3 -0
  41. checkpoint-4800/scaler.pt +3 -0
  42. checkpoint-4800/sentencepiece.bpe.model +3 -0
  43. checkpoint-4800/training_args.bin +3 -0
checkpoint-4000/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
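This pooling configuration selects CLS-token pooling: the hidden state of the first token (1024 dimensions for this XLM-RoBERTa-based encoder) becomes the sentence embedding, with all other pooling modes disabled. A minimal sketch of an equivalent module using the public `sentence_transformers.models.Pooling` API (this config.json is the file that class serializes):

```python
from sentence_transformers.models import Pooling

# Equivalent to the config above: use the CLS token's hidden state
# as the sentence embedding; mean/max/last-token pooling stay off.
pooling = Pooling(word_embedding_dimension=1024, pooling_mode="cls")
print(pooling.get_pooling_mode_str())  # -> "cls"
```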
checkpoint-4000/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "XLMRobertaModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 8194,
+   "model_type": "xlm-roberta",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 250002
+ }
checkpoint-4000/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "4.1.0",
+     "transformers": "4.51.2",
+     "pytorch": "2.6.0+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e396a37e5acc679024a9fc8c8cddae538f7cf082d5682d004c44cf494e226f69
+ size 2271064456
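Note that `model.safetensors` and the other binary files in this commit (`optimizer.pt`, `rng_state.pth`, `scaler.pt`, `scheduler.pt`, `training_args.bin`, `sentencepiece.bpe.model`) are checked in as Git LFS pointer files: each three-line entry records only the LFS spec version, the SHA-256 object id, and the payload size in bytes (here roughly 2.27 GB of float32 weights), while the actual binaries live in LFS storage.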
checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99efc690de9364ab3597a63d4ecd7e827d9b9ac9541a5cd91510bccd5027f02
+ size 4533972937
checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3efbf048b90dbf88fbe7c3e5343d8f5b231ce2f762e746e6dc52ad13d65a600
+ size 15958
checkpoint-4000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60386f0725c4fe780fb345fd957860c14c74f3535df8dbe1242c6c681a5d255b
+ size 988
checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:067bf8b23f4d6fd97b4d2f83930a64c8500ad6e476ffd0437ac9124a04eee854
+ size 1064
checkpoint-4000/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 512,
+   "do_lower_case": false
+ }
checkpoint-4000/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-4000/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "250001": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "model_max_length": 8192,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "XLMRobertaTokenizer",
+   "unk_token": "<unk>"
+ }
checkpoint-4000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624
checkpoint-4200/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
checkpoint-4200/README.md ADDED
@@ -0,0 +1,1438 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:124788
+ - loss:GISTEmbedLoss
+ base_model: BAAI/bge-m3
+ widget:
+ - source_sentence: 其他机械、设备和有形货物租赁服务代表
+   sentences:
+   - 其他机械和设备租赁服务工作人员
+   - 电子和电信设备及零部件物流经理
+   - 工业主厨
+ - source_sentence: 公交车司机
+   sentences:
+   - 表演灯光设计师
+   - 乙烯基地板安装工
+   - 国际巴士司机
+ - source_sentence: online communication manager
+   sentences:
+   - trades union official
+   - social media manager
+   - budget manager
+ - source_sentence: Projektmanagerin
+   sentences:
+   - Projektmanager/Projektmanagerin
+   - Category-Manager
+   - Infanterist
+ - source_sentence: Volksvertreter
+   sentences:
+   - Parlamentarier
+   - Oberbürgermeister
+   - Konsul
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ metrics:
+ - cosine_accuracy@1
+ - cosine_accuracy@20
+ - cosine_accuracy@50
+ - cosine_accuracy@100
+ - cosine_accuracy@150
+ - cosine_accuracy@200
+ - cosine_precision@1
+ - cosine_precision@20
+ - cosine_precision@50
+ - cosine_precision@100
+ - cosine_precision@150
+ - cosine_precision@200
+ - cosine_recall@1
+ - cosine_recall@20
+ - cosine_recall@50
+ - cosine_recall@100
+ - cosine_recall@150
+ - cosine_recall@200
+ - cosine_ndcg@1
+ - cosine_ndcg@20
+ - cosine_ndcg@50
+ - cosine_ndcg@100
+ - cosine_ndcg@150
+ - cosine_ndcg@200
+ - cosine_mrr@1
+ - cosine_mrr@20
+ - cosine_mrr@50
+ - cosine_mrr@100
+ - cosine_mrr@150
+ - cosine_mrr@200
+ - cosine_map@1
+ - cosine_map@20
+ - cosine_map@50
+ - cosine_map@100
+ - cosine_map@150
+ - cosine_map@200
+ - cosine_map@500
+ model-index:
+ - name: SentenceTransformer based on BAAI/bge-m3
+   results:
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full en
+       type: full_en
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.6571428571428571
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9904761904761905
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.9904761904761905
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9904761904761905
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9904761904761905
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9904761904761905
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.6571428571428571
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.5042857142857142
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.30342857142857144
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.18485714285714283
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.13161904761904764
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.1020952380952381
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.06749696615971254
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.5373072040835736
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.7066915041490871
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.8223255763807351
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.8681298207585033
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.8939381871513931
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.6571428571428571
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.6828242233504754
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.6934957075565445
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.7508237653332346
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.7708996755918012
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.7810547976165594
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.6571428571428571
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.8050793650793651
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.8050793650793651
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.8050793650793651
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.8050793650793651
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.8050793650793651
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.6571428571428571
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.5403780248322398
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.5246924299662313
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.5574701928996357
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.5657362210212612
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.5689495406824301
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.5740394717933254
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full es
+       type: full_es
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.11351351351351352
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 1.0
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 1.0
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 1.0
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 1.0
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 1.0
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.11351351351351352
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.5678378378378378
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.38616216216216215
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.24956756756756757
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.18836036036036036
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.14981081081081082
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.0035155918996302815
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.37836142042267473
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.5571586783455559
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.6675392853403386
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.7304539075934318
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.762368065923207
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.11351351351351352
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.6138712223781554
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.5860105244597086
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.612222606218991
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.6445206608822607
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.6607643472995034
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.11351351351351352
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.5536036036036036
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.5536036036036036
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.5536036036036036
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.5536036036036036
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.5536036036036036
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.11351351351351352
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.48205571119205054
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.426066001253444
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.4286297248227863
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.44367730975701125
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.45055470203697434
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.4632014183024849
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full de
+       type: full_de
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.2955665024630542
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9802955665024631
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.9852216748768473
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9852216748768473
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9901477832512315
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9901477832512315
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.2955665024630542
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.5391625615763547
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.3801970443349754
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.2476847290640394
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.18568144499178982
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.14891625615763546
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.01108543831680986
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.3399387209539555
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.5308580040187325
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.6430327898382845
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.7043523082318627
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.7435945575564449
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.2955665024630542
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.5620736680453444
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.5486209217219633
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.5742560822304251
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.6059775924816383
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.6254063201510274
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.2955665024630542
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.5138789918346562
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.5140086262655611
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.5140086262655611
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.5140546646615833
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.5140546646615833
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.2955665024630542
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.42010224651188977
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.37517744419195703
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.3784520844424068
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.3928983602214202
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.40049621656562834
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.4142041780241764
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: full zh
+       type: full_zh
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.6601941747572816
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9902912621359223
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.9902912621359223
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9902912621359223
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9902912621359223
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9902912621359223
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.6601941747572816
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.46990291262135936
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.2766990291262136
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.17145631067961165
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.12381877022653723
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.09747572815533984
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.06391645269201905
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.5028687618433456
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.6651242597088418
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.7783273755437382
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.8334866166756513
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.8666706510858552
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.6601941747572816
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.6467729312304265
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.6531754449097694
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.7091690247935931
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.7326072552384693
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.7462718534326636
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.6601941747572816
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.8101941747572816
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.8101941747572816
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.8101941747572816
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.8101941747572816
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.8101941747572816
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.6601941747572816
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.5008318658399892
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.47687535367801903
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.506399482523297
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.515344178164581
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.5196266745217748
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.5245537410408139
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: mix es
+       type: mix_es
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.733749349973999
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9604784191367655
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.982839313572543
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9916796671866874
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9947997919916797
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9953198127925117
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.733749349973999
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.12433697347893914
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.0516588663546542
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.026229849193967765
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.017635638758883684
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.013273530941237652
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.28340762201916647
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.9186774137632172
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.9536314785924771
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.968538741549662
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.9768070722828913
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.9806205581556595
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.733749349973999
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.8074696494514497
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.8170488841773651
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.8203516409516334
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.8219710202163846
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.8226411885850343
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.733749349973999
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.8015837695391573
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.8023398853791036
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.8024787052722444
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.8025062574128484
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.8025096562416121
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.733749349973999
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.7389285820519963
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.7414939322506505
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.7419568857454747
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.7421153780150582
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.742164620684282
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.7422579374234903
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: mix de
+       type: mix_de
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.6859074362974519
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 0.9661986479459178
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 0.982839313572543
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 0.9927197087883516
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 0.9932397295891836
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 0.9937597503900156
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.6859074362974519
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.12732709308372334
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.05308372334893397
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.027025481019240776
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.018103657479632513
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.013606344253770154
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.2577396429190501
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 0.9241896342520368
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 0.9614317906049575
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 0.9787224822326227
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 0.983359334373375
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 0.9854394175767031
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.6859074362974519
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.7894367570955271
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.7998923204035095
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.8037683941688618
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.8046891228048068
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.8050715563658618
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.6859074362974519
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.7703397211809108
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.7708870204854694
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.7710242509181896
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.7710286578741289
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.7710319701085292
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.6859074362974519
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.711359959198991
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.7143436554485498
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.7149332520404413
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.7150312982701879
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.7150609466134881
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.715115635794944
+       name: Cosine Map@500
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: mix zh
+       type: mix_zh
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.1814872594903796
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@20
+       value: 1.0
+       name: Cosine Accuracy@20
+     - type: cosine_accuracy@50
+       value: 1.0
+       name: Cosine Accuracy@50
+     - type: cosine_accuracy@100
+       value: 1.0
+       name: Cosine Accuracy@100
+     - type: cosine_accuracy@150
+       value: 1.0
+       name: Cosine Accuracy@150
+     - type: cosine_accuracy@200
+       value: 1.0
+       name: Cosine Accuracy@200
+     - type: cosine_precision@1
+       value: 0.1814872594903796
+       name: Cosine Precision@1
+     - type: cosine_precision@20
+       value: 0.15439417576703063
+       name: Cosine Precision@20
+     - type: cosine_precision@50
+       value: 0.0617576703068123
+       name: Cosine Precision@50
+     - type: cosine_precision@100
+       value: 0.03087883515340615
+       name: Cosine Precision@100
+     - type: cosine_precision@150
+       value: 0.020585890102270757
+       name: Cosine Precision@150
+     - type: cosine_precision@200
+       value: 0.015439417576703075
+       name: Cosine Precision@200
+     - type: cosine_recall@1
+       value: 0.05822499566649332
+       name: Cosine Recall@1
+     - type: cosine_recall@20
+       value: 1.0
+       name: Cosine Recall@20
+     - type: cosine_recall@50
+       value: 1.0
+       name: Cosine Recall@50
+     - type: cosine_recall@100
+       value: 1.0
+       name: Cosine Recall@100
+     - type: cosine_recall@150
+       value: 1.0
+       name: Cosine Recall@150
+     - type: cosine_recall@200
+       value: 1.0
+       name: Cosine Recall@200
+     - type: cosine_ndcg@1
+       value: 0.1814872594903796
+       name: Cosine Ndcg@1
+     - type: cosine_ndcg@20
+       value: 0.5442006309834599
+       name: Cosine Ndcg@20
+     - type: cosine_ndcg@50
+       value: 0.5442006309834599
+       name: Cosine Ndcg@50
+     - type: cosine_ndcg@100
+       value: 0.5442006309834599
+       name: Cosine Ndcg@100
+     - type: cosine_ndcg@150
+       value: 0.5442006309834599
+       name: Cosine Ndcg@150
+     - type: cosine_ndcg@200
+       value: 0.5442006309834599
+       name: Cosine Ndcg@200
+     - type: cosine_mrr@1
+       value: 0.1814872594903796
+       name: Cosine Mrr@1
+     - type: cosine_mrr@20
+       value: 0.4016099489578433
+       name: Cosine Mrr@20
+     - type: cosine_mrr@50
+       value: 0.4016099489578433
+       name: Cosine Mrr@50
+     - type: cosine_mrr@100
+       value: 0.4016099489578433
+       name: Cosine Mrr@100
+     - type: cosine_mrr@150
+       value: 0.4016099489578433
+       name: Cosine Mrr@150
+     - type: cosine_mrr@200
+       value: 0.4016099489578433
+       name: Cosine Mrr@200
+     - type: cosine_map@1
+       value: 0.1814872594903796
+       name: Cosine Map@1
+     - type: cosine_map@20
+       value: 0.32662137894847204
+       name: Cosine Map@20
+     - type: cosine_map@50
+       value: 0.32662137894847204
+       name: Cosine Map@50
+     - type: cosine_map@100
+       value: 0.32662137894847204
+       name: Cosine Map@100
+     - type: cosine_map@150
+       value: 0.32662137894847204
+       name: Cosine Map@150
+     - type: cosine_map@200
+       value: 0.32662137894847204
+       name: Cosine Map@200
+     - type: cosine_map@500
+       value: 0.32662137894847204
+       name: Cosine Map@500
+ ---
+ 
+ # SentenceTransformer based on BAAI/bge-m3
+ 
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) on the full_en, full_de, full_es, full_zh and mix datasets. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+ 
+ ## Model Details
+ 
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [BAAI/bge-m3](https://huggingface.co/BAAI/bge-m3) <!-- at revision 5617a9f61b028005a4858fdac845db406aefb181 -->
+ - **Maximum Sequence Length:** 512 tokens
+ - **Output Dimensionality:** 1024 dimensions
+ - **Similarity Function:** Cosine Similarity
+ - **Training Datasets:**
+   - full_en
+   - full_de
+   - full_es
+   - full_zh
+   - mix
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+ 
+ ### Model Sources
+ 
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+ 
+ ### Full Model Architecture
+ 
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
+   (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+   (2): Normalize()
+ )
+ ```
+ 
+ ## Usage
+ 
+ ### Direct Usage (Sentence Transformers)
+ 
+ First install the Sentence Transformers library:
+ 
+ ```bash
+ pip install -U sentence-transformers
+ ```
+ 
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+ 
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+     'Volksvertreter',
+     'Parlamentarier',
+     'Oberbürgermeister',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 1024]
+ 
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
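Since this commit uploads raw training checkpoints rather than a published model id, the same API can also point at a checkpoint directory. A minimal sketch, assuming a local clone in which the chosen checkpoint folder contains the full set of module files:

```python
from sentence_transformers import SentenceTransformer

# Hypothetical local path into a clone of this repository.
model = SentenceTransformer("checkpoint-4200")
embeddings = model.encode(["Volksvertreter", "Parlamentarier"])
print(model.similarity(embeddings, embeddings))  # 2x2 cosine-similarity matrix
```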
+ 
+ <!--
+ ### Direct Usage (Transformers)
+ 
+ <details><summary>Click to see the direct usage in Transformers</summary>
+ 
+ </details>
+ -->
+ 
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+ 
+ You can finetune this model on your own dataset.
+ 
+ <details><summary>Click to expand</summary>
+ 
+ </details>
+ -->
+ 
+ <!--
+ ### Out-of-Scope Use
+ 
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+ 
+ ## Evaluation
+ 
+ ### Metrics
+ 
+ #### Information Retrieval
+ 
+ * Datasets: `full_en`, `full_es`, `full_de`, `full_zh`, `mix_es`, `mix_de` and `mix_zh`
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
+ 
+ | Metric | full_en | full_es | full_de | full_zh | mix_es | mix_de | mix_zh |
+ |:---------------------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------|
+ | cosine_accuracy@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_accuracy@20 | 0.9905 | 1.0 | 0.9803 | 0.9903 | 0.9605 | 0.9662 | 1.0 |
+ | cosine_accuracy@50 | 0.9905 | 1.0 | 0.9852 | 0.9903 | 0.9828 | 0.9828 | 1.0 |
+ | cosine_accuracy@100 | 0.9905 | 1.0 | 0.9852 | 0.9903 | 0.9917 | 0.9927 | 1.0 |
+ | cosine_accuracy@150 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.9948 | 0.9932 | 1.0 |
+ | cosine_accuracy@200 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.9953 | 0.9938 | 1.0 |
+ | cosine_precision@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_precision@20 | 0.5043 | 0.5678 | 0.5392 | 0.4699 | 0.1243 | 0.1273 | 0.1544 |
+ | cosine_precision@50 | 0.3034 | 0.3862 | 0.3802 | 0.2767 | 0.0517 | 0.0531 | 0.0618 |
+ | cosine_precision@100 | 0.1849 | 0.2496 | 0.2477 | 0.1715 | 0.0262 | 0.027 | 0.0309 |
+ | cosine_precision@150 | 0.1316 | 0.1884 | 0.1857 | 0.1238 | 0.0176 | 0.0181 | 0.0206 |
+ | cosine_precision@200 | 0.1021 | 0.1498 | 0.1489 | 0.0975 | 0.0133 | 0.0136 | 0.0154 |
+ | cosine_recall@1 | 0.0675 | 0.0035 | 0.0111 | 0.0639 | 0.2834 | 0.2577 | 0.0582 |
+ | cosine_recall@20 | 0.5373 | 0.3784 | 0.3399 | 0.5029 | 0.9187 | 0.9242 | 1.0 |
+ | cosine_recall@50 | 0.7067 | 0.5572 | 0.5309 | 0.6651 | 0.9536 | 0.9614 | 1.0 |
+ | cosine_recall@100 | 0.8223 | 0.6675 | 0.643 | 0.7783 | 0.9685 | 0.9787 | 1.0 |
+ | cosine_recall@150 | 0.8681 | 0.7305 | 0.7044 | 0.8335 | 0.9768 | 0.9834 | 1.0 |
+ | cosine_recall@200 | 0.8939 | 0.7624 | 0.7436 | 0.8667 | 0.9806 | 0.9854 | 1.0 |
+ | cosine_ndcg@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_ndcg@20 | 0.6828 | 0.6139 | 0.5621 | 0.6468 | 0.8075 | 0.7894 | 0.5442 |
+ | cosine_ndcg@50 | 0.6935 | 0.586 | 0.5486 | 0.6532 | 0.817 | 0.7999 | 0.5442 |
+ | cosine_ndcg@100 | 0.7508 | 0.6122 | 0.5743 | 0.7092 | 0.8204 | 0.8038 | 0.5442 |
+ | cosine_ndcg@150 | 0.7709 | 0.6445 | 0.606 | 0.7326 | 0.822 | 0.8047 | 0.5442 |
+ | **cosine_ndcg@200** | **0.7811** | **0.6608** | **0.6254** | **0.7463** | **0.8226** | **0.8051** | **0.5442** |
+ | cosine_mrr@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_mrr@20 | 0.8051 | 0.5536 | 0.5139 | 0.8102 | 0.8016 | 0.7703 | 0.4016 |
+ | cosine_mrr@50 | 0.8051 | 0.5536 | 0.514 | 0.8102 | 0.8023 | 0.7709 | 0.4016 |
+ | cosine_mrr@100 | 0.8051 | 0.5536 | 0.514 | 0.8102 | 0.8025 | 0.771 | 0.4016 |
+ | cosine_mrr@150 | 0.8051 | 0.5536 | 0.5141 | 0.8102 | 0.8025 | 0.771 | 0.4016 |
+ | cosine_mrr@200 | 0.8051 | 0.5536 | 0.5141 | 0.8102 | 0.8025 | 0.771 | 0.4016 |
+ | cosine_map@1 | 0.6571 | 0.1135 | 0.2956 | 0.6602 | 0.7337 | 0.6859 | 0.1815 |
+ | cosine_map@20 | 0.5404 | 0.4821 | 0.4201 | 0.5008 | 0.7389 | 0.7114 | 0.3266 |
+ | cosine_map@50 | 0.5247 | 0.4261 | 0.3752 | 0.4769 | 0.7415 | 0.7143 | 0.3266 |
+ | cosine_map@100 | 0.5575 | 0.4286 | 0.3785 | 0.5064 | 0.742 | 0.7149 | 0.3266 |
+ | cosine_map@150 | 0.5657 | 0.4437 | 0.3929 | 0.5153 | 0.7421 | 0.715 | 0.3266 |
+ | cosine_map@200 | 0.5689 | 0.4506 | 0.4005 | 0.5196 | 0.7422 | 0.7151 | 0.3266 |
+ | cosine_map@500 | 0.574 | 0.4632 | 0.4142 | 0.5246 | 0.7423 | 0.7151 | 0.3266 |
+ 
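For reference, a minimal sketch of the `InformationRetrievalEvaluator` setup with the cutoffs used in the table above; the query/corpus/qrels mappings shown here are illustrative placeholders, not the actual evaluation data:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator

model = SentenceTransformer("checkpoint-4200")  # hypothetical local path

# Toy stand-ins for the real query/corpus/relevance mappings.
queries = {"q1": "technical manager"}
corpus = {"d1": "head of technical department", "d2": "budget manager"}
relevant_docs = {"q1": {"d1"}}

evaluator = InformationRetrievalEvaluator(
    queries=queries,
    corpus=corpus,
    relevant_docs=relevant_docs,
    name="full_en",
    accuracy_at_k=[1, 20, 50, 100, 150, 200],
    precision_recall_at_k=[1, 20, 50, 100, 150, 200],
    ndcg_at_k=[1, 20, 50, 100, 150, 200],
    mrr_at_k=[1, 20, 50, 100, 150, 200],
    map_at_k=[1, 20, 50, 100, 150, 200, 500],
)
print(evaluator(model))  # dict of cosine_accuracy@k, cosine_ndcg@k, ...
```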
+ 
+ <!--
+ ## Bias, Risks and Limitations
+ 
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+ 
+ <!--
+ ### Recommendations
+ 
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+ 
+ ## Training Details
+ 
+ ### Training Datasets
+ <details><summary>full_en</summary>
+ 
+ #### full_en
+ 
+ * Dataset: full_en
+ * Size: 28,880 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 5.68 tokens</li><li>max: 11 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 5.76 tokens</li><li>max: 12 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>air commodore</code> | <code>flight lieutenant</code> |
+   | <code>command and control officer</code> | <code>flight officer</code> |
+   | <code>air commodore</code> | <code>command and control officer</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
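The `guide` printed in these parameters, which is shared by all five training datasets below, is a 384-dimensional BERT encoder with mean pooling; the card records only its architecture, not its checkpoint name. A minimal sketch of constructing the same loss, with the guide model id as an assumption:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import GISTEmbedLoss

model = SentenceTransformer("BAAI/bge-m3")
# Assumed guide checkpoint: any 384-dim mean-pooled BERT encoder
# matching the printed architecture would fit here.
guide = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

loss = GISTEmbedLoss(
    model=model,
    guide=guide,
    temperature=0.01,
    margin_strategy="absolute",
    margin=0.0,
)
```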
+ <details><summary>full_de</summary>
+ 
+ #### full_de
+ 
+ * Dataset: full_de
+ * Size: 23,023 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 7.99 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 8.19 tokens</li><li>max: 30 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>Staffelkommandantin</code> | <code>Kommodore</code> |
+   | <code>Luftwaffenoffizierin</code> | <code>Luftwaffenoffizier/Luftwaffenoffizierin</code> |
+   | <code>Staffelkommandantin</code> | <code>Luftwaffenoffizierin</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ <details><summary>full_es</summary>
+ 
+ #### full_es
+ 
+ * Dataset: full_es
+ * Size: 20,724 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 9.13 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 8.84 tokens</li><li>max: 32 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>jefe de escuadrón</code> | <code>instructor</code> |
+   | <code>comandante de aeronave</code> | <code>instructor de simulador</code> |
+   | <code>instructor</code> | <code>oficial del Ejército del Aire</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ <details><summary>full_zh</summary>
+ 
+ #### full_zh
+ 
+ * Dataset: full_zh
+ * Size: 30,401 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 5 tokens</li><li>mean: 7.15 tokens</li><li>max: 14 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 7.46 tokens</li><li>max: 21 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>技术总监</code> | <code>技术和运营总监</code> |
+   | <code>技术总监</code> | <code>技术主管</code> |
+   | <code>技术总监</code> | <code>技术艺术总监</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ <details><summary>mix</summary>
+ 
+ #### mix
+ 
+ * Dataset: mix
+ * Size: 21,760 training samples
+ * Columns: <code>anchor</code> and <code>positive</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | anchor | positive |
+   |:--------|:-------|:---------|
+   | type    | string | string |
+   | details | <ul><li>min: 2 tokens</li><li>mean: 6.71 tokens</li><li>max: 19 tokens</li></ul> | <ul><li>min: 2 tokens</li><li>mean: 7.69 tokens</li><li>max: 19 tokens</li></ul> |
+ * Samples:
+   | anchor | positive |
+   |:-------|:---------|
+   | <code>technical manager</code> | <code>Technischer Direktor für Bühne, Film und Fernsehen</code> |
+   | <code>head of technical</code> | <code>directora técnica</code> |
+   | <code>head of technical department</code> | <code>技术艺术总监</code> |
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
+   ```json
+   {'guide': SentenceTransformer(
+     (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+     (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+     (2): Normalize()
+   ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
+   ```
+ </details>
+ 
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+ 
+ - `eval_strategy`: steps
+ - `per_device_train_batch_size`: 64
+ - `per_device_eval_batch_size`: 128
+ - `gradient_accumulation_steps`: 2
+ - `num_train_epochs`: 5
+ - `warmup_ratio`: 0.05
+ - `log_on_each_node`: False
+ - `fp16`: True
+ - `dataloader_num_workers`: 4
+ - `ddp_find_unused_parameters`: True
+ - `batch_sampler`: no_duplicates
+ 
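A minimal sketch of expressing the non-default hyperparameters above through the sentence-transformers trainer API (the `output_dir` is a placeholder, not taken from this repository):

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="output",  # placeholder
    eval_strategy="steps",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=128,
    gradient_accumulation_steps=2,
    num_train_epochs=5,
    warmup_ratio=0.05,
    log_on_each_node=False,
    fp16=True,
    dataloader_num_workers=4,
    ddp_find_unused_parameters=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # "no_duplicates" above
)
```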
1216
+ <details><summary>Click to expand</summary>
1217
+
1218
+ - `overwrite_output_dir`: False
1219
+ - `do_predict`: False
1220
+ - `eval_strategy`: steps
1221
+ - `prediction_loss_only`: True
1222
+ - `per_device_train_batch_size`: 64
1223
+ - `per_device_eval_batch_size`: 128
1224
+ - `per_gpu_train_batch_size`: None
1225
+ - `per_gpu_eval_batch_size`: None
1226
+ - `gradient_accumulation_steps`: 2
1227
+ - `eval_accumulation_steps`: None
1228
+ - `torch_empty_cache_steps`: None
1229
+ - `learning_rate`: 5e-05
1230
+ - `weight_decay`: 0.0
1231
+ - `adam_beta1`: 0.9
1232
+ - `adam_beta2`: 0.999
1233
+ - `adam_epsilon`: 1e-08
1234
+ - `max_grad_norm`: 1.0
1235
+ - `num_train_epochs`: 5
1236
+ - `max_steps`: -1
1237
+ - `lr_scheduler_type`: linear
1238
+ - `lr_scheduler_kwargs`: {}
1239
+ - `warmup_ratio`: 0.05
1240
+ - `warmup_steps`: 0
1241
+ - `log_level`: passive
1242
+ - `log_level_replica`: warning
1243
+ - `log_on_each_node`: False
1244
+ - `logging_nan_inf_filter`: True
1245
+ - `save_safetensors`: True
1246
+ - `save_on_each_node`: False
1247
+ - `save_only_model`: False
1248
+ - `restore_callback_states_from_checkpoint`: False
1249
+ - `no_cuda`: False
1250
+ - `use_cpu`: False
1251
+ - `use_mps_device`: False
1252
+ - `seed`: 42
1253
+ - `data_seed`: None
1254
+ - `jit_mode_eval`: False
1255
+ - `use_ipex`: False
1256
+ - `bf16`: False
1257
+ - `fp16`: True
1258
+ - `fp16_opt_level`: O1
1259
+ - `half_precision_backend`: auto
1260
+ - `bf16_full_eval`: False
1261
+ - `fp16_full_eval`: False
1262
+ - `tf32`: None
1263
+ - `local_rank`: 0
1264
+ - `ddp_backend`: None
1265
+ - `tpu_num_cores`: None
1266
+ - `tpu_metrics_debug`: False
1267
+ - `debug`: []
1268
+ - `dataloader_drop_last`: True
1269
+ - `dataloader_num_workers`: 4
1270
+ - `dataloader_prefetch_factor`: None
1271
+ - `past_index`: -1
1272
+ - `disable_tqdm`: False
1273
+ - `remove_unused_columns`: True
1274
+ - `label_names`: None
1275
+ - `load_best_model_at_end`: False
1276
+ - `ignore_data_skip`: False
1277
+ - `fsdp`: []
1278
+ - `fsdp_min_num_params`: 0
1279
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
1280
+ - `tp_size`: 0
1281
+ - `fsdp_transformer_layer_cls_to_wrap`: None
1282
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
1283
+ - `deepspeed`: None
1284
+ - `label_smoothing_factor`: 0.0
1285
+ - `optim`: adamw_torch
1286
+ - `optim_args`: None
1287
+ - `adafactor`: False
1288
+ - `group_by_length`: False
1289
+ - `length_column_name`: length
1290
+ - `ddp_find_unused_parameters`: True
1291
+ - `ddp_bucket_cap_mb`: None
1292
+ - `ddp_broadcast_buffers`: False
1293
+ - `dataloader_pin_memory`: True
1294
+ - `dataloader_persistent_workers`: False
1295
+ - `skip_memory_metrics`: True
1296
+ - `use_legacy_prediction_loop`: False
1297
+ - `push_to_hub`: False
1298
+ - `resume_from_checkpoint`: None
1299
+ - `hub_model_id`: None
1300
+ - `hub_strategy`: every_save
1301
+ - `hub_private_repo`: None
1302
+ - `hub_always_push`: False
1303
+ - `gradient_checkpointing`: False
1304
+ - `gradient_checkpointing_kwargs`: None
1305
+ - `include_inputs_for_metrics`: False
1306
+ - `include_for_metrics`: []
1307
+ - `eval_do_concat_batches`: True
1308
+ - `fp16_backend`: auto
1309
+ - `push_to_hub_model_id`: None
1310
+ - `push_to_hub_organization`: None
1311
+ - `mp_parameters`:
1312
+ - `auto_find_batch_size`: False
1313
+ - `full_determinism`: False
1314
+ - `torchdynamo`: None
1315
+ - `ray_scope`: last
1316
+ - `ddp_timeout`: 1800
1317
+ - `torch_compile`: False
1318
+ - `torch_compile_backend`: None
1319
+ - `torch_compile_mode`: None
1320
+ - `include_tokens_per_second`: False
1321
+ - `include_num_input_tokens_seen`: False
1322
+ - `neftune_noise_alpha`: None
1323
+ - `optim_target_modules`: None
1324
+ - `batch_eval_metrics`: False
1325
+ - `eval_on_start`: False
1326
+ - `use_liger_kernel`: False
1327
+ - `eval_use_gather_object`: False
1328
+ - `average_tokens_across_devices`: False
1329
+ - `prompts`: None
1330
+ - `batch_sampler`: no_duplicates
1331
+ - `multi_dataset_batch_sampler`: proportional
1332
+
1333
+ </details>
1334
+
+ ### Training Logs
+ | Epoch | Step | Training Loss | full_en_cosine_ndcg@200 | full_es_cosine_ndcg@200 | full_de_cosine_ndcg@200 | full_zh_cosine_ndcg@200 | mix_es_cosine_ndcg@200 | mix_de_cosine_ndcg@200 | mix_zh_cosine_ndcg@200 |
+ |:------:|:----:|:-------------:|:-----------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|
+ | -1 | -1 | - | 0.6856 | 0.5207 | 0.4655 | 0.6713 | 0.6224 | 0.5604 | 0.5548 |
+ | 0.0010 | 1 | 5.3354 | - | - | - | - | - | - | - |
+ | 0.1027 | 100 | 2.665 | - | - | - | - | - | - | - |
+ | 0.2053 | 200 | 1.3375 | 0.7691 | 0.6530 | 0.6298 | 0.7517 | 0.7513 | 0.7393 | 0.5490 |
+ | 0.3080 | 300 | 1.1101 | - | - | - | - | - | - | - |
+ | 0.4107 | 400 | 0.9453 | 0.7802 | 0.6643 | 0.6246 | 0.7531 | 0.7610 | 0.7441 | 0.5493 |
+ | 0.5133 | 500 | 0.9202 | - | - | - | - | - | - | - |
+ | 0.6160 | 600 | 0.7887 | 0.7741 | 0.6549 | 0.6171 | 0.7542 | 0.7672 | 0.7540 | 0.5482 |
+ | 0.7187 | 700 | 0.7604 | - | - | - | - | - | - | - |
+ | 0.8214 | 800 | 0.7219 | 0.7846 | 0.6674 | 0.6244 | 0.7648 | 0.7741 | 0.7592 | 0.5497 |
+ | 0.9240 | 900 | 0.6965 | - | - | - | - | - | - | - |
+ | 1.0267 | 1000 | 0.6253 | 0.7646 | 0.6391 | 0.6122 | 0.7503 | 0.7825 | 0.7704 | 0.5463 |
+ | 1.1294 | 1100 | 0.4737 | - | - | - | - | - | - | - |
+ | 1.2320 | 1200 | 0.5055 | 0.7758 | 0.6582 | 0.6178 | 0.7514 | 0.7857 | 0.7764 | 0.5501 |
+ | 1.3347 | 1300 | 0.5042 | - | - | - | - | - | - | - |
+ | 1.4374 | 1400 | 0.5073 | 0.7613 | 0.6578 | 0.6178 | 0.7505 | 0.7829 | 0.7762 | 0.5452 |
+ | 1.5400 | 1500 | 0.4975 | - | - | - | - | - | - | - |
+ | 1.6427 | 1600 | 0.5242 | 0.7736 | 0.6673 | 0.6279 | 0.7555 | 0.7940 | 0.7859 | 0.5477 |
+ | 1.7454 | 1700 | 0.4713 | - | - | - | - | - | - | - |
+ | 1.8480 | 1800 | 0.4814 | 0.7845 | 0.6733 | 0.6285 | 0.7642 | 0.7992 | 0.7904 | 0.5449 |
+ | 1.9507 | 1900 | 0.4526 | - | - | - | - | - | - | - |
+ | 2.0544 | 2000 | 0.36 | 0.7790 | 0.6639 | 0.6252 | 0.7500 | 0.8032 | 0.7888 | 0.5499 |
+ | 2.1571 | 2100 | 0.3744 | - | - | - | - | - | - | - |
+ | 2.2598 | 2200 | 0.3031 | 0.7787 | 0.6614 | 0.6190 | 0.7537 | 0.7993 | 0.7811 | 0.5476 |
+ | 2.3624 | 2300 | 0.3638 | - | - | - | - | - | - | - |
+ | 2.4651 | 2400 | 0.358 | 0.7798 | 0.6615 | 0.6258 | 0.7497 | 0.8018 | 0.7828 | 0.5481 |
+ | 2.5678 | 2500 | 0.3247 | - | - | - | - | - | - | - |
+ | 2.6704 | 2600 | 0.3247 | 0.7854 | 0.6663 | 0.6248 | 0.7560 | 0.8081 | 0.7835 | 0.5452 |
+ | 2.7731 | 2700 | 0.3263 | - | - | - | - | - | - | - |
+ | 2.8758 | 2800 | 0.3212 | 0.7761 | 0.6681 | 0.6250 | 0.7517 | 0.8121 | 0.7927 | 0.5458 |
+ | 2.9784 | 2900 | 0.3291 | - | - | - | - | - | - | - |
+ | 3.0821 | 3000 | 0.2816 | 0.7727 | 0.6604 | 0.6163 | 0.7370 | 0.8163 | 0.7985 | 0.5473 |
+ | 3.1848 | 3100 | 0.2698 | - | - | - | - | - | - | - |
+ | 3.2875 | 3200 | 0.2657 | 0.7757 | 0.6615 | 0.6247 | 0.7417 | 0.8117 | 0.8004 | 0.5436 |
+ | 3.3901 | 3300 | 0.2724 | - | - | - | - | - | - | - |
+ | 3.4928 | 3400 | 0.2584 | 0.7850 | 0.6583 | 0.6320 | 0.7458 | 0.8120 | 0.7980 | 0.5454 |
+ | 3.5955 | 3500 | 0.2573 | - | - | - | - | - | - | - |
+ | 3.6982 | 3600 | 0.2744 | 0.7796 | 0.6552 | 0.6237 | 0.7409 | 0.8193 | 0.8018 | 0.5466 |
+ | 3.8008 | 3700 | 0.3054 | - | - | - | - | - | - | - |
+ | 3.9035 | 3800 | 0.2727 | 0.7825 | 0.6642 | 0.6293 | 0.7504 | 0.8213 | 0.8058 | 0.5463 |
+ | 4.0062 | 3900 | 0.2353 | - | - | - | - | - | - | - |
+ | 4.1088 | 4000 | 0.2353 | 0.7747 | 0.6628 | 0.6263 | 0.7384 | 0.8239 | 0.8065 | 0.5447 |
+ | 4.2115 | 4100 | 0.2385 | - | - | - | - | - | - | - |
+ | 4.3142 | 4200 | 0.231 | 0.7811 | 0.6608 | 0.6254 | 0.7463 | 0.8226 | 0.8051 | 0.5442 |
+ 
+ 
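+ The `cosine_ndcg@200` columns above come from information-retrieval evaluators; a hedged sketch of computing such a metric with `InformationRetrievalEvaluator`, using toy stand-ins for the real eval splits:
+ 
+ ```python
+ # Hedged sketch: queries/corpus below are illustrative, not the actual eval data.
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers.evaluation import InformationRetrievalEvaluator
+ 
+ model = SentenceTransformer("path/to/checkpoint")  # placeholder checkpoint path
+ 
+ queries = {"q1": "technical manager"}                                   # query id -> text
+ corpus = {"d1": "Technischer Direktor für Bühne, Film und Fernsehen"}   # doc id -> text
+ relevant_docs = {"q1": {"d1"}}                                          # query id -> relevant doc ids
+ 
+ evaluator = InformationRetrievalEvaluator(
+     queries, corpus, relevant_docs,
+     ndcg_at_k=[200],   # matches the NDCG@200 reported in the table above
+     name="full_en",
+ )
+ print(evaluator(model))
+ ```
+ 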
+ ### Framework Versions
+ - Python: 3.11.11
+ - Sentence Transformers: 4.1.0
+ - Transformers: 4.51.2
+ - PyTorch: 2.6.0+cu124
+ - Accelerate: 1.6.0
+ - Datasets: 3.5.0
+ - Tokenizers: 0.21.1
+ 
+ ## Citation
+ 
+ ### BibTeX
+ 
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+ 
+ #### GISTEmbedLoss
+ ```bibtex
+ @misc{solatorio2024gistembed,
+     title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
+     author={Aivin V. Solatorio},
+     year={2024},
+     eprint={2402.16829},
+     archivePrefix={arXiv},
+     primaryClass={cs.LG}
+ }
+ ```
+ 
+ <!--
+ ## Glossary
+ 
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+ 
+ <!--
+ ## Model Card Authors
+ 
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+ 
+ <!--
+ ## Model Card Contact
+ 
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-4200/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b78a4739eadca247b8b2e2c00baf6260120f87a52a561be923312b94cba8232a
+ size 2271064456
checkpoint-4200/modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
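
This `modules.json` describes the standard three-stage Sentence Transformers pipeline (Transformer → Pooling → Normalize). A hedged sketch of assembling the same pipeline by hand; the backbone name is an assumption based on the checkpoint's `config.json` (an XLM-RoBERTa-large-shaped encoder), and `max_seq_length=512` and CLS pooling follow the `sentence_bert_config.json` and pooling configs in this commit:

```python
# Sketch only: the backbone identifier is an assumption, not stated in this card.
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer("FacebookAI/xlm-roberta-large", max_seq_length=512)
pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="cls")
normalize = models.Normalize()

# Modules are applied in order, mirroring idx 0/1/2 in modules.json.
model = SentenceTransformer(modules=[transformer, pooling, normalize])
```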
checkpoint-4200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5999173a8d03d315a6eeef8cf9fb69d1d387f37263ccc42a43a215f5e76d8f22
+ size 4533972937
checkpoint-4200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58d907753d591cf08f2e02a54be246122575c6702ce42ce38fd9f774562be3d4
+ size 15958
checkpoint-4200/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b186621336b0abea983113004ecd9b3118c0bc11e75a3da16b0bb6e48f4f1d5
+ size 988
checkpoint-4200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36387b84c88d119079837fce0adc18fc34124598fb149b8eb92af5bb4134e761
+ size 1064
checkpoint-4200/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 512,
+   "do_lower_case": false
+ }
checkpoint-4200/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
checkpoint-4200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624
checkpoint-4400/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "XLMRobertaModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 8194,
+   "model_type": "xlm-roberta",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 250002
+ }
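
This config defines a 24-layer, 1024-hidden, 8194-position XLM-RoBERTa encoder. A hedged sketch of instantiating the architecture from the checkpoint directory with Transformers (the trained weights themselves live in `model.safetensors`):

```python
# Sketch: builds the architecture from config.json alone; use from_pretrained
# on the checkpoint directory to also load the weights.
from transformers import XLMRobertaConfig, XLMRobertaModel

config = XLMRobertaConfig.from_pretrained("checkpoint-4400")
model = XLMRobertaModel(config)  # randomly initialized from config alone
```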
checkpoint-4400/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b1f51d5c393dbe3441f3e6303002dfa4d493abbc24118e5f4d0a03ee025a9bd
+ size 2271064456
checkpoint-4400/modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
checkpoint-4400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a9e6288ee5586d0ffcde2044dc04da2224eade48e12128ba7e97e6946e6e525
+ size 4533972937
checkpoint-4400/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba81f3b1771a8b460f12027c4271da4d35cf373c809e35a113805049901d939b
+ size 988
checkpoint-4400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0828827d515ca1d2148a70c185e3b6c96109604210f6fb69f993d1e2b7882ffb
+ size 1064
checkpoint-4400/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
checkpoint-4400/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624
checkpoint-4600/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f1c9c9e2b9e3d67f10744473f5e1d1d314b24f390c90d03e669760e0af5c9c4
+ size 2271064456
checkpoint-4600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05e104089212061a9d12b0d2f8e82d52a0e257172d6f6a345d45abaebaf0d3bb
+ size 4533972937
checkpoint-4600/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
checkpoint-4600/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
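
These are the standard XLM-RoBERTa special tokens. A quick illustrative check, assuming the checkpoint directory contains the full tokenizer files alongside this map:

```python
# Sketch: the path is the checkpoint folder holding special_tokens_map.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoint-4600")
print(tok.cls_token, tok.sep_token, tok.mask_token, tok.pad_token)  # <s> </s> <mask> <pad>
```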
checkpoint-4800/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 1024,
+   "pooling_mode_cls_token": true,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
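
This pooling config selects CLS-token pooling over 1024-dimensional token embeddings; the equivalent module, as a sketch:

```python
# Sketch mirroring 1_Pooling/config.json: only the CLS vector is used
# as the sentence embedding.
from sentence_transformers import models

pooling = models.Pooling(
    word_embedding_dimension=1024,
    pooling_mode_cls_token=True,
    pooling_mode_mean_tokens=False,
)
```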
checkpoint-4800/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d2ac2c0c92b9ef57cc468b9281831355dd814fe1281d4477c4f70a583159129
+ size 2271064456
checkpoint-4800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:881841f70dc9d4f24a46057ced1d2e32ca60e5ff2f9355c3d66879ea2de63e56
+ size 4533972937
checkpoint-4800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0483b24401bb5c934be17d3fb87b418062454f4034967bcd424997391ddc532
+ size 15958
checkpoint-4800/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdf548d9246c1d9e15e20b2ddca08d24f314024d3fae6a40e3553c10631d480a
+ size 988
checkpoint-4800/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
checkpoint-4800/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a948c4f5667f6700da28d0d70c0c6f024b018ee933ba85d5cc9de9d626dadca
+ size 5624