static-embeddings / tomaarsen.py
gregtatum's picture
Add the initial models
f7fef32
raw
history blame contribute delete
607 Bytes
from sentence_transformers import SentenceTransformer
from torch.nn import EmbeddingBag
import torch
# Load the static embedding model, pull out its raw embedding matrix and
# report how much memory that matrix would occupy at several embedding widths.
model = SentenceTransformer("tomaarsen/static-retrieval-mrl-en-v1")
embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore

# detach() returns a plain tensor sharing the weight's storage but excluded
# from autograd tracking; it replaces the legacy `torch.Tensor(...)`
# constructor, which is discouraged for wrapping an existing tensor.
embeddings: torch.Tensor = embedding_bag.weight.detach()

# Sanity check: the rest of the script reports sizes for a 30522 x 1024 table.
expected_shape = torch.Size([30522, 1024])
assert embeddings.shape == expected_shape, (
    f"unexpected embedding shape {tuple(embeddings.shape)}, "
    f"expected {tuple(expected_shape)}"
)

# Derive the per-element byte width from the tensor itself instead of
# hard-coding 4, so the report stays correct if the weights are not float32.
num_rows = embeddings.shape[0]
bytes_per_element = embeddings.element_size()

# Size of the table if every row were cut down to `dim` values.
for dim in (1024, 512, 256):
    size_mib = num_rows * dim * bytes_per_element / 1024 / 1024
    print(f"{dim} dim - {size_mib:,.1f} MiB:")

# Show the first row of the table as a quick visual check of the values.
print("Embeddings[0]", embeddings[0])