-
Notifications
You must be signed in to change notification settings - Fork 25.7k
Closed
Labels
Description
Create an inference endpoint:
curl -X PUT -u elastic:password -H "Content-Type: application/json" -d '{"service":"elser","service_settings":{"num_allocations":1,"num_threads":1}}' "http://localhost:9200/_inference/sparse_embedding/my-elser-model"
The response shows:
{
"inference_id": "my-elser-model",
"task_type": "sparse_embedding",
"service": "elasticsearch",
"service_settings": {
"num_allocations": 1,
"num_threads": 1,
"model_id": ".elser_model_2_linux-x86_64"
},
"chunking_settings": {
"strategy": "sentence",
"max_chunk_size": 250,
"sentence_overlap": 1
}
}
Enable adaptive allocations:
curl -X POST -u elastic:password -H "Content-Type: application/json" -d '{"adaptive_allocations": { "enabled": "true", "min_number_of_allocations": 0, "max_number_of_allocations": 1} }' "http://localhost:9200/_ml/trained_models/my-elser-model/deployment/_update"
The response shows:
{
"assignment": {
"task_parameters": {
"model_id": ".elser_model_2_linux-x86_64",
"deployment_id": "my-elser-model",
"model_bytes": 274756282,
"threads_per_allocation": 1,
"number_of_allocations": 1,
"queue_capacity": 10000,
"cache_size": "274756282b",
"priority": "normal",
"per_deployment_memory_bytes": 0,
"per_allocation_memory_bytes": 0
},
"routing_table": {
"VtOsT8emQHaBZzXvZX8g7Q": {
"current_allocations": 1,
"target_allocations": 1,
"routing_state": "started",
"reason": ""
}
},
"assignment_state": "started",
"start_time": "2025-06-30T15:01:20.059416449Z",
"max_assigned_allocations": 1,
"adaptive_allocations": {
"enabled": true,
"min_number_of_allocations": 0,
"max_number_of_allocations": 1
}
}
}
Retrieve the inference endpoint again:
curl -u elastic:password http://localhost:9200/_inference/sparse_embedding/my-elser-model
Expected (the endpoint's service_settings contain the adaptive allocations settings):
{
"endpoints": [
{
"inference_id": "my-elser-model",
"task_type": "sparse_embedding",
"service": "elasticsearch",
"service_settings": {
"num_allocations": 1,
"num_threads": 1,
"adaptive_allocations": {
"enabled": true,
"min_number_of_allocations": 0,
"max_number_of_allocations": 1
},
"model_id": ".elser_model_2_linux-x86_64"
},
"chunking_settings": {
"strategy": "sentence",
"max_chunk_size": 250,
"sentence_overlap": 1
}
}
]
}
Actual:
{
"endpoints": [
{
"inference_id": "my-elser-model",
"task_type": "sparse_embedding",
"service": "elasticsearch",
"service_settings": {
"num_allocations": 1,
"num_threads": 1,
"model_id": ".elser_model_2_linux-x86_64"
},
"chunking_settings": {
"strategy": "sentence",
"max_chunk_size": 250,
"sentence_overlap": 1
}
}
]
}