[ML] Get Inference returns inaccurate Trained Models #130339

Closed

Assignees

Labels

opened

on Jun 30, 2025

Create an inference endpoint:

curl -X PUT -u elastic:password -H "Content-Type: application/json" -d '{"service":"elser","service_settings":{"num_allocations":1,"num_threads":1}}' "http://localhost:9200/_inference/sparse_embedding/my-elser-model"

shows:

{
  "inference_id": "my-elser-model",
  "task_type": "sparse_embedding",
  "service": "elasticsearch",
  "service_settings": {
    "num_allocations": 1,
    "num_threads": 1,
    "model_id": ".elser_model_2_linux-x86_64"
  },
  "chunking_settings": {
    "strategy": "sentence",
    "max_chunk_size": 250,
    "sentence_overlap": 1
  }
}

Enable adaptive allocations:

curl -X POST -u elastic:password -H "Content-Type: application/json" -d '{"adaptive_allocations": { "enabled": "true", "min_number_of_allocations": 0, "max_number_of_allocations": 1} }' "http://localhost:9200/_ml/trained_models/my-elser-model/deployment/_update"

shows:

{
  "assignment": {
    "task_parameters": {
      "model_id": ".elser_model_2_linux-x86_64",
      "deployment_id": "my-elser-model",
      "model_bytes": 274756282,
      "threads_per_allocation": 1,
      "number_of_allocations": 1,
      "queue_capacity": 10000,
      "cache_size": "274756282b",
      "priority": "normal",
      "per_deployment_memory_bytes": 0,
      "per_allocation_memory_bytes": 0
    },
    "routing_table": {
      "VtOsT8emQHaBZzXvZX8g7Q": {
        "current_allocations": 1,
        "target_allocations": 1,
        "routing_state": "started",
        "reason": ""
      }
    },
    "assignment_state": "started",
    "start_time": "2025-06-30T15:01:20.059416449Z",
    "max_assigned_allocations": 1,
    "adaptive_allocations": {
      "enabled": true,
      "min_number_of_allocations": 0,
      "max_number_of_allocations": 1
    }
  }
}

Then get the inference endpoint:

curl -u elastic:password http://localhost:9200/_inference/sparse_embedding/my-elser-model

Expected response (`service_settings` contains the `adaptive_allocations` that were just enabled):

{
  "endpoints": [
    {
      "inference_id": "my-elser-model",
      "task_type": "sparse_embedding",
      "service": "elasticsearch",
      "service_settings": {
        "num_allocations": 1,
        "num_threads": 1,
        "adaptive_allocations": {
          "enabled": true,
          "min_number_of_allocations": 0,
          "max_number_of_allocations": 1
        },
        "model_id": ".elser_model_2_linux-x86_64"
      },
      "chunking_settings": {
        "strategy": "sentence",
        "max_chunk_size": 250,
        "sentence_overlap": 1
      }
    }
  ]
}

Actual response (`adaptive_allocations` is missing from `service_settings`):

{
  "endpoints": [
    {
      "inference_id": "my-elser-model",
      "task_type": "sparse_embedding",
      "service": "elasticsearch",
      "service_settings": {
        "num_allocations": 1,
        "num_threads": 1,
        "model_id": ".elser_model_2_linux-x86_64"
      },
      "chunking_settings": {
        "strategy": "sentence",
        "max_chunk_size": 250,
        "sentence_overlap": 1
      }
    }
  ]
}

Metadata

Assignees

prwhelan

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests