Skip to content

vllm.attention.backends.registry

Attention backend registry

BACKEND_MAP module-attribute

# Fully-qualified implementation class for each in-tree attention backend.
# Keys are _Backend enum members; values are dotted "module.ClassName" paths
# that are resolved lazily (see backend_to_class), so importing this registry
# does not import every backend.  Out-of-tree platforms may add or overwrite
# entries via the @register_attn_backend decorator.  Note: IPEX and
# NO_ATTENTION members intentionally have no entry here.
BACKEND_MAP = {
    _Backend.FLASH_ATTN: "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend",
    _Backend.TRITON_ATTN: "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend",
    _Backend.XFORMERS: "vllm.v1.attention.backends.xformers.XFormersAttentionBackend",
    _Backend.ROCM_ATTN: "vllm.v1.attention.backends.rocm_attn.RocmAttentionBackend",
    _Backend.ROCM_AITER_MLA: "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend",
    _Backend.ROCM_AITER_FA: "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend",
    _Backend.TORCH_SDPA: "vllm.v1.attention.backends.cpu_attn.TorchSDPABackend",
    _Backend.FLASHINFER: "vllm.v1.attention.backends.flashinfer.FlashInferBackend",
    _Backend.FLASHINFER_MLA: "vllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackend",
    _Backend.TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend",
    _Backend.CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend",
    _Backend.FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend",
    _Backend.FLASHMLA_SPARSE: "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend",
    _Backend.FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend",
    _Backend.PALLAS: "vllm.v1.attention.backends.pallas.PallasAttentionBackend",
    _Backend.FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend",
    _Backend.TREE_ATTN: "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend",
    _Backend.ROCM_AITER_UNIFIED_ATTN: "vllm.v1.attention.backends.rocm_aiter_unified_attn.RocmAiterUnifiedAttentionBackend",
}

_Backend

Bases: Enum

Source code in vllm/attention/backends/registry.py
class _Backend(enum.Enum):
    """Identifiers for the attention backend implementations known in-tree.

    Most members key an entry in BACKEND_MAP, which gives the fully-qualified
    class path of the backing implementation; IPEX and NO_ATTENTION have no
    BACKEND_MAP entry.  Member values come from enum.auto() and carry no
    meaning beyond identity — compare by member, never by value.
    """

    FLASH_ATTN = enum.auto()
    TRITON_ATTN = enum.auto()
    XFORMERS = enum.auto()
    ROCM_ATTN = enum.auto()
    # *_MLA members map to implementations under the ...backends.mla package
    # (per BACKEND_MAP).
    ROCM_AITER_MLA = enum.auto()
    ROCM_AITER_FA = enum.auto()  # used for ViT attn backend
    TORCH_SDPA = enum.auto()
    FLASHINFER = enum.auto()
    FLASHINFER_MLA = enum.auto()
    TRITON_MLA = enum.auto()
    CUTLASS_MLA = enum.auto()
    FLASHMLA = enum.auto()
    FLASHMLA_SPARSE = enum.auto()
    FLASH_ATTN_MLA = enum.auto()
    PALLAS = enum.auto()
    # No BACKEND_MAP entry for the two members below.
    IPEX = enum.auto()
    NO_ATTENTION = enum.auto()
    FLEX_ATTENTION = enum.auto()
    TREE_ATTN = enum.auto()
    ROCM_AITER_UNIFIED_ATTN = enum.auto()

CUTLASS_MLA class-attribute instance-attribute

CUTLASS_MLA = auto()

FLASHINFER class-attribute instance-attribute

FLASHINFER = auto()

FLASHINFER_MLA class-attribute instance-attribute

FLASHINFER_MLA = auto()

FLASHMLA class-attribute instance-attribute

FLASHMLA = auto()

FLASHMLA_SPARSE class-attribute instance-attribute

FLASHMLA_SPARSE = auto()

FLASH_ATTN class-attribute instance-attribute

FLASH_ATTN = auto()

FLASH_ATTN_MLA class-attribute instance-attribute

FLASH_ATTN_MLA = auto()

FLEX_ATTENTION class-attribute instance-attribute

FLEX_ATTENTION = auto()

IPEX class-attribute instance-attribute

IPEX = auto()

NO_ATTENTION class-attribute instance-attribute

NO_ATTENTION = auto()

PALLAS class-attribute instance-attribute

PALLAS = auto()

ROCM_AITER_FA class-attribute instance-attribute

ROCM_AITER_FA = auto()

ROCM_AITER_MLA class-attribute instance-attribute

ROCM_AITER_MLA = auto()

ROCM_AITER_UNIFIED_ATTN class-attribute instance-attribute

ROCM_AITER_UNIFIED_ATTN = auto()

ROCM_ATTN class-attribute instance-attribute

ROCM_ATTN = auto()

TORCH_SDPA class-attribute instance-attribute

TORCH_SDPA = auto()

TREE_ATTN class-attribute instance-attribute

TREE_ATTN = auto()

TRITON_ATTN class-attribute instance-attribute

TRITON_ATTN = auto()

TRITON_MLA class-attribute instance-attribute

TRITON_MLA = auto()

XFORMERS class-attribute instance-attribute

XFORMERS = auto()

backend_name_to_enum

backend_name_to_enum(
    backend_name: str,
) -> Optional[_Backend]

Convert a string backend name to a _Backend enum value.

Returns:

Name Type Description
_Backend Optional[_Backend]

enum value if backend_name is a valid in-tree type

None Optional[_Backend]

None is returned otherwise, i.e. when the name is not a valid in-tree type or when an out-of-tree platform is loaded.

Source code in vllm/attention/backends/registry.py
def backend_name_to_enum(backend_name: str) -> Optional[_Backend]:
    """Map a backend name string onto its _Backend enum member.

    Returns:
        The matching _Backend member when ``backend_name`` names a valid
        in-tree backend; ``None`` otherwise (invalid in-tree name, or an
        out-of-tree platform is loaded).
    """
    assert backend_name is not None
    # EAFP: a single enum lookup instead of a membership pre-check.
    try:
        return _Backend[backend_name]
    except KeyError:
        return None

backend_to_class

backend_to_class(backend: _Backend) -> type

Get the backend class.

Parameters:

Name Type Description Default
backend _Backend

The backend enum value

required

Returns:

Type Description
type

The backend class

Source code in vllm/attention/backends/registry.py
def backend_to_class(backend: _Backend) -> type:
    """Resolve a backend enum value to its implementation class.

    Looks up the registered qualified name and imports/resolves it.

    Args:
        backend: The backend enum value.

    Returns:
        The backend class object.
    """
    return resolve_obj_by_qualname(backend_to_class_str(backend))

backend_to_class_str

backend_to_class_str(backend: _Backend) -> str

Get the backend class string

Parameters:

Name Type Description Default
backend _Backend

The backend enum value

required

Returns:

Type Description
str

The backend class string

Source code in vllm/attention/backends/registry.py
def backend_to_class_str(backend: _Backend) -> str:
    """Look up the class path registered for a backend.

    Args:
        backend: The backend enum value.

    Returns:
        The dotted "module.ClassName" string stored in BACKEND_MAP.

    Raises:
        KeyError: If no class path is registered for ``backend``.
    """
    return BACKEND_MAP[backend]

register_attn_backend

register_attn_backend(
    backend: _Backend, class_path: Optional[str] = None
)

Decorator: register a custom attention backend into BACKEND_MAP. If class_path is provided, it is used directly; otherwise the path is auto-generated from the decorated class object. Validation only checks that 'backend' is a valid _Backend enum member, and overwriting existing mappings is allowed. This enables other hardware platforms to plug in custom out-of-tree backends.

Source code in vllm/attention/backends/registry.py
def register_attn_backend(backend: _Backend, class_path: Optional[str] = None):
    """Decorator factory that registers an attention backend in BACKEND_MAP.

    The mapped class path is ``class_path`` when one is supplied; otherwise it
    is derived from the decorated class's ``__module__`` and ``__qualname__``.
    Validation only checks that ``backend`` is a _Backend enum member, and
    existing mappings may be overwritten — this lets other hardware platforms
    plug in custom out-of-tree backends.

    Args:
        backend: The _Backend enum member to (re)register.
        class_path: Optional fully-qualified class path to register.

    Returns:
        A class decorator that records the mapping and returns the class
        unchanged.

    Raises:
        ValueError: If ``backend`` is not a _Backend enum member.
    """
    if not isinstance(backend, _Backend):
        raise ValueError(f"{backend} is not a valid _Backend enum value.")

    def decorator(cls):
        # Truthiness (not an is-None check) mirrors the original `or`
        # semantics: an empty class_path also falls back to auto-generation.
        if class_path:
            BACKEND_MAP[backend] = class_path
        else:
            BACKEND_MAP[backend] = f"{cls.__module__}.{cls.__qualname__}"
        return cls

    return decorator