vllm.attention.backends.registry ¶
Attention backend registry
BACKEND_MAP module-attribute ¶
BACKEND_MAP = {
FLASH_ATTN: "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend",
TRITON_ATTN: "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend",
XFORMERS: "vllm.v1.attention.backends.xformers.XFormersAttentionBackend",
ROCM_ATTN: "vllm.v1.attention.backends.rocm_attn.RocmAttentionBackend",
ROCM_AITER_MLA: "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend",
ROCM_AITER_FA: "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend",
TORCH_SDPA: "vllm.v1.attention.backends.cpu_attn.TorchSDPABackend",
FLASHINFER: "vllm.v1.attention.backends.flashinfer.FlashInferBackend",
FLASHINFER_MLA: "vllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackend",
TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend",
CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend",
FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend",
FLASHMLA_SPARSE: "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend",
FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend",
PALLAS: "vllm.v1.attention.backends.pallas.PallasAttentionBackend",
FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend",
TREE_ATTN: "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend",
ROCM_AITER_UNIFIED_ATTN: "vllm.v1.attention.backends.rocm_aiter_unified_attn.RocmAiterUnifiedAttentionBackend",
}
_Backend ¶
Bases: Enum
Source code in vllm/attention/backends/registry.py
backend_name_to_enum ¶
Convert a string backend name to a _Backend enum value.
Returns:
Name | Type | Description |
---|---|---|
_Backend | Optional[_Backend] | Enum value if backend_name is a valid in-tree backend name. |
None | Optional[_Backend] | None if the name is not a valid in-tree backend, or an out-of-tree platform is loaded. |
backend_to_class ¶
backend_to_class_str ¶
register_attn_backend ¶
Decorator: register a custom attention backend into BACKEND_MAP.
- If class_path is provided, use it.
- Otherwise, auto-generate the path from the class object.
Validation only checks that backend is a valid _Backend enum member; overwriting existing mappings is allowed. This enables other hardware platforms to plug in custom out-of-tree backends.