vllm.model_executor.model_loader.base_loader ¶

logger `module-attribute` ¶

# Module-level logger created by `init_logger` from this module's `__name__`.
logger = init_logger(__name__)

BaseModelLoader ¶

Bases: ABC

Base class for model loaders.

Source code in vllm/model_executor/model_loader/base_loader.py

class BaseModelLoader(ABC):
    """Abstract interface shared by model loaders.

    Concrete loaders implement `download_model` and `load_weights`;
    `load_model` combines model initialization with weight loading.
    """

    def __init__(self, load_config: LoadConfig):
        """Keep the given load configuration on the instance."""
        self.load_config = load_config

    @abstractmethod
    def download_model(self, model_config: ModelConfig) -> None:
        """Fetch the model described by `model_config` so that it can be
        loaded right away.

        Must be overridden; the base implementation raises
        `NotImplementedError`.
        """
        raise NotImplementedError()

    @abstractmethod
    def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
        """Load weights into `model` in place.

        Exposed as a standalone API so that weights can be loaded into a
        model that has already been initialized. Must be overridden; the
        base implementation raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def load_model(
        self, vllm_config: VllmConfig, model_config: ModelConfig
    ) -> nn.Module:
        """Initialize a model, load its weights, and return `model.eval()`.

        The load device is `vllm_config.load_config.device` when that is not
        None; otherwise `vllm_config.device_config.device` is used.
        """
        cfg_device = vllm_config.device_config
        cfg_load = vllm_config.load_config
        if cfg_load.device is None:
            load_device = cfg_device.device
        else:
            load_device = cfg_load.device
        target_device = torch.device(load_device)

        with set_default_torch_dtype(model_config.dtype):
            # Only model construction runs inside the device context; the
            # remaining steps run under the default-dtype context alone.
            with target_device:
                model = initialize_model(
                    vllm_config=vllm_config,
                    model_config=model_config,
                )

            logger.debug("Loading weights on %s ...", load_device)
            # Quantization is not part of `load_weights`; it happens
            # afterwards.
            self.load_weights(model, model_config)
            process_weights_after_loading(model, model_config, target_device)

        return model.eval()

load_config `instance-attribute` ¶

load_config = load_config

__init__ ¶

__init__(load_config: LoadConfig)

Source code in vllm/model_executor/model_loader/base_loader.py

def __init__(self, load_config: LoadConfig):
    """Keep the given load configuration on the instance."""
    self.load_config = load_config

download_model `abstractmethod` ¶

download_model(model_config: ModelConfig) -> None

Download a model so that it can be immediately loaded.

Source code in vllm/model_executor/model_loader/base_loader.py

@abstractmethod
def download_model(self, model_config: ModelConfig) -> None:
    """Fetch the model described by `model_config` so that it can be
    loaded right away.

    Must be overridden; the base implementation raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

load_model ¶

load_model(
    vllm_config: VllmConfig, model_config: ModelConfig
) -> Module

Load a model with the given configurations.

Source code in vllm/model_executor/model_loader/base_loader.py

def load_model(
    self, vllm_config: VllmConfig, model_config: ModelConfig
) -> nn.Module:
    """Initialize a model, load its weights, and return `model.eval()`.

    The load device is `vllm_config.load_config.device` when that is not
    None; otherwise `vllm_config.device_config.device` is used.
    """
    cfg_device = vllm_config.device_config
    cfg_load = vllm_config.load_config
    if cfg_load.device is None:
        load_device = cfg_device.device
    else:
        load_device = cfg_load.device
    target_device = torch.device(load_device)

    with set_default_torch_dtype(model_config.dtype):
        # Only model construction runs inside the device context; the
        # remaining steps run under the default-dtype context alone.
        with target_device:
            model = initialize_model(
                vllm_config=vllm_config,
                model_config=model_config,
            )

        logger.debug("Loading weights on %s ...", load_device)
        # Quantization is not part of `load_weights`; it happens afterwards.
        self.load_weights(model, model_config)
        process_weights_after_loading(model, model_config, target_device)

    return model.eval()

load_weights `abstractmethod` ¶

load_weights(
    model: Module, model_config: ModelConfig
) -> None

Load weights into a model. This standalone API allows in-place weight loading for an already-initialized model.

Source code in vllm/model_executor/model_loader/base_loader.py

@abstractmethod
def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
    """Load weights into `model` in place.

    Exposed as a standalone API so that weights can be loaded into a
    model that has already been initialized. Must be overridden; the
    base implementation raises `NotImplementedError`.
    """
    raise NotImplementedError()

vllm.model_executor.model_loader.base_loader ¶

logger module-attribute ¶

BaseModelLoader ¶

load_config instance-attribute ¶

__init__ ¶

download_model abstractmethod ¶

load_model ¶

load_weights abstractmethod ¶

logger `module-attribute` ¶

load_config `instance-attribute` ¶

__init__ ¶

download_model `abstractmethod` ¶

load_weights `abstractmethod` ¶