OllamaLLM

Bases: AsyncLLM

Ollama LLM implementation running the Async API client.

Attributes:
  • model (str): the model name to use for the LLM, e.g. "notus".
  • host (Optional[RuntimeParameter[str]]): the Ollama server host.
  • timeout (RuntimeParameter[int]): the timeout for the LLM. Defaults to 120.
  • _aclient (Optional[AsyncClient]): the AsyncClient to use for the Ollama API. It is meant to be used internally. Set in the load method.

Runtime parameters
  • host: the Ollama server host.
  • timeout: the client timeout for the Ollama API. Defaults to 120.
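
A minimal usage sketch, assuming distilabel and the ollama Python client are installed, that OllamaLLM is importable from distilabel.llms, and that a local Ollama server is reachable at the default http://localhost:11434 (the host value and model name below are illustrative):

from distilabel.llms import OllamaLLM

# "notus" is an example model name; use any model pulled into your Ollama server.
llm = OllamaLLM(
    model="notus",
    host="http://localhost:11434",  # runtime parameter; defaults to None (client default)
    timeout=120,                    # runtime parameter; client timeout, defaults to 120
)
llm.load()  # instantiates the underlying ollama.AsyncClient

print(llm.model_name)  # -> "notus"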
Source code in src/distilabel/llms/ollama.py
class OllamaLLM(AsyncLLM):
    """Ollama LLM implementation running the Async API client.

    Attributes:
        model: the model name to use for the LLM e.g. "notus".
        host: the Ollama server host.
        timeout: the timeout for the LLM. Defaults to `120`.
        _aclient: the `AsyncClient` to use for the Ollama API. It is meant to be used internally.
            Set in the `load` method.

    Runtime parameters:
        - `host`: the Ollama server host.
        - `timeout`: the client timeout for the Ollama API. Defaults to `120`.
    """

    model: str
    host: Optional[RuntimeParameter[str]] = Field(
        default=None, description="The host of the Ollama API."
    )
    timeout: RuntimeParameter[int] = Field(
        default=120, description="The timeout for the Ollama API."
    )
    follow_redirects: bool = True
    structured_output: Optional[RuntimeParameter[InstructorStructuredOutputType]] = (
        Field(
            default=None,
            description="The structured output format to use across all the generations.",
        )
    )

    _num_generations_param_supported = False

    _aclient: Optional["AsyncClient"] = PrivateAttr(...)

    def load(self) -> None:
        """Loads the `AsyncClient` to use Ollama async API."""
        super().load()

        try:
            from ollama import AsyncClient

            self._aclient = AsyncClient(
                host=self.host,
                timeout=self.timeout,
                follow_redirects=self.follow_redirects,
            )
        except ImportError as e:
            raise ImportError(
                "Ollama Python client is not installed. Please install it using"
                " `pip install ollama`."
            ) from e

    @property
    def model_name(self) -> str:
        """Returns the model name used for the LLM."""
        return self.model

    @validate_call
    async def agenerate(  # type: ignore
        self,
        input: StandardInput,
        format: Literal["", "json"] = "",
        # TODO: include relevant options from `Options` in `agenerate` method.
        options: Union[Options, None] = None,
        keep_alive: Union[bool, None] = None,
    ) -> GenerateOutput:
        """
        Generates a response asynchronously, using the [Ollama Async API definition](https://github.com/ollama/ollama-python).

        Args:
            input: the input to use for the generation.
            format: the format to use for the generation. Defaults to `""`.
            options: the options to use for the generation. Defaults to `None`.
            keep_alive: whether to keep the connection alive. Defaults to `None`.

        Returns:
            A list of strings as completion for the given input.
        """
        text = None
        try:
            completion: Dict[str, Any] = await self._aclient.chat(  # type: ignore
                model=self.model,
                messages=input,  # type: ignore
                stream=False,
                format=format,
                options=options,
                keep_alive=keep_alive,
            )
            text = completion["message"]["content"]
        except Exception as e:
            self._logger.warning(  # type: ignore
                f"⚠️ Received no response using Ollama client (model: '{self.model_name}')."
                f" Finish reason was: {e}"
            )

        return [text]

model_name: str property

Returns the model name used for the LLM.

agenerate(input, format='', options=None, keep_alive=None) async

Generates a response asynchronously, using the Ollama Async API definition.

Parameters:
  • input (StandardInput): the input to use for the generation. Required.
  • format (Literal['', 'json']): the format to use for the generation. Defaults to "".
  • options (Union[Options, None]): the options to use for the generation. Defaults to None.
  • keep_alive (Union[bool, None]): whether to keep the connection alive. Defaults to None.

Returns:
  • GenerateOutput: a list of strings as completion for the given input.
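
For illustration, a sketch of calling agenerate directly, assuming the LLM has been loaded and an Ollama server is reachable (the prompt content and keep_alive choice are made up for the example):

import asyncio

from distilabel.llms import OllamaLLM

llm = OllamaLLM(model="notus")
llm.load()

async def main() -> None:
    # `input` follows the chat-message format expected by Ollama's chat endpoint.
    generation = await llm.agenerate(
        input=[{"role": "user", "content": "What is the capital of France?"}],
        format="",        # "" for free text, "json" to request JSON output
        keep_alive=True,  # keep the model loaded between requests
    )
    # In the source shown below, the result is a list with a single completion
    # string, or [None] if the client raised and the warning was logged.
    print(generation)

asyncio.run(main())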

Source code in src/distilabel/llms/ollama.py
@validate_call
async def agenerate(  # type: ignore
    self,
    input: StandardInput,
    format: Literal["", "json"] = "",
    # TODO: include relevant options from `Options` in `agenerate` method.
    options: Union[Options, None] = None,
    keep_alive: Union[bool, None] = None,
) -> GenerateOutput:
    """
    Generates a response asynchronously, using the [Ollama Async API definition](https://github.com/ollama/ollama-python).

    Args:
        input: the input to use for the generation.
        format: the format to use for the generation. Defaults to `""`.
        options: the options to use for the generation. Defaults to `None`.
        keep_alive: whether to keep the connection alive. Defaults to `None`.

    Returns:
        A list of strings as completion for the given input.
    """
    text = None
    try:
        completion: Dict[str, Any] = await self._aclient.chat(  # type: ignore
            model=self.model,
            messages=input,  # type: ignore
            stream=False,
            format=format,
            options=options,
            keep_alive=keep_alive,
        )
        text = completion["message"]["content"]
    except Exception as e:
        self._logger.warning(  # type: ignore
            f"⚠️ Received no response using Ollama client (model: '{self.model_name}')."
            f" Finish reason was: {e}"
        )

    return [text]

load()

Loads the AsyncClient to use Ollama async API.
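
A small sketch of the load step, assuming the ollama package is installed (otherwise the ImportError shown in the source below is raised):

from distilabel.llms import OllamaLLM

llm = OllamaLLM(model="notus", timeout=60)

# `load` must be called before `agenerate`: it creates the internal
# ollama.AsyncClient using `host`, `timeout` and `follow_redirects`.
llm.load()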

Source code in src/distilabel/llms/ollama.py
def load(self) -> None:
    """Loads the `AsyncClient` to use Ollama async API."""
    super().load()

    try:
        from ollama import AsyncClient

        self._aclient = AsyncClient(
            host=self.host,
            timeout=self.timeout,
            follow_redirects=self.follow_redirects,
        )
    except ImportError as e:
        raise ImportError(
            "Ollama Python client is not installed. Please install it using"
            " `pip install ollama`."
        ) from e