Skip to content

anyscale

AnyscaleLLM

Bases: OpenAILLM

Source code in src/distilabel/llm/anyscale.py
class AnyscaleLLM(OpenAILLM):
    """LLM wrapper for Anyscale Endpoints, reusing the OpenAI-compatible client
    but pointing it at the Anyscale base URL."""

    def __init__(
        self,
        model: str,
        task: "Task",
        client: Union["OpenAI", None] = None,
        api_key: Union[str, None] = None,
        max_new_tokens: int = 128,
        frequency_penalty: float = 0.0,
        presence_penalty: float = 0.0,
        temperature: float = 1.0,
        top_p: float = 1.0,
        num_threads: Union[int, None] = None,
        prompt_format: Union["SupportedFormats", None] = None,
        prompt_formatting_fn: Union[Callable[..., str], None] = None,
    ) -> None:
        """Initializes the AnyscaleLLM class.

        Args:
            model (str): the model to be used for generation.
            task (Task): the task to be performed by the LLM.
            client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.
                If `None`, a new client will be created. Defaults to `None`.
            api_key (Union[str, None], optional): the Anyscale API key to be used for generation.
                If `None`, the `ANYSCALE_API_KEY` environment variable will be used. Defaults to `None`.
                Visit "https://docs.endpoints.anyscale.com/guides/authenticate/" for more information.
            max_new_tokens (int, optional): the maximum number of tokens to be generated.
                Defaults to 128.
            frequency_penalty (float, optional): the frequency penalty to be used for generation.
                Defaults to 0.0.
            presence_penalty (float, optional): the presence penalty to be used for generation.
                Defaults to 0.0.
            temperature (float, optional): the temperature to be used for generation.
                Defaults to 1.0.
            top_p (float, optional): the top-p value to be used for generation.
                Defaults to 1.0.
            num_threads (Union[int, None], optional): the number of threads to be used
                for parallel generation. If `None`, no parallel generation will be performed.
                Defaults to `None`.
            prompt_format (Union[SupportedFormats, None], optional): the format to be used
                for the prompt. If `None`, the default format of the task will be used, available
                formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,
                but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)
                will be used if no `prompt_formatting_fn` is provided.
            prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be
                applied to the prompt before generation. If `None`, no formatting will be applied.
                Defaults to `None`.

        Raises:
            ImportError: if the `openai` Python client is not installed.
            AssertionError: if the provided `model` is not available in your Anyscale account.

        Examples:
            >>> from distilabel.tasks import TextGenerationTask
            >>> from distilabel.llm import AnyscaleLLM
            >>> llm = AnyscaleLLM(model="HuggingFaceH4/zephyr-7b-beta", task=TextGenerationTask())
            >>> llm.generate([{"input": "What's the capital of Spain?"}])
        """
        # Fail fast: nothing below can work without the `openai` package.
        if not _OPENAI_AVAILABLE:
            raise ImportError(
                "`AnyscaleLLM` cannot be used as `openai` is not installed, please "
                "install it with `pip install openai`."
            )

        # NOTE: `LLM.__init__` is called directly instead of `super().__init__`,
        # presumably because `OpenAILLM.__init__` builds a client for the OpenAI
        # API while here the client must target the Anyscale endpoint.
        LLM.__init__(
            self,
            task=task,
            num_threads=num_threads,
            prompt_format=prompt_format,
            prompt_formatting_fn=prompt_formatting_fn,
        )

        self.max_tokens = max_new_tokens
        self.frequency_penalty = frequency_penalty
        self.presence_penalty = presence_penalty
        self.temperature = temperature
        self.top_p = top_p

        # Reuse the supplied client if any; otherwise build one against the
        # Anyscale-hosted OpenAI-compatible endpoint.
        self.client = client or OpenAI(
            api_key=api_key or os.getenv("ANYSCALE_API_KEY"),
            max_retries=6,
            base_url="https://api.endpoints.anyscale.com/v1",
        )

        assert (
            model in self.available_models
        ), f"Provided `model` is not available in your Anyscale account, available models are {self.available_models}"
        self.model = model

__init__(model, task, client=None, api_key=None, max_new_tokens=128, frequency_penalty=0.0, presence_penalty=0.0, temperature=1.0, top_p=1.0, num_threads=None, prompt_format=None, prompt_formatting_fn=None)

Initializes the AnyscaleLLM class.

Parameters:

Name Type Description Default
model str

the model to be used for generation.

required
task Task

the task to be performed by the LLM.

required
client Union[OpenAI, None]

an OpenAI client to be used for generation. If None, a new client will be created. Defaults to None.

None
api_key Union[str, None]

the Anyscale API key to be used for generation. If None, the ANYSCALE_API_KEY environment variable will be used. Defaults to None. Visit "https://docs.endpoints.anyscale.com/guides/authenticate/" for more information.

None
max_new_tokens int

the maximum number of tokens to be generated. Defaults to 128.

128
frequency_penalty float

the frequency penalty to be used for generation. Defaults to 0.0.

0.0
presence_penalty float

the presence penalty to be used for generation. Defaults to 0.0.

0.0
temperature float

the temperature to be used for generation. Defaults to 1.0.

1.0
top_p float

the top-p value to be used for generation. Defaults to 1.0.

1.0
num_threads Union[int, None]

the number of threads to be used for parallel generation. If None, no parallel generation will be performed. Defaults to None.

None
prompt_format Union[SupportedFormats, None]

the format to be used for the prompt. If None, the default format of the task will be used, available formats are openai, chatml, llama2, zephyr, and default. Defaults to None, but default (concatenation of system_prompt and formatted_prompt with a line-break) will be used if no prompt_formatting_fn is provided.

None
prompt_formatting_fn Union[Callable[..., str], None]

a function to be applied to the prompt before generation. If None, no formatting will be applied. Defaults to None.

None

Raises:

Type Description
AssertionError

if the provided model is not available in your Anyscale account.

Examples:

>>> from distilabel.tasks import TextGenerationTask
>>> from distilabel.llm import AnyscaleLLM
>>> llm = AnyscaleLLM(model="HuggingFaceH4/zephyr-7b-beta", task=TextGenerationTask())
>>> llm.generate([{"input": "What's the capital of Spain?"}])
Source code in src/distilabel/llm/anyscale.py
def __init__(
    self,
    model: str,
    task: "Task",
    client: Union["OpenAI", None] = None,
    api_key: Union[str, None] = None,
    max_new_tokens: int = 128,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    temperature: float = 1.0,
    top_p: float = 1.0,
    num_threads: Union[int, None] = None,
    prompt_format: Union["SupportedFormats", None] = None,
    prompt_formatting_fn: Union[Callable[..., str], None] = None,
) -> None:
    """Initializes the AnyscaleLLM class.

    Args:
        model (str): the model to be used for generation.
        task (Task): the task to be performed by the LLM.
        client (Union[OpenAI, None], optional): an OpenAI client to be used for generation.
            If `None`, a new client will be created. Defaults to `None`.
        api_key (Union[str, None], optional): the Anyscale API key to be used for generation.
            If `None`, the `ANYSCALE_API_KEY` environment variable will be used. Defaults to `None`.
            Visit "https://docs.endpoints.anyscale.com/guides/authenticate/" for more information.
        max_new_tokens (int, optional): the maximum number of tokens to be generated.
            Defaults to 128.
        frequency_penalty (float, optional): the frequency penalty to be used for generation.
            Defaults to 0.0.
        presence_penalty (float, optional): the presence penalty to be used for generation.
            Defaults to 0.0.
        temperature (float, optional): the temperature to be used for generation.
            Defaults to 1.0.
        top_p (float, optional): the top-p value to be used for generation.
            Defaults to 1.0.
        num_threads (Union[int, None], optional): the number of threads to be used
            for parallel generation. If `None`, no parallel generation will be performed.
            Defaults to `None`.
        prompt_format (Union[SupportedFormats, None], optional): the format to be used
            for the prompt. If `None`, the default format of the task will be used, available
            formats are `openai`, `chatml`, `llama2`, `zephyr`, and `default`. Defaults to `None`,
            but `default` (concatenation of `system_prompt` and `formatted_prompt` with a line-break)
            will be used if no `prompt_formatting_fn` is provided.
        prompt_formatting_fn (Union[Callable[..., str], None], optional): a function to be
            applied to the prompt before generation. If `None`, no formatting will be applied.
            Defaults to `None`.

    Raises:
        ImportError: if the `openai` Python client is not installed.
        AssertionError: if the provided `model` is not available in your Anyscale account.

    Examples:
        >>> from distilabel.tasks import TextGenerationTask
        >>> from distilabel.llm import AnyscaleLLM
        >>> llm = AnyscaleLLM(model="HuggingFaceH4/zephyr-7b-beta", task=TextGenerationTask())
        >>> llm.generate([{"input": "What's the capital of Spain?"}])
    """
    # Fail fast: nothing below can work without the `openai` package.
    if not _OPENAI_AVAILABLE:
        raise ImportError(
            "`AnyscaleLLM` cannot be used as `openai` is not installed, please "
            "install it with `pip install openai`."
        )

    # NOTE: `LLM.__init__` is called directly instead of `super().__init__`,
    # presumably because `OpenAILLM.__init__` builds a client for the OpenAI
    # API while here the client must target the Anyscale endpoint.
    LLM.__init__(
        self,
        task=task,
        num_threads=num_threads,
        prompt_format=prompt_format,
        prompt_formatting_fn=prompt_formatting_fn,
    )

    self.max_tokens = max_new_tokens
    self.frequency_penalty = frequency_penalty
    self.presence_penalty = presence_penalty
    self.temperature = temperature
    self.top_p = top_p

    # Reuse the supplied client if any; otherwise build one against the
    # Anyscale-hosted OpenAI-compatible endpoint.
    self.client = client or OpenAI(
        api_key=api_key or os.getenv("ANYSCALE_API_KEY"),
        max_retries=6,
        base_url="https://api.endpoints.anyscale.com/v1",
    )

    assert (
        model in self.available_models
    ), f"Provided `model` is not available in your Anyscale account, available models are {self.available_models}"
    self.model = model