Skip to content

self_instruct

SelfInstructTask dataclass

Bases: TextGenerationTask

A TextGenerationTask following the Self-Instruct specification for building the prompts.

Reference: https://github.com/yizhongw/self-instruct

Parameters:

Name Type Description Default
system_prompt str

the system prompt to be used. Defaults to a built-in prompt that describes an expert prompt writer (see the default value).

'You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks.You are given a task description and a set of instructions for how to write the prompts for a specific AI application.'
principles Dict[str, List[str]]

the principles to be used for the system prompt. Defaults to the harmlessness, helpfulness, truthfulness, honesty and verbalized_calibration principle sets.

field(default_factory=lambda : {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution Union[Dict[str, float], Literal[balanced], None]

the distribution of principles to be used for the system prompt. Defaults to None.

None
application_description str

the description of the AI application. Defaults to "AI assistant".

'AI assistant'
num_instructions int

the number of instructions to be used for the prompt. Defaults to 5.

5
Source code in src/distilabel/tasks/text_generation/self_instruct.py
@dataclass
class SelfInstructTask(TextGenerationTask):
    """A `TextGenerationTask` following the Self-Instruct specification for building
    the prompts.

    Reference: https://github.com/yizhongw/self-instruct

    Args:
        system_prompt (str, optional): the system prompt to be used. Defaults to a
            built-in prompt that describes an expert prompt writer.
        principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.
            Defaults to the harmlessness, helpfulness, truthfulness, honesty and
            verbalized-calibration principle sets.
        principles_distribution (Union[Dict[str, float], Literal["balanced"], None], optional): the
            distribution of principles to be used for the system prompt. Defaults to `None`.
        application_description (str, optional): the description of the AI application. Defaults to
            "AI assistant".
        num_instructions (int, optional): the number of instructions to be used for the prompt.
            Defaults to 5.
    """

    # Adjacent string literals are concatenated verbatim, so the second literal
    # starts with a space to keep the two sentences separated.
    system_prompt: str = (
        "You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks."
        " You are given a task description and a set of instructions for how to write the prompts for a specific AI application."
    )
    application_description: str = "AI assistant"
    num_instructions: int = 5

    __jinja2_template__: str = _SELF_INSTRUCT_TEMPLATE

    def generate_prompt(self, input: str) -> Prompt:
        """Generates a prompt following the Self-Instruct specification.

        The template is rendered with the application description, the number of
        instructions and the given input; the result is paired with the system prompt.

        Args:
            input (str): the input to be used for the prompt.

        Returns:
            Prompt: the generated prompt.

        Examples:
            >>> from distilabel.tasks.text_generation import SelfInstructTask
            >>> task = SelfInstructTask(system_prompt="You are a helpful assistant.", num_instructions=2)
            >>> task.generate_prompt("What are the first 5 Fibonacci numbers?")
            Prompt(
                system_prompt="You are a helpful assistant.",
                formatted_prompt="# Task Description\nDevelop 2 user queries that ...",
            )
        """
        render_kwargs = {
            "application_description": self.application_description,
            "num_instructions": self.num_instructions,
            "input": input,
        }
        return Prompt(
            system_prompt=self.system_prompt,
            formatted_prompt=self.template.render(**render_kwargs),
        )

    def parse_output(self, output: str) -> Dict[str, List[str]]:
        """Parses the output of the model into the desired format.

        The output is split on newlines, one generation per line. Empty and
        whitespace-only lines are dropped so that blank separators, a trailing
        newline or an empty output do not produce bogus generations.

        Args:
            output (str): the raw text returned by the model.

        Returns:
            Dict[str, List[str]]: a dictionary with a single `"generations"` key holding
                the non-blank lines of `output`, in their original order.
        """
        return {
            "generations": [line for line in output.split("\n") if line.strip()]
        }

generate_prompt(input)

Generates a prompt following the Self-Instruct specification.

    Args:
        input (str): the input to be used for the prompt.

    Returns:
        Prompt: the generated prompt.

    Examples:
        >>> from distilabel.tasks.text_generation import SelfInstructTask
        >>> task = SelfInstructTask(system_prompt="You are a helpful assistant.", num_instructions=2)
        >>> task.generate_prompt("What are the first 5 Fibonacci numbers?")
        Prompt(
            system_prompt="You are a helpful assistant.",
            formatted_prompt="# Task Description\nDevelop 2 user queries that ...",
        )

Source code in src/distilabel/tasks/text_generation/self_instruct.py
def generate_prompt(self, input: str) -> Prompt:
    """Build the Self-Instruct `Prompt` for the given input.

    The task template is rendered with the application description, the number
    of instructions and `input`; the rendered text is then combined with the
    task's system prompt.

    Args:
        input (str): the input to be used for the prompt.

    Returns:
        Prompt: the generated prompt.

    Examples:
        >>> from distilabel.tasks.text_generation import SelfInstructTask
        >>> task = SelfInstructTask(system_prompt="You are a helpful assistant.", num_instructions=2)
        >>> task.generate_prompt("What are the first 5 Fibonacci numbers?")
        Prompt(
            system_prompt="You are a helpful assistant.",
            formatted_prompt="# Task Description\nDevelop 2 user queries that ...",
        )
    """
    # Pass the template variables directly as keyword arguments.
    rendered = self.template.render(
        application_description=self.application_description,
        num_instructions=self.num_instructions,
        input=input,
    )
    return Prompt(system_prompt=self.system_prompt, formatted_prompt=rendered)

parse_output(output)

Parses the output of the model into the desired format.

Source code in src/distilabel/tasks/text_generation/self_instruct.py
def parse_output(self, output: str) -> Dict[str, List[str]]:
    """Parses the output of the model into the desired format.

    The output is split on newlines, one generation per line. Empty and
    whitespace-only lines are dropped so that blank separators, a trailing
    newline or an empty output do not produce bogus generations.

    Args:
        output (str): the raw text returned by the model.

    Returns:
        Dict[str, List[str]]: a dictionary with a single `"generations"` key holding
            the non-blank lines of `output`, in their original order.
    """
    return {"generations": [line for line in output.split("\n") if line.strip()]}