Skip to content

self_instruct

SelfInstructTask dataclass

Bases: InstructTaskMixin, TextGenerationTask

A TextGenerationTask following the Self-Instruct specification for building the prompts.

Parameters:

Name Type Description Default
system_prompt str

the system prompt to be used. Defaults to None.

'You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks. You are given a task description and a set of instructions for how to write the prompts for an specific AI application.'
principles Dict[str, List[str]]

the principles to be used for the system prompt. Defaults to None.

field(default_factory=lambda: {'harmlessness': harmlessness, 'helpfulness': helpfulness, 'truthfulness': truthfulness, 'honesty': honesty, 'verbalized_calibration': verbalized_calibration}, repr=False)
principles_distribution Union[Dict[str, float], Literal[balanced], None]

the distribution of principles to be used for the system prompt. Defaults to None.

None
application_description str

the description of the AI application. Defaults to "AI assistant".

'AI assistant'
num_instructions int

the number of instructions to be used for the prompt. Defaults to 5.

5
criteria_for_query_generation str

the criteria for query generation that we want our model to have. Default value covers default behaviour for SelfInstructTask. This value is passed to the .jinja template, where extra instructions are added to ensure correct output format.

'Incorporate a diverse range of verbs, avoiding repetition.\nEnsure queries are compatible with AI model\'s text generation functions and are limited to 1-2 sentences.\nDesign queries to be self-contained and standalone.\nBlend interrogative (e.g., "What is the significance of x?") and imperative (e.g., "Detail the process of x.") styles.'
References
Source code in src/distilabel/tasks/text_generation/self_instruct.py
@dataclass
class SelfInstructTask(InstructTaskMixin, TextGenerationTask):
    """A `TextGenerationTask` following the Self-Instruct specification for building
    the prompts.

    Args:
        system_prompt (str, optional): the system prompt to be used. Defaults to `None`.
        principles (Dict[str, List[str]], optional): the principles to be used for the system prompt.
            Defaults to `None`.
        principles_distribution (Union[Dict[str, float], Literal["balanced"], None], optional): the
            distribution of principles to be used for the system prompt. Defaults to `None`.
        application_description (str, optional): the description of the AI application. Defaults to
            "AI assistant".
        num_instructions (int, optional): the number of instructions to be used for the prompt.
            Defaults to 5.
        criteria_for_query_generation (str, optional): the criteria for query generation that we want
            our model to have. Default value covers default behaviour for SelfInstructTask. This value is
            passed to the .jinja template, where extra instructions are added to ensure correct output format.

    References:
        - [`Self-Instruct: Aligning Language Models with Self-Generated Instructions`](https://arxiv.org/abs/2212.10560)
        - [`Self-Instruct - GitHub Repository`](https://github.com/yizhongw/self-instruct)
    """

    system_prompt: str = (
        "You are an expert prompt writer, writing the best and most diverse prompts for a variety of tasks."
        " You are given a task description and a set of instructions for how to write the prompts for an"
        " specific AI application."
    )

    application_description: str = "AI assistant"
    num_instructions: int = 5

    criteria_for_query_generation: str = (
        "Incorporate a diverse range of verbs, avoiding repetition.\n"
        "Ensure queries are compatible with AI model's text generation functions and are limited to 1-2 sentences.\n"
        "Design queries to be self-contained and standalone.\n"
        'Blend interrogative (e.g., "What is the significance of x?") and imperative (e.g., "Detail the process of x.") styles.'
    )

    __jinja2_template__: str = _SELF_INSTRUCT_TEMPLATE

    def generate_prompt(self, input: str, **_: Any) -> Prompt:
        """Generates a prompt following the Self-Instruct specification.

        Args:
            input (str): the input to be used for the prompt.

        Returns:
            Prompt: the generated prompt.

        Examples:
            >>> from distilabel.tasks.text_generation import SelfInstructTask
            >>> task = SelfInstructTask(system_prompt="You are a helpful assistant.", num_instructions=2)
            >>> task.generate_prompt("What are the first 5 Fibonacci numbers?")
            Prompt(
                system_prompt="You are a helpful assistant.",
                formatted_prompt="# Task Description ...",
            )
        """
        render_kwargs = {
            "application_description": self.application_description,
            "num_instructions": self.num_instructions,
            "criteria_for_query_generation": self.criteria_for_query_generation,
            "input": input,
        }
        return Prompt(
            system_prompt=self.system_prompt,
            formatted_prompt=self.template.render(**render_kwargs),
        )

    @property
    def output_args_names(self) -> List[str]:
        return ["instructions"]

    def parse_output(self, output: str) -> Dict[str, List[str]]:
        """Parses the output of the model into the desired format."""
        pattern = re.compile(r"\d+\.\s*(.*?)\n")
        return {"instructions": pattern.findall(output)}

generate_prompt(input, **_)

Generates a prompt following the Self-Instruct specification.

Parameters:

Name Type Description Default
input str

the input to be used for the prompt.

required

Returns:

Name Type Description
Prompt Prompt

the generated prompt.

Examples:

>>> from distilabel.tasks.text_generation import SelfInstructTask
>>> task = SelfInstructTask(system_prompt="You are a helpful assistant.", num_instructions=2)
>>> task.generate_prompt("What are the first 5 Fibonacci numbers?")
Prompt(
    system_prompt="You are a helpful assistant.",
    formatted_prompt="# Task Description ...",
)
Source code in src/distilabel/tasks/text_generation/self_instruct.py
def generate_prompt(self, input: str, **_: Any) -> Prompt:
    """Generates a prompt following the Self-Instruct specification.

    Args:
        input (str): the input to be used for the prompt.

    Returns:
        Prompt: the generated prompt.

    Examples:
        >>> from distilabel.tasks.text_generation import SelfInstructTask
        >>> task = SelfInstructTask(system_prompt="You are a helpful assistant.", num_instructions=2)
        >>> task.generate_prompt("What are the first 5 Fibonacci numbers?")
        Prompt(
            system_prompt="You are a helpful assistant.",
            formatted_prompt="# Task Description ...",
        )
    """
    render_kwargs = {
        "application_description": self.application_description,
        "num_instructions": self.num_instructions,
        "criteria_for_query_generation": self.criteria_for_query_generation,
        "input": input,
    }
    return Prompt(
        system_prompt=self.system_prompt,
        formatted_prompt=self.template.render(**render_kwargs),
    )

parse_output(output)

Parses the output of the model into the desired format.

Source code in src/distilabel/tasks/text_generation/self_instruct.py
def parse_output(self, output: str) -> Dict[str, List[str]]:
    """Parses the output of the model into the desired format."""
    pattern = re.compile(r"\d+\.\s*(.*?)\n")
    return {"instructions": pattern.findall(output)}