Skip to content

Command Line Interface

This section contains the API reference for the command line interface. For more information on how to use the command line interface, see the Tutorial - CLI.

Utility functions for the pipeline commands

Here are some utility functions to help you work with pipelines from the console.

display_pipeline_information(pipeline)

Displays the pipeline information to the console.

Parameters:

Name Type Description Default
pipeline BasePipeline

The pipeline.

required
Source code in src/distilabel/cli/pipeline/utils.py
def display_pipeline_information(pipeline: "BasePipeline") -> None:
    """Prints the information of a pipeline to the console.

    The renderable produced by `_build_pipeline_panel` for the pipeline is written
    to a freshly created `rich` console.

    Args:
        pipeline: The pipeline whose information should be displayed.
    """
    # `rich` is imported lazily, only when something actually has to be printed.
    from rich.console import Console

    console = Console()
    panel = _build_pipeline_panel(pipeline)
    console.print(panel)

get_config_from_url(url)

Loads the pipeline configuration from a URL pointing to a JSON or YAML file.

Parameters:

Name Type Description Default
url str

The URL pointing to the pipeline configuration file.

required

Returns:

Type Description
Dict[str, Any]

The pipeline configuration as a dictionary.

Raises:

Type Description
ValueError

If the file format is not supported.

Source code in src/distilabel/cli/pipeline/utils.py
def get_config_from_url(url: str) -> Dict[str, Any]:
    """Loads the pipeline configuration from a URL pointing to a JSON or YAML file.

    If the `HF_TOKEN` environment variable is set and the URL's host is
    `huggingface.co` (or one of its subdomains), the token is sent as a bearer
    token in the `Authorization` header. The token is never sent to any other host.

    Args:
        url: The URL pointing to the pipeline configuration file.

    Returns:
        The pipeline configuration as a dictionary.

    Raises:
        ValueError: If the file format is not supported.
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not answer within the timeout.
    """
    from urllib.parse import urlparse

    if not url.endswith((".json", ".yaml", ".yml")):
        raise ValueError(
            f"Unsupported file format for '{url}'. Only JSON and YAML are supported"
        )

    # Decide whether to authenticate based on the parsed hostname. A plain substring
    # check would also match e.g. "https://evil.example/huggingface.co/x.json" and
    # leak the token to an unrelated server.
    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        # Malformed URL: send no credentials and let `requests` report the problem.
        host = ""
    is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")

    headers = None
    if is_hf_host and "HF_TOKEN" in os.environ:
        headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

    # Without a timeout `requests` may block forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=60)
    response.raise_for_status()

    if url.endswith((".yaml", ".yml")):
        content = response.content.decode("utf-8")
        return yaml.safe_load(content)

    return response.json()

get_pipeline(config)

Get a pipeline from a configuration file.

Parameters:

Name Type Description Default
config str

The path or URL to the pipeline configuration file.

required

Returns:

Type Description
BasePipeline

The pipeline.

Raises:

Type Description
ValueError

If the file format is not supported.

FileNotFoundError

If the configuration file does not exist.

Source code in src/distilabel/cli/pipeline/utils.py
def get_pipeline(config: str) -> "BasePipeline":
    """Builds a pipeline from a configuration given as a URL or a local file path.

    The value is first checked as an HTTP URL; only if it is not one is it treated
    as a path on the local filesystem.

    Args:
        config: The path or URL to the pipeline configuration file.

    Returns:
        The pipeline.

    Raises:
        ValueError: If the file format is not supported.
        FileNotFoundError: If the configuration file does not exist.
    """
    if not valid_http_url(config):
        # Not a URL, so it has to be an existing local file.
        if not Path(config).is_file():
            raise FileNotFoundError(f"Config file '{config}' does not exist.")
        return Pipeline.from_file(config)

    remote_config = get_config_from_url(config)
    return Pipeline.from_dict(remote_config)

parse_runtime_parameters(params)

Parses the runtime parameters from the CLI format to the format expected by the Pipeline.run method. The CLI format is a list of tuples, where the first element is a list of keys and the second element is the value.

Parameters:

Name Type Description Default
params List[Tuple[List[str], str]]

A list of tuples, where the first element is a list of keys and the second element is the value.

required

Returns:

Type Description
Dict[str, Dict[str, Any]]

A dictionary with the runtime parameters in the format expected by the Pipeline.run method.

Source code in src/distilabel/cli/pipeline/utils.py
def parse_runtime_parameters(
    params: List[Tuple[List[str], str]],
) -> Dict[str, Dict[str, Any]]:
    """Converts runtime parameters from the CLI format into the nested dictionary
    expected by the `Pipeline.run` method.

    Each entry of the CLI format is a `(keys, value)` tuple: `keys` is the path of
    nested dictionary keys and `value` is stored under the last key of that path.

    Args:
        params: A list of tuples, where the first element is a list of keys and the
            second element is the value.

    Returns:
        A dictionary with the runtime parameters in the format expected by the
        `Pipeline.run` method.
    """
    parsed = {}
    for key_path, raw_value in params:
        if not key_path:
            # An empty key path has nowhere to store the value; nothing is set.
            continue

        *parents, leaf = key_path

        # Walk down the parent keys, creating intermediate dictionaries on demand.
        node = parsed
        for parent in parents:
            node = node.setdefault(parent, {})

        node[leaf] = raw_value
    return parsed

valid_http_url(url)

Check if the URL is a valid HTTP URL.

Parameters:

Name Type Description Default
url str

The URL to check.

required

Returns:

Type Description
bool

True, if the URL is a valid HTTP URL. False, otherwise.

Source code in src/distilabel/cli/pipeline/utils.py
def valid_http_url(url: str) -> bool:
    """Tells whether the given string validates as an HTTP URL.

    Args:
        url: The URL to check.

    Returns:
        `True`, if the URL is a valid HTTP URL. `False`, otherwise.
    """
    try:
        http_url_adapter = TypeAdapter(HttpUrl)
        http_url_adapter.validate_python(url)  # type: ignore
    except ValidationError:
        # Validation failed, so the string is not an HTTP URL.
        return False
    else:
        return True