1 change: 1 addition & 0 deletions .spellcheck-en-custom.txt
@@ -131,6 +131,7 @@ nb
oneMKL
orchestrator
ots
parallelized
png
pre
preceeds
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,9 @@

### Features

* `ilab data generate` now supports parallelized data generation across batches of the seed
data when running with the vLLM serving backend. The `--batch-size` argument can be used to
control this behavior.
* `ilab model download` now supports downloading models from OCI registries. Repositories
that are prefixed by "docker://" and specified against `--repository` are treated as OCI
registries.
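
For illustration, the two features above could be exercised as follows; the batch size value and the registry path are examples only, not taken from this PR:

```bash
# Tune SDG batching when serving the teacher model with vLLM (default is 8).
ilab data generate --batch-size 16

# Download a model from an OCI registry by prefixing the repository with "docker://".
# The registry path below is a placeholder.
ilab model download --repository docker://registry.example.com/example/model
```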
7 changes: 7 additions & 0 deletions TROUBLESHOOTING.md
@@ -4,6 +4,13 @@ This document is for commonly found problems and their solutions when using `ila

## `ilab` troubleshooting

### `ilab data generate --endpoint-url` with llama-cpp fails with `openai.InternalServerError: Service Unavailable`

llama-cpp does not support batching, which is enabled by default with remote
endpoints. To resolve this error, disable batching using `--batch-size=0`.

See [this issue](https://github.com/instructlab/instructlab/issues/1892).
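
For example, a remote llama-cpp endpoint can be used with batching turned off (the endpoint URL below is illustrative):

```bash
ilab data generate --endpoint-url http://localhost:8000/v1 --batch-size=0
```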

### `ilab data generate` command running slow on macOS

If you notice `ilab data generate` running for several hours or more on a Mac M-series, you should first check the available memory on your system (see [Activity Monitor](https://support.apple.com/en-ie/guide/activity-monitor/welcome/mac) for more details). If there is < 8GB of RAM available before serving a model, check whether you can free up some memory.
6 changes: 6 additions & 0 deletions scripts/basic-workflow-tests.sh
@@ -259,6 +259,12 @@ test_generate() {
if [ "$SDG_PIPELINE" = "full" ]; then
GENERATE_ARGS+=("--pipeline" "full")
fi

# Disable batching with llama-cpp. See https://github.com/instructlab/instructlab/issues/1892
if [ "$BACKEND" = "llama-cpp" ]; then
GENERATE_ARGS+=("--batch-size" "0")
fi

ilab data generate --num-instructions ${NUM_INSTRUCTIONS} "${GENERATE_ARGS[@]}"
}

28 changes: 24 additions & 4 deletions src/instructlab/data/generate.py
@@ -139,10 +139,13 @@
"--pipeline",
type=click.STRING,
default="simple",
# Hidden until instructlab-sdg releases a version with multiple pipelines
# For now only "simple" is supported in the latest release.
hidden=True,
help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline worlfow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
)
@click.option(
"--batch-size",
type=click.IntRange(min=0),
default=None,
help="Number of elements to process in each batch through the SDG pipeline. Enabled by default for the vLLM serving backend, with a batch size of 8 chosen based on experiments to optimize for throughput. Use 0 to disable.",
)
@click.option(
"--enable-serving-output",
@@ -174,6 +177,7 @@ def generate(
model_family,
pipeline,
enable_serving_output,
batch_size,
):
"""Generates synthetic data to enhance your example data"""
# pylint: disable=import-outside-toplevel
@@ -192,12 +196,19 @@
if ctx.obj is not None:
prompt_file_path = ctx.obj.config.generate.prompt_file

# If batch size is not set explicitly, default to 8
# Once https://github.com/instructlab/sdg/issues/224 is resolved we can
# pass batch_size=None to the library instead
if batch_size is None:
batch_size = 8

backend_instance = None
if endpoint_url:
api_base = endpoint_url
else:
# First Party
from instructlab.model.backends import backends
from instructlab.model.backends.llama_cpp import Server as llama_cpp_server

ctx.obj.config.serve.llama_cpp.llm_family = model_family
backend_instance = backends.select_backend(ctx.obj.config.generate.teacher)
@@ -210,6 +221,14 @@
except Exception as exc:
click.secho(f"Failed to start server: {exc}", fg="red")
raise click.exceptions.Exit(1)

# disable batching when running with the local llama.cpp server
if isinstance(backend_instance, llama_cpp_server):
if batch_size is not None:
logger.warning(
"Disabling SDG batching - unsupported with llama.cpp serving"
)
batch_size = 0
try:
click.echo(
f"Generating synthetic data using '{model}' model, taxonomy:'{taxonomy_path}' against {api_base} server"
@@ -236,6 +255,7 @@
tls_client_key=tls_client_key,
tls_client_passwd=tls_client_passwd,
pipeline=pipeline,
batch_size=batch_size,
)
except GenerateException as exc:
click.secho(
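
Taken together, the changes in `generate.py` resolve the effective batch size in three ways. The sketch below restates that behavior as illustrative CLI invocations; the endpoint URL and batch size values are placeholders, not taken from this PR:

```bash
# vLLM serving backend, no flag given: the CLI defaults batch_size to 8.
ilab data generate

# Remote endpoint backed by llama-cpp: batching must be disabled explicitly,
# as described in TROUBLESHOOTING.md.
ilab data generate --endpoint-url http://localhost:8000/v1 --batch-size=0

# Local llama-cpp serving: the CLI overrides any requested batch size to 0 and
# logs "Disabling SDG batching - unsupported with llama.cpp serving".
ilab data generate --batch-size 8
```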