Skip to content

Commit ec98d01

Browse files
committed
Fix video pipeline alpha outputs
1 parent a894cb3 commit ec98d01

4 files changed

Lines changed: 139 additions & 18 deletions

File tree

AGENTS.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Repository Guidelines
2+
3+
## Project Structure & Module Organization
4+
- `backgroundremover/` contains the core library, CLI/server entry points, and U2Net implementation.
5+
- `background_remover_gui.py` and `run_gui.bat` provide the desktop GUI launcher.
6+
- `models/` holds bundled model artifacts; downloaded weights live under `~/.u2net/` at runtime.
7+
- `examplefiles/` provides sample inputs; `dist/` and `archive/` are build outputs.
8+
9+
## Build, Test, and Development Commands
10+
- `pip install -r requirements.txt` installs runtime dependencies for local development.
11+
- `pip install -e .` installs the package in editable mode with console scripts.
12+
- `python -m backgroundremover.cmd.cli -i "input.jpg" -o "output.png"` runs the CLI locally.
13+
- `backgroundremover -i "input.jpg" -o "output.png"` uses the installed console script.
14+
- `backgroundremover-server -p 5000` starts the HTTP server (Flask + Waitress).
15+
- `python background_remover_gui.py` launches the GUI.
16+
- `docker build -t bgremover .` builds the Docker image described in `Dockerfile`.
17+
18+
## Coding Style & Naming Conventions
19+
- Follow standard Python style (PEP 8) with 4-space indentation.
20+
- Use `snake_case` for modules/functions and `CapWords` for classes.
21+
- Keep CLI flags consistent with existing short/long options in `backgroundremover/cmd/cli.py`.
22+
23+
## Testing Guidelines
24+
- No automated test suite is currently present.
25+
- When adding new behavior, include a short manual check in the PR (command + expected output), or add tests if you introduce test tooling.
26+
27+
## Commit & Pull Request Guidelines
28+
- Commit messages in history are short, imperative sentences (e.g., `Improve CLI validation and error handling`).
29+
- PRs should include a concise summary, reproduction or usage commands, and sample outputs (images or logs) when changing model behavior or CLI flags.
30+
31+
## Runtime Assets & Configuration
32+
- Models download to `~/.u2net/` on first use; document any new model names or file paths.
33+
- Video processing depends on `ffmpeg`; note shared memory settings when using Docker.

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,16 @@ backgroundremover -i "/path/to/video.mp4" -mk -o "output.matte.mp4"
288288

289289
### Video Playback and Compatibility
290290

291-
**Important:** The transparent `.mov` files created by this tool use the `qtrle` (QuickTime RLE) codec with alpha channel. Not all video players support this format correctly.
291+
**Important:** Transparent `.mov` outputs default to the lossless `qtrle` (QuickTime RLE) codec with an alpha channel. The resulting files are large, but transparency is preserved. Use `--alpha-codec` to select a different codec when you need smaller or more widely compatible output.
292+
293+
Examples:
294+
```bash
295+
# macOS-friendly ProRes 4444 (still large, but more compatible)
296+
backgroundremover -i "video.mp4" -tv --alpha-codec prores_ks -o "output.mov"
297+
298+
# Smaller WebM with alpha (if your tools support it)
299+
backgroundremover -i "video.mp4" -tv --alpha-codec libvpx-vp9 -o "output.webm"
300+
```
292301

293302
**Recommended video players:**
294303
- **mpv** (https://mpv.io) - Best support for transparent videos (Linux, Mac, Windows)
@@ -318,7 +327,6 @@ backgroundremover -i "/path/to/video.mp4" -mk -o "output.matte.mp4"
318327
```
319328

320329
### Advanced usage for video
321-
322330
Change the framerate of the video (default is set to 30)
323331

324332
```bash

backgroundremover/cmd/cli.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,18 @@ def main():
128128
type=lambda x: bool(strtobool(x)),
129129
help="Output transparent video format mov",
130130
)
131+
ap.add_argument(
132+
"--alpha-codec",
133+
default="auto",
134+
type=str,
135+
help="Codec for transparent video output (auto, prores_ks, qtrle, libvpx-vp9). Auto defaults to lossless qtrle.",
136+
)
137+
ap.add_argument(
138+
"--alpha-pix-fmt",
139+
default=None,
140+
type=str,
141+
help="Override pixel format for transparent video output (e.g., yuva444p10le).",
142+
)
131143

132144
ap.add_argument(
133145
"-tov",
@@ -282,23 +294,29 @@ def is_image_file(filename):
282294
gpu_batchsize=args.gpubatchsize,
283295
model_name=args.model,
284296
frame_limit=args.framelimit,
285-
framerate=args.framerate)
297+
framerate=args.framerate,
298+
alpha_codec=args.alpha_codec,
299+
alpha_pix_fmt=args.alpha_pix_fmt)
286300
elif args.transparentvideoovervideo:
287301
utilities.transparentvideoovervideo(output_path, os.path.abspath(args.backgroundvideo.name),
288302
input_path,
289303
worker_nodes=args.workernodes,
290304
gpu_batchsize=args.gpubatchsize,
291305
model_name=args.model,
292306
frame_limit=args.framelimit,
293-
framerate=args.framerate)
307+
framerate=args.framerate,
308+
alpha_codec=args.alpha_codec,
309+
alpha_pix_fmt=args.alpha_pix_fmt)
294310
elif args.transparentvideooverimage:
295311
utilities.transparentvideooverimage(output_path, os.path.abspath(args.backgroundimage.name),
296312
input_path,
297313
worker_nodes=args.workernodes,
298314
gpu_batchsize=args.gpubatchsize,
299315
model_name=args.model,
300316
frame_limit=args.framelimit,
301-
framerate=args.framerate)
317+
framerate=args.framerate,
318+
alpha_codec=args.alpha_codec,
319+
alpha_pix_fmt=args.alpha_pix_fmt)
302320
elif args.transparentgif:
303321
utilities.transparentgif(output_path, input_path,
304322
worker_nodes=args.workernodes,
@@ -373,23 +391,29 @@ def is_image_file(filename):
373391
gpu_batchsize=args.gpubatchsize,
374392
model_name=args.model,
375393
frame_limit=args.framelimit,
376-
framerate=args.framerate)
394+
framerate=args.framerate,
395+
alpha_codec=args.alpha_codec,
396+
alpha_pix_fmt=args.alpha_pix_fmt)
377397
elif args.transparentvideoovervideo:
378398
utilities.transparentvideoovervideo(os.path.abspath(args.output.name), os.path.abspath(args.backgroundvideo.name),
379399
os.path.abspath(args.input.name),
380400
worker_nodes=args.workernodes,
381401
gpu_batchsize=args.gpubatchsize,
382402
model_name=args.model,
383403
frame_limit=args.framelimit,
384-
framerate=args.framerate)
404+
framerate=args.framerate,
405+
alpha_codec=args.alpha_codec,
406+
alpha_pix_fmt=args.alpha_pix_fmt)
385407
elif args.transparentvideooverimage:
386408
utilities.transparentvideooverimage(os.path.abspath(args.output.name), os.path.abspath(args.backgroundimage.name),
387409
os.path.abspath(args.input.name),
388410
worker_nodes=args.workernodes,
389411
gpu_batchsize=args.gpubatchsize,
390412
model_name=args.model,
391413
frame_limit=args.framelimit,
392-
framerate=args.framerate)
414+
framerate=args.framerate,
415+
alpha_codec=args.alpha_codec,
416+
alpha_pix_fmt=args.alpha_pix_fmt)
393417
elif args.transparentgif:
394418
utilities.transparentgif(os.path.abspath(args.output.name), os.path.abspath(args.input.name),
395419
worker_nodes=args.workernodes,

backgroundremover/utilities.py

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import math
3+
from fractions import Fraction
34
import torch.multiprocessing as multiprocessing
45
import subprocess as sp
56
import time
@@ -14,6 +15,43 @@
1415
multiprocessing.set_start_method('spawn', force=True)
1516

1617

18+
def _parse_frame_rate(rate_str):
19+
try:
20+
return float(Fraction(rate_str))
21+
except Exception:
22+
return float(rate_str)
23+
24+
25+
def _alpha_encoding_args(output, alpha_codec, alpha_pix_fmt):
26+
ext = Path(output).suffix.lower()
27+
if alpha_codec in (None, "auto"):
28+
if ext == ".webm":
29+
alpha_codec = "libvpx-vp9"
30+
else:
31+
alpha_codec = "qtrle"
32+
33+
if alpha_codec == "prores_ks":
34+
args = ["-c:v", "prores_ks", "-profile:v", "4"]
35+
pix_fmt = alpha_pix_fmt or "yuva444p10le"
36+
args += ["-pix_fmt", pix_fmt]
37+
return args
38+
39+
if alpha_codec == "libvpx-vp9":
40+
args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0"]
41+
pix_fmt = alpha_pix_fmt or "yuva420p"
42+
args += ["-pix_fmt", pix_fmt]
43+
return args
44+
45+
if alpha_codec == "qtrle":
46+
pix_fmt = alpha_pix_fmt or "argb"
47+
return ["-c:v", "qtrle", "-pix_fmt", pix_fmt]
48+
49+
args = ["-c:v", alpha_codec]
50+
if alpha_pix_fmt:
51+
args += ["-pix_fmt", alpha_pix_fmt]
52+
return args
53+
54+
1755
def worker(worker_nodes,
1856
worker_index,
1957
result_dict,
@@ -104,9 +142,13 @@ def matte_key(output, file_path,
104142

105143
if framerate == -1:
106144
print(F"FRAME RATE DETECTED: {frame_rate_str} (if this looks wrong, override the frame rate)")
107-
framerate = math.ceil(eval(frame_rate_str))
145+
framerate_str = frame_rate_str
146+
framerate_value = _parse_frame_rate(frame_rate_str)
147+
else:
148+
framerate_str = str(framerate)
149+
framerate_value = float(framerate)
108150

109-
print(F"FRAME RATE: {framerate} TOTAL FRAMES: {total_frames}")
151+
print(F"FRAME RATE: {framerate_value} TOTAL FRAMES: {total_frames}")
110152

111153
p = multiprocessing.Process(target=capture_frames,
112154
args=(file_path, frames_dict, gpu_batchsize * prefetched_batches, total_frames))
@@ -163,7 +205,7 @@ def matte_key(output, file_path,
163205
'-vcodec', 'rawvideo',
164206
'-s', F"{frame.shape[1]}x320",
165207
'-pix_fmt', 'gray',
166-
'-r', F"{framerate}",
208+
'-r', framerate_str,
167209
'-i', '-',
168210
'-an',
169211
'-vcodec', 'mpeg4',
@@ -260,7 +302,9 @@ def transparentvideo(output, file_path,
260302
model_name,
261303
frame_limit=-1,
262304
prefetched_batches=4,
263-
framerate=-1):
305+
framerate=-1,
306+
alpha_codec="auto",
307+
alpha_pix_fmt=None):
264308
temp_dir = tempfile.TemporaryDirectory()
265309
tmpdirname = Path(temp_dir.name)
266310
temp_file = os.path.abspath(os.path.join(tmpdirname, "matte.mp4"))
@@ -272,10 +316,13 @@ def transparentvideo(output, file_path,
272316
prefetched_batches,
273317
framerate)
274318
print("Starting alphamerge")
319+
encoding_args = _alpha_encoding_args(output, alpha_codec, alpha_pix_fmt)
275320
cmd = [
276321
'ffmpeg', '-y', '-i', file_path, '-i', temp_file, '-filter_complex',
277-
'[1][0]scale2ref[mask][main];[main][mask]alphamerge', '-c:v', 'qtrle', '-shortest', output
322+
'[1][0]scale2ref[mask][main];[main][mask]alphamerge[v]',
323+
'-map', '[v]', '-map', '0:a?', '-shortest'
278324
]
325+
cmd += encoding_args + [output]
279326

280327
sp.run(cmd)
281328
print("Process finished")
@@ -292,7 +339,9 @@ def transparentvideoovervideo(output, overlay, file_path,
292339
model_name,
293340
frame_limit=-1,
294341
prefetched_batches=4,
295-
framerate=-1):
342+
framerate=-1,
343+
alpha_codec="auto",
344+
alpha_pix_fmt=None):
296345
temp_dir = tempfile.TemporaryDirectory()
297346
tmpdirname = Path(temp_dir.name)
298347
temp_file = os.path.abspath(os.path.join(tmpdirname, "matte.mp4"))
@@ -304,10 +353,13 @@ def transparentvideoovervideo(output, overlay, file_path,
304353
prefetched_batches,
305354
framerate)
306355
print("Starting alphamerge")
356+
encoding_args = _alpha_encoding_args(output, alpha_codec, alpha_pix_fmt)
307357
cmd = [
308358
'ffmpeg', '-y', '-i', file_path, '-i', temp_file, '-i', overlay, '-filter_complex',
309-
'[1][0]scale2ref[mask][main];[main][mask]alphamerge[vid];[vid][2:v]scale2ref[fg][bg];[bg][fg]overlay[out]', '-map', '[out]', '-shortest', output
359+
'[1][0]scale2ref[mask][main];[main][mask]alphamerge[vid];[vid][2:v]scale2ref[fg][bg];[bg][fg]overlay[out]',
360+
'-map', '[out]', '-map', '2:a?', '-shortest'
310361
]
362+
cmd += encoding_args + [output]
311363
sp.run(cmd)
312364
print("Process finished")
313365
try:
@@ -323,7 +375,9 @@ def transparentvideooverimage(output, overlay, file_path,
323375
model_name,
324376
frame_limit=-1,
325377
prefetched_batches=4,
326-
framerate=-1):
378+
framerate=-1,
379+
alpha_codec="auto",
380+
alpha_pix_fmt=None):
327381
temp_dir = tempfile.TemporaryDirectory()
328382
tmpdirname = Path(temp_dir.name)
329383
temp_file = os.path.abspath(os.path.join(tmpdirname, "matte.mp4"))
@@ -342,11 +396,13 @@ def transparentvideooverimage(output, overlay, file_path,
342396
]
343397
sp.run(cmd)
344398
print("Starting alphamerge")
399+
encoding_args = _alpha_encoding_args(output, alpha_codec, alpha_pix_fmt)
345400
cmd = [
346401
'ffmpeg', '-y', '-i', temp_image, '-i', file_path, '-i', temp_file, '-filter_complex',
347-
'[0:v]scale2ref=oh*mdar:ih[bg];[1:v]scale2ref=oh*mdar:ih[fg];[bg][fg]overlay=(W-w)/2:(H-h)/2:shortest=1[out]',
348-
'-map', '[out]', '-shortest', output
402+
'[2][1]scale2ref[mask][main];[main][mask]alphamerge[fg];[0:v]scale2ref[bg][fg];[bg][fg]overlay=(W-w)/2:(H-h)/2:shortest=1[out]',
403+
'-map', '[out]', '-map', '1:a?', '-shortest'
349404
]
405+
cmd += encoding_args + [output]
350406
sp.run(cmd)
351407
print("Process finished")
352408
try:

0 commit comments

Comments
 (0)