13.05.2026, 18:04
(12.05.2026, 19:51)Selur Wrote: Quick and Dirty: just running all frames through the server:
Cu Selur

# Imports
import sys
import os
import vapoursynth as vs
# getting Vapoursynth core
core = vs.core
# Limit frame cache to 48449MB
core.max_cache_size = 48449
# Import scripts folder
scriptPath = 'F:/Hybrid/64bit/vsscripts'
sys.path.insert(0, os.path.abspath(scriptPath))
# loading plugins
core.std.LoadPlugin(path="F:/Hybrid/64bit/Vapoursynth/Lib/site-packages/vapoursynth/plugins2/fmtconv.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/Vapoursynth/Lib/site-packages/vapoursynth/plugins2/libbestsource.dll")
# Import scripts
import validate
# Source: 'G:\TestClips&Co\files\test.avi'
# clip current meta; color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, color primaries: Unspecific, color transfer: Unspecified, yuv luminance scale: limited, scanorder: progressive, full height: true ((Source))
# Loading 'G:\TestClips&Co\files\test.avi' using BestSource
clip = core.bs.VideoSource(source="G:/TestClips&Co/files/test.avi", cachepath="J:/tmp/test_bestSource", track=0, hwdevice="opencl")
import xmlrpc.client
import io
import numpy as np
from PIL import Image
# Convert the source to planar RGB24 so frames can round-trip through PIL;
# matrix_in_s="470bg" matches the source metadata noted above.
clip_rgb = core.resize.Bicubic(clip, format=vs.RGB24, matrix_in_s="470bg")
# XML-RPC connection to the local colorization server.
# use_builtin_types=True makes <base64> responses arrive as bytes objects.
proxy = xmlrpc.client.ServerProxy("http://127.0.0.1:8765/", use_builtin_types=True)
# Prompt sent to the server with every frame pair.
PROMPT = "Colorize this black and white image with natural, realistic colors."
def frame_to_png_bytes(f):
    """Encode a planar RGB24 VapourSynth frame as PNG for XML-RPC transport.

    Expects *f* to expose its planes via indexing (f[0]/f[1]/f[2], the
    VapourSynth R55+ frame API), each readable as a 2-D uint8 array.

    Returns an xmlrpc.client.Binary wrapping the PNG-compressed bytes.
    """
    # Stack the R, G, B planes into one (h, w, 3) interleaved array for PIL.
    # (The original also read f.width/f.height into unused locals — dropped.)
    arr = np.dstack([np.asarray(f[0]), np.asarray(f[1]), np.asarray(f[2])])
    img = Image.fromarray(arr, "RGB")
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return xmlrpc.client.Binary(buf.getvalue())
def write_png_to_frame(fout, png_bytes_data):
    """Decode PNG bytes and copy each RGB channel into the frame's planes.

    Mutates *fout* in place. Assumes the decoded image has the same
    dimensions as the frame — TODO confirm the server never resizes.
    """
    decoded = Image.open(io.BytesIO(bytes(png_bytes_data)))
    rgb = np.array(decoded.convert("RGB"))
    # Walk the channel axis: one (h, w) view per colour channel.
    for idx, channel in enumerate(np.moveaxis(rgb, -1, 0)):
        np.copyto(np.asarray(fout[idx]), channel)
# Process pairs: frame N and N+1 together
# Use FrameEval with a clip-of-clips approach, or simply process even frames
# and carry the paired result. A simpler approach for offline encoding:
num_frames = clip_rgb.num_frames
# Maps frame index -> already-colorized frame produced as the second half of
# a previous pair; entries are popped on use. Plain dict, no locking —
# NOTE(review): assumes frames are requested sequentially by one thread.
results = {} # cache colorized frames
def colorize_paired(n, f):
    """ModifyFrame callback: colorize frames in pairs (n, n+1) via the server.

    Sends frame n together with frame n+1 to the XML-RPC server in one call,
    returns the colorized frame n, and caches the colorized frame n+1 in
    `results` so the next request reuses it without a second server call.

    NOTE(review): correctness relies on frames being requested strictly in
    order; the `results` cache has no lock, and calling clip_rgb.get_frame()
    inside a frame callback may stall under parallel requests — confirm the
    encode runs with a single request in flight.
    """
    if n in results:
        # Frame n was already colorized as the second half of the previous pair.
        return results.pop(n)
    fout = f.copy()
    # Get frame n
    png1 = frame_to_png_bytes(f)
    # Get frame n+1 (clamped to the last frame, so the final pair degenerates
    # to sending the same frame twice)
    n2 = min(n + 1, num_frames - 1)
    f2 = clip_rgb.get_frame(n2)
    png2 = frame_to_png_bytes(f2)
    fout2 = f2.copy()
    result = proxy.colorize_frame_pair(png1, png2, PROMPT, 8)
    # gap_px=8 is the separator between the two images during inference
    if result["ok"]:
        write_png_to_frame(fout, result["data1"])
        write_png_to_frame(fout2, result["data2"])
        if n2 != n:
            results[n2] = fout2 # cache the second result
    # On server failure the unmodified (still grayscale) copy is returned —
    # best-effort behaviour; the pair will be retried when n2 is requested.
    return fout
# Feed every frame of the RGB clip through the paired-colorization callback.
colorized_rgb = core.std.ModifyFrame(clip_rgb, clip_rgb, colorize_paired)
# Convert back to the source's YUV 4:2:0 8-bit format using the same matrix.
final_clip = core.resize.Bicubic(colorized_rgb, format=vs.YUV420P8, matrix_s="470bg")
final_clip.set_output()
Hi Selur and Dan,
everything worked for me when installing and using the server:
(.venv) PS E:\DiTServerRPC> .\.venv\Scripts\activate
(.venv) PS E:\DiTServerRPC>
(.venv) PS E:\DiTServerRPC> python dit_client_pair_example.py --use-shm
[INFO] Connecting to http://127.0.0.1:8765/ ...
[INFO] Server is reachable.
[INFO] Transport: shared memory
[INFO] Pipeline is loaded on server.
[INFO] Image 1: sample1_bw.jpg (1480x1080 px)
[INFO] Image 2: sample2_bw.jpg (1480x1080 px)
[INFO] Running paired inference (gap=8px) ...
[INFO] Inference time : 5.96s total (2.98s per image)
[INFO] Round-trip time: 6.08s
[INFO] Saved: sample1_colorized.jpg
[INFO] Saved: sample2_colorized.jpg
(.venv) PS E:\DiTServerRPC>
But what Selur did in terms of pairing the DiT server with Hybrid for direct video colorization, I never managed to do.
It would be very good if the work with this server were automated within Hybrid.


