13.05.2026, 18:04
(12.05.2026, 19:51)Selur Wrote: Quick and Dirty: just running all frames through the server:
Cu Selur# Imports import sys import os import vapoursynth as vs # getting Vapoursynth core core = vs.core # Limit frame cache to 48449MB core.max_cache_size = 48449 # Import scripts folder scriptPath = 'F:/Hybrid/64bit/vsscripts' sys.path.insert(0, os.path.abspath(scriptPath)) # loading plugins core.std.LoadPlugin(path="F:/Hybrid/64bit/Vapoursynth/Lib/site-packages/vapoursynth/plugins2/fmtconv.dll") core.std.LoadPlugin(path="F:/Hybrid/64bit/Vapoursynth/Lib/site-packages/vapoursynth/plugins2/libbestsource.dll") # Import scripts import validate # Source: 'G:\TestClips&Co\files\test.avi' # clip current meta; color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, color primaries: Unspecific, color transfer: Unspecified, yuv luminance scale: limited, scanorder: progressive, full height: true ((Source)) # Loading 'G:\TestClips&Co\files\test.avi' using BestSource clip = core.bs.VideoSource(source="G:/TestClips&Co/files/test.avi", cachepath="J:/tmp/test_bestSource", track=0, hwdevice="opencl") import xmlrpc.client import io import numpy as np from PIL import Image clip_rgb = core.resize.Bicubic(clip, format=vs.RGB24, matrix_in_s="470bg") proxy = xmlrpc.client.ServerProxy("http://127.0.0.1:8765/", use_builtin_types=True) PROMPT = "Colorize this black and white image with natural, realistic colors." 
def frame_to_png_bytes(f):
    """Encode a three-plane RGB VapourSynth frame as PNG for XML-RPC transport.

    Args:
        f: Video frame whose planes 0, 1 and 2 hold R, G and B
            (the caller passes frames of the RGB24 clip).

    Returns:
        xmlrpc.client.Binary wrapping the PNG-encoded image bytes.
    """
    # VapourSynth R55+: planes are accessed with frame[plane]
    planes = [np.asarray(f[plane_idx]) for plane_idx in range(3)]
    # Stack the planes along a third axis to get the interleaved layout PIL expects.
    arr = np.dstack(planes)
    img = Image.fromarray(arr, "RGB")
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return xmlrpc.client.Binary(buf.getvalue())


def write_png_to_frame(fout, png_bytes_data):
    """Decode PNG data and copy its R, G and B channels into the planes of ``fout``.

    Args:
        fout: Writable (copied) video frame with three planes.
        png_bytes_data: PNG image data; converted with ``bytes()`` before decoding.
    """
    out_img = Image.open(io.BytesIO(bytes(png_bytes_data))).convert("RGB")
    frame_size = (fout.width, fout.height)
    if out_img.size != frame_size:
        # np.copyto cannot fit a differently sized image into the plane,
        # so bring the returned image back to the frame dimensions first.
        out_img = out_img.resize(frame_size)
    out_arr = np.array(out_img)
    for plane_idx in range(3):
        np.copyto(np.asarray(fout[plane_idx]), out_arr[:, :, plane_idx])


# Process pairs: frame N and N+1 are sent to the server together and the
# second result is cached until frame N+1 itself is requested.
num_frames = clip_rgb.num_frames
results = {}  # cache colorized frames, keyed by frame number

# Frame n of this clip is frame n+1 of clip_rgb (the last frame is repeated).
# Feeding it to ModifyFrame as a second input lets the filter graph deliver
# the partner frame, instead of issuing a blocking get_frame() call from
# inside the callback.
clip_next = core.std.Trim(
    core.std.DuplicateFrames(clip_rgb, frames=[num_frames - 1]),
    first=1,
)


def colorize_paired(n, f):
    """ModifyFrame selector: return the colorized version of frame ``n``.

    Args:
        n: Index of the requested frame.
        f: Two frames, ``[frame n, frame n+1]``, taken from ``clip_rgb`` and
            ``clip_next``.

    Returns:
        The cached frame if ``n`` was already colorized as the second half of
        an earlier pair; otherwise a copy of frame ``n`` that has been
        overwritten with the server result. If the server reports failure
        (``result["ok"]`` is falsy) the uncolorized copy is returned.
    """
    if n in results:
        return results.pop(n)

    f1, f2 = f
    n2 = min(n + 1, num_frames - 1)
    fout = f1.copy()
    fout2 = f2.copy()

    png1 = frame_to_png_bytes(f1)
    png2 = frame_to_png_bytes(f2)
    # gap_px=8 is the separator between the two images during inference
    result = proxy.colorize_frame_pair(png1, png2, PROMPT, 8)
    if result["ok"]:
        write_png_to_frame(fout, result["data1"])
        write_png_to_frame(fout2, result["data2"])
        if n2 != n:
            # Cache only successful results, so a failed pair is retried
            # when frame n+1 is requested on its own.
            results[n2] = fout2
    return fout


colorized = core.std.ModifyFrame(clip_rgb, [clip_rgb, clip_next], colorize_paired)
output = core.resize.Bicubic(colorized, format=vs.YUV420P8, matrix_s="470bg")
output.set_output()
Hi, Selur and Dan
everything worked for me when installing and using the server
(.venv) PS E:\DiTServerRPC> .\.venv\Scripts\activate
(.venv) PS E:\DiTServerRPC>
(.venv) PS E:\DiTServerRPC> python dit_client_pair_example.py --use-shm
[INFO] Connecting to http://127.0.0.1:8765/ ...
[INFO] Server is reachable.
[INFO] Transport: shared memory
[INFO] Pipeline is loaded on server.
[INFO] Image 1: sample1_bw.jpg (1480x1080 px)
[INFO] Image 2: sample2_bw.jpg (1480x1080 px)
[INFO] Running paired inference (gap=8px) ...
[INFO] Inference time : 5.96s total (2.98s per image)
[INFO] Round-trip time: 6.08s
[INFO] Saved: sample1_colorized.jpg
[INFO] Saved: sample2_colorized.jpg
(.venv) PS E:\DiTServerRPC>
But I never managed to do what Selur did: pairing the DiT server with Hybrid for direct video colorization.
It would be very good if working with this server were automated in Hybrid.



