This forum uses cookies
This forum makes use of cookies to store your login information if you are registered, and your last visit if you are not. Cookies are small text documents stored on your computer; the cookies set by this forum can only be used on this website and pose no security risk. Cookies on this forum also track the specific topics you have read and when you last read them. Please confirm whether you accept or reject these cookies being set.

A cookie will be stored in your browser regardless of choice to prevent you being asked this question again. You will be able to change your cookie settings at any time using the link in the footer.

New AutoColor adjustment filter
#11
Quote:On my tests the AviSynth script is encoded at about 40fps (using avs import in Vapoursynth), while the Vapoursynth version is about 2X faster.
Strange for me in a quick test, avs.ColorYUV2(autogain=true) was around 3000 fps and autoadjust.AutoGain around 450 fps.
Busy week this week, and I'm mainly afk Rolleyes
----
Dev versions are in the 'experimental'-folder of my GoogleDrive, which is linked on the download page.
Reply
#12
Rolleyes
(15.04.2025, 20:02)Selur Wrote:
Quote:On my tests the AviSynth script is encoded at about 40fps (using avs import in Vapoursynth), while the Vapoursynth version is about 2X faster.
Strange for me in a quick test, avs.ColorYUV2(autogain=true) was around 3000 fps and autoadjust.AutoGain around 450 fps.
Busy week this, week and I'm mainly afk  Rolleyes

I performed the tests on the completed FRED script (attached)

In effect, using the "Benchmark" tool included in vsedit, I get a speed of 40fps for the AVS version and 450fps for the VS version.
So it seems that the bottleneck is AutoGain, but given the total result, for the moment I don't consider this an issue. Rolleyes
 
Dan


Attached Files
.zip   FRED_Restoring_Blending_Cleaning.zip (Size: 1,49 MB / Downloads: 7)
Reply
#13
Hi Dan &  Selur
I wanted to ask for your opinion on this test result I recently produced.

I tried to closely follow the steps Fred shared in one of his posts from 2017 (where he described his updated script with filters like RemoveDirtSMC(), GamMac(), DePanStabilize(), multiple Tweak() operations, RgbAdjust(), and a specific multi-stage sharpening setup using UnsharpMask and Sharpen()).

I believe the outcome looks quite close to what Fred was achieving in his later work — especially in terms of color balance and fine detail recovery.

What do you think of the result?

Thanks again for all your work and insight — your guidance and Hybrid support really make these tests possible!

Best regards,


Attached Files
.zip   GamMac Result fred 2017.zip (Size: 9,85 MB / Downloads: 10)
Reply
#14
Quote: What do you think of the result?
Totally oversharpened, overcompressed, sharpened macroblocking,... => to me TestScenefred.mp4 looks horrible.
----
Dev versions are in the 'experimental'-folder of my GoogleDrive, which is linked on the download page.
Reply
#15
@Dan64: I compared the speed of AutoGain with PIL and without it:
With:
def AutoGain(clip: vs.VideoNode, clip_limit: float = 1.0, strength: float = 0.5) -> vs.VideoNode:
    """AutoGain filter using OpenCV.

        Expands the luma range of each frame towards TV-range limits
        (AviSynth ColorYUV(autogain=true) style), working on an RGB24
        intermediate converted through PIL.

        :param clip:          Clip to process (only RGB24 is processed directly;
                              other formats are converted to RGB24 and back).
        :param clip_limit:    Threshold for contrast limiting, range [0, 50] (default=1.0)
        :param strength:      Strength of the filter. A strength=0 means that the clip is returned unchanged,
                              range [0, 1] (default=0.5)
    """
    if clip.format.id != vs.RGB24:
        # clip not in RGB24 format, it will be converted
        if clip.format.color_family == vs.ColorFamily.YUV:
            rgb_clip = clip.resize.Bicubic(format=vs.RGB24, matrix_in_s="709", range_s="full",
                                           dither_type="error_diffusion")
        else:
            rgb_clip = clip.resize.Bicubic(format=vs.RGB24, range_s="full")
    else:
        rgb_clip = clip

    # Merge weight of the *unprocessed* clip: strength=1 -> weight=0 (full effect).
    weight: float = max(min(1.0 - strength, 1.0), 0.0)

    def frame_autogain(n: int, f: vs.VideoFrame, bits_per_pixel: int = 8) -> vs.VideoFrame:
        """Stretch the Y channel of one RGB24 frame towards the loose TV-range limits."""
        img = _frame_to_image(f)
        img_np = np.asarray(img)

        yuv = cv2.cvtColor(img_np, cv2.COLOR_RGB2YUV)
        # Copy instead of re-running the (identical) colorspace conversion:
        # yuv_new keeps U/V; only its Y plane is overwritten below.
        yuv_new = yuv.copy()

        dY = yuv[:, :, 0]

        # Frequency statistics are computed at most at 16-bit precision.
        bits_per_pixel_for_freq = min(bits_per_pixel, 16)

        loose_max_limit = (235 + 1) << (bits_per_pixel_for_freq - 8)
        loose_min_limit = 16 << (bits_per_pixel_for_freq - 8)
        # Clamp the observed min/max into the legal TV range.
        maxY = min(dY.max(), loose_max_limit)
        minY = max(dY.min(), loose_min_limit)

        y_range = maxY - minY

        y_offset = 0
        y_gain = 0
        if y_range > 0:
            scale = (loose_max_limit - loose_min_limit) / y_range
            y_offset = (loose_min_limit - scale * minY) / (1 << (bits_per_pixel_for_freq - 8))
            y_gain = 256 * (scale - 1.0)

        if clip_limit > 0:
            # Soften the correction proportionally to the clip limit.
            y_offset = y_offset * (1 - clip_limit / 100.0)
            y_gain = y_gain * (1 - clip_limit / 100.0)

        dY = (dY + y_offset) * (y_gain / 256 + 1)

        yuv_new[:, :, 0] = dY.clip(min=0, max=255).astype(np.uint8)

        img_new = Image.fromarray(cv2.cvtColor(yuv_new, cv2.COLOR_YUV2RGB))

        return _image_to_frame(img_new, f.copy())

    # BUGFIX: ModifyFrame must be called on the RGB24 clip, not on the source clip —
    # the selector returns an RGB24 frame, which is a format mismatch when the
    # source is YUV. Likewise the selector always processes RGB24 (8-bit) frames,
    # so the bit depth of the *RGB* clip is the one that matters, not the source's.
    clip_a = rgb_clip.std.ModifyFrame(clips=rgb_clip,
                                      selector=partial(frame_autogain,
                                                       bits_per_pixel=rgb_clip.format.bits_per_sample))
    clip_b = rgb_clip

    if weight > 0:
        clip_rgb = core.std.Merge(clip_a, clip_b, weight)
    else:
        clip_rgb = clip_a

    if clip.format.id != vs.RGB24:
        # convert the format for tweak to YUV 8bits
        clip_new = clip_rgb.resize.Bicubic(format=vs.YUV420P8, matrix_s="709", range_s="limited")
    else:
        clip_new = clip_rgb

    return clip_new
without:
def AutoGain2(clip: vs.VideoNode, clip_limit: float = 1.0, strength: float = 0.5) -> vs.VideoNode:
    """PIL-free AutoGain: reads/writes the frame planes directly with numpy.

    :param clip:        Clip to process (non-RGB24 input is converted on the fly).
    :param clip_limit:  Threshold for contrast limiting, range [0, 50] (default=1.0)
    :param strength:    Filter strength in [0, 1]; 0 returns the clip unchanged (default=0.5)
    """
    if clip.format.id == vs.RGB24:
        rgb_clip = clip
    elif clip.format.color_family == vs.ColorFamily.YUV:
        rgb_clip = clip.resize.Bicubic(format=vs.RGB24, matrix_in_s="709", range_s="full")
    else:
        rgb_clip = clip.resize.Bicubic(format=vs.RGB24, range_s="full")

    # Merge weight of the unprocessed clip: strength=1 -> weight=0 (full effect).
    weight = max(min(1.0 - strength, 1.0), 0.0)

    # Everything that is frame-invariant is hoisted out of the per-frame callback.
    bits_per_pixel = clip.format.bits_per_sample
    freq_bits = min(bits_per_pixel, 16)
    hi_limit = (235 + 1) << (freq_bits - 8)
    lo_limit = 16 << (freq_bits - 8)
    softening = (1 - clip_limit / 100.0) if clip_limit > 0 else 1.0

    def adjust(n: int, f: vs.VideoFrame) -> vs.VideoFrame:
        # Interleave the three planes into an HxWx3 RGB image.
        rgb = np.stack([np.asarray(f[i], dtype=np.uint8) for i in range(3)], axis=2)

        yuv = cv2.cvtColor(rgb, cv2.COLOR_RGB2YUV)
        luma = yuv[:, :, 0]

        # Clamp the observed min/max into the legal TV range.
        max_y = min(luma.max(), hi_limit)
        min_y = max(luma.min(), lo_limit)
        span = max_y - min_y

        if span > 0:
            scale = (hi_limit - lo_limit) / span
            offset = ((lo_limit - scale * min_y) /
                      (1 << (freq_bits - 8))) * softening
            gain = (256 * (scale - 1.0)) * softening
            yuv[:, :, 0] = np.clip((luma + offset) * (gain / 256 + 1), 0, 255)

        restored = cv2.cvtColor(yuv, cv2.COLOR_YUV2RGB)

        # Write the adjusted planes back into a copy of the input frame.
        out = f.copy()
        for i in range(3):
            np.copyto(np.asarray(out[i]), restored[:, :, i])
        return out

    adjusted = rgb_clip.std.ModifyFrame(clips=[rgb_clip], selector=adjust)
    blended = core.std.Merge(adjusted, rgb_clip, weight) if weight > 0 else adjusted

    if clip.format.id == vs.RGB24:
        return blended
    return blended.resize.Bicubic(format=vs.YUV420P8, matrix_s="709", range_s="limited")

Using vsViewers Benchmark tool I got:
460-480fps with "clip = autoadjust.AutoGain(clip)"
and
640-650fps "clip = autoadjust.AutoGain2(clip)"
that's nearly 40% faster. Do you get similar speeds?

Cu Selur

Ps.: Since I don't like the way Fred and some other users write their scripts, I also attached a function which wraps the whole thing.
PPs.: added UnsharpenMask to sharpen.py


Attached Files
.zip   fred.zip (Size: 1,78 KB / Downloads: 5)
----
Dev versions are in the 'experimental'-folder of my GoogleDrive, which is linked on the download page.
Reply
#16
Thank you for your review, Selur!

What remarkable work in fred.py , Mr Selur!
Reply
#17
Here's an AutoLevelsYUV444 version:
import vapoursynth as vs
import numpy as np
import ctypes
import cv2

core = vs.core

def AutoLevelsYUV444(
    clip: vs.VideoNode,
    method: int = 4,
    clip_limit: float = 1.0,
    gridsize: int = 8,
    strength: float = 0.5
) -> vs.VideoNode:
    """Automatic levels adjustment for YUV 4:4:4 clips via OpenCV.

    Each plane is temporarily mapped to 8-bit for OpenCV, processed, then
    mapped back to the clip's native range.

    :param clip:        Clip to process; must be YUV without subsampling (4:4:4),
                        integer or float, any bit depth.
    :param method:      0 = CLAHE on Y only;
                        1 = histogram equalization on Y, U and V;
                        2 = CLAHE on Y, U and V;
                        3 = average of CLAHE and histogram equalization on Y;
                        4 = percentile range scaling on Y (convertScaleAbs);
                        5 = percentile range scaling on Y (numpy variant).
    :param clip_limit:  CLAHE contrast limit, also reused as the percentile
                        (in % of pixels, halved per tail) for methods 4/5.
    :param gridsize:    CLAHE tile grid size (gridsize x gridsize).
    :param strength:    Blend factor in [0, 1] between original Y (0) and
                        processed Y (1).
    :raises vs.Error:   If the clip is not YUV 4:4:4.
    """
    fmt = clip.format
    if fmt.color_family != vs.YUV or fmt.subsampling_w or fmt.subsampling_h:
        raise vs.Error("AutoLevelsYUV444: Only YUV444 formats are allowed")

    bits     = fmt.bits_per_sample
    is_float = fmt.sample_type == vs.FLOAT
    # Maximum representable plane value (1.0 for float formats).
    peak     = 1.0 if is_float else (1 << bits) - 1

    # Choose NumPy and ctypes types based on bit depth
    if is_float:
        dtype = np.float32
        ctype = ctypes.c_float
    else:
        dtype = np.uint8 if bits <= 8 else np.uint16
        ctype = ctypes.c_uint8 if bits <= 8 else ctypes.c_uint16

    def _process_plane(plane: np.ndarray, mode: str, algo: int) -> np.ndarray:
        # plane: 2D array with values in [0, peak]
        # → 8-bit for OpenCV
        p8 = np.clip((plane / peak) * 255, 0, 255).astype(np.uint8)
        if mode == "clahe":
            clahe = cv2.createCLAHE(clipLimit=clip_limit,
                                    tileGridSize=(gridsize, gridsize))
            out8 = clahe.apply(p8)
        elif mode == "hist":
            out8 = cv2.equalizeHist(p8)
        else:  # scale
            # Percentile-based range stretch: clip clip_limit/2 % of pixels
            # at each tail of the cumulative histogram, then rescale to 0..255.
            hist = cv2.calcHist([p8],[0],None,[256],[0,256]).ravel()
            cdf  = hist.cumsum(); total = cdf[-1]
            clipv = clip_limit * total / 200.0
            lo    = np.searchsorted(cdf, clipv)
            hi    = np.searchsorted(cdf, total - clipv)
            alpha = 255 / max(hi - lo, 1)
            beta  = -lo * alpha
            # algo 0 uses OpenCV's saturating scale; algo 1 the numpy equivalent.
            out8  = (cv2.convertScaleAbs(p8, alpha=alpha, beta=beta)
                     if algo==0 else np.clip(p8*alpha+beta,0,255).astype(np.uint8))
        # back to original range
        return ((out8.astype(np.float32) / 255) * peak).astype(dtype)

    def read_plane(f: vs.VideoFrame, idx: int) -> np.ndarray:
        """Reads plane idx as (height×width) array via ctypes.from_address.

        NOTE(review): this assumes the plane data is contiguous, i.e.
        stride == width * bytes-per-sample. VapourSynth planes may be padded
        (stride > row width) — confirm, or read via np.asarray(f[idx]) which
        honors the stride.
        """
        w, h   = clip.width, clip.height
        ptr     = f.get_read_ptr(idx)             # c_void_p
        # Older API returns c_void_p, newer may return a plain int.
        addr    = ptr if isinstance(ptr, int) else ptr.value
        buf_len = w * h
        # Create ctypes array and convert
        buf_type = ctype * buf_len
        buf      = buf_type.from_address(addr)
        arr      = np.ctypeslib.as_array(buf)    # 1D array
        return arr.reshape((h, w))

    def write_plane(f: vs.VideoFrame, idx: int, data: np.ndarray):
        """Writes data (h×w) back to plane idx.

        NOTE(review): same contiguity (stride) assumption as read_plane.
        """
        w, h   = clip.width, clip.height
        ptr     = f.get_write_ptr(idx)
        addr    = ptr if isinstance(ptr, int) else ptr.value
        buf_len = w * h
        buf_type = ctype * buf_len
        buf      = buf_type.from_address(addr)
        arr      = np.ctypeslib.as_array(buf).reshape((h, w))
        arr[:, :] = data

    def selector(n: int, f):
        # f can be VideoFrame or [VideoFrame]; ~> we take f[0] if list
        frame = f if isinstance(f, vs.VideoFrame) else f[0]
        # Read Y, U, V
        y = read_plane(frame, 0)
        u = read_plane(frame, 1)
        v = read_plane(frame, 2)

        # Apply method (methods 1 and 2 also rewrite the chroma planes).
        if   method == 0:
            y2 = _process_plane(y,    "clahe", 0)
        elif method == 1:
            y2 = _process_plane(y,    "hist",  0)
            u  = _process_plane(u,    "hist",  0)
            v  = _process_plane(v,    "hist",  0)
        elif method == 2:
            y2 = _process_plane(y,    "clahe", 0)
            u  = _process_plane(u,    "clahe", 0)
            v  = _process_plane(v,    "clahe", 0)
        elif method == 3:
            y1 = _process_plane(y,    "clahe", 0)
            y3 = _process_plane(y,    "hist",  0)
            y2 = ((y1 + y3) / 2).astype(dtype)
        elif method == 4:
            y2 = _process_plane(y,    "scale", 0)
        else:  # method == 5
            y2 = _process_plane(y,    "scale", 1)

        # Blend — strength only affects luma; chroma is written back as-is
        # (or equalized, for methods 1/2).
        y_out = ((1 - strength)*y + strength*y2).astype(dtype)

        # Build new frame
        fout = frame.copy()
        write_plane(fout, 0, y_out)
        write_plane(fout, 1, u)
        write_plane(fout, 2, v)
        return fout

    # Use ModifyFrame (Selector returns VideoFrame)
    return core.std.ModifyFrame(clip=clip, clips=clip, selector=selector)
Which only works on Y.

Cu Selur
----
Dev versions are in the 'experimental'-folder of my GoogleDrive, which is linked on the download page.
Reply


Forum Jump:


Users browsing this thread: 1 Guest(s)