Revert "vpx use webrtc parameter"

This reverts commit d23fd902d766cbcdf6e81d9ec7991a9e8eabbfe1.
This commit is contained in:
21pages 2023-07-18 13:49:52 +08:00
parent 22ca7177a6
commit 633c80d5e4
5 changed files with 88 additions and 377 deletions

View File

@ -101,8 +101,10 @@ fn test_vpx(
let config = EncoderCfg::VPX(VpxEncoderConfig {
width: width as _,
height: height as _,
timebase: [1, 1000],
bitrate: bitrate_k as _,
codec: codec_id,
num_threads: (num_cpus::get() / 2) as _,
});
let mut encoder = VpxEncoder::new(config).unwrap();
let mut vpxs = vec![];

View File

@ -101,8 +101,10 @@ fn main() -> io::Result<()> {
let mut vpx = vpx_encode::VpxEncoder::new(EncoderCfg::VPX(vpx_encode::VpxEncoderConfig {
width,
height,
timebase: [1, 1000],
bitrate: args.flag_bv,
codec: vpx_codec,
num_threads: 0,
}))
.unwrap();

View File

@ -78,7 +78,7 @@ mod webrtc {
} else {
// Use 2 threads for low res on ARM.
#[cfg(any(target_arch = "arm", target_arch = "aarch64", target_os = "android"))]
if width * height >= 320 * 180 && number_of_cores > 2 {
if (width * height >= 320 * 180 && number_of_cores > 2) {
return 2;
}
// 1 thread for less than VGA.

View File

@ -10,14 +10,13 @@ use hbb_common::ResultType;
use crate::codec::EncoderApi;
use crate::{GoogleImage, STRIDE_ALIGN};
use super::vpx::{vpx_codec_err_t::*, *};
use super::vpx::{vp8e_enc_control_id::*, vpx_codec_err_t::*, *};
use crate::{generate_call_macro, generate_call_ptr_macro, Error, Result};
use hbb_common::bytes::Bytes;
use std::os::raw::c_uint;
use std::os::raw::{c_int, c_uint};
use std::{ptr, slice};
generate_call_macro!(call_vpx, false);
generate_call_macro!(call_vpx_allow_err, true);
generate_call_ptr_macro!(call_vpx_ptr);
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
@ -54,11 +53,40 @@ impl EncoderApi for VpxEncoder {
VpxVideoCodecId::VP8 => call_vpx_ptr!(vpx_codec_vp8_cx()),
VpxVideoCodecId::VP9 => call_vpx_ptr!(vpx_codec_vp9_cx()),
};
let mut c = unsafe { std::mem::MaybeUninit::zeroed().assume_init() };
call_vpx!(vpx_codec_enc_config_default(i, &mut c, 0));
let c = match config.codec {
VpxVideoCodecId::VP8 => webrtc::vp8::enc_cfg(i, &config)?,
VpxVideoCodecId::VP9 => webrtc::vp9::enc_cfg(i, &config)?,
// https://www.webmproject.org/docs/encoder-parameters/
// default: c.rc_min_quantizer = 0, c.rc_max_quantizer = 63
// try rc_resize_allowed later
c.g_w = config.width;
c.g_h = config.height;
c.g_timebase.num = config.timebase[0];
c.g_timebase.den = config.timebase[1];
c.rc_target_bitrate = config.bitrate;
c.rc_undershoot_pct = 95;
c.rc_dropframe_thresh = 25;
c.g_threads = if config.num_threads == 0 {
num_cpus::get() as _
} else {
config.num_threads
};
c.g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT;
// https://developers.google.com/media/vp9/bitrate-modes/
// Constant Bitrate mode (CBR) is recommended for live streaming with VP9.
c.rc_end_usage = vpx_rc_mode::VPX_CBR;
// c.kf_min_dist = 0;
// c.kf_max_dist = 999999;
c.kf_mode = vpx_kf_mode::VPX_KF_DISABLED; // reduce bandwidth a lot
/*
The VPX encoder supports two-pass encoding for rate control purposes.
In two-pass encoding, the entire encoding process is performed twice.
The first pass generates new control parameters for the second pass.
This approach enables the best PSNR at the same bit rate.
*/
let mut ctx = Default::default();
call_vpx!(vpx_codec_enc_init_ver(
@ -68,9 +96,50 @@ impl EncoderApi for VpxEncoder {
0,
VPX_ENCODER_ABI_VERSION as _
));
match config.codec {
VpxVideoCodecId::VP8 => webrtc::vp8::set_control(&mut ctx, &c)?,
VpxVideoCodecId::VP9 => webrtc::vp9::set_control(&mut ctx, &c)?,
if config.codec == VpxVideoCodecId::VP9 {
// set encoder internal speed settings
// in ffmpeg, it is --speed option
/*
set to 0 or a positive value 1-16, the codec will try to adapt its
complexity depending on the time it spends encoding. Increasing this
number will make the speed go up and the quality go down.
Negative values mean strict enforcement of this
while positive values are adaptive
*/
/* https://developers.google.com/media/vp9/live-encoding
Speed 5 to 8 should be used for live / real-time encoding.
Lower numbers (5 or 6) are higher quality but require more CPU power.
Higher numbers (7 or 8) will be lower quality but more manageable for lower latency
use cases and also for lower CPU power devices such as mobile.
*/
call_vpx!(vpx_codec_control_(&mut ctx, VP8E_SET_CPUUSED as _, 7,));
// set row level multi-threading
/*
as some people in comments and below have already commented,
more recent versions of libvpx support -row-mt 1 to enable tile row
multi-threading. This can increase the number of tiles by up to 4x in VP9
(since the max number of tile rows is 4, regardless of video height).
To enable this, use -tile-rows N where N is the number of tile rows in
log2 units (so -tile-rows 1 means 2 tile rows and -tile-rows 2 means 4 tile
rows). The total number of active threads will then be equal to
$tile_rows * $tile_columns
*/
call_vpx!(vpx_codec_control_(
&mut ctx,
VP9E_SET_ROW_MT as _,
1 as c_int
));
call_vpx!(vpx_codec_control_(
&mut ctx,
VP9E_SET_TILE_COLUMNS as _,
4 as c_int
));
} else if config.codec == VpxVideoCodecId::VP8 {
// https://github.com/webmproject/libvpx/blob/972149cafeb71d6f08df89e91a0130d6a38c4b15/vpx/vp8cx.h#L172
// https://groups.google.com/a/webmproject.org/g/webm-discuss/c/DJhSrmfQ61M
call_vpx!(vpx_codec_control_(&mut ctx, VP8E_SET_CPUUSED as _, 12,));
}
Ok(Self {
@ -218,10 +287,13 @@ pub struct VpxEncoderConfig {
pub width: c_uint,
/// The height (in pixels).
pub height: c_uint,
/// The timebase numerator and denominator (in seconds).
pub timebase: [c_int; 2],
/// The target bitrate (in kilobits per second).
pub bitrate: c_uint,
/// The codec
pub codec: VpxVideoCodecId,
pub num_threads: u32,
}
#[derive(Clone, Copy, Debug)]
@ -417,370 +489,3 @@ impl Drop for Image {
}
unsafe impl Send for vpx_codec_ctx_t {}
mod webrtc {
use super::*;
// Upper quantizer bound; both vp8::enc_cfg and vp9::enc_cfg assign it to `rc_max_quantizer`.
const K_QP_MAX: u32 = 25; // worth adjusting
// Content mode the encoders are tuned for. The enc_cfg/set_control functions in this
// module compare against it to pick screensharing-specific values.
const MODE: VideoCodecMode = VideoCodecMode::KScreensharing;
// Denominator written to `g_timebase.den` (numerator is 1) by both enc_cfg functions.
const K_RTP_TICKS_PER_SECOND: i32 = 90000;
// vp8::enc_cfg only turns on error resilience when this is greater than 1.
const NUMBER_OF_TEMPORAL_LAYERS: u32 = 1;
// One of the three conditions vp9::set_control requires before enabling noise sensitivity.
const DENOISING_ON: bool = true;
// When true, enc_cfg sets `rc_dropframe_thresh` to 30; otherwise to 0.
const FRAME_DROP_ENABLED: bool = false;
// Kind of content being encoded; only compared against `MODE` in this module.
#[allow(dead_code)]
#[derive(Debug, PartialEq, Eq)]
enum VideoCodecMode {
KRealtimeVideo,
KScreensharing,
}
// Encoder complexity levels; vp8::get_cpu_speed_default maps them to a base cpu-speed value.
#[allow(dead_code)]
#[derive(Debug, PartialEq, Eq)]
enum VideoCodecComplexity {
KComplexityLow = -1,
KComplexityNormal = 0,
KComplexityHigh = 1,
KComplexityHigher = 2,
KComplexityMax = 3,
}
// https://webrtc.googlesource.com/src/+/refs/heads/main/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
pub mod vp9 {
use super::*;
// When true, enc_cfg sets `g_error_resilient` to VPX_ERROR_RESILIENT_DEFAULT; otherwise 0.
const SVC: bool = false;
// https://webrtc.googlesource.com/src/+/refs/heads/main/api/video_codecs/video_encoder.cc#35
// Written to both `kf_max_dist` and `kf_min_dist` by enc_cfg.
const KEY_FRAME_INTERVAL: u32 = 3000;
// When true, set_control passes 3 to VP9E_SET_AQ_MODE; otherwise 0.
const ADAPTIVE_QP_MODE: bool = true;
/// Builds the VP9 encoder configuration for the codec interface `i`.
///
/// Starts from the values filled in by `vpx_codec_enc_config_default`, then overrides
/// them with the fixed settings below plus the width, height and bitrate taken from `cfg`.
/// Returns the finished configuration; `call_vpx!` is defined elsewhere and presumably
/// returns early with an error when the libvpx call fails — confirm against the macro.
pub fn enc_cfg(
i: *const vpx_codec_iface_t,
cfg: &VpxEncoderConfig,
) -> ResultType<vpx_codec_enc_cfg_t> {
// NOTE(review): assumes an all-zero bit pattern is a valid `vpx_codec_enc_cfg_t`
// (it is immediately overwritten by the defaults call below) — confirm.
let mut c: vpx_codec_enc_cfg_t =
unsafe { std::mem::MaybeUninit::zeroed().assume_init() };
call_vpx!(vpx_codec_enc_config_default(i, &mut c, 0));
// kProfile0
c.g_bit_depth = vpx_bit_depth::VPX_BITS_8;
c.g_profile = 0;
c.g_input_bit_depth = 8;
// Frame geometry and target bitrate come straight from the caller's config.
c.g_w = cfg.width;
c.g_h = cfg.height;
c.rc_target_bitrate = cfg.bitrate; // in kbit/s
c.g_error_resilient = if SVC { VPX_ERROR_RESILIENT_DEFAULT } else { 0 };
// Timebase is fixed at 1 / K_RTP_TICKS_PER_SECOND.
c.g_timebase.num = 1;
c.g_timebase.den = K_RTP_TICKS_PER_SECOND;
c.g_lag_in_frames = 0;
c.rc_dropframe_thresh = if FRAME_DROP_ENABLED { 30 } else { 0 };
c.rc_end_usage = vpx_rc_mode::VPX_CBR;
c.g_pass = vpx_enc_pass::VPX_RC_ONE_PASS;
// Screensharing uses a higher minimum quantizer than realtime video.
c.rc_min_quantizer = if MODE == VideoCodecMode::KScreensharing {
8
} else {
2
};
c.rc_max_quantizer = K_QP_MAX;
c.rc_undershoot_pct = 50;
c.rc_overshoot_pct = 50;
// Rate-control buffer sizes (presumably in milliseconds — confirm against libvpx docs).
c.rc_buf_initial_sz = 500;
c.rc_buf_optimal_sz = 600;
c.rc_buf_sz = 1000;
// Key-frame interval is enforced manually by this wrapper.
c.kf_mode = vpx_kf_mode::VPX_KF_DISABLED;
// TODO(webm:1592): work-around for libvpx issue, as it can still
// put some key-frames at will even in VPX_KF_DISABLED kf_mode.
c.kf_max_dist = KEY_FRAME_INTERVAL;
c.kf_min_dist = c.kf_max_dist;
c.rc_resize_allowed = 0;
// Determine number of threads based on the image size and #cores.
c.g_threads = number_of_threads(c.g_w, c.g_h, num_cpus::get());
// Single temporal layer: no layering, one layer, decimator and periodicity of 1.
c.temporal_layering_mode =
vp9e_temporal_layering_mode::VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING as _;
c.ts_number_layers = 1;
c.ts_rate_decimator[0] = 1;
c.ts_periodicity = 1;
c.ts_layer_id[0] = 0;
Ok(c)
}
/// Applies the VP9 runtime controls to the encoder context `ctx`.
///
/// `cfg` is only read: its buffer size, dimensions and thread count feed the values
/// passed to `vpx_codec_control_`. Always ends with `Ok(())` when it reaches the end;
/// `call_vpx_allow_err!` is defined elsewhere and presumably tolerates a failing
/// control call instead of returning — confirm against the macro.
pub fn set_control(ctx: *mut vpx_codec_ctx_t, cfg: &vpx_codec_enc_cfg_t) -> ResultType<()> {
use vp8e_enc_control_id::*;
// Local shorthand: one `vpx_codec_control_` call with the control id cast to i32.
macro_rules! call_ctl {
($ctx:expr, $vpxe:expr, $arg:expr) => {{
call_vpx_allow_err!(vpx_codec_control_($ctx, $vpxe as i32, $arg));
}};
}
call_ctl!(
ctx,
VP8E_SET_MAX_INTRA_BITRATE_PCT,
max_intra_target(cfg.rc_buf_optimal_sz)
);
call_ctl!(ctx, VP9E_SET_AQ_MODE, if ADAPTIVE_QP_MODE { 3 } else { 0 });
call_ctl!(ctx, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
call_ctl!(ctx, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
// Speed setting is the first element of the resolution-dependent performance flags.
call_ctl!(
ctx,
VP8E_SET_CPUUSED,
get_default_performance_flags(cfg.g_w, cfg.g_h).0
);
// g_threads >> 1 maps 1/2/4 threads to 0/1/2 (presumably log2 tile columns — confirm).
call_ctl!(ctx, VP9E_SET_TILE_COLUMNS, cfg.g_threads >> 1);
// Turn on row-based multithreading.
call_ctl!(ctx, VP9E_SET_ROW_MT, 1);
// Denoising needs the global switch, platform support and the per-resolution flag.
let denoising = DENOISING_ON
&& allow_denoising()
&& get_default_performance_flags(cfg.g_w, cfg.g_h).1;
call_ctl!(
ctx,
VP9E_SET_NOISE_SENSITIVITY,
if denoising { 1 } else { 0 }
);
if MODE == VideoCodecMode::KScreensharing {
call_ctl!(ctx, VP9E_SET_TUNE_CONTENT, 1);
}
// Enable encoder skip of static/low content blocks.
call_ctl!(ctx, VP8E_SET_STATIC_THRESHOLD, 1);
Ok(())
}
/// Returns `(base_layer_speed, allow_denoising)` for a `width` x `height` frame.
///
/// On ARM, AArch64 and Android targets the result is always `(8, true)`. Elsewhere the
/// speed grows with the pixel count: below 352x288 -> `(5, true)`, below 1920x1080 ->
/// `(7, true)`, otherwise `(9, false)`.
fn get_default_performance_flags(width: u32, height: u32) -> (u32, bool) {
    if cfg!(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_os = "android"
    )) {
        return (8, true);
    }
    // Widen before multiplying: `width * height` in u32 overflows for very large frames
    // (panic in debug builds, silent wrap-around to a tiny pixel count in release).
    let pixels = u64::from(width) * u64::from(height);
    if pixels < 352 * 288 {
        (5, true)
    } else if pixels < 1920 * 1080 {
        (7, true)
    } else {
        (9, false)
    }
}
/// Reports whether the denoiser may be enabled on the compilation target.
///
/// Do not enable the denoiser on ARM since optimization is pending; it is on by
/// default on every other platform. "ARM" here means `arm`, `aarch64` or Android.
fn allow_denoising() -> bool {
    let arm_like = cfg!(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_os = "android"
    ));
    !arm_like
}
/// Picks the VP9 encoder thread count from the frame size and the number of cores.
///
/// Keeps the number of encoder threads equal to a possible number of column tiles
/// (1, 2 or 4 here): 4 threads from 1280x720 with more than 4 cores, 2 threads from
/// 640x360 with more than 2 cores, otherwise 1. On ARM, AArch64 and Android targets
/// 2 threads are also used from 320x180 upwards when more than 2 cores are available.
fn number_of_threads(width: u32, height: u32, number_of_cores: usize) -> u32 {
    // Widen before multiplying: `width * height` in u32 overflows for very large frames
    // (panic in debug builds, wrap-around to a bogus small pixel count in release).
    let pixels = u64::from(width) * u64::from(height);
    let arm_like = cfg!(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_os = "android"
    ));

    if pixels >= 1280 * 720 && number_of_cores > 4 {
        4
    } else if pixels >= 640 * 360 && number_of_cores > 2 {
        2
    } else if arm_like && pixels >= 320 * 180 && number_of_cores > 2 {
        // Use 2 threads for low res on ARM.
        2
    } else {
        // 1 thread for less than VGA.
        1
    }
}
}
// https://webrtc.googlesource.com/src/+/refs/heads/main/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
pub mod vp8 {
use super::*;
// https://webrtc.googlesource.com/src/+/refs/heads/main/api/video_codecs/video_encoder.cc#23
// When true, enc_cfg sets `kf_mode` to VPX_KF_DISABLED and ignores KEY_FRAME_INTERVAL.
const DISABLE_KEY_FRAME_INTERVAL: bool = true;
// Written to `kf_max_dist` by enc_cfg, but only when the interval is not disabled above.
const KEY_FRAME_INTERVAL: u32 = 3000;
// Complexity level that get_cpu_speed_default maps to a base cpu-speed value.
const COMPLEXITY: VideoCodecComplexity = VideoCodecComplexity::KComplexityNormal;
// Value passed to VP8E_SET_TOKEN_PARTITIONS by set_control.
const K_TOKEN_PARTITIONS: vp8e_token_partitions =
vp8e_token_partitions::VP8_ONE_TOKENPARTITION;
/// Builds the VP8 encoder configuration for the codec interface `i`.
///
/// Starts from the values filled in by `vpx_codec_enc_config_default`, then overrides
/// them with the fixed settings below plus the width, height and bitrate taken from `cfg`.
/// Returns the finished configuration; `call_vpx!` is defined elsewhere and presumably
/// returns early with an error when the libvpx call fails — confirm against the macro.
pub fn enc_cfg(
i: *const vpx_codec_iface_t,
cfg: &VpxEncoderConfig,
) -> ResultType<vpx_codec_enc_cfg_t> {
// NOTE(review): assumes an all-zero bit pattern is a valid `vpx_codec_enc_cfg_t`
// (it is immediately overwritten by the defaults call below) — confirm.
let mut c: vpx_codec_enc_cfg_t =
unsafe { std::mem::MaybeUninit::zeroed().assume_init() };
call_vpx!(vpx_codec_enc_config_default(i, &mut c, 0));
c.g_w = cfg.width;
c.g_h = cfg.height;
// Timebase is fixed at 1 / K_RTP_TICKS_PER_SECOND.
c.g_timebase.num = 1;
c.g_timebase.den = K_RTP_TICKS_PER_SECOND;
c.g_lag_in_frames = 0;
// Error resilience is only requested when more than one temporal layer is configured.
c.g_error_resilient = if NUMBER_OF_TEMPORAL_LAYERS > 1 {
VPX_ERROR_RESILIENT_DEFAULT
} else {
0
};
c.rc_end_usage = vpx_rc_mode::VPX_CBR;
c.g_pass = vpx_enc_pass::VPX_RC_ONE_PASS;
c.rc_resize_allowed = 0;
// Screensharing uses a higher minimum quantizer than realtime video.
c.rc_min_quantizer = if MODE == VideoCodecMode::KScreensharing {
12
} else {
2
};
c.rc_max_quantizer = K_QP_MAX;
c.rc_undershoot_pct = 100;
c.rc_overshoot_pct = 15;
// Rate-control buffer sizes (presumably in milliseconds — confirm against libvpx docs).
c.rc_buf_initial_sz = 500;
c.rc_buf_optimal_sz = 600;
c.rc_buf_sz = 1000;
// Automatic key frames only when the interval is enabled and non-zero; otherwise disabled.
if !DISABLE_KEY_FRAME_INTERVAL && KEY_FRAME_INTERVAL > 0 {
c.kf_mode = vpx_kf_mode::VPX_KF_AUTO;
c.kf_max_dist = KEY_FRAME_INTERVAL;
} else {
c.kf_mode = vpx_kf_mode::VPX_KF_DISABLED;
}
// Thread count depends on the frame size and the number of CPUs reported by num_cpus.
c.g_threads = number_of_threads(c.g_w, c.g_h, num_cpus::get());
c.rc_target_bitrate = cfg.bitrate;
c.rc_dropframe_thresh = if FRAME_DROP_ENABLED { 30 } else { 0 };
Ok(c)
}
/// Applies the VP8 runtime controls to the encoder context `ctx`.
///
/// `cfg` is only read: its dimensions and optimal buffer size feed the values passed to
/// `vpx_codec_control_`. Always ends with `Ok(())` when it reaches the end;
/// `call_vpx_allow_err!` is defined elsewhere and presumably tolerates a failing
/// control call instead of returning — confirm against the macro.
pub fn set_control(ctx: *mut vpx_codec_ctx_t, cfg: &vpx_codec_enc_cfg_t) -> ResultType<()> {
use vp8e_enc_control_id::*;
// Local shorthand: one `vpx_codec_control_` call with the control id cast to i32.
macro_rules! call_ctl {
($ctx:expr, $vpxe:expr, $arg:expr) => {{
call_vpx_allow_err!(vpx_codec_control_($ctx, $vpxe as i32, $arg));
}};
}
// Static threshold is much higher for screensharing (100) than for realtime video (1).
call_ctl!(
ctx,
VP8E_SET_STATIC_THRESHOLD,
if MODE == VideoCodecMode::KScreensharing {
100
} else {
1
}
);
// Speed setting depends on the frame size, the CPU count and the target platform.
call_ctl!(
ctx,
VP8E_SET_CPUUSED,
get_cpu_speed(cfg.g_w, cfg.g_h, num_cpus::get())
);
call_ctl!(ctx, VP8E_SET_TOKEN_PARTITIONS, K_TOKEN_PARTITIONS);
call_ctl!(
ctx,
VP8E_SET_MAX_INTRA_BITRATE_PCT,
max_intra_target(cfg.rc_buf_optimal_sz)
);
call_ctl!(
ctx,
VP8E_SET_SCREEN_CONTENT_MODE,
if MODE == VideoCodecMode::KScreensharing {
2 // On with more aggressive rate control.
} else {
0
}
);
Ok(())
}
/// Maps the compile-time `COMPLEXITY` level to the base VP8 cpu-speed value.
///
/// Higher complexity yields a value closer to zero: max -> -3, higher -> -4,
/// high -> -5, and every lower level -> -6.
fn get_cpu_speed_default() -> i32 {
    match COMPLEXITY {
        VideoCodecComplexity::KComplexityMax => -3,
        VideoCodecComplexity::KComplexityHigher => -4,
        VideoCodecComplexity::KComplexityHigh => -5,
        VideoCodecComplexity::KComplexityLow | VideoCodecComplexity::KComplexityNormal => -6,
    }
}
/// Chooses the value passed to `VP8E_SET_CPUUSED` for a `width` x `height` frame.
///
/// On ARM, AArch64 and Android targets the result depends on the core count and the
/// frame size: -12 with 3 or fewer cores, otherwise -8 up to 352x288, -10 up to
/// 640x480 and -12 above that. On every other target the complexity-based default is
/// used, raised to at least -4 for frames smaller than 352x288.
fn get_cpu_speed(width: u32, height: u32, number_of_cores: usize) -> i32 {
    // Widen before multiplying: `width * height` in u32 overflows for very large frames
    // (panic in debug builds, wrap-around to a bogus small pixel count in release).
    let pixels = u64::from(width) * u64::from(height);
    if cfg!(any(
        target_arch = "arm",
        target_arch = "aarch64",
        target_os = "android"
    )) {
        if number_of_cores <= 3 {
            -12
        } else if pixels <= 352 * 288 {
            -8
        } else if pixels <= 640 * 480 {
            -10
        } else {
            -12
        }
    } else {
        let cpu_speed_default = get_cpu_speed_default();
        if pixels < 352 * 288 {
            // Small frames never go below -4.
            cpu_speed_default.max(-4)
        } else {
            cpu_speed_default
        }
    }
}
/// Picks the VP8 encoder thread count from the frame size and the number of CPUs.
///
/// Android: frames below 320x180 always get 1 thread; larger frames get 3 threads with
/// 4 or more CPUs, 2 threads with 2 or 3 CPUs, and 1 thread otherwise.
/// Other targets: 8 threads from 1920x1080 with more than 8 CPUs, 3 threads above
/// 1280x960 with at least 6 CPUs, 2 or 3 threads above 640x480 with at least 3 CPUs,
/// and 1 thread for VGA or less.
fn number_of_threads(width: u32, height: u32, cpus: usize) -> u32 {
    // Widen before multiplying: `width * height` in u32 overflows for very large frames
    // (panic in debug builds, wrap-around to a bogus small pixel count in release).
    let pixels = u64::from(width) * u64::from(height);

    if cfg!(target_os = "android") {
        if pixels < 320 * 180 {
            return 1;
        }
        return match cpus {
            0 | 1 => 1,
            2 | 3 => 2,
            // 3 threads for CPUs with 4 and more cores since most of times only 4
            // cores will be active.
            _ => 3,
        };
    }

    if pixels >= 1920 * 1080 && cpus > 8 {
        // 8 threads for 1080p on high perf machines.
        8
    } else if pixels > 1280 * 960 && cpus >= 6 {
        // 3 threads for 1080p.
        3
    } else if pixels > 640 * 480 && cpus >= 3 {
        // Default 2 threads for qHD/HD, but allow 3 if core count is high enough,
        // as this will allow more margin for high-core/low clock machines or if
        // not built with highest optimization.
        if cpus >= 6 {
            3
        } else {
            2
        }
    } else {
        // 1 thread for VGA or less.
        1
    }
}
}
/// Computes the value passed to `VP8E_SET_MAX_INTRA_BITRATE_PCT`.
///
/// The target is `optimal_buffer_size * 0.5 * MAX_FRAMERATE / 10`, evaluated in `f32`
/// and truncated to `u32`, but never less than 300.
fn max_intra_target(optimal_buffer_size: u32) -> u32 {
    const MAX_FRAMERATE: u32 = 60; // TODO
    const SCALE_PAR: f32 = 0.5;
    const MIN_INTRA_SIZE: u32 = 300;

    let scaled = optimal_buffer_size as f32 * SCALE_PAR * MAX_FRAMERATE as f32 / 10.0;
    (scaled as u32).max(MIN_INTRA_SIZE)
}
}

View File

@ -535,12 +535,14 @@ fn run(sp: GenericService) -> ResultType<()> {
EncoderCfg::VPX(VpxEncoderConfig {
width: c.width as _,
height: c.height as _,
timebase: [1, 1000], // Output timestamp precision
bitrate,
codec: if name == scrap::CodecName::VP8 {
VpxVideoCodecId::VP8
} else {
VpxVideoCodecId::VP9
},
num_threads: (num_cpus::get() / 2) as _,
})
}
scrap::CodecName::AV1 => EncoderCfg::AOM(AomEncoderConfig {