In the next few blogs, learn the implementation process of X264.
This source-code analysis draws on Lei Xiaohua's blog; many thanks to him! Blog link: https://blog.csdn.net/leixiaohua1020/article/details/45536607.
1. overview
The flow chart of X264 encoding is as follows:
First, create an encoder and initialize its parameters; then read the YUV data, perform VCL video coding, and pack the encoded data into NAL units, repeating this for every video frame; finally, release the memory and close the encoder.
2. Application engineering
(1) The main() function is the entry point of the x264 console application. Its source is as follows:
//Main function int main( int argc, char **argv ) { x264_param_t param; //Encoders conflg cli_opt_t opt = ; //Encoder operation int ret = 0; FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" ) #ifdef _WIN32 FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" ) GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE ); _setmode( _fileno( stdin ), _O_BINARY ); //Binary format _setmode( _fileno( stdout ), _O_BINARY ); _setmode( _fileno( stderr ), _O_BINARY ); #endif if( parse( argc, argv, ¶m, &opt ) < 0 ) //Parsing command line input ret = -1; #ifdef _WIN32 SetConsoleTitleW( org_console_title ); #endif signal( SIGINT, sigint_handler ); if( !ret ) ret = encode( ¶m, &opt ); //Code if( filter.free ) filter.free( opt.hin ); else if( opt.hin ) cli_input.close_file( opt.hin ); if( opt.hout ) cli_output.close_file( opt.hout, 0, 0 ); if( opt.tcfile_out ) fclose( opt.tcfile_out ); if( opt.qpfile ) fclose( opt.qpfile ); #ifdef _WIN32 SetConsoleTitleW( org_console_title ); free( argv ); #endif return ret; }
At the beginning of the main function, first call parse() to parse the input command-line parameters, and then call encode() to encode.
(2) the parse() function is used to parse the parameters entered on the command line.
static int parse(int argc, char **argv, x264_param_t *param, cli_opt_t *opt)
- argc: number of command-line arguments; - argv: array of pointers to the argument strings; - param: encoder parameter structure to fill in; - opt: command-line operation options to fill in.
parse() function:
//Parsing command line input static int parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt ) { char *input_filename = NULL; const char *demuxer = demuxer_names[0]; char *output_filename = NULL; const char *muxer = muxer_names[0]; char *tcfile_name = NULL; x264_param_t defaults; char *profile = NULL; char *vid_filters = NULL; int b_thread_input = 0; int b_turbo = 1; int b_user_ref = 0; int b_user_fps = 0; int b_user_interlaced = 0; cli_input_opt_t input_opt; cli_output_opt_t output_opt; char *preset = NULL; char *tune = NULL; //Initialization parameter defaults x264_param_default( &defaults ); cli_log_level = defaults.i_log_level; memset( &input_opt, 0, sizeof(cli_input_opt_t) ); memset( &output_opt, 0, sizeof(cli_output_opt_t) ); input_opt.bit_depth = 8; input_opt.input_range = input_opt.output_range = param->vui.b_fullrange = RANGE_AUTO; int output_csp = defaults.i_csp; opt->b_progress = 1; /* Presets are applied before all other options. */ for( optind = 0;; ) { int c = getopt_long( argc, argv, short_options, long_options, NULL ); if( c == -1 ) break; if( c == OPT_PRESET ) preset = optarg; if( c == OPT_TUNE ) tune = optarg; else if( c == '?' 
) return -1; } if( preset && !strcasecmp( preset, "placebo" ) ) b_turbo = 0; //Set preset, tune if( x264_param_default_preset( param, preset, tune ) < 0 ) return -1; /* Parse command line options */ //Resolve command line options for( optind = 0;; ) { int b_error = 0; int long_options_index = -1; int c = getopt_long( argc, argv, short_options, long_options, &long_options_index ); if( c == -1 ) { break; } //Different options for different processing switch( c ) { case 'h': help( &defaults, 0 );//"- h" help menu exit(0); case OPT_LONGHELP: help( &defaults, 1 ); exit(0); case OPT_FULLHELP: help( &defaults, 2 ); exit(0); case 'V': print_version_info();//Print version information exit(0); case OPT_FRAMES: param->i_frame_total = X264_MAX( atoi( optarg ), 0 ); break; case OPT_SEEK: opt->i_seek = X264_MAX( atoi( optarg ), 0 ); break; case 'o': output_filename = optarg;//Output file path break; case OPT_MUXER: FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg ) break; case OPT_DEMUXER: FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg ) break; case OPT_INDEX: input_opt.index_file = optarg; break; case OPT_QPFILE: opt->qpfile = x264_fopen( optarg, "rb" ); FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg ) if( !x264_is_regular_file( opt->qpfile ) ) { x264_cli_log( "x264", X264_LOG_ERROR, "qpfile incompatible with non-regular file `%s'\n", optarg ); fclose( opt->qpfile ); return -1; } break; case OPT_THREAD_INPUT: b_thread_input = 1; break; case OPT_QUIET: cli_log_level = param->i_log_level = X264_LOG_NONE;//Set log level break; case 'v': cli_log_level = param->i_log_level = X264_LOG_DEBUG;//Set log level break; case OPT_LOG_LEVEL: if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) ) cli_log_level += X264_LOG_NONE; else cli_log_level = atoi( optarg ); param->i_log_level = cli_log_level;//Set log level break; case OPT_NOPROGRESS: opt->b_progress = 0; break; 
case OPT_TUNE: case OPT_PRESET: break; case OPT_PROFILE: profile = optarg; break; case OPT_SLOWFIRSTPASS: b_turbo = 0; break; case 'r': b_user_ref = 1; goto generic_option; case OPT_FPS: b_user_fps = 1; param->b_vfr_input = 0; goto generic_option; case OPT_INTERLACED: b_user_interlaced = 1; goto generic_option; case OPT_TCFILE_IN: tcfile_name = optarg; break; case OPT_TCFILE_OUT: opt->tcfile_out = x264_fopen( optarg, "wb" ); FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg ) break; case OPT_TIMEBASE: input_opt.timebase = optarg; break; case OPT_PULLDOWN: FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg ) break; case OPT_VIDEO_FILTER: vid_filters = optarg; break; case OPT_INPUT_FMT: input_opt.format = optarg;//Input file format break; case OPT_INPUT_RES: input_opt.resolution = optarg;//Input resolution break; case OPT_INPUT_CSP: input_opt.colorspace = optarg;//Input gamut break; case OPT_INPUT_DEPTH: input_opt.bit_depth = atoi( optarg );//Input color bit depth break; case OPT_DTS_COMPRESSION: output_opt.use_dts_compress = 1; break; case OPT_OUTPUT_CSP: FAIL_IF_ERROR( parse_enum_value( optarg, output_csp_names, &output_csp ), "Unknown output csp `%s'\n", optarg ) // correct the parsed value to the libx264 csp value #if X264_CHROMA_FORMAT static const uint8_t output_csp_fix[] = { X264_CHROMA_FORMAT, X264_CSP_RGB }; #else static const uint8_t output_csp_fix[] = { X264_CSP_I420, X264_CSP_I422, X264_CSP_I444, X264_CSP_RGB }; #endif param->i_csp = output_csp = output_csp_fix[output_csp]; break; case OPT_INPUT_RANGE: FAIL_IF_ERROR( parse_enum_value( optarg, range_names, &input_opt.input_range ), "Unknown input range `%s'\n", optarg ) input_opt.input_range += RANGE_AUTO; break; case OPT_RANGE: FAIL_IF_ERROR( parse_enum_value( optarg, range_names, ¶m->vui.b_fullrange ), "Unknown range `%s'\n", optarg ); input_opt.output_range = param->vui.b_fullrange += RANGE_AUTO; break; default: generic_option: { if( 
long_options_index < 0 ) { for( int i = 0; long_options[i].name; i++ ) if( long_options[i].val == c ) { long_options_index = i; break; } if( long_options_index < 0 ) { /* getopt_long already printed an error message */ return -1; } } //Parsing parameters entered as strings //That is, both the option name and the option value are strings b_error |= x264_param_parse( param, long_options[long_options_index].name, optarg ); } } if( b_error ) { const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind-2]; x264_cli_log( "x264", X264_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg ); return -1; } } /* If first pass mode is used, apply faster settings. */ if( b_turbo ) x264_param_apply_fastfirstpass( param ); /* Apply profile restrictions. */ //Set profile if( x264_param_apply_profile( param, profile ) < 0 ) return -1; /* Get the file name */ FAIL_IF_ERROR( optind > argc - 1 || !output_filename, "No %s file. Run x264 --help for a list of options.\n", optind > argc - 1 ? "input" : "output" ) //Determine the output file format according to the suffix of the file name (raw H264, flv, mp4...) if( select_output( muxer, output_filename, param ) ) return -1; FAIL_IF_ERROR( cli_output.open_file( output_filename, &opt->hout, &output_opt ), "could not open output file `%s'\n", output_filename ) //Input file path input_filename = argv[optind++]; video_info_t info = ; char demuxername[5]; /* set info flags to be overwritten by demuxer as necessary. 
*/ //Set info structure info.csp = param->i_csp; info.fps_num = param->i_fps_num; info.fps_den = param->i_fps_den; info.fullrange = input_opt.input_range == RANGE_PC; info.interlaced = param->b_interlaced; if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 ) { info.sar_width = param->vui.i_sar_width; info.sar_height = param->vui.i_sar_height; } info.tff = param->b_tff; info.vfr = param->b_vfr_input; input_opt.seek = opt->i_seek; input_opt.progress = opt->b_progress; input_opt.output_csp = output_csp; //Format the input file (yuv, y4m...) if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) ) return -1; FAIL_IF_ERROR( !opt->hin && cli_input.open_file( input_filename, &opt->hin, &info, &input_opt ), "could not open input file `%s'\n", input_filename ) x264_reduce_fraction( &info.sar_width, &info.sar_height ); x264_reduce_fraction( &info.fps_num, &info.fps_den ); x264_cli_log( demuxername, X264_LOG_INFO, "%dx%d%c %u:%u @ %u/%u fps (%cfr)\n", info.width, info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height, info.fps_num, info.fps_den, info.vfr ? 
'v' : 'c' ); if( tcfile_name ) { FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" ) FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" ) cli_input = timecode_input; } else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" ) /* init threaded input while the information about the input video is unaltered by filtering */ #if HAVE_THREAD if( info.thread_safe && (b_thread_input || param->i_threads > 1 || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1)) ) { if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) ) { fprintf( stderr, "x264 [error]: threaded input failed\n" ); return -1; } cli_input = thread_input; } #endif /* override detected values by those specified by the user */ if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 ) { info.sar_width = param->vui.i_sar_width; info.sar_height = param->vui.i_sar_height; } if( b_user_fps ) { info.fps_num = param->i_fps_num; info.fps_den = param->i_fps_den; } if( !info.vfr ) { info.timebase_num = info.fps_den; info.timebase_den = info.fps_num; } if( !tcfile_name && input_opt.timebase ) { uint64_t i_user_timebase_num; uint64_t i_user_timebase_den; int ret = sscanf( input_opt.timebase, "%"SCNu64"/%"SCNu64, &i_user_timebase_num, &i_user_timebase_den ); FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase ) else if( ret == 1 ) { i_user_timebase_num = info.timebase_num; i_user_timebase_den = strtoul( input_opt.timebase, NULL, 10 ); } FAIL_IF_ERROR( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX, "timebase you specified exceeds H.264 maximum\n" ) opt->timebase_convert_multiplier = ((double)i_user_timebase_den / info.timebase_den) * ((double)info.timebase_num / i_user_timebase_num); info.timebase_num = i_user_timebase_num; info.timebase_den = i_user_timebase_den; info.vfr = 1; } if( b_user_interlaced ) { info.interlaced = 
param->b_interlaced; info.tff = param->b_tff; } if( input_opt.input_range != RANGE_AUTO ) info.fullrange = input_opt.input_range; //Initialize filter //filter can be regarded as an "extended" input source if( init_vid_filters( vid_filters, &opt->hin, &info, param, output_csp ) ) return -1; /* set param flags from the post-filtered video */ param->b_vfr_input = info.vfr; param->i_fps_num = info.fps_num; param->i_fps_den = info.fps_den; param->i_timebase_num = info.timebase_num; param->i_timebase_den = info.timebase_den; param->vui.i_sar_width = info.sar_width; param->vui.i_sar_height = info.sar_height; info.num_frames = X264_MAX( info.num_frames - opt->i_seek, 0 ); if( (!info.num_frames || param->i_frame_total < info.num_frames) && param->i_frame_total > 0 ) info.num_frames = param->i_frame_total; param->i_frame_total = info.num_frames; if( !b_user_interlaced && info.interlaced ) { #if HAVE_INTERLACED x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, enabling %cff interlaced mode.\n" " If you want otherwise, use --no-interlaced or --%cff\n", info.tff ? 't' : 'b', info.tff ? 'b' : 't' ); param->b_interlaced = 1; param->b_tff = !!info.tff; #else x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, but not compiled with interlaced support\n" ); #endif } /* if the user never specified the output range and the input is now rgb, default it to pc */ int csp = param->i_csp & X264_CSP_MASK; if( csp >= X264_CSP_BGR && csp <= X264_CSP_RGB ) { if( input_opt.output_range == RANGE_AUTO ) param->vui.b_fullrange = RANGE_PC; /* otherwise fail if they specified tv */ FAIL_IF_ERROR( !param->vui.b_fullrange, "RGB must be PC range" ) } /* Automatically reduce reference frame count to match the user's target level * if the user didn't explicitly set a reference frame count. 
*/ if( !b_user_ref ) { int mbs = (((param->i_width)+15)>>4) * (((param->i_height)+15)>>4); for( int i = 0; x264_levels[i].level_idc != 0; i++ ) if( param->i_level_idc == x264_levels[i].level_idc ) { while( mbs * param->i_frame_reference > x264_levels[i].dpb && param->i_frame_reference > 1 ) param->i_frame_reference--; break; } } return 0; }
Specific process:
(1) call x264_param_default() to assign default values to the x264_param_t structure in which the parameters are stored;
(2) call x264_param_default_preset() to apply the preset and tune to the x264_param_t structure;
(3) call getopt_long() in a loop to parse the input options one by one and handle each of them accordingly;
(4) call select_output() to determine the output file format;
(5) call select_input() to determine the input file format.
x264_param_default(): sets the default values of the x264_param_t structure (part of the libx264 API).
/*
 * Fill *param with the built-in default encoder settings.
 * The structure is zeroed first, so every field that is not explicitly
 * assigned below ends up as 0 / NULL.
 */
void x264_param_default(x264_param_t *param)
{
    /* Zero the whole structure (this clears existing memory; nothing is allocated). */
    memset( param, 0, sizeof( x264_param_t ) );

    /* CPU and threading */
    param->cpu = x264_cpu_detect();                         /* result of CPU detection */
    param->i_threads = X264_THREADS_AUTO;                   /* thread count: automatic */
    param->b_deterministic = 1;                             /* deterministic flag enabled */
    param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;     /* sync lookahead: automatic */

    /* Video properties */
    param->i_csp = X264_CSP_I420;                           /* colorspace */
    param->i_width = 0;                                     /* width, filled in later */
    param->i_height = 0;                                    /* height, filled in later */
    param->vui.i_sar_width = 0;
    param->vui.i_sar_height= 0;                             /* sample aspect ratio unset */
    param->vui.i_overscan = 0;
    param->vui.i_vidformat = 5;
    param->vui.b_fullrange = 0;
    param->vui.i_colorprim = 2;
    param->vui.i_transfer = 2;
    param->vui.i_colmatrix = 2;
    param->vui.i_chroma_loc= 0;
    param->i_fps_num = 25;                                  /* frame rate numerator */
    param->i_fps_den = 1;                                   /* frame rate denominator */
    param->i_level_idc = -1;                                /* NOTE(review): -1 presumably means "auto" - confirm */
    param->i_slice_max_size = 0;
    param->i_slice_max_mbs = 0;
    param->i_slice_count = 0;

    /* Encoder parameters */
    param->i_frame_reference = 3;                           /* number of reference frames */
    param->i_keyint_max = 250;                              /* maximum keyframe interval */
    param->i_keyint_min = 25;                               /* minimum keyframe interval */
    param->i_bframe = 3;                                    /* number of B-frames */
    param->i_scenecut_threshold = 40;
    param->i_bframe_adaptive = X264_B_ADAPT_FAST;
    param->i_bframe_bias = 0;
    param->b_bframe_pyramid = 0;

    param->b_deblocking_filter = 1;                         /* deblocking filter on */
    param->i_deblocking_filter_alphac0 = 0;
    param->i_deblocking_filter_beta = 0;

    param->b_cabac = 1;                                     /* CABAC on */
    param->i_cabac_init_idc = 0;

    /* Rate control */
    param->rc.i_rc_method = X264_RC_CRF;                    /* rate-control method: CRF (not CQP) */
    param->rc.i_bitrate = 0;
    param->rc.f_rate_tolerance = 1.0;
    param->rc.i_vbv_max_bitrate = 0;
    param->rc.i_vbv_buffer_size = 0;
    param->rc.f_vbv_buffer_init = 0.9;
    param->rc.i_qp_constant = 23;                           /* QP constant (read when the method is X264_RC_CQP) */
    param->rc.f_rf_constant = 23;                           /* rate-factor constant (read when the method is X264_RC_CRF) */
    param->rc.i_qp_min = 10;                                /* minimum QP */
    param->rc.i_qp_max = 51;                                /* maximum QP */
    param->rc.i_qp_step = 4;                                /* maximum QP step */
    param->rc.f_ip_factor = 1.4;
    param->rc.f_pb_factor = 1.3;
    param->rc.i_aq_mode = X264_AQ_VARIANCE;
    param->rc.f_aq_strength = 1.0;
    param->rc.i_lookahead = 40;

    param->rc.b_stat_write = 0;
    param->rc.psz_stat_out = "x264_2pass.log";              /* stats output file name */
    param->rc.b_stat_read = 0;
    param->rc.psz_stat_in = "x264_2pass.log";               /* stats input file name */
    param->rc.f_qcompress = 0.6;
    param->rc.f_qblur = 0.5;
    param->rc.f_complexity_blur = 20;
    param->rc.i_zones = 0;
    param->rc.b_mb_tree = 1;

    /* Log */
    param->pf_log = x264_log_default;                       /* default log callback */
    param->p_log_private = NULL;
    param->i_log_level = X264_LOG_INFO;                     /* default log level: info */

    /* Analysis */
    param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
    param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8
                         | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
    param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
    param->analyse.i_me_method = X264_ME_HEX;               /* motion estimation method */
    param->analyse.f_psy_rd = 1.0;
    param->analyse.b_psy = 1;
    param->analyse.f_psy_trellis = 0;
    param->analyse.i_me_range = 16;                         /* motion estimation range */
    param->analyse.i_subpel_refine = 7;
    param->analyse.b_mixed_references = 1;
    param->analyse.b_chroma_me = 1;
    param->analyse.i_mv_range_thread = -1;
    param->analyse.i_mv_range = -1;
    param->analyse.i_chroma_qp_offset = 0;
    param->analyse.b_fast_pskip = 1;
    param->analyse.b_weighted_bipred = 1;
    param->analyse.b_dct_decimate = 1;
    param->analyse.b_transform_8x8 = 1;
    param->analyse.i_trellis = 1;                           /* trellis setting; the default assigned here is 1 */
    param->analyse.i_luma_deadzone[0] = 21;
    param->analyse.i_luma_deadzone[1] = 11;
    param->analyse.b_psnr = 0;                              /* PSNR flag off */
    param->analyse.b_ssim = 0;                              /* SSIM flag off */

    /* Quantization: flat preset, every custom-matrix entry set to 16 */
    param->i_cqm_preset = X264_CQM_FLAT;
    memset( param->cqm_4iy, 16, 16 );
    memset( param->cqm_4ic, 16, 16 );
    memset( param->cqm_4py, 16, 16 );
    memset( param->cqm_4pc, 16, 16 );
    memset( param->cqm_8iy, 16, 64 );
    memset( param->cqm_8py, 16, 64 );

    param->b_repeat_headers = 1;                            /* repeat-headers flag on */
    param->b_aud = 0;                                       /* access unit delimiter flag off */
}
x264_param_default_preset(): sets the preset and tune of x264 (part of the libx264 API).
//Set preset, tune int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune ) { x264_param_default( param ); //Set preset if( preset && x264_param_apply_preset( param, preset ) < 0 ) return -1; //Set tune if( tune && x264_param_apply_tune( param, tune ) < 0 ) return -1; return 0; }
In the code, the functions x264_param_apply_preset() and x264_param_apply_tune() are called to set the preset and the tune respectively.
x264_param_apply_preset():
//Set preset static int x264_param_apply_preset( x264_param_t *param, const char *preset ) { char *end; int i = strtol( preset, &end, 10 ); if( *end == 0 && i >= 0 && i < sizeof(x264_preset_names)/sizeof(*x264_preset_names)-1 ) preset = x264_preset_names[i]; //Several different preset settings with different parameters if( !strcasecmp( preset, "ultrafast" ) ) { param->i_frame_reference = 1; //The maximum number of reference frames is set to 1 param->i_scenecut_threshold = 0; param->b_deblocking_filter = 0; //Do not use deblocking filtering param->b_cabac = 0; //Do not use CABAC param->i_bframe = 0; //Do not use frame B param->analyse.intra = 0; param->analyse.inter = 0; param->analyse.b_transform_8x8 = 0; //Do not use 8x8DCT param->analyse.i_me_method = X264_ME_DIA;//Selection of motion algorithm, using "Diamond" param->analyse.i_subpel_refine = 0; param->rc.i_aq_mode = 0; param->analyse.b_mixed_references = 0; param->analyse.i_trellis = 0; param->i_bframe_adaptive = X264_B_ADAPT_NONE; param->rc.b_mb_tree = 0; param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;//Do not use weighting param->analyse.b_weighted_bipred = 0; param->rc.i_lookahead = 0; } else if( !strcasecmp( preset, "superfast" ) ) { param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4; param->analyse.i_me_method = X264_ME_DIA; //Diamond template param->analyse.i_subpel_refine = 1; //Sub pixel motion estimation quality is 1 param->i_frame_reference = 1; //The maximum number of reference frames is 1 param->analyse.b_mixed_references = 0; param->analyse.i_trellis = 0; param->rc.b_mb_tree = 0; param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; param->rc.i_lookahead = 0; } else if( !strcasecmp( preset, "veryfast" ) ) { param->analyse.i_me_method = X264_ME_HEX; //Hexagon template param->analyse.i_subpel_refine = 2; param->i_frame_reference = 1; param->analyse.b_mixed_references = 0; param->analyse.i_trellis = 0; param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; param->rc.i_lookahead = 10; } else 
if( !strcasecmp( preset, "faster" ) ) { param->analyse.b_mixed_references = 0; param->i_frame_reference = 2; //The maximum frame number of reference frame is set to 2 param->analyse.i_subpel_refine = 4; param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; param->rc.i_lookahead = 20; } else if( !strcasecmp( preset, "fast" ) ) { param->i_frame_reference = 2; param->analyse.i_subpel_refine = 6; param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE; param->rc.i_lookahead = 30; } else if( !strcasecmp( preset, "medium" ) ) { /* Default is medium */ } else if( !strcasecmp( preset, "slow" ) ) { param->analyse.i_me_method = X264_ME_UMH; //UMH is relatively complex param->analyse.i_subpel_refine = 8; //Sub pixel motion estimation quality is 8 param->i_frame_reference = 5; //Set the group frame size of the reference frame to 5 param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; param->rc.i_lookahead = 50; } else if( !strcasecmp( preset, "slower" ) ) { param->analyse.i_me_method = X264_ME_UMH; param->analyse.i_subpel_refine = 9; param->i_frame_reference = 8; //The maximum number of reference frames is set to 8 param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; param->analyse.inter |= X264_ANALYSE_PSUB8x8; param->analyse.i_trellis = 2; param->rc.i_lookahead = 60; } else if( !strcasecmp( preset, "veryslow" ) ) { param->analyse.i_me_method = X264_ME_UMH; param->analyse.i_subpel_refine = 10; param->analyse.i_me_range = 24; param->i_frame_reference = 16; param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; param->analyse.inter |= X264_ANALYSE_PSUB8x8; param->analyse.i_trellis = 2; param->i_bframe = 8; //Frame B between two reference frames is 8 param->rc.i_lookahead = 60; } else if( !strcasecmp( preset, "placebo" ) ) { param->analyse.i_me_method = X264_ME_TESA; //TESA is very slow. 
param->analyse.i_subpel_refine = 11; param->analyse.i_me_range = 24; //Motion estimation range set to 24 param->i_frame_reference = 16; //The maximum number of reference frames is set to 16 param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; param->analyse.inter |= X264_ANALYSE_PSUB8x8; param->analyse.b_fast_pskip = 0; param->analyse.i_trellis = 2; param->i_bframe = 16; //Frame B between two reference frames is 16 param->rc.i_lookahead = 60; } else { x264_log( NULL, X264_LOG_ERROR, "invalid preset '%s'\n", preset ); return -1; } return 0; }
The balance between encoding speed and quality can be adjusted through the preset parameter. The available presets are ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow and placebo. Their encoding speed goes from fastest to slowest in that order, and the default is medium.
x264_param_apply_tune():
/*
 * Apply one or more tunings to *param.
 *
 * `tune` is a list of tuning names separated by any of the characters
 * ",./-+". Each name is matched case-insensitively on its leading characters
 * only (strncasecmp with the length of the known name). The "psy" tunings
 * (film, animation, grain, stillimage, psnr, ssim, touhou) are mutually
 * exclusive: the first one seen is applied, any later one is skipped with a
 * warning. fastdecode and zerolatency can be combined freely.
 *
 * Returns 0 on success, -1 if the working copy cannot be allocated or if an
 * unknown tuning name is found.
 */
static int x264_param_apply_tune( x264_param_t *param, const char *tune )
{
    /* strtok() modifies its input, so tokenise a private copy of the string. */
    char *tmp = x264_malloc( strlen( tune ) + 1 );
    if( !tmp )
        return -1;
    tmp = strcpy( tmp, tune );
    /* Split the copy into tokens; the second argument is the separator set. */
    char *s = strtok( tmp, ",./-+" );
    /* Counts psy tunings seen so far; only the first one is applied. */
    int psy_tuning_used = 0;
    /* One iteration per token, so several tunings can be applied in one call. */
    while( s )
    {
        if( !strncasecmp( s, "film", 4 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            param->i_deblocking_filter_alphac0 = -1;
            param->i_deblocking_filter_beta = -1;
            param->analyse.f_psy_trellis = 0.15;
        }
        else if( !strncasecmp( s, "animation", 9 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            /* Double the reference frame count unless it is 1 or less, in which case it becomes 1. */
            param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
            param->i_deblocking_filter_alphac0 = 1;
            param->i_deblocking_filter_beta = 1;
            param->analyse.f_psy_rd = 0.4;
            param->rc.f_aq_strength = 0.6;
            param->i_bframe += 2;
        }
        else if( !strncasecmp( s, "grain", 5 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            param->i_deblocking_filter_alphac0 = -2;
            param->i_deblocking_filter_beta = -2;
            param->analyse.f_psy_trellis = 0.25;
            param->analyse.b_dct_decimate = 0;
            param->rc.f_pb_factor = 1.1;
            param->rc.f_ip_factor = 1.1;
            param->rc.f_aq_strength = 0.5;
            param->analyse.i_luma_deadzone[0] = 6;
            param->analyse.i_luma_deadzone[1] = 6;
            param->rc.f_qcompress = 0.8;
        }
        else if( !strncasecmp( s, "stillimage", 10 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            param->i_deblocking_filter_alphac0 = -3;
            param->i_deblocking_filter_beta = -3;
            param->analyse.f_psy_rd = 2.0;
            param->analyse.f_psy_trellis = 0.7;
            param->rc.f_aq_strength = 1.2;
        }
        else if( !strncasecmp( s, "psnr", 4 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            param->rc.i_aq_mode = X264_AQ_NONE;
            param->analyse.b_psy = 0;
        }
        else if( !strncasecmp( s, "ssim", 4 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
            param->analyse.b_psy = 0;
        }
        else if( !strncasecmp( s, "fastdecode", 10 ) )
        {
            /* Not a psy tuning: no exclusivity check. */
            param->b_deblocking_filter = 0;
            param->b_cabac = 0;
            param->analyse.b_weighted_bipred = 0;
            param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
        }
        else if( !strncasecmp( s, "zerolatency", 11 ) )
        {
            /* Not a psy tuning: turns off lookahead, B-frames and mb-tree, enables sliced threads. */
            param->rc.i_lookahead = 0;
            param->i_sync_lookahead = 0;
            param->i_bframe = 0;
            param->b_sliced_threads = 1;
            param->b_vfr_input = 0;
            param->rc.b_mb_tree = 0;
        }
        else if( !strncasecmp( s, "touhou", 6 ) )
        {
            if( psy_tuning_used++ ) goto psy_failure;
            param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
            param->i_deblocking_filter_alphac0 = -1;
            param->i_deblocking_filter_beta = -1;
            param->analyse.f_psy_trellis = 0.2;
            param->rc.f_aq_strength = 1.3;
            /* Add 8x8 P sub-partitions only when 16x16 P sub-partitions are already enabled. */
            if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
                param->analyse.inter |= X264_ANALYSE_PSUB8x8;
        }
        else
        {
            x264_log( NULL, X264_LOG_ERROR, "invalid tune '%s'\n", s );
            x264_free( tmp );
            return -1;
        }
        /* This block is entered only through `goto psy_failure`: it warns about the
         * ignored extra psy tuning and then falls through to fetch the next token. */
        if( 0 )
        {
    psy_failure:
            x264_log( NULL, X264_LOG_WARNING, "only 1 psy tuning can be used: ignoring tune %s\n", s );
        }
        s = strtok( NULL, ",./-+" );
    }
    x264_free( tmp );
    return 0;
}
The possible values of tune are:
film: film, i.e. live-action content;
animation: animated content;
grain: used when a large amount of film grain needs to be retained;
stillimage: used for encoding still images;
psnr: settings optimized to improve PSNR;
ssim: settings optimized to improve SSIM (SSIM is an index that measures the similarity between two images; the larger the value the better, and the maximum is 1);
fastdecode: settings that allow the stream to be decoded quickly;
zerolatency: zero-latency settings, which can reduce the desynchronization between audio and video;
x264_param_apply_profile(): sets the x264 profile (part of the libx264 API).
//Set profile int x264_param_apply_profile( x264_param_t *param, const char *profile ) { if( !profile ) return 0; //String to integer int p = profile_string_to_int( profile ); //Check if the profile setting is correct if( p < 0 ) { x264_log( NULL, X264_LOG_ERROR, "invalid profile: %s\n", profile ); return -1; } if( p < PROFILE_HIGH444_PREDICTIVE && ((param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant <= 0) || (param->rc.i_rc_method == X264_RC_CRF && (int)(param->rc.f_rf_constant + QP_BD_OFFSET) <= 0)) ) { x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support lossless\n", profile ); return -1; } if( p < PROFILE_HIGH444_PREDICTIVE && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I444 ) { x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:4:4\n", profile ); return -1; } if( p < PROFILE_HIGH422 && (param->i_csp & X264_CSP_MASK) >= X264_CSP_I422 ) { x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support 4:2:2\n", profile ); return -1; } if( p < PROFILE_HIGH10 && BIT_DEPTH > 8 ) { x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d\n", profile, BIT_DEPTH ); return -1; } //Set according to different profiles //Baseline basic if( p == PROFILE_BASELINE ) { //DCT8x8 is not supported param->analyse.b_transform_8x8 = 0; //Do not use CABAC param->b_cabac = 0; param->i_cqm_preset = X264_CQM_FLAT; param->psz_cqm_file = NULL; //No B frame. param->i_bframe = 0; //No weighting param->analyse.i_weighted_pred = X264_WEIGHTP_NONE; //Interlacing is not supported if( param->b_interlaced ) { x264_log( NULL, X264_LOG_ERROR, "baseline profile doesn't support interlacing\n" ); return -1; } if( param->b_fake_interlaced ) { x264_log( NULL, X264_LOG_ERROR, "baseline profile doesn't support fake interlacing\n" ); return -1; } } //Main master type else if( p == PROFILE_MAIN ) { //DCT8x8 is not supported param->analyse.b_transform_8x8 = 0; param->i_cqm_preset = X264_CQM_FLAT; param->psz_cqm_file = NULL; } return 0; }
This function calls profile_string_to_int():
/* Map a profile name to its PROFILE_* identifier.
 *
 * str: profile name; compared case-insensitively. Must not be NULL.
 *
 * Returns the matching PROFILE_* value, or -1 if the name is not recognised. */
static int profile_string_to_int( const char *str )
{
    /* Accepted names, checked in this order. */
    static const struct
    {
        const char *name;
        int         id;
    } profiles[] =
    {
        { "baseline", PROFILE_BASELINE },
        { "main",     PROFILE_MAIN },
        { "high",     PROFILE_HIGH },
        { "high10",   PROFILE_HIGH10 },
        { "high422",  PROFILE_HIGH422 },
        { "high444",  PROFILE_HIGH444_PREDICTIVE },
    };
    const int count = (int)( sizeof(profiles) / sizeof(profiles[0]) );

    for( int i = 0; i < count; i++ )
        if( strcasecmp( str, profiles[i].name ) == 0 )
            return profiles[i].id;

    return -1;
}
(3) Finally, according to the configured encoder parameters, the input video file is encoded with the H.264 video coding algorithm, and the encoded H.264 bitstream is written to the output file.
The encode() function encodes the YUV input as H.264; its source code is as follows:
/* encode(): run the whole encoding session for one input.
 *
 * param: encoder parameters; updated in place (pulldown/timebase fields are
 *        set when pulldown is requested on non-VFR input, and the structure is
 *        refreshed from the opened encoder via x264_encoder_parameters()).
 * opt:   CLI state (input/output handles, seek offset, optional timecode and
 *        qpfile streams, progress flag). opt->hout is closed and set to NULL
 *        before returning.
 *
 * Flow, in the order the code performs it:
 *   1. Open the encoder with x264_encoder_open() and pass the effective
 *      parameters to the output module with cli_output.set_param().
 *   2. If param->b_repeat_headers is 0, fetch the stream headers once with
 *      x264_encoder_headers() and write them with cli_output.write_headers().
 *   3. For each input frame (until b_ctrl_c is set, param->i_frame_total
 *      frames were read when that limit is non-zero, or filter.get_frame()
 *      returns non-zero): derive the pts, replace a pts that is not larger
 *      than the largest one seen so far by largest_pts + ticks_per_frame
 *      (with a rate-limited warning), then call encode_frame().
 *   4. Drain the encoder by calling encode_frame() with a NULL picture while
 *      x264_encoder_delayed_frames() is non-zero.
 *   5. At the fail label: compute the stream duration, close the encoder and
 *      the output, and print the frame count, fps and bitrate summary.
 *
 * Returns 0 on success, or -1 when encode_frame() reported a negative size.
 * NOTE(review): FAIL_IF_ERROR2 is defined elsewhere; given the fail label it
 * presumably logs, sets retval to -1 and jumps to fail -- confirm.
 * NOTE(review): the line breaks of this listing were lost, so each "//"
 * remark below runs into the code that originally followed it on the next line. */
static int encode( x264_param_t *param, cli_opt_t *opt ) //Encoder (internal loop for frame by frame coding) { x264_t *h = NULL; //Encoder handle x264_picture_t pic; //Current encoding frame cli_pic_t cli_pic; const cli_pulldown_t *pulldown = NULL; int i_frame = 0; //Coding frame number statistics int i_frame_output = 0; int64_t i_end, i_previous = 0, i_start = 0; //Coding time statistics int64_t i_file = 0; //Length of current NAL packaging int i_frame_size; //Encoded stream length int64_t last_dts = 0; int64_t prev_dts = 0; int64_t first_dts = 0; int pts_warning_cnt = 0; int64_t largest_pts = -1; int64_t second_largest_pts = -1; int64_t ticks_per_frame; double duration; double pulldown_pts = 0; int retval = 0; opt->b_progress &= param->i_log_level < X264_LOG_DEBUG; //Debugging information level if( opt->i_pulldown && !param->b_vfr_input ) { param->b_pulldown = 1; param->b_pic_struct = 1; pulldown = &pulldown_values[opt->i_pulldown]; param->i_timebase_num = param->i_fps_den; FAIL_IF_ERROR2( fmod( param->i_fps_num * pulldown->fps_factor, 1 ), "unsupported framerate for chosen pulldown\n" ) param->i_timebase_den = param->i_fps_num * pulldown->fps_factor; } h = x264_encoder_open( param ); //Open encoder FAIL_IF_ERROR2( !h, "x264_encoder_open failed\n" ); x264_encoder_parameters( h, param ); //Get parameters FAIL_IF_ERROR2( cli_output.set_param( opt->hout, param ), "can't set outfile param\n" ); i_start = x264_mdate(); //Timing, reading the current system time ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num; FAIL_IF_ERROR2( ticks_per_frame < 1 && !param->b_vfr_input, "ticks_per_frame invalid: %"PRId64"\n",ticks_per_frame ) ticks_per_frame = X264_MAX( ticks_per_frame, 1 ); //If SPS/PPS/SEI is not added in front of each keyframe, then SPS/PPS/SEI is added in front of the whole code stream; the Header refers to SPS/PPS/SEI if( !param->b_repeat_headers ) { x264_nal_t *headers; int i_nal; FAIL_IF_ERROR2( 
x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" ) FAIL_IF_ERROR2( (i_file = cli_output.write_headers( opt->hout, headers )) < 0, "error writing headers to output file\n" ); } if( opt->tcfile_out ) fprintf( opt->tcfile_out, "# timecode format v2\n" ); for( ; !b_ctrl_c && (i_frame < param->i_frame_total || !param->i_frame_total); i_frame++ ) //Cycle code all frames { if( filter.get_frame( opt->hin, &cli_pic, i_frame + opt->i_seek ) ) //Get 1 frame YUV data and save it in cli pic break; x264_picture_init( &pic ); //Initializing a pic ture structure convert_cli_to_lib_pic( &pic, &cli_pic ); if( !param->b_vfr_input ) pic.i_pts = i_frame; if( opt->i_pulldown && !param->b_vfr_input ) { pic.i_pic_struct = pulldown->pattern[ i_frame % pulldown->mod ]; pic.i_pts = (int64_t)( pulldown_pts + 0.5 ); pulldown_pts += pulldown_frame_duration[pic.i_pic_struct]; } else if( opt->timebase_convert_multiplier ) pic.i_pts = (int64_t)( pic.i_pts * opt->timebase_convert_multiplier + 0.5 ); if( pic.i_pts <= largest_pts ) { if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING ) x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",i_frame, pic.i_pts, largest_pts ); else if( pts_warning_cnt == MAX_PTS_WARNING ) x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" ); pts_warning_cnt++; pic.i_pts = largest_pts + ticks_per_frame; } second_largest_pts = largest_pts; largest_pts = pic.i_pts; if( opt->tcfile_out ) fprintf( opt->tcfile_out, "%.6f\n", pic.i_pts * ((double)param->i_timebase_num / param->i_timebase_den) * 1e3 ); if( opt->qpfile ) parse_qpfile( opt, &pic, i_frame + opt->i_seek ); prev_dts = last_dts; i_frame_size = encode_frame( h, opt->hout, &pic, &last_dts ); //Encoding 1 frame YUV data stored in pic if( i_frame_size < 0 ) { b_ctrl_c = 1; /* lie to exit the loop */ retval = -1; } else if( i_frame_size ) { i_file += i_frame_size; 
i_frame_output++; if( i_frame_output == 1 ) first_dts = prev_dts = last_dts; } if( filter.release_frame( opt->hin, &cli_pic, i_frame + opt->i_seek ) ) //Release the processed YUV data break; if( opt->b_progress && i_frame_output ) i_previous = print_status( i_start, i_previous, i_frame_output, param->i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts ); } while( !b_ctrl_c && x264_encoder_delayed_frames( h ) ) //X264? Encoder? Delayed? Frames() returns the number of remaining frames { prev_dts = last_dts; i_frame_size = encode_frame( h, opt->hout, NULL, &last_dts ); //Code if( i_frame_size < 0 ) { b_ctrl_c = 1; retval = -1; } else if( i_frame_size ) { i_file += i_frame_size; i_frame_output++; if( i_frame_output == 1 ) first_dts = prev_dts = last_dts; } if( opt->b_progress && i_frame_output ) //Output some statistics i_previous = print_status( i_start, i_previous, i_frame_output, param->i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts ); } fail: if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG ) x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING ); if( i_frame_output == 1 ) duration = (double)param->i_fps_den / param->i_fps_num; else if( b_ctrl_c ) duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den; else duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den; i_end = x264_mdate(); //Get system time, timing if( opt->b_progress ) fprintf( stderr, " \r" ); if( h ) x264_encoder_close( h ); //Turn off encoder fprintf( stderr, "\n" ); if( b_ctrl_c ) fprintf( stderr, "aborted at input frame %d, output frame %d\n", opt->i_seek + i_frame, i_frame_output ); cli_output.close_file( opt->hout, largest_pts, second_largest_pts ); opt->hout = NULL; if( i_frame_output > 0 ) { double fps = (double)i_frame_output * (double)1000000/(double)( i_end - i_start ); 
fprintf( stderr, "encoded %d frames, %.2f fps, %.2f kb/s\n", i_frame_output, fps,(double) i_file * 8 / ( 1000 * duration ) ); } return retval; }
The basic flow of the encode() function is:
Call x264_encoder_open() to open the encoder;
Call x264_encoder_parameters() to retrieve the current parameter set (x264_param_t);
Call x264_encoder_headers() to write the SPS/PPS/SEI information at the start of the bitstream (only when the headers are not repeated before every keyframe);
Call encode_frame() inside the loop that encodes the input frame by frame;
Call print_status() to output the encoding statistics;
Call x264_encoder_close() to close the encoder;
The encode_frame() function encodes one frame of data; internally it calls the x264_encoder_encode() function. Code:
/* Encode one picture and hand the resulting bitstream to the output module.
 *
 * h:        encoder handle.
 * hout:     output handle passed through to cli_output.write_frame().
 * pic:      picture to encode; callers pass NULL to drain delayed frames.
 * last_dts: receives the dts of the emitted picture; left untouched when the
 *           encoder produced no output for this call.
 *
 * Returns 0 when the encoder emitted nothing, otherwise whatever
 * cli_output.write_frame() returns. A negative size from
 * x264_encoder_encode() is handled by FAIL_IF_ERROR.
 * NOTE(review): FAIL_IF_ERROR is defined elsewhere; presumably it logs the
 * message and returns -1 -- confirm. */
static int encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *last_dts )
{
    x264_picture_t out_pic;   /* picture description filled in by the encoder */
    x264_nal_t *nals;         /* NAL units produced for this call */
    int nal_count;            /* number of entries in nals */

    int size = x264_encoder_encode( h, &nals, &nal_count, pic, &out_pic );
    FAIL_IF_ERROR( size < 0, "x264_encoder_encode failed\n" );

    /* Nothing was emitted for this call. */
    if( !size )
        return 0;

    /* The writer receives the first payload pointer together with the total size.
     * NOTE(review): presumably the NAL payloads are contiguous in memory -- confirm. */
    size = cli_output.write_frame( hout, nals[0].p_payload, size, &out_pic );
    *last_dts = out_pic.i_dts;
    return size;
}
The print_status() function outputs the statistics after a frame of data has been encoded. Code:
/* Print a one-line progress report to stderr and mirror it in the console title.
 *
 * i_start:       x264_mdate() timestamp taken when encoding started.
 * i_previous:    timestamp of the last report, or 0 if none was printed yet.
 * i_frame:       number of frames output so far.
 * i_frame_total: total number of frames to encode, or 0 if unknown
 *                (no percentage/ETA is printed then).
 * i_file:        number of bytes written so far.
 * param:         encoder parameters; supplies the timebase and frame rate
 *                used to turn last_ts into a duration.
 * last_ts:       elapsed stream time in timebase units; when 0 the duration
 *                of a single frame is used instead.
 *
 * Returns the timestamp of this report, or i_previous unchanged when less
 * than UPDATE_INTERVAL has passed since the last report (nothing is printed).
 *
 * NOTE(review): the 1000000 factors suggest x264_mdate() counts microseconds
 * -- confirm. */
static int64_t print_status( int64_t i_start, int64_t i_previous, int i_frame, int i_frame_total, int64_t i_file, x264_param_t *param, int64_t last_ts )
{
    char buf[200];
    int64_t i_time = x264_mdate();

    /* Throttle: skip the update if the previous one was too recent. */
    if( i_previous && i_time - i_previous < UPDATE_INTERVAL )
        return i_previous;

    int64_t i_elapsed = i_time - i_start;
    double fps = i_elapsed > 0 ? i_frame * 1000000. / i_elapsed : 0;
    double bitrate;
    if( last_ts )
        bitrate = (double) i_file * 8 / ( (double) last_ts * 1000 * param->i_timebase_num / param->i_timebase_den );
    else
        bitrate = (double) i_file * 8 / ( (double) 1000 * param->i_fps_den / param->i_fps_num );

    /* snprintf bounds the write: "%.2f" of an arbitrarily large fps/bitrate
     * could otherwise run past the end of buf. Truncation is harmless here;
     * the "x264 " prefix skipped below is always written in full. */
    if( i_frame_total )
    {
        /* Guard the division so a call with no frames yet cannot divide by zero. */
        int eta = i_frame > 0 ? i_elapsed * (i_frame_total - i_frame) / ((int64_t)i_frame * 1000000) : 0;
        snprintf( buf, sizeof(buf), "x264 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d",
                  100. * i_frame / i_frame_total, i_frame, i_frame_total, fps, bitrate,
                  eta/3600, (eta/60)%60, eta%60 );
    }
    else
        snprintf( buf, sizeof(buf), "x264 %d frames: %.2f fps, %.2f kb/s", i_frame, fps, bitrate );

    /* stderr gets the text without the leading "x264 "; the title keeps it. */
    fprintf( stderr, "%s \r", buf+5 );
    x264_cli_set_console_title( buf );
    fflush( stderr ); // needed in windows
    return i_time;
}
Run the encoding function. When the encoding is successful, we can see some information printed by this function.