x264_macroblock_load_pic_pointers( x264_t *h, int i_mb_x, int i_mb_y, int i )

/*
 * Load the per-macroblock picture pointers and pixel caches for one plane of
 * the macroblock at (i_mb_x, i_mb_y).
 *
 *   h       encoder context; the results are written into h->mb.pic.
 *   i_mb_x  macroblock column.
 *   i_mb_y  macroblock row.
 *   i       plane index: 0 is handled as a 16x16 block, any other value as an
 *           8x8 block (the accompanying notes describe 0/1/2 as Y/U/V).
 *
 * What is written:
 *   - h->mb.pic.i_stride[i] and h->mb.pic.p_fenc_plane[i];
 *   - the w x w source block is copied into h->mb.pic.p_fenc[i];
 *   - the row above the block in h->mb.pic.p_fdec[i] is filled from
 *     h->mb.intra_border_backup, and (interlaced only) the column to its left
 *     from h->fdec;
 *   - h->mb.pic.p_fref[list][ref][...] for list 0, and for list 1 when the
 *     slice type is SLICE_TYPE_B.
 */
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb_x, int i_mb_y, int i)
{
    /* Block edge length for this plane. */
    const int w = (i == 0 ? 16 : 8);
    /* !!i maps plane 0 -> stride slot 0 and planes 1,2 -> stride slot 1. */
    const int i_stride = h->fdec->i_stride[!!i];
    /* The row step is doubled when b_interlaced is set. */
    const int i_stride2 = i_stride << h->mb.b_interlaced;
    /* Offset of the block's top-left pixel inside the plane.  In interlaced
     * mode the row is derived from the even row of the macroblock pair
     * (i_mb_y & ~1), plus one picture row when i_mb_y is odd. */
    const int i_pix_offset = h->mb.b_interlaced
                           ? w * (i_mb_x + (i_mb_y&~1) * i_stride) + (i_mb_y&1) * i_stride
                           : w * (i_mb_x + i_mb_y * i_stride);
    /* Reference offsets, selected by the parity of the reference index
     * (j & 1) in the loops below. */
    int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
    const uint8_t *intra_fdec = &h->mb.intra_border_backup[i_mb_y & h->sh.b_mbaff][i][i_mb_x*16>>!!i];
    x264_frame_t **fref[2] = { h->fref0, h->fref1 };
    int j, k;

    if( h->mb.b_interlaced )
    {
        /* Odd reference indices are shifted by one picture row:
         * +i_stride when i_mb_y is even, -i_stride when it is odd. */
        ref_pix_offset[1] += (1-2*(i_mb_y&1)) * i_stride;
    }

    h->mb.pic.i_stride[i] = i_stride2;
    h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];

    /* Copy the w x w source block between the frame plane and the
     * FENC_STRIDE-strided macroblock buffer. */
    h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
                                         h->mb.pic.p_fenc_plane[i], i_stride2, w );

    /* Fill the row above the block, starting one pixel to the left of it:
     * w*3/2+1 bytes (25 for w == 16, 13 for w == 8). */
    memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );

    if( h->mb.b_interlaced )
    {
        /* Fill the column left of the block from the reconstructed plane. */
        const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset];
        for( j = 0; j < w; j++ )
        {
            h->mb.pic.p_fdec[i][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
        }
    }

    /* List 0 references.
     * p_fref[list][ref][slot]: slot 0 is the plane-0 integer-pel pointer,
     * slots 1..3 come from filtered[1..3] (described in the original notes as
     * the half-pel interpolated planes), and slots 4,5 (i+3) are planes 1,2. */
    for( j = 0; j < h->mb.pic.i_fref[0]; j++ )
    {
        /* Co-located integer-pel position in the reference frame. */
        h->mb.pic.p_fref[0][j][i==0 ? 0:i+3] = &fref[0][j >> h->mb.b_interlaced]->plane[i][ref_pix_offset[j&1]];
        if( i == 0 )
        {
            /* Co-located position in each filtered plane. */
            for( k = 1; k < 4; k++ )
            {
                h->mb.pic.p_fref[0][j][k] = &fref[0][j >> h->mb.b_interlaced]->filtered[k][ref_pix_offset[j&1]];
            }
        }
    }

    /* List 1 references are only set up for B slices. */
    if( h->sh.i_type == SLICE_TYPE_B )
    {
        for( j = 0; j < h->mb.pic.i_fref[1]; j++ )
        {
            h->mb.pic.p_fref[1][j][i==0 ? 0:i+3] = &fref[1][j >> h->mb.b_interlaced]->plane[i][ref_pix_offset[j&1]];
            if( i == 0 )
            {
                for( k = 1; k < 4; k++ )
                {
                    h->mb.pic.p_fref[1][j][k] = &fref[1][j >> h->mb.b_interlaced]->filtered[k][ref_pix_offset[j&1]];
                }
            }
        }
    }
}

================================================================

1、const int w = (i == 0 ? 16 : 8);

输入参数i={0,1,2}分别对应YUV三个分量。Y分量是16x16块，UV分量是8x8块，所以 i==0 时 w=16，否则 w=8。

2、const int i_stride = h->fdec->i_stride[!!i]

!!i指的是将输入的i参数从0~2转换成0、1，虽然i_stride[3]有3个变量，但是规格只有Y与UV两种。i_stride[?]的说明:

在函数x264_frame_t *x264_frame_new( x264_t *h )有如下几个语句:

/* allocate frame data (+64 for extra data for me) */

#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))

#define PADH 32

int align = (省略过程，在非mbaff情况下)=16;

......

i_width = ALIGN( h->param.i_width, 16 );

i_stride = ALIGN( i_width + 2*PADH, align );

i_lines = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );

frame->i_plane = 3;

for( i = 0; i < 3; i++ )

{

frame->i_stride[i] = ALIGN( i_stride >> !!i, 16 );

frame->i_width[i] = i_width >> !!i;

frame->i_lines[i] = i_lines >> !!i;

}

luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));

chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));

for( i = 1; i < 3; i++ )

{

CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );

frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;

}

不采用mbaff(即h->param.b_interlaced = 0),输入图像352x288，那么有：

i_width = [ ( h->param.i_width + 15 ) & 0xfffff0 ]

i_stride = [(i_width+64 + 15 ) & 0xfffff0] = 416

i_lines = [ ( h->param.i_height + 15 ) & 0xfffff0 ]

+64是对行进行扩展64，这个知道。+ 15 然后再&0xfffff0是为了保证图像宽是16的倍数，即可以分成整数个宏块。举例：

如图像宽是128=1000 0000 则128+15=1000 1111 （128+15）&0xfffff0=128

如果图像宽129=1000 0001 则129+15=1001 0000 （129+15）&0xfffff0=144

若 16*n<图像宽<=16*（n+1），n=0,1,2…，则经过上述公式后图像宽=16*（n+1）

实际上是对图像的行列各扩展了64，即上下左右填了32，这个是预测用的，代码中是这样注释的"/* allocate frame data (+64 for extra data for me) */"。

frame->i_stride[0~2] = ALIGN( i_stride >> !!(0~2), 16 );

色度的步长i_stride[1]、i_stride[2] 为 i_stride[0]/2，再扩展成16的倍数

色度亮度单元扩展之后

luma_plane_size = ( 亮度图像宽度 + 64 )*( 亮度图像高度 + 64 ) = 416*352 = 146432（以352x288为例）

chroma_plane_size = ( (亮度图像宽度+64)/2 )*( 色度图像高度 + 64 ) = 208*208 = 43264（以352x288为例）

然后通过

CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );

为 frame->buffer[i] 分配 chroma_plane_size 字节的空间（这里只是分配内存，并不拷贝图像数据）；frame->plane[i] 则指向该缓冲区中跳过填充区之后的位置。

原始图像向右扩展64列、向下扩展64行，相当于在原始图像四周各填充32：

　　　　　　　　顶部扩展32行
左边扩展32列 ｜ 原始图像 ｜ 右边扩展32列
　　　　　　　　底部扩展32行

3、const int i_pix_offset是通过宏块坐标计算的像素坐标

4、h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,

h->mb.pic.p_fenc_plane[i], i_stride2, w );

h->mb.pic.p_fenc[i]指向宏块级的编码缓存，步长为FENC_STRIDE（x264中FENC_STRIDE=16；32*27是重建缓存fdec_buf的大小，对应FDEC_STRIDE=32）；h->mb.pic.p_fenc_plane[i]指向扩展后的图片中当前宏块的位置，亮度块大小是16x16，i_stride2=416

copy整个函数原型

/*
 * Copy an i_width x i_height block of bytes from src to dst, one row at a
 * time.
 *
 *   src, i_src_stride  source pointer and the byte step between its rows.
 *   dst, i_dst_stride  destination pointer and the byte step between its rows.
 *   i_width            bytes copied per row.
 *   i_height           number of rows; nothing is copied when it is <= 0.
 *
 * Note the argument order: the source comes first, then the destination.
 * Each row is copied with memcpy, so a source row and its destination row
 * must not overlap.
 */
static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int y;

    for( y = 0; y < i_height; y++ )
    {
        memcpy( dst, src, i_width );
        /* Advance both pointers to the start of their next row. */
        src += i_src_stride;
        dst += i_dst_stride;
    }
}

注意h->mc.copy[]的参数顺序是(dst, dst步长, src, src步长, 高度)，与上面mc_copy的(src, …, dst, …)顺序相反；因此这里的功能是将h->mb.pic.p_fenc_plane[i]指向的帧中实际像素拷贝到宏块缓存h->mb.pic.p_fenc[i]中

5、memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );

这里为什么拷贝 w*3/2+1 个数据？对于Y宏块 w=16，w*3/2+1=25（U、V类似，w=8，共13个）：宏块需要上面1行共25个数据（左上角1个、正上方16个、右上方8个），还需要左边1列共16个数据。h->mb.pic.p_fdec[i][-1-FDEC_STRIDE] 对应左上角像素M，这条memcpy取的就是上面1行的25个数据，数据来源是 h->mb.intra_border_backup（有的x264版本这里写作 &plane_fdec[-1-i_stride]，即直接从重建帧中读取）。

6、for( j = 0; j < h->mb.pic.i_fref[0]; j++ )/*对list0操作*/

{

h->mb.pic.p_fref[0][j][i==0 ? 0:i+3] = &fref[0][j >> h->mb.b_interlaced]->plane[i][ref_pix_offset[j&1]];

/* 将指向参考帧共同位置整点像素的指针赋值给h->mb.pic.p_fref[0][j][0,4,5],这里是对YUV三个分量操作*/

if( i == 0 )

for( k = 1; k < 4; k++ )

/* p_fref[参考列表][参考帧索引][整点像素、/2垂直+水平+对角、U、V]*/

h->mb.pic.p_fref[0][j][k] = &fref[0][j >> h->mb.b_interlaced]->filtered[k][ref_pix_offset[j&1]];

/* 将指向参考帧共同位置的/2内插像素的指针赋值给h->mb.pic.p_fref[0][j][1~3]*/

}

累死，未完待续...

本站仅提供存储服务，所有内容均由用户发布，如发现有害或侵权内容，请点击举报。