/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file h264.c
* H.264 / AVC / MPEG4 part10 codec.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264.h"
#include "h264data.h"
#include "h264_parser.h"
#include "golomb.h"
#include "rectangle.h"
#include "cabac.h"
#ifdef ARCH_X86
//#include "i386/h264_i386.h"
#endif
//#undef NDEBUG
#include <assert.h>
#include <string.h>
#include "../libavutil/common.h"
/**
* Value of Picture.reference when Picture is not a reference picture, but
* is held for delayed output.
*/
#define DELAYED_PIC_REF 4
/* File-scope storage for the VLC objects used in this file. Each VLC (or
 * array of VLCs) is declared together with a statically sized VLC_TYPE
 * table and a constant holding that table's entry count.
 * NOTE(review): the code that fills these in is outside this view --
 * presumably the *_table(s) arrays back the corresponding VLC; confirm at
 * the initialization site. */
static VLC coeff_token_vlc[4];
/* One flat array: its first dimension is the sum of the four entry counts
 * listed in coeff_token_vlc_tables_size. */
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* 15 VLCs, each with its own 512-entry table. */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;
/* 3 VLCs, each with its own 8-entry table. */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* 6 VLCs, each with its own 8-entry table. */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;
/* A single VLC with a 96-entry table. */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;
/* Forward declarations of file-local (static) functions, so that they can be
 * referenced before their definitions appear in this translation unit. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
/* filter_mb() and filter_mb_fast() share the same parameter list. */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two values into a single 32-bit word, 16 bits each.
 *
 * Without WORDS_BIGENDIAN, a occupies bits 0-15 and b bits 16-31; with
 * WORDS_BIGENDIAN defined the two roles are swapped.
 *
 * @param a first value; only its low 16 bits end up in the result
 * @param b second value; only its low 16 bits end up in the result
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
    /* Do the arithmetic on unsigned values: left-shifting a negative int
     * (or shifting a set bit into/past the sign bit) is undefined behavior,
     * whereas unsigned shifts are well defined and simply wrap. */
    const uint32_t ua = (uint32_t)a;
    const uint32_t ub = (uint32_t)b;
#ifdef WORDS_BIGENDIAN
    return (ub&0xFFFF) + (ua<<16);
#else
    return (ua&0xFFFF) + (ub<<16);
#endif
}
/** Lookup table: rem6[i] == i % 6 for every i in 0..51. */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5, /*  0.. 5 */
    0, 1, 2, 3, 4, 5, /*  6..11 */
    0, 1, 2, 3, 4, 5, /* 12..17 */
    0, 1, 2, 3, 4, 5, /* 18..23 */
    0, 1, 2, 3, 4, 5, /* 24..29 */
    0, 1, 2, 3, 4, 5, /* 30..35 */
    0, 1, 2, 3, 4, 5, /* 36..41 */
    0, 1, 2, 3, 4, 5, /* 42..47 */
    0, 1, 2, 3,       /* 48..51 */
};
/** Lookup table: div6[i] == i / 6 (integer division) for every i in 0..51. */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0, /*  0.. 5 */
    1, 1, 1, 1, 1, 1, /*  6..11 */
    2, 2, 2, 2, 2, 2, /* 12..17 */
    3, 3, 3, 3, 3, 3, /* 18..23 */
    4, 4, 4, 4, 4, 4, /* 24..29 */
    5, 5, 5, 5, 5, 5, /* 30..35 */
    6, 6, 6, 6, 6, 6, /* 36..41 */
    7, 7, 7, 7, 7, 7, /* 42..47 */
    8, 8, 8, 8,       /* 48..51 */
};
/**
 * Candidate index sets for the left neighbour; fill_caches() points its
 * left_block at one of these rows:
 *  - row 0: the default choice
 *  - row 1: current MB is a frame MB, left pair is field, bottom MB of the pair
 *  - row 2: current MB is a frame MB, left pair is field, top MB of the pair
 *  - row 3: current MB is a field MB, left pair is frame
 */
static const int left_block_options[4][8]={
    { 0, 1, 2, 3,  7, 10,  8, 11 },
    { 2, 2, 3, 3,  8, 11,  8, 11 },
    { 0, 0, 1, 1,  7, 10,  7, 10 },
    { 0, 2, 0, 2,  7, 10,  7, 10 },
};
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
int * left_block;
int topleft_partition= -1;
int i;
top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
//FIXME deblocking could skip the intra and nnz parts.
if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
return;
/* Wow, what a mess, why didn't they simplify the interlacing & intra
* stuff, I can't imagine that these complex rules are worth it. */
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1;
left_block = left_block_options[0];
if(FRAME_MBAFF){
const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
const int top_pair_xy = pair_xy - s->mb_stride;
const int topleft_pair_xy = top_pair_xy - 1;
const int topright_pair_xy = top_pair_xy + 1;
const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
const int bottom = (s->mb_y & 1);
// tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
if (bottom
? !curr_mb_frame_flag // bottom macroblock
: (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
) {
top_xy -= s->mb_stride;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
: (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
) {
topleft_xy -= s->mb_stride;
} else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
topleft_xy += s->mb_stride;
// take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
topleft_partition = 0;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
: (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
) {
topright_xy -= s->mb_stride;
}
if (left_mb_frame_flag != curr_mb_frame_flag) {
left_xy[1] = left_xy[0] = pair_xy - 1;
if (curr_mb_frame_flag) {
if (bottom) {
left_block = left_block_options[1];
} else {
left_block= left_block_options[2];
}
} else {
left_xy[1] += s->mb_stride;
left_block = left_block_options[3];
}
}
}
h->top_mb_xy = top_xy;
h->left_mb_xy[0] = left_xy[0];
h->left_mb_xy[1] = left_xy[1];
if(for_deblock){
topleft_type = 0;
topright_type = 0;
top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
if(MB_MBAFF && !IS_INTRA(mb_type)){
int list;
for(list=0; list<h->list_count; list++){
//These values were changed for ease of performing MC, we need to change them back
//FIXME maybe we can make MC and loop filter use the same values or prevent
//the MC code from changing ref_cache and rather use a temporary array.
if(USES_LIST(mb_type,list))