00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "png.h"
00042
00043 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
00044 uint32_t ff_squareTbl[512] = {0, };
00045
00046
/*
 * Byte-replicated constants: (~0UL / 255) has 0x01 in every byte of an
 * unsigned long, so multiplying it puts 0x7f / 0x80 into every byte.
 */
#define pb_7f ((~0UL / 255) * 0x7f)
#define pb_80 ((~0UL / 255) * 0x80)
00049
/*
 * Zigzag scan table: 64 entries, each value 0..63 occurring exactly once.
 * Entry k appears to be the raster index (8 * row + column) of the k-th
 * coefficient in scan order.
 */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
    17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
    53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
};
00060
00061
00062
/*
 * Alternative zigzag scan table: 64 entries, each value 0..63 occurring
 * exactly once.  Consecutive entries mostly come in (n, n + 8) pairs, i.e.
 * two vertically adjacent raster positions.
 * NOTE(review): presumably the scan used with a 2-4-8 DCT -- confirm.
 */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
    18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
    34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
    20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
    36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
    22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
    53, 61, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
00073
00074
/*
 * 64-entry uint16_t table declared through DECLARE_ALIGNED with 16 as the
 * first argument (presumably the requested byte alignment -- confirm against
 * the macro definition).  It has no initialiser here and is not written by
 * any code visible in this part of the file.
 * NOTE(review): by its name this looks like an inverse of ff_zigzag_direct
 * filled in at init time -- confirm.
 */
00075 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00076
/*
 * Alternate scan table favouring horizontal runs: 64 entries, each value
 * 0..63 occurring exactly once; entries appear to be raster indices
 * (8 * row + column) listed in scan order.
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
    10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
    13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
    26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
    30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
    42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
    46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
    52, 53, 54, 55, 60, 61, 62, 63,   /* scan positions 56..63 */
};
00087
/*
 * Alternate scan table favouring vertical runs: 64 entries, each value
 * 0..63 occurring exactly once; entries appear to be raster indices
 * (8 * row + column) listed in scan order.
 */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
    41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
    19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
    51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
    21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
    53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
    38, 46, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
00098
00099
/*
 * 64-entry coefficient permutation; every value 0..63 occurs exactly once.
 * NOTE(review): by its name this is the coefficient order expected by the
 * "simple" MMX IDCT -- its consumer is not visible here, confirm.
 */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};
00110
/*
 * 8-entry permutation interleaving 0..3 with 4..7 (0,4,1,5,...).
 * NOTE(review): presumably the within-row coefficient order of the SSE2
 * IDCT -- its consumer is not visible here, confirm.
 */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4,
    1, 5,
    2, 6,
    3, 7,
};
00112
00113 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00114 int i;
00115 int end;
00116
00117 st->scantable= src_scantable;
00118
00119 for(i=0; i<64; i++){
00120 int j;
00121 j = src_scantable[i];
00122 st->permutated[i] = permutation[j];
00123 #if ARCH_PPC
00124 st->inverse[j] = i;
00125 #endif
00126 }
00127
00128 end=-1;
00129 for(i=0; i<64; i++){
00130 int j;
00131 j = st->permutated[i];
00132 if(j>end) end=j;
00133 st->raster_end[i]= end;
00134 }
00135 }
00136
/**
 * Sum all pixel values of a 16x16 block.
 *
 * @param pix        top-left pixel of the block
 * @param line_size  distance in bytes between the starts of consecutive rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
00158
00159 static int pix_norm1_c(uint8_t * pix, int line_size)
00160 {
00161 int s, i, j;
00162 uint32_t *sq = ff_squareTbl + 256;
00163
00164 s = 0;
00165 for (i = 0; i < 16; i++) {
00166 for (j = 0; j < 16; j += 8) {
00167 #if 0
00168 s += sq[pix[0]];
00169 s += sq[pix[1]];
00170 s += sq[pix[2]];
00171 s += sq[pix[3]];
00172 s += sq[pix[4]];
00173 s += sq[pix[5]];
00174 s += sq[pix[6]];
00175 s += sq[pix[7]];
00176 #else
00177 #if LONG_MAX > 2147483647
00178 register uint64_t x=*(uint64_t*)pix;
00179 s += sq[x&0xff];
00180 s += sq[(x>>8)&0xff];
00181 s += sq[(x>>16)&0xff];
00182 s += sq[(x>>24)&0xff];
00183 s += sq[(x>>32)&0xff];
00184 s += sq[(x>>40)&0xff];
00185 s += sq[(x>>48)&0xff];
00186 s += sq[(x>>56)&0xff];
00187 #else
00188 register uint32_t x=*(uint32_t*)pix;
00189 s += sq[x&0xff];
00190 s += sq[(x>>8)&0xff];
00191 s += sq[(x>>16)&0xff];
00192 s += sq[(x>>24)&0xff];
00193 x=*(uint32_t*)(pix+4);
00194 s += sq[x&0xff];
00195 s += sq[(x>>8)&0xff];
00196 s += sq[(x>>16)&0xff];
00197 s += sq[(x>>24)&0xff];
00198 #endif
00199 #endif
00200 pix += 8;
00201 }
00202 pix += line_size - 16;
00203 }
00204 return s;
00205 }
00206
/**
 * Apply av_bswap32() to w 32-bit words, reading from src and writing to dst.
 * Elements are processed in ascending index order; a non-positive w does nothing.
 *
 * @param dst  destination, at least w words
 * @param src  source, at least w words
 * @param w    number of 32-bit words
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
00224
/**
 * Apply av_bswap16() to len 16-bit words, reading from src and writing to dst.
 *
 * A non-positive len is a no-op, matching bswap_buf(); the former
 * `while (len--)` form kept running past the buffers for a negative len.
 *
 * @param dst  destination, at least len words
 * @param src  source, at least len words
 * @param len  number of 16-bit words
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00230
00231 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00232 {
00233 int s, i;
00234 uint32_t *sq = ff_squareTbl + 256;
00235
00236 s = 0;
00237 for (i = 0; i < h; i++) {
00238 s += sq[pix1[0] - pix2[0]];
00239 s += sq[pix1[1] - pix2[1]];
00240 s += sq[pix1[2] - pix2[2]];
00241 s += sq[pix1[3] - pix2[3]];
00242 pix1 += line_size;
00243 pix2 += line_size;
00244 }
00245 return s;
00246 }
00247
00248 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00249 {
00250 int s, i;
00251 uint32_t *sq = ff_squareTbl + 256;
00252
00253 s = 0;
00254 for (i = 0; i < h; i++) {
00255 s += sq[pix1[0] - pix2[0]];
00256 s += sq[pix1[1] - pix2[1]];
00257 s += sq[pix1[2] - pix2[2]];
00258 s += sq[pix1[3] - pix2[3]];
00259 s += sq[pix1[4] - pix2[4]];
00260 s += sq[pix1[5] - pix2[5]];
00261 s += sq[pix1[6] - pix2[6]];
00262 s += sq[pix1[7] - pix2[7]];
00263 pix1 += line_size;
00264 pix2 += line_size;
00265 }
00266 return s;
00267 }
00268
00269 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00270 {
00271 int s, i;
00272 uint32_t *sq = ff_squareTbl + 256;
00273
00274 s = 0;
00275 for (i = 0; i < h; i++) {
00276 s += sq[pix1[ 0] - pix2[ 0]];
00277 s += sq[pix1[ 1] - pix2[ 1]];
00278 s += sq[pix1[ 2] - pix2[ 2]];
00279 s += sq[pix1[ 3] - pix2[ 3]];
00280 s += sq[pix1[ 4] - pix2[ 4]];
00281 s += sq[pix1[ 5] - pix2[ 5]];
00282 s += sq[pix1[ 6] - pix2[ 6]];
00283 s += sq[pix1[ 7] - pix2[ 7]];
00284 s += sq[pix1[ 8] - pix2[ 8]];
00285 s += sq[pix1[ 9] - pix2[ 9]];
00286 s += sq[pix1[10] - pix2[10]];
00287 s += sq[pix1[11] - pix2[11]];
00288 s += sq[pix1[12] - pix2[12]];
00289 s += sq[pix1[13] - pix2[13]];
00290 s += sq[pix1[14] - pix2[14]];
00291 s += sq[pix1[15] - pix2[15]];
00292
00293 pix1 += line_size;
00294 pix2 += line_size;
00295 }
00296 return s;
00297 }
00298
00299
00300
00301 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
00302 {
00303 uint8_t *ptr, *last_line;
00304 int i;
00305
00306 last_line = buf + (height - 1) * wrap;
00307 for(i=0;i<w;i++) {
00308
00309 if (sides&EDGE_TOP) memcpy(buf - (i + 1) * wrap, buf, width);
00310 if (sides&EDGE_BOTTOM) memcpy(last_line + (i + 1) * wrap, last_line, width);
00311 }
00312
00313 ptr = buf;
00314 for(i=0;i<height;i++) {
00315 memset(ptr - w, ptr[0], w);
00316 memset(ptr + width, ptr[width-1], w);
00317 ptr += wrap;
00318 }
00319
00320 for(i=0;i<w;i++) {
00321 if (sides&EDGE_TOP) {
00322 memset(buf - (i + 1) * wrap - w, buf[0], w);
00323 memset(buf - (i + 1) * wrap + width, buf[width-1], w);
00324 }
00325
00326 if (sides&EDGE_BOTTOM) {
00327 memset(last_line + (i + 1) * wrap - w, last_line[0], w);
00328 memset(last_line + (i + 1) * wrap + width, last_line[width-1], w);
00329 }
00330 }
00331 }
00332
/**
 * Copy a block_w x block_h block that may lie partly or fully outside a
 * w x h picture into buf, replicating the nearest picture pixels for every
 * sample that falls outside the picture.
 *
 * @param buf       destination block; its rows are linesize bytes apart
 * @param src       source pointer at position (src_x, src_y) of the picture
 * @param linesize  row stride in bytes, used for both buf and src
 * @param block_w   width of the block to produce
 * @param block_h   height of the block to produce
 * @param src_x     x position of the block relative to the picture
 * @param src_y     y position of the block relative to the picture
 * @param w         picture width
 * @param h         picture height
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int y;
    int start_y, start_x, end_y, end_x;

    /* move the block so that at least one row overlaps the picture */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    /* ... and at least one column */
    if (src_x >= w) {
        src  += (w - 1 - src_x);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x);
        src_x = 1 - block_w;
    }

    /* part of the block that lies inside the picture: [start, end) */
    start_y = FFMAX(0, -src_y);
    start_x = FFMAX(0, -src_x);
    end_y   = FFMIN(block_h, h - src_y);
    end_x   = FFMIN(block_w, w - src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    w    = end_x - start_x;                 /* from here on: width of the valid part */
    src += start_y * linesize + start_x;
    buf += start_x;

    /* rows above the picture: repeat the first valid row */
    y = 0;
    while (y < start_y) {
        memcpy(buf, src, w);
        buf += linesize;
        y++;
    }

    /* rows inside the picture: plain copy */
    while (y < end_y) {
        memcpy(buf, src, w);
        src += linesize;
        buf += linesize;
        y++;
    }

    /* rows below the picture: repeat the last valid row */
    src -= linesize;
    while (y < block_h) {
        memcpy(buf, src, w);
        buf += linesize;
        y++;
    }

    /* back to the top-left of the block, then pad each row sideways */
    buf -= block_h * linesize + start_x;
    while (block_h--) {
        /* left: replicate the first valid pixel of the row */
        if (start_x > 0)
            memset(buf, buf[start_x], start_x);

        /* right: replicate the last valid pixel of the row */
        if (end_x < block_w)
            memset(buf + end_x, buf[end_x - 1], block_w - end_x);

        buf += linesize;
    }
}
00410
00411 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00412 {
00413 int i;
00414
00415
00416 for(i=0;i<8;i++) {
00417 block[0] = pixels[0];
00418 block[1] = pixels[1];
00419 block[2] = pixels[2];
00420 block[3] = pixels[3];
00421 block[4] = pixels[4];
00422 block[5] = pixels[5];
00423 block[6] = pixels[6];
00424 block[7] = pixels[7];
00425 pixels += line_size;
00426 block += 8;
00427 }
00428 }
00429
00430 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00431 const uint8_t *s2, int stride){
00432 int i;
00433
00434
00435 for(i=0;i<8;i++) {
00436 block[0] = s1[0] - s2[0];
00437 block[1] = s1[1] - s2[1];
00438 block[2] = s1[2] - s2[2];
00439 block[3] = s1[3] - s2[3];
00440 block[4] = s1[4] - s2[4];
00441 block[5] = s1[5] - s2[5];
00442 block[6] = s1[6] - s2[6];
00443 block[7] = s1[7] - s2[7];
00444 s1 += stride;
00445 s2 += stride;
00446 block += 8;
00447 }
00448 }
00449
00450
00451 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00452 int line_size)
00453 {
00454 int i;
00455 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00456
00457
00458 for(i=0;i<8;i++) {
00459 pixels[0] = cm[block[0]];
00460 pixels[1] = cm[block[1]];
00461 pixels[2] = cm[block[2]];
00462 pixels[3] = cm[block[3]];
00463 pixels[4] = cm[block[4]];
00464 pixels[5] = cm[block[5]];
00465 pixels[6] = cm[block[6]];
00466 pixels[7] = cm[block[7]];
00467
00468 pixels += line_size;
00469 block += 8;
00470 }
00471 }
00472
00473 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00474 int line_size)
00475 {
00476 int i;
00477 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00478
00479
00480 for(i=0;i<4;i++) {
00481 pixels[0] = cm[block[0]];
00482 pixels[1] = cm[block[1]];
00483 pixels[2] = cm[block[2]];
00484 pixels[3] = cm[block[3]];
00485
00486 pixels += line_size;
00487 block += 8;
00488 }
00489 }
00490
00491 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00492 int line_size)
00493 {
00494 int i;
00495 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00496
00497
00498 for(i=0;i<2;i++) {
00499 pixels[0] = cm[block[0]];
00500 pixels[1] = cm[block[1]];
00501
00502 pixels += line_size;
00503 block += 8;
00504 }
00505 }
00506
00507 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00508 uint8_t *restrict pixels,
00509 int line_size)
00510 {
00511 int i, j;
00512
00513 for (i = 0; i < 8; i++) {
00514 for (j = 0; j < 8; j++) {
00515 if (*block < -128)
00516 *pixels = 0;
00517 else if (*block > 127)
00518 *pixels = 255;
00519 else
00520 *pixels = (uint8_t)(*block + 128);
00521 block++;
00522 pixels++;
00523 }
00524 pixels += (line_size - 8);
00525 }
00526 }
00527
00528 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00529 int line_size)
00530 {
00531 int i;
00532
00533
00534 for(i=0;i<8;i++) {
00535 pixels[0] = block[0];
00536 pixels[1] = block[1];
00537 pixels[2] = block[2];
00538 pixels[3] = block[3];
00539 pixels[4] = block[4];
00540 pixels[5] = block[5];
00541 pixels[6] = block[6];
00542 pixels[7] = block[7];
00543
00544 pixels += line_size;
00545 block += 8;
00546 }
00547 }
00548
00549 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00550 int line_size)
00551 {
00552 int i;
00553 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00554
00555
00556 for(i=0;i<8;i++) {
00557 pixels[0] = cm[pixels[0] + block[0]];
00558 pixels[1] = cm[pixels[1] + block[1]];
00559 pixels[2] = cm[pixels[2] + block[2]];
00560 pixels[3] = cm[pixels[3] + block[3]];
00561 pixels[4] = cm[pixels[4] + block[4]];
00562 pixels[5] = cm[pixels[5] + block[5]];
00563 pixels[6] = cm[pixels[6] + block[6]];
00564 pixels[7] = cm[pixels[7] + block[7]];
00565 pixels += line_size;
00566 block += 8;
00567 }
00568 }
00569
00570 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00571 int line_size)
00572 {
00573 int i;
00574 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00575
00576
00577 for(i=0;i<4;i++) {
00578 pixels[0] = cm[pixels[0] + block[0]];
00579 pixels[1] = cm[pixels[1] + block[1]];
00580 pixels[2] = cm[pixels[2] + block[2]];
00581 pixels[3] = cm[pixels[3] + block[3]];
00582 pixels += line_size;
00583 block += 8;
00584 }
00585 }
00586
00587 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00588 int line_size)
00589 {
00590 int i;
00591 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00592
00593
00594 for(i=0;i<2;i++) {
00595 pixels[0] = cm[pixels[0] + block[0]];
00596 pixels[1] = cm[pixels[1] + block[1]];
00597 pixels += line_size;
00598 block += 8;
00599 }
00600 }
00601
00602 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00603 {
00604 int i;
00605 for(i=0;i<8;i++) {
00606 pixels[0] += block[0];
00607 pixels[1] += block[1];
00608 pixels[2] += block[2];
00609 pixels[3] += block[3];
00610 pixels[4] += block[4];
00611 pixels[5] += block[5];
00612 pixels[6] += block[6];
00613 pixels[7] += block[7];
00614 pixels += line_size;
00615 block += 8;
00616 }
00617 }
00618
00619 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00620 {
00621 int i;
00622 for(i=0;i<4;i++) {
00623 pixels[0] += block[0];
00624 pixels[1] += block[1];
00625 pixels[2] += block[2];
00626 pixels[3] += block[3];
00627 pixels += line_size;
00628 block += 4;
00629 }
00630 }
00631
00632 static int sum_abs_dctelem_c(DCTELEM *block)
00633 {
00634 int sum=0, i;
00635 for(i=0; i<64; i++)
00636 sum+= FFABS(block[i]);
00637 return sum;
00638 }
00639
/**
 * Fill a 16-pixel-wide, h-row area with a constant value.
 *
 * @param block      top-left pixel of the area
 * @param value      byte written to every pixel
 * @param line_size  row stride in bytes
 * @param h          number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
00649
/**
 * Fill an 8-pixel-wide, h-row area with a constant value.
 *
 * @param block      top-left pixel of the area
 * @param value      byte written to every pixel
 * @param line_size  row stride in bytes
 * @param h          number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
00659
/**
 * Upscale an 8x8 block to 16x16 by pixel doubling: source pixel (row, col)
 * is written to the four destination pixels (2*row .. 2*row+1,
 * 2*col .. 2*col+1).
 *
 * The pixels are stored byte by byte.  Storing them as 16-bit words through
 * a uint16_t* cast of dst is undefined when dst or linesize is odd
 * (misaligned store) and breaks the effective-type rules for byte buffers;
 * the bytes written are the same either way.
 *
 * @param src       64 source pixels, row-major
 * @param dst       top-left destination pixel (16 rows of 16 pixels written)
 * @param linesize  destination row stride in bytes
 */
static void scale_block_c(const uint8_t src[64], uint8_t *dst, int linesize)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        uint8_t *even = dst + 2 * row * linesize;   /* destination row 2*row     */
        uint8_t *odd  = even + linesize;            /* destination row 2*row + 1 */

        for (col = 0; col < 8; col++) {
            const uint8_t px = src[8 * row + col];

            even[2 * col]     = px;
            even[2 * col + 1] = px;
            odd[2 * col]      = px;
            odd[2 * col + 1]  = px;
        }
    }
}
00675
00676 #if 0
00677
00678 #define PIXOP2(OPNAME, OP) \
00679 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00680 {\
00681 int i;\
00682 for(i=0; i<h; i++){\
00683 OP(*((uint64_t*)block), AV_RN64(pixels));\
00684 pixels+=line_size;\
00685 block +=line_size;\
00686 }\
00687 }\
00688 \
00689 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00690 {\
00691 int i;\
00692 for(i=0; i<h; i++){\
00693 const uint64_t a= AV_RN64(pixels );\
00694 const uint64_t b= AV_RN64(pixels+1);\
00695 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00696 pixels+=line_size;\
00697 block +=line_size;\
00698 }\
00699 }\
00700 \
00701 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00702 {\
00703 int i;\
00704 for(i=0; i<h; i++){\
00705 const uint64_t a= AV_RN64(pixels );\
00706 const uint64_t b= AV_RN64(pixels+1);\
00707 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00708 pixels+=line_size;\
00709 block +=line_size;\
00710 }\
00711 }\
00712 \
00713 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00714 {\
00715 int i;\
00716 for(i=0; i<h; i++){\
00717 const uint64_t a= AV_RN64(pixels );\
00718 const uint64_t b= AV_RN64(pixels+line_size);\
00719 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00720 pixels+=line_size;\
00721 block +=line_size;\
00722 }\
00723 }\
00724 \
00725 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00726 {\
00727 int i;\
00728 for(i=0; i<h; i++){\
00729 const uint64_t a= AV_RN64(pixels );\
00730 const uint64_t b= AV_RN64(pixels+line_size);\
00731 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00732 pixels+=line_size;\
00733 block +=line_size;\
00734 }\
00735 }\
00736 \
00737 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00738 {\
00739 int i;\
00740 const uint64_t a= AV_RN64(pixels );\
00741 const uint64_t b= AV_RN64(pixels+1);\
00742 uint64_t l0= (a&0x0303030303030303ULL)\
00743 + (b&0x0303030303030303ULL)\
00744 + 0x0202020202020202ULL;\
00745 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00746 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00747 uint64_t l1,h1;\
00748 \
00749 pixels+=line_size;\
00750 for(i=0; i<h; i+=2){\
00751 uint64_t a= AV_RN64(pixels );\
00752 uint64_t b= AV_RN64(pixels+1);\
00753 l1= (a&0x0303030303030303ULL)\
00754 + (b&0x0303030303030303ULL);\
00755 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00756 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00757 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00758 pixels+=line_size;\
00759 block +=line_size;\
00760 a= AV_RN64(pixels );\
00761 b= AV_RN64(pixels+1);\
00762 l0= (a&0x0303030303030303ULL)\
00763 + (b&0x0303030303030303ULL)\
00764 + 0x0202020202020202ULL;\
00765 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00766 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00767 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00768 pixels+=line_size;\
00769 block +=line_size;\
00770 }\
00771 }\
00772 \
00773 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00774 {\
00775 int i;\
00776 const uint64_t a= AV_RN64(pixels );\
00777 const uint64_t b= AV_RN64(pixels+1);\
00778 uint64_t l0= (a&0x0303030303030303ULL)\
00779 + (b&0x0303030303030303ULL)\
00780 + 0x0101010101010101ULL;\
00781 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00782 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00783 uint64_t l1,h1;\
00784 \
00785 pixels+=line_size;\
00786 for(i=0; i<h; i+=2){\
00787 uint64_t a= AV_RN64(pixels );\
00788 uint64_t b= AV_RN64(pixels+1);\
00789 l1= (a&0x0303030303030303ULL)\
00790 + (b&0x0303030303030303ULL);\
00791 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00792 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00793 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00794 pixels+=line_size;\
00795 block +=line_size;\
00796 a= AV_RN64(pixels );\
00797 b= AV_RN64(pixels+1);\
00798 l0= (a&0x0303030303030303ULL)\
00799 + (b&0x0303030303030303ULL)\
00800 + 0x0101010101010101ULL;\
00801 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00802 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00803 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00804 pixels+=line_size;\
00805 block +=line_size;\
00806 }\
00807 }\
00808 \
00809 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00810 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00811 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00812 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00813 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00814 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00815 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00816
00817 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00818 #else // 64 bit variant
00819
00820 #define PIXOP2(OPNAME, OP) \
00821 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00822 int i;\
00823 for(i=0; i<h; i++){\
00824 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00825 pixels+=line_size;\
00826 block +=line_size;\
00827 }\
00828 }\
00829 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00830 int i;\
00831 for(i=0; i<h; i++){\
00832 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00833 pixels+=line_size;\
00834 block +=line_size;\
00835 }\
00836 }\
00837 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00838 int i;\
00839 for(i=0; i<h; i++){\
00840 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00841 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00842 pixels+=line_size;\
00843 block +=line_size;\
00844 }\
00845 }\
00846 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00847 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00848 }\
00849 \
00850 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00851 int src_stride1, int src_stride2, int h){\
00852 int i;\
00853 for(i=0; i<h; i++){\
00854 uint32_t a,b;\
00855 a= AV_RN32(&src1[i*src_stride1 ]);\
00856 b= AV_RN32(&src2[i*src_stride2 ]);\
00857 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00858 a= AV_RN32(&src1[i*src_stride1+4]);\
00859 b= AV_RN32(&src2[i*src_stride2+4]);\
00860 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00861 }\
00862 }\
00863 \
00864 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00865 int src_stride1, int src_stride2, int h){\
00866 int i;\
00867 for(i=0; i<h; i++){\
00868 uint32_t a,b;\
00869 a= AV_RN32(&src1[i*src_stride1 ]);\
00870 b= AV_RN32(&src2[i*src_stride2 ]);\
00871 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00872 a= AV_RN32(&src1[i*src_stride1+4]);\
00873 b= AV_RN32(&src2[i*src_stride2+4]);\
00874 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00875 }\
00876 }\
00877 \
00878 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00879 int src_stride1, int src_stride2, int h){\
00880 int i;\
00881 for(i=0; i<h; i++){\
00882 uint32_t a,b;\
00883 a= AV_RN32(&src1[i*src_stride1 ]);\
00884 b= AV_RN32(&src2[i*src_stride2 ]);\
00885 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00886 }\
00887 }\
00888 \
00889 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00890 int src_stride1, int src_stride2, int h){\
00891 int i;\
00892 for(i=0; i<h; i++){\
00893 uint32_t a,b;\
00894 a= AV_RN16(&src1[i*src_stride1 ]);\
00895 b= AV_RN16(&src2[i*src_stride2 ]);\
00896 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00897 }\
00898 }\
00899 \
00900 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00901 int src_stride1, int src_stride2, int h){\
00902 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00903 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00904 }\
00905 \
00906 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00907 int src_stride1, int src_stride2, int h){\
00908 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00909 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00910 }\
00911 \
00912 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00913 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00914 }\
00915 \
00916 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00917 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00918 }\
00919 \
00920 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00921 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00922 }\
00923 \
00924 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00925 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00926 }\
00927 \
00928 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00929 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00930 int i;\
00931 for(i=0; i<h; i++){\
00932 uint32_t a, b, c, d, l0, l1, h0, h1;\
00933 a= AV_RN32(&src1[i*src_stride1]);\
00934 b= AV_RN32(&src2[i*src_stride2]);\
00935 c= AV_RN32(&src3[i*src_stride3]);\
00936 d= AV_RN32(&src4[i*src_stride4]);\
00937 l0= (a&0x03030303UL)\
00938 + (b&0x03030303UL)\
00939 + 0x02020202UL;\
00940 h0= ((a&0xFCFCFCFCUL)>>2)\
00941 + ((b&0xFCFCFCFCUL)>>2);\
00942 l1= (c&0x03030303UL)\
00943 + (d&0x03030303UL);\
00944 h1= ((c&0xFCFCFCFCUL)>>2)\
00945 + ((d&0xFCFCFCFCUL)>>2);\
00946 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00947 a= AV_RN32(&src1[i*src_stride1+4]);\
00948 b= AV_RN32(&src2[i*src_stride2+4]);\
00949 c= AV_RN32(&src3[i*src_stride3+4]);\
00950 d= AV_RN32(&src4[i*src_stride4+4]);\
00951 l0= (a&0x03030303UL)\
00952 + (b&0x03030303UL)\
00953 + 0x02020202UL;\
00954 h0= ((a&0xFCFCFCFCUL)>>2)\
00955 + ((b&0xFCFCFCFCUL)>>2);\
00956 l1= (c&0x03030303UL)\
00957 + (d&0x03030303UL);\
00958 h1= ((c&0xFCFCFCFCUL)>>2)\
00959 + ((d&0xFCFCFCFCUL)>>2);\
00960 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00961 }\
00962 }\
00963 \
00964 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00965 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00966 }\
00967 \
00968 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00969 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00970 }\
00971 \
00972 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00973 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00974 }\
00975 \
00976 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00977 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00978 }\
00979 \
00980 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00981 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00982 int i;\
00983 for(i=0; i<h; i++){\
00984 uint32_t a, b, c, d, l0, l1, h0, h1;\
00985 a= AV_RN32(&src1[i*src_stride1]);\
00986 b= AV_RN32(&src2[i*src_stride2]);\
00987 c= AV_RN32(&src3[i*src_stride3]);\
00988 d= AV_RN32(&src4[i*src_stride4]);\
00989 l0= (a&0x03030303UL)\
00990 + (b&0x03030303UL)\
00991 + 0x01010101UL;\
00992 h0= ((a&0xFCFCFCFCUL)>>2)\
00993 + ((b&0xFCFCFCFCUL)>>2);\
00994 l1= (c&0x03030303UL)\
00995 + (d&0x03030303UL);\
00996 h1= ((c&0xFCFCFCFCUL)>>2)\
00997 + ((d&0xFCFCFCFCUL)>>2);\
00998 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00999 a= AV_RN32(&src1[i*src_stride1+4]);\
01000 b= AV_RN32(&src2[i*src_stride2+4]);\
01001 c= AV_RN32(&src3[i*src_stride3+4]);\
01002 d= AV_RN32(&src4[i*src_stride4+4]);\
01003 l0= (a&0x03030303UL)\
01004 + (b&0x03030303UL)\
01005 + 0x01010101UL;\
01006 h0= ((a&0xFCFCFCFCUL)>>2)\
01007 + ((b&0xFCFCFCFCUL)>>2);\
01008 l1= (c&0x03030303UL)\
01009 + (d&0x03030303UL);\
01010 h1= ((c&0xFCFCFCFCUL)>>2)\
01011 + ((d&0xFCFCFCFCUL)>>2);\
01012 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01013 }\
01014 }\
01015 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
01016 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01017 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01018 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01019 }\
01020 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
01021 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01022 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01023 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01024 }\
01025 \
01026 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01027 {\
01028 int i, a0, b0, a1, b1;\
01029 a0= pixels[0];\
01030 b0= pixels[1] + 2;\
01031 a0 += b0;\
01032 b0 += pixels[2];\
01033 \
01034 pixels+=line_size;\
01035 for(i=0; i<h; i+=2){\
01036 a1= pixels[0];\
01037 b1= pixels[1];\
01038 a1 += b1;\
01039 b1 += pixels[2];\
01040 \
01041 block[0]= (a1+a0)>>2; \
01042 block[1]= (b1+b0)>>2;\
01043 \
01044 pixels+=line_size;\
01045 block +=line_size;\
01046 \
01047 a0= pixels[0];\
01048 b0= pixels[1] + 2;\
01049 a0 += b0;\
01050 b0 += pixels[2];\
01051 \
01052 block[0]= (a1+a0)>>2;\
01053 block[1]= (b1+b0)>>2;\
01054 pixels+=line_size;\
01055 block +=line_size;\
01056 }\
01057 }\
01058 \
01059 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01060 {\
01061 int i;\
01062 const uint32_t a= AV_RN32(pixels );\
01063 const uint32_t b= AV_RN32(pixels+1);\
01064 uint32_t l0= (a&0x03030303UL)\
01065 + (b&0x03030303UL)\
01066 + 0x02020202UL;\
01067 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01068 + ((b&0xFCFCFCFCUL)>>2);\
01069 uint32_t l1,h1;\
01070 \
01071 pixels+=line_size;\
01072 for(i=0; i<h; i+=2){\
01073 uint32_t a= AV_RN32(pixels );\
01074 uint32_t b= AV_RN32(pixels+1);\
01075 l1= (a&0x03030303UL)\
01076 + (b&0x03030303UL);\
01077 h1= ((a&0xFCFCFCFCUL)>>2)\
01078 + ((b&0xFCFCFCFCUL)>>2);\
01079 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01080 pixels+=line_size;\
01081 block +=line_size;\
01082 a= AV_RN32(pixels );\
01083 b= AV_RN32(pixels+1);\
01084 l0= (a&0x03030303UL)\
01085 + (b&0x03030303UL)\
01086 + 0x02020202UL;\
01087 h0= ((a&0xFCFCFCFCUL)>>2)\
01088 + ((b&0xFCFCFCFCUL)>>2);\
01089 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01090 pixels+=line_size;\
01091 block +=line_size;\
01092 }\
01093 }\
01094 \
01095 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01096 {\
01097 int j;\
01098 for(j=0; j<2; j++){\
01099 int i;\
01100 const uint32_t a= AV_RN32(pixels );\
01101 const uint32_t b= AV_RN32(pixels+1);\
01102 uint32_t l0= (a&0x03030303UL)\
01103 + (b&0x03030303UL)\
01104 + 0x02020202UL;\
01105 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01106 + ((b&0xFCFCFCFCUL)>>2);\
01107 uint32_t l1,h1;\
01108 \
01109 pixels+=line_size;\
01110 for(i=0; i<h; i+=2){\
01111 uint32_t a= AV_RN32(pixels );\
01112 uint32_t b= AV_RN32(pixels+1);\
01113 l1= (a&0x03030303UL)\
01114 + (b&0x03030303UL);\
01115 h1= ((a&0xFCFCFCFCUL)>>2)\
01116 + ((b&0xFCFCFCFCUL)>>2);\
01117 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01118 pixels+=line_size;\
01119 block +=line_size;\
01120 a= AV_RN32(pixels );\
01121 b= AV_RN32(pixels+1);\
01122 l0= (a&0x03030303UL)\
01123 + (b&0x03030303UL)\
01124 + 0x02020202UL;\
01125 h0= ((a&0xFCFCFCFCUL)>>2)\
01126 + ((b&0xFCFCFCFCUL)>>2);\
01127 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01128 pixels+=line_size;\
01129 block +=line_size;\
01130 }\
01131 pixels+=4-line_size*(h+1);\
01132 block +=4-line_size*h;\
01133 }\
01134 }\
01135 \
01136 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01137 {\
01138 int j;\
01139 for(j=0; j<2; j++){\
01140 int i;\
01141 const uint32_t a= AV_RN32(pixels );\
01142 const uint32_t b= AV_RN32(pixels+1);\
01143 uint32_t l0= (a&0x03030303UL)\
01144 + (b&0x03030303UL)\
01145 + 0x01010101UL;\
01146 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01147 + ((b&0xFCFCFCFCUL)>>2);\
01148 uint32_t l1,h1;\
01149 \
01150 pixels+=line_size;\
01151 for(i=0; i<h; i+=2){\
01152 uint32_t a= AV_RN32(pixels );\
01153 uint32_t b= AV_RN32(pixels+1);\
01154 l1= (a&0x03030303UL)\
01155 + (b&0x03030303UL);\
01156 h1= ((a&0xFCFCFCFCUL)>>2)\
01157 + ((b&0xFCFCFCFCUL)>>2);\
01158 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01159 pixels+=line_size;\
01160 block +=line_size;\
01161 a= AV_RN32(pixels );\
01162 b= AV_RN32(pixels+1);\
01163 l0= (a&0x03030303UL)\
01164 + (b&0x03030303UL)\
01165 + 0x01010101UL;\
01166 h0= ((a&0xFCFCFCFCUL)>>2)\
01167 + ((b&0xFCFCFCFCUL)>>2);\
01168 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01169 pixels+=line_size;\
01170 block +=line_size;\
01171 }\
01172 pixels+=4-line_size*(h+1);\
01173 block +=4-line_size*h;\
01174 }\
01175 }\
01176 \
01177 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01178 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01179 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01180 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01181 av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01182 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01183 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01184 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01185
01186 #define op_avg(a, b) a = rnd_avg32(a, b)
01187 #endif
01188 #define op_put(a, b) a = b
01189
01190 PIXOP2(avg, op_avg)
01191 PIXOP2(put, op_put)
01192 #undef op_avg
01193 #undef op_put
01194
01195 #define put_no_rnd_pixels8_c put_pixels8_c
01196 #define put_no_rnd_pixels16_c put_pixels16_c
01197
01198 #define avg2(a,b) ((a+b+1)>>1)
01199 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
01200
/**
 * Single-stride front end for put_no_rnd_pixels16_l2().
 *
 * The same stride value is forwarded for all three stride arguments of the
 * underlying helper.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride stride passed for the destination and both sources
 * @param h      row count forwarded to the helper
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                                     int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b,
                           stride, stride, stride,
                           h);
}
01204
/**
 * Single-stride front end for put_no_rnd_pixels8_l2().
 *
 * The same stride value is forwarded for all three stride arguments of the
 * underlying helper.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride stride passed for the destination and both sources
 * @param h      row count forwarded to the helper
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                                    int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b,
                          stride, stride, stride,
                          h);
}
01208
/**
 * Bilinear interpolation of an 8-pixel-wide block.
 *
 * Each output pixel is a weighted sum of the 2x2 source neighbourhood
 * starting at the same position. The four weights are derived from x16 and
 * y16 and always add up to 256, hence the final shift by 8.
 *
 * @param dst     destination, 8 bytes written per row
 * @param src     source; 9 bytes of the current row and of the row below are read
 * @param stride  distance between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbours (out of 16)
 * @param y16     vertical weight of the lower neighbours (out of 16)
 * @param rounder constant added to the weighted sum before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);   /* top-left     */
    const int w_tr = (     x16) * (16 - y16);   /* top-right    */
    const int w_bl = (16 - x16) * (     y16);   /* bottom-left  */
    const int w_br = (     x16) * (     y16);   /* bottom-right */
    int row;

    for (row = 0; row < h; row++) {
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1]
                      + w_bl * src[stride + col] + w_br * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
01231
/**
 * Motion compensation of an 8-pixel-wide block along an affine trajectory.
 *
 * For every output pixel a source position is tracked in (vx, vy). The low
 * 16 bits of that position are discarded, the next `shift` bits select the
 * interpolation fraction and the remaining bits the integer coordinate.
 * Positions inside the picture are bilinearly interpolated; when one
 * coordinate falls outside it is clamped with av_clip() and interpolation is
 * done only along the other axis; when both fall outside the clamped pixel
 * is copied unchanged.
 *
 * @param dst    destination block, 8 pixels per row
 * @param src    source picture
 * @param stride row distance of both dst and src
 * @param h      number of output rows
 * @param ox     horizontal start position of the first row
 * @param oy     vertical start position of the first row
 * @param dxx    increment of the horizontal position per output column
 * @param dxy    increment of the horizontal start position per output row
 * @param dyx    increment of the vertical position per output column
 * @param dyy    increment of the vertical start position per output row
 * @param shift  number of fraction bits used for interpolation
 * @param r      rounding constant added before the final shift
 * @param width  source width; coordinates are limited to [0, width-1]
 * @param height source height; coordinates are limited to [0, height-1]
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int unit     = 1 << shift;   /* weight representing a full pixel */
    const int fmask    = unit - 1;
    const int last_col = width  - 1;
    const int last_row = height - 1;
    int row;

    for (row = 0; row < h; row++) {
        int px = ox;
        int py = oy;
        int col;

        for (col = 0; col < 8; col++) {
            int sx = px >> 16;
            int sy = py >> 16;
            const int fx = sx & fmask;
            const int fy = sy & fmask;
            int inside_x, inside_y, pos;

            sx >>= shift;
            sy >>= shift;

            /* The unsigned compare rejects negative coordinates as well. */
            inside_x = (unsigned)sx < (unsigned)last_col;
            inside_y = (unsigned)sy < (unsigned)last_row;

            if (inside_x && inside_y) {
                int upper, lower;

                pos   = sx + sy * stride;
                upper = src[pos         ] * (unit - fx) + src[pos          + 1] * fx;
                lower = src[pos + stride] * (unit - fx) + src[pos + stride + 1] * fx;
                dst[row * stride + col] = (upper * (unit - fy) + lower * fy + r) >> (shift * 2);
            } else if (inside_x) {
                /* vertical coordinate clamped: horizontal interpolation only */
                pos = sx + av_clip(sy, 0, last_row) * stride;
                dst[row * stride + col] = ((src[pos] * (unit - fx) + src[pos + 1] * fx) * unit
                                           + r) >> (shift * 2);
            } else if (inside_y) {
                /* horizontal coordinate clamped: vertical interpolation only */
                pos = av_clip(sx, 0, last_col) + sy * stride;
                dst[row * stride + col] = ((src[pos] * (unit - fy) + src[pos + stride] * fy) * unit
                                           + r) >> (shift * 2);
            } else {
                pos = av_clip(sx, 0, last_col) + av_clip(sy, 0, last_row) * stride;
                dst[row * stride + col] = src[pos];
            }

            px += dxx;
            py += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
01289
/**
 * Whole-pixel tpel case: dispatch to the put_pixels routine matching the
 * block width. Widths other than 2, 4, 8 and 16 are silently ignored.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
01298
/**
 * Horizontal tpel interpolation, weights 2:1 for (current, right neighbour).
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01309
/**
 * Horizontal tpel interpolation, weights 1:2 for (current, right neighbour).
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01320
/**
 * Vertical tpel interpolation, weights 2:1 for (current, pixel below).
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col] + src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01331
/**
 * Two-dimensional tpel interpolation over the 2x2 neighbourhood with
 * weights 4,3 (upper row) and 3,2 (lower row). The weights add up to 12;
 * the division is done as a multiply by 2731 and a shift by 15
 * (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01342
/**
 * Two-dimensional tpel interpolation over the 2x2 neighbourhood with
 * weights 3,2 (upper row) and 4,3 (lower row). The weights add up to 12;
 * the division is done as a multiply by 2731 and a shift by 15
 * (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 2*src[col+1]
                          + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01353
/**
 * Vertical tpel interpolation, weights 1:2 for (current, pixel below).
 * The division by 3 is done as a multiply by 683 and a shift by 11
 * (683/2048 is approximately 1/3).
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = src[col] + 2*src[col+stride] + 1;
            dst[col] = (683*sum) >> 11;
        }
    }
}
01364
/**
 * Two-dimensional tpel interpolation over the 2x2 neighbourhood with
 * weights 3,4 (upper row) and 2,3 (lower row). The weights add up to 12;
 * the division is done as a multiply by 2731 and a shift by 15
 * (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 4*src[col+1]
                          + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01375
/**
 * Two-dimensional tpel interpolation over the 2x2 neighbourhood with
 * weights 2,3 (upper row) and 3,4 (lower row). The weights add up to 12;
 * the division is done as a multiply by 2731 and a shift by 15
 * (2731/32768 is approximately 1/12).
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
    }
}
01386
/**
 * Whole-pixel tpel case, averaging variant: dispatch to the avg_pixels
 * routine matching the block width. Widths other than 2, 4, 8 and 16 are
 * silently ignored.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
01395
/**
 * Horizontal tpel interpolation (weights 2:1 for current and right
 * neighbour, scaled by 683/2048, approximately 1/3), then rounded averaging
 * of the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01406
/**
 * Horizontal tpel interpolation (weights 1:2 for current and right
 * neighbour, scaled by 683/2048, approximately 1/3), then rounded averaging
 * of the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01417
/**
 * Vertical tpel interpolation (weights 2:1 for current pixel and the pixel
 * below, scaled by 683/2048, approximately 1/3), then rounded averaging of
 * the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01428
/**
 * Two-dimensional tpel interpolation (weights 4,3 / 3,2 over the 2x2
 * neighbourhood, scaled by 2731/32768, approximately 1/12), then rounded
 * averaging of the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 4*src[col]        + 3*src[col+1]
                           + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01439
/**
 * Two-dimensional tpel interpolation (weights 3,2 / 4,3 over the 2x2
 * neighbourhood, scaled by 2731/32768, approximately 1/12), then rounded
 * averaging of the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 3*src[col]        + 2*src[col+1]
                           + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01450
/**
 * Vertical tpel interpolation (weights 1:2 for current pixel and the pixel
 * below, scaled by 683/2048, approximately 1/3), then rounded averaging of
 * the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01461
/**
 * Two-dimensional tpel interpolation (weights 3,4 / 2,3 over the 2x2
 * neighbourhood, scaled by 2731/32768, approximately 1/12), then rounded
 * averaging of the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 3*src[col]        + 4*src[col+1]
                           + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
01472
/**
 * Two-dimensional tpel interpolation (weights 2,3 / 3,4 over the 2x2
 * neighbourhood, scaled by 2731/32768, approximately 1/12), then rounded
 * averaging of the prediction with the value already stored in dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int sum  = 2*src[col]        + 3*src[col+1]
                           + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            const int pred = (2731*sum) >> 15;
            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
#if 0
/* Currently disabled. TPEL_WIDTH(width) would generate fixed-width wrappers
 * around the generic put_tpel_pixels_mcXY_c() helpers, each forwarding its
 * arguments with the block width filled in. The wrapper bodies are plain
 * calls (no leading "void"), so the macro compiles if it is ever enabled. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
01504
/*
 * Generator for one bilinear chroma motion-compensation function that
 * processes rows of W pixels.
 *
 * x and y (each asserted to lie in 0..7) select the four weights of the 2x2
 * source neighbourhood; the weights always add up to 64. When x or y is 0
 * the diagonal weight vanishes and only two taps are needed: the current
 * pixel and its neighbour at offset `nb` (the row below when the vertical
 * weight is non-zero, otherwise the pixel to the right).
 *
 * OP(dest, value) stores the unscaled weighted sum; scaling and rounding are
 * the responsibility of the operator macro.
 */
#define H264_CHROMA_MC_W(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w00 = (8-x)*(8-y);\
    const int w01 = (  x)*(8-y);\
    const int w10 = (8-x)*(  y);\
    const int w11 = (  x)*(  y);\
    int row, col;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(w11){\
        for(row=0; row<h; row++){\
            for(col=0; col<W; col++){\
                OP(dst[col], (w00*src[col] + w01*src[col+1] + w10*src[stride+col] + w11*src[stride+col+1]));\
            }\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int w_nb = w01 + w10;\
        const int nb   = w10 ? stride : 1;\
        for(row=0; row<h; row++){\
            for(col=0; col<W; col++){\
                OP(dst[col], (w00*src[col] + w_nb*src[nb+col]));\
            }\
            dst += stride;\
            src += stride;\
        }\
    }\
}

/* Emits the 2-, 4- and 8-pixel-wide variants for one store operator. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_W(OPNAME, OP, 2)\
H264_CHROMA_MC_W(OPNAME, OP, 4)\
H264_CHROMA_MC_W(OPNAME, OP, 8)

/* Both operators scale the weighted sum back by 64 with rounding (+32, >>6);
 * op_avg additionally takes the rounded mean with the existing destination. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01613
01614 #define QPEL_MC(r, OPNAME, RND, OP) \
01615 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01616 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01617 int i;\
01618 for(i=0; i<h; i++)\
01619 {\
01620 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01621 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01622 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01623 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01624 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01625 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01626 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01627 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01628 dst+=dstStride;\
01629 src+=srcStride;\
01630 }\
01631 }\
01632 \
01633 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01634 const int w=8;\
01635 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01636 int i;\
01637 for(i=0; i<w; i++)\
01638 {\
01639 const int src0= src[0*srcStride];\
01640 const int src1= src[1*srcStride];\
01641 const int src2= src[2*srcStride];\
01642 const int src3= src[3*srcStride];\
01643 const int src4= src[4*srcStride];\
01644 const int src5= src[5*srcStride];\
01645 const int src6= src[6*srcStride];\
01646 const int src7= src[7*srcStride];\
01647 const int src8= src[8*srcStride];\
01648 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01649 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01650 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01651 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01652 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01653 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01654 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01655 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01656 dst++;\
01657 src++;\
01658 }\
01659 }\
01660 \
01661 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01662 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01663 int i;\
01664 \
01665 for(i=0; i<h; i++)\
01666 {\
01667 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01668 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01669 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01670 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01671 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01672 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01673 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01674 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01675 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01676 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01677 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01678 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01679 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01680 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01681 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01682 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01683 dst+=dstStride;\
01684 src+=srcStride;\
01685 }\
01686 }\
01687 \
01688 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01689 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01690 int i;\
01691 const int w=16;\
01692 for(i=0; i<w; i++)\
01693 {\
01694 const int src0= src[0*srcStride];\
01695 const int src1= src[1*srcStride];\
01696 const int src2= src[2*srcStride];\
01697 const int src3= src[3*srcStride];\
01698 const int src4= src[4*srcStride];\
01699 const int src5= src[5*srcStride];\
01700 const int src6= src[6*srcStride];\
01701 const int src7= src[7*srcStride];\
01702 const int src8= src[8*srcStride];\
01703 const int src9= src[9*srcStride];\
01704 const int src10= src[10*srcStride];\
01705 const int src11= src[11*srcStride];\
01706 const int src12= src[12*srcStride];\
01707 const int src13= src[13*srcStride];\
01708 const int src14= src[14*srcStride];\
01709 const int src15= src[15*srcStride];\
01710 const int src16= src[16*srcStride];\
01711 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01712 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01713 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01714 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01715 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01716 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01717 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01718 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01719 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01720 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01721 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01722 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01723 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01724 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01725 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01726 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01727 dst++;\
01728 src++;\
01729 }\
01730 }\
01731 \
01732 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01733 uint8_t half[64];\
01734 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01735 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01736 }\
01737 \
01738 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01739 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01740 }\
01741 \
01742 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01743 uint8_t half[64];\
01744 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01745 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01746 }\
01747 \
01748 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01749 uint8_t full[16*9];\
01750 uint8_t half[64];\
01751 copy_block9(full, src, 16, stride, 9);\
01752 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01753 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01754 }\
01755 \
01756 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01757 uint8_t full[16*9];\
01758 copy_block9(full, src, 16, stride, 9);\
01759 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01760 }\
01761 \
01762 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01763 uint8_t full[16*9];\
01764 uint8_t half[64];\
01765 copy_block9(full, src, 16, stride, 9);\
01766 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01767 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01768 }\
01769 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01770 uint8_t full[16*9];\
01771 uint8_t halfH[72];\
01772 uint8_t halfV[64];\
01773 uint8_t halfHV[64];\
01774 copy_block9(full, src, 16, stride, 9);\
01775 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01776 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01777 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01778 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01779 }\
01780 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01781 uint8_t full[16*9];\
01782 uint8_t halfH[72];\
01783 uint8_t halfHV[64];\
01784 copy_block9(full, src, 16, stride, 9);\
01785 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01786 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01787 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01788 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01789 }\
01790 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01791 uint8_t full[16*9];\
01792 uint8_t halfH[72];\
01793 uint8_t halfV[64];\
01794 uint8_t halfHV[64];\
01795 copy_block9(full, src, 16, stride, 9);\
01796 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01797 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01798 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01799 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01800 }\
01801 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01802 uint8_t full[16*9];\
01803 uint8_t halfH[72];\
01804 uint8_t halfHV[64];\
01805 copy_block9(full, src, 16, stride, 9);\
01806 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01807 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01808 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01809 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01810 }\
01811 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01812 uint8_t full[16*9];\
01813 uint8_t halfH[72];\
01814 uint8_t halfV[64];\
01815 uint8_t halfHV[64];\
01816 copy_block9(full, src, 16, stride, 9);\
01817 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01818 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01819 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01820 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01821 }\
01822 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01823 uint8_t full[16*9];\
01824 uint8_t halfH[72];\
01825 uint8_t halfHV[64];\
01826 copy_block9(full, src, 16, stride, 9);\
01827 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01828 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01829 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01830 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01831 }\
01832 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01833 uint8_t full[16*9];\
01834 uint8_t halfH[72];\
01835 uint8_t halfV[64];\
01836 uint8_t halfHV[64];\
01837 copy_block9(full, src, 16, stride, 9);\
01838 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01839 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01840 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01841 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01842 }\
01843 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01844 uint8_t full[16*9];\
01845 uint8_t halfH[72];\
01846 uint8_t halfHV[64];\
01847 copy_block9(full, src, 16, stride, 9);\
01848 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01849 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01850 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01851 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01852 }\
01853 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01854 uint8_t halfH[72];\
01855 uint8_t halfHV[64];\
01856 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01857 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01858 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01859 }\
01860 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01861 uint8_t halfH[72];\
01862 uint8_t halfHV[64];\
01863 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01864 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01865 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01866 }\
01867 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01868 uint8_t full[16*9];\
01869 uint8_t halfH[72];\
01870 uint8_t halfV[64];\
01871 uint8_t halfHV[64];\
01872 copy_block9(full, src, 16, stride, 9);\
01873 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01874 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01875 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01876 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01877 }\
01878 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01879 uint8_t full[16*9];\
01880 uint8_t halfH[72];\
01881 copy_block9(full, src, 16, stride, 9);\
01882 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01883 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01884 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01885 }\
01886 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01887 uint8_t full[16*9];\
01888 uint8_t halfH[72];\
01889 uint8_t halfV[64];\
01890 uint8_t halfHV[64];\
01891 copy_block9(full, src, 16, stride, 9);\
01892 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01893 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01894 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01895 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01896 }\
01897 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01898 uint8_t full[16*9];\
01899 uint8_t halfH[72];\
01900 copy_block9(full, src, 16, stride, 9);\
01901 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01902 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01903 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01904 }\
01905 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01906 uint8_t halfH[72];\
01907 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01908 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01909 }\
01910 \
01911 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01912 uint8_t half[256];\
01913 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01914 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01915 }\
01916 \
01917 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01918 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01919 }\
01920 \
01921 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01922 uint8_t half[256];\
01923 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01924 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01925 }\
01926 \
01927 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01928 uint8_t full[24*17];\
01929 uint8_t half[256];\
01930 copy_block17(full, src, 24, stride, 17);\
01931 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01932 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
01933 }\
01934 \
01935 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01936 uint8_t full[24*17];\
01937 copy_block17(full, src, 24, stride, 17);\
01938 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01939 }\
01940 \
01941 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01942 uint8_t full[24*17];\
01943 uint8_t half[256];\
01944 copy_block17(full, src, 24, stride, 17);\
01945 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01946 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
01947 }\
01948 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01949 uint8_t full[24*17];\
01950 uint8_t halfH[272];\
01951 uint8_t halfV[256];\
01952 uint8_t halfHV[256];\
01953 copy_block17(full, src, 24, stride, 17);\
01954 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01955 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01956 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01957 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01958 }\
01959 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01960 uint8_t full[24*17];\
01961 uint8_t halfH[272];\
01962 uint8_t halfHV[256];\
01963 copy_block17(full, src, 24, stride, 17);\
01964 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01965 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01966 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01967 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01968 }\
01969 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01970 uint8_t full[24*17];\
01971 uint8_t halfH[272];\
01972 uint8_t halfV[256];\
01973 uint8_t halfHV[256];\
01974 copy_block17(full, src, 24, stride, 17);\
01975 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01976 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01977 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01978 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01979 }\
01980 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01981 uint8_t full[24*17];\
01982 uint8_t halfH[272];\
01983 uint8_t halfHV[256];\
01984 copy_block17(full, src, 24, stride, 17);\
01985 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01986 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01987 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01988 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01989 }\
01990 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01991 uint8_t full[24*17];\
01992 uint8_t halfH[272];\
01993 uint8_t halfV[256];\
01994 uint8_t halfHV[256];\
01995 copy_block17(full, src, 24, stride, 17);\
01996 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01997 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01998 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01999 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02000 }\
02001 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02002 uint8_t full[24*17];\
02003 uint8_t halfH[272];\
02004 uint8_t halfHV[256];\
02005 copy_block17(full, src, 24, stride, 17);\
02006 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02007 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02008 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02009 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02010 }\
02011 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
02012 uint8_t full[24*17];\
02013 uint8_t halfH[272];\
02014 uint8_t halfV[256];\
02015 uint8_t halfHV[256];\
02016 copy_block17(full, src, 24, stride, 17);\
02017 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
02018 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02019 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02020 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02021 }\
02022 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02023 uint8_t full[24*17];\
02024 uint8_t halfH[272];\
02025 uint8_t halfHV[256];\
02026 copy_block17(full, src, 24, stride, 17);\
02027 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02028 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02029 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02030 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02031 }\
02032 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02033 uint8_t halfH[272];\
02034 uint8_t halfHV[256];\
02035 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02036 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02037 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02038 }\
02039 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02040 uint8_t halfH[272];\
02041 uint8_t halfHV[256];\
02042 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02043 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02044 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02045 }\
02046 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
02047 uint8_t full[24*17];\
02048 uint8_t halfH[272];\
02049 uint8_t halfV[256];\
02050 uint8_t halfHV[256];\
02051 copy_block17(full, src, 24, stride, 17);\
02052 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02053 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02054 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02055 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02056 }\
02057 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02058 uint8_t full[24*17];\
02059 uint8_t halfH[272];\
02060 copy_block17(full, src, 24, stride, 17);\
02061 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02062 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02063 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02064 }\
02065 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02066 uint8_t full[24*17];\
02067 uint8_t halfH[272];\
02068 uint8_t halfV[256];\
02069 uint8_t halfHV[256];\
02070 copy_block17(full, src, 24, stride, 17);\
02071 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02072 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02073 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02074 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02075 }\
02076 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02077 uint8_t full[24*17];\
02078 uint8_t halfH[272];\
02079 copy_block17(full, src, 24, stride, 17);\
02080 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02081 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02082 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02083 }\
02084 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02085 uint8_t halfH[272];\
02086 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02087 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02088 }
02089
/*
 * Store operators handed to QPEL_MC as its last argument.
 * `b` is an unnormalised filter sum; each operator divides it by 32 (>>5)
 * and looks the result up through `cm`.
 *   op_put         rounds with +16 and overwrites a
 *   op_put_no_rnd  rounds with +15 and overwrites a
 *   op_avg         op_put value averaged with the current a, rounding up (+1)
 *   op_avg_no_rnd  op_put_no_rnd value averaged with the current a, no +1
 * `cm` is not a parameter: it must be a local in the expanding function
 * (presumably the clipping table set up by QPEL_MC - its header is outside
 * this excerpt, TODO confirm).
 */
02090 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02091 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
02092 #define op_put(a, b) a = cm[((b) + 16)>>5]
02093 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
02094
/*
 * Instantiate the MPEG-4 qpel function families: put_, put_no_rnd_ and avg_.
 * The second and third arguments are pasted into function names as the
 * OPNAME and RND fragments. op_avg_no_rnd is defined above but not used by
 * any of these three instantiations.
 * NOTE(review): the meaning of the first argument (0/1) is defined in the
 * QPEL_MC header, which is not visible here.
 */
02095 QPEL_MC(0, put_ , _ , op_put)
02096 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
02097 QPEL_MC(0, avg_ , _ , op_avg)
02098
/* The operators are only meaningful inside QPEL_MC; drop them again. */
02099 #undef op_avg
02100 #undef op_avg_no_rnd
02101 #undef op_put
02102 #undef op_put_no_rnd
02103
/*
 * The mc00 qpel entries are aliased to the ff_*_pixels8x8_c /
 * ff_*_pixels16x16_c wrappers instead of having their own functions.
 * Both put_no_rnd_ aliases point at the same wrappers as the put_ aliases.
 * NOTE(review): mc00 is presumably the position with no fractional offset,
 * where rounding mode would not matter - confirm.
 */
02104 #define put_qpel8_mc00_c ff_put_pixels8x8_c
02105 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
02106 #define put_qpel16_mc00_c ff_put_pixels16x16_c
02107 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
02108 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
02109 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
02110
/*
 * The "#if 1" below is always true; its matching #endif follows the
 * H264_LOWPASS / H264_MC instantiations further down.
 *
 * H264_LOWPASS(OPNAME, OP, OP2) expands to the 6-tap (1, -5, 20, 20, -5, 1)
 * interpolation kernels for square blocks of N = 2, 4, 8 and 16 pixels:
 *
 *   <OPNAME>h264_qpelN_h_lowpass(dst, src, dstStride, srcStride)
 *       filters along each row; for every row it reads src[-2] .. src[N+2].
 *   <OPNAME>h264_qpelN_v_lowpass(dst, src, dstStride, srcStride)
 *       filters along each column; for every column it reads rows
 *       -2 .. N+2 relative to src.
 *   <OPNAME>h264_qpelN_hv_lowpass(dst, tmp, src, dstStride, tmpStride, srcStride)
 *       two passes: first the row filter is run over N+5 source rows,
 *       starting two rows above src, and the unnormalised sums are written
 *       to the int16_t scratch buffer tmp (N values per row, tmpStride
 *       elements between rows). tmp is then rewound by (h+5-2) rows so that
 *       tmp[0] is the sum for source row 0 and tmp[-2*tmpStride] is the
 *       first row written, and the column filter is run over tmp.
 *
 * OP(d, sum) stores a single-pass sum and OP2(d, sum) stores a two-pass sum.
 * Both are macros expanded inside the bodies; every body declares
 * cm = ff_cropTbl + MAX_NEG_CROP for them to use.
 *
 * The 2-, 4- and 8-pixel kernels are fully unrolled across the filtered
 * direction. The 16-pixel kernels call the 8-pixel kernel four times, once
 * per 8x8 quadrant; the 16-pixel hv kernel reuses the same tmp / tmp+8
 * scratch areas for the lower two quadrants.
 *
 * Only the qpel2 kernels are marked av_unused.
 */
02111 #if 1
02112 #define H264_LOWPASS(OPNAME, OP, OP2) \
02113 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02114 const int h=2;\
02115 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02116 int i;\
02117 for(i=0; i<h; i++)\
02118 {\
02119 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02120 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02121 dst+=dstStride;\
02122 src+=srcStride;\
02123 }\
02124 }\
02125 \
02126 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02127 const int w=2;\
02128 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02129 int i;\
02130 for(i=0; i<w; i++)\
02131 {\
02132 const int srcB= src[-2*srcStride];\
02133 const int srcA= src[-1*srcStride];\
02134 const int src0= src[0 *srcStride];\
02135 const int src1= src[1 *srcStride];\
02136 const int src2= src[2 *srcStride];\
02137 const int src3= src[3 *srcStride];\
02138 const int src4= src[4 *srcStride];\
02139 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02140 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02141 dst++;\
02142 src++;\
02143 }\
02144 }\
02145 \
02146 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02147 const int h=2;\
02148 const int w=2;\
02149 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02150 int i;\
02151 src -= 2*srcStride;\
02152 for(i=0; i<h+5; i++)\
02153 {\
02154 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02155 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02156 tmp+=tmpStride;\
02157 src+=srcStride;\
02158 }\
02159 tmp -= tmpStride*(h+5-2);\
02160 for(i=0; i<w; i++)\
02161 {\
02162 const int tmpB= tmp[-2*tmpStride];\
02163 const int tmpA= tmp[-1*tmpStride];\
02164 const int tmp0= tmp[0 *tmpStride];\
02165 const int tmp1= tmp[1 *tmpStride];\
02166 const int tmp2= tmp[2 *tmpStride];\
02167 const int tmp3= tmp[3 *tmpStride];\
02168 const int tmp4= tmp[4 *tmpStride];\
02169 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02170 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02171 dst++;\
02172 tmp++;\
02173 }\
02174 }\
02175 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02176 const int h=4;\
02177 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02178 int i;\
02179 for(i=0; i<h; i++)\
02180 {\
02181 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02182 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02183 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
02184 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
02185 dst+=dstStride;\
02186 src+=srcStride;\
02187 }\
02188 }\
02189 \
02190 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02191 const int w=4;\
02192 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02193 int i;\
02194 for(i=0; i<w; i++)\
02195 {\
02196 const int srcB= src[-2*srcStride];\
02197 const int srcA= src[-1*srcStride];\
02198 const int src0= src[0 *srcStride];\
02199 const int src1= src[1 *srcStride];\
02200 const int src2= src[2 *srcStride];\
02201 const int src3= src[3 *srcStride];\
02202 const int src4= src[4 *srcStride];\
02203 const int src5= src[5 *srcStride];\
02204 const int src6= src[6 *srcStride];\
02205 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02206 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02207 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02208 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02209 dst++;\
02210 src++;\
02211 }\
02212 }\
02213 \
02214 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02215 const int h=4;\
02216 const int w=4;\
02217 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02218 int i;\
02219 src -= 2*srcStride;\
02220 for(i=0; i<h+5; i++)\
02221 {\
02222 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02223 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02224 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
02225 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
02226 tmp+=tmpStride;\
02227 src+=srcStride;\
02228 }\
02229 tmp -= tmpStride*(h+5-2);\
02230 for(i=0; i<w; i++)\
02231 {\
02232 const int tmpB= tmp[-2*tmpStride];\
02233 const int tmpA= tmp[-1*tmpStride];\
02234 const int tmp0= tmp[0 *tmpStride];\
02235 const int tmp1= tmp[1 *tmpStride];\
02236 const int tmp2= tmp[2 *tmpStride];\
02237 const int tmp3= tmp[3 *tmpStride];\
02238 const int tmp4= tmp[4 *tmpStride];\
02239 const int tmp5= tmp[5 *tmpStride];\
02240 const int tmp6= tmp[6 *tmpStride];\
02241 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02242 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02243 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02244 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02245 dst++;\
02246 tmp++;\
02247 }\
02248 }\
02249 \
02250 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02251 const int h=8;\
02252 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02253 int i;\
02254 for(i=0; i<h; i++)\
02255 {\
02256 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
02257 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
02258 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
02259 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
02260 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
02261 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
02262 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
02263 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
02264 dst+=dstStride;\
02265 src+=srcStride;\
02266 }\
02267 }\
02268 \
02269 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02270 const int w=8;\
02271 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02272 int i;\
02273 for(i=0; i<w; i++)\
02274 {\
02275 const int srcB= src[-2*srcStride];\
02276 const int srcA= src[-1*srcStride];\
02277 const int src0= src[0 *srcStride];\
02278 const int src1= src[1 *srcStride];\
02279 const int src2= src[2 *srcStride];\
02280 const int src3= src[3 *srcStride];\
02281 const int src4= src[4 *srcStride];\
02282 const int src5= src[5 *srcStride];\
02283 const int src6= src[6 *srcStride];\
02284 const int src7= src[7 *srcStride];\
02285 const int src8= src[8 *srcStride];\
02286 const int src9= src[9 *srcStride];\
02287 const int src10=src[10*srcStride];\
02288 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02289 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02290 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02291 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02292 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
02293 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
02294 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
02295 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
02296 dst++;\
02297 src++;\
02298 }\
02299 }\
02300 \
02301 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02302 const int h=8;\
02303 const int w=8;\
02304 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02305 int i;\
02306 src -= 2*srcStride;\
02307 for(i=0; i<h+5; i++)\
02308 {\
02309 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
02310 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
02311 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
02312 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
02313 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
02314 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
02315 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
02316 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
02317 tmp+=tmpStride;\
02318 src+=srcStride;\
02319 }\
02320 tmp -= tmpStride*(h+5-2);\
02321 for(i=0; i<w; i++)\
02322 {\
02323 const int tmpB= tmp[-2*tmpStride];\
02324 const int tmpA= tmp[-1*tmpStride];\
02325 const int tmp0= tmp[0 *tmpStride];\
02326 const int tmp1= tmp[1 *tmpStride];\
02327 const int tmp2= tmp[2 *tmpStride];\
02328 const int tmp3= tmp[3 *tmpStride];\
02329 const int tmp4= tmp[4 *tmpStride];\
02330 const int tmp5= tmp[5 *tmpStride];\
02331 const int tmp6= tmp[6 *tmpStride];\
02332 const int tmp7= tmp[7 *tmpStride];\
02333 const int tmp8= tmp[8 *tmpStride];\
02334 const int tmp9= tmp[9 *tmpStride];\
02335 const int tmp10=tmp[10*tmpStride];\
02336 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02337 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02338 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02339 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02340 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
02341 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
02342 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
02343 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
02344 dst++;\
02345 tmp++;\
02346 }\
02347 }\
02348 \
02349 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02350 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02351 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02352 src += 8*srcStride;\
02353 dst += 8*dstStride;\
02354 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02355 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02356 }\
02357 \
02358 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02359 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02360 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02361 src += 8*srcStride;\
02362 dst += 8*dstStride;\
02363 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02364 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02365 }\
02366 \
02367 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02368 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02369 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02370 src += 8*srcStride;\
02371 dst += 8*dstStride;\
02372 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02373 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02374 }\
02375
/*
 * H264_MC(OPNAME, SIZE) expands to the sixteen entry points
 * <OPNAME>h264_qpel<SIZE>_mcXY_c(dst, src, stride), X and Y in 0..3.
 * NOTE(review): X / Y look like the horizontal / vertical quarter-sample
 * offsets - confirm against the table that registers these functions.
 *
 * How each position is built, as written below:
 *   mc00          <OPNAME>pixels<SIZE>_c on src (marked av_unused)
 *   mc20          h lowpass, applied directly with OPNAME
 *   mc02          v lowpass, applied directly with OPNAME
 *   mc22          hv lowpass, applied directly with OPNAME
 *   mc10 / mc30   _l2 of src (mc10) or src+1 (mc30) with the h lowpass
 *   mc01 / mc03   _l2 of full_mid (mc01) or full_mid+SIZE (mc03) with the
 *                 v lowpass
 *   mc11 / mc31 / mc13 / mc33
 *                 _l2 of an h lowpass (taken at src, or at src+stride for
 *                 mc13 / mc33) with a v lowpass (taken at src, or one
 *                 column to the right for mc31 / mc33)
 *   mc21 / mc23   _l2 of the h lowpass (at src, or src+stride for mc23)
 *                 with the hv lowpass
 *   mc12 / mc32   _l2 of the v lowpass (at src, or one column to the right
 *                 for mc32) with the hv lowpass
 * The _l2 helpers are defined elsewhere; presumably they combine their two
 * sources by averaging - TODO confirm.
 *
 * Intermediate planes (half, halfH, halfV, halfHV) are always produced with
 * the put_ kernels and have a row stride of SIZE; OPNAME is applied only in
 * the final step that writes dst.
 *
 * Vertical filtering never reads the picture directly: copy_block<SIZE>
 * first copies SIZE+5 rows, starting two rows above src, into `full` with a
 * row stride of SIZE, and full_mid = full + SIZE*2 is the row that
 * corresponds to src.
 *
 * tmp is the int16_t scratch plane (SIZE x (SIZE+5)) required by the hv
 * kernels.
 */
02376 #define H264_MC(OPNAME, SIZE) \
02377 static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
02378 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
02379 }\
02380 \
02381 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
02382 uint8_t half[SIZE*SIZE];\
02383 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02384 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
02385 }\
02386 \
02387 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
02388 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
02389 }\
02390 \
02391 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
02392 uint8_t half[SIZE*SIZE];\
02393 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02394 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
02395 }\
02396 \
02397 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
02398 uint8_t full[SIZE*(SIZE+5)];\
02399 uint8_t * const full_mid= full + SIZE*2;\
02400 uint8_t half[SIZE*SIZE];\
02401 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02402 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02403 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
02404 }\
02405 \
02406 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
02407 uint8_t full[SIZE*(SIZE+5)];\
02408 uint8_t * const full_mid= full + SIZE*2;\
02409 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02410 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
02411 }\
02412 \
02413 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
02414 uint8_t full[SIZE*(SIZE+5)];\
02415 uint8_t * const full_mid= full + SIZE*2;\
02416 uint8_t half[SIZE*SIZE];\
02417 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02418 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02419 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
02420 }\
02421 \
02422 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
02423 uint8_t full[SIZE*(SIZE+5)];\
02424 uint8_t * const full_mid= full + SIZE*2;\
02425 uint8_t halfH[SIZE*SIZE];\
02426 uint8_t halfV[SIZE*SIZE];\
02427 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02428 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02429 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02430 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02431 }\
02432 \
02433 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02434 uint8_t full[SIZE*(SIZE+5)];\
02435 uint8_t * const full_mid= full + SIZE*2;\
02436 uint8_t halfH[SIZE*SIZE];\
02437 uint8_t halfV[SIZE*SIZE];\
02438 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02439 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02440 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02441 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02442 }\
02443 \
02444 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02445 uint8_t full[SIZE*(SIZE+5)];\
02446 uint8_t * const full_mid= full + SIZE*2;\
02447 uint8_t halfH[SIZE*SIZE];\
02448 uint8_t halfV[SIZE*SIZE];\
02449 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02450 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02451 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02452 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02453 }\
02454 \
02455 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02456 uint8_t full[SIZE*(SIZE+5)];\
02457 uint8_t * const full_mid= full + SIZE*2;\
02458 uint8_t halfH[SIZE*SIZE];\
02459 uint8_t halfV[SIZE*SIZE];\
02460 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02461 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02462 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02463 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02464 }\
02465 \
02466 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02467 int16_t tmp[SIZE*(SIZE+5)];\
02468 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
02469 }\
02470 \
02471 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02472 int16_t tmp[SIZE*(SIZE+5)];\
02473 uint8_t halfH[SIZE*SIZE];\
02474 uint8_t halfHV[SIZE*SIZE];\
02475 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02476 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02477 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02478 }\
02479 \
02480 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02481 int16_t tmp[SIZE*(SIZE+5)];\
02482 uint8_t halfH[SIZE*SIZE];\
02483 uint8_t halfHV[SIZE*SIZE];\
02484 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02485 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02486 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02487 }\
02488 \
02489 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02490 uint8_t full[SIZE*(SIZE+5)];\
02491 uint8_t * const full_mid= full + SIZE*2;\
02492 int16_t tmp[SIZE*(SIZE+5)];\
02493 uint8_t halfV[SIZE*SIZE];\
02494 uint8_t halfHV[SIZE*SIZE];\
02495 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02496 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02497 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02498 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02499 }\
02500 \
02501 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02502 uint8_t full[SIZE*(SIZE+5)];\
02503 uint8_t * const full_mid= full + SIZE*2;\
02504 int16_t tmp[SIZE*(SIZE+5)];\
02505 uint8_t halfV[SIZE*SIZE];\
02506 uint8_t halfHV[SIZE*SIZE];\
02507 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02508 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02509 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02510 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02511 }\
02512
/*
 * Store operators for the H.264 kernels.
 * The six filter taps (1, -5, 20, 20, -5, 1) sum to 32, so:
 *   op_*   normalise a single-pass sum:  (b + 16)  >> 5
 *   op2_*  normalise a two-pass sum:     (b + 512) >> 10
 * The normalised value is looked up through `cm`, a local the expanding
 * function must provide. The _put forms overwrite a; the _avg forms average
 * the result with the current value of a, rounding up (+1).
 */
02513 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02514
02515 #define op_put(a, b) a = cm[((b) + 16)>>5]
02516 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
02517 #define op2_put(a, b) a = cm[((b) + 512)>>10]
02518
/*
 * Generate the put_ and avg_ lowpass kernels, then the mcXY entry points.
 * put_ entry points are generated for sizes 2, 4, 8 and 16; avg_ entry
 * points only for 4, 8 and 16 (there is no avg_ size-2 family).
 */
02519 H264_LOWPASS(put_ , op_put, op2_put)
02520 H264_LOWPASS(avg_ , op_avg, op2_avg)
02521 H264_MC(put_, 2)
02522 H264_MC(put_, 4)
02523 H264_MC(put_, 8)
02524 H264_MC(put_, 16)
02525 H264_MC(avg_, 4)
02526 H264_MC(avg_, 8)
02527 H264_MC(avg_, 16)
02528
/* The operators are only needed during the expansions above. */
02529 #undef op_avg
02530 #undef op_put
02531 #undef op2_avg
02532 #undef op2_put
/* Closes the unconditional "#if 1" opened before H264_LOWPASS. */
02533 #endif
02534
/*
 * From this point on, the 8- and 16-pixel H.264 mc00 names expand to the
 * shared ff_*_pixels8x8_c / ff_*_pixels16x16_c wrappers. Because these
 * defines come after the H264_MC expansions, later references no longer
 * reach the static mc00 functions H264_MC generated for those sizes
 * (presumably the reason those are marked av_unused).
 */
02535 #define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
02536 #define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
02537 #define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
02538 #define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
02539
02540 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02541 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02542 int i;
02543
02544 for(i=0; i<h; i++){
02545 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02546 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02547 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02548 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02549 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02550 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02551 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02552 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02553 dst+=dstStride;
02554 src+=srcStride;
02555 }
02556 }
02557
/**
 * Fixed-size wrapper: forwards to put_pixels8_c() with 8 as the last
 * argument (presumably the row count -- confirm against put_pixels8_c).
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
/**
 * Fixed-size wrapper: forwards to avg_pixels8_c() with 8 as the last
 * argument (presumably the row count -- confirm against avg_pixels8_c).
 */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_c(dst, src, stride, 8);
}
/**
 * Fixed-size wrapper: forwards to put_pixels16_c() with 16 as the last
 * argument (presumably the row count -- confirm against put_pixels16_c).
 */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_c(dst, src, stride, 16);
}
/**
 * Fixed-size wrapper: forwards to avg_pixels16_c() with 16 as the last
 * argument (presumably the row count -- confirm against avg_pixels16_c).
 */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_c(dst, src, stride, 16);
}
02570
#if CONFIG_RV40_DECODER
/* The RV40 qpel mc33 entries are implemented with the generic xy2
 * put/avg pixel routines, for 16x16 and 8x8 blocks. */

static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_c(dst, src, stride, 16);
}

static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_c(dst, src, stride, 8);
}

static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif
02585
02586 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02587 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02588 int i;
02589
02590 for(i=0; i<w; i++){
02591 const int src_1= src[ -srcStride];
02592 const int src0 = src[0 ];
02593 const int src1 = src[ srcStride];
02594 const int src2 = src[2*srcStride];
02595 const int src3 = src[3*srcStride];
02596 const int src4 = src[4*srcStride];
02597 const int src5 = src[5*srcStride];
02598 const int src6 = src[6*srcStride];
02599 const int src7 = src[7*srcStride];
02600 const int src8 = src[8*srcStride];
02601 const int src9 = src[9*srcStride];
02602 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02603 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02604 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02605 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02606 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02607 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02608 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02609 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02610 src++;
02611 dst++;
02612 }
02613 }
02614
/**
 * Filter the 8x8 block horizontally into a scratch buffer, then combine it
 * with the unfiltered source at src through put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
02620
/** Write the horizontally filtered 8x8 block straight into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
02624
/**
 * Filter the 8x8 block horizontally into a scratch buffer, then combine it
 * with the source shifted one sample to the right (src+1) through
 * put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, filtered, stride, stride, 8, 8);
}
02630
/** Write the vertically filtered 8-column block straight into dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
02634
/**
 * Combine a vertically filtered block with a horizontally-then-vertically
 * filtered block.
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * following vertical pass reads rows -1..9 relative to its start (hpass+8).
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hpass[8*11];
    uint8_t vpass[8*8];
    uint8_t hvpass[8*8];

    wmv2_mspel8_h_lowpass(hpass, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vpass, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvpass, hpass + 8, 8, 8, 8);
    put_pixels8_l2(dst, vpass, hvpass, stride, 8, 8, 8);
}
/**
 * Same as the mc12 variant except that the purely vertical pass starts one
 * sample to the right (src+1).
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * following vertical pass reads rows -1..9 relative to its start (hpass+8).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hpass[8*11];
    uint8_t vpass[8*8];
    uint8_t hvpass[8*8];

    wmv2_mspel8_h_lowpass(hpass, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vpass, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvpass, hpass + 8, 8, 8, 8);
    put_pixels8_l2(dst, vpass, hvpass, stride, 8, 8, 8);
}
/**
 * Horizontal pass over 11 rows (starting one row above src) into a scratch
 * buffer, followed by a vertical pass from that buffer directly into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hpass[8*11];

    wmv2_mspel8_h_lowpass(hpass, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hpass + 8, stride, 8, 8);
}
02658
02659 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
02660 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
02661 int x;
02662 const int strength= ff_h263_loop_filter_strength[qscale];
02663
02664 for(x=0; x<8; x++){
02665 int d1, d2, ad1;
02666 int p0= src[x-2*stride];
02667 int p1= src[x-1*stride];
02668 int p2= src[x+0*stride];
02669 int p3= src[x+1*stride];
02670 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02671
02672 if (d<-2*strength) d1= 0;
02673 else if(d<- strength) d1=-2*strength - d;
02674 else if(d< strength) d1= d;
02675 else if(d< 2*strength) d1= 2*strength - d;
02676 else d1= 0;
02677
02678 p1 += d1;
02679 p2 -= d1;
02680 if(p1&256) p1= ~(p1>>31);
02681 if(p2&256) p2= ~(p2>>31);
02682
02683 src[x-1*stride] = p1;
02684 src[x+0*stride] = p2;
02685
02686 ad1= FFABS(d1)>>1;
02687
02688 d2= av_clip((p0-p3)/4, -ad1, ad1);
02689
02690 src[x-2*stride] = p0 - d2;
02691 src[x+ stride] = p3 + d2;
02692 }
02693 }
02694 }
02695
02696 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
02697 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
02698 int y;
02699 const int strength= ff_h263_loop_filter_strength[qscale];
02700
02701 for(y=0; y<8; y++){
02702 int d1, d2, ad1;
02703 int p0= src[y*stride-2];
02704 int p1= src[y*stride-1];
02705 int p2= src[y*stride+0];
02706 int p3= src[y*stride+1];
02707 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02708
02709 if (d<-2*strength) d1= 0;
02710 else if(d<- strength) d1=-2*strength - d;
02711 else if(d< strength) d1= d;
02712 else if(d< 2*strength) d1= 2*strength - d;
02713 else d1= 0;
02714
02715 p1 += d1;
02716 p2 -= d1;
02717 if(p1&256) p1= ~(p1>>31);
02718 if(p2&256) p2= ~(p2>>31);
02719
02720 src[y*stride-1] = p1;
02721 src[y*stride+0] = p2;
02722
02723 ad1= FFABS(d1)>>1;
02724
02725 d2= av_clip((p0-p3)/4, -ad1, ad1);
02726
02727 src[y*stride-2] = p0 - d2;
02728 src[y*stride+1] = p3 + d2;
02729 }
02730 }
02731 }
02732
/**
 * In-place separable (1, 2, 1) smoothing of an 8x8 block.
 *
 * The vertical pass leaves the first and last row unfiltered (scaled by 4 so
 * all intermediates share the same scale); the horizontal pass leaves the
 * first and last column unfiltered and only undoes that scale for them.
 *
 * @param src    top-left sample of the block, modified in place
 * @param stride distance between two rows of src
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[64];               /* vertically filtered samples, scaled by 4 */
    int row, col;

    /* vertical pass: src -> vert */
    for (row = 0; row < 8; row++) {
        const uint8_t *in  = src  + row*stride;
        int           *out = vert + row*8;

        if (row == 0 || row == 7) {
            for (col = 0; col < 8; col++)
                out[col] = 4*in[col];
        } else {
            for (col = 0; col < 8; col++)
                out[col] = in[col - stride] + 2*in[col] + in[col + stride];
        }
    }

    /* horizontal pass: vert -> src */
    for (row = 0; row < 8; row++) {
        const int *in  = vert + row*8;
        uint8_t   *out = src  + row*stride;

        out[0] = (in[0] + 2)>>2;
        out[7] = (in[7] + 2)>>2;
        for (col = 1; col < 7; col++)
            out[col] = (in[col - 1] + 2*in[col] + in[col + 1] + 8)>>4;
    }
}
02759
02760 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
02761 {
02762 int s, i;
02763
02764 s = 0;
02765 for(i=0;i<h;i++) {
02766 s += abs(pix1[0] - pix2[0]);
02767 s += abs(pix1[1] - pix2[1]);
02768 s += abs(pix1[2] - pix2[2]);
02769 s += abs(pix1[3] - pix2[3]);
02770 s += abs(pix1[4] - pix2[4]);
02771 s += abs(pix1[5] - pix2[5]);
02772 s += abs(pix1[6] - pix2[6]);
02773 s += abs(pix1[7] - pix2[7]);
02774 s += abs(pix1[8] - pix2[8]);
02775 s += abs(pix1[9] - pix2[9]);
02776 s += abs(pix1[10] - pix2[10]);
02777 s += abs(pix1[11] - pix2[11]);
02778 s += abs(pix1[12] - pix2[12]);
02779 s += abs(pix1[13] - pix2[13]);
02780 s += abs(pix1[14] - pix2[14]);
02781 s += abs(pix1[15] - pix2[15]);
02782 pix1 += line_size;
02783 pix2 += line_size;
02784 }
02785 return s;
02786 }
02787
/**
 * Sum of absolute differences between a 16-wide block and the avg2() of each
 * pix2 sample with its right neighbour (reads pix2[0..16] per row).
 *
 * @param v unused
 * @param h number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02815
/**
 * Sum of absolute differences between a 16-wide block and the avg2() of each
 * pix2 sample with the sample one row below it (reads h+1 rows of pix2).
 *
 * @param v unused
 * @param h number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02845
/**
 * Sum of absolute differences between a 16-wide block and the avg4() of each
 * 2x2 neighbourhood of pix2 (current, right, below, below-right).
 *
 * @param v unused
 * @param h number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02875
02876 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
02877 {
02878 int s, i;
02879
02880 s = 0;
02881 for(i=0;i<h;i++) {
02882 s += abs(pix1[0] - pix2[0]);
02883 s += abs(pix1[1] - pix2[1]);
02884 s += abs(pix1[2] - pix2[2]);
02885 s += abs(pix1[3] - pix2[3]);
02886 s += abs(pix1[4] - pix2[4]);
02887 s += abs(pix1[5] - pix2[5]);
02888 s += abs(pix1[6] - pix2[6]);
02889 s += abs(pix1[7] - pix2[7]);
02890 pix1 += line_size;
02891 pix2 += line_size;
02892 }
02893 return s;
02894 }
02895
/**
 * Sum of absolute differences between an 8-wide block and the avg2() of each
 * pix2 sample with its right neighbour (reads pix2[0..8] per row).
 *
 * @param v unused
 * @param h number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02915
/**
 * Sum of absolute differences between an 8-wide block and the avg2() of each
 * pix2 sample with the sample one row below it (reads h+1 rows of pix2).
 *
 * @param v unused
 * @param h number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02937
/**
 * Sum of absolute differences between an 8-wide block and the avg4() of each
 * 2x2 neighbourhood of pix2 (current, right, below, below-right).
 *
 * @param v unused
 * @param h number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02959
02960 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
02961 MpegEncContext *c = v;
02962 int score1=0;
02963 int score2=0;
02964 int x,y;
02965
02966 for(y=0; y<h; y++){
02967 for(x=0; x<16; x++){
02968 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
02969 }
02970 if(y+1<h){
02971 for(x=0; x<15; x++){
02972 score2+= FFABS( s1[x ] - s1[x +stride]
02973 - s1[x+1] + s1[x+1+stride])
02974 -FFABS( s2[x ] - s2[x +stride]
02975 - s2[x+1] + s2[x+1+stride]);
02976 }
02977 }
02978 s1+= stride;
02979 s2+= stride;
02980 }
02981
02982 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
02983 else return score1 + FFABS(score2)*8;
02984 }
02985
02986 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
02987 MpegEncContext *c = v;
02988 int score1=0;
02989 int score2=0;
02990 int x,y;
02991
02992 for(y=0; y<h; y++){
02993 for(x=0; x<8; x++){
02994 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
02995 }
02996 if(y+1<h){
02997 for(x=0; x<7; x++){
02998 score2+= FFABS( s1[x ] - s1[x +stride]
02999 - s1[x+1] + s1[x+1+stride])
03000 -FFABS( s2[x ] - s2[x +stride]
03001 - s2[x+1] + s2[x+1+stride]);
03002 }
03003 }
03004 s1+= stride;
03005 s2+= stride;
03006 }
03007
03008 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03009 else return score1 + FFABS(score2)*8;
03010 }
03011
03012 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
03013 int i;
03014 unsigned int sum=0;
03015
03016 for(i=0; i<8*8; i++){
03017 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
03018 int w= weight[i];
03019 b>>= RECON_SHIFT;
03020 assert(-512<b && b<512);
03021
03022 sum += (w*b)*(w*b)>>4;
03023 }
03024 return sum>>2;
03025 }
03026
03027 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
03028 int i;
03029
03030 for(i=0; i<8*8; i++){
03031 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
03032 }
03033 }
03034
03043 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03044 {
03045 int i;
03046 DCTELEM temp[64];
03047
03048 if(last<=0) return;
03049
03050
03051 for(i=0; i<=last; i++){
03052 const int j= scantable[i];
03053 temp[j]= block[j];
03054 block[j]=0;
03055 }
03056
03057 for(i=0; i<=last; i++){
03058 const int j= scantable[i];
03059 const int perm_j= permutation[j];
03060 block[perm_j]= temp[j];
03061 }
03062 }
03063
/** Comparison function that ignores all of its arguments and reports 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
03067
03068 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
03069 int i;
03070
03071 memset(cmp, 0, sizeof(void*)*6);
03072
03073 for(i=0; i<6; i++){
03074 switch(type&0xFF){
03075 case FF_CMP_SAD:
03076 cmp[i]= c->sad[i];
03077 break;
03078 case FF_CMP_SATD:
03079 cmp[i]= c->hadamard8_diff[i];
03080 break;
03081 case FF_CMP_SSE:
03082 cmp[i]= c->sse[i];
03083 break;
03084 case FF_CMP_DCT:
03085 cmp[i]= c->dct_sad[i];
03086 break;
03087 case FF_CMP_DCT264:
03088 cmp[i]= c->dct264_sad[i];
03089 break;
03090 case FF_CMP_DCTMAX:
03091 cmp[i]= c->dct_max[i];
03092 break;
03093 case FF_CMP_PSNR:
03094 cmp[i]= c->quant_psnr[i];
03095 break;
03096 case FF_CMP_BIT:
03097 cmp[i]= c->bit[i];
03098 break;
03099 case FF_CMP_RD:
03100 cmp[i]= c->rd[i];
03101 break;
03102 case FF_CMP_VSAD:
03103 cmp[i]= c->vsad[i];
03104 break;
03105 case FF_CMP_VSSE:
03106 cmp[i]= c->vsse[i];
03107 break;
03108 case FF_CMP_ZERO:
03109 cmp[i]= zero_cmp;
03110 break;
03111 case FF_CMP_NSSE:
03112 cmp[i]= c->nsse[i];
03113 break;
03114 #if CONFIG_DWT
03115 case FF_CMP_W53:
03116 cmp[i]= c->w53[i];
03117 break;
03118 case FF_CMP_W97:
03119 cmp[i]= c->w97[i];
03120 break;
03121 #endif
03122 default:
03123 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
03124 }
03125 }
03126 }
03127
03128 static void clear_block_c(DCTELEM *block)
03129 {
03130 memset(block, 0, sizeof(DCTELEM)*64);
03131 }
03132
03136 static void clear_blocks_c(DCTELEM *blocks)
03137 {
03138 memset(blocks, 0, sizeof(DCTELEM)*6*64);
03139 }
03140
03141 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
03142 long i;
03143 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03144 long a = *(long*)(src+i);
03145 long b = *(long*)(dst+i);
03146 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
03147 }
03148 for(; i<w; i++)
03149 dst[i+0] += src[i+0];
03150 }
03151
03152 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03153 long i;
03154 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03155 long a = *(long*)(src1+i);
03156 long b = *(long*)(src2+i);
03157 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
03158 }
03159 for(; i<w; i++)
03160 dst[i] = src1[i]+src2[i];
03161 }
03162
03163 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03164 long i;
03165 #if !HAVE_FAST_UNALIGNED
03166 if((long)src2 & (sizeof(long)-1)){
03167 for(i=0; i+7<w; i+=8){
03168 dst[i+0] = src1[i+0]-src2[i+0];
03169 dst[i+1] = src1[i+1]-src2[i+1];
03170 dst[i+2] = src1[i+2]-src2[i+2];
03171 dst[i+3] = src1[i+3]-src2[i+3];
03172 dst[i+4] = src1[i+4]-src2[i+4];
03173 dst[i+5] = src1[i+5]-src2[i+5];
03174 dst[i+6] = src1[i+6]-src2[i+6];
03175 dst[i+7] = src1[i+7]-src2[i+7];
03176 }
03177 }else
03178 #endif
03179 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03180 long a = *(long*)(src1+i);
03181 long b = *(long*)(src2+i);
03182 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
03183 }
03184 for(; i<w; i++)
03185 dst[i+0] = src1[i+0]-src2[i+0];
03186 }
03187
/**
 * Reconstruct w bytes from residuals using a median predictor.
 *
 * For every position the predictor is mid_pred() of the running value, the
 * reference sample src1[i] and (running + src1[i] - previous reference),
 * the latter reduced to 8 bits; diff[i] is added on top, wrapping at 8 bits.
 *
 * @param left     in: initial running value, out: last reconstructed value
 * @param left_top in: initial previous reference, out: last src1 sample read
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur    = *left;
    uint8_t corner = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int ref = src1[i];

        cur    = mid_pred(cur, ref, (cur + ref - corner)&0xFF) + diff[i];
        corner = ref;
        dst[i] = cur;
    }

    *left     = cur;
    *left_top = corner;
}
03204
/**
 * Compute w residuals against a median predictor.
 *
 * For every position the predictor is mid_pred() of the previous src2
 * sample, the reference sample src1[i] and (previous + src1[i] - previous
 * reference), the latter reduced to 8 bits; dst[i] is src2[i] minus that
 * predictor, wrapping at 8 bits.
 *
 * @param left     in: initial previous sample, out: last src2 sample read
 * @param left_top in: initial previous reference, out: last src1 sample read
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur    = *left;
    uint8_t corner = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int ref  = src1[i];
        const int pred = mid_pred(cur, ref, (cur + ref - corner)&0xFF);

        corner = ref;
        cur    = src2[i];
        dst[i] = cur - pred;
    }

    *left     = cur;
    *left_top = corner;
}
03222
/**
 * Running sum: dst[i] receives the low 8 bits of acc after src[i] has been
 * added to it.
 *
 * @param acc initial accumulator value
 * @return the final accumulator (not reduced to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int pos = 0;

    /* main part, two samples per iteration */
    while (pos < w - 1) {
        acc     += src[pos];
        dst[pos] = acc;
        pos++;
        acc     += src[pos];
        dst[pos] = acc;
        pos++;
    }

    /* leftover sample when w is odd */
    while (pos < w) {
        acc     += src[pos];
        dst[pos] = acc;
        pos++;
    }

    return acc;
}
03241
/* Byte offsets of the components inside one 4-byte pixel; they depend on
 * the byte order and are local to the function below. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-component running sum over w 4-byte pixels.
 *
 * Each component accumulator is increased by the matching source byte and
 * its low 8 bits are stored to the matching destination byte.
 *
 * @param red,green,blue,alpha in: initial accumulators, out: final values
 *                             (not reduced to 8 bits)
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int r = *red;
    int g = *green;
    int b = *blue;
    int a = *alpha;
    int i;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4*i;
        uint8_t       *out = dst + 4*i;

        b += in[B];
        g += in[G];
        r += in[R];
        a += in[A];

        out[B] = b;
        out[G] = g;
        out[R] = r;
        out[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
03282
/* o1 = i1 + i2, o2 = i1 - i2. Expands to two statements, so it must not be
 * used as the unbraced body of an if/for. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* In-place butterfly: x becomes x+y and y becomes x-y (both computed from
 * the original values). */
#define BUTTERFLY1(x,y) \
{\
    const int bfly_p = x;\
    const int bfly_q = y;\
    x = bfly_p + bfly_q;\
    y = bfly_p - bfly_q;\
}

/* |x+y| + |x-y| without storing the butterfly outputs. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03297
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * the difference src - dst.
 *
 * Three butterfly stages are applied along every row, then along every
 * column; the last column stage is folded into the absolute-value sum.
 *
 * @param s unused
 * @param h must be 8
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* row transform of the pixel differences */
    for (i = 0; i < 8; i++) {
        int *const row           = temp + 8*i;
        const uint8_t *const sp  = src + stride*i;
        const uint8_t *const dp  = dst + stride*i;

        BUTTERFLY2(row[0], row[1], sp[0]-dp[0], sp[1]-dp[1]);
        BUTTERFLY2(row[2], row[3], sp[2]-dp[2], sp[3]-dp[3]);
        BUTTERFLY2(row[4], row[5], sp[4]-dp[4], sp[5]-dp[5]);
        BUTTERFLY2(row[6], row[7], sp[6]-dp[6], sp[7]-dp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column transform, accumulating absolute values of the last stage */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }
    return sum;
}
03349
/**
 * Sum of absolute values of the 8x8 butterfly (Hadamard-style) transform of
 * src itself, with the |temp[0] + temp[32]| term removed at the end.
 *
 * Three butterfly stages are applied along every row, then along every
 * column; the last column stage is folded into the absolute-value sum.
 *
 * @param s     unused
 * @param dummy unused
 * @param h     must be 8
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* row transform of the pixels */
    for (i = 0; i < 8; i++) {
        int *const row          = temp + 8*i;
        const uint8_t *const sp = src + stride*i;

        BUTTERFLY2(row[0], row[1], sp[0], sp[1]);
        BUTTERFLY2(row[2], row[3], sp[2], sp[3]);
        BUTTERFLY2(row[4], row[5], sp[4], sp[5]);
        BUTTERFLY2(row[6], row[7], sp[6], sp[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* column transform, accumulating absolute values of the last stage */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    /* take the first-column sum term back out of the score */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
03397
03398 static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03399 MpegEncContext * const s= (MpegEncContext *)c;
03400 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03401
03402 assert(h==8);
03403
03404 s->dsp.diff_pixels(temp, src1, src2, stride);
03405 s->dsp.fdct(temp);
03406 return s->dsp.sum_abs_dctelem(temp);
03407 }
03408
#if CONFIG_GPL
/* One 8-point 1-D integer transform pass. The caller defines SRC(x) to read
 * input x and DST(x,v) to consume output x before expanding the macro. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Sum of absolute transform coefficients of the 8x8 pixel difference, using
 * the DCT8_1D transform on rows and then on columns.
 *
 * The row pass writes its outputs back into the block; the column pass only
 * accumulates the absolute value of each output.
 *
 * @param c MpegEncContext pointer (only its dsp.diff_pixels is used)
 * @param h unused
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int sum = 0;
    int i;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* pass 1: transform every row in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for (i = 0; i < 8; i++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    /* pass 2: transform every column, summing absolute outputs */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for (i = 0; i < 8; i++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    return sum;
}
#endif
03461
03462 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03463 MpegEncContext * const s= (MpegEncContext *)c;
03464 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03465 int sum=0, i;
03466
03467 assert(h==8);
03468
03469 s->dsp.diff_pixels(temp, src1, src2, stride);
03470 s->dsp.fdct(temp);
03471
03472 for(i=0; i<64; i++)
03473 sum= FFMAX(sum, FFABS(temp[i]));
03474
03475 return sum;
03476 }
03477
03478 static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03479 MpegEncContext * const s= (MpegEncContext *)c;
03480 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
03481 DCTELEM * const bak = temp+64;
03482 int sum=0, i;
03483
03484 assert(h==8);
03485 s->mb_intra=0;
03486
03487 s->dsp.diff_pixels(temp, src1, src2, stride);
03488
03489 memcpy(bak, temp, 64*sizeof(DCTELEM));
03490
03491 s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03492 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03493 ff_simple_idct(temp);
03494
03495 for(i=0; i<64; i++)
03496 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
03497
03498 return sum;
03499 }
03500
03501 static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03502 MpegEncContext * const s= (MpegEncContext *)c;
03503 const uint8_t *scantable= s->intra_scantable.permutated;
03504 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03505 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
03506 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
03507 int i, last, run, bits, level, distortion, start_i;
03508 const int esc_length= s->ac_esc_length;
03509 uint8_t * length;
03510 uint8_t * last_length;
03511
03512 assert(h==8);
03513
03514 copy_block8(lsrc1, src1, 8, stride, 8);
03515 copy_block8(lsrc2, src2, 8, stride, 8);
03516
03517 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
03518
03519 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03520
03521 bits=0;
03522
03523 if (s->mb_intra) {
03524 start_i = 1;
03525 length = s->intra_ac_vlc_length;
03526 last_length= s->intra_ac_vlc_last_length;
03527 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03528 } else {
03529 start_i = 0;
03530 length = s->inter_ac_vlc_length;
03531 last_length= s->inter_ac_vlc_last_length;
03532 }
03533
03534 if(last>=start_i){
03535 run=0;
03536 for(i=start_i; i<last; i++){
03537 int j= scantable[i];
03538 level= temp[j];
03539
03540 if(level){
03541 level+=64;
03542 if((level&(~127)) == 0){
03543 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03544 }else
03545 bits+= esc_length;
03546 run=0;
03547 }else
03548 run++;
03549 }
03550 i= scantable[last];
03551
03552 level= temp[i] + 64;
03553
03554 assert(level - 64);
03555
03556 if((level&(~127)) == 0){
03557 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03558 }else
03559 bits+= esc_length;
03560
03561 }
03562
03563 if(last>=0){
03564 if(s->mb_intra)
03565 s->dct_unquantize_intra(s, temp, 0, s->qscale);
03566 else
03567 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03568 }
03569
03570 s->dsp.idct_add(lsrc2, 8, temp);
03571
03572 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
03573
03574 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
03575 }
03576
03577 static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03578 MpegEncContext * const s= (MpegEncContext *)c;
03579 const uint8_t *scantable= s->intra_scantable.permutated;
03580 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03581 int i, last, run, bits, level, start_i;
03582 const int esc_length= s->ac_esc_length;
03583 uint8_t * length;
03584 uint8_t * last_length;
03585
03586 assert(h==8);
03587
03588 s->dsp.diff_pixels(temp, src1, src2, stride);
03589
03590 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03591
03592 bits=0;
03593
03594 if (s->mb_intra) {
03595 start_i = 1;
03596 length = s->intra_ac_vlc_length;
03597 last_length= s->intra_ac_vlc_last_length;
03598 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03599 } else {
03600 start_i = 0;
03601 length = s->inter_ac_vlc_length;
03602 last_length= s->inter_ac_vlc_last_length;
03603 }
03604
03605 if(last>=start_i){
03606 run=0;
03607 for(i=start_i; i<last; i++){
03608 int j= scantable[i];
03609 level= temp[j];
03610
03611 if(level){
03612 level+=64;
03613 if((level&(~127)) == 0){
03614 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03615 }else
03616 bits+= esc_length;
03617 run=0;
03618 }else
03619 run++;
03620 }
03621 i= scantable[last];
03622
03623 level= temp[i] + 64;
03624
03625 assert(level - 64);
03626
03627 if((level&(~127)) == 0){
03628 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03629 }else
03630 bits+= esc_length;
03631 }
03632
03633 return bits;
03634 }
03635
/*
 * Generates vsad_intra<size>_c(): sum of absolute differences between each
 * pixel and the pixel one line below it, over `size` columns and the line
 * pairs (0,1) .. (h-2,h-1). The `c` and `dummy` arguments are not used.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h) \
{ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++, s += stride) { \
        const uint8_t *below = s + stride; \
 \
        for (col = 0; col < size; col++) \
            total += FFABS(s[col] - below[col]); \
    } \
 \
    return total; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
03653
/**
 * Vertical SAD of the difference signal s1 - s2 over 16 columns.
 *
 * For every pair of adjacent lines, accumulates
 * |(s1 - s2)[line] - (s1 - s2)[line + 1]| for each column.
 * The `c` argument is not used.
 *
 * @return accumulated score over line pairs (0,1) .. (h-2,h-1)
 */
static int vsad16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride) {
        const uint8_t *next1 = s1 + stride;
        const uint8_t *next2 = s2 + stride;

        for (col = 0; col < 16; col++)
            total += FFABS(s1[col] - s2[col] - next1[col] + next2[col]);
    }

    return total;
}
03668
/* Square of an expression; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))

/*
 * Generates vsse_intra<size>_c(): sum of squared differences between each
 * pixel and the pixel one line below it, over `size` columns and the line
 * pairs (0,1) .. (h-2,h-1). The `c` and `dummy` arguments are not used.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h) \
{ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++, s += stride) { \
        const uint8_t *below = s + stride; \
 \
        for (col = 0; col < size; col++) \
            total += SQ(s[col] - below[col]); \
    } \
 \
    return total; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
03687
/**
 * Vertical SSE of the difference signal s1 - s2 over 16 columns.
 *
 * For every pair of adjacent lines, accumulates the square of
 * (s1 - s2)[line] - (s1 - s2)[line + 1] for each column.
 * The `c` argument is not used.
 *
 * @return accumulated score over line pairs (0,1) .. (h-2,h-1)
 */
static int vsse16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride) {
        const uint8_t *next1 = s1 + stride;
        const uint8_t *next2 = s2 + stride;

        for (col = 0; col < 16; col++)
            total += SQ(s1[col] - s2[col] - next1[col] + next2[col]);
    }

    return total;
}
03702
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first vector (int8_t samples)
 * @param pix2 second vector (int16_t samples)
 * @param size number of elements to compare
 * @return sum over i of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int idx   = 0;

    while (idx < size) {
        const int delta = pix1[idx] - pix2[idx];

        total += delta * delta;
        idx++;
    }

    return total;
}
03711
/*
 * Instantiate the 16-pixel variants of the 8x8 comparison functions.
 * WRAPPER8_16_SQ is defined outside this part of the file; each invocation
 * names an existing 8x8 function (first argument) and the wrapper to
 * generate (second argument). The generated *16_c names are the ones that
 * dsputil_init() installs via SET_CMP_FUNC().
 * NOTE(review): presumably each wrapper covers a 16-wide block by calling
 * the 8x8 function on its sub-blocks and summing -- confirm against the
 * macro definition.
 */
03712 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
03713 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
03714 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
/* The dct264_sad variant is only built when CONFIG_GPL is set. */
03715 #if CONFIG_GPL
03716 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
03717 #endif
03718 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
03719 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
03720 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
03721 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
03722
/**
 * Element-wise product of two float vectors: dst[k] = src0[k] * src1[k].
 *
 * @param dst  output vector
 * @param src0 first input vector
 * @param src1 second input vector
 * @param len  number of elements
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k];
        k++;
    }
}
03728
/**
 * Multiply src0 by src1 read back to front:
 * dst[k] = src0[k] * src1[len - 1 - k].
 *
 * @param dst  output vector
 * @param src0 input vector, read forwards
 * @param src1 input vector, read backwards
 * @param len  number of elements
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
03735
/**
 * Element-wise multiply-add: dst[k] = src0[k] * src1[k] + src2[k].
 *
 * @param dst  output vector
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 addend
 * @param len  number of elements
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
03741
/**
 * Windowed combination of two len-sample inputs into 2*len outputs.
 *
 * For k in [0, len), with m = 2*len - 1 - k the mirrored position:
 *   dst[k] = src0[k] * win[m] - src1[len - 1 - k] * win[k]
 *   dst[m] = src0[k] * win[k] + src1[len - 1 - k] * win[m]
 *
 * @param dst  output, 2*len elements are written
 * @param src0 first input, len elements read front to back
 * @param src1 second input, len elements read back to front
 * @param win  window, 2*len elements are read
 * @param len  half the output length
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int mirror = 2 * len - 1 - k;
        /* Fetch every operand before storing, as both outputs share them. */
        float s0 = src0[k];
        float s1 = src1[len - 1 - k];
        float wi = win[k];
        float wj = win[mirror];

        dst[k]      = s0*wj - s1*wi;
        dst[mirror] = s0*wi + s1*wj;
    }
}
03758
/**
 * Scale a float vector: dst[k] = src[k] * mul.
 *
 * @param dst output vector
 * @param src input vector
 * @param mul scale factor
 * @param len number of elements
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
03766
/**
 * Multiply src by a sequence of 2-element vectors and a scalar.
 *
 * Output pair number p is (src[2p] * sv[p][0] * mul, src[2p+1] * sv[p][1] * mul).
 * Elements are processed two at a time until the index reaches len.
 *
 * @param dst output vector
 * @param src input vector
 * @param sv  array of pointers, one 2-element vector per output pair
 * @param mul scalar applied to every product
 * @param len number of output elements
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int base, lane;

    for (base = 0; base < len; base += 2) {
        const float *vec = sv[base / 2];

        for (lane = 0; lane < 2; lane++)
            dst[base + lane] = src[base + lane] * vec[lane] * mul;
    }
}
03776
/**
 * Multiply src by a sequence of 4-element vectors and a scalar.
 *
 * Output group number g holds src[4g+j] * sv[g][j] * mul for j = 0..3.
 * Elements are processed four at a time until the index reaches len.
 *
 * @param dst output vector
 * @param src input vector
 * @param sv  array of pointers, one 4-element vector per output group
 * @param mul scalar applied to every product
 * @param len number of output elements
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int base, lane;

    for (base = 0; base < len; base += 4) {
        const float *vec = sv[base / 4];

        for (lane = 0; lane < 4; lane++)
            dst[base + lane] = src[base + lane] * vec[lane] * mul;
    }
}
03788
/**
 * Scale a sequence of 2-element vectors into a flat output.
 *
 * Output pair number p is (sv[p][0] * mul, sv[p][1] * mul).
 * Elements are written two at a time until the index reaches len.
 *
 * @param dst output vector
 * @param sv  array of pointers, one 2-element vector per output pair
 * @param mul scale factor
 * @param len number of output elements
 */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int base, lane;

    for (base = 0; base < len; base += 2) {
        const float *vec = sv[base / 2];

        for (lane = 0; lane < 2; lane++)
            dst[base + lane] = vec[lane] * mul;
    }
}
03798
/**
 * Scale a sequence of 4-element vectors into a flat output.
 *
 * Output group number g holds sv[g][j] * mul for j = 0..3.
 * Elements are written four at a time until the index reaches len.
 *
 * @param dst output vector
 * @param sv  array of pointers, one 4-element vector per output group
 * @param mul scale factor
 * @param len number of output elements
 */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int base, lane;

    for (base = 0; base < len; base += 4) {
        const float *vec = sv[base / 4];

        for (lane = 0; lane < 4; lane++)
            dst[base + lane] = vec[lane] * mul;
    }
}
03810
/**
 * In-place butterfly on two float vectors.
 *
 * After the call v1[k] holds the old v1[k] + v2[k] and v2[k] holds the
 * old v1[k] - v2[k].
 *
 * @param v1  first vector, receives the sums
 * @param v2  second vector, receives the differences
 * @param len number of elements
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];

        v1[k] = a + b;
        v2[k] = a - b;
    }
}
03821
/**
 * Dot product of two float vectors, accumulated in single precision.
 *
 * @param v1  first vector
 * @param v2  second vector
 * @param len number of elements
 * @return sum over k of v1[k] * v2[k]; 0 when len <= 0
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}
03832
/**
 * Clip one value given as a 32-bit pattern, using integer compares only.
 *
 * The caller (vector_clipf_c_opposite_sign) passes the raw bits of floats
 * and only uses this when min < 0 < max.
 * NOTE(review): the comparisons assume a sign-magnitude float layout such
 * as IEEE-754 single precision -- confirm for exotic targets.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with its top bit flipped
 * @return mini, maxi or a
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* mini has its top bit set, so only patterns with the top bit set and
     * a larger magnitude compare above it. */
    if (a > mini)
        return mini;
    /* Flipping the top bit lets one unsigned compare test a against maxi.
     * 1U keeps the shift in unsigned arithmetic; 1<<31 on a 32-bit int
     * shifts into the sign bit, which is undefined behaviour. */
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}
03841
/**
 * Clip a float vector to [*min, *max] using integer compares on the raw
 * bit patterns. Called by vector_clipf_c() only when min < 0 < max.
 *
 * Elements are processed in groups of 8 until the index reaches len, so
 * a len that is not a multiple of 8 is rounded up to the next group.
 *
 * NOTE(review): floats are accessed through uint32_t pointers, which is
 * not strict-aliasing clean; left as in the original -- confirm the build
 * flags make this safe.
 *
 * @param dst output vector
 * @param src input vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    int i, lane;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    /* 1U keeps the shift unsigned; 1<<31 on a 32-bit int is undefined. */
    uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        for (lane = 0; lane < 8; lane++)
            dsti[i + lane] = clipf_c_one(srci[i + lane], mini, maxi, maxisign);
    }
}
/**
 * Clip every element of src to [min, max] and store it in dst.
 *
 * When the bounds have opposite signs the work is handed to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied per
 * element. Elements are processed in groups of 8 until the index reaches
 * len.
 *
 * @param dst output vector
 * @param src input vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int base, lane;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8) {
        for (lane = 0; lane < 8; lane++)
            dst[base + lane] = av_clipf(src[base + lane], min, max);
    }
}
03877
/**
 * Dot product of two int16_t vectors, with each product shifted right
 * by `shift` before it is accumulated.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements
 * @param shift right shift applied to every product
 * @return sum over k of (v1[k] * v2[k]) >> shift
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;

    for (; order != 0; order--, v1++, v2++)
        acc += (*v1 * *v2) >> shift;

    return acc;
}
03887
/**
 * Dot product of v1 and v2 combined with an in-place update of v1.
 *
 * For each element the product v1[k] * v2[k] is accumulated using the
 * value of v1[k] from before the update, then v1[k] += mul * v3[k].
 *
 * @param v1    first vector; modified in place
 * @param v2    second vector of the dot product
 * @param v3    vector that is scaled by mul and added to v1
 * @param order number of elements
 * @param mul   multiplier for v3
 * @return sum over k of (old v1[k]) * v2[k]
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;

    for (; order != 0; order--, v1++, v2++, v3++) {
        acc += *v1 * *v2;
        *v1 += mul * *v3;
    }

    return acc;
}
03897
/*
 * Integer weights used by wmv2_idct_row() and wmv2_idct_col() below.
 * W1..W7 match round(2048 * sqrt(2) * cos(k * pi / 16)) for k = 1..7
 * (which gives 2048 for k = 4); W0 is defined equal to W4.
 * NOTE(review): the formula was derived from the values, not from a
 * specification -- confirm before relying on it.
 */
03898 #define W0 2048
03899 #define W1 2841
03900 #define W2 2676
03901 #define W3 2408
03902 #define W4 2048
03903 #define W5 1609
03904 #define W6 1108
03905 #define W7 565
03906
03907 static void wmv2_idct_row(short * b)
03908 {
03909 int s1,s2;
03910 int a0,a1,a2,a3,a4,a5,a6,a7;
03911
03912 a1 = W1*b[1]+W7*b[7];
03913 a7 = W7*b[1]-W1*b[7];
03914 a5 = W5*b[5]+W3*b[3];
03915 a3 = W3*b[5]-W5*b[3];
03916 a2 = W2*b[2]+W6*b[6];
03917 a6 = W6*b[2]-W2*b[6];
03918 a0 = W0*b[0]+W0*b[4];
03919 a4 = W0*b[0]-W0*b[4];
03920
03921 s1 = (181*(a1-a5+a7-a3)+128)>>8;
03922 s2 = (181*(a1-a5-a7+a3)+128)>>8;
03923
03924 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
03925 b[1] = (a4+a6 +s1 + (1<<7))>>8;
03926 b[2] = (a4-a6 +s2 + (1<<7))>>8;
03927 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
03928 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
03929 b[5] = (a4-a6 -s2 + (1<<7))>>8;
03930 b[6] = (a4+a6 -s1 + (1<<7))>>8;
03931 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
03932 }
03933 static void wmv2_idct_col(short * b)
03934 {
03935 int s1,s2;
03936 int a0,a1,a2,a3,a4,a5,a6,a7;
03937
03938 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
03939 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
03940 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
03941 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
03942 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
03943 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
03944 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
03945 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
03946
03947 s1 = (181*(a1-a5+a7-a3)+128)>>8;
03948 s2 = (181*(a1-a5-a7+a3)+128)>>8;
03949
03950 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
03951 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
03952 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
03953 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
03954
03955 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
03956 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
03957 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
03958 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
03959 }
/**
 * In-place 8x8 WMV2 inverse DCT: the row transform is applied to all
 * eight rows first, then the column transform to all eight columns.
 *
 * @param block 64 coefficients, 8 per row
 */
void ff_wmv2_idct_c(short * block)
{
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);

    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
03970
03971
03972 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
03973 {
03974 ff_wmv2_idct_c(block);
03975 ff_put_pixels_clamped_c(block, dest, line_size);
03976 }
03977 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
03978 {
03979 ff_wmv2_idct_c(block);
03980 ff_add_pixels_clamped_c(block, dest, line_size);
03981 }
03982 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
03983 {
03984 j_rev_dct (block);
03985 ff_put_pixels_clamped_c(block, dest, line_size);
03986 }
03987 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
03988 {
03989 j_rev_dct (block);
03990 ff_add_pixels_clamped_c(block, dest, line_size);
03991 }
03992
03993 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
03994 {
03995 j_rev_dct4 (block);
03996 put_pixels_clamped4_c(block, dest, line_size);
03997 }
03998 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
03999 {
04000 j_rev_dct4 (block);
04001 add_pixels_clamped4_c(block, dest, line_size);
04002 }
04003
04004 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
04005 {
04006 j_rev_dct2 (block);
04007 put_pixels_clamped2_c(block, dest, line_size);
04008 }
04009 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
04010 {
04011 j_rev_dct2 (block);
04012 add_pixels_clamped2_c(block, dest, line_size);
04013 }
04014
04015 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
04016 {
04017 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
04018
04019 dest[0] = cm[(block[0] + 4)>>3];
04020 }
04021 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
04022 {
04023 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
04024
04025 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
04026 }
04027
04028 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
04029
04030
04031 av_cold void dsputil_static_init(void)
04032 {
04033 int i;
04034
04035 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
04036 for(i=0;i<MAX_NEG_CROP;i++) {
04037 ff_cropTbl[i] = 0;
04038 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
04039 }
04040
04041 for(i=0;i<512;i++) {
04042 ff_squareTbl[i] = (i - 256) * (i - 256);
04043 }
04044
04045 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
04046 }
04047
04048 int ff_check_alignment(void){
04049 static int did_fail=0;
04050 DECLARE_ALIGNED(16, int, aligned);
04051
04052 if((intptr_t)&aligned & 15){
04053 if(!did_fail){
04054 #if HAVE_MMX || HAVE_ALTIVEC
04055 av_log(NULL, AV_LOG_ERROR,
04056 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
04057 "and may be very slow or crash. This is not a bug in libavcodec,\n"
04058 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
04059 "Do not report crashes to FFmpeg developers.\n");
04060 #endif
04061 did_fail=1;
04062 }
04063 return -1;
04064 }
04065 return 0;
04066 }
04067
/**
 * Fill a DSPContext with function pointers.
 *
 * Selects the forward/inverse DCT according to avctx->dct_algo,
 * avctx->idct_algo and avctx->lowres, installs the C implementations for
 * all other entries, calls the per-architecture init functions that are
 * enabled at build time, fills table entries that are still unset and
 * finally builds c->idct_permutation from c->idct_permutation_type.
 *
 * @param c     context to fill
 * @param avctx codec context supplying dct_algo, idct_algo and lowres;
 *              also passed on to the sub-initializers
 */
04068 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
04069 {
04070 int i;
04071
/* Return value is ignored; the check logs by itself on failure. */
04072 ff_check_alignment();
04073
/* Forward DCT selection (encoder builds only). */
04074 #if CONFIG_ENCODERS
04075 if(avctx->dct_algo==FF_DCT_FASTINT) {
04076 c->fdct = fdct_ifast;
04077 c->fdct248 = fdct_ifast248;
04078 }
04079 else if(avctx->dct_algo==FF_DCT_FAAN) {
04080 c->fdct = ff_faandct;
04081 c->fdct248 = ff_faandct248;
04082 }
04083 else {
04084 c->fdct = ff_jpeg_fdct_islow;
04085 c->fdct248 = ff_fdct248_islow;
04086 }
04087 #endif //CONFIG_ENCODERS
04088
/* Inverse DCT selection: lowres 1/2/3 use the reduced 4/2/1 variants,
 * otherwise idct_algo decides. Every branch also sets the permutation
 * type that the switch at the end of this function consumes. */
04089 if(avctx->lowres==1){
04090 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
04091 c->idct_put= ff_jref_idct4_put;
04092 c->idct_add= ff_jref_idct4_add;
04093 }else{
04094 c->idct_put= ff_h264_lowres_idct_put_c;
04095 c->idct_add= ff_h264_lowres_idct_add_c;
04096 }
04097 c->idct = j_rev_dct4;
04098 c->idct_permutation_type= FF_NO_IDCT_PERM;
04099 }else if(avctx->lowres==2){
04100 c->idct_put= ff_jref_idct2_put;
04101 c->idct_add= ff_jref_idct2_add;
04102 c->idct = j_rev_dct2;
04103 c->idct_permutation_type= FF_NO_IDCT_PERM;
04104 }else if(avctx->lowres==3){
04105 c->idct_put= ff_jref_idct1_put;
04106 c->idct_add= ff_jref_idct1_add;
04107 c->idct = j_rev_dct1;
04108 c->idct_permutation_type= FF_NO_IDCT_PERM;
04109 }else{
04110 if(avctx->idct_algo==FF_IDCT_INT){
04111 c->idct_put= ff_jref_idct_put;
04112 c->idct_add= ff_jref_idct_add;
04113 c->idct = j_rev_dct;
04114 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
04115 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
04116 avctx->idct_algo==FF_IDCT_VP3){
04117 c->idct_put= ff_vp3_idct_put_c;
04118 c->idct_add= ff_vp3_idct_add_c;
04119 c->idct = ff_vp3_idct_c;
04120 c->idct_permutation_type= FF_NO_IDCT_PERM;
04121 }else if(avctx->idct_algo==FF_IDCT_WMV2){
04122 c->idct_put= ff_wmv2_idct_put_c;
04123 c->idct_add= ff_wmv2_idct_add_c;
04124 c->idct = ff_wmv2_idct_c;
04125 c->idct_permutation_type= FF_NO_IDCT_PERM;
04126 }else if(avctx->idct_algo==FF_IDCT_FAAN){
04127 c->idct_put= ff_faanidct_put;
04128 c->idct_add= ff_faanidct_add;
04129 c->idct = ff_faanidct;
04130 c->idct_permutation_type= FF_NO_IDCT_PERM;
04131 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
/* Only idct_put is set for FF_IDCT_EA; idct_add and idct are left
 * untouched by this branch. */
04132 c->idct_put= ff_ea_idct_put_c;
04133 c->idct_permutation_type= FF_NO_IDCT_PERM;
04134 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
04135 c->idct = ff_bink_idct_c;
04136 c->idct_add = ff_bink_idct_add_c;
04137 c->idct_put = ff_bink_idct_put_c;
04138 c->idct_permutation_type = FF_NO_IDCT_PERM;
04139 }else{
04140 c->idct_put= ff_simple_idct_put;
04141 c->idct_add= ff_simple_idct_add;
04142 c->idct = ff_simple_idct;
04143 c->idct_permutation_type= FF_NO_IDCT_PERM;
04144 }
04145 }
04146
/* Basic pixel and block helpers. */
04147 c->get_pixels = get_pixels_c;
04148 c->diff_pixels = diff_pixels_c;
04149 c->put_pixels_clamped = ff_put_pixels_clamped_c;
04150 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
04151 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
04152 c->add_pixels_clamped = ff_add_pixels_clamped_c;
04153 c->add_pixels8 = add_pixels8_c;
04154 c->add_pixels4 = add_pixels4_c;
04155 c->sum_abs_dctelem = sum_abs_dctelem_c;
04156 c->emulated_edge_mc = ff_emulated_edge_mc;
04157 c->gmc1 = gmc1_c;
04158 c->gmc = ff_gmc_c;
04159 c->clear_block = clear_block_c;
04160 c->clear_blocks = clear_blocks_c;
04161 c->pix_sum = pix_sum_c;
04162 c->pix_norm1 = pix_norm1_c;
04163
04164 c->fill_block_tab[0] = fill_block16_c;
04165 c->fill_block_tab[1] = fill_block8_c;
04166 c->scale_block = scale_block_c;
04167
04168
/* pix_abs[size][variant]: first index 0 = 16-wide, 1 = 8-wide; second
 * index selects the plain, x2, y2 and xy2 functions. */
04169 c->pix_abs[0][0] = pix_abs16_c;
04170 c->pix_abs[0][1] = pix_abs16_x2_c;
04171 c->pix_abs[0][2] = pix_abs16_y2_c;
04172 c->pix_abs[0][3] = pix_abs16_xy2_c;
04173 c->pix_abs[1][0] = pix_abs8_c;
04174 c->pix_abs[1][1] = pix_abs8_x2_c;
04175 c->pix_abs[1][2] = pix_abs8_y2_c;
04176 c->pix_abs[1][3] = pix_abs8_xy2_c;
04177
/* Helper: fill one row of a <PFX>_pixels_tab with the plain/x2/y2/xy2
 * functions for block width NUM. */
04178 #define dspfunc(PFX, IDX, NUM) \
04179 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
04180 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
04181 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
04182 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
04183
04184 dspfunc(put, 0, 16);
04185 dspfunc(put_no_rnd, 0, 16);
04186 dspfunc(put, 1, 8);
04187 dspfunc(put_no_rnd, 1, 8);
04188 dspfunc(put, 2, 4);
04189 dspfunc(put, 3, 2);
04190
04191 dspfunc(avg, 0, 16);
04192 dspfunc(avg_no_rnd, 0, 16);
04193 dspfunc(avg, 1, 8);
04194 dspfunc(avg_no_rnd, 1, 8);
04195 dspfunc(avg, 2, 4);
04196 dspfunc(avg, 3, 2);
04197 #undef dspfunc
04198
04199 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
04200 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
04201
/* tpel tables: indices 3, 7 and 11 are not assigned here. */
04202 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
04203 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
04204 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
04205 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
04206 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
04207 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
04208 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
04209 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
04210 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
04211
04212 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
04213 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
04214 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
04215 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
04216 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
04217 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
04218 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
04219 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
04220 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
04221
/* Helper (redefined): fill all 16 entries of one row of a qpel table
 * with the mcXY functions, entry index = X + 4*Y. */
04222 #define dspfunc(PFX, IDX, NUM) \
04223 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
04224 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
04225 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
04226 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
04227 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
04228 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
04229 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
04230 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
04231 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
04232 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
04233 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
04234 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
04235 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
04236 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
04237 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
04238 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
04239
04240 dspfunc(put_qpel, 0, 16);
04241 dspfunc(put_no_rnd_qpel, 0, 16);
04242
04243 dspfunc(avg_qpel, 0, 16);
04244
04245
04246 dspfunc(put_qpel, 1, 8);
04247 dspfunc(put_no_rnd_qpel, 1, 8);
04248
04249 dspfunc(avg_qpel, 1, 8);
04250
04251
04252 dspfunc(put_h264_qpel, 0, 16);
04253 dspfunc(put_h264_qpel, 1, 8);
04254 dspfunc(put_h264_qpel, 2, 4);
04255 dspfunc(put_h264_qpel, 3, 2);
04256 dspfunc(avg_h264_qpel, 0, 16);
04257 dspfunc(avg_h264_qpel, 1, 8);
04258 dspfunc(avg_h264_qpel, 2, 4);
04259
04260 #undef dspfunc
04261 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
04262 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
04263 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
04264 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
04265 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
04266 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
04267
04268 c->draw_edges = draw_edges_c;
04269
/* Codec-specific sub-initializers, compiled in only for enabled decoders. */
04270 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
04271 ff_mlp_init(c, avctx);
04272 #endif
04273 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
04274 ff_intrax8dsp_init(c,avctx);
04275 #endif
04276 #if CONFIG_RV30_DECODER
04277 ff_rv30dsp_init(c,avctx);
04278 #endif
04279 #if CONFIG_RV40_DECODER
04280 ff_rv40dsp_init(c,avctx);
/* The mc33 entries are set here, after ff_rv40dsp_init() has run. */
04281 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
04282 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
04283 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
04284 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
04285 #endif
04286
04287 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
04288 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
04289 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
04290 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
04291 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
04292 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
04293 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
04294 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
04295
/* Helper: slot 0 gets the 16-wide comparison function, slot 1 the 8x8 one. */
04296 #define SET_CMP_FUNC(name) \
04297 c->name[0]= name ## 16_c;\
04298 c->name[1]= name ## 8x8_c;
04299
04300 SET_CMP_FUNC(hadamard8_diff)
/* Slots 4 and 5 of hadamard8_diff, vsad and vsse hold the intra variants. */
04301 c->hadamard8_diff[4]= hadamard8_intra16_c;
04302 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
04303 SET_CMP_FUNC(dct_sad)
04304 SET_CMP_FUNC(dct_max)
04305 #if CONFIG_GPL
04306 SET_CMP_FUNC(dct264_sad)
04307 #endif
04308 c->sad[0]= pix_abs16_c;
04309 c->sad[1]= pix_abs8_c;
04310 c->sse[0]= sse16_c;
04311 c->sse[1]= sse8_c;
04312 c->sse[2]= sse4_c;
04313 SET_CMP_FUNC(quant_psnr)
04314 SET_CMP_FUNC(rd)
04315 SET_CMP_FUNC(bit)
04316 c->vsad[0]= vsad16_c;
04317 c->vsad[4]= vsad_intra16_c;
04318 c->vsad[5]= vsad_intra8_c;
04319 c->vsse[0]= vsse16_c;
04320 c->vsse[4]= vsse_intra16_c;
04321 c->vsse[5]= vsse_intra8_c;
04322 c->nsse[0]= nsse16_c;
04323 c->nsse[1]= nsse8_c;
04324 #if CONFIG_DWT
04325 ff_dsputil_init_dwt(c);
04326 #endif
04327
04328 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
04329
/* Byte-wise helpers and lossless-codec prediction functions. */
04330 c->add_bytes= add_bytes_c;
04331 c->add_bytes_l2= add_bytes_l2_c;
04332 c->diff_bytes= diff_bytes_c;
04333 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
04334 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
04335 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
04336 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
04337 c->bswap_buf= bswap_buf;
04338 c->bswap16_buf = bswap16_buf;
04339 #if CONFIG_PNG_DECODER
04340 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
04341 #endif
04342
/* Loop filters. */
04343 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
04344 c->h263_h_loop_filter= h263_h_loop_filter_c;
04345 c->h263_v_loop_filter= h263_v_loop_filter_c;
04346 }
04347
04348 if (CONFIG_VP3_DECODER) {
04349 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
04350 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
04351 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
04352 }
04353
04354 c->h261_loop_filter= h261_loop_filter_c;
04355
04356 c->try_8x8basis= try_8x8basis_c;
04357 c->add_8x8basis= add_8x8basis_c;
04358
/* Audio / vector arithmetic helpers. */
04359 #if CONFIG_VORBIS_DECODER
04360 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
04361 #endif
04362 #if CONFIG_AC3_DECODER
04363 c->ac3_downmix = ff_ac3_downmix_c;
04364 #endif
04365 c->vector_fmul = vector_fmul_c;
04366 c->vector_fmul_reverse = vector_fmul_reverse_c;
04367 c->vector_fmul_add = vector_fmul_add_c;
04368 c->vector_fmul_window = vector_fmul_window_c;
04369 c->vector_clipf = vector_clipf_c;
04370 c->scalarproduct_int16 = scalarproduct_int16_c;
04371 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
04372 c->scalarproduct_float = scalarproduct_float_c;
04373 c->butterflies_float = butterflies_float_c;
04374 c->vector_fmul_scalar = vector_fmul_scalar_c;
04375
04376 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
04377 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
04378
04379 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
04380 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
04381
04382 c->shrink[0]= av_image_copy_plane;
04383 c->shrink[1]= ff_shrink22;
04384 c->shrink[2]= ff_shrink44;
04385 c->shrink[3]= ff_shrink88;
04386
/* Default prefetch hook does nothing. */
04387 c->prefetch= just_return;
04388
/* Clear the 2-tap qpel tables so that the fallback loop further down can
 * tell which entries are still unset after the architecture inits. */
04389 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
04390 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
04391
/* Per-architecture initializers, each guarded by a build-time constant.
 * NOTE(review): presumably these replace some of the C pointers set above
 * with optimized versions -- their bodies are outside this file section. */
04392 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
04393 if (ARCH_ARM) dsputil_init_arm (c, avctx);
04394 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
04395 if (HAVE_VIS) dsputil_init_vis (c, avctx);
04396 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
04397 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
04398 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
04399 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
04400 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
04401
/* Any 2-tap qpel entry still NULL falls back to the matching h264 qpel
 * function. The loop indexes row 0 with i up to 63, i.e. it walks the
 * 2-D tables as if they were flat.
 * NOTE(review): this relies on each table holding at least 64 contiguous
 * entries -- confirm against the DSPContext declaration. */
04402 for(i=0; i<64; i++){
04403 if(!c->put_2tap_qpel_pixels_tab[0][i])
04404 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
04405 if(!c->avg_2tap_qpel_pixels_tab[0][i])
04406 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
04407 }
04408
/* The rv30 tpel and rv40 qpel tables reuse the h264 qpel entry 0 for
 * their own entry 0 (16- and 8-wide rows). */
04409 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
04410 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
04411 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
04412 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
04413
04414 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
04415 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
04416 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
04417 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
04418
/* Build the 64-entry coefficient permutation for the selected IDCT. */
04419 switch(c->idct_permutation_type){
04420 case FF_NO_IDCT_PERM:
04421 for(i=0; i<64; i++)
04422 c->idct_permutation[i]= i;
04423 break;
04424 case FF_LIBMPEG2_IDCT_PERM:
04425 for(i=0; i<64; i++)
04426 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
04427 break;
04428 case FF_SIMPLE_IDCT_PERM:
04429 for(i=0; i<64; i++)
04430 c->idct_permutation[i]= simple_mmx_permutation[i];
04431 break;
04432 case FF_TRANSPOSE_IDCT_PERM:
/* Swap the row (bits 3-5) and column (bits 0-2) parts of the index. */
04433 for(i=0; i<64; i++)
04434 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
04435 break;
04436 case FF_PARTTRANS_IDCT_PERM:
04437 for(i=0; i<64; i++)
04438 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
04439 break;
04440 case FF_SSE2_IDCT_PERM:
04441 for(i=0; i<64; i++)
04442 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
04443 break;
04444 default:
/* Unknown type: only an error is logged, idct_permutation is not written. */
04445 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
04446 }
04447 }
04448