From 147cebe99eceb120569a527cf9cac9a70b497645 Mon Sep 17 00:00:00 2001 From: Ryan VanderMeulen Date: Fri, 2 May 2025 13:58:19 +0000 Subject: [PATCH] Bug 1963705 - Update libpixman to release 0.46.0. r=jfkthame Differential Revision: https://phabricator.services.mozilla.com/D247432 --- gfx/cairo/README | 2 - gfx/cairo/cairo/src/pixman-rename.h | 29 + gfx/cairo/libpixman/src/meson.build | 1 + gfx/cairo/libpixman/src/moz.build | 1 + gfx/cairo/libpixman/src/pixman-access.c | 81 + .../libpixman/src/pixman-arma64-neon-asm.S | 95 +- .../libpixman/src/pixman-arma64-neon-asm.h | 21 +- gfx/cairo/libpixman/src/pixman-glyph.c | 15 +- gfx/cairo/libpixman/src/pixman-image.c | 24 + gfx/cairo/libpixman/src/pixman-private.h | 4 + gfx/cairo/libpixman/src/pixman-region.c | 135 +- gfx/cairo/libpixman/src/pixman-region16.c | 3 + gfx/cairo/libpixman/src/pixman-region32.c | 3 + gfx/cairo/libpixman/src/pixman-region64f.c | 50 + gfx/cairo/libpixman/src/pixman-rvv.c | 2468 ++++++++++++++++- gfx/cairo/libpixman/src/pixman-utils.c | 37 + gfx/cairo/libpixman/src/pixman-version.h | 6 +- gfx/cairo/libpixman/src/pixman-vmx.c | 1583 +++++------ gfx/cairo/libpixman/src/pixman.c | 20 +- gfx/cairo/libpixman/src/pixman.h | 189 ++ gfx/cairo/pixman-neon.patch | 30 - 21 files changed, 3688 insertions(+), 1109 deletions(-) create mode 100644 gfx/cairo/libpixman/src/pixman-region64f.c delete mode 100644 gfx/cairo/pixman-neon.patch diff --git a/gfx/cairo/README b/gfx/cairo/README index 11b7fb59a764..873d9476fb05 100644 --- a/gfx/cairo/README +++ b/gfx/cairo/README @@ -58,6 +58,4 @@ pixman-export.patch: make sure pixman symbols are not exported in libxul pixman-interp.patch: use lower quality interpolation by default on mobile -pixman-neon.patch: fix for a build failure with clang on armhf linux - pixman-rename.patch: include pixman-rename.h for renaming of external symbols diff --git a/gfx/cairo/cairo/src/pixman-rename.h b/gfx/cairo/cairo/src/pixman-rename.h index 431cd92e968a..c6fb1118ae8d 100644 --- a/gfx/cairo/cairo/src/pixman-rename.h +++ b/gfx/cairo/cairo/src/pixman-rename.h @@ -59,6 +59,35 @@ #define pixman_region32_reset _moz_pixman_region32_reset #define pixman_region32_clear _moz_pixman_region32_clear #define pixman_region32_print _moz_pixman_region32_print +#define pixman_region64f_init _moz_pixman_region64f_init +#define pixman_region64f_init_rect _moz_pixman_region64f_init_rect +#define pixman_region64f_init_rectf _moz_pixman_region64f_init_rectf +#define pixman_region64f_init_rects _moz_pixman_region64f_init_rects +#define pixman_region64f_init_with_extents _moz_pixman_region64f_init_with_extents +#define pixman_region64f_init_from_image _moz_pixman_region64f_init_from_image +#define pixman_region64f_fini _moz_pixman_region64f_fini +#define pixman_region64f_translate _moz_pixman_region64f_translate +#define pixman_region64f_copy _moz_pixman_region64f_copy +#define pixman_region64f_intersect _moz_pixman_region64f_intersect +#define pixman_region64f_union _moz_pixman_region64f_union +#define pixman_region64f_intersect_rect _moz_pixman_region64f_intersect_rect +#define pixman_region64f_intersect_rectf _moz_pixman_region64f_intersect_rectf +#define pixman_region64f_union_rect _moz_pixman_region64f_union_rect +#define pixman_region64f_union_rectf _moz_pixman_region64f_union_rectf +#define pixman_region64f_subtract _moz_pixman_region64f_subtract +#define pixman_region64f_inverse _moz_pixman_region64f_inverse +#define pixman_region64f_contains_point _moz_pixman_region64f_contains_point +#define 
pixman_region64f_contains_rectangle _moz_pixman_region64f_contains_rectangle +#define pixman_region64f_empty _moz_pixman_region64f_empty +#define pixman_region64f_not_empty _moz_pixman_region64f_not_empty +#define pixman_region64f_extents _moz_pixman_region64f_extents +#define pixman_region64f_n_rects _moz_pixman_region64f_n_rects +#define pixman_region64f_rectangles _moz_pixman_region64f_rectangles +#define pixman_region64f_equal _moz_pixman_region64f_equal +#define pixman_region64f_selfcheck _moz_pixman_region64f_selfcheck +#define pixman_region64f_reset _moz_pixman_region64f_reset +#define pixman_region64f_clear _moz_pixman_region64f_clear +#define pixman_region64f_print _moz_pixman_region64f_print #define pixman_blt _moz_pixman_blt #define pixman_fill _moz_pixman_fill #define pixman_transform_point_3d _moz_pixman_transform_point_3d diff --git a/gfx/cairo/libpixman/src/meson.build b/gfx/cairo/libpixman/src/meson.build index fd41288c8cca..a91c73e2926f 100644 --- a/gfx/cairo/libpixman/src/meson.build +++ b/gfx/cairo/libpixman/src/meson.build @@ -97,6 +97,7 @@ pixman_files = files( 'pixman-radial-gradient.c', 'pixman-region16.c', 'pixman-region32.c', + 'pixman-region64f.c', 'pixman-riscv.c', 'pixman-solid-fill.c', 'pixman-timer.c', diff --git a/gfx/cairo/libpixman/src/moz.build b/gfx/cairo/libpixman/src/moz.build index 434f605cd5ed..662f7d44ae88 100644 --- a/gfx/cairo/libpixman/src/moz.build +++ b/gfx/cairo/libpixman/src/moz.build @@ -34,6 +34,7 @@ SOURCES += [ 'pixman-radial-gradient.c', 'pixman-region16.c', 'pixman-region32.c', + 'pixman-region64f.c', 'pixman-riscv.c', 'pixman-solid-fill.c', 'pixman-trap.c', diff --git a/gfx/cairo/libpixman/src/pixman-access.c b/gfx/cairo/libpixman/src/pixman-access.c index 7bd7a5a258c7..822bef6a3a57 100644 --- a/gfx/cairo/libpixman/src/pixman-access.c +++ b/gfx/cairo/libpixman/src/pixman-access.c @@ -710,6 +710,36 @@ fetch_scanline_rgbaf_float (bits_image_t *image, } #endif +static void +fetch_scanline_a16b16g16r16_float (bits_image_t * image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask) +{ + const uint64_t *bits = (uint64_t *)(image->bits + y * image->rowstride); + const uint64_t *pixel = bits + x; + const uint64_t *end = pixel + width; + argb_t *buffer = (argb_t *)b; + + while (pixel < end) + { + uint64_t p = READ (image, pixel++); + uint64_t a = (p >> 48) & 0xffff; + uint64_t b = (p >> 32) & 0xffff; + uint64_t g = (p >> 16) & 0xffff; + uint64_t r = (p >> 0) & 0xffff; + + buffer->a = pixman_unorm_to_float (a, 16); + buffer->r = pixman_unorm_to_float (r, 16); + buffer->g = pixman_unorm_to_float (g, 16); + buffer->b = pixman_unorm_to_float (b, 16); + + buffer++; + } +} + static void fetch_scanline_x2r10g10b10_float (bits_image_t *image, int x, @@ -907,6 +937,27 @@ fetch_pixel_rgbaf_float (bits_image_t *image, } #endif +static argb_t +fetch_pixel_a16b16g16r16_float (bits_image_t *image, + int offset, + int line) +{ + uint64_t *bits = (uint64_t *)(image->bits + line * image->rowstride); + uint64_t p = READ (image, bits + offset); + uint64_t a = (p >> 48) & 0xffff; + uint64_t b = (p >> 32) & 0xffff; + uint64_t g = (p >> 16) & 0xffff; + uint64_t r = (p >> 0) & 0xffff; + argb_t argb; + + argb.a = pixman_unorm_to_float (a, 16); + argb.r = pixman_unorm_to_float (r, 16); + argb.g = pixman_unorm_to_float (g, 16); + argb.b = pixman_unorm_to_float (b, 16); + + return argb; +} + static argb_t fetch_pixel_x2r10g10b10_float (bits_image_t *image, int offset, @@ -1121,6 +1172,32 @@ store_scanline_rgbf_float (bits_image_t * image, } #endif 
+static void +store_scanline_a16b16g16r16_float (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint64_t *bits = (uint64_t *)(image->bits + image->rowstride * y); + uint64_t *pixel = bits + x; + argb_t *values = (argb_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + uint64_t a, r, g, b; + + a = pixman_float_to_unorm (values[i].a, 16); + r = pixman_float_to_unorm (values[i].r, 16); + g = pixman_float_to_unorm (values[i].g, 16); + b = pixman_float_to_unorm (values[i].b, 16); + + WRITE (image, pixel++, + (a << 48) | (b << 32) | (g << 16) | (r << 0)); + } +} + static void store_scanline_a2r10g10b10_float (bits_image_t * image, int x, @@ -1633,6 +1710,10 @@ static const format_info_t accessors[] = fetch_pixel_generic_lossy_32, fetch_pixel_rgbf_float, NULL, store_scanline_rgbf_float }, #endif + { PIXMAN_a16b16g16r16, + NULL, fetch_scanline_a16b16g16r16_float, + fetch_pixel_generic_lossy_32, fetch_pixel_a16b16g16r16_float, + NULL, store_scanline_a16b16g16r16_float }, { PIXMAN_a2r10g10b10, NULL, fetch_scanline_a2r10g10b10_float, diff --git a/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.S b/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.S index 7329d4b2b29d..519ff5b16f72 100644 --- a/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.S +++ b/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.S @@ -305,16 +305,14 @@ mov v28.d[0], v14.d[0] mov v29.d[0], v14.d[1] PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] 10: raddhn v20.8b, v10.8h, v17.8h raddhn v23.8b, v11.8h, v19.8h PF ble, 10f - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_SRC, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: raddhn v22.8b, v12.8h, v18.8h st1 {v14.8h}, [DST_W], #16 @@ -497,9 +495,8 @@ generate_composite_function \ ushll v14.8h, v2.8b, #7 sli v14.8h, v14.8h, #1 PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] 10: ushll v9.8h, v0.8b, #7 sli v9.8h, v9.8h, #1 @@ -585,12 +582,10 @@ generate_composite_function \ 10: uqadd v28.8b, v0.8b, v4.8b PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: uqadd v29.8b, v1.8b, v5.8b uqadd v30.8b, v2.8b, v6.8b @@ -631,12 +626,10 @@ generate_composite_function \ 10: uqadd v28.8b, v0.8b, v4.8b PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: uqadd v29.8b, v1.8b, v5.8b uqadd v30.8b, v2.8b, v6.8b @@ -719,15 +712,13 @@ generate_composite_function_single_scanline \ 10: umull v9.8h, v22.8b, v5.8b PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, 
DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] 10: umull v10.8h, v22.8b, v6.8b PF ble, 10f - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: umull v11.8h, v22.8b, v7.8b .endm @@ -793,15 +784,13 @@ generate_composite_function_single_scanline \ 10: umull v9.8h, v22.8b, v5.8b PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] 10: umull v10.8h, v22.8b, v6.8b PF ble, 10f - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: umull v11.8h, v22.8b, v7.8b .endm @@ -886,9 +875,8 @@ generate_composite_function_single_scanline \ PF subs, PF_CTL, PF_CTL, #0x10 umull v11.8h, v24.8b, v7.8b PF ble, 10f - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 .endm @@ -950,9 +938,8 @@ generate_composite_function \ umull v9.8h, v22.8b, v5.8b umull v10.8h, v22.8b, v6.8b PF blt, 10f - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: umull v11.8h, v22.8b, v7.8b .endm @@ -1436,9 +1423,8 @@ generate_composite_function \ 10: umull v11.8h, v24.8b, v3.8b PF ble, 10f - PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb, DUMMY, [PF_MASK, DUMMY] - PF add, PF_MASK, PF_MASK, #1 + PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift + PF ldrsb, DUMMY, [PF_MASK] 10: st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 ursra v8.8h, v8.8h, #8 @@ -1517,9 +1503,8 @@ generate_composite_function \ 10: umull v3.8h, v27.8b, v16.8b PF ble, 10f - PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb, DUMMY, [PF_MASK, DUMMY] - PF add, PF_MASK, PF_MASK, #1 + PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift + PF ldrsb, DUMMY, [PF_MASK] 10: st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32 ursra v0.8h, v0.8h, #8 @@ -1628,15 +1613,13 @@ generate_composite_function \ 10: umull v19.8h, v24.8b, v11.8b PF ble, 10f - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] 10: uqadd v28.8b, v0.8b, v28.8b PF ble, 10f - PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb, DUMMY, [PF_MASK, DUMMY] - PF add, PF_MASK, PF_MASK, #1 + PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift + PF ldrsb, DUMMY, [PF_MASK] 10: uqadd v29.8b, v1.8b, v29.8b uqadd v30.8b, v2.8b, v30.8b @@ -2699,9 +2682,8 @@ generate_composite_function \ PF sub, PF_X, PF_X, ORIG_W PF subs, PF_CTL, PF_CTL, #0x10 PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] 10: .endm @@ -2768,9 +2750,8 @@ generate_composite_function \ PF sub, PF_X, PF_X, ORIG_W PF subs, PF_CTL, PF_CTL, #0x10 PF ble, 10f - PF lsl, DUMMY, SRC_STRIDE, 
#src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] 10: .endm diff --git a/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.h b/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.h index ec3d76fea81d..516ebb8f3485 100644 --- a/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.h +++ b/gfx/cairo/libpixman/src/pixman-arma64-neon-asm.h @@ -474,25 +474,21 @@ PF lsl, DUMMY, PF_X, #mask_bpp_shift PF prfm, PREFETCH_MODE, [PF_MASK, DUMMY] .endif - PF ble, 71f + PF ble, 72f PF sub, PF_X, PF_X, ORIG_W PF subs, PF_CTL, PF_CTL, #0x10 -71: PF ble, 72f .if src_bpp_shift >= 0 - PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift - PF ldrsb, DUMMY, [PF_SRC, DUMMY] - PF add, PF_SRC, PF_SRC, #1 + PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift + PF ldrsb, DUMMY, [PF_SRC] .endif .if dst_r_bpp != 0 - PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift - PF ldrsb, DUMMY, [PF_DST, DUMMY] - PF add, PF_DST, PF_DST, #1 + PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift + PF ldrsb, DUMMY, [PF_DST] .endif .if mask_bpp_shift >= 0 - PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift - PF ldrsb, DUMMY, [PF_MASK, DUMMY] - PF add, PF_MASK, PF_MASK, #1 + PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift + PF ldrsb, DUMMY, [PF_MASK] .endif 72: .endif @@ -858,8 +854,7 @@ PF mov, PF_DST, DST_R PF mov, PF_MASK, MASK /* PF_CTL = \prefetch_distance | ((h - 1) << 4) */ - PF lsl, DUMMY, H, #4 - PF mov, PF_CTL, DUMMY + PF lsl, PF_CTL, H, #4 PF add, PF_CTL, PF_CTL, #(\prefetch_distance - 0x10) \init diff --git a/gfx/cairo/libpixman/src/pixman-glyph.c b/gfx/cairo/libpixman/src/pixman-glyph.c index dc9041180e51..346d78947d95 100644 --- a/gfx/cairo/libpixman/src/pixman-glyph.c +++ b/gfx/cairo/libpixman/src/pixman-glyph.c @@ -391,7 +391,8 @@ box32_intersect (pixman_box32_t *dest, return dest->x2 > dest->x1 && dest->y2 > dest->y1; } -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +#if defined(__GNUC__) && defined(__i386__) && !defined(__x86_64__) && \ + !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) #endif PIXMAN_EXPORT void @@ -418,10 +419,10 @@ pixman_composite_glyphs_no_mask (pixman_op_t op, _pixman_image_validate (src); _pixman_image_validate (dest); - + dest_format = dest->common.extended_format_code; dest_flags = dest->common.flags; - + pixman_region32_init (®ion); if (!_pixman_compute_composite_region32 ( ®ion, @@ -452,9 +453,9 @@ pixman_composite_glyphs_no_mask (pixman_op_t op, glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y; glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; - + pbox = pixman_region32_rectangles (®ion, &n); - + info.mask_image = glyph_img; while (n--) @@ -588,7 +589,7 @@ add_glyphs (pixman_glyph_cache_t *cache, glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y; glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width; glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height; - + if (box32_intersect (&composite_box, &glyph_box, &dest_box)) { int src_x = composite_box.x1 - glyph_box.x1; @@ -623,7 +624,7 @@ out: * * Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source * image are both aligned with (dest_x, dest_y) in the destination image. 
Then - * these three images are composited within the + * these three images are composited within the * * (dest_x, dest_y, dst_x + width, dst_y + height) * diff --git a/gfx/cairo/libpixman/src/pixman-image.c b/gfx/cairo/libpixman/src/pixman-image.c index 72796fc9c9f7..d089cb174dd4 100644 --- a/gfx/cairo/libpixman/src/pixman-image.c +++ b/gfx/cairo/libpixman/src/pixman-image.c @@ -613,6 +613,30 @@ pixman_image_set_clip_region (pixman_image_t * image, return result; } +PIXMAN_EXPORT pixman_bool_t +pixman_image_set_clip_region64f (pixman_image_t * image, + const pixman_region64f_t *region) +{ + image_common_t *common = (image_common_t *)image; + pixman_bool_t result; + + if (region) + { + if ((result = pixman_region32_copy_from_region64f (&common->clip_region, region))) + image->common.have_clip_region = TRUE; + } + else + { + _pixman_image_reset_clip_region (image); + + result = TRUE; + } + + image_property_changed (image); + + return result; +} + PIXMAN_EXPORT void pixman_image_set_has_client_clip (pixman_image_t *image, pixman_bool_t client_clip) diff --git a/gfx/cairo/libpixman/src/pixman-private.h b/gfx/cairo/libpixman/src/pixman-private.h index eab3adff90d6..aa8b290dc6f4 100644 --- a/gfx/cairo/libpixman/src/pixman-private.h +++ b/gfx/cairo/libpixman/src/pixman-private.h @@ -893,6 +893,10 @@ pixman_bool_t pixman_region32_copy_from_region16 (pixman_region32_t *dst, const pixman_region16_t *src); +pixman_bool_t +pixman_region32_copy_from_region64f (pixman_region32_t *dst, + const pixman_region64f_t *src); + pixman_bool_t pixman_region16_copy_from_region32 (pixman_region16_t *dst, const pixman_region32_t *src); diff --git a/gfx/cairo/libpixman/src/pixman-region.c b/gfx/cairo/libpixman/src/pixman-region.c index d75e51991b23..849f615cf361 100644 --- a/gfx/cairo/libpixman/src/pixman-region.c +++ b/gfx/cairo/libpixman/src/pixman-region.c @@ -346,7 +346,11 @@ PREFIX (_print) (region_type_t *rgn) rects = PIXREGION_RECTS (rgn); fprintf (stderr, "num: %d size: %d\n", num, size); - fprintf (stderr, "extents: %d %d %d %d\n", + fprintf (stderr, "extents: " + PRINT_SPECIFIER " " + PRINT_SPECIFIER " " + PRINT_SPECIFIER " " + PRINT_SPECIFIER "\n", rgn->extents.x1, rgn->extents.y1, rgn->extents.x2, @@ -354,7 +358,10 @@ PREFIX (_print) (region_type_t *rgn) for (i = 0; i < num; i++) { - fprintf (stderr, "%d %d %d %d \n", + fprintf (stderr, PRINT_SPECIFIER " " + PRINT_SPECIFIER " " + PRINT_SPECIFIER " " + PRINT_SPECIFIER " \n", rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2); } @@ -394,6 +401,29 @@ PREFIX (_init_rect) (region_type_t * region, region->data = NULL; } +PIXMAN_EXPORT void +PREFIX (_init_rectf) (region_type_t * region, + double x, + double y, + double width, + double height) +{ + region->extents.x1 = x; + region->extents.y1 = y; + region->extents.x2 = x + width; + region->extents.y2 = y + height; + + if (!GOOD_RECT (®ion->extents)) + { + if (BAD_RECT (®ion->extents)) + _pixman_log_error (FUNC, "Invalid rectangle passed"); + PREFIX (_init) (region); + return; + } + + region->data = NULL; +} + PIXMAN_EXPORT void PREFIX (_init_with_extents) (region_type_t *region, const box_type_t *extents) { @@ -572,7 +602,7 @@ pixman_coalesce (region_type_t * region, /* Region to coalesce */ box_type_t *prev_box; /* Current box in previous band */ box_type_t *cur_box; /* Current box in current band */ int numRects; /* Number rectangles in both bands */ - int y2; /* Bottom of current band */ + primitive_t y2; /* Bottom of current band */ /* * Figure out how many rectangles are in the band. 
@@ -658,8 +688,8 @@ static inline pixman_bool_t pixman_region_append_non_o (region_type_t * region, box_type_t * r, box_type_t * r_end, - int y1, - int y2) + primitive_t y1, + primitive_t y2) { box_type_t *next_rect; int new_rects; @@ -741,8 +771,8 @@ typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region, box_type_t * r1_end, box_type_t * r2, box_type_t * r2_end, - int y1, - int y2); + primitive_t y1, + primitive_t y2); static pixman_bool_t pixman_op (region_type_t * new_reg, /* Place to store result */ @@ -762,8 +792,8 @@ pixman_op (region_type_t * new_reg, /* Place to store result box_type_t *r2; /* Pointer into 2d region */ box_type_t *r1_end; /* End of 1st region */ box_type_t *r2_end; /* End of 2d region */ - int ybot; /* Bottom of intersection */ - int ytop; /* Top of intersection */ + primitive_t ybot; /* Bottom of intersection */ + primitive_t ytop; /* Top of intersection */ region_data_type_t *old_data; /* Old data for new_reg */ int prev_band; /* Index of start of * previous band in new_reg */ @@ -771,10 +801,10 @@ pixman_op (region_type_t * new_reg, /* Place to store result * band in new_reg */ box_type_t * r1_band_end; /* End of current band in r1 */ box_type_t * r2_band_end; /* End of current band in r2 */ - int top; /* Top of non-overlapping band */ - int bot; /* Bottom of non-overlapping band*/ - int r1y1; /* Temps for r1->y1 and r2->y1 */ - int r2y1; + primitive_t top; /* Top of non-overlapping band */ + primitive_t bot; /* Bottom of non-overlapping band*/ + primitive_t r1y1; /* Temps for r1->y1 and r2->y1 */ + primitive_t r2y1; int new_size; int numRects; @@ -1110,11 +1140,11 @@ pixman_region_intersect_o (region_type_t *region, box_type_t * r1_end, box_type_t * r2, box_type_t * r2_end, - int y1, - int y2) + primitive_t y1, + primitive_t y2) { - int x1; - int x2; + primitive_t x1; + primitive_t x2; box_type_t * next_rect; next_rect = PIXREGION_TOP (region); @@ -1262,12 +1292,12 @@ pixman_region_union_o (region_type_t *region, box_type_t * r1_end, box_type_t * r2, box_type_t * r2_end, - int y1, - int y2) + primitive_t y1, + primitive_t y2) { box_type_t *next_rect; - int x1; /* left and right side of current union */ - int x2; + primitive_t x1; /* left and right side of current union */ + primitive_t x2; critical_if_fail (y1 < y2); critical_if_fail (r1 != r1_end && r2 != r2_end); @@ -1337,6 +1367,24 @@ PREFIX(_intersect_rect) (region_type_t *dest, return PREFIX(_intersect) (dest, source, ®ion); } +PIXMAN_EXPORT pixman_bool_t +PREFIX(_intersect_rectf) (region_type_t *dest, + const region_type_t *source, + double x, double y, + double width, + double height) +{ + region_type_t region; + + region.data = NULL; + region.extents.x1 = x; + region.extents.y1 = y; + region.extents.x2 = x + width; + region.extents.y2 = y + height; + + return PREFIX(_intersect) (dest, source, ®ion); +} + /* Convenience function for performing union of region with a * single rectangle */ @@ -1367,6 +1415,33 @@ PREFIX (_union_rect) (region_type_t *dest, return PREFIX (_union) (dest, source, ®ion); } +PIXMAN_EXPORT pixman_bool_t +PREFIX (_union_rectf) (region_type_t *dest, + const region_type_t *source, + double x, + double y, + double width, + double height) +{ + region_type_t region; + + region.extents.x1 = x; + region.extents.y1 = y; + region.extents.x2 = x + width; + region.extents.y2 = y + height; + + if (!GOOD_RECT (®ion.extents)) + { + if (BAD_RECT (®ion.extents)) + _pixman_log_error (FUNC, "Invalid rectangle passed"); + return PREFIX (_copy) (dest, source); + } + + region.data = NULL; + 
+ return PREFIX (_union) (dest, source, ®ion); +} + PIXMAN_EXPORT pixman_bool_t PREFIX (_union) (region_type_t * new_reg, const region_type_t *reg1, @@ -1467,8 +1542,8 @@ quick_sort_rects ( box_type_t rects[], int numRects) { - int y1; - int x1; + primitive_t y1; + primitive_t x1; int i, j; box_type_t *r; @@ -1833,11 +1908,11 @@ pixman_region_subtract_o (region_type_t * region, box_type_t * r1_end, box_type_t * r2, box_type_t * r2_end, - int y1, - int y2) + primitive_t y1, + primitive_t y2) { box_type_t * next_rect; - int x1; + primitive_t x1; x1 = r1->x1; @@ -2066,7 +2141,7 @@ PREFIX (_inverse) (region_type_t * new_reg, /* Destination region */ * Return @end if no such box exists. */ static box_type_t * -find_box_for_y (box_type_t *begin, box_type_t *end, int y) +find_box_for_y (box_type_t *begin, box_type_t *end, primitive_t y) { box_type_t *mid; @@ -2120,7 +2195,7 @@ PREFIX (_contains_rectangle) (const region_type_t * region, box_type_t * pbox_end; int part_in, part_out; int numRects; - int x, y; + primitive_t x, y; GOOD (region); @@ -2571,8 +2646,8 @@ static inline box_type_t * bitmap_addrect (region_type_t *reg, box_type_t *r, box_type_t **first_rect, - int rx1, int ry1, - int rx2, int ry2) + primitive_t rx1, primitive_t ry1, + primitive_t rx2, primitive_t ry2) { if ((rx1 < rx2) && (ry1 < ry2) && (!(reg->data->numRects && diff --git a/gfx/cairo/libpixman/src/pixman-region16.c b/gfx/cairo/libpixman/src/pixman-region16.c index da4719e7a84d..ff7b038b2473 100644 --- a/gfx/cairo/libpixman/src/pixman-region16.c +++ b/gfx/cairo/libpixman/src/pixman-region16.c @@ -35,6 +35,7 @@ typedef pixman_box16_t box_type_t; typedef pixman_region16_data_t region_data_type_t; typedef pixman_region16_t region_type_t; +typedef int primitive_t; typedef int32_t overflow_int_t; typedef struct { @@ -46,6 +47,8 @@ typedef struct { #define PIXMAN_REGION_MAX INT16_MAX #define PIXMAN_REGION_MIN INT16_MIN +#define PRINT_SPECIFIER "%d" + #include "pixman-region.c" /* This function exists only to make it possible to preserve the X ABI - diff --git a/gfx/cairo/libpixman/src/pixman-region32.c b/gfx/cairo/libpixman/src/pixman-region32.c index 68b456bf3c90..fbaa21663006 100644 --- a/gfx/cairo/libpixman/src/pixman-region32.c +++ b/gfx/cairo/libpixman/src/pixman-region32.c @@ -33,6 +33,7 @@ typedef pixman_box32_t box_type_t; typedef pixman_region32_data_t region_data_type_t; typedef pixman_region32_t region_type_t; +typedef int primitive_t; typedef int64_t overflow_int_t; typedef struct { @@ -44,4 +45,6 @@ typedef struct { #define PIXMAN_REGION_MAX INT32_MAX #define PIXMAN_REGION_MIN INT32_MIN +#define PRINT_SPECIFIER "%d" + #include "pixman-region.c" diff --git a/gfx/cairo/libpixman/src/pixman-region64f.c b/gfx/cairo/libpixman/src/pixman-region64f.c new file mode 100644 index 000000000000..fb8cc5ae1306 --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-region64f.c @@ -0,0 +1,50 @@ +/* + * Copyright © 2008 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software + * and its documentation for any purpose is hereby granted without + * fee, provided that the above copyright notice appear in all copies + * and that both that copyright notice and this permission notice + * appear in supporting documentation, and that the name of + * Red Hat, Inc. not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. Red Hat, Inc. makes no representations about the + * suitability of this software for any purpose. 
It is provided "as + * is" without express or implied warranty. + * + * RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL, + * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER + * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR + * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Soren Sandmann + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" + +#include + +typedef pixman_box64f_t box_type_t; +typedef pixman_region64f_data_t region_data_type_t; +typedef pixman_region64f_t region_type_t; +typedef double primitive_t; +typedef int64_t overflow_int_t; + +typedef struct { + double x, y; +} point_type_t; + +#define PREFIX(x) pixman_region64f##x + +#define PIXMAN_REGION_MAX INT32_MAX +#define PIXMAN_REGION_MIN INT32_MIN + +#define PRINT_SPECIFIER "%f" + +#include "pixman-region.c" diff --git a/gfx/cairo/libpixman/src/pixman-rvv.c b/gfx/cairo/libpixman/src/pixman-rvv.c index 6808f50e5615..570799ca06d6 100644 --- a/gfx/cairo/libpixman/src/pixman-rvv.c +++ b/gfx/cairo/libpixman/src/pixman-rvv.c @@ -2,6 +2,8 @@ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. * 2005 Lars Knoll & Zack Rusin, Trolltech * 2024 Filip Wasil, Samsung Electronics + * 2024 Bernard Gingold, Samsung Electronics + * 2025 Marek Pikuła, Samsung Electronics * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that @@ -27,6 +29,8 @@ #endif #include "pixman-combine-float.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" #include "pixman-private.h" #include @@ -40,6 +44,50 @@ #include #include +// Convenience macros { + +#define __FE_PTR(p, vl) ((p) += (vl)) + +#define _RVV_FE_PRE(total_len, vn, vl, vspec) \ + size_t vn = total_len, vl = __riscv_vsetvl_##vspec (vn); \ + vn > 0 + +#define _RVV_FE_POST(vn, vl, vspec) vn -= (vl), vl = __riscv_vsetvl_##vspec (vn) + +#define RVV_FOREACH_1(total_len, vl, vspec, p1) \ + for (_RVV_FE_PRE (total_len, vn, vl, vspec); \ + __FE_PTR (p1, vl), _RVV_FE_POST (vn, vl, vspec)) + +#define RVV_FOREACH_2(total_len, vl, vspec, p1, p2) \ + for (_RVV_FE_PRE (total_len, vn, vl, vspec); \ + __FE_PTR (p1, vl), __FE_PTR (p2, vl), _RVV_FE_POST (vn, vl, vspec)) + +#define RVV_FOREACH_3(total_len, vl, vspec, p1, p2, p3) \ + for (_RVV_FE_PRE (total_len, vn, vl, vspec); \ + __FE_PTR (p1, vl), __FE_PTR (p2, vl), __FE_PTR (p3, vl), \ + _RVV_FE_POST (vn, vl, vspec)) + +// vuintXXmYY_t for use in macros (less token concatenation). +#define VUINT(ELEN, LMUL) vuint##ELEN##LMUL##_t +#define VUINT32(LMUL) VUINT (32, LMUL) +#define VUINT16(LMUL) VUINT (16, LMUL) +#define VUINT8(LMUL) VUINT (8, LMUL) + +// Short for vreinterpret commonly used for ARGB batch operations. 
+#define RVV_U8x4_U32(LMUL, value) \ + __riscv_vreinterpret_v_u8##LMUL##_u32##LMUL (value) +#define RVV_U8x4_U32_m2(value) RVV_U8x4_U32 (m2, value) +#define RVV_U8x4_U32_m4(value) RVV_U8x4_U32 (m4, value) + +#define RVV_U32_U8x4(LMUL, value) \ + __riscv_vreinterpret_v_u32##LMUL##_u8##LMUL (value) +#define RVV_U32_U8x4_m2(value) RVV_U32_U8x4 (m2, value) +#define RVV_U32_U8x4_m4(value) RVV_U32_U8x4 (m4, value) + +// } + +// Float implementation + /* * Screen * @@ -49,11 +97,11 @@ */ static force_inline vfloat32m1_t -rvv_blend_screen (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_screen_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t t0, t1, t2; t0 = __riscv_vfmul_vv_f32m1 (s, da, vl); @@ -72,11 +120,11 @@ rvv_blend_screen (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_multiply (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_multiply_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { return __riscv_vfmul_vv_f32m1 (s, d, vl); } @@ -105,14 +153,14 @@ rvv_blend_multiply (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_overlay (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_overlay_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4, f0, f1, f2; - vbool32_t vb; + vbool32_t vb; t0 = __riscv_vfadd_vv_f32m1 (d, d, vl); t1 = __riscv_vfmul_vv_f32m1 (__riscv_vfadd_vv_f32m1 (s, s, vl), d, vl); vb = __riscv_vmflt_vv_f32m1_b32 (t0, da, vl); @@ -134,14 +182,14 @@ rvv_blend_overlay (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_darken (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_darken_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t ss, dd; - vbool32_t vb; + vbool32_t vb; ss = __riscv_vfmul_vv_f32m1 (da, s, vl); dd = __riscv_vfmul_vv_f32m1 (sa, d, vl); vb = __riscv_vmfgt_vv_f32m1_b32 (ss, dd, vl); @@ -157,14 +205,14 @@ rvv_blend_darken (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_lighten (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_lighten_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t ss, dd; - vbool32_t vb; + vbool32_t vb; ss = __riscv_vfmul_vv_f32m1 (s, da, vl); dd = __riscv_vfmul_vv_f32m1 (d, sa, vl); vb = __riscv_vmfgt_vv_f32m1_b32 (ss, dd, vl); @@ -191,14 +239,14 @@ rvv_blend_lighten (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_color_dodge (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_color_dodge_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4; - vbool32_t is_d_zero, vb, is_t0_non_zero; + vbool32_t is_d_zero, vb, is_t0_non_zero; is_d_zero = __riscv_vmfeq_vf_f32m1_b32 (d, 0.0f, vl); @@ -241,11 +289,11 @@ rvv_blend_color_dodge (const 
vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_color_burn (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_color_burn_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4, t5, t6, t7; vbool32_t is_d_ge_da, is_s_zero, vb; @@ -289,14 +337,14 @@ rvv_blend_color_burn (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_hard_light (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_hard_light_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t t0, t1, t2, t3, t4; - vbool32_t vb; + vbool32_t vb; t0 = __riscv_vfadd_vv_f32m1 (s, s, vl); t1 = __riscv_vfmul_vv_f32m1 (__riscv_vfadd_vv_f32m1 (s, s, vl), d, vl); vb = __riscv_vmfgt_vv_f32m1_b32 (t0, sa, vl); @@ -328,15 +376,14 @@ rvv_blend_hard_light (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_soft_light (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_soft_light_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { - vfloat32m1_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, - t13; - vbool32_t is_sa_lt_2s, is_da_ls_4d, is_da_non_zero; + vfloat32m1_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13; + vbool32_t is_sa_lt_2s, is_da_ls_4d, is_da_non_zero; is_da_non_zero = __riscv_vmfne_vf_f32m1_b32 (da, 0.0f, vl); t0 = __riscv_vfadd_vv_f32m1 (s, s, vl); // 2 * s is_sa_lt_2s = __riscv_vmflt_vv_f32m1_b32 (sa, t0, vl); @@ -358,8 +405,8 @@ rvv_blend_soft_light (const vfloat32m1_t sa, __riscv_vfadd_vv_f32m1 (d, d, vl), vl); is_da_ls_4d = __riscv_vmflt_vv_f32m1_b32 (da, t6, vl); t10 = __riscv_vfsub_vv_f32m1 ( - __riscv_vfsqrt_v_f32m1 (__riscv_vfmul_vv_f32m1 (d, da, vl), vl), d, - vl); // sqrtf (d * da) - d + __riscv_vfsqrt_v_f32m1 (__riscv_vfmul_vv_f32m1 (d, da, vl), vl), d, + vl); // sqrtf (d * da) - d t11 = __riscv_vfmul_vv_f32m1 (t2, t10, vl); // (sqrtf (d * da) - d) * (sa - 2 * s) t12 = __riscv_vfsub_vv_f32m1 ( @@ -397,14 +444,14 @@ rvv_blend_soft_light (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_difference (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_difference_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t dsa, sda; - vbool32_t vb; + vbool32_t vb; dsa = __riscv_vfmul_vv_f32m1 (d, sa, vl); sda = __riscv_vfmul_vv_f32m1 (s, da, vl); vb = __riscv_vmflt_vv_f32m1_b32 (sda, dsa, vl); @@ -422,11 +469,11 @@ rvv_blend_difference (const vfloat32m1_t sa, */ static force_inline vfloat32m1_t -rvv_blend_exclusion (const vfloat32m1_t sa, - const vfloat32m1_t s, - const vfloat32m1_t da, - const vfloat32m1_t d, - size_t vl) +rvv_blend_exclusion_float (const vfloat32m1_t sa, + const vfloat32m1_t s, + const vfloat32m1_t da, + const vfloat32m1_t d, + size_t vl) { vfloat32m1_t t0, t1; t0 = __riscv_vfmul_vv_f32m1 (__riscv_vfadd_vv_f32m1 (d, d, vl), s, vl); @@ -442,13 +489,13 @@ typedef vfloat32m1_t (*rvv_combine_channel_float_t) (const vfloat32m1_t sa, size_t vl); static force_inline void -rvv_combine_inner (pixman_bool_t component, - float *dest, - const float *src, - 
const float *mask, - int n_pixels, - rvv_combine_channel_float_t combine_a, - rvv_combine_channel_float_t combine_c) +rvv_combine_inner_float (pixman_bool_t component, + float *dest, + const float *src, + const float *mask, + int n_pixels, + rvv_combine_channel_float_t combine_a, + rvv_combine_channel_float_t combine_c) { float *__restrict__ pd = dest; const float *__restrict__ ps = src; @@ -618,8 +665,8 @@ rvv_combine_inner (pixman_bool_t component, pixman_implementation_t *imp, pixman_op_t op, float *dest, \ const float *src, const float *mask, int n_pixels) \ { \ - rvv_combine_inner (component, dest, src, mask, n_pixels, combine_a, \ - combine_c); \ + rvv_combine_inner_float (component, dest, src, mask, n_pixels, \ + combine_a, combine_c); \ } #define RVV_MAKE_COMBINERS(name, combine_a, combine_c) \ @@ -627,10 +674,10 @@ rvv_combine_inner (pixman_bool_t component, RVV_MAKE_COMBINER (name##_u, FALSE, combine_a, combine_c) static force_inline vfloat32m1_t -rvv_get_factor (combine_factor_t factor, - vfloat32m1_t sa, - vfloat32m1_t da, - size_t vl) +rvv_get_factor_float (combine_factor_t factor, + vfloat32m1_t sa, + vfloat32m1_t da, + size_t vl) { vfloat32m1_t vone = __riscv_vfmv_v_f_f32m1 (1.0f, vl); vfloat32m1_t vzero = __riscv_vfmv_v_f_f32m1 (0.0f, vl); @@ -752,20 +799,21 @@ rvv_get_factor (combine_factor_t factor, } #define RVV_MAKE_PD_COMBINERS(name, a, b) \ - static vfloat32m1_t force_inline rvv_pd_combine_##name ( \ + static vfloat32m1_t force_inline rvv_pd_combine_##name##_float ( \ vfloat32m1_t sa, vfloat32m1_t s, vfloat32m1_t da, vfloat32m1_t d, \ size_t vl) \ { \ - const vfloat32m1_t fa = rvv_get_factor (a, sa, da, vl); \ - const vfloat32m1_t fb = rvv_get_factor (b, sa, da, vl); \ + const vfloat32m1_t fa = rvv_get_factor_float (a, sa, da, vl); \ + const vfloat32m1_t fb = rvv_get_factor_float (b, sa, da, vl); \ vfloat32m1_t t0 = __riscv_vfadd_vv_f32m1 ( \ - __riscv_vfmul_vv_f32m1 (s, fa, vl), \ - __riscv_vfmul_vv_f32m1 (d, fb, vl), vl); \ + __riscv_vfmul_vv_f32m1 (s, fa, vl), \ + __riscv_vfmul_vv_f32m1 (d, fb, vl), vl); \ return __riscv_vfmin_vv_f32m1 (__riscv_vfmv_v_f_f32m1 (1.0f, vl), t0, \ vl); \ } \ \ - RVV_MAKE_COMBINERS (name, rvv_pd_combine_##name, rvv_pd_combine_##name) + RVV_MAKE_COMBINERS (name, rvv_pd_combine_##name##_float, \ + rvv_pd_combine_##name##_float) RVV_MAKE_PD_COMBINERS (clear, ZERO, ZERO) RVV_MAKE_PD_COMBINERS (src, ONE, ZERO) @@ -834,8 +882,8 @@ RVV_MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA) vl), \ -1.0f, vl); \ \ - return __riscv_vfadd_vv_f32m1 (f, rvv_blend_##name (sa, s, da, d, vl), \ - vl); \ + return __riscv_vfadd_vv_f32m1 ( \ + f, rvv_blend_##name##_float (sa, s, da, d, vl), vl); \ } \ \ RVV_MAKE_COMBINERS (name, rvv_combine_##name##_a, rvv_combine_##name##_c) @@ -852,16 +900,2205 @@ RVV_MAKE_SEPARABLE_PDF_COMBINERS (soft_light) RVV_MAKE_SEPARABLE_PDF_COMBINERS (difference) RVV_MAKE_SEPARABLE_PDF_COMBINERS (exclusion) +// int implementation + +// pixman-combine32.h RVV implementation plus some convenience functions { + +/* + * x_c = min(x_c + y_c, 255) + */ + +#define rvv_UN8_ADD_UN8_vv(x, y, vl) __riscv_vsaddu (x, y, vl) + +#define rvv_UN8x4_ADD_UN8x4_vv_m4(x, y, vl) \ + RVV_U8x4_U32_m4 (rvv_UN8_ADD_UN8_vv (RVV_U32_U8x4_m4 (x), \ + RVV_U32_U8x4_m4 (y), (vl) * 4)) + +/* +* x_c = (x_c * a_c) / 255 +*/ + +#define __rvv_UN8_MUL_UN8_vv(LMUL, LMUL16) \ + static force_inline VUINT8 (LMUL) rvv_UN8_MUL_UN8_vv_##LMUL ( \ + const VUINT8 (LMUL) x, const VUINT8 (LMUL) a, size_t vl) \ + { \ + VUINT16 (LMUL16) \ + mul_higher = 
__riscv_vwmaccu ( \ + __riscv_vmv_v_x_u16##LMUL16 (ONE_HALF, vl), x, a, vl); \ + \ + VUINT16 (LMUL16) \ + mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); \ + \ + return __riscv_vnsrl (__riscv_vadd (mul_higher, mul_lower, vl), \ + G_SHIFT, vl); \ + } +__rvv_UN8_MUL_UN8_vv (m1, m2); +__rvv_UN8_MUL_UN8_vv (m2, m4); +__rvv_UN8_MUL_UN8_vv (m4, m8); + +static force_inline vuint8m4_t +rvv_UN8_MUL_UN8_vx_m4 (const vuint8m4_t x, const uint8_t a, size_t vl) +{ + vuint16m8_t mul_higher = __riscv_vwmaccu ( + __riscv_vmv_v_x_u16m8 (ONE_HALF, vl), a, x, vl); + vuint16m8_t mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); + + return __riscv_vnsrl (__riscv_vadd (mul_higher, mul_lower, vl), G_SHIFT, + vl); +} + +#define __rvv_UN8x4_MUL_UN8x4_vv(LMUL, x, a, vl) \ + RVV_U8x4_U32 (LMUL, rvv_UN8_MUL_UN8_vv_##LMUL (RVV_U32_U8x4 (LMUL, x), \ + RVV_U32_U8x4 (LMUL, a), \ + (vl) * 4)) +#define rvv_UN8x4_MUL_UN8x4_vv_m2(x, a, vl) \ + __rvv_UN8x4_MUL_UN8x4_vv (m2, x, a, vl) +#define rvv_UN8x4_MUL_UN8x4_vv_m4(x, a, vl) \ + __rvv_UN8x4_MUL_UN8x4_vv (m4, x, a, vl) + +/* +* a_c = a (broadcast to all components) +*/ + +#define __rvv_UN16_bcast_UN8x4_v(LMUL, LMUL16) \ + static force_inline VUINT32 (LMUL) \ + rvv_UN16_bcast_UN8x4_v_##LMUL (const VUINT16 (LMUL16) a, size_t vl) \ + { \ + VUINT32 (LMUL) \ + a32 = __riscv_vwcvtu_x (__riscv_vmadd (a, 1 << 8, a, vl), vl); \ + \ + return __riscv_vmadd (a32, 1 << 16, a32, vl); \ + } +__rvv_UN16_bcast_UN8x4_v (m2, m1); +__rvv_UN16_bcast_UN8x4_v (m4, m2); + +#define rvv_UN8_bcast_UN8x4_v_m4(a, vl) \ + rvv_UN16_bcast_UN8x4_v_m4 (__riscv_vwcvtu_x (a, vl), vl) + +/* +* x_c = (x_c * a) / 255 +*/ + +#define rvv_UN8x4_MUL_UN8_vv_m4(x, a, vl) \ + rvv_UN8x4_MUL_UN8x4_vv_m4 (x, rvv_UN8_bcast_UN8x4_v_m4 (a, vl), vl) + +#define __rvv_UN8x4_MUL_UN16_vv(LMUL, x, a, vl) \ + rvv_UN8x4_MUL_UN8x4_vv_##LMUL (x, rvv_UN16_bcast_UN8x4_v_##LMUL (a, vl), vl) +#define rvv_UN8x4_MUL_UN16_vv_m2(x, a, vl) \ + __rvv_UN8x4_MUL_UN16_vv (m2, x, a, vl) +#define rvv_UN8x4_MUL_UN16_vv_m4(x, a, vl) \ + __rvv_UN8x4_MUL_UN16_vv (m4, x, a, vl) + +#define rvv_UN8x4_MUL_UN8_vx_m4(x, a, vl) \ + RVV_U8x4_U32_m4 (rvv_UN8_MUL_UN8_vx_m4 (RVV_U32_U8x4_m4 (x), a, (vl) * 4)) + +static force_inline vuint32m2_t +rvv_DIV_ONE_UN32m2_UN32m2_v (const vuint32m2_t x, size_t vl) +{ + vuint32m2_t mul_higher = __riscv_vadd (x, ONE_HALF, vl); + vuint32m2_t mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); + + return __riscv_vsrl (__riscv_vadd (mul_higher, mul_lower, vl), G_SHIFT, vl); +} + +static force_inline vuint8m2_t +rvv_DIV_ONE_UN32m8_UN8m2_v (const vuint32m8_t x, size_t vl) +{ + vuint32m8_t mul_higher = __riscv_vadd (x, ONE_HALF, vl); + vuint32m8_t mul_lower = __riscv_vsrl (mul_higher, G_SHIFT, vl); + + return __riscv_vncvt_x ( + __riscv_vnsrl (__riscv_vadd (mul_higher, mul_lower, vl), G_SHIFT, vl), + vl); +} + +/* +* x_c = (x_c * a) / 255 + y_c +*/ + +#define rvv_UN8x4_MUL_UN16_ADD_UN8x4_vvv_m4(x, a, y, vl) \ + rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN16_vv_m4 (x, a, vl), y, vl) + +/* +* x_c = (x_c * a + y_c * b) / 255 +*/ + +#define rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4(x, a, y, b, vl) \ + rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN16_vv_m4 (x, a, vl), \ + rvv_UN8x4_MUL_UN16_vv_m4 (y, b, vl), vl) + +/* +* x_c = (x_c * a_c) / 255 + y_c +*/ + +#define rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4(x, a, y, vl) \ + rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN8x4_vv_m4 (x, a, vl), y, vl) + +/* +* x_c = (x_c * a_c + y_c * b) / 255 +*/ + +#define rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4(x, a, y, b, vl) \ + 
rvv_UN8x4_ADD_UN8x4_vv_m4 (rvv_UN8x4_MUL_UN8x4_vv_m4 (x, a, vl), \ + rvv_UN8x4_MUL_UN16_vv_m4 (y, b, vl), vl) + +// } pixman-combine32.h + +// Additional functions. + +#define rvv_shift_alpha_u16(x, vl) __riscv_vnsrl (x, 24, vl) + +#define rvv_shift_not_alpha_u16(x, vl) \ + rvv_shift_alpha_u16 (__riscv_vnot (x, vl), vl) + +#define rvv_load_alpha_u8m1(src, vl) \ + __riscv_vlse8_v_u8m1 ((uint8_t *)src + 3, 4, vl) + +#define rvv_load_not_alpha_u8m1(src, vl) \ + __riscv_vnot (rvv_load_alpha_u8m1 (src, vl), vl) + +#define rvv_u8m2_to_i16m4(in, vl) \ + __riscv_vreinterpret_i16m4 (__riscv_vwcvtu_x (in, vl)) + +#define rvv_over_m4(src, dest, vl) \ + rvv_UN8x4_MUL_UN16_ADD_UN8x4_vvv_m4 ( \ + dest, rvv_shift_not_alpha_u16 (src, vl), src, vl) + +#define rvv_in_m4(x, y, vl) rvv_UN8x4_MUL_UN8_vv_m4 (x, y, vl) + +#define rvv_in_load_s_m_m4(src, mask, vl) \ + rvv_in_m4 (__riscv_vle32_v_u32m4 (src, vl), \ + rvv_load_alpha_u8m1 (mask, vl), vl) + +#define rvv_in_load_s_nm_m4(src, mask, vl) \ + rvv_in_m4 (__riscv_vle32_v_u32m4 (src, vl), \ + rvv_load_not_alpha_u8m1 (mask, vl), vl) + +static force_inline vuint16m2_t +rvv_convert_8888_to_0565_m2 (const vuint32m4_t s, size_t vl) +{ + vuint32m4_t rb = __riscv_vand (s, 0xF800F8, vl); + + return __riscv_vor ( + __riscv_vor (__riscv_vnsrl (rb, 3, vl), __riscv_vnsrl (rb, 8, vl), vl), + __riscv_vand (__riscv_vnsrl (s, 5, vl), 0x7E0, vl), vl); +} + +static force_inline vuint32m4_t +rvv_convert_0565_to_0888_m4 (const vuint16m2_t s, size_t vl) +{ + vuint8m1_t g1, g2; + vuint16m2_t r, g_w, b; + vuint32m4_t r_w, rb_w; + + r = __riscv_vand (s, 0xF800, vl); + b = __riscv_vand (s, 0x001F, vl); + r_w = __riscv_vwmulu (r, 1 << 8, vl); + rb_w = __riscv_vwmaccu (r_w, 1 << 3, b, vl); + rb_w = __riscv_vand (__riscv_vor (rb_w, __riscv_vsrl (rb_w, 5, vl), vl), + 0xFF00FF, vl); + + g1 = __riscv_vsll (__riscv_vnsrl (s, 5, vl), 2, vl); + g2 = __riscv_vsrl (g1, 6, vl); + g_w = __riscv_vwaddu_vv (g1, g2, vl); + + return __riscv_vwmaccu (rb_w, 1 << 8, g_w, vl); +} + +#define rvv_convert_0565_to_8888_m4(s, vl) \ + __riscv_vor (rvv_convert_0565_to_0888_m4 (s, vl), 0xff000000, vl) + +#define __rvv_combine_mask_value_ca(LMUL, src, mask, vl) \ + rvv_UN8x4_MUL_UN8x4_vv_##LMUL (src, mask, vl) +#define rvv_combine_mask_value_ca_m2(src, mask, vl) \ + __rvv_combine_mask_value_ca (m2, src, mask, vl) +#define rvv_combine_mask_value_ca_m4(src, mask, vl) \ + __rvv_combine_mask_value_ca (m4, src, mask, vl) + +#define __rvv_combine_mask_alpha_ca(LMUL, src, mask, vl) \ + rvv_UN8x4_MUL_UN16_vv_##LMUL (mask, rvv_shift_alpha_u16 (src, vl), vl) +#define rvv_combine_mask_alpha_ca_m2(src, mask, vl) \ + __rvv_combine_mask_alpha_ca (m2, src, mask, vl) +#define rvv_combine_mask_alpha_ca_m4(src, mask, vl) \ + __rvv_combine_mask_alpha_ca (m4, src, mask, vl) + +#define __rvv_combine_mask(LMUL, src, mask, vl) \ + rvv_UN8x4_MUL_UN16_vv_##LMUL (src, rvv_shift_alpha_u16 (mask, vl), vl) +#define rvv_combine_mask_m2(src, mask, vl) \ + __rvv_combine_mask (m2, src, mask, vl) +#define rvv_combine_mask_m4(src, mask, vl) \ + __rvv_combine_mask (m4, src, mask, vl) + +#define __rvv_combine_mask_ca(LMUL) \ + static force_inline void rvv_combine_mask_ca_##LMUL ( \ + VUINT32 (LMUL) *__restrict__ src, VUINT32 (LMUL) *__restrict__ mask, \ + size_t vl) \ + { \ + VUINT32 (LMUL) src_cpy = *src; \ + *(src) = rvv_combine_mask_value_ca_##LMUL (*(src), *(mask), vl); \ + *(mask) = rvv_combine_mask_alpha_ca_##LMUL (src_cpy, *(mask), vl); \ + } +__rvv_combine_mask_ca (m2); +__rvv_combine_mask_ca (m4); + +static void +rvv_combine_clear 
(pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *pd = dest; + + vuint32m8_t v = __riscv_vmv_v_x_u32m8 (0, __riscv_vsetvlmax_e32m8 ()); + RVV_FOREACH_1 (width, vl, e32m8, pd) { __riscv_vse32 (pd, v, vl); } +} + +static void +rvv_combine_src_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, rvv_in_load_s_m_m4 (ps, pm, vl), vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m8, ps, pd) + { + __riscv_vse32 (pd, __riscv_vle32_v_u32m8 (ps, vl), vl); + } + } +} + +static void +rvv_combine_over_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_over_m4 (rvv_in_load_s_m_m4 (ps, pm, vl), + __riscv_vle32_v_u32m4 (pd, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 (pd, + rvv_over_m4 (__riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pd, vl), vl), + vl); + } + } +} + +static void +rvv_combine_over_reverse_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_over_m4 (__riscv_vle32_v_u32m4 (pd, vl), + rvv_in_load_s_m_m4 (ps, pm, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 (pd, + rvv_over_m4 (__riscv_vle32_v_u32m4 (pd, vl), + __riscv_vle32_v_u32m4 (ps, vl), vl), + vl); + } + } +} + +static void +rvv_combine_in_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_in_m4 (rvv_in_load_s_m_m4 (ps, pm, vl), + rvv_load_alpha_u8m1 (pd, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 (pd, rvv_in_load_s_m_m4 (ps, pd, vl), vl); + } + } +} + +static void +rvv_combine_in_reverse_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_in_m4 (__riscv_vle32_v_u32m4 (pd, vl), + rvv_UN8_MUL_UN8_vv_m1 ( + 
rvv_load_alpha_u8m1 (ps, vl), + rvv_load_alpha_u8m1 (pm, vl), vl), + vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 (pd, rvv_in_load_s_m_m4 (pd, ps, vl), vl); + } + } +} + +static void +rvv_combine_out_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_in_m4 (rvv_in_load_s_m_m4 (ps, pm, vl), + rvv_load_not_alpha_u8m1 (pd, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 (pd, rvv_in_load_s_nm_m4 (ps, pd, vl), vl); + } + } +} + +static void +rvv_combine_out_reverse_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 ( + pd, + rvv_in_m4 (__riscv_vle32_v_u32m4 (pd, vl), + __riscv_vnot (rvv_UN8_MUL_UN8_vv_m1 ( + rvv_load_alpha_u8m1 (ps, vl), + rvv_load_alpha_u8m1 (pm, vl), vl), + vl), + vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 (pd, rvv_in_load_s_nm_m4 (pd, ps, vl), vl); + } + } +} + +static void +rvv_combine_atop_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + vuint32m4_t s, d; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = rvv_in_load_s_m_m4 (ps, pm, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_alpha_u16 (d, vl), d, + rvv_shift_not_alpha_u16 (s, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_alpha_u16 (d, vl), d, + rvv_shift_not_alpha_u16 (s, vl), vl), + vl); + } + } +} + +static void +rvv_combine_atop_reverse_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + vuint32m4_t s, d; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = rvv_in_load_s_m_m4 (ps, pm, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_not_alpha_u16 (d, vl), d, + rvv_shift_alpha_u16 (s, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_not_alpha_u16 (d, vl), d, + rvv_shift_alpha_u16 (s, vl), vl), + vl); + } + 
} +} + +static void +rvv_combine_xor_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + vuint32m4_t s, d; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = rvv_in_load_s_m_m4 (ps, pm, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_not_alpha_u16 (d, vl), d, + rvv_shift_not_alpha_u16 (s, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_not_alpha_u16 (d, vl), d, + rvv_shift_not_alpha_u16 (s, vl), vl), + vl); + } + } +} + +static void +rvv_combine_add_u (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 ( + pd, + rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), + rvv_in_load_s_m_m4 (ps, pm, vl), vl), + vl); + } + } + else + { + RVV_FOREACH_2 (width, vl, e32m4, ps, pd) + { + __riscv_vse32 ( + pd, + rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), + __riscv_vle32_v_u32m4 (ps, vl), vl), + vl); + } + } +} + +/* + * Multiply + * + * ad * as * B(d / ad, s / as) + * = ad * as * d/ad * s/as + * = d * s + * + */ +static void +rvv_combine_multiply_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t s, d; + if (mask) + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = rvv_in_load_s_m_m4 (ps, pm, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + + __riscv_vse32 (pd, + rvv_UN8x4_ADD_UN8x4_vv_m4 ( + rvv_UN8x4_MUL_UN8x4_vv_m4 (d, s, vl), + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_not_alpha_u16 (d, vl), d, + rvv_shift_not_alpha_u16 (s, vl), vl), + vl), + vl); + } + } + else + { + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + d = __riscv_vle32_v_u32m4 (pd, vl); + + __riscv_vse32 (pd, + rvv_UN8x4_ADD_UN8x4_vv_m4 ( + rvv_UN8x4_MUL_UN8x4_vv_m4 (d, s, vl), + rvv_UN8x4_MUL_UN16_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + s, rvv_shift_not_alpha_u16 (d, vl), d, + rvv_shift_not_alpha_u16 (s, vl), vl), + vl), + vl); + } + } +} + +static void +rvv_combine_multiply_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t s, m, d; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + m = __riscv_vle32_v_u32m4 (pm, vl); + rvv_combine_mask_ca_m4 (&s, &m, vl); + + d = __riscv_vle32_v_u32m4 (pd, vl); + + __riscv_vse32 (pd, + 
rvv_UN8x4_ADD_UN8x4_vv_m4 ( + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + d, __riscv_vnot (m, vl), s, + rvv_shift_not_alpha_u16 (d, vl), vl), + rvv_UN8x4_MUL_UN8x4_vv_m4 (d, s, vl), vl), + vl); + } +} + +#define PDF_SEPARABLE_BLEND_MODE(name) \ + static void rvv_combine_##name##_u ( \ + pixman_implementation_t *imp, pixman_op_t op, uint32_t *dest, \ + const uint32_t *src, const uint32_t *mask, int width) \ + { \ + uint32_t *__restrict__ pd = dest; \ + const uint32_t *__restrict__ ps = src; \ + const uint32_t *__restrict__ pm = mask; \ + \ + vuint32m2_t s, d, ra, rx; \ + vuint16m1_t da, sa; \ + size_t vl4; \ + vuint8m2_t s4, d4, sa4, isa4, da4, ida4; \ + vuint32m8_t rx4; \ + \ + RVV_FOREACH_3 (width, vl, e32m2, ps, pm, pd) \ + { \ + vl4 = vl * 4; \ + \ + s = __riscv_vle32_v_u32m2 (ps, vl); \ + if (mask) \ + s = rvv_combine_mask_m2 (s, __riscv_vle32_v_u32m2 (pm, vl), \ + vl); \ + sa = rvv_shift_alpha_u16 (s, vl); \ + \ + d = __riscv_vle32_v_u32m2 (pd, vl); \ + da = rvv_shift_alpha_u16 (d, vl); \ + \ + ra = __riscv_vsub (__riscv_vwaddu_vv (__riscv_vmul (da, 0xFF, vl), \ + __riscv_vmul (sa, 0xFF, vl), \ + vl), \ + __riscv_vwmulu (sa, da, vl), vl); \ + \ + s4 = RVV_U32_U8x4_m2 (s); \ + sa4 = RVV_U32_U8x4_m2 (rvv_UN16_bcast_UN8x4_v_m2 (sa, vl)); \ + isa4 = __riscv_vnot (sa4, vl4); \ + d4 = RVV_U32_U8x4_m2 (d); \ + da4 = RVV_U32_U8x4_m2 (rvv_UN16_bcast_UN8x4_v_m2 (da, vl)); \ + ida4 = __riscv_vnot (da4, vl4); \ + \ + rx4 = __riscv_vadd ( \ + __riscv_vwaddu_vv (__riscv_vwmulu (isa4, d4, vl4), \ + __riscv_vwmulu (ida4, s4, vl4), vl4), \ + rvv_blend_##name##_int (d4, da4, s4, sa4, vl4), vl4); \ + \ + ra = __riscv_vminu (ra, 255 * 255, vl); \ + rx4 = __riscv_vminu (rx4, 255 * 255, vl4); \ + \ + ra = rvv_DIV_ONE_UN32m2_UN32m2_v (ra, vl); \ + rx = RVV_U8x4_U32_m2 (rvv_DIV_ONE_UN32m8_UN8m2_v (rx4, vl4)); \ + \ + __riscv_vse32 (pd, \ + __riscv_vor (__riscv_vsll (ra, 24, vl), \ + __riscv_vand (rx, 0x00FFFFFF, vl), \ + vl), \ + vl); \ + } \ + } \ + \ + static void rvv_combine_##name##_ca ( \ + pixman_implementation_t *imp, pixman_op_t op, uint32_t *dest, \ + const uint32_t *src, const uint32_t *mask, int width) \ + { \ + uint32_t *__restrict__ pd = dest; \ + const uint32_t *__restrict__ ps = src; \ + const uint32_t *__restrict__ pm = mask; \ + \ + vuint32m2_t s, m, d, ra, rx; \ + vuint16m1_t da, sa; \ + size_t vl4; \ + vuint8m2_t s4, m4, d4, ixa4, da4, ida4; \ + vuint32m8_t rx4; \ + \ + RVV_FOREACH_3 (width, vl, e32m2, ps, pm, pd) \ + { \ + m = __riscv_vle32_v_u32m2 (pm, vl); \ + s = __riscv_vle32_v_u32m2 (ps, vl); \ + rvv_combine_mask_ca_m2 (&s, &m, vl); \ + sa = rvv_shift_alpha_u16 (s, vl); \ + \ + d = __riscv_vle32_v_u32m2 (pd, vl); \ + da = rvv_shift_alpha_u16 (d, vl); \ + \ + ra = __riscv_vsub (__riscv_vwaddu_vv (__riscv_vmul (da, 0xFF, vl), \ + __riscv_vmul (sa, 0xFF, vl), \ + vl), \ + __riscv_vwmulu (sa, da, vl), vl); \ + \ + ixa4 = RVV_U32_U8x4_m2 (__riscv_vnot (m, vl)); \ + d4 = RVV_U32_U8x4_m2 (d); \ + ida4 = RVV_U32_U8x4_m2 ( \ + __riscv_vnot (rvv_UN16_bcast_UN8x4_v_m2 (da, vl), vl)); \ + s4 = RVV_U32_U8x4_m2 (s); \ + da4 = RVV_U32_U8x4_m2 (rvv_UN16_bcast_UN8x4_v_m2 (da, vl)); \ + m4 = RVV_U32_U8x4_m2 (m); \ + \ + vl4 = vl * 4; \ + rx4 = __riscv_vadd ( \ + __riscv_vwaddu_vv (__riscv_vwmulu (ixa4, d4, vl4), \ + __riscv_vwmulu (ida4, s4, vl4), vl4), \ + rvv_blend_##name##_int (d4, da4, s4, m4, vl4), vl4); \ + \ + ra = __riscv_vminu (ra, 255 * 255, vl); \ + rx4 = __riscv_vminu (rx4, 255 * 255, vl4); \ + \ + ra = rvv_DIV_ONE_UN32m2_UN32m2_v (ra, vl); \ + rx = RVV_U8x4_U32_m2 
(rvv_DIV_ONE_UN32m8_UN8m2_v (rx4, vl4)); \ + \ + __riscv_vse32 (pd, \ + __riscv_vor (__riscv_vsll (ra, 24, vl), \ + __riscv_vand (rx, 0x00FFFFFF, vl), \ + vl), \ + vl); \ + } \ + } + +static force_inline vuint32m8_t +rvv_blend_screen_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + return __riscv_vsub (__riscv_vwaddu_vv (__riscv_vwmulu (s, ad, vl), + __riscv_vwmulu (d, as, vl), vl), + __riscv_vwcvtu_x (__riscv_vwmulu (s, d, vl), vl), vl); +} + +PDF_SEPARABLE_BLEND_MODE (screen) + +static force_inline vuint32m8_t +_rvv_blend_overlay_hard_light (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + const vbool4_t selector, + size_t vl) +{ + vuint32m8_t out_true = __riscv_vwmulu (__riscv_vwmulu (s, d, vl), 2, vl); + + vint16m4_t d_i = rvv_u8m2_to_i16m4 (d, vl); + vint16m4_t ad_i = rvv_u8m2_to_i16m4 (ad, vl); + vint16m4_t s_i = rvv_u8m2_to_i16m4 (s, vl); + vint16m4_t as_i = rvv_u8m2_to_i16m4 (as, vl); + + vuint32m8_t out_false = __riscv_vreinterpret_v_i32m8_u32m8 (__riscv_vsub ( + __riscv_vwmul (as_i, ad_i, vl), + __riscv_vsll (__riscv_vwmul (__riscv_vsub (ad_i, d_i, vl), + __riscv_vsub (as_i, s_i, vl), vl), + 1, vl), + vl)); + + return __riscv_vmerge (out_false, out_true, selector, vl); +} + +static force_inline vuint32m8_t +rvv_blend_overlay_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + return _rvv_blend_overlay_hard_light ( + d, ad, s, as, + __riscv_vmsltu (__riscv_vwmulu (d, 2, vl), __riscv_vwcvtu_x (ad, vl), + vl), + vl); +} + +PDF_SEPARABLE_BLEND_MODE (overlay) + +static force_inline vuint32m8_t +rvv_blend_darken_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + return __riscv_vwcvtu_x (__riscv_vminu (__riscv_vwmulu (ad, s, vl), + __riscv_vwmulu (as, d, vl), vl), + vl); +} + +PDF_SEPARABLE_BLEND_MODE (darken) + +static force_inline vuint32m8_t +rvv_blend_lighten_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + return __riscv_vwcvtu_x (__riscv_vmaxu (__riscv_vwmulu (as, d, vl), + __riscv_vwmulu (ad, s, vl), vl), + vl); +} + +PDF_SEPARABLE_BLEND_MODE (lighten) + +static force_inline vuint32m8_t +rvv_blend_hard_light_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + return _rvv_blend_overlay_hard_light ( + d, ad, s, as, + __riscv_vmsltu (__riscv_vwmulu (s, 2, vl), __riscv_vwcvtu_x (as, vl), + vl), + vl); +} + +PDF_SEPARABLE_BLEND_MODE (hard_light) + +static force_inline vuint32m8_t +rvv_blend_difference_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + vuint16m4_t das = __riscv_vwmulu (d, as, vl); + vuint16m4_t sad = __riscv_vwmulu (s, ad, vl); + + return __riscv_vmerge (__riscv_vwsubu_vv (sad, das, vl), + __riscv_vwsubu_vv (das, sad, vl), + __riscv_vmsltu (sad, das, vl), vl); +} + +PDF_SEPARABLE_BLEND_MODE (difference) + +static force_inline vuint32m8_t +rvv_blend_exclusion_int (const vuint8m2_t d, + const vuint8m2_t ad, + const vuint8m2_t s, + const vuint8m2_t as, + size_t vl) +{ + return __riscv_vsub (__riscv_vwaddu_vv (__riscv_vwmulu (s, ad, vl), + __riscv_vwmulu (d, as, vl), vl), + __riscv_vwmulu (__riscv_vwmulu (d, s, vl), 2, vl), vl); +} + +PDF_SEPARABLE_BLEND_MODE (exclusion) + +#undef PDF_SEPARABLE_BLEND_MODE + +static void +rvv_combine_over_ca (pixman_implementation_t 
*__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t s, m; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + m = __riscv_vle32_v_u32m4 (pm, vl); + rvv_combine_mask_ca_m4 (&s, &m, vl); + + __riscv_vse32 ( + pd, + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( + __riscv_vle32_v_u32m4 (pd, vl), __riscv_vnot (m, vl), s, vl), + vl); + } +} + +static void +rvv_combine_over_reverse_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t d; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 ( + pd, + rvv_UN8x4_MUL_UN16_ADD_UN8x4_vvv_m4 ( + rvv_UN8x4_MUL_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + rvv_shift_not_alpha_u16 (d, vl), d, vl), + vl); + } +} + +static void +rvv_combine_atop_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t d, s, m; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + m = __riscv_vle32_v_u32m4 (pm, vl); + rvv_combine_mask_ca_m4 (&s, &m, vl); + + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 ( + pd, + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + d, __riscv_vnot (m, vl), s, rvv_shift_alpha_u16 (d, vl), vl), + vl); + } +} + +static void +rvv_combine_xor_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t d, s, m; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + m = __riscv_vle32_v_u32m4 (pm, vl); + rvv_combine_mask_ca_m4 (&s, &m, vl); + + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + d, __riscv_vnot (m, vl), s, + rvv_shift_not_alpha_u16 (d, vl), vl), + vl); + } +} + +static void +rvv_combine_atop_reverse_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + vuint32m4_t d, s, m; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + s = __riscv_vle32_v_u32m4 (ps, vl); + m = __riscv_vle32_v_u32m4 (pm, vl); + rvv_combine_mask_ca_m4 (&s, &m, vl); + + d = __riscv_vle32_v_u32m4 (pd, vl); + __riscv_vse32 (pd, + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN16_vvvv_m4 ( + d, m, s, rvv_shift_not_alpha_u16 (d, vl), vl), + vl); + } +} + +static void +rvv_combine_src_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + 
uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 ( + pd, + rvv_combine_mask_value_ca_m4 (__riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + vl); + } +} + +static void +rvv_combine_in_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_in_m4 (rvv_combine_mask_value_ca_m4 ( + __riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + rvv_load_alpha_u8m1 (pd, vl), vl), + vl); + } +} + +static void +rvv_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 ( + pd, + rvv_UN8x4_MUL_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), + rvv_combine_mask_alpha_ca_m4 ( + __riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + vl), + vl); + } +} + +static void +rvv_combine_out_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 (pd, + rvv_in_m4 (rvv_combine_mask_value_ca_m4 ( + __riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + rvv_load_not_alpha_u8m1 (pd, vl), vl), + vl); + } +} + +static void +rvv_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 ( + pd, + rvv_UN8x4_MUL_UN8x4_vv_m4 ( + __riscv_vle32_v_u32m4 (pd, vl), + __riscv_vnot_v_u32m4 (rvv_combine_mask_alpha_ca_m4 ( + __riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + vl), + vl), + vl); + } +} + +static void +rvv_combine_add_ca (pixman_implementation_t *__restrict__ imp, + pixman_op_t op, + uint32_t *__restrict__ dest, + const uint32_t *__restrict__ src, + const uint32_t *__restrict__ mask, + int width) +{ + uint32_t *__restrict__ pd = dest; + const uint32_t *__restrict__ ps = src; + const uint32_t *__restrict__ pm = mask; + + RVV_FOREACH_3 (width, vl, e32m4, ps, pm, pd) + { + __riscv_vse32 ( + pd, + rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (pd, vl), + rvv_combine_mask_value_ca_m4 ( + __riscv_vle32_v_u32m4 (ps, vl), + __riscv_vle32_v_u32m4 (pm, vl), vl), + vl), + vl); + } +} + +static void +rvv_composite_src_x888_8888 (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *__restrict__ dst_line, *__restrict__ 
dst; + uint32_t *__restrict__ src_line, *__restrict__ src; + int32_t dst_stride, src_stride; + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e32m8, src, dst) + { + __riscv_vse32 ( + dst, + __riscv_vor (__riscv_vle32_v_u32m8 (src, vl), 0xff000000, vl), + vl); + } + } +} + +static void +rvv_composite_src_8888_8888 (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *__restrict__ dst_line, *__restrict__ dst; + uint32_t *__restrict__ src_line, *__restrict__ src; + int32_t dst_stride, src_stride; + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e32m8, src, dst) + { + __riscv_vse32 (dst, __riscv_vle32_v_u32m8 (src, vl), vl); + } + } +} + +static void +rvv_composite_over_x888_8_8888 (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *__restrict__ src, *__restrict__ src_line; + uint32_t *__restrict__ dst, *__restrict__ dst_line; + uint8_t *__restrict__ mask, *__restrict__ mask_line; + int32_t src_stride, mask_stride, dst_stride; + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, + mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_3 (width, vl, e32m4, src, mask, dst) + { + __riscv_vse32 ( + dst, + rvv_over_m4 ( + rvv_in_m4 (__riscv_vor (__riscv_vle32_v_u32m4 (src, vl), + 0xff000000, vl), + __riscv_vle8_v_u8m1 (mask, vl), vl), + __riscv_vle32_v_u32m4 (dst, vl), vl), + vl); + } + } +} + +static void +rvv_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e32m4, src, dst) + { + __riscv_vse32 (dst, + rvv_over_m4 (__riscv_vle32_v_u32m4 (src, vl), + __riscv_vle32_v_u32m4 (dst, vl), vl), + vl); + } + } +} + +static void +rvv_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *__restrict__ dst_line, *__restrict__ dst; + uint8_t *__restrict__ mask_line, *__restrict__ mask; + int dst_stride, mask_stride; + uint32_t src; + vuint32m4_t vsrc; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); + + 
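/*
 * Standalone reference sketch (not part of the upstream patch): the scalar
 * per-pixel work of the n_8_0565 loop below.  The a8 mask byte gates the
 * solid source, the result is OVER-composited onto the r5g6b5 destination
 * after widening it to 8888, and the composite is packed back to r5g6b5.
 * The two helpers show the widening/packing step only; their names are
 * hypothetical, and the bit replication is what the
 * rvv_convert_0565_to_0888 / rvv_convert_8888_to_0565 helpers are assumed
 * to perform.
 */
#include <stdint.h>

static inline uint32_t
scalar_0565_to_8888 (uint16_t p)
{
    uint32_t r = (p >> 11) & 0x1f;
    uint32_t g = (p >> 5)  & 0x3f;
    uint32_t b =  p        & 0x1f;

    /* widen 5/6-bit channels to 8 bits by replicating the top bits */
    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return 0xff000000u | (r << 16) | (g << 8) | b;
}

static inline uint16_t
scalar_8888_to_0565 (uint32_t p)
{
    /* keep the top 5/6/5 bits of r/g/b, drop alpha */
    return (uint16_t)(((p >> 8) & 0xf800) |
                      ((p >> 5) & 0x07e0) |
                      ((p >> 3) & 0x001f));
}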
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, + mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e16m2, mask, dst) + { + __riscv_vse16 ( + dst, + rvv_convert_8888_to_0565_m2 ( + rvv_over_m4 ( + rvv_in_m4 (vsrc, __riscv_vle8_v_u8m1 (mask, vl), vl), + rvv_convert_0565_to_0888_m4 ( + __riscv_vle16_v_u16m2 (dst, vl), vl), + vl), + vl), + vl); + } + } +} + +static void +rvv_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint32_t src; + vuint32m4_t vsrc; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, + mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e32m4, mask, dst) + { + __riscv_vse32 ( + dst, + rvv_over_m4 ( + rvv_in_m4 (vsrc, __riscv_vle8_v_u8m1 (mask, vl), vl), + __riscv_vle32_v_u32m4 (dst, vl), vl), + vl); + } + } +} + +static void +rvv_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *mask_line, *mask; + int dst_stride, mask_stride; + uint32_t src; + vuint32m4_t vsrc; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, + mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e32m4, mask, dst) + { + __riscv_vse32 (dst, + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( + __riscv_vle32_v_u32m4 (mask, vl), vsrc, + __riscv_vle32_v_u32m4 (dst, vl), vl), + vl); + } + } +} + +static void +rvv_composite_over_n_8888_8888_ca (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *__restrict__ dst_line, *__restrict__ dst; + uint32_t *__restrict__ mask_line, *__restrict__ mask; + int dst_stride, mask_stride; + uint32_t src, srca; + vuint32m4_t vsrc; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + srca = src >> 24; + vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, + mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e32m4, mask, dst) + { + vuint32m4_t m = __riscv_vle32_v_u32m4 (mask, vl); + __riscv_vse32 ( + dst, + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( + __riscv_vle32_v_u32m4 (dst, vl), + __riscv_vnot 
(rvv_UN8x4_MUL_UN8_vx_m4 (m, srca, vl), vl), + rvv_UN8x4_MUL_UN8x4_vv_m4 (m, vsrc, vl), vl), + vl); + } + } +} + +static void +rvv_composite_over_n_8888_0565_ca (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *__restrict__ dst_line, *__restrict__ dst; + uint32_t *__restrict__ mask_line, *__restrict__ mask; + int dst_stride, mask_stride; + uint32_t src, srca; + vuint32m4_t vsrc; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + vsrc = __riscv_vmv_v_x_u32m4 (src, __riscv_vsetvlmax_e32m4 ()); + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, + mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e32m4, mask, dst) + { + vuint32m4_t ma = __riscv_vle32_v_u32m4 (mask, vl); + + __riscv_vse16 ( + dst, + rvv_convert_8888_to_0565_m2 ( + rvv_UN8x4_MUL_UN8x4_ADD_UN8x4_vvv_m4 ( + rvv_convert_0565_to_0888_m4 ( + __riscv_vle16_v_u16m2 (dst, vl), vl), + __riscv_vnot (rvv_UN8x4_MUL_UN8_vx_m4 (ma, srca, vl), + vl), + rvv_UN8x4_MUL_UN8x4_vv_m4 (ma, vsrc, vl), vl), + vl), + vl); + } + } +} + +static void +rvv_composite_over_8888_0565 (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *__restrict__ dst_line, *__restrict__ dst; + uint32_t *__restrict__ src_line, *__restrict__ src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, + dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e16m2, src, dst) + { + __riscv_vse16 ( + dst, + rvv_convert_8888_to_0565_m2 ( + rvv_over_m4 (__riscv_vle32_v_u32m4 (src, vl), + rvv_convert_0565_to_0888_m4 ( + __riscv_vle16_v_u16m2 (dst, vl), vl), + vl), + vl), + vl); + } + } +} + +static void +rvv_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, + src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, + dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e8m8, src, dst) + { + __riscv_vse8 (dst, + rvv_UN8_ADD_UN8_vv (__riscv_vle8_v_u8m8 (src, vl), + __riscv_vle8_v_u8m8 (dst, vl), + vl), + vl); + } + } +} + +static void +rvv_composite_add_0565_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint16_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, + src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, + dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e16m2, src, dst) + { + __riscv_vse16 (dst, + 
rvv_convert_8888_to_0565_m2 ( + rvv_UN8x4_ADD_UN8x4_vv_m4 ( + rvv_convert_0565_to_8888_m4 ( + __riscv_vle16_v_u16m2 (src, vl), vl), + rvv_convert_0565_to_8888_m4 ( + __riscv_vle16_v_u16m2 (dst, vl), vl), + vl), + vl), + vl); + } + } +} + +static void +rvv_composite_add_8888_8888 (pixman_implementation_t *__restrict__ imp, + pixman_composite_info_t *__restrict__ info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *__restrict__ dst_line, *__restrict__ dst; + uint32_t *__restrict__ src_line, *__restrict__ src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e32m4, src, dst) + { + __riscv_vse32 ( + dst, + rvv_UN8x4_ADD_UN8x4_vv_m4 (__riscv_vle32_v_u32m4 (src, vl), + __riscv_vle32_v_u32m4 (dst, vl), vl), + vl); + } + } +} + +static void +rvv_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, + mask_line, 1); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + sa = (src >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e8m4, mask, dst) + { + __riscv_vse8 ( + dst, + rvv_UN8_ADD_UN8_vv (rvv_UN8_MUL_UN8_vx_m4 ( + __riscv_vle8_v_u8m4 (mask, vl), sa, vl), + __riscv_vle8_v_u8m4 (dst, vl), vl), + vl); + } + } +} + +static void +rvv_composite_src_memcpy (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; + uint32_t n_bytes = width * bpp; + int dst_stride, src_stride; + uint8_t *dst; + uint8_t *src; + + src_stride = src_image->bits.rowstride * 4; + dst_stride = dest_image->bits.rowstride * 4; + + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; + dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + + while (height--) + { + memcpy (dst, src, n_bytes); + + dst += dst_stride; + src += src_stride; + } +} + +static void +rvv_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + srca = src >> 24; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, + mask_line, 1); + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + RVV_FOREACH_2 (width, vl, e8m4, mask, dst) + { + __riscv_vse8 ( + dst, + rvv_UN8_MUL_UN8_vv_m4 (__riscv_vle8_v_u8m4 (mask, vl), + __riscv_vle8_v_u8m4 (dst, vl), vl), + vl); + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + 
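/*
 * Standalone reference sketch (not part of the upstream patch): the
 * strip-mining shape that the RVV_FOREACH_* macros used throughout this
 * file (defined earlier, outside this hunk) are assumed to expand to.
 * Each iteration asks the hardware for a vector length, processes that
 * many elements, and advances every stream pointer by the same amount.
 * The function name and the saturating-add body are illustrative only.
 */
#include <stdint.h>
#include <riscv_vector.h>

static void
assumed_foreach2_shape_e8m4 (int width, const uint8_t *mask, uint8_t *dst)
{
    while (width > 0)
    {
        size_t vl = __riscv_vsetvl_e8m4 ((size_t)width);

        /* body: load up to vl bytes from each stream, combine, store */
        vuint8m4_t m = __riscv_vle8_v_u8m4 (mask, vl);
        vuint8m4_t d = __riscv_vle8_v_u8m4 (dst, vl);
        __riscv_vse8 (dst, __riscv_vsaddu (m, d, vl), vl);

        mask  += vl;
        dst   += vl;
        width -= (int)vl;
    }
}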
RVV_FOREACH_2 (width, vl, e8m4, mask, dst) + { + __riscv_vse8 (dst, + rvv_UN8_MUL_UN8_vv_m4 ( + rvv_UN8_MUL_UN8_vx_m4 ( + __riscv_vle8_v_u8m4 (mask, vl), srca, vl), + __riscv_vle8_v_u8m4 (dst, vl), vl), + vl); + } + } + } +} + +static void +rvv_composite_in_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, + src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, + dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + RVV_FOREACH_2 (width, vl, e8m4, src, dst) + { + __riscv_vse8 (dst, + rvv_UN8_MUL_UN8_vv_m4 (__riscv_vle8_v_u8m4 (src, vl), + __riscv_vle8_v_u8m4 (dst, vl), + vl), + vl); + } + } +} + +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) + +/* + * There is some potential for hand vectorization, but for now let's leave it + * autovectorized. + */ +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) +{ + if (offs) + { + int leading_pixels = 32 - offs; + if (leading_pixels >= width) + { + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); + return; + } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } + } + while (width >= 32) + { + if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); + } +} + +static void +rvv_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; + + while (height--) + { + pixman_fill1_line (dst, offs, width, (filler & 1)); + dst += stride; + } +} + +#define RVV_FILL(dtypew) \ + static void rvv_fill_u##dtypew (uint32_t *__restrict__ bits, int stride, \ + int x, int y, int width, int height, \ + uint32_t filler) \ + { \ + uint##dtypew##_t *__restrict__ bitsw = (uint##dtypew##_t *)bits; \ + int32_t vstride = stride * (32 / dtypew); \ + vuint##dtypew##m8_t vfiller = __riscv_vmv_v_x_u##dtypew##m8 ( \ + (uint##dtypew##_t)filler, __riscv_vsetvlmax_e##dtypew##m8 ()); \ + \ + bitsw += y * vstride + x; \ + while (height--) \ + { \ + uint##dtypew##_t *__restrict__ d = bitsw; \ + \ + RVV_FOREACH_1 (width, vl, e##dtypew##m8, d) \ + { \ + __riscv_vse##dtypew (d, vfiller, vl); \ + } \ + \ + bitsw += vstride; \ + } \ + } + +RVV_FILL (8); +RVV_FILL (16); +RVV_FILL (32); + +static pixman_bool_t +rvv_fill (pixman_implementation_t *__restrict__ imp, + uint32_t *__restrict__ bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) +{ + switch (bpp) + { + case 1: + rvv_fill1 (bits, stride, x, y, width, height, filler); + break; + case 8: + rvv_fill_u8 (bits, stride, x, y, width, height, filler); + break; + case 16: + rvv_fill_u16 (bits, stride, x, y, width, height, filler); + break; + case 32: + rvv_fill_u32 (bits, stride, x, y, width, height, filler); + break; + default: + return FALSE; + } + + return TRUE; +} + +static void +rvv_composite_solid_fill (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; + + src = _pixman_image_get_solid (imp, 
src_image, dest_image->bits.format); + + if (dest_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dest_image->bits.format == PIXMAN_a8) + { + src = src >> 24; + } + else if (dest_image->bits.format == PIXMAN_r5g6b5 || + dest_image->bits.format == PIXMAN_b5g6r5) + { + src = convert_8888_to_0565 (src); + } + + rvv_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), dest_x, dest_y, + width, height, src); +} + +#define RVV_BLT(dtypew) \ + static void rvv_blt_u##dtypew ( \ + uint32_t *__restrict__ src_bits, uint32_t *__restrict__ dst_bits, \ + int src_stride, int dst_stride, int src_x, int src_y, int dest_x, \ + int dest_y, int width, int height) \ + { \ + uint##dtypew##_t *src_w = (uint##dtypew##_t *)src_bits; \ + uint##dtypew##_t *dst_w = (uint##dtypew##_t *)dst_bits; \ + \ + src_stride = src_stride * (32 / dtypew); \ + dst_stride = dst_stride * (32 / dtypew); \ + \ + src_w += src_stride * src_y + src_x; \ + dst_w += dst_stride * dest_y + dest_x; \ + \ + while (height--) \ + { \ + uint##dtypew##_t *__restrict__ pd = dst_w; \ + uint##dtypew##_t *__restrict__ ps = src_w; \ + \ + RVV_FOREACH_2 (width, vl, e##dtypew##m8, ps, pd) \ + { \ + __riscv_vse##dtypew ( \ + pd, __riscv_vle##dtypew##_v_u##dtypew##m8 (ps, vl), vl); \ + } \ + \ + dst_w += dst_stride; \ + src_w += src_stride; \ + } \ + } +RVV_BLT (8); +RVV_BLT (16); +RVV_BLT (32); + +static pixman_bool_t +rvv_blt (pixman_implementation_t *__restrict__ imp, + uint32_t *__restrict__ src_bits, + uint32_t *__restrict__ dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 8: + rvv_blt_u8 (src_bits, dst_bits, src_stride, dst_stride, src_x, + src_y, dest_x, dest_y, width, height); + break; + case 16: + rvv_blt_u16 (src_bits, dst_bits, src_stride, dst_stride, src_x, + src_y, dest_x, dest_y, width, height); + break; + case 32: + rvv_blt_u32 (src_bits, dst_bits, src_stride, dst_stride, src_x, + src_y, dest_x, dest_y, width, height); + break; + default: + return FALSE; + } + + return TRUE; +} + +// clang-format off static const pixman_fast_path_t rvv_fast_paths[] = { + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, rvv_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, rvv_composite_over_n_8_0565), + // PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, rvv_composite_over_n_8_0888), + // PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, rvv_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, rvv_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, rvv_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, rvv_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, rvv_composite_over_n_8_8888), + // PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, rvv_composite_over_n_1_8888), + // PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, rvv_composite_over_n_1_8888), + // PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, rvv_composite_over_n_1_8888), + // PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, rvv_composite_over_n_1_8888), + // PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, rvv_composite_over_n_1_0565), + // PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, rvv_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, rvv_composite_over_n_8888_8888_ca), + 
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, rvv_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, rvv_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, rvv_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, rvv_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, rvv_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, rvv_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, rvv_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, rvv_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, rvv_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, rvv_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, rvv_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, rvv_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, rvv_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, rvv_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, rvv_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, rvv_composite_add_0565_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, rvv_composite_add_0565_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, rvv_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, rvv_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, rvv_composite_add_8_8), + // PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, rvv_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, rvv_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, rvv_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, rvv_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, rvv_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, 
b5g6r5, null, b5g6r5, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, rvv_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, rvv_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, rvv_composite_in_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, rvv_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, rvv_composite_src_8888_8888), + {PIXMAN_OP_NONE}, }; -// clang-format off pixman_implementation_t * _pixman_implementation_create_rvv (pixman_implementation_t *fallback) { - pixman_implementation_t *imp = _pixman_implementation_create (fallback, rvv_fast_paths); + pixman_implementation_t *imp = _pixman_implementation_create ( + fallback, rvv_fast_paths); + // clang-format off imp->combine_float[PIXMAN_OP_CLEAR] = rvv_combine_clear_u_float; imp->combine_float[PIXMAN_OP_SRC] = rvv_combine_src_u_float; imp->combine_float[PIXMAN_OP_DST] = rvv_combine_dst_u_float; @@ -955,7 +3192,7 @@ _pixman_implementation_create_rvv (pixman_implementation_t *fallback) imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = rvv_combine_conjoint_over_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = rvv_combine_conjoint_over_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = rvv_combine_conjoint_in_ca_float; - imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] =rvv_combine_conjoint_in_reverse_ca_float; + imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = rvv_combine_conjoint_in_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = rvv_combine_conjoint_out_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = rvv_combine_conjoint_out_reverse_ca_float; imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = rvv_combine_conjoint_atop_ca_float; @@ -981,7 +3218,54 @@ _pixman_implementation_create_rvv (pixman_implementation_t *fallback) imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = rvv_combine_dst_u_float; imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = rvv_combine_dst_u_float; + /* Set up function pointers */ + imp->combine_32[PIXMAN_OP_CLEAR] = rvv_combine_clear; + imp->combine_32[PIXMAN_OP_SRC] = rvv_combine_src_u; + imp->combine_32[PIXMAN_OP_OVER] = rvv_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = rvv_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = rvv_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = rvv_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = rvv_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = rvv_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = rvv_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = rvv_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = rvv_combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = rvv_combine_add_u; + + imp->combine_32[PIXMAN_OP_MULTIPLY] = rvv_combine_multiply_u; + imp->combine_32[PIXMAN_OP_SCREEN] = rvv_combine_screen_u; + imp->combine_32[PIXMAN_OP_OVERLAY] = rvv_combine_overlay_u; + imp->combine_32[PIXMAN_OP_DARKEN] = rvv_combine_darken_u; + imp->combine_32[PIXMAN_OP_LIGHTEN] = rvv_combine_lighten_u; + imp->combine_32[PIXMAN_OP_HARD_LIGHT] = rvv_combine_hard_light_u; + imp->combine_32[PIXMAN_OP_DIFFERENCE] = rvv_combine_difference_u; + 
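/*
 * Standalone reference sketch (not part of the upstream patch): the scalar
 * form of the PDF separable blend modes whose RVV combiners are registered
 * here, shown for "screen".  The PDF_SEPARABLE_BLEND_MODE expansion earlier
 * works in 255-scaled fixed point: rx = (1 - sa)*d + (1 - da)*s +
 * blend(d, da, s, sa), clamped to 255*255 and divided by 255 with rounding;
 * the alpha channel gets ra = sa + da - sa*da.  Function names below are
 * hypothetical.
 */
#include <stdint.h>

static inline uint8_t
scalar_div_255 (uint32_t x)
{
    /* rounded x / 255 for x <= 255 * 255 */
    x += 0x80;
    return (uint8_t)((x + (x >> 8)) >> 8);
}

static inline uint32_t
scalar_blend_screen_u (uint32_t s, uint32_t d)
{
    uint32_t sa = s >> 24;
    uint32_t da = d >> 24;

    /* ra = sa + da - sa*da, kept scaled by 255 until the final division */
    uint32_t r = (uint32_t)scalar_div_255 (sa * 255 + da * 255 - sa * da) << 24;

    for (int shift = 0; shift < 24; shift += 8)
    {
        uint32_t sc = (s >> shift) & 0xff;
        uint32_t dc = (d >> shift) & 0xff;

        /* screen blend term, scaled by 255: s*da + d*sa - s*d */
        uint32_t blend = sc * da + dc * sa - sc * dc;
        uint32_t rx    = (255 - sa) * dc + (255 - da) * sc + blend;

        if (rx > 255 * 255) /* mirror the vector code's vminu clamp */
            rx = 255 * 255;

        r |= (uint32_t)scalar_div_255 (rx) << shift;
    }

    return r;
}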
imp->combine_32[PIXMAN_OP_EXCLUSION] = rvv_combine_exclusion_u; + + imp->combine_32_ca[PIXMAN_OP_CLEAR] = rvv_combine_clear; + imp->combine_32_ca[PIXMAN_OP_SRC] = rvv_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = rvv_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = rvv_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = rvv_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = rvv_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = rvv_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = rvv_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = rvv_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = rvv_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = rvv_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = rvv_combine_add_ca; + + imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = rvv_combine_multiply_ca; + imp->combine_32_ca[PIXMAN_OP_SCREEN] = rvv_combine_screen_ca; + imp->combine_32_ca[PIXMAN_OP_OVERLAY] = rvv_combine_overlay_ca; + imp->combine_32_ca[PIXMAN_OP_DARKEN] = rvv_combine_darken_ca; + imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = rvv_combine_lighten_ca; + imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = rvv_combine_hard_light_ca; + imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = rvv_combine_difference_ca; + imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = rvv_combine_exclusion_ca; + + imp->fill = rvv_fill; + imp->blt = rvv_blt; + return imp; } - -// clang-format on \ No newline at end of file +// clang-format on diff --git a/gfx/cairo/libpixman/src/pixman-utils.c b/gfx/cairo/libpixman/src/pixman-utils.c index 302cd0c290c9..b171af982eb7 100644 --- a/gfx/cairo/libpixman/src/pixman-utils.c +++ b/gfx/cairo/libpixman/src/pixman-utils.c @@ -303,6 +303,43 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst, return retval; } +pixman_bool_t +pixman_region32_copy_from_region64f (pixman_region32_t *dst, + const pixman_region64f_t *src) +{ + int n_boxes, i; + pixman_box64f_t *boxes64f; + pixman_box32_t *boxes32; + pixman_box32_t tmp_boxes[N_TMP_BOXES]; + pixman_bool_t retval; + + boxes64f = pixman_region64f_rectangles (src, &n_boxes); + + if (n_boxes > N_TMP_BOXES) + boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t)); + else + boxes32 = tmp_boxes; + + if (!boxes32) + return FALSE; + + for (i = 0; i < n_boxes; ++i) + { + boxes32[i].x1 = boxes64f[i].x1; + boxes32[i].y1 = boxes64f[i].y1; + boxes32[i].x2 = boxes64f[i].x2; + boxes32[i].y2 = boxes64f[i].y2; + } + + pixman_region32_fini (dst); + retval = pixman_region32_init_rects (dst, boxes32, n_boxes); + + if (boxes32 != tmp_boxes) + free (boxes32); + + return retval; +} + /* This function is exported for the sake of the test suite and not part * of the ABI. 
*/ diff --git a/gfx/cairo/libpixman/src/pixman-version.h b/gfx/cairo/libpixman/src/pixman-version.h index 5767975fcda7..3505c5f6fa60 100644 --- a/gfx/cairo/libpixman/src/pixman-version.h +++ b/gfx/cairo/libpixman/src/pixman-version.h @@ -32,10 +32,10 @@ #endif #define PIXMAN_VERSION_MAJOR 0 -#define PIXMAN_VERSION_MINOR 44 -#define PIXMAN_VERSION_MICRO 2 +#define PIXMAN_VERSION_MINOR 46 +#define PIXMAN_VERSION_MICRO 0 -#define PIXMAN_VERSION_STRING "0.44.2" +#define PIXMAN_VERSION_STRING "0.46.0" #define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ ((major) * 10000) \ diff --git a/gfx/cairo/libpixman/src/pixman-vmx.c b/gfx/cairo/libpixman/src/pixman-vmx.c index ceac1f3189db..399cfcc64579 100644 --- a/gfx/cairo/libpixman/src/pixman-vmx.c +++ b/gfx/cairo/libpixman/src/pixman-vmx.c @@ -28,121 +28,121 @@ #ifdef HAVE_CONFIG_H #include #endif -#include "pixman-private.h" #include "pixman-combine32.h" #include "pixman-inlines.h" +#include "pixman-private.h" #include -#define AVV(x...) {x} +static const vector unsigned char vzero = (const vector unsigned char){0}; +static vector unsigned char mask_ff000000; -static vector unsigned int mask_ff000000; -static vector unsigned int mask_red; -static vector unsigned int mask_green; -static vector unsigned int mask_blue; -static vector unsigned int mask_565_fix_rb; -static vector unsigned int mask_565_fix_g; - -static force_inline vector unsigned int -splat_alpha (vector unsigned int pix) +static force_inline vector unsigned char +splat_alpha (vector unsigned char pix) { + const vector unsigned char sel = (vector unsigned char){ #ifdef WORDS_BIGENDIAN - return vec_perm (pix, pix, - (vector unsigned char)AVV ( - 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, - 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C)); + 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, + 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C, #else - return vec_perm (pix, pix, - (vector unsigned char)AVV ( - 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, - 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F)); + 0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, + 0x0B, 0x0B, 0x0B, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F, #endif + }; + + return vec_perm (pix, pix, sel); +} + +static force_inline vector unsigned char +splat_pixel (vector unsigned char pix) +{ + const vector unsigned char sel = (vector unsigned char){ + 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, + }; + + return vec_perm (pix, pix, sel); +} + +static force_inline vector unsigned short +create_mask_16_128 (uint32_t mask) +{ + return (vector unsigned short){mask, mask, mask, mask, + mask, mask, mask, mask}; } static force_inline vector unsigned int -splat_pixel (vector unsigned int pix) +create_mask_32_128 (uint32_t mask) { - return vec_perm (pix, pix, - (vector unsigned char)AVV ( - 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, - 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03)); + return (vector unsigned int){mask, mask, mask, mask}; } -static force_inline vector unsigned int -pix_multiply (vector unsigned int p, vector unsigned int a) +static force_inline vector unsigned char +unpacklo_128_16x8 (vector unsigned char data1, vector unsigned char data2) { - vector unsigned short hi, lo, mod; - - /* unpack to short */ - hi = (vector unsigned short) #ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)p); + return vec_mergel (data2, data1); #else - vec_mergeh ((vector unsigned char) p, - (vector unsigned char) AVV (0)); + return vec_mergel (data1, 
data2); #endif - - mod = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)a); -#else - vec_mergeh ((vector unsigned char) a, - (vector unsigned char) AVV (0)); -#endif - - hi = vec_mladd (hi, mod, (vector unsigned short) - AVV (0x0080, 0x0080, 0x0080, 0x0080, - 0x0080, 0x0080, 0x0080, 0x0080)); - - hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - - hi = vec_sr (hi, vec_splat_u16 (8)); - - /* unpack to short */ - lo = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)p); -#else - vec_mergel ((vector unsigned char) p, - (vector unsigned char) AVV (0)); -#endif - - mod = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)a); -#else - vec_mergel ((vector unsigned char) a, - (vector unsigned char) AVV (0)); -#endif - - lo = vec_mladd (lo, mod, (vector unsigned short) - AVV (0x0080, 0x0080, 0x0080, 0x0080, - 0x0080, 0x0080, 0x0080, 0x0080)); - - lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - - lo = vec_sr (lo, vec_splat_u16 (8)); - - return (vector unsigned int)vec_packsu (hi, lo); } -static force_inline vector unsigned int -pix_add (vector unsigned int a, vector unsigned int b) +static force_inline vector unsigned char +unpackhi_128_16x8 (vector unsigned char data1, vector unsigned char data2) { - return (vector unsigned int)vec_adds ((vector unsigned char)a, - (vector unsigned char)b); +#ifdef WORDS_BIGENDIAN + return vec_mergeh (data2, data1); +#else + return vec_mergeh (data1, data2); +#endif } -static force_inline vector unsigned int -pix_add_mul (vector unsigned int x, - vector unsigned int a, - vector unsigned int y, - vector unsigned int b) +static force_inline void +unpack_128_2x128 (vector unsigned char data1, + vector unsigned char data2, + vector unsigned char *data_lo, + vector unsigned char *data_hi) { - vector unsigned int t1, t2; + *data_lo = unpacklo_128_16x8 (data1, data2); + *data_hi = unpackhi_128_16x8 (data1, data2); +} + +static force_inline vector unsigned char +pix_multiply (vector unsigned char a, vector unsigned char b) +{ + const vector unsigned char sel = (vector unsigned char){ +#ifdef WORDS_BIGENDIAN + 0x00, 0x10, 0x02, 0x12, 0x04, 0x14, 0x06, 0x16, + 0x08, 0x18, 0x0a, 0x1a, 0x0c, 0x1c, 0x0e, 0x1e, +#else + 0x01, 0x11, 0x03, 0x13, 0x05, 0x15, 0x07, 0x17, + 0x09, 0x19, 0x0b, 0x1b, 0x0d, 0x1d, 0x0f, 0x1f, +#endif + }; + vector unsigned short e = vec_mule (a, b); + vector unsigned short o = vec_mulo (a, b); + + e = vec_adds (e, create_mask_16_128 (128)); + o = vec_adds (o, create_mask_16_128 (128)); + + e = vec_adds (e, vec_sr (e, vec_splat_u16 (8))); + o = vec_adds (o, vec_sr (o, vec_splat_u16 (8))); + + return (vector unsigned char)vec_perm (e, o, sel); +} + +static force_inline vector unsigned char +pix_add (vector unsigned char a, vector unsigned char b) +{ + return vec_adds (a, b); +} + +static force_inline vector unsigned char +pix_add_mul (vector unsigned char x, + vector unsigned char a, + vector unsigned char y, + vector unsigned char b) +{ + vector unsigned char t1, t2; t1 = pix_multiply (x, a); t2 = pix_multiply (y, b); @@ -150,67 +150,65 @@ pix_add_mul (vector unsigned int x, return pix_add (t1, t2); } -static force_inline vector unsigned int -negate (vector unsigned int src) +static force_inline vector unsigned char +negate (vector unsigned char src) { return vec_nor (src, src); } /* dest*~srca + src */ -static force_inline vector unsigned int 
-over (vector unsigned int src, - vector unsigned int srca, - vector unsigned int dest) +static force_inline vector unsigned char +over (vector unsigned char src, + vector unsigned char srca, + vector unsigned char dest) { - vector unsigned char tmp = (vector unsigned char) - pix_multiply (dest, negate (srca)); - - tmp = vec_adds ((vector unsigned char)src, tmp); - return (vector unsigned int)tmp; + return vec_adds (src, pix_multiply (dest, negate (srca))); } /* in == pix_multiply */ -#define in_over(src, srca, mask, dest) \ - over (pix_multiply (src, mask), \ - pix_multiply (srca, mask), dest) +static force_inline vector unsigned char +in_over (vector unsigned char src, + vector unsigned char srca, + vector unsigned char mask, + vector unsigned char dest) +{ + return over (pix_multiply (src, mask), pix_multiply (srca, mask), dest); +} #ifdef WORDS_BIGENDIAN -#define COMPUTE_SHIFT_MASK(source) \ - source ## _mask = vec_lvsl (0, source); +#define COMPUTE_SHIFT_MASK(source) source##_mask = vec_lvsl (0, source); -#define COMPUTE_SHIFT_MASKS(dest, source) \ - source ## _mask = vec_lvsl (0, source); +#define COMPUTE_SHIFT_MASKS(dest, source) source##_mask = vec_lvsl (0, source); -#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ - mask ## _mask = vec_lvsl (0, mask); \ - source ## _mask = vec_lvsl (0, source); +#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ + mask##_mask = vec_lvsl (0, mask); \ + source##_mask = vec_lvsl (0, source); -#define LOAD_VECTOR(source) \ -do \ -{ \ - vector unsigned char tmp1, tmp2; \ - tmp1 = (typeof(tmp1))vec_ld (0, source); \ - tmp2 = (typeof(tmp2))vec_ld (15, source); \ - v ## source = (typeof(v ## source)) \ - vec_perm (tmp1, tmp2, source ## _mask); \ -} while (0) +#define LOAD_VECTOR(source) \ + do \ + { \ + vector unsigned char tmp1, tmp2; \ + tmp1 = (typeof (tmp1))vec_ld (0, source); \ + tmp2 = (typeof (tmp2))vec_ld (15, source); \ + v##source = (typeof (v##source))vec_perm (tmp1, tmp2, source##_mask); \ + } while (0) -#define LOAD_VECTORS(dest, source) \ -do \ -{ \ - LOAD_VECTOR(source); \ - v ## dest = (typeof(v ## dest))vec_ld (0, dest); \ -} while (0) +#define LOAD_VECTORS(dest, source) \ + do \ + { \ + LOAD_VECTOR (source); \ + v##dest = (typeof (v##dest))vec_ld (0, dest); \ + } while (0) -#define LOAD_VECTORSC(dest, source, mask) \ -do \ -{ \ - LOAD_VECTORS(dest, source); \ - LOAD_VECTOR(mask); \ -} while (0) +#define LOAD_VECTORSC(dest, source, mask) \ + do \ + { \ + LOAD_VECTORS (dest, source); \ + LOAD_VECTOR (mask); \ + } while (0) -#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask +#define DECLARE_SRC_MASK_VAR vector unsigned char src_mask #define DECLARE_MASK_MASK_VAR vector unsigned char mask_mask #else @@ -226,42 +224,39 @@ do \ #define COMPUTE_SHIFT_MASKC(dest, source, mask) -# define LOAD_VECTOR(source) \ - v ## source = (typeof(v ## source))vec_xl(0, source); +#define LOAD_VECTOR(source) v##source = (typeof (v##source))vec_xl (0, source); -# define LOAD_VECTORS(dest, source) \ - LOAD_VECTOR(source); \ - LOAD_VECTOR(dest); \ +#define LOAD_VECTORS(dest, source) \ + LOAD_VECTOR (source); \ + LOAD_VECTOR (dest); -# define LOAD_VECTORSC(dest, source, mask) \ - LOAD_VECTORS(dest, source); \ - LOAD_VECTOR(mask); \ +#define LOAD_VECTORSC(dest, source, mask) \ + LOAD_VECTORS (dest, source); \ + LOAD_VECTOR (mask); #define DECLARE_SRC_MASK_VAR #define DECLARE_MASK_MASK_VAR #endif /* WORDS_BIGENDIAN */ -#define LOAD_VECTORSM(dest, source, mask) \ - LOAD_VECTORSC (dest, source, mask); \ - v ## source = pix_multiply (v ## source, \ - splat_alpha 
(v ## mask)); +#define LOAD_VECTORSM(dest, source, mask) \ + LOAD_VECTORSC (dest, source, mask); \ + v##source = pix_multiply (v##source, splat_alpha (v##mask)); -#define STORE_VECTOR(dest) \ - vec_st ((vector unsigned int) v ## dest, 0, dest); +#define STORE_VECTOR(dest) vec_st ((vector unsigned int)v##dest, 0, dest); /* load 4 pixels from a 16-byte boundary aligned address */ -static force_inline vector unsigned int -load_128_aligned (const uint32_t* src) +static force_inline vector unsigned char +load_128_aligned (const uint32_t *src) { - return *((vector unsigned int *) src); + return *((vector unsigned char *)src); } /* load 4 pixels from a unaligned address */ -static force_inline vector unsigned int -load_128_unaligned (const uint32_t* src) +static force_inline vector unsigned char +load_128_unaligned (const uint32_t *src) { - vector unsigned int vsrc; + vector unsigned char vsrc; DECLARE_SRC_MASK_VAR; COMPUTE_SHIFT_MASK (src); @@ -272,143 +267,27 @@ load_128_unaligned (const uint32_t* src) /* save 4 pixels on a 16-byte boundary aligned address */ static force_inline void -save_128_aligned (uint32_t* data, - vector unsigned int vdata) +save_128_aligned (uint32_t *data, vector unsigned char vdata) { - STORE_VECTOR(data) -} - -static force_inline vector unsigned int -create_mask_32_128 (uint32_t mask) -{ - return (vector unsigned int) {mask, mask, mask, mask}; -} - -static force_inline vector unsigned int -unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned char lo; - - /* unpack to short */ - lo = (vector unsigned char) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned char) data2, - (vector unsigned char) data1); -#else - vec_mergel ((vector unsigned char) data1, - (vector unsigned char) data2); -#endif - - return (vector unsigned int) lo; -} - -static force_inline vector unsigned int -unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned char hi; - - /* unpack to short */ - hi = (vector unsigned char) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned char) data2, - (vector unsigned char) data1); -#else - vec_mergeh ((vector unsigned char) data1, - (vector unsigned char) data2); -#endif - - return (vector unsigned int) hi; -} - -static force_inline vector unsigned int -unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned short lo; - - /* unpack to char */ - lo = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergel ((vector unsigned short) data2, - (vector unsigned short) data1); -#else - vec_mergel ((vector unsigned short) data1, - (vector unsigned short) data2); -#endif - - return (vector unsigned int) lo; -} - -static force_inline vector unsigned int -unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2) -{ - vector unsigned short hi; - - /* unpack to char */ - hi = (vector unsigned short) -#ifdef WORDS_BIGENDIAN - vec_mergeh ((vector unsigned short) data2, - (vector unsigned short) data1); -#else - vec_mergeh ((vector unsigned short) data1, - (vector unsigned short) data2); -#endif - - return (vector unsigned int) hi; -} - -static force_inline void -unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2, - vector unsigned int* data_lo, vector unsigned int* data_hi) -{ - *data_lo = unpacklo_128_16x8(data1, data2); - *data_hi = unpackhi_128_16x8(data1, data2); -} - -static force_inline void -unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2, - vector unsigned int* data_lo, vector 
unsigned int* data_hi) -{ - *data_lo = unpacklo_128_8x16(data1, data2); - *data_hi = unpackhi_128_8x16(data1, data2); -} - -static force_inline vector unsigned int -unpack_565_to_8888 (vector unsigned int lo) -{ - vector unsigned int r, g, b, rb, t; - - r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red); - g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green); - b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue); - - rb = vec_or (r, b); - t = vec_and (rb, mask_565_fix_rb); - t = vec_sr (t, create_mask_32_128(5)); - rb = vec_or (rb, t); - - t = vec_and (g, mask_565_fix_g); - t = vec_sr (t, create_mask_32_128(6)); - g = vec_or (g, t); - - return vec_or (rb, g); + STORE_VECTOR (data) } static force_inline int -is_opaque (vector unsigned int x) +is_opaque (vector unsigned char x) { return vec_all_eq (vec_and (x, mask_ff000000), mask_ff000000); } static force_inline int -is_zero (vector unsigned int x) +is_zero (vector unsigned char x) { - return vec_all_eq (x, (vector unsigned int) AVV (0)); + return vec_all_eq (x, vzero); } static force_inline int -is_transparent (vector unsigned int x) +is_transparent (vector unsigned char x) { - return vec_all_eq (vec_and (x, mask_ff000000), (vector unsigned int) AVV (0)); + return vec_all_eq (vec_and (x, mask_ff000000), vzero); } static force_inline uint32_t @@ -416,7 +295,7 @@ core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst) { uint32_t a; - a = ALPHA_8(src); + a = ALPHA_8 (src); if (a == 0xff) { @@ -424,7 +303,7 @@ core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst) } else if (src) { - UN8x4_MUL_UN8_ADD_UN8x4(dst, (~a & MASK), src); + UN8x4_MUL_UN8_ADD_UN8x4 (dst, (~a & MASK), src); } return dst; @@ -436,45 +315,42 @@ combine1 (const uint32_t *ps, const uint32_t *pm) uint32_t s = *ps; if (pm) - UN8x4_MUL_UN8(s, ALPHA_8(*pm)); + UN8x4_MUL_UN8 (s, ALPHA_8 (*pm)); return s; } -static force_inline vector unsigned int -combine4 (const uint32_t* ps, const uint32_t* pm) +static force_inline vector unsigned char +combine4 (const uint32_t *ps, const uint32_t *pm) { - vector unsigned int src, msk; + vector unsigned char src, msk; if (pm) { - msk = load_128_unaligned(pm); + msk = load_128_unaligned (pm); - if (is_transparent(msk)) - return (vector unsigned int) AVV(0); + if (is_transparent (msk)) + return vzero; } - src = load_128_unaligned(ps); + src = load_128_unaligned (ps); if (pm) - src = pix_multiply(src, msk); + src = pix_multiply (src, msk); return src; } static void -vmx_combine_over_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_over_u_no_mask (uint32_t *dest, const uint32_t *src, int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t ia = ALPHA_8 (~s); UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); @@ -486,7 +362,7 @@ vmx_combine_over_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -499,10 +375,10 @@ vmx_combine_over_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t ia = ALPHA_8 (~s); UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); @@ -512,13 +388,12 @@ vmx_combine_over_u_no_mask (uint32_t * 
dest, } static void -vmx_combine_over_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_over_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -541,7 +416,7 @@ vmx_combine_over_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -554,7 +429,7 @@ vmx_combine_over_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; @@ -572,11 +447,11 @@ vmx_combine_over_u_mask (uint32_t * dest, static void vmx_combine_over_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_over_u_mask (dest, src, mask, width); @@ -585,18 +460,17 @@ vmx_combine_over_u (pixman_implementation_t *imp, } static void -vmx_combine_over_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_over_reverse_u_no_mask (uint32_t *dest, + const uint32_t *src, + int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t ia = ALPHA_8 (~d); UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); @@ -607,7 +481,7 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -620,10 +494,10 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t ia = ALPHA_8 (~dest[i]); UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); @@ -632,21 +506,20 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest, } static void -vmx_combine_over_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_over_reverse_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; uint32_t ia = ALPHA_8 (~d); UN8x4_MUL_UN8 (s, m); @@ -659,7 +532,7 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -673,11 +546,11 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t m = 
ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t ia = ALPHA_8 (~dest[i]); UN8x4_MUL_UN8 (s, m); @@ -689,11 +562,11 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest, static void vmx_combine_over_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_over_reverse_u_mask (dest, src, mask, width); @@ -702,12 +575,9 @@ vmx_combine_over_reverse_u (pixman_implementation_t *imp, } static void -vmx_combine_in_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_in_u_no_mask (uint32_t *dest, const uint32_t *src, int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) @@ -723,7 +593,7 @@ vmx_combine_in_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -735,7 +605,7 @@ vmx_combine_in_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t a = ALPHA_8 (dest[i]); @@ -746,13 +616,12 @@ vmx_combine_in_u_no_mask (uint32_t * dest, } static void -vmx_combine_in_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_in_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -772,7 +641,7 @@ vmx_combine_in_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -785,7 +654,7 @@ vmx_combine_in_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; @@ -800,11 +669,11 @@ vmx_combine_in_u_mask (uint32_t * dest, static void vmx_combine_in_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_in_u_mask (dest, src, mask, width); @@ -813,12 +682,11 @@ vmx_combine_in_u (pixman_implementation_t *imp, } static void -vmx_combine_in_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_in_reverse_u_no_mask (uint32_t *dest, + const uint32_t *src, + int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) @@ -835,7 +703,7 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -847,7 +715,7 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t d = dest[i]; uint32_t a = ALPHA_8 (src[i]); @@ -859,13 +727,12 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest, } static void 
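/*
 * Illustrative scalar sketch of the masked "unified" (_u) combiner pattern
 * used throughout this file: the premultiplied source pixel is first scaled
 * by the mask's alpha (what LOAD_VECTORSM does four pixels at a time), and
 * only then is the Porter-Duff operator applied.  This is a sketch for
 * exposition only -- mul_un8_sketch and in_reverse_u_sketch are hypothetical
 * helpers, not pixman API.
 */
#include <stdint.h>

static inline uint8_t
mul_un8_sketch (uint8_t a, uint8_t b)
{
    /* 8-bit multiply with rounding, approximating a * b / 255 */
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

static inline uint32_t
in_reverse_u_sketch (uint32_t dest, uint32_t src, uint32_t mask)
{
    /* effective source alpha after the mask: alpha(src) * alpha(mask) */
    uint8_t  sa = mul_un8_sketch (src >> 24, mask >> 24);
    uint32_t r  = 0;

    /* IN_REVERSE: every channel of dest is scaled by that alpha */
    for (int shift = 0; shift < 32; shift += 8)
        r |= (uint32_t)mul_un8_sketch ((dest >> shift) & 0xff, sa) << shift;

    return r;
}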
-vmx_combine_in_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_in_reverse_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -886,7 +753,7 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -899,7 +766,7 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t d = dest[i]; @@ -915,11 +782,11 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest, static void vmx_combine_in_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_in_reverse_u_mask (dest, src, mask, width); @@ -928,12 +795,9 @@ vmx_combine_in_reverse_u (pixman_implementation_t *imp, } static void -vmx_combine_out_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_out_u_no_mask (uint32_t *dest, const uint32_t *src, int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) @@ -950,7 +814,7 @@ vmx_combine_out_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -962,7 +826,7 @@ vmx_combine_out_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t a = ALPHA_8 (~dest[i]); @@ -974,13 +838,12 @@ vmx_combine_out_u_no_mask (uint32_t * dest, } static void -vmx_combine_out_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_out_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -1000,7 +863,7 @@ vmx_combine_out_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -1013,7 +876,7 @@ vmx_combine_out_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; @@ -1028,11 +891,11 @@ vmx_combine_out_u_mask (uint32_t * dest, static void vmx_combine_out_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_out_u_mask (dest, src, mask, width); @@ -1041,12 +904,11 @@ vmx_combine_out_u (pixman_implementation_t *imp, } static void -vmx_combine_out_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_out_reverse_u_no_mask (uint32_t *dest, + const 
uint32_t *src, + int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) @@ -1063,7 +925,7 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -1076,7 +938,7 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t d = dest[i]; uint32_t a = ALPHA_8 (~src[i]); @@ -1088,13 +950,12 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest, } static void -vmx_combine_out_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_out_reverse_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -1115,7 +976,7 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -1128,7 +989,7 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t d = dest[i]; @@ -1144,11 +1005,11 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest, static void vmx_combine_out_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_out_reverse_u_mask (dest, src, mask, width); @@ -1157,18 +1018,15 @@ vmx_combine_out_reverse_u (pixman_implementation_t *imp, } static void -vmx_combine_atop_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_atop_u_no_mask (uint32_t *dest, const uint32_t *src, int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t dest_a = ALPHA_8 (d); uint32_t src_ia = ALPHA_8 (~s); @@ -1181,12 +1039,12 @@ vmx_combine_atop_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (vdest), vdest, + splat_alpha (negate (vsrc))); STORE_VECTOR (dest); @@ -1194,10 +1052,10 @@ vmx_combine_atop_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t dest_a = ALPHA_8 (d); uint32_t src_ia = ALPHA_8 (~s); @@ -1208,21 +1066,20 @@ vmx_combine_atop_u_no_mask (uint32_t * dest, } static void -vmx_combine_atop_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_atop_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { 
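/*
 * What this combiner computes per channel of a premultiplied pixel (a sketch;
 * the exact rounding is whatever the UN8x4_* macros use).  With s' = s scaled
 * by the mask's alpha and channels normalized to [0,1], Porter-Duff ATOP is
 *
 *     dest = s' * alpha(dest) + dest * (1 - alpha(s'))
 *
 * which is what the pix_add_mul (vsrc, splat_alpha (vdest), vdest,
 * splat_alpha (negate (vsrc))) call in the vector loop below evaluates,
 * and what the scalar head and tail loops compute one pixel at a time.
 */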
- int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t m = ALPHA_8 (*mask++); - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; uint32_t dest_a = ALPHA_8 (d); uint32_t src_ia; @@ -1239,12 +1096,12 @@ vmx_combine_atop_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (vdest), vdest, + splat_alpha (negate (vsrc))); STORE_VECTOR (dest); @@ -1253,11 +1110,11 @@ vmx_combine_atop_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t m = ALPHA_8 (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t dest_a = ALPHA_8 (d); uint32_t src_ia; @@ -1273,11 +1130,11 @@ vmx_combine_atop_u_mask (uint32_t * dest, static void vmx_combine_atop_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_atop_u_mask (dest, src, mask, width); @@ -1286,19 +1143,18 @@ vmx_combine_atop_u (pixman_implementation_t *imp, } static void -vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_atop_reverse_u_no_mask (uint32_t *dest, + const uint32_t *src, + int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t src_a = ALPHA_8 (s); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a = ALPHA_8 (s); uint32_t dest_ia = ALPHA_8 (~d); UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); @@ -1310,12 +1166,12 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); + vdest = pix_add_mul (vdest, splat_alpha (vsrc), vsrc, + splat_alpha (negate (vdest))); STORE_VECTOR (dest); @@ -1323,11 +1179,11 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_a = ALPHA_8 (s); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a = ALPHA_8 (s); uint32_t dest_ia = ALPHA_8 (~d); UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); @@ -1337,13 +1193,12 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, } static void -vmx_combine_atop_reverse_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_atop_reverse_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; 
DECLARE_MASK_MASK_VAR; @@ -1368,12 +1223,12 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); + vdest = pix_add_mul (vdest, splat_alpha (vsrc), vsrc, + splat_alpha (negate (vdest))); STORE_VECTOR (dest); @@ -1382,7 +1237,7 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; @@ -1402,11 +1257,11 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, static void vmx_combine_atop_reverse_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_atop_reverse_u_mask (dest, src, mask, width); @@ -1415,19 +1270,16 @@ vmx_combine_atop_reverse_u (pixman_implementation_t *imp, } static void -vmx_combine_xor_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_xor_u_no_mask (uint32_t *dest, const uint32_t *src, int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t s = *src++; - uint32_t d = *dest; - uint32_t src_ia = ALPHA_8 (~s); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_ia = ALPHA_8 (~s); uint32_t dest_ia = ALPHA_8 (~d); UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); @@ -1439,12 +1291,12 @@ vmx_combine_xor_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), vdest, + splat_alpha (negate (vsrc))); STORE_VECTOR (dest); @@ -1452,11 +1304,11 @@ vmx_combine_xor_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_ia = ALPHA_8 (~s); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia = ALPHA_8 (~s); uint32_t dest_ia = ALPHA_8 (~d); UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); @@ -1466,13 +1318,12 @@ vmx_combine_xor_u_no_mask (uint32_t * dest, } static void -vmx_combine_xor_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_xor_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -1497,12 +1348,12 @@ vmx_combine_xor_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), vdest, + splat_alpha (negate (vsrc))); STORE_VECTOR (dest); @@ -1511,7 +1362,7 @@ 
vmx_combine_xor_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; @@ -1531,11 +1382,11 @@ vmx_combine_xor_u_mask (uint32_t * dest, static void vmx_combine_xor_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_xor_u_mask (dest, src, mask, width); @@ -1544,12 +1395,9 @@ vmx_combine_xor_u (pixman_implementation_t *imp, } static void -vmx_combine_add_u_no_mask (uint32_t * dest, - const uint32_t *src, - int width) +vmx_combine_add_u_no_mask (uint32_t *dest, const uint32_t *src, int width) { - int i; - vector unsigned int vdest, vsrc; + vector unsigned char vdest, vsrc; DECLARE_SRC_MASK_VAR; while (width && ((uintptr_t)dest & 15)) @@ -1565,7 +1413,7 @@ vmx_combine_add_u_no_mask (uint32_t * dest, COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORS (dest, src); @@ -1577,7 +1425,7 @@ vmx_combine_add_u_no_mask (uint32_t * dest, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; @@ -1589,13 +1437,12 @@ vmx_combine_add_u_no_mask (uint32_t * dest, } static void -vmx_combine_add_u_mask (uint32_t * dest, - const uint32_t *src, - const uint32_t *mask, - int width) +vmx_combine_add_u_mask (uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -1615,7 +1462,7 @@ vmx_combine_add_u_mask (uint32_t * dest, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSM (dest, src, mask); @@ -1628,7 +1475,7 @@ vmx_combine_add_u_mask (uint32_t * dest, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; @@ -1643,11 +1490,11 @@ vmx_combine_add_u_mask (uint32_t * dest, static void vmx_combine_add_u (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { if (mask) vmx_combine_add_u_mask (dest, src, mask, width); @@ -1657,14 +1504,13 @@ vmx_combine_add_u (pixman_implementation_t *imp, static void vmx_combine_src_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -1682,7 +1528,7 @@ vmx_combine_src_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -1695,7 +1541,7 @@ vmx_combine_src_ca (pixman_implementation_t *imp, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t a = 
mask[i]; uint32_t s = src[i]; @@ -1708,22 +1554,21 @@ vmx_combine_src_ca (pixman_implementation_t *imp, static void vmx_combine_over_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t sa = ALPHA_8 (s); UN8x4_MUL_UN8x4 (s, a); @@ -1737,7 +1582,7 @@ vmx_combine_over_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -1750,11 +1595,11 @@ vmx_combine_over_ca (pixman_implementation_t *imp, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); UN8x4_MUL_UN8x4 (s, a); @@ -1767,22 +1612,21 @@ vmx_combine_over_ca (pixman_implementation_t *imp, static void vmx_combine_over_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t ida = ALPHA_8 (~d); UN8x4_MUL_UN8x4 (s, a); @@ -1795,7 +1639,7 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -1808,11 +1652,11 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp, dest += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t ida = ALPHA_8 (~d); UN8x4_MUL_UN8x4 (s, a); @@ -1824,21 +1668,20 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp, static void vmx_combine_in_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; + uint32_t a = *mask++; + uint32_t s = *src++; uint32_t da = ALPHA_8 (*dest); UN8x4_MUL_UN8x4 (s, a); @@ -1851,7 +1694,7 @@ vmx_combine_in_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) 
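/*
 * Component-alpha (_ca) note, per channel c of a premultiplied pixel, with
 * channels normalized to [0,1] (a sketch; rounding as in the UN8x4_* macros).
 * Unlike the _u combiners, the mask here carries one alpha per channel, so
 * OVER becomes
 *
 *     dest.c = src.c * mask.c + dest.c * (1 - mask.c * alpha(src))
 *
 * the same computation the in_over () helper near the top of this file
 * expresses in vector form, four pixels at a time.
 */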
+ for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -1864,10 +1707,10 @@ vmx_combine_in_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; uint32_t da = ALPHA_8 (dest[i]); UN8x4_MUL_UN8x4 (s, a); @@ -1879,21 +1722,20 @@ vmx_combine_in_ca (pixman_implementation_t *imp, static void vmx_combine_in_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t d = *dest; uint32_t sa = ALPHA_8 (*src++); UN8x4_MUL_UN8 (a, sa); @@ -1906,7 +1748,7 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -1920,10 +1762,10 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (src[i]); UN8x4_MUL_UN8 (a, sa); @@ -1935,22 +1777,21 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp, static void vmx_combine_out_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t da = ALPHA_8 (~d); UN8x4_MUL_UN8x4 (s, a); @@ -1963,12 +1804,12 @@ vmx_combine_out_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); - vdest = pix_multiply ( - pix_multiply (vsrc, vmask), splat_alpha (negate (vdest))); + vdest = pix_multiply (pix_multiply (vsrc, vmask), + splat_alpha (negate (vdest))); STORE_VECTOR (dest); @@ -1977,11 +1818,11 @@ vmx_combine_out_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t da = ALPHA_8 (~d); UN8x4_MUL_UN8x4 (s, a); @@ -1993,22 +1834,21 @@ vmx_combine_out_ca (pixman_implementation_t *imp, static void vmx_combine_out_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector 
unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t sa = ALPHA_8 (s); UN8x4_MUL_UN8 (a, sa); @@ -2021,7 +1861,7 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -2035,11 +1875,11 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); UN8x4_MUL_UN8 (a, sa); @@ -2051,22 +1891,21 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp, static void vmx_combine_atop_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask, vsrca; + vector unsigned char vdest, vsrc, vmask, vsrca; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (d); @@ -2081,17 +1920,16 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); vsrca = splat_alpha (vsrc); - vsrc = pix_multiply (vsrc, vmask); + vsrc = pix_multiply (vsrc, vmask); vmask = pix_multiply (vmask, vsrca); - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - negate (vmask), vdest); + vdest = pix_add_mul (vsrc, splat_alpha (vdest), negate (vmask), vdest); STORE_VECTOR (dest); @@ -2100,11 +1938,11 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (d); @@ -2118,22 +1956,21 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, static void vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (~d); @@ -2148,12 +1985,11 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { 
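/*
 * Sketch of the next few statements, with channels normalized to [0,1]
 * and a per-channel mask m (component alpha):
 *
 *     dest.c = dest.c * (m.c * alpha(src)) + (src.c * m.c) * (1 - alpha(dest))
 *
 * pix_add_mul (x1, a1, x2, a2) evaluates x1*a1 + x2*a2 channel-wise, so the
 * call below passes vdest, vmask*alpha(vsrc), vsrc*vmask and ~alpha(vdest)
 * to get ATOP_REVERSE under a component mask.
 */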
LOAD_VECTORSC (dest, src, mask); - vdest = pix_add_mul (vdest, - pix_multiply (vmask, splat_alpha (vsrc)), + vdest = pix_add_mul (vdest, pix_multiply (vmask, splat_alpha (vsrc)), pix_multiply (vsrc, vmask), negate (splat_alpha (vdest))); @@ -2164,11 +2000,11 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (~d); @@ -2182,22 +2018,21 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, static void vmx_combine_xor_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; while (width && ((uintptr_t)dest & 15)) { - uint32_t a = *mask++; - uint32_t s = *src++; - uint32_t d = *dest; + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (~d); @@ -2212,14 +2047,13 @@ vmx_combine_xor_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); - vdest = pix_add_mul (vdest, - negate (pix_multiply (vmask, splat_alpha (vsrc))), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); + vdest = pix_add_mul ( + vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))), + pix_multiply (vsrc, vmask), negate (splat_alpha (vdest))); STORE_VECTOR (dest); @@ -2228,11 +2062,11 @@ vmx_combine_xor_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (~d); @@ -2246,14 +2080,13 @@ vmx_combine_xor_ca (pixman_implementation_t *imp, static void vmx_combine_add_ca (pixman_implementation_t *imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) + pixman_op_t op, + uint32_t *dest, + const uint32_t *src, + const uint32_t *mask, + int width) { - int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned char vdest, vsrc, vmask; DECLARE_SRC_MASK_VAR; DECLARE_MASK_MASK_VAR; @@ -2273,7 +2106,7 @@ vmx_combine_add_ca (pixman_implementation_t *imp, COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width / 4; i > 0; i--) + for (int i = width / 4; i > 0; i--) { LOAD_VECTORSC (dest, src, mask); @@ -2286,7 +2119,7 @@ vmx_combine_add_ca (pixman_implementation_t *imp, mask += 4; } - for (i = width % 4; --i >= 0;) + for (int i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; @@ -2301,36 +2134,36 @@ vmx_combine_add_ca (pixman_implementation_t *imp, static void vmx_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) + pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, srca; + uint32_t src, srca; uint32_t *dst_line, *dst; - uint8_t *mask_line; - int dst_stride, mask_stride; - 
int32_t w; - uint32_t m, d, s, ia; + uint8_t *mask_line; + int dst_stride, mask_stride; + int32_t w; + uint32_t m, d, s, ia; - vector unsigned int vsrc, valpha, vmask, vdst; + vector unsigned char vsrc, valpha, vmask, vdst; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - srca = ALPHA_8(src); + srca = ALPHA_8 (src); if (src == 0) return; - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, + mask_line, 1); - vsrc = (vector unsigned int) {src, src, src, src}; - valpha = splat_alpha(vsrc); + vsrc = (vector unsigned char)create_mask_32_128 (src); + valpha = splat_alpha (vsrc); while (height--) { const uint8_t *pm = mask_line; - dst = dst_line; + dst = dst_line; dst_line += dst_stride; mask_line += mask_stride; w = width; @@ -2355,20 +2188,21 @@ vmx_composite_over_n_8_8888 (pixman_implementation_t *imp, while (w >= 4) { - m = *((uint32_t*)pm); + m = *((uint32_t *)pm); if (srca == 0xff && m == 0xffffffff) { - save_128_aligned(dst, vsrc); + save_128_aligned (dst, vsrc); } else if (m) { - vmask = splat_pixel((vector unsigned int) {m, m, m, m}); + vmask = splat_pixel ( + (vector unsigned char)create_mask_32_128 (m)); /* dst is 16-byte aligned */ vdst = in_over (vsrc, valpha, vmask, load_128_aligned (dst)); - save_128_aligned(dst, vdst); + save_128_aligned (dst, vdst); } w -= 4; @@ -2394,19 +2228,18 @@ vmx_composite_over_n_8_8888 (pixman_implementation_t *imp, dst++; } } - } static pixman_bool_t vmx_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t filler) + uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t filler) { uint32_t byte_width; uint8_t *byte_line; @@ -2415,31 +2248,31 @@ vmx_fill (pixman_implementation_t *imp, if (bpp == 8) { - uint8_t b; + uint8_t b; uint16_t w; - stride = stride * (int) sizeof (uint32_t) / 1; - byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); + stride = stride * (int)sizeof (uint32_t) / 1; + byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); byte_width = width; stride *= 1; - b = filler & 0xff; - w = (b << 8) | b; + b = filler & 0xff; + w = (b << 8) | b; filler = (w << 16) | w; } else if (bpp == 16) { - stride = stride * (int) sizeof (uint32_t) / 2; - byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + stride = stride * (int)sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); byte_width = 2 * width; stride *= 2; - filler = (filler & 0xffff) * 0x00010001; + filler = (filler & 0xffff) * 0x00010001; } else if (bpp == 32) { - stride = stride * (int) sizeof (uint32_t) / 4; - byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + stride = stride * (int)sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); byte_width = 4 * width; stride *= 4; } @@ -2448,11 +2281,11 @@ vmx_fill (pixman_implementation_t *imp, return FALSE; } - vfiller = create_mask_32_128(filler); + vfiller = create_mask_32_128 (filler); while (height--) { - int w; + int w; uint8_t *d = byte_line; byte_line += stride; w = byte_width; @@ -2481,14 +2314,14 @@ vmx_fill (pixman_implementation_t *imp, while (w >= 128) { - vec_st(vfiller, 0, (uint32_t *) d); - 
vec_st(vfiller, 0, (uint32_t *) d + 4); - vec_st(vfiller, 0, (uint32_t *) d + 8); - vec_st(vfiller, 0, (uint32_t *) d + 12); - vec_st(vfiller, 0, (uint32_t *) d + 16); - vec_st(vfiller, 0, (uint32_t *) d + 20); - vec_st(vfiller, 0, (uint32_t *) d + 24); - vec_st(vfiller, 0, (uint32_t *) d + 28); + vec_st (vfiller, 0, (uint32_t *)d); + vec_st (vfiller, 0, (uint32_t *)d + 4); + vec_st (vfiller, 0, (uint32_t *)d + 8); + vec_st (vfiller, 0, (uint32_t *)d + 12); + vec_st (vfiller, 0, (uint32_t *)d + 16); + vec_st (vfiller, 0, (uint32_t *)d + 20); + vec_st (vfiller, 0, (uint32_t *)d + 24); + vec_st (vfiller, 0, (uint32_t *)d + 28); d += 128; w -= 128; @@ -2496,10 +2329,10 @@ vmx_fill (pixman_implementation_t *imp, if (w >= 64) { - vec_st(vfiller, 0, (uint32_t *) d); - vec_st(vfiller, 0, (uint32_t *) d + 4); - vec_st(vfiller, 0, (uint32_t *) d + 8); - vec_st(vfiller, 0, (uint32_t *) d + 12); + vec_st (vfiller, 0, (uint32_t *)d); + vec_st (vfiller, 0, (uint32_t *)d + 4); + vec_st (vfiller, 0, (uint32_t *)d + 8); + vec_st (vfiller, 0, (uint32_t *)d + 12); d += 64; w -= 64; @@ -2507,8 +2340,8 @@ vmx_fill (pixman_implementation_t *imp, if (w >= 32) { - vec_st(vfiller, 0, (uint32_t *) d); - vec_st(vfiller, 0, (uint32_t *) d + 4); + vec_st (vfiller, 0, (uint32_t *)d); + vec_st (vfiller, 0, (uint32_t *)d + 4); d += 32; w -= 32; @@ -2516,7 +2349,7 @@ vmx_fill (pixman_implementation_t *imp, if (w >= 16) { - vec_st(vfiller, 0, (uint32_t *) d); + vec_st (vfiller, 0, (uint32_t *)d); d += 16; w -= 16; @@ -2550,18 +2383,18 @@ vmx_fill (pixman_implementation_t *imp, static void vmx_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) + pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int32_t w; - int dst_stride, src_stride; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int32_t w; + int dst_stride, src_stride; - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); while (height--) { @@ -2579,7 +2412,7 @@ vmx_composite_src_x888_8888 (pixman_implementation_t *imp, while (w >= 16) { - vector unsigned int vmx_src1, vmx_src2, vmx_src3, vmx_src4; + vector unsigned char vmx_src1, vmx_src2, vmx_src3, vmx_src4; vmx_src1 = load_128_unaligned (src); vmx_src2 = load_128_unaligned (src + 4); @@ -2606,25 +2439,26 @@ vmx_composite_src_x888_8888 (pixman_implementation_t *imp, static void vmx_composite_over_n_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) + pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; - uint32_t src, ia; - int i, w, dst_stride; - vector unsigned int vdst, vsrc, via; + uint32_t src, ia; + int w, dst_stride; + + vector unsigned char vdst, vsrc, via; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); - vsrc = (vector unsigned int){src, src, src, src}; - via = negate (splat_alpha (vsrc)); - ia = ALPHA_8 (~src); + vsrc = (vector unsigned char)create_mask_32_128 (src); + via = negate (splat_alpha (vsrc)); + ia = 
ALPHA_8 (~src); while (height--) { @@ -2640,14 +2474,14 @@ vmx_composite_over_n_8888 (pixman_implementation_t *imp, w--; } - for (i = w / 4; i > 0; i--) + for (int i = w / 4; i > 0; i--) { vdst = pix_multiply (load_128_aligned (dst), via); save_128_aligned (dst, pix_add (vsrc, vdst)); dst += 4; } - for (i = w % 4; --i >= 0;) + for (int i = w % 4; --i >= 0;) { uint32_t d = dst[i]; UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src); @@ -2658,63 +2492,63 @@ vmx_composite_over_n_8888 (pixman_implementation_t *imp, static void vmx_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_composite_info_t *info) + pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); - int dst_stride, src_stride; - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; + int dst_stride, src_stride; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, + src_line, 1); dst = dst_line; src = src_line; while (height--) { - vmx_combine_over_u (imp, op, dst, src, NULL, width); + vmx_combine_over_u (imp, op, dst, src, NULL, width); - dst += dst_stride; - src += src_stride; + dst += dst_stride; + src += src_stride; } } static void vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_composite_info_t *info) + pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); - uint32_t src, ia; - uint32_t *dst_line, d; - uint32_t *mask_line, m; - uint32_t pack_cmp; - int dst_stride, mask_stride; + uint32_t src, ia; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; - vector unsigned int vsrc, valpha, vmask, vdest; + vector unsigned char vsrc, valpha, vmask, vdest; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; - PIXMAN_IMAGE_GET_LINE ( - dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE ( - mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, + dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, + mask_line, 1); - vsrc = (vector unsigned int) {src, src, src, src}; - valpha = splat_alpha(vsrc); - ia = ALPHA_8 (src); + vsrc = (vector unsigned char)create_mask_32_128 (src); + valpha = splat_alpha (vsrc); + ia = ALPHA_8 (src); while (height--) { - int w = width; + int w = width; const uint32_t *pm = (uint32_t *)mask_line; - uint32_t *pd = (uint32_t *)dst_line; - uint32_t s; + uint32_t *pd = (uint32_t *)dst_line; + uint32_t s; dst_line += dst_stride; mask_line += mask_stride; @@ -2743,7 +2577,7 @@ vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, /* pm is NOT necessarily 16-byte aligned */ vmask = load_128_unaligned (pm); - pack_cmp = vec_all_eq(vmask, (vector unsigned int) AVV(0)); + pack_cmp = vec_all_eq (vmask, vzero); /* if all bits in mask are zero, pack_cmp is not 0 */ if (pack_cmp == 0) @@ -2751,7 +2585,7 @@ vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, /* pd is 16-byte aligned */ vdest = in_over (vsrc, valpha, vmask, load_128_aligned (pd)); - save_128_aligned(pd, vdest); + save_128_aligned (pd, vdest); } pd += 4; @@ -2782,19 +2616,19 @@ 
vmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 static void
 vmx_composite_add_8_8 (pixman_implementation_t *imp,
-                       pixman_composite_info_t *info)
+                       pixman_composite_info_t *info)
 {
     PIXMAN_COMPOSITE_ARGS (info);
-    uint8_t *dst_line, *dst;
-    uint8_t *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
+    uint8_t *dst_line, *dst;
+    uint8_t *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
     uint16_t t;

-    PIXMAN_IMAGE_GET_LINE (
-        src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-        dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride,
+                           src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride,
+                           dst_line, 1);

     while (height--)
     {
@@ -2808,13 +2642,13 @@ vmx_composite_add_8_8 (pixman_implementation_t *imp,
         /* Small head */
         while (w && (uintptr_t)dst & 3)
         {
-            t = (*dst) + (*src++);
+            t = (*dst) + (*src++);
             *dst++ = t | (0 - (t >> 8));
             w--;
         }

-        vmx_combine_add_u (imp, op,
-                           (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+        vmx_combine_add_u (imp, op, (uint32_t *)dst, (uint32_t *)src, NULL,
+                           w >> 2);

         /* Small tail */
         dst += w & 0xfffc;
@@ -2824,7 +2658,7 @@ vmx_composite_add_8_8 (pixman_implementation_t *imp,

         while (w)
         {
-            t = (*dst) + (*src++);
+            t = (*dst) + (*src++);
             *dst++ = t | (0 - (t >> 8));
             w--;
         }
@@ -2833,17 +2667,17 @@ vmx_composite_add_8_8 (pixman_implementation_t *imp,

 static void
 vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
-                             pixman_composite_info_t *info)
+                             pixman_composite_info_t *info)
 {
     PIXMAN_COMPOSITE_ARGS (info);
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    int dst_stride, src_stride;
+    uint32_t *dst_line, *dst;
+    uint32_t *src_line, *src;
+    int dst_stride, src_stride;

-    PIXMAN_IMAGE_GET_LINE (
-        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (
-        dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride,
+                           src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride,
+                           dst_line, 1);

     while (height--)
     {
@@ -2857,18 +2691,18 @@ vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 }

 static force_inline void
-scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd,
-                                            const uint32_t* ps,
-                                            int32_t w,
-                                            pixman_fixed_t vx,
-                                            pixman_fixed_t unit_x,
-                                            pixman_fixed_t src_width_fixed,
-                                            pixman_bool_t fully_transparent_src)
+scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t *pd,
+                                            const uint32_t *ps,
+                                            int32_t w,
+                                            pixman_fixed_t vx,
+                                            pixman_fixed_t unit_x,
+                                            pixman_fixed_t src_width_fixed,
+                                            pixman_bool_t fully_transparent_src)
 {
-    uint32_t s, d;
-    const uint32_t* pm = NULL;
+    uint32_t s, d;
+    const uint32_t *pm = NULL;

-    vector unsigned int vsrc, vdst;
+    vector unsigned char vsrc, vdst;

     if (fully_transparent_src)
         return;
@@ -2917,7 +2751,7 @@ scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd,
         }
         else if (!is_zero (vsrc))
         {
-            vdst = over(vsrc, splat_alpha(vsrc), load_128_aligned (pd));
+            vdst = over (vsrc, splat_alpha (vsrc), load_128_aligned (pd));

             save_128_aligned (pd, vdst);
         }
@@ -2944,6 +2778,7 @@ scaled_nearest_scanline_vmx_8888_8888_OVER (uint32_t* pd,
     }
 }

+/* clang-format off */
 FAST_NEAREST_MAINLOOP (vmx_8888_8888_cover_OVER,
                        scaled_nearest_scanline_vmx_8888_8888_OVER,
                        uint32_t, uint32_t, COVER)
@@ -2990,12 +2825,12 @@ static const pixman_fast_path_t vmx_fast_paths[] =
     { PIXMAN_OP_NONE },
 };
+/* clang-format on */

 static uint32_t *
 vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 {
-    int w = iter->width;
-    vector unsigned int ff000000 = mask_ff000000;
+    int w = iter->width;
     uint32_t *dst = iter->buffer;
     uint32_t *src = (uint32_t *)iter->bits;

@@ -3009,7 +2844,8 @@ vmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)

     while (w >= 4)
     {
-        save_128_aligned(dst, vec_or(load_128_unaligned(src), ff000000));
+        save_128_aligned (dst,
+                          vec_or (load_128_unaligned (src), mask_ff000000));

         dst += 4;
         src += 4;
@@ -3028,31 +2864,32 @@
 static uint32_t *
 vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 {
-    int w = iter->width;
+    int w = iter->width;
     uint32_t *dst = iter->buffer;
-    uint8_t *src = iter->bits;
-    vector unsigned int vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;
+    uint8_t *src = iter->bits;
+
+    vector unsigned char vmx0, vmx1, vmx2, vmx3, vmx4, vmx5, vmx6;

     iter->bits += iter->stride;

     while (w && (((uintptr_t)dst) & 15))
     {
-        *dst++ = *(src++) << 24;
-        w--;
+        *dst++ = *(src++) << 24;
+        w--;
     }

     while (w >= 16)
     {
-        vmx0 = load_128_unaligned((uint32_t *) src);
+        vmx0 = load_128_unaligned ((uint32_t *)src);

-        unpack_128_2x128((vector unsigned int) AVV(0), vmx0, &vmx1, &vmx2);
-        unpack_128_2x128_16((vector unsigned int) AVV(0), vmx1, &vmx3, &vmx4);
-        unpack_128_2x128_16((vector unsigned int) AVV(0), vmx2, &vmx5, &vmx6);
+        unpack_128_2x128 (vzero, vmx0, &vmx1, &vmx2);
+        unpack_128_2x128 (vzero, vmx1, &vmx3, &vmx4);
+        unpack_128_2x128 (vzero, vmx2, &vmx5, &vmx6);

-        save_128_aligned(dst, vmx6);
-        save_128_aligned((dst + 4), vmx5);
-        save_128_aligned((dst + 8), vmx4);
-        save_128_aligned((dst + 12), vmx3);
+        save_128_aligned (dst, vmx6);
+        save_128_aligned ((dst + 4), vmx5);
+        save_128_aligned ((dst + 8), vmx4);
+        save_128_aligned ((dst + 12), vmx3);

         dst += 16;
         src += 16;
@@ -3068,10 +2905,11 @@ vmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
     return iter->buffer;
 }

-#define IMAGE_FLAGS \
-    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
+#define IMAGE_FLAGS \
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)

+/* clang-format off */
 static const pixman_iter_info_t vmx_iters[] =
 {
     { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
       _pixman_iter_init_bits_stride, vmx_fetch_x8r8g8b8, NULL
     },
     { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
       _pixman_iter_init_bits_stride, vmx_fetch_a8, NULL
     },
     { PIXMAN_null },
 };
+/* clang-format on */

 pixman_implementation_t *
 _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
 {
-    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
+    pixman_implementation_t *imp = _pixman_implementation_create (
+        fallback, vmx_fast_paths);

     /* VMX constants */
-    mask_ff000000 = create_mask_32_128 (0xff000000);
-    mask_red = create_mask_32_128 (0x00f80000);
-    mask_green = create_mask_32_128 (0x0000fc00);
-    mask_blue = create_mask_32_128 (0x000000f8);
-    mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
-    mask_565_fix_g = create_mask_32_128 (0x0000c000);
+    mask_ff000000 = (vector unsigned char)create_mask_32_128 (0xff000000);

     /* Set up function pointers */
-    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
+    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
     imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
+    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
+    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
+    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
+    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
+    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
     imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
+    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
     imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

-    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
+    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
+    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
+    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
+    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
+    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
+    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
     imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
+    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
+    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

     imp->fill = vmx_fill;
diff --git a/gfx/cairo/libpixman/src/pixman.c b/gfx/cairo/libpixman/src/pixman.c
index 26e500868ed7..1d77f8ef861f 100644
--- a/gfx/cairo/libpixman/src/pixman.c
+++ b/gfx/cairo/libpixman/src/pixman.c
@@ -739,6 +739,24 @@ pixman_image_composite (pixman_op_t op,
                             mask_x, mask_y, dest_x, dest_y, width, height);
 }

+PIXMAN_EXPORT void
+pixman_image_composite64f (pixman_op_t op,
+                           pixman_image_t * src,
+                           pixman_image_t * mask,
+                           pixman_image_t * dest,
+                           double src_x,
+                           double src_y,
+                           double mask_x,
+                           double mask_y,
+                           double dest_x,
+                           double dest_y,
+                           double width,
+                           double height)
+{
+    pixman_image_composite32 (op, src, mask, dest, src_x, src_y,
+                              mask_x, mask_y, dest_x, dest_y, width, height);
+}
+
 PIXMAN_EXPORT pixman_bool_t
 pixman_blt (uint32_t *src_bits,
             uint32_t *dst_bits,
@@ -854,7 +872,7 @@ pixman_image_fill_rectangles (pixman_op_t op,
                               int n_rects,
                               const pixman_rectangle16_t *rects)
 {
-    pixman_box32_t stack_boxes[6];
+    pixman_box32_t stack_boxes[6] = {0};
     pixman_box32_t *boxes;
     pixman_bool_t result;
     int i;
diff --git a/gfx/cairo/libpixman/src/pixman.h b/gfx/cairo/libpixman/src/pixman.h
index a5c51934ca4e..ad3220ee6f1f 100644
--- a/gfx/cairo/libpixman/src/pixman.h
+++ b/gfx/cairo/libpixman/src/pixman.h
@@ -758,6 +758,173 @@ void pixman_region32_reset (pixman_region32_t
 PIXMAN_API
 void pixman_region32_clear (pixman_region32_t *region);

+/*
+ * 64 bit fractional regions
+ */
+typedef struct pixman_region64f_data pixman_region64f_data_t;
+typedef struct pixman_box64f pixman_box64f_t;
+typedef struct pixman_rectangle64f pixman_rectangle64f_t;
+typedef struct pixman_region64f pixman_region64f_t;
+
+struct pixman_region64f_data {
+    long size;
+    long numRects;
+/*  pixman_box64f_t rects[size];   in memory but not explicitly declared */
+};
+
+struct pixman_rectangle64f
+{
+    double x, y;
+    double width, height;
+};
+
+struct pixman_box64f
+{
+    double x1, y1, x2, y2;
+};
+
+struct pixman_region64f
+{
+    pixman_box64f_t extents;
+    pixman_region64f_data_t *data;
+};
+
+/* creation/destruction */
+PIXMAN_API
+void pixman_region64f_init (pixman_region64f_t *region);
+
+PIXMAN_API
+void pixman_region64f_init_rect (pixman_region64f_t *region,
+                                 int x,
+                                 int y,
+                                 unsigned int width,
+                                 unsigned int height);
+
+PIXMAN_API
+void pixman_region64f_init_rectf (pixman_region64f_t *region,
+                                  double x,
+                                  double y,
+                                  double width,
+                                  double height);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_init_rects (pixman_region64f_t *region,
+                                           const pixman_box64f_t *boxes,
+                                           int count);
+
+PIXMAN_API
+void pixman_region64f_init_with_extents (pixman_region64f_t *region,
+                                         const pixman_box64f_t *extents);
+
+PIXMAN_API
+void pixman_region64f_init_from_image (pixman_region64f_t *region,
+                                       pixman_image_t *image);
+
+PIXMAN_API
+void pixman_region64f_fini (pixman_region64f_t *region);
+
+
+/* manipulation */
+PIXMAN_API
+void pixman_region64f_translate (pixman_region64f_t *region,
+                                 int x,
+                                 int y);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_copy (pixman_region64f_t *dest,
+                                     const pixman_region64f_t *source);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_intersect (pixman_region64f_t *new_reg,
+                                          const pixman_region64f_t *reg1,
+                                          const pixman_region64f_t *reg2);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_union (pixman_region64f_t *new_reg,
+                                      const pixman_region64f_t *reg1,
+                                      const pixman_region64f_t *reg2);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_intersect_rect (pixman_region64f_t *dest,
+                                               const pixman_region64f_t *source,
+                                               int x,
+                                               int y,
+                                               unsigned int width,
+                                               unsigned int height);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_intersect_rectf (pixman_region64f_t *dest,
+                                                const pixman_region64f_t *source,
+                                                double x,
+                                                double y,
+                                                double width,
+                                                double height);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_union_rect (pixman_region64f_t *dest,
+                                           const pixman_region64f_t *source,
+                                           int x,
+                                           int y,
+                                           unsigned int width,
+                                           unsigned int height);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_union_rectf (pixman_region64f_t *dest,
+                                            const pixman_region64f_t *source,
+                                            double x,
+                                            double y,
+                                            double width,
+                                            double height);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_subtract (pixman_region64f_t *reg_d,
+                                         const pixman_region64f_t *reg_m,
+                                         const pixman_region64f_t *reg_s);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_inverse (pixman_region64f_t *new_reg,
+                                        const pixman_region64f_t *reg1,
+                                        const pixman_box64f_t *inv_rect);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_contains_point (const pixman_region64f_t *region,
+                                               int x,
+                                               int y,
+                                               pixman_box64f_t *box);
+
+PIXMAN_API
+pixman_region_overlap_t pixman_region64f_contains_rectangle(const pixman_region64f_t *region,
+                                                            const pixman_box64f_t *prect);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_empty (const pixman_region64f_t *region);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_not_empty (const pixman_region64f_t *region);
+
+PIXMAN_API
+pixman_box64f_t * pixman_region64f_extents (const pixman_region64f_t *region);
+
+PIXMAN_API
+int pixman_region64f_n_rects (const pixman_region64f_t *region);
+
+PIXMAN_API
+pixman_box64f_t * pixman_region64f_rectangles (const pixman_region64f_t *region,
+                                               int *n_rects);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_equal (const pixman_region64f_t *region1,
+                                      const pixman_region64f_t *region2);
+
+PIXMAN_API
+pixman_bool_t pixman_region64f_selfcheck (pixman_region64f_t *region);
+
+PIXMAN_API
+void pixman_region64f_reset (pixman_region64f_t *region,
+                             const pixman_box64f_t *box);
+
+PIXMAN_API
+void pixman_region64f_clear (pixman_region64f_t *region);

 /* Copy / Fill / Misc */
@@ -884,6 +1051,10 @@ typedef enum {
 /* 96bpp formats */
     PIXMAN_rgb_float = PIXMAN_FORMAT_BYTE(96,PIXMAN_TYPE_RGBA_FLOAT,0,32,32,32),

+/* 64bpp formats */
+    /* [63:0] A:B:G:R 16:16:16:16 native endian */
+    PIXMAN_a16b16g16r16 = PIXMAN_FORMAT_BYTE(64,PIXMAN_TYPE_ABGR,16,16,16,16),
+
 /* 32bpp formats */
     PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
     PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
@@ -1025,6 +1196,10 @@ PIXMAN_API
 pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image,
                                               const pixman_region32_t *region);

+PIXMAN_API
+pixman_bool_t pixman_image_set_clip_region64f (pixman_image_t *image,
+                                               const pixman_region64f_t *region);
+
 PIXMAN_API
 void pixman_image_set_has_client_clip (pixman_image_t *image,
                                        pixman_bool_t clien_clip);
@@ -1181,6 +1356,20 @@ void pixman_image_composite32 (pixman_op_t op,
                                int32_t width,
                                int32_t height);

+PIXMAN_API
+void pixman_image_composite64f (pixman_op_t op,
+                                pixman_image_t *src,
+                                pixman_image_t *mask,
+                                pixman_image_t *dest,
+                                double src_x,
+                                double src_y,
+                                double mask_x,
+                                double mask_y,
+                                double dest_x,
+                                double dest_y,
+                                double width,
+                                double height);
+
 /* Executive Summary: This function is a no-op that only exists
  * for historical reasons.
  *
diff --git a/gfx/cairo/pixman-neon.patch b/gfx/cairo/pixman-neon.patch
deleted file mode 100644
index 049b8b7105f7..000000000000
--- a/gfx/cairo/pixman-neon.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S
-index 6bd27360aa027..cd33babca1e0c 100644
---- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S
-+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S
-@@ -55,9 +55,9 @@
- #endif
- 
- .text
--.fpu neon
- .arch armv7a
- .object_arch armv4
-+.fpu neon
- .eabi_attribute 10, 0
- .eabi_attribute 12, 0
- .arm
-diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
-index 0e092577f1c73..c04b335d1e5bd 100644
---- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
-+++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S
-@@ -40,9 +40,9 @@
- #endif
- 
- .text
-- .fpu neon
- .arch armv7a
- .object_arch armv4
-+ .fpu neon
- .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
- .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
- .arm