Bug 1945604 - Update aom to 3990233fc06a35944d6d33797e63931802122a95 r=padenot
Differential Revision: https://phabricator.services.mozilla.com/D236581
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 0
 AOM_ARCH_ARM equ 0
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 0
 AOM_ARCH_X86_64 equ 0
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 0
 HAVE_NEON equ 0
 HAVE_NEON_DOTPROD equ 0
 HAVE_NEON_I8MM equ 0
+HAVE_RVV equ 0
 HAVE_SSE equ 0
 HAVE_SSE2 equ 0
 HAVE_SSE3 equ 0
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 0
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 0
 #define AOM_ARCH_X86_64 0
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 0
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 0
 #define HAVE_SSE2 0
 #define HAVE_SSE3 0
@@ -12,6 +12,7 @@
 .equ AOM_ARCH_AARCH64, 0
 .equ AOM_ARCH_ARM, 1
 .equ AOM_ARCH_PPC, 0
+.equ AOM_ARCH_RISCV, 0
 .equ AOM_ARCH_X86, 0
 .equ AOM_ARCH_X86_64, 0
 .equ CONFIG_ACCOUNTING, 0
@@ -82,6 +83,7 @@
 .equ HAVE_NEON, 1
 .equ HAVE_NEON_DOTPROD, 0
 .equ HAVE_NEON_I8MM, 0
+.equ HAVE_RVV, 0
 .equ HAVE_SSE, 0
 .equ HAVE_SSE2, 0
 .equ HAVE_SSE3, 0
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 1
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 0
 #define AOM_ARCH_X86_64 0
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 1
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 0
 #define HAVE_SSE2 0
 #define HAVE_SSE3 0
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 0
 AOM_ARCH_ARM equ 0
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 1
 AOM_ARCH_X86_64 equ 0
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 1
 HAVE_NEON equ 0
 HAVE_NEON_DOTPROD equ 0
 HAVE_NEON_I8MM equ 0
+HAVE_RVV equ 0
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 0
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 1
 #define AOM_ARCH_X86_64 0
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 0
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 0
 AOM_ARCH_ARM equ 0
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 0
 AOM_ARCH_X86_64 equ 1
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 1
 HAVE_NEON equ 0
 HAVE_NEON_DOTPROD equ 0
 HAVE_NEON_I8MM equ 0
+HAVE_RVV equ 0
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 0
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 0
 #define AOM_ARCH_X86_64 1
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 0
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 1
 AOM_ARCH_ARM equ 1
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 0
 AOM_ARCH_X86_64 equ 0
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 0
 HAVE_NEON equ 1
 HAVE_NEON_DOTPROD equ 1
 HAVE_NEON_I8MM equ 1
+HAVE_RVV equ 0
 HAVE_SSE equ 0
 HAVE_SSE2 equ 0
 HAVE_SSE3 equ 0
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 1
 #define AOM_ARCH_ARM 1
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 0
 #define AOM_ARCH_X86_64 0
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 1
 #define HAVE_NEON_DOTPROD 1
 #define HAVE_NEON_I8MM 1
+#define HAVE_RVV 0
 #define HAVE_SSE 0
 #define HAVE_SSE2 0
 #define HAVE_SSE3 0
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 0
 AOM_ARCH_ARM equ 0
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 0
 AOM_ARCH_X86_64 equ 1
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 1
 HAVE_NEON equ 0
 HAVE_NEON_DOTPROD equ 0
 HAVE_NEON_I8MM equ 0
+HAVE_RVV equ 0
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 0
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 0
 #define AOM_ARCH_X86_64 1
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 0
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 0
 AOM_ARCH_ARM equ 0
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 1
 AOM_ARCH_X86_64 equ 0
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 1
 HAVE_NEON equ 0
 HAVE_NEON_DOTPROD equ 0
 HAVE_NEON_I8MM equ 0
+HAVE_RVV equ 0
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 0
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 1
 #define AOM_ARCH_X86_64 0
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 0
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
@@ -12,6 +12,7 @@
 AOM_ARCH_AARCH64 equ 0
 AOM_ARCH_ARM equ 0
 AOM_ARCH_PPC equ 0
+AOM_ARCH_RISCV equ 0
 AOM_ARCH_X86 equ 0
 AOM_ARCH_X86_64 equ 1
 CONFIG_ACCOUNTING equ 0
@@ -82,6 +83,7 @@ HAVE_MMX equ 1
 HAVE_NEON equ 0
 HAVE_NEON_DOTPROD equ 0
 HAVE_NEON_I8MM equ 0
+HAVE_RVV equ 0
 HAVE_SSE equ 1
 HAVE_SSE2 equ 1
 HAVE_SSE3 equ 1
@@ -14,6 +14,7 @@
 #define AOM_ARCH_AARCH64 0
 #define AOM_ARCH_ARM 0
 #define AOM_ARCH_PPC 0
+#define AOM_ARCH_RISCV 0
 #define AOM_ARCH_X86 0
 #define AOM_ARCH_X86_64 1
 #define CONFIG_ACCOUNTING 0
@@ -84,6 +85,7 @@
 #define HAVE_NEON 0
 #define HAVE_NEON_DOTPROD 0
 #define HAVE_NEON_I8MM 0
+#define HAVE_RVV 0
 #define HAVE_SSE 1
 #define HAVE_SSE2 1
 #define HAVE_SSE3 1
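The hunks above extend each per-target generated config (asm and header variants) with the new RISC-V entries, AOM_ARCH_RISCV and HAVE_RVV, defaulting to 0 on every existing target. For orientation, a minimal sketch of how such flags gate code at compile time; the include path follows libaom's config layout, and the av1_example_* names are hypothetical:

#include "config/aom_config.h"  // generated per target

#if HAVE_RVV
void av1_example_rvv(void);          // hypothetical RVV specialization
#define av1_example av1_example_rvv  // selected only where HAVE_RVV is 1
#else
void av1_example_c(void);            // portable fallback (all targets above)
#define av1_example av1_example_c
#endif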
@@ -20,11 +20,11 @@ origin:
 
   # Human-readable identifier for this version/release
   # Generally "version NNN", "tag SSS", "bookmark SSS"
-  release: 0c13a5d54053f82bf8500b421b5cdefb1cc1b3ed (Sun Jan 05 09:13:09 2025 -0800).
+  release: 3990233fc06a35944d6d33797e63931802122a95 (Thu Jan 30 11:32:16 2025 -0800).
 
   # Revision to pull in
   # Must be a long or short commit SHA (long preferred)
-  revision: 0c13a5d54053f82bf8500b421b5cdefb1cc1b3ed
+  revision: 3990233fc06a35944d6d33797e63931802122a95
 
   # The package's license, where possible using the mnemonic from
   # https://spdx.org/licenses/
third_party/aom/CMakeLists.txt (vendored, 10 changed lines)
@@ -333,6 +333,12 @@ if(CONFIG_AV1_ENCODER)
   # libaom static library.
   if(BUILD_SHARED_LIBS)
     target_link_libraries(aom_av1_rc ${AOM_LIB_LINK_TYPE} aom_static)
+    # TODO: https://aomedia.issues.chromium.org/391715078 - This condition can
+    # be removed after aom_av1_rc restricts its symbol visibility.
+    if(CYGWIN OR MINGW)
+      target_link_options(aom_av1_rc ${AOM_LIB_LINK_TYPE}
+                          LINKER:--allow-multiple-definition)
+    endif()
   else()
     target_link_libraries(aom_av1_rc ${AOM_LIB_LINK_TYPE} aom)
   endif()
@@ -858,8 +864,8 @@ if(BUILD_SHARED_LIBS)
   # errors (don't use it with AddressSanitizer)." See
   # https://clang.llvm.org/docs/AddressSanitizer.html#usage. Similarly, see
   # https://clang.llvm.org/docs/MemorySanitizer.html#usage.
-  if(NOT WIN32
-     AND NOT APPLE
+  if(NOT
+     (APPLE OR CYGWIN OR WIN32)
      AND NOT (CMAKE_C_COMPILER_ID MATCHES "Clang" AND SANITIZE))
     # The -z defs linker option reports unresolved symbol references from object
     # files when building a shared library.
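The second CMakeLists.txt hunk adds CYGWIN to the platforms excluded from the -z defs link check. As a reminder of what that linker option catches, a small self-contained example (file and symbol names are made up):

/* undef_demo.c: references a symbol that no object in the link defines. */
extern int nowhere_defined(void);
int call_missing(void) { return nowhere_defined(); }

/*
 * cc -shared -fPIC undef_demo.c -o libdemo.so             # links; failure deferred to load time
 * cc -shared -fPIC -Wl,-z,defs undef_demo.c -o libdemo.so # undefined reference reported at link time
 */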
third_party/aom/README.md (vendored, 4 changed lines)
@@ -60,7 +60,9 @@ README.md {#LREADME}
    present, yasm will be used by default. Pass -DENABLE_NASM=ON to cmake to
    select nasm.) If you download yasm with the intention to work with Visual
    Studio, please download win32.exe or win64.exe and rename it into yasm.exe.
-   DO NOT download or use vsyasm.exe.
+   DO NOT download or use vsyasm.exe. The MSYS2 version of the yasm binary can
+   also be used and avoids an issue caused by a missing Visual C++
+   Redistributable install (Visual Studio 2010, MSVCR100.dll).
 6. Building the documentation requires
    [doxygen version 1.8.10 or newer](http://doxygen.org).
 7. Emscripten builds require the portable
third_party/aom/aom/exports_com (vendored, 2 changed lines)
@@ -10,7 +10,6 @@ text aom_codec_set_option
 text aom_codec_version
 text aom_codec_version_extra_str
 text aom_codec_version_str
-text aom_free
 text aom_img_add_metadata
 text aom_img_alloc
 text aom_img_alloc_with_border
@@ -25,7 +24,6 @@ text aom_img_plane_width
 text aom_img_remove_metadata
 text aom_img_set_rect
 text aom_img_wrap
-text aom_malloc
 text aom_rb_bytes_read
 text aom_rb_read_bit
 text aom_rb_read_literal
third_party/aom/aom_dsp/arm/highbd_loopfilter_neon.c (vendored, 756 changed lines)
[Diff suppressed because it is too large.]
third_party/aom/aom_dsp/arm/loopfilter_neon.c (vendored, 804 changed lines)
@@ -146,473 +146,393 @@ static inline uint8x8_t lpf_mask3_chroma(uint8x8_t p2q2, uint8x8_t p1q1,
   return mask_8x8;
 }
 
-static void lpf_14_neon(uint8x8_t *p6q6, uint8x8_t *p5q5, uint8x8_t *p4q4,
-                        uint8x8_t *p3q3, uint8x8_t *p2q2, uint8x8_t *p1q1,
-                        uint8x8_t *p0q0, const uint8_t blimit,
-                        const uint8_t limit, const uint8_t thresh) {
-  uint16x8_t out;
-  uint8x8_t out_f14_pq0, out_f14_pq1, out_f14_pq2, out_f14_pq3, out_f14_pq4,
-      out_f14_pq5;
-  uint8x8_t out_f7_pq0, out_f7_pq1, out_f7_pq2;
-  uint8x8_t out_f4_pq0, out_f4_pq1;
-  uint8x8_t mask_8x8, flat_8x8, flat2_8x8;
-  uint8x8_t q0p0, q1p1, q2p2;
-
-  // Calculate filter masks
-  mask_8x8 = lpf_mask(*p3q3, *p2q2, *p1q1, *p0q0, blimit, limit);
-  flat_8x8 = lpf_flat_mask4(*p3q3, *p2q2, *p1q1, *p0q0);
-  flat2_8x8 = lpf_flat_mask4(*p6q6, *p5q5, *p4q4, *p0q0);
-  {
-    // filter 4
-    int32x2x2_t ps0_qs0, ps1_qs1;
-    int16x8_t filter_s16;
-    const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
-    uint8x8_t temp0_8x8, temp1_8x8;
-    int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
-    int8x8_t op0, oq0, op1, oq1;
-    int8x8_t pq_s0, pq_s1;
-    int8x8_t filter_s8, filter1_s8, filter2_s8;
-    int8x8_t hev_8x8;
-    const int8x8_t sign_mask = vdup_n_s8(0x80);
-    const int8x8_t val_4 = vdup_n_s8(4);
-    const int8x8_t val_3 = vdup_n_s8(3);
-
-    pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
-    pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
-    ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
-    ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
-    ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
-    qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
-    ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
-    qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
-    // hev_mask
-    temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
-    temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
-    hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
-    // add outer taps if we have high edge variance
-    filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
-    filter_s8 = vand_s8(filter_s8, hev_8x8);
-
-    // inner taps
-    temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
-    filter_s16 = vmovl_s8(filter_s8);
-    filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
-    filter_s8 = vqmovn_s16(filter_s16);
-    filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
-    filter1_s8 = vqadd_s8(filter_s8, val_4);
-    filter2_s8 = vqadd_s8(filter_s8, val_3);
-    filter1_s8 = vshr_n_s8(filter1_s8, 3);
-    filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
-    oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
-    op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
-    hev_8x8 = vmvn_s8(hev_8x8);
-    filter_s8 = vrshr_n_s8(filter1_s8, 1);
-    filter_s8 = vand_s8(filter_s8, hev_8x8);
-
-    oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
-    op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
-    out_f4_pq0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
-    out_f4_pq1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
-  }
-  // reverse p and q
-  q0p0 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0)));
-  q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));
-  q2p2 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p2q2)));
-  {
-    // filter 8
-    uint16x8_t out_pq0, out_pq1, out_pq2;
-    out = vaddl_u8(*p3q3, *p2q2);
-    out = vaddw_u8(out, *p1q1);
-    out = vaddw_u8(out, *p0q0);
-
-    out = vaddw_u8(out, q0p0);
-    out_pq1 = vaddw_u8(out, *p3q3);
-    out_pq2 = vaddw_u8(out_pq1, *p3q3);
-    out_pq2 = vaddw_u8(out_pq2, *p2q2);
-    out_pq1 = vaddw_u8(out_pq1, *p1q1);
-    out_pq1 = vaddw_u8(out_pq1, q1p1);
-
-    out_pq0 = vaddw_u8(out, *p0q0);
-    out_pq0 = vaddw_u8(out_pq0, q1p1);
-    out_pq0 = vaddw_u8(out_pq0, q2p2);
-
-    out_f7_pq0 = vrshrn_n_u16(out_pq0, 3);
-    out_f7_pq1 = vrshrn_n_u16(out_pq1, 3);
-    out_f7_pq2 = vrshrn_n_u16(out_pq2, 3);
-  }
-  {
-    // filter 14
-    uint16x8_t out_pq0, out_pq1, out_pq2, out_pq3, out_pq4, out_pq5;
-    uint16x8_t p6q6_2, p6q6_temp, qp_sum;
-    uint8x8_t qp_rev;
-
-    out = vaddw_u8(out, *p4q4);
-    out = vaddw_u8(out, *p5q5);
-    out = vaddw_u8(out, *p6q6);
-
-    out_pq5 = vaddw_u8(out, *p4q4);
-    out_pq4 = vaddw_u8(out_pq5, *p3q3);
-    out_pq3 = vaddw_u8(out_pq4, *p2q2);
-
-    out_pq5 = vaddw_u8(out_pq5, *p5q5);
-    out_pq4 = vaddw_u8(out_pq4, *p5q5);
-
-    out_pq0 = vaddw_u8(out, *p1q1);
-    out_pq1 = vaddw_u8(out_pq0, *p2q2);
-    out_pq2 = vaddw_u8(out_pq1, *p3q3);
-
-    out_pq0 = vaddw_u8(out_pq0, *p0q0);
-    out_pq1 = vaddw_u8(out_pq1, *p0q0);
-
-    out_pq1 = vaddw_u8(out_pq1, *p6q6);
-    p6q6_2 = vaddl_u8(*p6q6, *p6q6);
-    out_pq2 = vaddq_u16(out_pq2, p6q6_2);
-    p6q6_temp = vaddw_u8(p6q6_2, *p6q6);
-    out_pq3 = vaddq_u16(out_pq3, p6q6_temp);
-    p6q6_temp = vaddw_u8(p6q6_temp, *p6q6);
-    out_pq4 = vaddq_u16(out_pq4, p6q6_temp);
-    p6q6_temp = vaddq_u16(p6q6_temp, p6q6_2);
-    out_pq5 = vaddq_u16(out_pq5, p6q6_temp);
-
-    out_pq4 = vaddw_u8(out_pq4, q1p1);
-
-    qp_sum = vaddl_u8(q2p2, q1p1);
-    out_pq3 = vaddq_u16(out_pq3, qp_sum);
-
-    qp_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p3q3)));
-    qp_sum = vaddw_u8(qp_sum, qp_rev);
-    out_pq2 = vaddq_u16(out_pq2, qp_sum);
-
-    qp_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p4q4)));
-    qp_sum = vaddw_u8(qp_sum, qp_rev);
-    out_pq1 = vaddq_u16(out_pq1, qp_sum);
-
-    qp_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p5q5)));
-    qp_sum = vaddw_u8(qp_sum, qp_rev);
-    out_pq0 = vaddq_u16(out_pq0, qp_sum);
-
-    out_pq0 = vaddw_u8(out_pq0, q0p0);
-
-    out_f14_pq0 = vrshrn_n_u16(out_pq0, 4);
-    out_f14_pq1 = vrshrn_n_u16(out_pq1, 4);
-    out_f14_pq2 = vrshrn_n_u16(out_pq2, 4);
-    out_f14_pq3 = vrshrn_n_u16(out_pq3, 4);
-    out_f14_pq4 = vrshrn_n_u16(out_pq4, 4);
-    out_f14_pq5 = vrshrn_n_u16(out_pq5, 4);
-  }
-  {
-    uint8x8_t filter4_cond, filter8_cond, filter14_cond;
-    filter8_cond = vand_u8(flat_8x8, mask_8x8);
-    filter4_cond = vmvn_u8(filter8_cond);
-    filter14_cond = vand_u8(filter8_cond, flat2_8x8);
-
-    // filter4 outputs
-    *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
-
-    // filter8 outputs
-    *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
-    *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
-
-    // filter14 outputs
-    *p0q0 = vbsl_u8(filter14_cond, out_f14_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter14_cond, out_f14_pq1, *p1q1);
-    *p2q2 = vbsl_u8(filter14_cond, out_f14_pq2, *p2q2);
-    *p3q3 = vbsl_u8(filter14_cond, out_f14_pq3, *p3q3);
-    *p4q4 = vbsl_u8(filter14_cond, out_f14_pq4, *p4q4);
-    *p5q5 = vbsl_u8(filter14_cond, out_f14_pq5, *p5q5);
-  }
-}
-
-static void lpf_8_neon(uint8x8_t *p3q3, uint8x8_t *p2q2, uint8x8_t *p1q1,
-                       uint8x8_t *p0q0, const uint8_t blimit,
-                       const uint8_t limit, const uint8_t thresh) {
-  uint16x8_t out;
-  uint8x8_t out_f7_pq0, out_f7_pq1, out_f7_pq2;
-  uint8x8_t out_f4_pq0, out_f4_pq1;
-  uint8x8_t mask_8x8, flat_8x8;
-
-  // Calculate filter masks
-  mask_8x8 = lpf_mask(*p3q3, *p2q2, *p1q1, *p0q0, blimit, limit);
-  flat_8x8 = lpf_flat_mask4(*p3q3, *p2q2, *p1q1, *p0q0);
-  {
-    // filter 4
-    int32x2x2_t ps0_qs0, ps1_qs1;
-    int16x8_t filter_s16;
-    const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
-    uint8x8_t temp0_8x8, temp1_8x8;
-    int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
-    int8x8_t op0, oq0, op1, oq1;
-    int8x8_t pq_s0, pq_s1;
-    int8x8_t filter_s8, filter1_s8, filter2_s8;
-    int8x8_t hev_8x8;
-    const int8x8_t sign_mask = vdup_n_s8(0x80);
-    const int8x8_t val_4 = vdup_n_s8(4);
-    const int8x8_t val_3 = vdup_n_s8(3);
-
-    pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
-    pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
-    ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
-    ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
-    ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
-    qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
-    ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
-    qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
-    // hev_mask
-    temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
-    temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
-    hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
-    // add outer taps if we have high edge variance
-    filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
-    filter_s8 = vand_s8(filter_s8, hev_8x8);
-
-    // inner taps
-    temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
-    filter_s16 = vmovl_s8(filter_s8);
-    filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
-    filter_s8 = vqmovn_s16(filter_s16);
-    filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
-    filter1_s8 = vqadd_s8(filter_s8, val_4);
-    filter2_s8 = vqadd_s8(filter_s8, val_3);
-    filter1_s8 = vshr_n_s8(filter1_s8, 3);
-    filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
-    oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
-    op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
-    hev_8x8 = vmvn_s8(hev_8x8);
-    filter_s8 = vrshr_n_s8(filter1_s8, 1);
-    filter_s8 = vand_s8(filter_s8, hev_8x8);
-
-    oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
-    op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
-    out_f4_pq0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
-    out_f4_pq1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
-  }
-  {
-    // filter 8
-    uint16x8_t out_pq0, out_pq1, out_pq2;
-    uint8x8_t q0p0, q1p1, q2p2;
-
-    out = vaddl_u8(*p3q3, *p2q2);
-    out = vaddw_u8(out, *p1q1);
-    out = vaddw_u8(out, *p0q0);
-
-    // reverse p and q
-    q0p0 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0)));
-    q1p1 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));
-    q2p2 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p2q2)));
-
-    out = vaddw_u8(out, q0p0);
-    out_pq1 = vaddw_u8(out, *p3q3);
-    out_pq2 = vaddw_u8(out_pq1, *p3q3);
-    out_pq2 = vaddw_u8(out_pq2, *p2q2);
-    out_pq1 = vaddw_u8(out_pq1, *p1q1);
-    out_pq1 = vaddw_u8(out_pq1, q1p1);
-
-    out_pq0 = vaddw_u8(out, *p0q0);
-    out_pq0 = vaddw_u8(out_pq0, q1p1);
-    out_pq0 = vaddw_u8(out_pq0, q2p2);
-
-    out_f7_pq0 = vrshrn_n_u16(out_pq0, 3);
-    out_f7_pq1 = vrshrn_n_u16(out_pq1, 3);
-    out_f7_pq2 = vrshrn_n_u16(out_pq2, 3);
-  }
-  {
-    uint8x8_t filter4_cond, filter8_cond;
-    filter8_cond = vand_u8(flat_8x8, mask_8x8);
-    filter4_cond = vmvn_u8(filter8_cond);
-
-    // filter4 outputs
-    *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
-
-    // filter8 outputs
-    *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
-    *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
-  }
-}
-
-static void lpf_6_neon(uint8x8_t *p2q2, uint8x8_t *p1q1, uint8x8_t *p0q0,
-                       const uint8_t blimit, const uint8_t limit,
-                       const uint8_t thresh) {
-  uint16x8_t out;
-  uint8x8_t out_f6_pq0, out_f6_pq1;
-  uint8x8_t out_f4_pq0, out_f4_pq1;
-  uint8x8_t mask_8x8, flat_8x8;
-
-  // Calculate filter masks
-  mask_8x8 = lpf_mask3_chroma(*p2q2, *p1q1, *p0q0, blimit, limit);
-  flat_8x8 = lpf_flat_mask3(*p2q2, *p1q1, *p0q0);
-  {
-    // filter 4
-    int32x2x2_t ps0_qs0, ps1_qs1;
-    int16x8_t filter_s16;
-    const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
-    uint8x8_t temp0_8x8, temp1_8x8;
-    int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
-    int8x8_t op0, oq0, op1, oq1;
-    int8x8_t pq_s0, pq_s1;
-    int8x8_t filter_s8, filter1_s8, filter2_s8;
-    int8x8_t hev_8x8;
-    const int8x8_t sign_mask = vdup_n_s8(0x80);
-    const int8x8_t val_4 = vdup_n_s8(4);
-    const int8x8_t val_3 = vdup_n_s8(3);
-
-    pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
-    pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
-    ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
-    ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
-    ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
-    qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
-    ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
-    qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
-
-    // hev_mask
-    temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
-    temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
-    hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
-
-    // add outer taps if we have high edge variance
-    filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
-    filter_s8 = vand_s8(filter_s8, hev_8x8);
-
-    // inner taps
-    temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
-    filter_s16 = vmovl_s8(filter_s8);
-    filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
-    filter_s8 = vqmovn_s16(filter_s16);
-    filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
-
-    filter1_s8 = vqadd_s8(filter_s8, val_4);
-    filter2_s8 = vqadd_s8(filter_s8, val_3);
-    filter1_s8 = vshr_n_s8(filter1_s8, 3);
-    filter2_s8 = vshr_n_s8(filter2_s8, 3);
-
-    oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
-    op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
-
-    filter_s8 = vrshr_n_s8(filter1_s8, 1);
-    filter_s8 = vbic_s8(filter_s8, hev_8x8);
-
-    oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
-    op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
-
-    out_f4_pq0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
-    out_f4_pq1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
-  }
-  {
-    // filter 6
-    uint16x8_t out_pq0, out_pq1;
-    uint8x8_t pq_rev;
-
-    out = vaddl_u8(*p0q0, *p1q1);
-    out = vaddq_u16(out, out);
-    out = vaddw_u8(out, *p2q2);
-
-    pq_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0)));
-    out = vaddw_u8(out, pq_rev);
-
-    out_pq0 = vaddw_u8(out, pq_rev);
-    pq_rev = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p1q1)));
-    out_pq0 = vaddw_u8(out_pq0, pq_rev);
-
-    out_pq1 = vaddw_u8(out, *p2q2);
-    out_pq1 = vaddw_u8(out_pq1, *p2q2);
-
-    out_f6_pq0 = vrshrn_n_u16(out_pq0, 3);
-    out_f6_pq1 = vrshrn_n_u16(out_pq1, 3);
-  }
-  {
-    uint8x8_t filter4_cond, filter6_cond;
-    filter6_cond = vand_u8(flat_8x8, mask_8x8);
-    filter4_cond = vmvn_u8(filter6_cond);
-
-    // filter4 outputs
-    *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
-
-    // filter6 outputs
-    *p0q0 = vbsl_u8(filter6_cond, out_f6_pq0, *p0q0);
-    *p1q1 = vbsl_u8(filter6_cond, out_f6_pq1, *p1q1);
-  }
-}
-
-static void lpf_4_neon(uint8x8_t *p1q1, uint8x8_t *p0q0, const uint8_t blimit,
-                       const uint8_t limit, const uint8_t thresh) {
-  int32x2x2_t ps0_qs0, ps1_qs1;
-  int16x8_t filter_s16;
+static inline void filter4(const uint8x8_t p0q0, const uint8x8_t p1q1,
+                           uint8x8_t *p0q0_output, uint8x8_t *p1q1_output,
+                           uint8x8_t mask_8x8, const uint8_t thresh) {
   const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
-  uint8x8_t mask_8x8, temp0_8x8, temp1_8x8;
-  int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
-  int8x8_t op0, oq0, op1, oq1;
-  int8x8_t pq_s0, pq_s1;
-  int8x8_t filter_s8, filter1_s8, filter2_s8;
-  int8x8_t hev_8x8;
   const int8x8_t sign_mask = vdup_n_s8(0x80);
   const int8x8_t val_4 = vdup_n_s8(4);
   const int8x8_t val_3 = vdup_n_s8(3);
 
-  // Calculate filter mask
-  mask_8x8 = lpf_mask2(*p1q1, *p0q0, blimit, limit);
-
-  pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
-  pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
-
-  ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
-  ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
-  ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
-  qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
-  ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
-  qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
+  int8x8_t pq_s0 = veor_s8(vreinterpret_s8_u8(p0q0), sign_mask);
+  int8x8_t pq_s1 = veor_s8(vreinterpret_s8_u8(p1q1), sign_mask);
+
+  int32x2x2_t ps0_qs0 =
+      vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
+  int32x2x2_t ps1_qs1 =
+      vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
+  int8x8_t ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
+  int8x8_t qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
+  int8x8_t ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
+  int8x8_t qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
 
   // hev_mask
-  temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
-  temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
-  hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
+  uint8x8_t temp0_8x8 = vcgt_u8(vabd_u8(p0q0, p1q1), thresh_f4);
+  uint8x8_t temp1_8x8 =
+      vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
+  int8x8_t hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
 
   // add outer taps if we have high edge variance
-  filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
+  int8x8_t filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
   filter_s8 = vand_s8(filter_s8, hev_8x8);
 
   // inner taps
-  temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
-  filter_s16 = vmovl_s8(filter_s8);
+  int8x8_t temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
+  int16x8_t filter_s16 = vmovl_s8(filter_s8);
   filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
   filter_s8 = vqmovn_s16(filter_s16);
   filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
 
-  filter1_s8 = vqadd_s8(filter_s8, val_4);
-  filter2_s8 = vqadd_s8(filter_s8, val_3);
+  int8x8_t filter1_s8 = vqadd_s8(filter_s8, val_4);
+  int8x8_t filter2_s8 = vqadd_s8(filter_s8, val_3);
   filter1_s8 = vshr_n_s8(filter1_s8, 3);
   filter2_s8 = vshr_n_s8(filter2_s8, 3);
 
-  oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
-  op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
+  int8x8_t oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
+  int8x8_t op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
 
   filter_s8 = vrshr_n_s8(filter1_s8, 1);
   filter_s8 = vbic_s8(filter_s8, hev_8x8);
 
-  oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
-  op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
+  int8x8_t oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
+  int8x8_t op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
 
-  *p0q0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
-  *p1q1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
+  *p0q0_output = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
+  *p1q1_output = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
+}
+
+static inline void filter8(const uint8x8_t p0q0, const uint8x8_t p1q1,
+                           const uint8x8_t p2q2, const uint8x8_t p3q3,
+                           uint8x8_t *p0q0_output, uint8x8_t *p1q1_output,
+                           uint8x8_t *p2q2_output) {
+  // Reverse p and q.
+  uint8x8_t q0p0 = vext_u8(p0q0, p0q0, 4);
+  uint8x8_t q1p1 = vext_u8(p1q1, p1q1, 4);
+  uint8x8_t q2p2 = vext_u8(p2q2, p2q2, 4);
+
+  uint16x8_t p0q0_p1q1 = vaddl_u8(p0q0, p1q1);
+  uint16x8_t p2q2_p3q3 = vaddl_u8(p3q3, p2q2);
+  uint16x8_t out = vaddq_u16(p0q0_p1q1, p2q2_p3q3);
+
+  uint16x8_t q0p0_p3q3 = vaddl_u8(q0p0, p3q3);
+  uint16x8_t out_q0p0_p3q3 = vaddq_u16(out, q0p0_p3q3);
+
+  uint16x8_t out_pq2 = vaddq_u16(out_q0p0_p3q3, p2q2_p3q3);
+
+  uint16x8_t p1q1_q1p1 = vaddl_u8(p1q1, q1p1);
+  uint16x8_t out_pq1 = vaddq_u16(out_q0p0_p3q3, p1q1_q1p1);
+
+  uint16x8_t q0p0_p0q0 = vaddl_u8(q0p0, p0q0);
+  uint16x8_t q1p1_q2p2 = vaddl_u8(q1p1, q2p2);
+  uint16x8_t out_pq0 = vaddq_u16(q0p0_p0q0, q1p1_q2p2);
+  out_pq0 = vaddq_u16(out_pq0, out);
+
+  *p0q0_output = vrshrn_n_u16(out_pq0, 3);
+  *p1q1_output = vrshrn_n_u16(out_pq1, 3);
+  *p2q2_output = vrshrn_n_u16(out_pq2, 3);
+}
+
+static inline void filter14(const uint8x8_t p0q0, const uint8x8_t p1q1,
+                            const uint8x8_t p2q2, const uint8x8_t p3q3,
+                            const uint8x8_t p4q4, const uint8x8_t p5q5,
+                            const uint8x8_t p6q6, uint8x8_t *p0q0_output,
+                            uint8x8_t *p1q1_output, uint8x8_t *p2q2_output,
+                            uint8x8_t *p3q3_output, uint8x8_t *p4q4_output,
+                            uint8x8_t *p5q5_output) {
+  // Reverse p and q.
+  uint8x8_t q0p0 = vext_u8(p0q0, p0q0, 4);
+  uint8x8_t q1p1 = vext_u8(p1q1, p1q1, 4);
+  uint8x8_t q2p2 = vext_u8(p2q2, p2q2, 4);
+  uint8x8_t q3p3 = vext_u8(p3q3, p3q3, 4);
+  uint8x8_t q4p4 = vext_u8(p4q4, p4q4, 4);
+  uint8x8_t q5p5 = vext_u8(p5q5, p5q5, 4);
+
+  uint16x8_t p0q0_p1q1 = vaddl_u8(p0q0, p1q1);
+  uint16x8_t p2q2_p3q3 = vaddl_u8(p2q2, p3q3);
+  uint16x8_t out = vaddq_u16(p0q0_p1q1, p2q2_p3q3);
+
+  uint16x8_t q0p0_p4q4 = vaddl_u8(q0p0, p4q4);
+  uint16x8_t p5q5_p6q6 = vaddl_u8(p5q5, p6q6);
+  uint16x8_t tmp = vaddq_u16(q0p0_p4q4, p5q5_p6q6);
+  // This offset removes the need for a rounding shift at the end.
+  uint16x8_t tmp_offset = vaddq_u16(tmp, vdupq_n_u16(1 << 3));
+  out = vaddq_u16(out, tmp_offset);
+
+  uint16x8_t out_pq5 = vaddw_u8(out, p4q4);
+  uint16x8_t out_pq4 = vaddw_u8(out_pq5, p3q3);
+  uint16x8_t out_pq3 = vaddw_u8(out_pq4, p2q2);
+
+  out_pq5 = vaddw_u8(out_pq5, p5q5);
+
+  uint16x8_t out_pq0 = vaddw_u8(out, p1q1);
+  uint16x8_t out_pq1 = vaddw_u8(out_pq0, p2q2);
+  uint16x8_t out_pq2 = vaddw_u8(out_pq1, p3q3);
+
+  uint16x8_t p0q0_q0p0 = vaddl_u8(p0q0, q0p0);
+  out_pq0 = vaddq_u16(out_pq0, p0q0_q0p0);
+
+  uint16x8_t p0q0_p6q6 = vaddl_u8(p0q0, p6q6);
+  out_pq1 = vaddq_u16(out_pq1, p0q0_p6q6);
+  uint16x8_t p5q5_q1p1 = vaddl_u8(p5q5, q1p1);
+  out_pq4 = vaddq_u16(out_pq4, p5q5_q1p1);
+
+  uint16x8_t p6q6_p6q6 = vaddl_u8(p6q6, p6q6);
+  out_pq2 = vaddq_u16(out_pq2, p6q6_p6q6);
+  uint16x8_t p6q6_temp = vaddw_u8(p6q6_p6q6, p6q6);
+  out_pq3 = vaddq_u16(out_pq3, p6q6_temp);
+  p6q6_temp = vaddw_u8(p6q6_temp, p6q6);
+  out_pq4 = vaddq_u16(out_pq4, p6q6_temp);
+  p6q6_temp = vaddq_u16(p6q6_temp, p6q6_p6q6);
+  out_pq5 = vaddq_u16(out_pq5, p6q6_temp);
+
+  uint16x8_t qp_sum = vaddl_u8(q2p2, q1p1);
+  out_pq3 = vaddq_u16(out_pq3, qp_sum);
+
+  qp_sum = vaddw_u8(qp_sum, q3p3);
+  out_pq2 = vaddq_u16(out_pq2, qp_sum);
+
+  qp_sum = vaddw_u8(qp_sum, q4p4);
+  out_pq1 = vaddq_u16(out_pq1, qp_sum);
+
+  qp_sum = vaddw_u8(qp_sum, q5p5);
+  out_pq0 = vaddq_u16(out_pq0, qp_sum);
+
+  *p0q0_output = vshrn_n_u16(out_pq0, 4);
+  *p1q1_output = vshrn_n_u16(out_pq1, 4);
+  *p2q2_output = vshrn_n_u16(out_pq2, 4);
+  *p3q3_output = vshrn_n_u16(out_pq3, 4);
+  *p4q4_output = vshrn_n_u16(out_pq4, 4);
+  *p5q5_output = vshrn_n_u16(out_pq5, 4);
+}
+
+static inline void lpf_14_neon(uint8x8_t *p6q6, uint8x8_t *p5q5,
+                               uint8x8_t *p4q4, uint8x8_t *p3q3,
+                               uint8x8_t *p2q2, uint8x8_t *p1q1,
+                               uint8x8_t *p0q0, const uint8_t blimit,
+                               const uint8_t limit, const uint8_t thresh) {
+  uint8x8_t out_f14_pq0, out_f14_pq1, out_f14_pq2, out_f14_pq3, out_f14_pq4,
+      out_f14_pq5;
+  uint8x8_t out_f7_pq0, out_f7_pq1, out_f7_pq2;
+  uint8x8_t out_f4_pq0, out_f4_pq1;
+
+  // Calculate filter masks.
+  uint8x8_t mask_8x8 = lpf_mask(*p3q3, *p2q2, *p1q1, *p0q0, blimit, limit);
+  uint8x8_t flat_8x8 = lpf_flat_mask4(*p3q3, *p2q2, *p1q1, *p0q0);
+  uint8x8_t flat2_8x8 = lpf_flat_mask4(*p6q6, *p5q5, *p4q4, *p0q0);
+
+  // No filtering.
+  if (vget_lane_u64(vreinterpret_u64_u8(mask_8x8), 0) == 0) {
+    return;
+  }
+
+  uint8x8_t filter8_cond = vand_u8(flat_8x8, mask_8x8);
+  uint8x8_t filter4_cond = vmvn_u8(filter8_cond);
+  uint8x8_t filter14_cond = vand_u8(filter8_cond, flat2_8x8);
+
+  if (vget_lane_s64(vreinterpret_s64_u8(filter14_cond), 0) == -1) {
+    // Only filter14() applies.
+    filter14(*p0q0, *p1q1, *p2q2, *p3q3, *p4q4, *p5q5, *p6q6, &out_f14_pq0,
+             &out_f14_pq1, &out_f14_pq2, &out_f14_pq3, &out_f14_pq4,
+             &out_f14_pq5);
+
+    *p0q0 = out_f14_pq0;
+    *p1q1 = out_f14_pq1;
+    *p2q2 = out_f14_pq2;
+    *p3q3 = out_f14_pq3;
+    *p4q4 = out_f14_pq4;
+    *p5q5 = out_f14_pq5;
+  } else if (vget_lane_u64(vreinterpret_u64_u8(filter14_cond), 0) == 0 &&
+             vget_lane_s64(vreinterpret_s64_u8(filter8_cond), 0) == -1) {
+    // Only filter8() applies.
+    filter8(*p0q0, *p1q1, *p2q2, *p3q3, &out_f7_pq0, &out_f7_pq1, &out_f7_pq2);
+
+    *p0q0 = out_f7_pq0;
+    *p1q1 = out_f7_pq1;
+    *p2q2 = out_f7_pq2;
+  } else {
+    filter4(*p0q0, *p1q1, &out_f4_pq0, &out_f4_pq1, mask_8x8, thresh);
+
+    if (vget_lane_u64(vreinterpret_u64_u8(filter14_cond), 0) == 0 &&
+        vget_lane_u64(vreinterpret_u64_u8(filter8_cond), 0) == 0) {
+      // filter8() and filter14() do not apply, but filter4() applies to one or
+      // more values.
+      *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+    } else {
+      filter8(*p0q0, *p1q1, *p2q2, *p3q3, &out_f7_pq0, &out_f7_pq1,
+              &out_f7_pq2);
+
+      if (vget_lane_u64(vreinterpret_u64_u8(filter14_cond), 0) == 0) {
+        // filter14() does not apply, but filter8() and filter4() apply to one
+        // or more values.
+        // filter4 outputs
+        *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+        *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+
+        // filter8 outputs
+        *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
+        *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
+        *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
+      } else {
+        // All filters may contribute values to final outputs.
+        filter14(*p0q0, *p1q1, *p2q2, *p3q3, *p4q4, *p5q5, *p6q6, &out_f14_pq0,
+                 &out_f14_pq1, &out_f14_pq2, &out_f14_pq3, &out_f14_pq4,
+                 &out_f14_pq5);
+
+        // filter4 outputs
+        *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+        *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+
+        // filter8 outputs
+        *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
+        *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
+        *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
+
+        // filter14 outputs
+        *p0q0 = vbsl_u8(filter14_cond, out_f14_pq0, *p0q0);
+        *p1q1 = vbsl_u8(filter14_cond, out_f14_pq1, *p1q1);
+        *p2q2 = vbsl_u8(filter14_cond, out_f14_pq2, *p2q2);
+        *p3q3 = vbsl_u8(filter14_cond, out_f14_pq3, *p3q3);
+        *p4q4 = vbsl_u8(filter14_cond, out_f14_pq4, *p4q4);
+        *p5q5 = vbsl_u8(filter14_cond, out_f14_pq5, *p5q5);
+      }
+    }
+  }
+}
+
+static inline void lpf_8_neon(uint8x8_t *p3q3, uint8x8_t *p2q2, uint8x8_t *p1q1,
+                              uint8x8_t *p0q0, const uint8_t blimit,
+                              const uint8_t limit, const uint8_t thresh) {
+  uint8x8_t out_f7_pq0, out_f7_pq1, out_f7_pq2;
+  uint8x8_t out_f4_pq0, out_f4_pq1;
+
+  // Calculate filter masks.
+  uint8x8_t mask_8x8 = lpf_mask(*p3q3, *p2q2, *p1q1, *p0q0, blimit, limit);
+  uint8x8_t flat_8x8 = lpf_flat_mask4(*p3q3, *p2q2, *p1q1, *p0q0);
+
+  // No filtering.
+  if (vget_lane_u64(vreinterpret_u64_u8(mask_8x8), 0) == 0) {
+    return;
+  }
+
+  uint8x8_t filter8_cond = vand_u8(flat_8x8, mask_8x8);
+  uint8x8_t filter4_cond = vmvn_u8(filter8_cond);
+
+  // Not needing filter4() at all is a very common case, so isolate it to avoid
+  // needlessly computing filter4().
+  if (vget_lane_s64(vreinterpret_s64_u8(filter8_cond), 0) == -1) {
+    filter8(*p0q0, *p1q1, *p2q2, *p3q3, &out_f7_pq0, &out_f7_pq1, &out_f7_pq2);
+
+    *p0q0 = out_f7_pq0;
+    *p1q1 = out_f7_pq1;
+    *p2q2 = out_f7_pq2;
+  } else {
+    filter4(*p0q0, *p1q1, &out_f4_pq0, &out_f4_pq1, mask_8x8, thresh);
+
+    if (vget_lane_u64(vreinterpret_u64_u8(filter8_cond), 0) == 0) {
+      // filter8() does not apply, but filter4() applies to one or more values.
+      *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+    } else {
+      filter8(*p0q0, *p1q1, *p2q2, *p3q3, &out_f7_pq0, &out_f7_pq1,
+              &out_f7_pq2);
+
+      // filter4 outputs
+      *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+
+      // filter8 outputs
+      *p0q0 = vbsl_u8(filter8_cond, out_f7_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter8_cond, out_f7_pq1, *p1q1);
+      *p2q2 = vbsl_u8(filter8_cond, out_f7_pq2, *p2q2);
+    }
+  }
+}
+
+static inline void filter6(const uint8x8_t p0q0, const uint8x8_t p1q1,
+                           const uint8x8_t p2q2, uint8x8_t *p0q0_output,
+                           uint8x8_t *p1q1_output) {
+  uint8x8_t q0p0 = vext_u8(p0q0, p0q0, 4);
+
+  uint16x8_t p0q0_p1q1 = vaddl_u8(p0q0, p1q1);
+  uint16x8_t out = vaddq_u16(p0q0_p1q1, p0q0_p1q1);
+
+  uint16x8_t q0p0_p2q2 = vaddl_u8(q0p0, p2q2);
+  out = vaddq_u16(out, q0p0_p2q2);
+
+  uint16x8_t q0p0_q1p1 = vextq_u16(p0q0_p1q1, p0q0_p1q1, 4);
+  uint16x8_t out_pq0 = vaddq_u16(out, q0p0_q1p1);
+
+  uint16x8_t p2q2_p2q2 = vaddl_u8(p2q2, p2q2);
+  uint16x8_t out_pq1 = vaddq_u16(out, p2q2_p2q2);
+
+  *p0q0_output = vrshrn_n_u16(out_pq0, 3);
+  *p1q1_output = vrshrn_n_u16(out_pq1, 3);
+}
+
+static inline void lpf_6_neon(uint8x8_t *p2q2, uint8x8_t *p1q1, uint8x8_t *p0q0,
+                              const uint8_t blimit, const uint8_t limit,
+                              const uint8_t thresh) {
+  uint8x8_t out_f6_pq0, out_f6_pq1;
+  uint8x8_t out_f4_pq0, out_f4_pq1;
+
+  // Calculate filter masks.
+  uint8x8_t mask_8x8 = lpf_mask3_chroma(*p2q2, *p1q1, *p0q0, blimit, limit);
+  uint8x8_t flat_8x8 = lpf_flat_mask3(*p2q2, *p1q1, *p0q0);
+
+  // No filtering.
+  if (vget_lane_u64(vreinterpret_u64_u8(mask_8x8), 0) == 0) {
+    return;
+  }
+
+  uint8x8_t filter6_cond = vand_u8(flat_8x8, mask_8x8);
+  uint8x8_t filter4_cond = vmvn_u8(filter6_cond);
+
+  // Not needing filter4 at all is a very common case, so isolate it to avoid
+  // needlessly computing filter4.
+  if (vget_lane_s64(vreinterpret_s64_u8(filter6_cond), 0) == -1) {
+    filter6(*p0q0, *p1q1, *p2q2, &out_f6_pq0, &out_f6_pq1);
+
+    *p0q0 = out_f6_pq0;
+    *p1q1 = out_f6_pq1;
+  } else {
+    filter4(*p0q0, *p1q1, &out_f4_pq0, &out_f4_pq1, mask_8x8, thresh);
+
+    if (vget_lane_u64(vreinterpret_u64_u8(filter6_cond), 0) == 0) {
+      // filter6 does not apply, but filter4 applies to one or more values.
+      *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+    } else {
+      // All filters may contribute to the final output.
+      filter6(*p0q0, *p1q1, *p2q2, &out_f6_pq0, &out_f6_pq1);
+
+      // filter4 outputs
+      *p0q0 = vbsl_u8(filter4_cond, out_f4_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter4_cond, out_f4_pq1, *p1q1);
+
+      // filter6 outputs
+      *p0q0 = vbsl_u8(filter6_cond, out_f6_pq0, *p0q0);
+      *p1q1 = vbsl_u8(filter6_cond, out_f6_pq1, *p1q1);
+    }
+  }
+}
+
+static inline void lpf_4_neon(uint8x8_t *p1q1, uint8x8_t *p0q0,
+                              const uint8_t blimit, const uint8_t limit,
+                              const uint8_t thresh) {
+  uint8x8_t out_f4_pq0, out_f4_pq1;
+
+  // Calculate filter mask
+  uint8x8_t mask_8x8 = lpf_mask2(*p1q1, *p0q0, blimit, limit);
+
+  // No filtering.
+  if (vget_lane_u64(vreinterpret_u64_u8(mask_8x8), 0) == 0) {
+    return;
+  }
+
+  filter4(*p0q0, *p1q1, &out_f4_pq0, &out_f4_pq1, mask_8x8, thresh);
+
+  *p0q0 = out_f4_pq0;
+  *p1q1 = out_f4_pq1;
 }
 
 void aom_lpf_vertical_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
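The loopfilter rewrite above extracts shared filter4/filter6/filter8/filter14 kernels and adds early exits keyed on whole-vector mask tests, so the common no-filtering and single-filter cases skip the unused kernels. A minimal sketch of the mask-test idiom it relies on (not libaom API, just the pattern):

#include <arm_neon.h>
#include <stdbool.h>

/* NEON comparison masks hold 0x00 or 0xFF per lane, so an 8x8-bit mask can
 * be classified by reading it back as a single 64-bit lane. */
static inline bool mask_none_set(uint8x8_t mask) {
  return vget_lane_u64(vreinterpret_u64_u8(mask), 0) == 0;   /* skip filtering */
}

static inline bool mask_all_set(uint8x8_t mask) {
  /* All lanes 0xFF reads back as -1 when reinterpreted as signed 64-bit. */
  return vget_lane_s64(vreinterpret_s64_u8(mask), 0) == -1;  /* one kernel suffices */
}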
third_party/aom/aom_dsp/arm/mem_neon.h (vendored, 185 changed lines)
@@ -55,12 +55,52 @@ static inline uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int16x8x2_t vld1q_s16_x2(const int16_t *ptr) {
|
||||||
|
int16x8x2_t res = { { vld1q_s16(ptr + 0 * 8), vld1q_s16(ptr + 1 * 8) } };
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int16x8x4_t vld1q_s16_x4(const int16_t *ptr) {
|
||||||
|
int16x8x4_t res = { { vld1q_s16(ptr + 0 * 8), vld1q_s16(ptr + 1 * 8),
|
||||||
|
vld1q_s16(ptr + 2 * 8), vld1q_s16(ptr + 3 * 8) } };
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vst1_u8_x2(uint8_t *ptr, uint8x8x2_t a) {
|
||||||
|
vst1_u8(ptr + 0 * 8, a.val[0]);
|
||||||
|
vst1_u8(ptr + 1 * 8, a.val[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vst1_u8_x4(uint8_t *ptr, uint8x8x4_t a) {
|
||||||
|
vst1_u8(ptr + 0 * 8, a.val[0]);
|
||||||
|
vst1_u8(ptr + 1 * 8, a.val[1]);
|
||||||
|
vst1_u8(ptr + 2 * 8, a.val[2]);
|
||||||
|
vst1_u8(ptr + 3 * 8, a.val[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vst1q_u16_x2(uint16_t *ptr, uint16x8x2_t a) {
|
||||||
|
vst1q_u16(ptr + 0 * 8, a.val[0]);
|
||||||
|
vst1q_u16(ptr + 1 * 8, a.val[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vst1q_u16_x4(uint16_t *ptr, uint16x8x4_t a) {
|
||||||
|
vst1q_u16(ptr + 0 * 8, a.val[0]);
|
||||||
|
vst1q_u16(ptr + 1 * 8, a.val[1]);
|
||||||
|
vst1q_u16(ptr + 2 * 8, a.val[2]);
|
||||||
|
vst1q_u16(ptr + 3 * 8, a.val[3]);
|
||||||
|
}
|
||||||
|
|
||||||
#elif defined(__GNUC__) && !defined(__clang__) // GCC 64-bit.
|
#elif defined(__GNUC__) && !defined(__clang__) // GCC 64-bit.
|
||||||
#if __GNUC__ < 8
|
#if __GNUC__ < 8
|
||||||
static inline uint8x16x2_t vld1q_u8_x2(const uint8_t *ptr) {
|
static inline uint8x16x2_t vld1q_u8_x2(const uint8_t *ptr) {
|
||||||
uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };
|
uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int16x8x2_t vld1q_s16_x2(const int16_t *ptr) {
|
||||||
|
int16x8x2_t res = { { vld1q_s16(ptr + 0 * 8), vld1q_s16(ptr + 1 * 8) } };
|
||||||
|
return res;
|
||||||
|
}
|
||||||
#endif // __GNUC__ < 8
|
#endif // __GNUC__ < 8
|
||||||
|
|
||||||
#if __GNUC__ < 9
|
#if __GNUC__ < 9
|
||||||
@@ -71,13 +111,30 @@ static inline uint8x16x3_t vld1q_u8_x3(const uint8_t *ptr) {
 }
 #endif  // __GNUC__ < 9
 
-// vld1q_u16_x4 is defined from GCC 8.5.0 and onwards.
 #if ((__GNUC__ << 8) | __GNUC_MINOR__) < 0x805
 static inline uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
   uint16x8x4_t res = { { vld1q_u16(ptr + 0 * 8), vld1q_u16(ptr + 1 * 8),
                          vld1q_u16(ptr + 2 * 8), vld1q_u16(ptr + 3 * 8) } };
   return res;
 }
+
+static inline int16x8x4_t vld1q_s16_x4(const int16_t *ptr) {
+  int16x8x4_t res = { { vld1q_s16(ptr + 0 * 8), vld1q_s16(ptr + 1 * 8),
+                        vld1q_s16(ptr + 2 * 8), vld1q_s16(ptr + 3 * 8) } };
+  return res;
+}
+
+static inline void vst1_u8_x2(uint8_t *ptr, uint8x8x2_t a) {
+  vst1_u8(ptr + 0 * 8, a.val[0]);
+  vst1_u8(ptr + 1 * 8, a.val[1]);
+}
+
+static inline void vst1_u8_x4(uint8_t *ptr, uint8x8x4_t a) {
+  vst1_u8(ptr + 0 * 8, a.val[0]);
+  vst1_u8(ptr + 1 * 8, a.val[1]);
+  vst1_u8(ptr + 2 * 8, a.val[2]);
+  vst1_u8(ptr + 3 * 8, a.val[3]);
+}
 #endif  // ((__GNUC__ << 8) | __GNUC_MINOR__) < 0x805
 #endif  // defined(__GNUC__) && !defined(__clang__)
 
@@ -215,6 +272,23 @@ static inline void load_u16_4x4(const uint16_t *s, const ptrdiff_t p,
   s += p;
 }
 
+static inline void load_u16_4x6(const uint16_t *s, ptrdiff_t p,
+                                uint16x4_t *const s0, uint16x4_t *const s1,
+                                uint16x4_t *const s2, uint16x4_t *const s3,
+                                uint16x4_t *const s4, uint16x4_t *const s5) {
+  *s0 = vld1_u16(s);
+  s += p;
+  *s1 = vld1_u16(s);
+  s += p;
+  *s2 = vld1_u16(s);
+  s += p;
+  *s3 = vld1_u16(s);
+  s += p;
+  *s4 = vld1_u16(s);
+  s += p;
+  *s5 = vld1_u16(s);
+}
+
 static inline void load_u16_4x7(const uint16_t *s, ptrdiff_t p,
                                 uint16x4_t *const s0, uint16x4_t *const s1,
                                 uint16x4_t *const s2, uint16x4_t *const s3,
@@ -235,6 +309,65 @@ static inline void load_u16_4x7(const uint16_t *s, ptrdiff_t p,
   *s6 = vld1_u16(s);
 }
 
+static inline void load_u16_4x8(const uint16_t *s, ptrdiff_t p,
+                                uint16x4_t *const s0, uint16x4_t *const s1,
+                                uint16x4_t *const s2, uint16x4_t *const s3,
+                                uint16x4_t *const s4, uint16x4_t *const s5,
+                                uint16x4_t *const s6, uint16x4_t *const s7) {
+  *s0 = vld1_u16(s);
+  s += p;
+  *s1 = vld1_u16(s);
+  s += p;
+  *s2 = vld1_u16(s);
+  s += p;
+  *s3 = vld1_u16(s);
+  s += p;
+  *s4 = vld1_u16(s);
+  s += p;
+  *s5 = vld1_u16(s);
+  s += p;
+  *s6 = vld1_u16(s);
+  s += p;
+  *s7 = vld1_u16(s);
+}
+
+static inline void load_u16_4x14(const uint16_t *s, ptrdiff_t p,
+                                 uint16x4_t *const s0, uint16x4_t *const s1,
+                                 uint16x4_t *const s2, uint16x4_t *const s3,
+                                 uint16x4_t *const s4, uint16x4_t *const s5,
+                                 uint16x4_t *const s6, uint16x4_t *const s7,
+                                 uint16x4_t *const s8, uint16x4_t *const s9,
+                                 uint16x4_t *const s10, uint16x4_t *const s11,
+                                 uint16x4_t *const s12, uint16x4_t *const s13) {
+  *s0 = vld1_u16(s);
+  s += p;
+  *s1 = vld1_u16(s);
+  s += p;
+  *s2 = vld1_u16(s);
+  s += p;
+  *s3 = vld1_u16(s);
+  s += p;
+  *s4 = vld1_u16(s);
+  s += p;
+  *s5 = vld1_u16(s);
+  s += p;
+  *s6 = vld1_u16(s);
+  s += p;
+  *s7 = vld1_u16(s);
+  s += p;
+  *s8 = vld1_u16(s);
+  s += p;
+  *s9 = vld1_u16(s);
+  s += p;
+  *s10 = vld1_u16(s);
+  s += p;
+  *s11 = vld1_u16(s);
+  s += p;
+  *s12 = vld1_u16(s);
+  s += p;
+  *s13 = vld1_u16(s);
+}
+
 static inline void load_s16_8x2(const int16_t *s, const ptrdiff_t p,
                                 int16x8_t *const s0, int16x8_t *const s1) {
   *s0 = vld1q_s16(s);
@@ -597,6 +730,56 @@ static inline void store_u16_4x4(uint16_t *s, ptrdiff_t dst_stride,
   vst1_u16(s, s3);
 }
 
+static inline void store_u16_4x6(uint16_t *s, ptrdiff_t dst_stride,
+                                 const uint16x4_t s0, const uint16x4_t s1,
+                                 const uint16x4_t s2, const uint16x4_t s3,
+                                 const uint16x4_t s4, const uint16x4_t s5) {
+  vst1_u16(s, s0);
+  s += dst_stride;
+  vst1_u16(s, s1);
+  s += dst_stride;
+  vst1_u16(s, s2);
+  s += dst_stride;
+  vst1_u16(s, s3);
+  s += dst_stride;
+  vst1_u16(s, s4);
+  s += dst_stride;
+  vst1_u16(s, s5);
+}
+
+static inline void store_u16_4x12(uint16_t *s, ptrdiff_t dst_stride,
+                                  const uint16x4_t s0, const uint16x4_t s1,
+                                  const uint16x4_t s2, const uint16x4_t s3,
+                                  const uint16x4_t s4, const uint16x4_t s5,
+                                  const uint16x4_t s6, const uint16x4_t s7,
+                                  const uint16x4_t s8, const uint16x4_t s9,
+                                  const uint16x4_t s10, const uint16x4_t s11) {
+  vst1_u16(s, s0);
+  s += dst_stride;
+  vst1_u16(s, s1);
+  s += dst_stride;
+  vst1_u16(s, s2);
+  s += dst_stride;
+  vst1_u16(s, s3);
+  s += dst_stride;
+  vst1_u16(s, s4);
+  s += dst_stride;
+  vst1_u16(s, s5);
+  s += dst_stride;
+  vst1_u16(s, s6);
+  s += dst_stride;
+  vst1_u16(s, s7);
+  s += dst_stride;
+  vst1_u16(s, s8);
+  s += dst_stride;
+  vst1_u16(s, s9);
+  s += dst_stride;
+  vst1_u16(s, s10);
+  s += dst_stride;
+  vst1_u16(s, s11);
+  s += dst_stride;
+}
+
 static inline void store_u16_8x2(uint16_t *s, ptrdiff_t dst_stride,
                                  const uint16x8_t s0, const uint16x8_t s1) {
   vst1q_u16(s, s0);
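The helpers above exist because the multi-vector vld1q_*_x2/_x4 and vst1q_*_x2/_x4 intrinsics arrived late in GCC; each shim is simply N contiguous loads or stores spaced 8 (or 16) lanes apart, with no interleaving. A hedged usage sketch, assuming one of the definitions above (or the native ACLE intrinsic) is in scope:

#include <arm_neon.h>
#include <stdint.h>

// Copies 32 contiguous int16_t values: one x4 load, four plain stores.
static void copy_32_s16(int16_t *dst, const int16_t *src) {
  const int16x8x4_t v = vld1q_s16_x4(src);  // four back-to-back 8-lane loads
  vst1q_s16(dst + 0 * 8, v.val[0]);
  vst1q_s16(dst + 1 * 8, v.val[1]);
  vst1q_s16(dst + 2 * 8, v.val[2]);
  vst1q_s16(dst + 3 * 8, v.val[3]);
}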

third_party/aom/aom_dsp/x86/synonyms.h (vendored; 11 changed lines)

@@ -46,16 +46,6 @@ static inline __m128i xx_loadu_128(const void *a) {
   return _mm_loadu_si128((const __m128i *)a);
 }
 
-// _mm_loadu_si64 has been introduced in GCC 9, reimplement the function
-// manually on older compilers.
-#if !defined(__clang__) && __GNUC_MAJOR__ < 9
-static inline __m128i xx_loadu_2x64(const void *hi, const void *lo) {
-  __m64 hi_, lo_;
-  memcpy(&hi_, hi, sizeof(hi_));
-  memcpy(&lo_, lo, sizeof(lo_));
-  return _mm_set_epi64(hi_, lo_);
-}
-#else
 // Load 64 bits from each of hi and low, and pack into an SSE register
 // Since directly loading as `int64_t`s and using _mm_set_epi64 may violate
 // the strict aliasing rule, this takes a different approach
@@ -63,7 +53,6 @@ static inline __m128i xx_loadu_2x64(const void *hi, const void *lo) {
   return _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)lo),
                             _mm_loadl_epi64((const __m128i *)hi));
 }
-#endif
 
 static inline void xx_storel_32(void *const a, const __m128i v) {
   const int val = _mm_cvtsi128_si32(v);
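The retained xx_loadu_2x64 builds the 128-bit value from two _mm_loadl_epi64 loads rather than dereferencing the inputs as int64_t, which could violate strict aliasing. A standalone SSE2 sketch of the same idea, using hypothetical input buffers:

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint8_t lo[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  const uint8_t hi[8] = { 9, 10, 11, 12, 13, 14, 15, 16 };
  // _mm_loadl_epi64 reads exactly 8 bytes through an __m128i pointer, so no
  // int64_t lvalue is ever formed and no aliasing rule is broken.
  const __m128i v = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)lo),
                                       _mm_loadl_epi64((const __m128i *)hi));
  uint8_t out[16];
  _mm_storeu_si128((__m128i *)out, v);
  for (int i = 0; i < 16; i++) printf("%u ", out[i]);  // 1 2 ... 16
  printf("\n");
  return 0;
}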

third_party/aom/aom_dsp/x86/synonyms_avx2.h (vendored; 15 changed lines)

@@ -76,26 +76,11 @@ static inline __m256i yy_loadu_4x64(const void *e3, const void *e2,
   return yy_set_m128i(_mm_castpd_si128(v23), _mm_castpd_si128(v01));
 }
 
-#define GCC_VERSION (__GNUC__ * 10000 \
-                     + __GNUC_MINOR__ * 100 \
-                     + __GNUC_PATCHLEVEL__)
-
-// _mm256_loadu2_m128i has been introduced in GCC 10.1
-#if !defined(__clang__) && GCC_VERSION < 101000
-static inline __m256i yy_loadu2_128(const void *hi, const void *lo) {
-  __m128i mhi = _mm_loadu_si128((const __m128i *)(hi));
-  __m128i mlo = _mm_loadu_si128((const __m128i *)(lo));
-  return _mm256_set_m128i(mhi, mlo);
-}
-#else
 static inline __m256i yy_loadu2_128(const void *hi, const void *lo) {
   __m128i mhi = _mm_loadu_si128((const __m128i *)(hi));
   __m128i mlo = _mm_loadu_si128((const __m128i *)(lo));
   return yy_set_m128i(mhi, mlo);
 }
-#endif
-
-#undef GCC_VERSION
 
 static inline void yy_storeu2_128(void *hi, void *lo, const __m256i a) {
   _mm_storeu_si128((__m128i *)hi, _mm256_extracti128_si256(a, 1));
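With the GCC_VERSION special case gone, yy_loadu2_128 always routes through aom's yy_set_m128i helper. The operation itself is just two unaligned 128-bit loads combined into one 256-bit register; a hedged AVX2 sketch of an equivalent combination (buffer contents are illustrative):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int32_t lo[4] = { 0, 1, 2, 3 };
  const int32_t hi[4] = { 4, 5, 6, 7 };
  const __m128i mlo = _mm_loadu_si128((const __m128i *)lo);
  const __m128i mhi = _mm_loadu_si128((const __m128i *)hi);
  // Widen mlo to 256 bits, then insert mhi as the upper 128-bit half.
  const __m256i v =
      _mm256_inserti128_si256(_mm256_castsi128_si256(mlo), mhi, 1);
  int32_t out[8];
  _mm256_storeu_si256((__m256i *)out, v);
  for (int i = 0; i < 8; i++) printf("%d ", out[i]);  // 0 1 2 3 4 5 6 7
  printf("\n");
  return 0;
}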

third_party/aom/aom_ports/aom_ports.cmake (vendored; 8 changed lines)

@@ -38,6 +38,9 @@ endif()
 list(APPEND AOM_PORTS_SOURCES_PPC "${AOM_ROOT}/aom_ports/ppc.h"
      "${AOM_ROOT}/aom_ports/ppc_cpudetect.c")
 
+list(APPEND AOM_PORTS_SOURCES_RISCV "${AOM_ROOT}/aom_ports/riscv.h"
+     "${AOM_ROOT}/aom_ports/riscv_cpudetect.c")
+
 # For arm and x86 targets:
 #
 # * Creates the aom_ports build target, adds the includes in aom_ports to the
@@ -68,9 +71,12 @@ function(setup_aom_ports_targets)
   elseif("${AOM_TARGET_CPU}" MATCHES "ppc")
     add_library(aom_ports OBJECT ${AOM_PORTS_SOURCES_PPC})
     set(aom_ports_has_symbols 1)
+  elseif("${AOM_TARGET_CPU}" MATCHES "riscv")
+    add_library(aom_ports OBJECT ${AOM_PORTS_SOURCES_RISCV})
+    set(aom_ports_has_symbols 1)
   endif()
 
-  if("${AOM_TARGET_CPU}" MATCHES "arm|ppc")
+  if("${AOM_TARGET_CPU}" MATCHES "arm|ppc|riscv")
     target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_ports>)
     if(BUILD_SHARED_LIBS)
       target_sources(aom_static PRIVATE $<TARGET_OBJECTS:aom_ports>)

third_party/aom/aom_ports/riscv.h (vendored; new file, 30 lines)

@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved.
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AOM_PORTS_RISCV_H_
+#define AOM_AOM_PORTS_RISCV_H_
+#include <stdlib.h>
+
+#include "config/aom_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define HAS_RVV 0x01
+
+int riscv_simd_caps(void);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AOM_AOM_PORTS_RISCV_H_
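Callers are expected to gate RVV code paths on the bitmask returned by riscv_simd_caps(). A hedged sketch of the intended call pattern; the two implementation names are hypothetical stand-ins, not functions from the patch:

#include "aom_ports/riscv.h"

void some_kernel_c(void);    // hypothetical portable fallback
void some_kernel_rvv(void);  // hypothetical RVV implementation

static void run_kernel(void) {
  const int flags = riscv_simd_caps();
  if (flags & HAS_RVV) {
    some_kernel_rvv();  // vector extension detected at runtime
  } else {
    some_kernel_c();
  }
}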

third_party/aom/aom_ports/riscv_cpudetect.c (vendored; new file, 38 lines)

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2025, Alliance for Open Media. All rights reserved.
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "config/aom_config.h"
+
+#include "aom_ports/riscv.h"
+
+#if CONFIG_RUNTIME_CPU_DETECT
+
+#include <sys/auxv.h>
+
+#define HWCAP_RVV (1 << ('v' - 'a'))
+
+int riscv_simd_caps(void) {
+  int flags = 0;
+#if HAVE_RVV
+  unsigned long hwcap = getauxval(AT_HWCAP);
+  if (hwcap & HWCAP_RVV) flags |= HAS_RVV;
+#endif
+  return flags;
+}
+#else
+// If there is no RTCD the function pointers are not used and can not be
+// changed.
+int riscv_simd_caps(void) { return 0; }
+#endif  // CONFIG_RUNTIME_CPU_DETECT
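On Linux, each single-letter RISC-V ISA extension is reported in AT_HWCAP as bit (letter - 'a'), so 'v' lands on bit 21 and HWCAP_RVV equals 0x200000. A small Linux-only check program illustrating the computation (not part of the patch):

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_RVV (1 << ('v' - 'a'))  // 'v' - 'a' == 21, so 1 << 21

int main(void) {
  const unsigned long hwcap = getauxval(AT_HWCAP);
  printf("HWCAP_RVV mask: 0x%lx\n", (unsigned long)HWCAP_RVV);  // 0x200000
  printf("vector extension: %s\n",
         (hwcap & HWCAP_RVV) ? "present" : "absent");
  return 0;
}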

third_party/aom/apps/aomenc.c (vendored; 5 changed lines)

@@ -2318,8 +2318,9 @@ int main(int argc, const char **argv_) {
                 "match input format.\n",
                 stream->config.cfg.g_profile);
       }
-      if ((global.show_psnr == 2) && (stream->config.cfg.g_input_bit_depth ==
-                                      stream->config.cfg.g_bit_depth)) {
+      if (global.show_psnr == 2 &&
+          stream->config.cfg.g_input_bit_depth ==
+              (unsigned int)stream->config.cfg.g_bit_depth) {
         fprintf(stderr,
                 "Warning: --psnr==2 and --psnr==1 will provide same "
                 "results when input bit-depth == stream bit-depth, "

third_party/aom/av1/av1.cmake (vendored; 10 changed lines)

@@ -445,6 +445,9 @@ list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_2
 
 list(APPEND AOM_AV1_COMMON_INTRIN_VSX "${AOM_ROOT}/av1/common/ppc/cfl_ppc.c")
 
+list(APPEND AOM_AV1_COMMON_INTRIN_RVV
+     "${AOM_ROOT}/av1/common/riscv/cdef_block_rvv.c")
+
 if(CONFIG_THREE_PASS)
   list(APPEND AOM_AV1_ENCODER_SOURCES "${AOM_ROOT}/av1/encoder/thirdpass.c"
        "${AOM_ROOT}/av1/encoder/thirdpass.h")
@@ -822,6 +825,13 @@ function(setup_av1_targets)
     endif()
   endif()
 
+  if(HAVE_RVV)
+    if(AOM_AV1_COMMON_INTRIN_RVV)
+      add_intrinsics_object_library("-march=rv64gcv" "rvv" "aom_av1_common"
+                                    "AOM_AV1_COMMON_INTRIN_RVV")
+    endif()
+  endif()
+
   # Pass the new lib targets up to the parent scope instance of
   # $AOM_LIB_TARGETS.
   set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)

third_party/aom/av1/av1_cx_iface.c (vendored; 7 changed lines)

@@ -1084,7 +1084,6 @@ static void set_encoder_config(AV1EncoderConfig *oxcf,
   AlgoCfg *const algo_cfg = &oxcf->algo_cfg;
   ToolCfg *const tool_cfg = &oxcf->tool_cfg;
 
-  const int is_vbr = cfg->rc_end_usage == AOM_VBR;
   oxcf->profile = cfg->g_profile;
   oxcf->max_threads = (int)cfg->g_threads;
 
@@ -1167,9 +1166,9 @@ static void set_encoder_config(AV1EncoderConfig *oxcf,
   rc_cfg->cq_level = av1_quantizer_to_qindex(extra_cfg->cq_level);
   rc_cfg->under_shoot_pct = cfg->rc_undershoot_pct;
   rc_cfg->over_shoot_pct = cfg->rc_overshoot_pct;
-  rc_cfg->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz;
-  rc_cfg->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz;
-  rc_cfg->optimal_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_optimal_sz;
+  rc_cfg->maximum_buffer_size_ms = cfg->rc_buf_sz;
+  rc_cfg->starting_buffer_level_ms = cfg->rc_buf_initial_sz;
+  rc_cfg->optimal_buffer_level_ms = cfg->rc_buf_optimal_sz;
   // Convert target bandwidth from Kbit/s to Bit/s
   rc_cfg->target_bandwidth = 1000 * cfg->rc_target_bitrate;
   rc_cfg->drop_frames_water_mark = cfg->rc_dropframe_thresh;

third_party/aom/av1/common/arm/cfl_neon.c (vendored; 19 changed lines)

@@ -13,6 +13,7 @@
 #include "config/aom_config.h"
 #include "config/av1_rtcd.h"
 
+#include "aom_dsp/arm/mem_neon.h"
 #include "av1/common/cfl.h"
 
 static inline void vldsubstq_s16(int16_t *dst, const uint16_t *src, int offset,
@@ -428,10 +429,7 @@ static inline int16x8_t predict_w8(const int16_t *pred_buf_q3,
 static inline int16x8x2_t predict_w16(const int16_t *pred_buf_q3,
                                       int16x8_t alpha_sign, int abs_alpha_q12,
                                       int16x8_t dc) {
-  // vld2q_s16 interleaves, which is not useful for prediction. vst1q_s16_x2
-  // does not interleave, but is not currently available in the compilier used
-  // by the AOM build system.
-  const int16x8x2_t ac_q3 = vld2q_s16(pred_buf_q3);
+  const int16x8x2_t ac_q3 = vld1q_s16_x2(pred_buf_q3);
   const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]);
   const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]);
   const int16x8_t scaled_luma_0 =
@@ -447,10 +445,7 @@ static inline int16x8x2_t predict_w16(const int16_t *pred_buf_q3,
 static inline int16x8x4_t predict_w32(const int16_t *pred_buf_q3,
                                       int16x8_t alpha_sign, int abs_alpha_q12,
                                       int16x8_t dc) {
-  // vld4q_s16 interleaves, which is not useful for prediction. vst1q_s16_x4
-  // does not interleave, but is not currently available in the compilier used
-  // by the AOM build system.
-  const int16x8x4_t ac_q3 = vld4q_s16(pred_buf_q3);
+  const int16x8x4_t ac_q3 = vld1q_s16_x4(pred_buf_q3);
   const int16x8_t ac_sign_0 = veorq_s16(alpha_sign, ac_q3.val[0]);
   const int16x8_t ac_sign_1 = veorq_s16(alpha_sign, ac_q3.val[1]);
   const int16x8_t ac_sign_2 = veorq_s16(alpha_sign, ac_q3.val[2]);
@@ -497,7 +492,7 @@ static inline void cfl_predict_lbd_neon(const int16_t *pred_buf_q3,
           predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
       const uint8x8x2_t predun = { { vqmovun_s16(pred.val[0]),
                                      vqmovun_s16(pred.val[1]) } };
-      vst2_u8(dst, predun);
+      vst1_u8_x2(dst, predun);
     } else {
       const int16x8x4_t pred =
           predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
@@ -505,7 +500,7 @@ static inline void cfl_predict_lbd_neon(const int16_t *pred_buf_q3,
         { vqmovun_s16(pred.val[0]), vqmovun_s16(pred.val[1]),
           vqmovun_s16(pred.val[2]), vqmovun_s16(pred.val[3]) }
       };
-      vst4_u8(dst, predun);
+      vst1_u8_x4(dst, predun);
     }
     dst += dst_stride;
   } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
@@ -574,11 +569,11 @@ static inline void cfl_predict_hbd_neon(const int16_t *pred_buf_q3,
     } else if (width == 16) {
       const int16x8x2_t pred =
           predict_w16(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
-      vst2q_u16(dst, clamp2q_s16(pred, max_16x8));
+      vst1q_u16_x2(dst, clamp2q_s16(pred, max_16x8));
    } else {
      const int16x8x4_t pred =
          predict_w32(pred_buf_q3, alpha_sign, abs_alpha_q12, dc);
-      vst4q_u16(dst, clamp4q_s16(pred, max_16x8));
+      vst1q_u16_x4(dst, clamp4q_s16(pred, max_16x8));
    }
    dst += dst_stride;
  } while ((pred_buf_q3 += CFL_BUF_LINE) < end);
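The switch matters because the vldNq family de-interleaves: vld2q_s16 puts elements 0, 2, 4, ... in val[0] and 1, 3, 5, ... in val[1], whereas vld1q_s16_x2 keeps the two 8-element runs contiguous, which is what the CfL prediction actually wants. A hedged sketch of the difference, assuming a toolchain that provides vld1q_s16_x2:

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  int16_t buf[16];
  for (int i = 0; i < 16; i++) buf[i] = (int16_t)i;

  const int16x8x2_t deint = vld2q_s16(buf);      // val[0] = 0, 2, 4, ..., 14
  const int16x8x2_t contig = vld1q_s16_x2(buf);  // val[0] = 0, 1, 2, ..., 7

  printf("vld2q_s16    val[0] lane 1 = %d\n",
         vgetq_lane_s16(deint.val[0], 1));  // prints 2
  printf("vld1q_s16_x2 val[0] lane 1 = %d\n",
         vgetq_lane_s16(contig.val[0], 1));  // prints 1
  return 0;
}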

@@ -53,8 +53,7 @@ static AOM_FORCE_INLINE int32x4x2_t vertical_filter_8x1_f8(const int16x8_t *tmp,
 static AOM_FORCE_INLINE int16x8_t load_filters_1(int ofs) {
   const int ofs0 = ROUND_POWER_OF_TWO(ofs, WARPEDDIFF_PREC_BITS);
 
-  const int16_t *base =
-      (int16_t *)av1_warped_filter + WARPEDPIXEL_PREC_SHIFTS * 8;
+  const int16_t *base = av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS];
   return vld1q_s16(base + ofs0 * 8);
 }
 
@@ -65,8 +64,7 @@ static AOM_FORCE_INLINE void load_filters_4(int16x8_t out[], int ofs,
   const int ofs2 = ROUND_POWER_OF_TWO(ofs + stride * 2, WARPEDDIFF_PREC_BITS);
   const int ofs3 = ROUND_POWER_OF_TWO(ofs + stride * 3, WARPEDDIFF_PREC_BITS);
 
-  const int16_t *base =
-      (int16_t *)av1_warped_filter + WARPEDPIXEL_PREC_SHIFTS * 8;
+  const int16_t *base = av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS];
   out[0] = vld1q_s16(base + ofs0 * 8);
   out[1] = vld1q_s16(base + ofs1 * 8);
   out[2] = vld1q_s16(base + ofs2 * 8);
@@ -84,8 +82,7 @@ static AOM_FORCE_INLINE void load_filters_8(int16x8_t out[], int ofs,
   const int ofs6 = ROUND_POWER_OF_TWO(ofs + stride * 6, WARPEDDIFF_PREC_BITS);
   const int ofs7 = ROUND_POWER_OF_TWO(ofs + stride * 7, WARPEDDIFF_PREC_BITS);
 
-  const int16_t *base =
-      (int16_t *)av1_warped_filter + WARPEDPIXEL_PREC_SHIFTS * 8;
+  const int16_t *base = av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS];
   out[0] = vld1q_s16(base + ofs0 * 8);
   out[1] = vld1q_s16(base + ofs1 * 8);
   out[2] = vld1q_s16(base + ofs2 * 8);

third_party/aom/av1/common/arm/warp_plane_neon.c (vendored; 12 changed lines)

@@ -101,8 +101,7 @@ horizontal_filter_4x1_f1_beta0(const uint8x16_t in, int16x8_t f_s16) {
 
 static AOM_FORCE_INLINE int16x8_t horizontal_filter_4x1_f1(const uint8x16_t in,
                                                            int sx) {
-  int16x8_t f_s16 =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f_s16 = vld1q_s16(av1_warped_filter[sx >> WARPEDDIFF_PREC_BITS]);
   return horizontal_filter_4x1_f1_beta0(in, f_s16);
 }
 
@@ -140,8 +139,7 @@ horizontal_filter_8x1_f1_beta0(const uint8x16_t in, int16x8_t f_s16) {
 
 static AOM_FORCE_INLINE int16x8_t horizontal_filter_8x1_f1(const uint8x16_t in,
                                                            int sx) {
-  int16x8_t f_s16 =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f_s16 = vld1q_s16(av1_warped_filter[sx >> WARPEDDIFF_PREC_BITS]);
   return horizontal_filter_8x1_f1_beta0(in, f_s16);
 }
 
@@ -156,8 +154,7 @@ static AOM_FORCE_INLINE void vertical_filter_4x1_f1(const int16x8_t *src,
   int16x4_t s6 = vget_low_s16(src[6]);
   int16x4_t s7 = vget_low_s16(src[7]);
 
-  int16x8_t f =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f = vld1q_s16(av1_warped_filter[sy >> WARPEDDIFF_PREC_BITS]);
 
   int32x4_t m0123 = vmull_lane_s16(s0, vget_low_s16(f), 0);
   m0123 = vmlal_lane_s16(m0123, s1, vget_low_s16(f), 1);
@@ -210,8 +207,7 @@ static AOM_FORCE_INLINE void vertical_filter_8x1_f1(const int16x8_t *src,
   int16x8_t s6 = src[6];
   int16x8_t s7 = src[7];
 
-  int16x8_t f =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f = vld1q_s16(av1_warped_filter[sy >> WARPEDDIFF_PREC_BITS]);
 
   int32x4_t m0123 = vmull_lane_s16(vget_low_s16(s0), vget_low_s16(f), 0);
   m0123 = vmlal_lane_s16(m0123, vget_low_s16(s1), vget_low_s16(f), 1);

third_party/aom/av1/common/arm/warp_plane_neon.h (vendored; 56 changed lines)

@@ -61,34 +61,34 @@ static AOM_FORCE_INLINE void vertical_filter_8x1_f8(const int16x8_t *src,
 
 static AOM_FORCE_INLINE void load_filters_4(int16x8_t out[], int offset,
                                             int stride) {
-  out[0] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 0 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[1] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 1 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[2] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 2 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[3] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 3 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
+  out[0] = vld1q_s16(
+      av1_warped_filter[(offset + 0 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[1] = vld1q_s16(
+      av1_warped_filter[(offset + 1 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[2] = vld1q_s16(
+      av1_warped_filter[(offset + 2 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[3] = vld1q_s16(
+      av1_warped_filter[(offset + 3 * stride) >> WARPEDDIFF_PREC_BITS]);
 }
 
 static AOM_FORCE_INLINE void load_filters_8(int16x8_t out[], int offset,
                                             int stride) {
-  out[0] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 0 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[1] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 1 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[2] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 2 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[3] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 3 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[4] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 4 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[5] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 5 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[6] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 6 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
-  out[7] = vld1q_s16((int16_t *)(av1_warped_filter + ((offset + 7 * stride) >>
-                                                      WARPEDDIFF_PREC_BITS)));
+  out[0] = vld1q_s16(
+      av1_warped_filter[(offset + 0 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[1] = vld1q_s16(
+      av1_warped_filter[(offset + 1 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[2] = vld1q_s16(
+      av1_warped_filter[(offset + 2 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[3] = vld1q_s16(
+      av1_warped_filter[(offset + 3 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[4] = vld1q_s16(
+      av1_warped_filter[(offset + 4 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[5] = vld1q_s16(
+      av1_warped_filter[(offset + 5 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[6] = vld1q_s16(
+      av1_warped_filter[(offset + 6 * stride) >> WARPEDDIFF_PREC_BITS]);
+  out[7] = vld1q_s16(
+      av1_warped_filter[(offset + 7 * stride) >> WARPEDDIFF_PREC_BITS]);
 }
 
 static AOM_FORCE_INLINE int clamp_iy(int iy, int height) {
@@ -175,8 +175,8 @@ static AOM_FORCE_INLINE void warp_affine_horizontal(
   if (p_width == 4) {
     if (beta == 0) {
       if (alpha == 0) {
-        int16x8_t f_s16 = vld1q_s16(
-            (int16_t *)(av1_warped_filter + (sx4 >> WARPEDDIFF_PREC_BITS)));
+        int16x8_t f_s16 =
+            vld1q_s16(av1_warped_filter[sx4 >> WARPEDDIFF_PREC_BITS]);
         APPLY_HORIZONTAL_SHIFT(horizontal_filter_4x1_f1_beta0, f_s16);
       } else {
         APPLY_HORIZONTAL_SHIFT(horizontal_filter_4x1_f4, sx4, alpha);
@@ -193,8 +193,8 @@ static AOM_FORCE_INLINE void warp_affine_horizontal(
   } else {
     if (beta == 0) {
      if (alpha == 0) {
-        int16x8_t f_s16 = vld1q_s16(
-            (int16_t *)(av1_warped_filter + (sx4 >> WARPEDDIFF_PREC_BITS)));
+        int16x8_t f_s16 =
+            vld1q_s16(av1_warped_filter[sx4 >> WARPEDDIFF_PREC_BITS]);
        APPLY_HORIZONTAL_SHIFT(horizontal_filter_8x1_f1_beta0, f_s16);
      } else {
        APPLY_HORIZONTAL_SHIFT(horizontal_filter_8x1_f8, sx4, alpha);
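Both spellings address the same coefficients: av1_warped_filter is a [N][8] array, so av1_warped_filter[idx] is already a pointer to row idx and equals (const int16_t *)av1_warped_filter + idx * 8; the new form simply lets the compiler do the row arithmetic without a cast. A small sketch with a hypothetical stand-in table:

#include <assert.h>
#include <stdint.h>

static const int16_t table[4][8] = { { 0 }, { 1 }, { 2 }, { 3 } };

int main(void) {
  const int idx = 2;
  const int16_t *by_row = table[idx];                         // new style
  const int16_t *by_cast = (const int16_t *)table + idx * 8;  // old style
  assert(by_row == by_cast);  // identical addresses
  return 0;
}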

@@ -109,8 +109,7 @@ horizontal_filter_4x1_f1_beta0(const uint8x16_t in, int16x8_t f_s16) {
 
 static AOM_FORCE_INLINE int16x8_t horizontal_filter_4x1_f1(const uint8x16_t in,
                                                            int sx) {
-  int16x8_t f_s16 =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f_s16 = vld1q_s16(av1_warped_filter[sx >> WARPEDDIFF_PREC_BITS]);
   return horizontal_filter_4x1_f1_beta0(in, f_s16);
 }
 
@@ -145,8 +144,7 @@ horizontal_filter_8x1_f1_beta0(const uint8x16_t in, int16x8_t f_s16) {
 
 static AOM_FORCE_INLINE int16x8_t horizontal_filter_8x1_f1(const uint8x16_t in,
                                                            int sx) {
-  int16x8_t f_s16 =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f_s16 = vld1q_s16(av1_warped_filter[sx >> WARPEDDIFF_PREC_BITS]);
   return horizontal_filter_8x1_f1_beta0(in, f_s16);
 }
 
@@ -161,8 +159,7 @@ static AOM_FORCE_INLINE void vertical_filter_4x1_f1(const int16x8_t *src,
   int16x4_t s6 = vget_low_s16(src[6]);
   int16x4_t s7 = vget_low_s16(src[7]);
 
-  int16x8_t f =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f = vld1q_s16(av1_warped_filter[sy >> WARPEDDIFF_PREC_BITS]);
 
   int32x4_t m0123 = vmull_lane_s16(s0, vget_low_s16(f), 0);
   m0123 = vmlal_lane_s16(m0123, s1, vget_low_s16(f), 1);
@@ -215,8 +212,7 @@ static AOM_FORCE_INLINE void vertical_filter_8x1_f1(const int16x8_t *src,
   int16x8_t s6 = src[6];
   int16x8_t s7 = src[7];
 
-  int16x8_t f =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f = vld1q_s16(av1_warped_filter[sy >> WARPEDDIFF_PREC_BITS]);
 
   int32x4_t m0123 = vmull_lane_s16(vget_low_s16(s0), vget_low_s16(f), 0);
   m0123 = vmlal_lane_s16(m0123, vget_low_s16(s1), vget_low_s16(f), 1);

third_party/aom/av1/common/arm/warp_plane_sve.c (vendored; 12 changed lines)

@@ -112,8 +112,7 @@ horizontal_filter_4x1_f1_beta0(const uint8x16_t in, int16x8_t f_s16) {
 
 static AOM_FORCE_INLINE int16x8_t horizontal_filter_4x1_f1(const uint8x16_t in,
                                                            int sx) {
-  int16x8_t f_s16 =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f_s16 = vld1q_s16(av1_warped_filter[sx >> WARPEDDIFF_PREC_BITS]);
   return horizontal_filter_4x1_f1_beta0(in, f_s16);
 }
 
@@ -148,8 +147,7 @@ horizontal_filter_8x1_f1_beta0(const uint8x16_t in, int16x8_t f_s16) {
 
 static AOM_FORCE_INLINE int16x8_t horizontal_filter_8x1_f1(const uint8x16_t in,
                                                            int sx) {
-  int16x8_t f_s16 =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sx >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f_s16 = vld1q_s16(av1_warped_filter[sx >> WARPEDDIFF_PREC_BITS]);
   return horizontal_filter_8x1_f1_beta0(in, f_s16);
 }
 
@@ -164,8 +162,7 @@ static AOM_FORCE_INLINE void vertical_filter_4x1_f1(const int16x8_t *src,
   int16x4_t s6 = vget_low_s16(src[6]);
   int16x4_t s7 = vget_low_s16(src[7]);
 
-  int16x8_t f =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f = vld1q_s16(av1_warped_filter[sy >> WARPEDDIFF_PREC_BITS]);
 
   int32x4_t m0123 = vmull_lane_s16(s0, vget_low_s16(f), 0);
   m0123 = vmlal_lane_s16(m0123, s1, vget_low_s16(f), 1);
@@ -215,8 +212,7 @@ static AOM_FORCE_INLINE void vertical_filter_8x1_f1(const int16x8_t *src,
   int16x8_t s6 = src[6];
   int16x8_t s7 = src[7];
 
-  int16x8_t f =
-      vld1q_s16((int16_t *)(av1_warped_filter + (sy >> WARPEDDIFF_PREC_BITS)));
+  int16x8_t f = vld1q_s16(av1_warped_filter[sy >> WARPEDDIFF_PREC_BITS]);
 
   int32x4_t m0123 = vmull_lane_s16(vget_low_s16(s0), vget_low_s16(f), 0);
   m0123 = vmlal_lane_s16(m0123, vget_low_s16(s1), vget_low_s16(f), 1);

third_party/aom/av1/common/av1_rtcd_defs.pl (vendored; 22 changed lines)

@@ -495,22 +495,22 @@ if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
 # structs as arguments, which makes the v256 type of the intrinsics
 # hard to support, so optimizations for this target are disabled.
 if ($opts{config} !~ /libs-x86-win32-vs.*/) {
-  specialize qw/cdef_find_dir sse4_1 avx2 neon/, "$ssse3_x86";
+  specialize qw/cdef_find_dir sse4_1 avx2 neon rvv/, "$ssse3_x86";
   specialize qw/cdef_find_dir_dual sse4_1 avx2 neon/, "$ssse3_x86";
 
-  specialize qw/cdef_filter_8_0 sse4_1 avx2 neon/, "$ssse3_x86";
-  specialize qw/cdef_filter_8_1 sse4_1 avx2 neon/, "$ssse3_x86";
-  specialize qw/cdef_filter_8_2 sse4_1 avx2 neon/, "$ssse3_x86";
-  specialize qw/cdef_filter_8_3 sse4_1 avx2 neon/, "$ssse3_x86";
+  specialize qw/cdef_filter_8_0 sse4_1 avx2 neon rvv/, "$ssse3_x86";
+  specialize qw/cdef_filter_8_1 sse4_1 avx2 neon rvv/, "$ssse3_x86";
+  specialize qw/cdef_filter_8_2 sse4_1 avx2 neon rvv/, "$ssse3_x86";
+  specialize qw/cdef_filter_8_3 sse4_1 avx2 neon rvv/, "$ssse3_x86";
 
-  specialize qw/cdef_filter_16_0 sse4_1 avx2 neon/, "$ssse3_x86";
-  specialize qw/cdef_filter_16_1 sse4_1 avx2 neon/, "$ssse3_x86";
-  specialize qw/cdef_filter_16_2 sse4_1 avx2 neon/, "$ssse3_x86";
-  specialize qw/cdef_filter_16_3 sse4_1 avx2 neon/, "$ssse3_x86";
+  specialize qw/cdef_filter_16_0 sse4_1 avx2 neon rvv/, "$ssse3_x86";
+  specialize qw/cdef_filter_16_1 sse4_1 avx2 neon rvv/, "$ssse3_x86";
+  specialize qw/cdef_filter_16_2 sse4_1 avx2 neon rvv/, "$ssse3_x86";
+  specialize qw/cdef_filter_16_3 sse4_1 avx2 neon rvv/, "$ssse3_x86";
 
-  specialize qw/cdef_copy_rect8_8bit_to_16bit sse4_1 avx2 neon/, "$ssse3_x86";
+  specialize qw/cdef_copy_rect8_8bit_to_16bit sse4_1 avx2 neon rvv/, "$ssse3_x86";
   if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
-    specialize qw/cdef_copy_rect8_16bit_to_16bit sse4_1 avx2 neon/, "$ssse3_x86";
+    specialize qw/cdef_copy_rect8_16bit_to_16bit sse4_1 avx2 neon rvv/, "$ssse3_x86";
   }
 }
 

third_party/aom/av1/common/riscv/cdef_block_rvv.c (vendored; new file, 1354 lines)
(File diff suppressed because it is too large.)

third_party/aom/av1/common/warped_motion.c (vendored; 11 changed lines)

@@ -27,7 +27,8 @@
 // [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
 // We need an extra 2 taps to fit this in, for a total of 8 taps.
 /* clang-format off */
-const int16_t av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
+const WarpedFilterCoeff av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1]
+                                         [8] = {
   // [-1, 0)
   { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
   { 1, - 3, 127, 4, - 1, 0, 0, 0 }, { 1, - 4, 126, 6, - 2, 1, 0, 0 },
@@ -344,7 +345,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
           const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                            WARPEDPIXEL_PREC_SHIFTS;
           assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
-          const int16_t *coeffs = av1_warped_filter[offs];
+          const WarpedFilterCoeff *coeffs = av1_warped_filter[offs];
 
           int32_t sum = 1 << offset_bits_horiz;
           for (int m = 0; m < 8; ++m) {
@@ -365,7 +366,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
           const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                            WARPEDPIXEL_PREC_SHIFTS;
           assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
-          const int16_t *coeffs = av1_warped_filter[offs];
+          const WarpedFilterCoeff *coeffs = av1_warped_filter[offs];
 
           int32_t sum = 1 << offset_bits_vert;
           for (int m = 0; m < 8; ++m) {
@@ -575,7 +576,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
           const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                            WARPEDPIXEL_PREC_SHIFTS;
           assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
-          const int16_t *coeffs = av1_warped_filter[offs];
+          const WarpedFilterCoeff *coeffs = av1_warped_filter[offs];
 
           int32_t sum = 1 << offset_bits_horiz;
           for (int m = 0; m < 8; ++m) {
@@ -599,7 +600,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
           const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
                            WARPEDPIXEL_PREC_SHIFTS;
           assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
-          const int16_t *coeffs = av1_warped_filter[offs];
+          const WarpedFilterCoeff *coeffs = av1_warped_filter[offs];
 
           int32_t sum = 1 << offset_bits_vert;
           for (int m = 0; m < 8; ++m) {

third_party/aom/av1/common/warped_motion.h (vendored; 9 changed lines)

@@ -33,7 +33,14 @@
 #define WARP_ERROR_BLOCK_LOG 5
 #define WARP_ERROR_BLOCK (1 << WARP_ERROR_BLOCK_LOG)
 
-extern const int16_t av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
+#if AOM_ARCH_ARM || AOM_ARCH_AARCH64 || AOM_ARCH_X86 || AOM_ARCH_X86_64
+typedef int16_t WarpedFilterCoeff;
+#else
+typedef int8_t WarpedFilterCoeff;
+#endif
+
+extern const WarpedFilterCoeff
+    av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
 
 DECLARE_ALIGNED(8, extern const int8_t,
                 av1_filter_8bit[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8]);
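The typedef keeps int16_t wherever SIMD warp kernels load the rows directly as 16-bit vectors, and narrows to int8_t elsewhere; every tap fits in [-128, 127], so the narrow form halves the table's footprint. A hedged sketch of the size difference, assuming WARPEDPIXEL_PREC_SHIFTS is 64 as in libaom:

#include <stdint.h>
#include <stdio.h>

enum { kRows = 64 * 3 + 1 };  // WARPEDPIXEL_PREC_SHIFTS * 3 + 1

int main(void) {
  printf("int16_t table: %zu bytes\n", sizeof(int16_t[kRows][8]));  // 3088
  printf("int8_t table:  %zu bytes\n", sizeof(int8_t[kRows][8]));   // 1544
  return 0;
}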

third_party/aom/av1/encoder/ratectrl.c (vendored; 4 changed lines)

@@ -3822,6 +3822,10 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi, FRAME_TYPE *const frame_type,
     resize_reset_rc(cpi, resize_pending_params->width,
                     resize_pending_params->height, cm->width, cm->height);
   }
+  if (svc->temporal_layer_id == 0) {
+    rc->num_col_blscroll_last_tl0 = 0;
+    rc->num_row_blscroll_last_tl0 = 0;
+  }
   // Set the GF interval and update flag.
   if (!rc->rtc_external_ratectrl)
     set_gf_interval_update_onepass_rt(cpi, *frame_type);

third_party/aom/av1/encoder/ratectrl.h (vendored; 2 changed lines)

@@ -200,6 +200,8 @@ typedef struct {
   int last_target_size_keyframe;
   int frames_since_scene_change;
   int perc_spatial_flat_blocks;
+  int num_col_blscroll_last_tl0;
+  int num_row_blscroll_last_tl0;
 
   int avg_frame_bandwidth;  // Average frame size target for clip
   int min_frame_bandwidth;  // Minimum allocation used for any frame

third_party/aom/av1/encoder/var_based_part.c (vendored; 88 changed lines)

@@ -1325,6 +1325,53 @@ static inline void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
   }
 }
 
+static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
+                                         unsigned int *y_sad, int mi_row,
+                                         int mi_col, int source_sad_nonrd) {
+  AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mi = xd->mi[0];
+  const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
+  const int increase_col_sw = source_sad_nonrd > kMedSad &&
+                              !cpi->rc.high_motion_content_screen_rtc &&
+                              (cpi->svc.temporal_layer_id == 0 ||
+                               cpi->rc.num_col_blscroll_last_tl0 > 2);
+  int me_search_size_col = is_screen
+                               ? increase_col_sw ? 512 : 96
+                               : block_size_wide[cm->seq_params->sb_size] >> 1;
+  // For screen use larger search size row motion to capture
+  // vertical scroll, which can be larger motion.
+  int me_search_size_row = is_screen
+                               ? source_sad_nonrd > kMedSad ? 512 : 192
+                               : block_size_high[cm->seq_params->sb_size] >> 1;
+  unsigned int y_sad_zero;
+  *y_sad = av1_int_pro_motion_estimation(
+      cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero,
+      me_search_size_col, me_search_size_row);
+  // The logic below selects whether the motion estimated in the
+  // int_pro_motion() will be used in nonrd_pickmode. Only do this
+  // for screen for now.
+  if (is_screen) {
+    unsigned int thresh_sad =
+        (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
+    if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
+      x->sb_me_partition = 1;
+      x->sb_me_mv.as_int = mi->mv[0].as_int;
+      if (cpi->svc.temporal_layer_id == 0) {
+        if (abs(mi->mv[0].as_mv.col) > 16 && abs(mi->mv[0].as_mv.row) == 0)
+          cpi->rc.num_col_blscroll_last_tl0++;
+        else if (abs(mi->mv[0].as_mv.row) > 16 && abs(mi->mv[0].as_mv.col) == 0)
+          cpi->rc.num_row_blscroll_last_tl0++;
+      }
+    } else {
+      x->sb_me_partition = 0;
+      // Fall back to using zero motion.
+      *y_sad = y_sad_zero;
+      mi->mv[0].as_int = 0;
+    }
+  }
+}
+
 static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                          unsigned int *y_sad_g, unsigned int *y_sad_alt,
                          unsigned int *y_sad_last,
@@ -1418,42 +1465,11 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
   // so for now force it to 2 based on superblock sad.
   if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;
 
-  if (est_motion == 1 || est_motion == 2) {
-    if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
-      // For screen only do int_pro_motion for spatial variance above
-      // threshold and motion level above LowSad.
-      if (x->source_variance > 100 && source_sad_nonrd > kLowSad) {
-        int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
-        int me_search_size_col =
-            is_screen ? source_sad_nonrd > kMedSad ? 160 : 96
-                      : block_size_wide[cm->seq_params->sb_size] >> 1;
-        // For screen use larger search size row motion to capture
-        // vertical scroll, which can be larger motion.
-        int me_search_size_row =
-            is_screen ? source_sad_nonrd > kMedSad ? 512 : 192
-                      : block_size_high[cm->seq_params->sb_size] >> 1;
-        unsigned int y_sad_zero;
-        *y_sad = av1_int_pro_motion_estimation(
-            cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv,
-            &y_sad_zero, me_search_size_col, me_search_size_row);
-        // The logic below selects whether the motion estimated in the
-        // int_pro_motion() will be used in nonrd_pickmode. Only do this
-        // for screen for now.
-        if (is_screen) {
-          unsigned int thresh_sad =
-              (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
-          if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
-            x->sb_me_partition = 1;
-            x->sb_me_mv.as_int = mi->mv[0].as_int;
-          } else {
-            x->sb_me_partition = 0;
-            // Fall back to using zero motion.
-            *y_sad = y_sad_zero;
-            mi->mv[0].as_int = 0;
-          }
-        }
-      }
-    }
-  }
+  if ((est_motion == 1 || est_motion == 2) && xd->mb_to_right_edge >= 0 &&
+      xd->mb_to_bottom_edge >= 0 && x->source_variance > 100 &&
+      source_sad_nonrd > kLowSad) {
+    do_int_pro_motion_estimation(cpi, x, y_sad, mi_row, mi_col,
+                                 source_sad_nonrd);
+  }
 
   if (*y_sad == UINT_MAX) {

@@ -26,6 +26,7 @@ set_aom_detect_var(AOM_ARCH_ARM 0 "Enables ARM architecture.")
 set_aom_detect_var(AOM_ARCH_PPC 0 "Enables PPC architecture.")
 set_aom_detect_var(AOM_ARCH_X86 0 "Enables X86 architecture.")
 set_aom_detect_var(AOM_ARCH_X86_64 0 "Enables X86_64 architecture.")
+set_aom_detect_var(AOM_ARCH_RISCV 0 "Enables RISC-V architecture.")
 
 # Arm/AArch64 feature flags.
 set_aom_detect_var(HAVE_NEON 0 "Enables Neon intrinsics optimizations.")
@@ -51,6 +52,9 @@ set_aom_detect_var(HAVE_SSE4_2 0 "Enables SSE 4.2 optimizations.")
 set_aom_detect_var(HAVE_AVX 0 "Enables AVX optimizations.")
 set_aom_detect_var(HAVE_AVX2 0 "Enables AVX2 optimizations.")
 
+# RISC-V64 feature flags.
+set_aom_detect_var(HAVE_RVV 0 "Enables RVV optimizations.")
+
 # Flags describing the build environment.
 set_aom_detect_var(HAVE_FEXCEPT 0
                    "Internal flag, GNU fenv.h present for target.")
@@ -241,3 +245,6 @@ set_aom_option_var(ENABLE_AVX "Enables AVX optimizations on x86/x86_64 targets."
                    ON)
 set_aom_option_var(ENABLE_AVX2
                    "Enables AVX2 optimizations on x86/x86_64 targets." ON)
+
+# RVV intrinsics flags.
+set_aom_option_var(ENABLE_RVV "Enables RVV optimizations on RISC-V targets." ON)

third_party/aom/build/cmake/cpu.cmake
@@ -75,6 +75,8 @@ if(NOT AOM_TARGET_CPU)
     set(AOM_TARGET_CPU "arm64")
   elseif(cpu_lowercase MATCHES "^ppc")
     set(AOM_TARGET_CPU "ppc")
+  elseif(cpu_lowercase MATCHES "^riscv")
+    set(AOM_TARGET_CPU "riscv")
   else()
     message(WARNING "The architecture ${CMAKE_SYSTEM_PROCESSOR} is not "
                     "supported, falling back to the generic target")
@@ -132,4 +132,15 @@ elseif("${AOM_TARGET_CPU}" MATCHES "^x86")
       set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-${flavor})
     endif()
   endforeach()
+elseif("${AOM_TARGET_CPU}" MATCHES "riscv")
+  set(AOM_ARCH_RISCV64 1)
+  set(RTCD_ARCH_RISCV64 "yes")
+
+  if(ENABLE_RVV)
+    set(HAVE_RVV 1)
+    set(RTCD_HAVE_RVV "yes")
+  else()
+    set(HAVE_RVV 0)
+    set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-rvv)
+  endif()
 endif()

third_party/aom/build/cmake/rtcd.pl
@@ -370,6 +370,36 @@ EOF
   common_bottom;
 }

+sub riscv() {
+  determine_indirection("c", @ALL_ARCHS);
+
+  # Assign the helper variable for each enabled extension
+  foreach my $opt (@ALL_ARCHS) {
+    my $opt_uc = uc $opt;
+    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
+  }
+
+  common_top;
+  print <<EOF;
+#ifdef RTCD_C
+#include "aom_ports/riscv.h"
+static void setup_rtcd_internal(void)
+{
+  int flags = riscv_simd_caps();
+
+  (void)flags;
+
+EOF
+
+  set_function_pointers("c", @ALL_ARCHS);
+
+  print <<EOF;
+}
+#endif
+EOF
+  common_bottom;
+}
+
 sub unoptimized() {
   determine_indirection "c";
   common_top;
@@ -415,6 +445,9 @@ if ($opts{arch} eq 'x86') {
 } elsif ($opts{arch} eq 'ppc') {
   @ALL_ARCHS = filter(qw/vsx/);
   ppc;
+} elsif ($opts{arch} eq 'riscv') {
+  @ALL_ARCHS = filter(qw/rvv/);
+  riscv;
 } else {
   unoptimized;
 }
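Since rtcd.pl only shows the generator side, here is a hand-written, self-contained sketch of the shape of C code the riscv() sub above emits: common_top/set_function_pointers produce, per RTCD function, a default _c binding plus an override gated on the HAS_RVV bit returned by riscv_simd_caps() (declared in aom_ports/riscv.h). The kernel names and the stubbed capability probe below are illustrative, not the generator's actual output:

#include <stdio.h>

#define HAS_RVV 0x1 /* assumed flag bit for illustration */
static int riscv_simd_caps(void) { return HAS_RVV; } /* stub probe */

/* Stand-ins for a C reference kernel and its RVV counterpart. */
static int sad16x16_c(void) { return 1; }
static int sad16x16_rvv(void) { return 2; }

/* The RTCD function pointer the rest of the codec calls through. */
static int (*sad16x16)(void);

static void setup_rtcd_internal(void) {
  int flags = riscv_simd_caps();
  (void)flags;
  sad16x16 = sad16x16_c;                        /* default C binding */
  if (flags & HAS_RVV) sad16x16 = sad16x16_rvv; /* RVV override */
}

int main(void) {
  setup_rtcd_internal();
  printf("dispatched kernel returned %d\n", sad16x16());
  return 0;
}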

third_party/aom/test/cdef_test.cc
@@ -618,7 +618,8 @@ TEST_P(CDEFCopyRect16to16Test, TestSIMDNoMismatch) {

 using std::make_tuple;

-#if ((AOM_ARCH_X86 && HAVE_SSSE3) || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON)
+#if ((AOM_ARCH_X86 && HAVE_SSSE3) || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON || \
+     HAVE_RVV)
 static const CdefFilterBlockFunctions kCdefFilterFuncC[] = {
   { &cdef_filter_8_0_c, &cdef_filter_8_1_c, &cdef_filter_8_2_c,
     &cdef_filter_8_3_c }
@@ -811,6 +812,46 @@ INSTANTIATE_TEST_SUITE_P(
 #endif  // CONFIG_AV1_HIGHBITDEPTH
 #endif

+#if HAVE_RVV
+static const CdefFilterBlockFunctions kCdefFilterFuncRvv[] = {
+  { &cdef_filter_8_0_rvv, &cdef_filter_8_1_rvv, &cdef_filter_8_2_rvv,
+    &cdef_filter_8_3_rvv }
+};
+
+static const CdefFilterBlockFunctions kCdefFilterHighbdFuncRvv[] = {
+  { &cdef_filter_16_0_rvv, &cdef_filter_16_1_rvv, &cdef_filter_16_2_rvv,
+    &cdef_filter_16_3_rvv }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    RVV, CDEFBlockTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncRvv),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    RVV, CDEFBlockHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncRvv),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
+INSTANTIATE_TEST_SUITE_P(RVV, CDEFFindDirTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_rvv,
+                                                      &cdef_find_dir_c)));
+
+INSTANTIATE_TEST_SUITE_P(
+    RVV, CDEFCopyRect8to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c,
+                                 &cdef_copy_rect8_8bit_to_16bit_rvv)));
+
+INSTANTIATE_TEST_SUITE_P(
+    RVV, CDEFCopyRect16to16Test,
+    ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c,
+                                 &cdef_copy_rect8_16bit_to_16bit_rvv)));
+#endif
+
 // Test speed for all supported architectures
 #if AOM_ARCH_X86 && HAVE_SSSE3
 INSTANTIATE_TEST_SUITE_P(
@@ -905,4 +946,24 @@ INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualSpeedTest,
                                                       &cdef_find_dir_dual_c)));
 #endif

+#if HAVE_RVV
+INSTANTIATE_TEST_SUITE_P(
+    RVV, CDEFSpeedTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncRvv),
+                       ::testing::ValuesIn(kCdefFilterFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(8)));
+INSTANTIATE_TEST_SUITE_P(
+    RVV, CDEFSpeedHighbdTest,
+    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncRvv),
+                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
+                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
+                                         BLOCK_8X8),
+                       ::testing::Range(0, 16), ::testing::Values(10)));
+INSTANTIATE_TEST_SUITE_P(RVV, CDEFFindDirSpeedTest,
+                         ::testing::Values(make_tuple(&cdef_find_dir_rvv,
+                                                      &cdef_find_dir_c)));
+#endif
+
 }  // namespace

third_party/aom/test/svc_datarate_test.cc
@@ -1078,6 +1078,39 @@ class DatarateTestSVC
 #endif
   }

+  virtual void BasicRateTargetingSVC2TL1SLScreenDropFrame1920x1080Test() {
+    cfg_.rc_buf_initial_sz = 50;
+    cfg_.rc_buf_optimal_sz = 50;
+    cfg_.rc_buf_sz = 100;
+    cfg_.rc_dropframe_thresh = 30;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 52;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+
+    ::libaom_test::Y4mVideoSource video("screendata.1920_1080.y4m", 0, 60);
+
+    const int bitrate_array[2] = { 60, 100 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    screen_mode_ = 1;
+    number_temporal_layers_ = 2;
+    number_spatial_layers_ = 1;
+    target_layer_bitrate_[0] = 60 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+#if CONFIG_AV1_DECODER
+    // Top temporal layers are non_reference, so exclude them from
+    // mismatch count, since loopfilter/cdef is not applied for these on
+    // encoder side, but is always applied on decoder.
+    // This means 150 = #frames(300) - #TL2_frames(150).
+    // We use LE for screen since loopfilter level can become very small
+    // or zero and then the frame is not a mismatch.
+    EXPECT_LE(GetMismatchFrames(), 150u);
+#endif
+  }
+
   virtual void BasicRateTargetingSVC1TL3SLScreenTest() {
     cfg_.rc_buf_initial_sz = 500;
     cfg_.rc_buf_optimal_sz = 500;
@@ -2651,6 +2684,14 @@ TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLScreenDropFrame) {
   BasicRateTargetingSVC2TL1SLScreenDropFrameTest();
 }

+// Check basic rate targeting for CBR, for 2 temporal layers, 1 spatial
+// for screen mode, with frame dropper on at low bitrates. Use small
+// values of rc_buf_initial/optimal/sz to trigger postencode frame drop.
+// Use 1920x1080 clip.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLScreenDropFrame1920x1080) {
+  BasicRateTargetingSVC2TL1SLScreenDropFrame1920x1080Test();
+}
+
 // Check basic rate targeting for CBR, for 3 spatial layers, 1 temporal
 // for screen mode.
 TEST_P(DatarateTestSVC, BasicRateTargetingSVC1TL3SLScreen) {

third_party/aom/test/test-data.sha1
@@ -573,3 +573,4 @@ c7f336958e7af6162c20ddc84d67c7dfa9826910 *av1-1-b8-16-intra_only-intrabc-extreme
 4d1ad6d3070268ccb000d7fc3ae0f5a9447bfe82 *test_input_w1h1.yuv
 ad9942a073e245585c93f764ea299382a65939a7 *crowd_run_360p_10_150f.y4m
 9c2aa2d0f63f706f775bf661dfa81e8bb3089d8b *wikipedia_420_360p_60f.y4m
+9e4d2ba84ba62f7ea4b617a13af5db9c39e7f0f9 *screendata.1920_1080.y4m

third_party/aom/test/test_data_util.cmake
@@ -35,6 +35,7 @@ list(APPEND AOM_TEST_DATA_FILE_NAMES
      "niklas_1280_720_30.y4m"
      "rush_hour_444.y4m"
      "screendata.y4m"
+     "screendata.1920_1080.y4m"
      "niklas_640_480_30.yuv"
      "vase10x10.yuv"
      "vase10x10_tiles.txt"