Bug 1793238 - Update libjxl r=tnikkel
Differential Revision: https://phabricator.services.mozilla.com/D158771
This commit is contained in:
@@ -20,11 +20,11 @@ origin:
|
||||
|
||||
# Human-readable identifier for this version/release
|
||||
# Generally "version NNN", "tag SSS", "bookmark SSS"
|
||||
release: 7f2e26854086fba4255220fd6c77e9141f1f87cc
|
||||
release: 22e3d7276f4157d4a47586ba9fd91dd6303f441a
|
||||
|
||||
# Revision to pull in
|
||||
# Must be a long or short commit SHA (long preferred)
|
||||
revision: 7f2e26854086fba4255220fd6c77e9141f1f87cc
|
||||
revision: 22e3d7276f4157d4a47586ba9fd91dd6303f441a
|
||||
|
||||
# The package's license, where possible using the mnemonic from
|
||||
# https://spdx.org/licenses/
|
||||
|
||||
14
media/libjxl/include/jxl/version.h
Normal file
14
media/libjxl/include/jxl/version.h
Normal file
@@ -0,0 +1,14 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef JXL_VERSION_H_
|
||||
#define JXL_VERSION_H_
|
||||
|
||||
#define JPEGXL_MAJOR_VERSION 0
|
||||
#define JPEGXL_MINOR_VERSION 0
|
||||
#define JPEGXL_PATCH_VERSION 0
|
||||
|
||||
#endif /* JXL_VERSION_H_ */
|
||||
@@ -103,13 +103,10 @@ SOURCES += [
|
||||
"/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.cc",
|
||||
]
|
||||
|
||||
DEFINES["JPEGXL_MAJOR_VERSION"] = "0"
|
||||
DEFINES["JPEGXL_MINOR_VERSION"] = "0"
|
||||
DEFINES["JPEGXL_PATCH_VERSION"] = "0"
|
||||
|
||||
EXPORTS.jxl += [
|
||||
"./include/jxl/jxl_export.h",
|
||||
"./include/jxl/jxl_threads_export.h",
|
||||
"./include/jxl/version.h",
|
||||
"/third_party/jpeg-xl/lib/include/jxl/butteraugli.h",
|
||||
"/third_party/jpeg-xl/lib/include/jxl/butteraugli_cxx.h",
|
||||
"/third_party/jpeg-xl/lib/include/jxl/cms_interface.h",
|
||||
|
||||
@@ -10,9 +10,9 @@ origin:
|
||||
|
||||
url: https://github.com/libjxl/libjxl
|
||||
|
||||
release: 3e0b08d4ee53a08f9b58739e088c5bdecebae74d (2022-09-09T11:59:45Z).
|
||||
release: 19e36b964cd966e2408bad87182faa38b7de3e9e
|
||||
|
||||
revision: 3e0b08d4ee53a08f9b58739e088c5bdecebae74d
|
||||
revision: 19e36b964cd966e2408bad87182faa38b7de3e9e
|
||||
|
||||
license: Apache-2.0
|
||||
|
||||
|
||||
5
third_party/highway/CMakeLists.txt
vendored
5
third_party/highway/CMakeLists.txt
vendored
@@ -19,7 +19,7 @@ if(POLICY CMP0083)
|
||||
cmake_policy(SET CMP0083 NEW)
|
||||
endif()
|
||||
|
||||
project(hwy VERSION 1.0.0) # Keep in sync with highway.h version
|
||||
project(hwy VERSION 1.0.1) # Keep in sync with highway.h version
|
||||
|
||||
# Directly define the ABI version from the cmake project() version values:
|
||||
set(LIBRARY_VERSION "${hwy_VERSION}")
|
||||
@@ -89,6 +89,9 @@ list(APPEND HWY_CONTRIB_SOURCES
|
||||
hwy/contrib/sort/vqsort-inl.h
|
||||
hwy/contrib/sort/vqsort.cc
|
||||
hwy/contrib/sort/vqsort.h
|
||||
hwy/contrib/algo/copy-inl.h
|
||||
hwy/contrib/algo/find-inl.h
|
||||
hwy/contrib/algo/transform-inl.h
|
||||
)
|
||||
endif() # HWY_ENABLE_CONTRIB
|
||||
|
||||
|
||||
12
third_party/highway/debian/changelog
vendored
12
third_party/highway/debian/changelog
vendored
@@ -1,3 +1,15 @@
|
||||
highway (1.0.1-1) UNRELEASED; urgency=medium
|
||||
|
||||
* Add Eq128, i64 Mul, unsigned->float ConvertTo
|
||||
* Faster sort for few unique keys, more robust pivot selection
|
||||
* Fix: floating-point generator for sort tests, Min/MaxOfLanes for i16
|
||||
* Fix: avoid always_inline in debug, link atomic
|
||||
* GCC warnings: string.h, maybe-uninitialized, ignored-attributes
|
||||
* GCC warnings: preprocessor int overflow, spurious use-after-free/overflow
|
||||
* Doc: <=HWY_AVX3, Full32/64/128, how to use generic-inl
|
||||
|
||||
-- Jan Wassenberg <janwas@google.com> Tue, 23 Aug 2022 10:00:00 +0200
|
||||
|
||||
highway (1.0.0-1) UNRELEASED; urgency=medium
|
||||
|
||||
* ABI change: 64-bit target values, more room for expansion
|
||||
|
||||
30
third_party/highway/hwy/base.h
vendored
30
third_party/highway/hwy/base.h
vendored
@@ -24,6 +24,9 @@
|
||||
#include "hwy/detect_compiler_arch.h"
|
||||
#include "hwy/highway_export.h"
|
||||
|
||||
#if HWY_COMPILER_MSVC
|
||||
#include <string.h> // memcpy
|
||||
#endif
|
||||
#if HWY_ARCH_X86
|
||||
#include <atomic>
|
||||
#endif
|
||||
@@ -131,6 +134,19 @@
|
||||
#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#if HWY_COMPILER_GCC_ACTUAL
|
||||
// nielskm: GCC does not support '#pragma GCC unroll' without the factor.
|
||||
#define HWY_UNROLL(factor) HWY_PRAGMA(GCC unroll factor)
|
||||
#define HWY_DEFAULT_UNROLL HWY_UNROLL(4)
|
||||
#elif HWY_COMPILER_CLANG || HWY_COMPILER_ICC || HWY_COMPILER_ICX
|
||||
#define HWY_UNROLL(factor) HWY_PRAGMA(unroll factor)
|
||||
#define HWY_DEFAULT_UNROLL HWY_UNROLL()
|
||||
#else
|
||||
#define HWY_UNROLL(factor)
|
||||
#define HWY_DEFAULT_UNROLL HWY_UNROLL()
|
||||
#endif
|
||||
|
||||
|
||||
// Compile-time fence to prevent undesirable code reordering. On Clang x86, the
|
||||
// typical asm volatile("" : : : "memory") has no effect, whereas atomic fence
|
||||
// does, without generating code.
|
||||
@@ -863,10 +879,18 @@ HWY_API void CopyBytes(const From* from, To* to) {
|
||||
#if HWY_COMPILER_MSVC
|
||||
memcpy(to, from, kBytes);
|
||||
#else
|
||||
__builtin_memcpy(to, from, kBytes);
|
||||
__builtin_memcpy(
|
||||
static_cast<void*>(to), static_cast<const void*>(from), kBytes);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Same as CopyBytes, but for same-sized objects; avoids a size argument.
|
||||
template <typename From, typename To>
|
||||
HWY_API void CopySameSize(const From* HWY_RESTRICT from, To* HWY_RESTRICT to) {
|
||||
static_assert(sizeof(From) == sizeof(To), "");
|
||||
CopyBytes<sizeof(From)>(from, to);
|
||||
}
|
||||
|
||||
template <size_t kBytes, typename To>
|
||||
HWY_API void ZeroBytes(To* to) {
|
||||
#if HWY_COMPILER_MSVC
|
||||
@@ -880,13 +904,13 @@ HWY_API float F32FromBF16(bfloat16_t bf) {
|
||||
uint32_t bits = bf.bits;
|
||||
bits <<= 16;
|
||||
float f;
|
||||
CopyBytes<4>(&bits, &f);
|
||||
CopySameSize(&bits, &f);
|
||||
return f;
|
||||
}
|
||||
|
||||
HWY_API bfloat16_t BF16FromF32(float f) {
|
||||
uint32_t bits;
|
||||
CopyBytes<4>(&f, &bits);
|
||||
CopySameSize(&f, &bits);
|
||||
bfloat16_t bf;
|
||||
bf.bits = static_cast<uint16_t>(bits >> 16);
|
||||
return bf;
|
||||
|
||||
@@ -22,8 +22,6 @@
|
||||
#define HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
|
||||
#endif
|
||||
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include "hwy/highway.h"
|
||||
|
||||
HWY_BEFORE_NAMESPACE();
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include "hwy/aligned_allocator.h"
|
||||
|
||||
|
||||
21
third_party/highway/hwy/contrib/image/image.h
vendored
21
third_party/highway/hwy/contrib/image/image.h
vendored
@@ -18,7 +18,6 @@
|
||||
|
||||
// SIMD/multicore-friendly planar image representation with row accessors.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
@@ -104,7 +103,7 @@ struct HWY_CONTRIB_DLLEXPORT ImageBase {
|
||||
HWY_INLINE void* VoidRow(const size_t y) const {
|
||||
#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
|
||||
if (y >= ysize_) {
|
||||
HWY_ABORT("Row(%" PRIu64 ") >= %u\n", static_cast<uint64_t>(y), ysize_);
|
||||
HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -223,14 +222,11 @@ class Image3 {
|
||||
|
||||
Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
|
||||
if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
|
||||
HWY_ABORT("Not same size: %" PRIu64 " x %" PRIu64 ", %" PRIu64
|
||||
" x %" PRIu64 ", %" PRIu64 " x %" PRIu64 "\n",
|
||||
static_cast<uint64_t>(plane0.xsize()),
|
||||
static_cast<uint64_t>(plane0.ysize()),
|
||||
static_cast<uint64_t>(plane1.xsize()),
|
||||
static_cast<uint64_t>(plane1.ysize()),
|
||||
static_cast<uint64_t>(plane2.xsize()),
|
||||
static_cast<uint64_t>(plane2.ysize()));
|
||||
HWY_ABORT(
|
||||
"Not same size: %d x %d, %d x %d, %d x %d\n",
|
||||
static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()),
|
||||
static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()),
|
||||
static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize()));
|
||||
}
|
||||
planes_[0] = std::move(plane0);
|
||||
planes_[1] = std::move(plane1);
|
||||
@@ -294,9 +290,8 @@ class Image3 {
|
||||
HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
|
||||
#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
|
||||
if (c >= kNumPlanes || y >= ysize()) {
|
||||
HWY_ABORT("PlaneRow(%" PRIu64 ", %" PRIu64 ") >= %" PRIu64 "\n",
|
||||
static_cast<uint64_t>(c), static_cast<uint64_t>(y),
|
||||
static_cast<uint64_t>(ysize()));
|
||||
HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c),
|
||||
static_cast<int>(y), static_cast<int>(ysize()));
|
||||
}
|
||||
#endif
|
||||
// Use the first plane's stride because the compiler might not realize they
|
||||
|
||||
@@ -13,6 +13,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <cfloat> // FLT_MAX
|
||||
|
||||
1
third_party/highway/hwy/contrib/sort/BUILD
vendored
1
third_party/highway/hwy/contrib/sort/BUILD
vendored
@@ -99,6 +99,7 @@ cc_library(
|
||||
"traits-inl.h",
|
||||
"traits128-inl.h",
|
||||
"vqsort-inl.h",
|
||||
# Placeholder for internal instrumentation. Do not remove.
|
||||
],
|
||||
deps = [
|
||||
# Only if VQSORT_SECURE_RNG is set.
|
||||
|
||||
@@ -124,7 +124,7 @@ class InputStats {
|
||||
// bit representations as the checksum.
|
||||
uint64_t bits = 0;
|
||||
static_assert(sizeof(T) <= 8, "Expected a built-in type");
|
||||
CopyBytes<sizeof(T)>(&value, &bits);
|
||||
CopyBytes<sizeof(T)>(&value, &bits); // not same size
|
||||
sum_ += bits;
|
||||
count_ += 1;
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include <vector>
|
||||
|
||||
@@ -51,6 +50,7 @@ using detail::SharedTraits;
|
||||
|
||||
#if VQSORT_ENABLED || HWY_IDE
|
||||
using detail::OrderAscending128;
|
||||
using detail::OrderAscendingKV128;
|
||||
using detail::Traits128;
|
||||
|
||||
template <class Traits>
|
||||
@@ -81,8 +81,9 @@ HWY_NOINLINE void BenchPartition() {
|
||||
// The pivot value can influence performance. Do exactly what vqsort will
|
||||
// do so that the performance (influenced by prefetching and branch
|
||||
// prediction) is likely to predict the actual performance inside vqsort.
|
||||
const auto pivot = detail::ChoosePivot(d, st, aligned.get(), 0, num_lanes,
|
||||
buf.get(), rng);
|
||||
detail::PivotResult result;
|
||||
const auto pivot = detail::ChoosePivot(d, st, aligned.get(), num_lanes,
|
||||
buf.get(), rng, result);
|
||||
|
||||
const Timestamp t0;
|
||||
detail::Partition(d, st, aligned.get(), 0, num_lanes - 1, pivot,
|
||||
@@ -110,7 +111,7 @@ HWY_NOINLINE void BenchAllPartition() {
|
||||
BenchPartition<TraitsLane<OrderDescending<int64_t>>>();
|
||||
BenchPartition<Traits128<OrderAscending128>>();
|
||||
// BenchPartition<Traits128<OrderDescending128>>();
|
||||
// BenchPartition<Traits128<OrderAscendingKV128>>();
|
||||
BenchPartition<Traits128<OrderAscendingKV128>>();
|
||||
}
|
||||
|
||||
template <class Traits>
|
||||
@@ -258,12 +259,9 @@ HWY_NOINLINE void BenchSort(size_t num_keys) {
|
||||
|
||||
HWY_NOINLINE void BenchAllSort() {
|
||||
// Not interested in benchmark results for these targets
|
||||
if (HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4 ||
|
||||
HWY_TARGET == HWY_EMU128) {
|
||||
if (HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4) {
|
||||
return;
|
||||
}
|
||||
// Only enable EMU128 on x86 - it's slow on emulators.
|
||||
if (!HWY_ARCH_X86 && (HWY_TARGET == HWY_EMU128)) return;
|
||||
|
||||
constexpr size_t K = 1000;
|
||||
constexpr size_t M = K * K;
|
||||
@@ -287,7 +285,7 @@ HWY_NOINLINE void BenchAllSort() {
|
||||
|
||||
#if !HAVE_VXSORT && VQSORT_ENABLED
|
||||
BenchSort<Traits128<OrderAscending128>>(num_keys);
|
||||
// BenchSort<Traits128<OrderAscendingKV128>>(num_keys);
|
||||
BenchSort<Traits128<OrderAscendingKV128>>(num_keys);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h> // memcpy
|
||||
@@ -218,9 +222,6 @@ HWY_NOINLINE void TestAllBaseCase() {
|
||||
#if defined(_MSC_VER)
|
||||
return;
|
||||
#endif
|
||||
// Only enable EMU128 on x86 - it's slow on emulators.
|
||||
if (!HWY_ARCH_X86 && (HWY_TARGET == HWY_EMU128)) return;
|
||||
|
||||
TestBaseCase<TraitsLane<OrderAscending<int32_t> > >();
|
||||
TestBaseCase<TraitsLane<OrderDescending<int64_t> > >();
|
||||
TestBaseCase<Traits128<OrderAscending128> >();
|
||||
@@ -356,9 +357,6 @@ static HWY_NOINLINE void TestPartition() {
|
||||
}
|
||||
|
||||
HWY_NOINLINE void TestAllPartition() {
|
||||
// Only enable EMU128 on x86 - it's slow on emulators.
|
||||
if (!HWY_ARCH_X86 && (HWY_TARGET == HWY_EMU128)) return;
|
||||
|
||||
TestPartition<TraitsLane<OrderAscending<int16_t> > >();
|
||||
TestPartition<TraitsLane<OrderDescending<int32_t> > >();
|
||||
TestPartition<TraitsLane<OrderAscending<int64_t> > >();
|
||||
@@ -490,9 +488,6 @@ void TestSort(size_t num_lanes) {
|
||||
#if defined(_MSC_VER)
|
||||
return;
|
||||
#endif
|
||||
// Only enable EMU128 on x86 - it's slow on emulators.
|
||||
if (!HWY_ARCH_X86 && (HWY_TARGET == HWY_EMU128)) return;
|
||||
|
||||
using Order = typename Traits::Order;
|
||||
using LaneType = typename Traits::LaneType;
|
||||
using KeyType = typename Traits::KeyType;
|
||||
|
||||
@@ -41,7 +41,7 @@ namespace detail {
|
||||
// independent of the order.
|
||||
template <typename T>
|
||||
struct KeyLane {
|
||||
constexpr bool Is128() const { return false; }
|
||||
static constexpr bool Is128() { return false; }
|
||||
constexpr size_t LanesPerKey() const { return 1; }
|
||||
|
||||
// What type bench_sort should allocate for generating inputs.
|
||||
@@ -130,7 +130,7 @@ struct KeyLane {
|
||||
#if HWY_HAVE_FLOAT64 // in case D is float32
|
||||
const RepartitionToWide<D> dw;
|
||||
#else
|
||||
const RepartitionToWide<RebindToUnsigned<D>> dw;
|
||||
const RepartitionToWide<RebindToUnsigned<D> > dw;
|
||||
#endif
|
||||
return BitCast(d, SwapAdjacentPairs(dw, BitCast(dw, v)));
|
||||
}
|
||||
@@ -146,7 +146,7 @@ struct KeyLane {
|
||||
#if HWY_HAVE_FLOAT64 // in case D is float32
|
||||
const RepartitionToWide<D> dw;
|
||||
#else
|
||||
const RepartitionToWide<RebindToUnsigned<D>> dw;
|
||||
const RepartitionToWide<RebindToUnsigned<D> > dw;
|
||||
#endif
|
||||
return BitCast(d, OddEven(BitCast(dw, odd), BitCast(dw, even)));
|
||||
}
|
||||
@@ -160,7 +160,7 @@ struct KeyLane {
|
||||
#if HWY_HAVE_FLOAT64 // in case D is float32
|
||||
const RepartitionToWide<D> dw;
|
||||
#else
|
||||
const RepartitionToWide<RebindToUnsigned<D>> dw;
|
||||
const RepartitionToWide<RebindToUnsigned<D> > dw;
|
||||
#endif
|
||||
return BitCast(d, OddEvenPairs(dw, BitCast(dw, odd), BitCast(dw, even)));
|
||||
}
|
||||
@@ -181,9 +181,7 @@ template <typename T>
|
||||
struct OrderAscending : public KeyLane<T> {
|
||||
using Order = SortAscending;
|
||||
|
||||
HWY_INLINE bool Compare1(const T* a, const T* b) {
|
||||
return *a < *b;
|
||||
}
|
||||
HWY_INLINE bool Compare1(const T* a, const T* b) { return *a < *b; }
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
|
||||
@@ -222,15 +220,18 @@ struct OrderAscending : public KeyLane<T> {
|
||||
HWY_INLINE Vec<D> LastValue(D d) const {
|
||||
return Set(d, hwy::HighestValue<T>());
|
||||
}
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
|
||||
return Sub(v, Set(d, 1));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct OrderDescending : public KeyLane<T> {
|
||||
using Order = SortDescending;
|
||||
|
||||
HWY_INLINE bool Compare1(const T* a, const T* b) {
|
||||
return *b < *a;
|
||||
}
|
||||
HWY_INLINE bool Compare1(const T* a, const T* b) { return *b < *a; }
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
|
||||
@@ -268,6 +269,11 @@ struct OrderDescending : public KeyLane<T> {
|
||||
HWY_INLINE Vec<D> LastValue(D d) const {
|
||||
return Set(d, hwy::LowestValue<T>());
|
||||
}
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
|
||||
return Add(v, Set(d, 1));
|
||||
}
|
||||
};
|
||||
|
||||
// Shared code that depends on Order.
|
||||
|
||||
@@ -39,7 +39,7 @@ namespace detail {
|
||||
// along with an abstraction layer for single-lane vs. lane-pair, which is
|
||||
// independent of the order.
|
||||
struct KeyAny128 {
|
||||
constexpr bool Is128() const { return true; }
|
||||
static constexpr bool Is128() { return true; }
|
||||
constexpr size_t LanesPerKey() const { return 2; }
|
||||
|
||||
// What type bench_sort should allocate for generating inputs.
|
||||
@@ -130,8 +130,8 @@ struct Key128 : public KeyAny128 {
|
||||
std::string KeyString() const { return "U128"; }
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
|
||||
return Eq128(a, b);
|
||||
HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
|
||||
return Eq128(d, a, b);
|
||||
}
|
||||
|
||||
HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) {
|
||||
@@ -184,6 +184,12 @@ struct OrderAscending128 : public Key128 {
|
||||
HWY_INLINE Vec<D> LastValue(D d) const {
|
||||
return Set(d, hwy::HighestValue<TFromD<D> >());
|
||||
}
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
|
||||
const Vec<D> k1 = OddEven(Zero(d), Set(d, 1));
|
||||
return Sub(v, k1);
|
||||
}
|
||||
};
|
||||
|
||||
struct OrderDescending128 : public Key128 {
|
||||
@@ -224,6 +230,12 @@ struct OrderDescending128 : public Key128 {
|
||||
HWY_INLINE Vec<D> LastValue(D d) const {
|
||||
return Set(d, hwy::LowestValue<TFromD<D> >());
|
||||
}
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
|
||||
const Vec<D> k1 = OddEven(Zero(d), Set(d, 1));
|
||||
return Add(v, k1);
|
||||
}
|
||||
};
|
||||
|
||||
// Base class shared between OrderAscendingKV128, OrderDescendingKV128.
|
||||
@@ -234,8 +246,8 @@ struct KeyValue128 : public KeyAny128 {
|
||||
std::string KeyString() const { return "KV128"; }
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
|
||||
return Eq128Upper(a, b);
|
||||
HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
|
||||
return Eq128Upper(d, a, b);
|
||||
}
|
||||
|
||||
HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) {
|
||||
@@ -281,6 +293,12 @@ struct OrderAscendingKV128 : public KeyValue128 {
|
||||
HWY_INLINE Vec<D> LastValue(D d) const {
|
||||
return Set(d, hwy::HighestValue<TFromD<D> >());
|
||||
}
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
|
||||
const Vec<D> k1 = OddEven(Zero(d), Set(d, 1));
|
||||
return Sub(v, k1);
|
||||
}
|
||||
};
|
||||
|
||||
struct OrderDescendingKV128 : public KeyValue128 {
|
||||
@@ -321,6 +339,12 @@ struct OrderDescendingKV128 : public KeyValue128 {
|
||||
HWY_INLINE Vec<D> LastValue(D d) const {
|
||||
return Set(d, hwy::LowestValue<TFromD<D> >());
|
||||
}
|
||||
|
||||
template <class D>
|
||||
HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
|
||||
const Vec<D> k1 = OddEven(Zero(d), Set(d, 1));
|
||||
return Add(v, k1);
|
||||
}
|
||||
};
|
||||
|
||||
// Shared code that depends on Order.
|
||||
|
||||
285
third_party/highway/hwy/contrib/sort/vqsort-inl.h
vendored
285
third_party/highway/hwy/contrib/sort/vqsort-inl.h
vendored
@@ -61,6 +61,7 @@
|
||||
|
||||
#include "hwy/contrib/sort/shared-inl.h"
|
||||
#include "hwy/contrib/sort/sorting_networks-inl.h"
|
||||
// Placeholder for internal instrumentation. Do not remove.
|
||||
#include "hwy/highway.h"
|
||||
|
||||
HWY_BEFORE_NAMESPACE();
|
||||
@@ -573,14 +574,44 @@ HWY_NOINLINE void ScanMinMax(D d, Traits st, const T* HWY_RESTRICT keys,
|
||||
}
|
||||
#endif // VQSORT_PRINT
|
||||
|
||||
template <class V>
|
||||
V OrXor(const V o, const V x1, const V x2) {
|
||||
// TODO(janwas): ternlog?
|
||||
return Or(o, Xor(x1, x2));
|
||||
}
|
||||
|
||||
// Returns a lower bound on the index of the first mismatch, or `num` if all
|
||||
// are equal. `num` is const to ensure we don't change it, which would lead to
|
||||
// bugs because the caller will check whether we return the original value.
|
||||
template <class D, class Traits, typename T>
|
||||
HWY_INLINE bool ScanEqual(D d, Traits st, const T* HWY_RESTRICT keys,
|
||||
size_t num) {
|
||||
HWY_NOINLINE size_t LowerBoundOfMismatch(D d, Traits st,
|
||||
const T* HWY_RESTRICT keys,
|
||||
const size_t num) {
|
||||
using V = Vec<decltype(d)>;
|
||||
const size_t N = Lanes(d);
|
||||
HWY_DASSERT(num >= N); // See HandleSpecialCases
|
||||
const V reference = st.SetKey(d, keys);
|
||||
const V zero = Zero(d);
|
||||
|
||||
size_t i = 0;
|
||||
|
||||
// Vector-align keys + i.
|
||||
const size_t misalign =
|
||||
(reinterpret_cast<uintptr_t>(keys) / sizeof(T)) & (N - 1);
|
||||
if (HWY_LIKELY(misalign != 0)) {
|
||||
HWY_DASSERT(misalign % st.LanesPerKey() == 0);
|
||||
const size_t consume = N - misalign;
|
||||
const auto mask = FirstN(d, consume);
|
||||
const V v0 = LoadU(d, keys);
|
||||
// Only check masked lanes; consider others to be equal to the reference.
|
||||
if (!AllTrue(d, Or(Not(mask), Eq(v0, reference)))) {
|
||||
return 0; // not equal
|
||||
}
|
||||
i = consume;
|
||||
}
|
||||
HWY_DASSERT(((reinterpret_cast<uintptr_t>(keys + i) / sizeof(T)) & (N - 1)) ==
|
||||
0);
|
||||
|
||||
// Sticky bits registering any difference between `keys` and the first key.
|
||||
// We use vector XOR because it may be cheaper than comparisons, especially
|
||||
// for 128-bit. 2x unrolled for more ILP.
|
||||
@@ -592,81 +623,112 @@ HWY_INLINE bool ScanEqual(D d, Traits st, const T* HWY_RESTRICT keys,
|
||||
// after a 'group', which consists of kLoops times two vectors.
|
||||
constexpr size_t kLoops = 4;
|
||||
const size_t lanes_per_group = kLoops * 2 * N;
|
||||
size_t i = 0;
|
||||
|
||||
for (; i + lanes_per_group <= num; i += lanes_per_group) {
|
||||
HWY_DEFAULT_UNROLL
|
||||
for (size_t loop = 0; loop < kLoops; ++loop) {
|
||||
const V v0 = LoadU(d, keys + i + loop * 2 * N);
|
||||
const V v1 = LoadU(d, keys + i + loop * 2 * N + N);
|
||||
// TODO(janwas): ternlog
|
||||
diff0 = Or(diff0, Xor(v0, reference));
|
||||
diff1 = Or(diff1, Xor(v1, reference));
|
||||
const V v0 = Load(d, keys + i + loop * 2 * N);
|
||||
const V v1 = Load(d, keys + i + loop * 2 * N + N);
|
||||
diff0 = OrXor(diff0, v0, reference);
|
||||
diff1 = OrXor(diff1, v1, reference);
|
||||
}
|
||||
diff0 = Or(diff0, diff1);
|
||||
if (!AllTrue(d, Eq(diff0, zero))) {
|
||||
return false;
|
||||
return i; // not equal
|
||||
}
|
||||
}
|
||||
// Whole vectors, no unrolling
|
||||
// Whole vectors, no unrolling, compare directly
|
||||
for (; i + N <= num; i += N) {
|
||||
const V v0 = LoadU(d, keys + i);
|
||||
// TODO(janwas): ternlog
|
||||
diff0 = Or(diff0, Xor(v0, reference));
|
||||
if (!AllTrue(d, Eq(diff0, zero))) {
|
||||
return false;
|
||||
const V v0 = Load(d, keys + i);
|
||||
if (!AllTrue(d, Eq(v0, reference))) {
|
||||
return i; // not equal
|
||||
}
|
||||
}
|
||||
// If there are remainders, re-check the last whole vector.
|
||||
if (HWY_LIKELY(i != num)) {
|
||||
const V v0 = LoadU(d, keys + num - N);
|
||||
// TODO(janwas): ternlog
|
||||
diff0 = Or(diff0, Xor(v0, reference));
|
||||
if (!AllTrue(d, Eq(diff0, zero))) {
|
||||
return false;
|
||||
if (!AllTrue(d, Eq(v0, reference))) {
|
||||
return i; // not equal
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return num; // all equal
|
||||
}
|
||||
|
||||
// Returns key prior to reference in sort order.
|
||||
enum class PivotResult {
|
||||
kAllEqual, // stop without partitioning
|
||||
kNormal, // partition and recurse left and right
|
||||
kIsFirst, // partition but skip left recursion
|
||||
kWasLast, // partition but skip right recursion
|
||||
};
|
||||
|
||||
// Classifies (and possibly modifies) `pivot` by scanning for the first/last
|
||||
// key from index `idx_diff`, which is less than `num`.
|
||||
template <class D, class Traits, typename T>
|
||||
HWY_INLINE Vec<D> ScanForPrev(D d, Traits st, const T* HWY_RESTRICT keys,
|
||||
size_t num, Vec<D> reference,
|
||||
T* HWY_RESTRICT buf) {
|
||||
HWY_NOINLINE PivotResult CheckFirstLast(D d, Traits st,
|
||||
const T* HWY_RESTRICT keys, size_t num,
|
||||
size_t idx_diff,
|
||||
Vec<D>* HWY_RESTRICT pivot,
|
||||
T* HWY_RESTRICT buf) {
|
||||
const size_t N = Lanes(d);
|
||||
HWY_DASSERT(num >= N); // See HandleSpecialCases
|
||||
HWY_DASSERT(idx_diff < num);
|
||||
|
||||
Vec<D> prev = st.FirstValue(d);
|
||||
Mask<D> any_found = st.Compare(d, prev, prev); // false
|
||||
Vec<D> first = st.LastValue(d);
|
||||
Vec<D> last = st.FirstValue(d);
|
||||
// Early out for mostly-0 arrays, where pivot is often FirstValue.
|
||||
if (AllTrue(d, st.EqualKeys(d, *pivot, last))) {
|
||||
return PivotResult::kIsFirst;
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
// We know keys[0, idx_diff) are equal, but they might be the first/last, so
|
||||
// start scanning one vector before.
|
||||
size_t i = static_cast<size_t>(
|
||||
HWY_MAX(static_cast<intptr_t>(idx_diff) - static_cast<intptr_t>(N), 0));
|
||||
|
||||
constexpr size_t kLoops = 4;
|
||||
const size_t lanes_per_group = kLoops * N;
|
||||
|
||||
// Whole group, unrolled
|
||||
for (; i + lanes_per_group <= num; i += lanes_per_group) {
|
||||
HWY_DEFAULT_UNROLL
|
||||
for (size_t loop = 0; loop < kLoops; ++loop) {
|
||||
const Vec<D> curr = LoadU(d, keys + i + loop * N);
|
||||
first = st.First(d, first, curr);
|
||||
last = st.Last(d, last, curr);
|
||||
}
|
||||
}
|
||||
// Whole vectors, no unrolling
|
||||
for (; i + N <= num; i += N) {
|
||||
const Vec<D> curr = LoadU(d, keys + i);
|
||||
const auto is_before = st.Compare(d, curr, reference);
|
||||
any_found = Or(any_found, is_before);
|
||||
prev = IfThenElse(is_before, st.Last(d, prev, curr), prev);
|
||||
first = st.First(d, first, curr);
|
||||
last = st.Last(d, last, curr);
|
||||
}
|
||||
// If there are remainders, re-check the last whole vector.
|
||||
if (HWY_LIKELY(i != num)) {
|
||||
const Vec<D> curr = LoadU(d, keys + num - N);
|
||||
const auto is_before = st.Compare(d, curr, reference);
|
||||
any_found = Or(any_found, is_before);
|
||||
prev = IfThenElse(is_before, st.Last(d, prev, curr), prev);
|
||||
first = st.First(d, first, curr);
|
||||
last = st.Last(d, last, curr);
|
||||
}
|
||||
|
||||
const Vec<D> candidate = st.LastOfLanes(d, prev, buf);
|
||||
// If we didn't find any key less than reference, we're still stuck with
|
||||
// FirstValue; replace that with reference. (We cannot compare directly to
|
||||
// FirstValue because that might be the desired value of prev.)
|
||||
return IfThenElse(any_found, candidate, reference);
|
||||
first = st.FirstOfLanes(d, first, buf);
|
||||
last = st.LastOfLanes(d, last, buf);
|
||||
|
||||
if (AllTrue(d, st.EqualKeys(d, first, *pivot))) {
|
||||
return PivotResult::kIsFirst;
|
||||
}
|
||||
// Fixup required because keys equal to the pivot go to the left partition,
|
||||
// and the pivot is the last, so Partition would not change anything.
|
||||
// Instead use the previous value in sort order, which is not necessarily an
|
||||
// actual key.
|
||||
if (AllTrue(d, st.EqualKeys(d, last, *pivot))) {
|
||||
*pivot = st.PrevValue(d, *pivot);
|
||||
return PivotResult::kWasLast;
|
||||
}
|
||||
return PivotResult::kNormal;
|
||||
}
|
||||
|
||||
enum class PivotResult {
|
||||
kNormal, // use partition
|
||||
kAllEqual, // already done
|
||||
};
|
||||
|
||||
// Writes samples from `keys[0, num)` into `buf`.
|
||||
template <class D, class Traits, typename T>
|
||||
HWY_INLINE void DrawSamples(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
|
||||
T* HWY_RESTRICT buf, Generator& rng) {
|
||||
@@ -732,27 +794,25 @@ HWY_INLINE void DrawSamples(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns pivot, which is never the largest key (thus the right partition will
|
||||
// never be empty).
|
||||
// Returns pivot chosen from `keys[0, num)`. It will never be the largest key
|
||||
// (thus the right partition will never be empty).
|
||||
template <class D, class Traits, typename T>
|
||||
HWY_NOINLINE Vec<D> ChoosePivot(D d, Traits st, T* HWY_RESTRICT keys,
|
||||
const size_t begin, const size_t end,
|
||||
T* HWY_RESTRICT buf, Generator& rng,
|
||||
PivotResult& result) {
|
||||
const size_t num, T* HWY_RESTRICT buf,
|
||||
Generator& rng, PivotResult& result) {
|
||||
using V = decltype(Zero(d));
|
||||
const size_t N = Lanes(d);
|
||||
|
||||
constexpr size_t kSampleLanes = 3 * 64 / sizeof(T);
|
||||
constexpr size_t N1 = st.LanesPerKey();
|
||||
|
||||
const size_t num = end - begin;
|
||||
#if VQSORT_PRINT
|
||||
fprintf(stderr, "\nChoosePivot num %zu:\n", num);
|
||||
#endif
|
||||
DrawSamples(d, st, keys + begin, num, buf, rng);
|
||||
DrawSamples(d, st, keys, num, buf, rng);
|
||||
|
||||
SortSamples(st, buf);
|
||||
#if VQSORT_PRINT
|
||||
const size_t N = Lanes(d);
|
||||
for (size_t i = 0; i < kSampleLanes; i += N) {
|
||||
Print(d, "", Load(d, buf + i), 0, N);
|
||||
}
|
||||
@@ -760,27 +820,22 @@ HWY_NOINLINE Vec<D> ChoosePivot(D d, Traits st, T* HWY_RESTRICT keys,
|
||||
|
||||
// All samples are equal.
|
||||
if (st.Equal1(buf, buf + kSampleLanes - N1)) {
|
||||
const bool all_eq = ScanEqual(d, st, keys + begin, num);
|
||||
const size_t idx_diff = LowerBoundOfMismatch(d, st, keys, num);
|
||||
const bool all_eq = idx_diff == num;
|
||||
#if VQSORT_PRINT
|
||||
fprintf(stderr, "Pivot num=%zu all eq samples, keys also: %d\n", num,
|
||||
all_eq);
|
||||
fprintf(stderr, "Pivot num=%zu samplesEq, idxDiff %zu keysEq: %d\n", num,
|
||||
idx_diff, all_eq);
|
||||
#endif
|
||||
if (all_eq) {
|
||||
result = PivotResult::kAllEqual;
|
||||
return Zero(d);
|
||||
}
|
||||
|
||||
// If the sample is indeed the most common key and it is the largest, then
|
||||
// the right partition will be empty. Prevent this by replacing the pivot
|
||||
// with the previous key in sort order. By contrast, selecting the first key
|
||||
// in sort order would guarantee (minimal) progress. We instead do a full
|
||||
// scan to maximize load balance in case there are numerous keys that
|
||||
// precede the most common key.
|
||||
result = PivotResult::kNormal;
|
||||
const V reference = st.SetKey(d, buf);
|
||||
const V pivot = ScanForPrev(d, st, keys + begin, num, reference, buf);
|
||||
V pivot = st.SetKey(d, buf); // the single unique sample
|
||||
result = CheckFirstLast(d, st, keys, num, idx_diff, &pivot, buf);
|
||||
#if VQSORT_PRINT
|
||||
Print(d, "PREV pivot", pivot, 0, st.LanesPerKey());
|
||||
fprintf(stderr, "PivotResult %d\n", static_cast<int>(result));
|
||||
Print(d, "Adjusted pivot", pivot, 0, st.LanesPerKey());
|
||||
#endif
|
||||
return pivot;
|
||||
}
|
||||
@@ -796,19 +851,32 @@ HWY_NOINLINE Vec<D> ChoosePivot(D d, Traits st, T* HWY_RESTRICT keys,
|
||||
}
|
||||
|
||||
template <class D, class Traits, typename T>
|
||||
void Recurse(D d, Traits st, T* HWY_RESTRICT keys, T* HWY_RESTRICT keys_end,
|
||||
const size_t begin, const size_t end, const Vec<D> pivot,
|
||||
T* HWY_RESTRICT buf, Generator& rng, size_t remaining_levels) {
|
||||
HWY_DASSERT(begin + 1 < end);
|
||||
const size_t num = end - begin; // >= 2
|
||||
HWY_NOINLINE void Recurse(D d, Traits st, T* HWY_RESTRICT keys,
|
||||
T* HWY_RESTRICT keys_end, const size_t begin,
|
||||
const size_t end, T* HWY_RESTRICT buf, Generator& rng,
|
||||
size_t remaining_levels) {
|
||||
const size_t num = end - begin; // >= 1
|
||||
#if VQSORT_PRINT
|
||||
fprintf(stderr, "- Recurse remaining %zu [%zu %zu) len %zu\n",
|
||||
remaining_levels, begin, end, num);
|
||||
Vec<D> first, last;
|
||||
ScanMinMax(d, st, keys + begin, num, buf, first, last);
|
||||
if (num >= Lanes(d)) {
|
||||
ScanMinMax(d, st, keys + begin, num, buf, first, last);
|
||||
}
|
||||
Print(d, "first", first, 0, st.LanesPerKey());
|
||||
Print(d, "last", last, 0, st.LanesPerKey());
|
||||
#endif
|
||||
HWY_DASSERT(begin < end);
|
||||
|
||||
if (HWY_UNLIKELY(num <= Constants::BaseCaseNum(Lanes(d)))) {
|
||||
BaseCase(d, st, keys + begin, keys_end, num, buf);
|
||||
return;
|
||||
}
|
||||
PivotResult result;
|
||||
Vec<D> pivot = ChoosePivot(d, st, keys + begin, num, buf, rng, result);
|
||||
if (HWY_UNLIKELY(result == PivotResult::kAllEqual)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Too many recursions. This is unlikely to happen because we select pivots
|
||||
// from large (though still O(1)) samples.
|
||||
@@ -820,47 +888,24 @@ void Recurse(D d, Traits st, T* HWY_RESTRICT keys, T* HWY_RESTRICT keys_end,
|
||||
return;
|
||||
}
|
||||
|
||||
const ptrdiff_t base_case_num =
|
||||
static_cast<ptrdiff_t>(Constants::BaseCaseNum(Lanes(d)));
|
||||
const size_t bound = Partition(d, st, keys, begin, end, pivot, buf);
|
||||
|
||||
const ptrdiff_t num_left =
|
||||
static_cast<ptrdiff_t>(bound) - static_cast<ptrdiff_t>(begin);
|
||||
const ptrdiff_t num_right =
|
||||
static_cast<ptrdiff_t>(end) - static_cast<ptrdiff_t>(bound);
|
||||
|
||||
// ChoosePivot ensures pivot != largest key, so this should never happen.
|
||||
HWY_ASSERT(num_right != 0);
|
||||
|
||||
if (HWY_UNLIKELY(num_left <= base_case_num)) {
|
||||
BaseCase(d, st, keys + begin, keys_end, static_cast<size_t>(num_left), buf);
|
||||
} else {
|
||||
PivotResult result;
|
||||
const Vec<D> next_pivot =
|
||||
ChoosePivot(d, st, keys, begin, bound, buf, rng, result);
|
||||
if (result != PivotResult::kAllEqual) {
|
||||
Recurse(d, st, keys, keys_end, begin, bound, next_pivot, buf, rng,
|
||||
remaining_levels - 1);
|
||||
}
|
||||
// ChoosePivot ensures pivot != last key, so the right partition is never
|
||||
// empty. Nor is the left, because the pivot is either one of the keys, or
|
||||
// the value prior to the last (which is not the only value).
|
||||
HWY_ASSERT(begin != bound && bound != end);
|
||||
if (HWY_LIKELY(result != PivotResult::kIsFirst)) {
|
||||
Recurse(d, st, keys, keys_end, begin, bound, buf, rng,
|
||||
remaining_levels - 1);
|
||||
}
|
||||
if (HWY_UNLIKELY(num_right <= base_case_num)) {
|
||||
BaseCase(d, st, keys + bound, keys_end, static_cast<size_t>(num_right),
|
||||
buf);
|
||||
} else {
|
||||
PivotResult result;
|
||||
const Vec<D> next_pivot =
|
||||
ChoosePivot(d, st, keys, bound, end, buf, rng, result);
|
||||
if (result != PivotResult::kAllEqual) {
|
||||
Recurse(d, st, keys, keys_end, bound, end, next_pivot, buf, rng,
|
||||
remaining_levels - 1);
|
||||
}
|
||||
if (HWY_LIKELY(result != PivotResult::kWasLast)) {
|
||||
Recurse(d, st, keys, keys_end, bound, end, buf, rng, remaining_levels - 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if sorting is finished.
|
||||
template <class D, class Traits, typename T>
|
||||
bool HandleSpecialCases(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
|
||||
T* HWY_RESTRICT buf) {
|
||||
HWY_INLINE bool HandleSpecialCases(D d, Traits st, T* HWY_RESTRICT keys,
|
||||
size_t num) {
|
||||
const size_t N = Lanes(d);
|
||||
const size_t base_case_num = Constants::BaseCaseNum(N);
|
||||
|
||||
@@ -876,16 +921,15 @@ bool HandleSpecialCases(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
|
||||
HWY_MAX_BYTES / sizeof(T) > Constants::kMaxRows * Constants::kMaxCols;
|
||||
const bool huge_vec = kPotentiallyHuge && (2 * N > base_case_num);
|
||||
if (partial_128 || huge_vec) {
|
||||
// PERFORMANCE WARNING: falling back to HeapSort.
|
||||
#if VQSORT_PRINT
|
||||
fprintf(stderr, "WARNING: using slow HeapSort: partial %d huge %d\n",
|
||||
partial_128, huge_vec);
|
||||
#endif
|
||||
HeapSort(st, keys, num);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Small arrays: use sorting network, no need for other checks.
|
||||
if (HWY_UNLIKELY(num <= base_case_num)) {
|
||||
BaseCase(d, st, keys, keys + num, num, buf);
|
||||
return true;
|
||||
}
|
||||
// Small arrays are already handled by Recurse.
|
||||
|
||||
// We could also check for already sorted/reverse/equal, but that's probably
|
||||
// counterproductive if vqsort is used as a base case.
|
||||
@@ -925,31 +969,26 @@ void Sort(D d, Traits st, T* HWY_RESTRICT keys, size_t num,
|
||||
buf = storage;
|
||||
#endif // !HWY_HAVE_SCALABLE
|
||||
|
||||
if (detail::HandleSpecialCases(d, st, keys, num, buf)) return;
|
||||
if (detail::HandleSpecialCases(d, st, keys, num)) return;
|
||||
|
||||
#if HWY_MAX_BYTES > 64
|
||||
// sorting_networks-inl and traits assume no more than 512 bit vectors.
|
||||
if (Lanes(d) > 64 / sizeof(T)) {
|
||||
if (HWY_UNLIKELY(Lanes(d) > 64 / sizeof(T))) {
|
||||
return Sort(CappedTag<T, 64 / sizeof(T)>(), st, keys, num, buf);
|
||||
}
|
||||
#endif // HWY_MAX_BYTES > 64
|
||||
|
||||
// Pulled out of the recursion so we can special-case degenerate partitions.
|
||||
detail::Generator rng(keys, num);
|
||||
detail::PivotResult result;
|
||||
const Vec<D> pivot =
|
||||
detail::ChoosePivot(d, st, keys, 0, num, buf, rng, result);
|
||||
|
||||
if (result != detail::PivotResult::kAllEqual) {
|
||||
// Introspection: switch to worst-case N*logN heapsort after this many.
|
||||
const size_t max_levels = 2 * hwy::CeilLog2(num) + 4;
|
||||
detail::Recurse(d, st, keys, keys + num, 0, num, pivot, buf, rng,
|
||||
max_levels);
|
||||
}
|
||||
// Introspection: switch to worst-case N*logN heapsort after this many.
|
||||
const size_t max_levels = 2 * hwy::CeilLog2(num) + 4;
|
||||
detail::Recurse(d, st, keys, keys + num, 0, num, buf, rng, max_levels);
|
||||
#else
|
||||
(void)d;
|
||||
(void)buf;
|
||||
// PERFORMANCE WARNING: vqsort is not enabled for the non-SIMD target
|
||||
#if VQSORT_PRINT
|
||||
fprintf(stderr, "WARNING: using slow HeapSort because vqsort disabled\n");
|
||||
#endif
|
||||
return detail::HeapSort(st, keys, num);
|
||||
#endif // VQSORT_ENABLED
|
||||
}
|
||||
|
||||
@@ -50,6 +50,12 @@
|
||||
#define HWY_COMPILER_ICC 0
|
||||
#endif
|
||||
|
||||
// HWY_COMPILER_ICX is always defined after this block: it takes the value of
// __INTEL_LLVM_COMPILER when the compiler predefines that macro, and 0
// otherwise, so it can be tested with a plain #if.
#ifdef __INTEL_LLVM_COMPILER
|
||||
#define HWY_COMPILER_ICX __INTEL_LLVM_COMPILER
|
||||
#else
|
||||
#define HWY_COMPILER_ICX 0
|
||||
#endif
|
||||
|
||||
// HWY_COMPILER_GCC is a generic macro for all compilers implementing the GNU
|
||||
// compiler extensions (eg. Clang, Intel...)
|
||||
#ifdef __GNUC__
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
19
third_party/highway/hwy/examples/skeleton-inl.h
vendored
19
third_party/highway/hwy/examples/skeleton-inl.h
vendored
@@ -19,7 +19,9 @@
|
||||
// splitting code into different files while still inlining instead of requiring
|
||||
// calling through function pointers.
|
||||
|
||||
// Include guard (still compiled once per target)
|
||||
// Per-target include guard. This is only required when using dynamic dispatch,
|
||||
// i.e. including foreach_target.h. For static dispatch, a normal include
|
||||
// guard would be fine because the header is only compiled once.
|
||||
#if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
|
||||
#ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
|
||||
#undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
|
||||
@@ -36,7 +38,8 @@ HWY_BEFORE_NAMESPACE();
|
||||
namespace skeleton {
|
||||
namespace HWY_NAMESPACE {
|
||||
|
||||
using namespace hwy::HWY_NAMESPACE;
|
||||
// Highway ops reside here; ADL does not find templates nor builtins.
|
||||
namespace hn = hwy::HWY_NAMESPACE;
|
||||
|
||||
// Example of a type-agnostic (caller-specified lane type) and width-agnostic
|
||||
// (uses best available instruction set) function in a header.
|
||||
@@ -46,12 +49,12 @@ template <class D, typename T>
|
||||
HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
|
||||
const T* HWY_RESTRICT add_array,
|
||||
const size_t size, T* HWY_RESTRICT x_array) {
|
||||
for (size_t i = 0; i < size; i += Lanes(d)) {
|
||||
const auto mul = Load(d, mul_array + i);
|
||||
const auto add = Load(d, add_array + i);
|
||||
auto x = Load(d, x_array + i);
|
||||
x = MulAdd(mul, x, add);
|
||||
Store(x, d, x_array + i);
|
||||
for (size_t i = 0; i < size; i += hn::Lanes(d)) {
|
||||
const auto mul = hn::Load(d, mul_array + i);
|
||||
const auto add = hn::Load(d, add_array + i);
|
||||
auto x = hn::Load(d, x_array + i);
|
||||
x = hn::MulAdd(mul, x, add);
|
||||
hn::Store(x, d, x_array + i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
11
third_party/highway/hwy/examples/skeleton.cc
vendored
11
third_party/highway/hwy/examples/skeleton.cc
vendored
@@ -17,22 +17,28 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// >>>> for dynamic dispatch only, skip if you want static dispatch
|
||||
|
||||
// First undef to prevent error when re-included.
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
// For runtime dispatch, specify the name of the current file (unfortunately
|
||||
// For dynamic dispatch, specify the name of the current file (unfortunately
|
||||
// __FILE__ is not reliable) so that foreach_target.h can re-include it.
|
||||
#define HWY_TARGET_INCLUDE "hwy/examples/skeleton.cc"
|
||||
// Generates code for each enabled target by re-including this source file.
|
||||
#include "hwy/foreach_target.h" // IWYU pragma: keep
|
||||
|
||||
// <<<< end of dynamic dispatch
|
||||
|
||||
// Must come after foreach_target.h to avoid redefinition errors.
|
||||
#include "hwy/highway.h"
|
||||
|
||||
// Optional, can instead add HWY_ATTR to all functions.
|
||||
HWY_BEFORE_NAMESPACE();
|
||||
|
||||
namespace skeleton {
|
||||
// This namespace name is unique per target, which allows code for multiple
|
||||
// targets to co-exist in the same translation unit.
|
||||
// targets to co-exist in the same translation unit. Required when using dynamic
|
||||
// dispatch, otherwise optional.
|
||||
namespace HWY_NAMESPACE {
|
||||
|
||||
// Highway ops reside here; ADL does not find templates nor builtins.
|
||||
@@ -104,6 +110,7 @@ HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in,
|
||||
uint8_t* HWY_RESTRICT out) {
|
||||
// This must reside outside of HWY_NAMESPACE because it references (calls the
|
||||
// appropriate one from) the per-target implementations there.
|
||||
// For static dispatch, use HWY_STATIC_DISPATCH.
|
||||
return HWY_DYNAMIC_DISPATCH(FloorLog2)(in, count, out);
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ struct TestFloorLog2 {
|
||||
};
|
||||
|
||||
HWY_NOINLINE void TestAllFloorLog2() {
|
||||
ForPartialVectors<TestFloorLog2>()(float());
|
||||
hn::ForPartialVectors<TestFloorLog2>()(float());
|
||||
}
|
||||
|
||||
// Calls function defined in skeleton-inl.h.
|
||||
@@ -91,7 +91,7 @@ struct TestSumMulAdd {
|
||||
};
|
||||
|
||||
HWY_NOINLINE void TestAllSumMulAdd() {
|
||||
ForFloatTypes(ForPartialVectors<TestSumMulAdd>());
|
||||
hn::ForFloatTypes(hn::ForPartialVectors<TestSumMulAdd>());
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
|
||||
2
third_party/highway/hwy/highway.h
vendored
2
third_party/highway/hwy/highway.h
vendored
@@ -29,7 +29,7 @@ namespace hwy {
|
||||
// API version (https://semver.org/); keep in sync with CMakeLists.txt.
|
||||
#define HWY_MAJOR 1
|
||||
#define HWY_MINOR 0
|
||||
#define HWY_PATCH 0
|
||||
#define HWY_PATCH 1
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Shorthand for tags (defined in shared-inl.h) used to select overloads.
|
||||
|
||||
3
third_party/highway/hwy/highway_test.cc
vendored
3
third_party/highway/hwy/highway_test.cc
vendored
@@ -15,7 +15,6 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <bitset>
|
||||
|
||||
@@ -224,7 +223,7 @@ HWY_INLINE void AssertNaN(D d, VecArg<V> v, const char* file, int line) {
|
||||
// avoid truncating doubles.
|
||||
uint8_t bytes[HWY_MAX(sizeof(T), 8)] = {0};
|
||||
const T lane = GetLane(v);
|
||||
memcpy(bytes, &lane, sizeof(T));
|
||||
CopyBytes<sizeof(T)>(&lane, bytes);
|
||||
Abort(file, line,
|
||||
"Expected %s NaN, got %E (bytes %02x %02x %02x %02x %02x %02x %02x "
|
||||
"%02x)",
|
||||
|
||||
8
third_party/highway/hwy/nanobenchmark.cc
vendored
8
third_party/highway/hwy/nanobenchmark.cc
vendored
@@ -15,11 +15,13 @@
|
||||
|
||||
#include "hwy/nanobenchmark.h"
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> // abort
|
||||
#include <string.h> // memcpy
|
||||
#include <stdlib.h>
|
||||
#include <time.h> // clock_gettime
|
||||
|
||||
#include <algorithm> // sort
|
||||
@@ -414,7 +416,7 @@ std::string BrandString() {
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
Cpuid(static_cast<uint32_t>(0x80000002U + i), 0, abcd.data());
|
||||
memcpy(brand_string + i * 16, abcd.data(), sizeof(abcd));
|
||||
CopyBytes<sizeof(abcd)>(&abcd[0], brand_string + i * 16); // not same size
|
||||
}
|
||||
brand_string[48] = 0;
|
||||
return brand_string;
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
|
||||
#include "hwy/nanobenchmark.h"
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
130
third_party/highway/hwy/ops/arm_neon-inl.h
vendored
130
third_party/highway/hwy/ops/arm_neon-inl.h
vendored
@@ -1030,6 +1030,9 @@ template <typename T, size_t N>
|
||||
HWY_API Vec128<T, N> Undefined(Simd<T, N, 0> /*d*/) {
|
||||
HWY_DIAGNOSTICS(push)
|
||||
HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized")
|
||||
#if HWY_COMPILER_GCC_ACTUAL
|
||||
HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wmaybe-uninitialized")
|
||||
#endif
|
||||
typename detail::Raw128<T, N>::type a;
|
||||
return Vec128<T, N>(a);
|
||||
HWY_DIAGNOSTICS(pop)
|
||||
@@ -3285,6 +3288,16 @@ HWY_API Vec128<float, N> ConvertTo(Simd<float, N, 0> /* tag */,
|
||||
return Vec128<float, N>(vcvt_f32_s32(v.raw));
|
||||
}
|
||||
|
||||
// Converts the uint32_t lanes of a full 128-bit vector to float.
// The unnamed tag parameter only selects this overload; the conversion itself
// is done by the NEON intrinsic vcvtq_f32_u32 on the raw register.
HWY_API Vec128<float> ConvertTo(Full128<float> /* tag */,
|
||||
const Vec128<uint32_t> v) {
|
||||
return Vec128<float>(vcvtq_f32_u32(v.raw));
|
||||
}
|
||||
// uint32_t -> float conversion for N-lane vectors selected by
// HWY_IF_LE64(uint32_t, N) (presumably vectors of at most 64 bits - confirm
// against the macro definition). Uses the non-"q" intrinsic vcvt_f32_u32,
// the 64-bit register counterpart of the full-vector overload.
template <size_t N, HWY_IF_LE64(uint32_t, N)>
|
||||
HWY_API Vec128<float, N> ConvertTo(Simd<float, N, 0> /* tag */,
|
||||
const Vec128<uint32_t, N> v) {
|
||||
return Vec128<float, N>(vcvt_f32_u32(v.raw));
|
||||
}
|
||||
|
||||
// Truncates (rounds toward zero).
|
||||
HWY_API Vec128<int32_t> ConvertTo(Full128<int32_t> /* tag */,
|
||||
const Vec128<float> v) {
|
||||
@@ -3307,6 +3320,15 @@ HWY_API Vec64<double> ConvertTo(Full64<double> /* tag */,
|
||||
return Vec64<double>(vcvt_f64_s64(v.raw));
|
||||
}
|
||||
|
||||
// Converts the uint64_t lanes of a full 128-bit vector to double via the
// NEON intrinsic vcvtq_f64_u64. The tag parameter is unused apart from
// overload selection.
HWY_API Vec128<double> ConvertTo(Full128<double> /* tag */,
|
||||
const Vec128<uint64_t> v) {
|
||||
return Vec128<double>(vcvtq_f64_u64(v.raw));
|
||||
}
|
||||
// 64-bit vector variant of the uint64_t -> double conversion: wraps the
// non-"q" intrinsic vcvt_f64_u64 and returns a Vec64<double>.
HWY_API Vec64<double> ConvertTo(Full64<double> /* tag */,
|
||||
const Vec64<uint64_t> v) {
|
||||
return Vec64<double>(vcvt_f64_u64(v.raw));
|
||||
}
|
||||
|
||||
// Truncates (rounds toward zero).
|
||||
HWY_API Vec128<int64_t> ConvertTo(Full128<int64_t> /* tag */,
|
||||
const Vec128<double> v) {
|
||||
@@ -4979,24 +5001,52 @@ HWY_INLINE Vec128<T> MaxOfLanes(hwy::SizeTag<8> /* tag */,
|
||||
return Max(v10, v01);
|
||||
}
|
||||
|
||||
// u16/i16
|
||||
template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2), HWY_IF_GE32(T, N)>
|
||||
HWY_API Vec128<T, N> MinOfLanes(hwy::SizeTag<2> /* tag */, Vec128<T, N> v) {
|
||||
const Repartition<int32_t, Simd<T, N, 0>> d32;
|
||||
template <size_t N, HWY_IF_GE32(uint16_t, N)>
|
||||
HWY_API Vec128<uint16_t, N> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<uint16_t, N> v) {
|
||||
const Simd<uint16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Simd<T, N, 0>(), Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2), HWY_IF_GE32(T, N)>
|
||||
HWY_API Vec128<T, N> MaxOfLanes(hwy::SizeTag<2> /* tag */, Vec128<T, N> v) {
|
||||
const Repartition<int32_t, Simd<T, N, 0>> d32;
|
||||
// Minimum across all int16_t lanes, returned in every lane.
// Strategy: reinterpret pairs of 16-bit lanes as one wider lane (d32), split
// each wide lane into its lower ("even") and upper ("odd") 16-bit value, take
// the lane-wise Min, and hand the result to the SizeTag<4> reduction.
// Enabled via HWY_IF_GE32(int16_t, N); the SizeTag<2> parameter is only used
// for overload selection.
template <size_t N, HWY_IF_GE32(int16_t, N)>
|
||||
HWY_API Vec128<int16_t, N> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<int16_t, N> v) {
|
||||
const Simd<int16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
// (shift the lower half up by 16 and back down, so its sign fills the
// upper 16 bits of the wide lane).
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
// Upper half moved down into the low 16 bits of the wide lane.
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
// The first OddEven argument carries the result shifted into the upper
// 16 bits, the second carries it in the lower 16 bits.
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
template <size_t N, HWY_IF_GE32(uint16_t, N)>
|
||||
HWY_API Vec128<uint16_t, N> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<uint16_t, N> v) {
|
||||
const Simd<uint16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Simd<T, N, 0>(), Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
// Maximum across all int16_t lanes, returned in every lane.
// Mirrors the int16_t MinOfLanes shape: widen to d32, split each wide lane
// into its lower ("even") and upper ("odd") 16-bit value, take the lane-wise
// Max, then reduce with the SizeTag<4> overload.
// Enabled via HWY_IF_GE32(int16_t, N); the SizeTag<2> parameter is only used
// for overload selection.
template <size_t N, HWY_IF_GE32(int16_t, N)>
|
||||
HWY_API Vec128<int16_t, N> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<int16_t, N> v) {
|
||||
const Simd<int16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
// (shift the lower half up by 16 and back down, so its sign fills the
// upper 16 bits of the wide lane).
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
// Upper half moved down into the low 16 bits of the wide lane.
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
// NOTE(review): this local is named `min` but holds the result of
// MaxOfLanes/Max, i.e. the maximum.
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
// The first OddEven argument carries the result shifted into the upper
// 16 bits, the second carries it in the lower 16 bits.
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
@@ -6356,64 +6406,6 @@ HWY_INLINE VFromD<D> Max128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
|
||||
return IfThenElse(Lt128Upper(d, b, a), a, b);
|
||||
}
|
||||
|
||||
// ================================================== Operator wrapper
|
||||
|
||||
// These apply to all x86_*-inl.h because there are no restrictions on V.
// NOTE(review): this listing places the section under arm_neon-inl.h, so the
// x86 wording above looks copied from another target - confirm.
// NOTE(review): the hunk header just before this span reads
// "-6356,64 +6406,6", which suggests the whole section is on the removed
// side of the change.
//
// Each wrapper below is an unconstrained template over V that forwards to the
// corresponding built-in operator, giving the operators a named-function
// spelling.
|
||||
|
||||
// Add(a, b) returns a + b.
template <class V>
|
||||
HWY_API V Add(V a, V b) {
|
||||
return a + b;
|
||||
}
|
||||
// Sub(a, b) returns a - b.
template <class V>
|
||||
HWY_API V Sub(V a, V b) {
|
||||
return a - b;
|
||||
}
|
||||
|
||||
// Mul(a, b) returns a * b.
template <class V>
|
||||
HWY_API V Mul(V a, V b) {
|
||||
return a * b;
|
||||
}
|
||||
// Div(a, b) returns a / b.
template <class V>
|
||||
HWY_API V Div(V a, V b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// Shl(a, b) returns a << b.
// NOTE(review): unlike the neighbouring wrappers, Shl and Shr are declared
// without HWY_API.
template <class V>
|
||||
V Shl(V a, V b) {
|
||||
return a << b;
|
||||
}
|
||||
// Shr(a, b) returns a >> b.
template <class V>
|
||||
V Shr(V a, V b) {
|
||||
return a >> b;
|
||||
}
|
||||
|
||||
// Comparison wrappers. All six declare their return type as
// decltype(a == b), i.e. whatever operator== yields for V, even where the
// body uses a different comparison operator.
// Eq(a, b) returns a == b.
template <class V>
|
||||
HWY_API auto Eq(V a, V b) -> decltype(a == b) {
|
||||
return a == b;
|
||||
}
|
||||
// Ne(a, b) returns a != b.
template <class V>
|
||||
HWY_API auto Ne(V a, V b) -> decltype(a == b) {
|
||||
return a != b;
|
||||
}
|
||||
// Lt(a, b) returns a < b.
template <class V>
|
||||
HWY_API auto Lt(V a, V b) -> decltype(a == b) {
|
||||
return a < b;
|
||||
}
|
||||
|
||||
// Gt(a, b) returns a > b.
template <class V>
|
||||
HWY_API auto Gt(V a, V b) -> decltype(a == b) {
|
||||
return a > b;
|
||||
}
|
||||
// Ge(a, b) returns a >= b.
template <class V>
|
||||
HWY_API auto Ge(V a, V b) -> decltype(a == b) {
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
// Le(a, b) returns a <= b.
template <class V>
|
||||
HWY_API auto Le(V a, V b) -> decltype(a == b) {
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
namespace detail { // for code folding
|
||||
#if HWY_ARCH_ARM_V7
|
||||
#undef vuzp1_s8
|
||||
|
||||
20
third_party/highway/hwy/ops/arm_sve-inl.h
vendored
20
third_party/highway/hwy/ops/arm_sve-inl.h
vendored
@@ -629,6 +629,13 @@ HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGPVN, MaxN, max_n)
|
||||
HWY_SVE_FOREACH_UI16(HWY_SVE_RETV_ARGPVV, Mul, mul)
|
||||
HWY_SVE_FOREACH_UIF3264(HWY_SVE_RETV_ARGPVV, Mul, mul)
|
||||
|
||||
// Per-target flag to prevent generic_ops-inl.h from defining i64 operator*.
|
||||
// Toggles HWY_NATIVE_I64MULLO: undefines it if currently defined, defines it
// otherwise. It is flipped rather than set unconditionally because the guard
// in generic_ops-inl.h (visible later in this listing) compares
// defined(HWY_NATIVE_I64MULLO) against defined(HWY_TARGET_TOGGLE).
#ifdef HWY_NATIVE_I64MULLO
|
||||
#undef HWY_NATIVE_I64MULLO
|
||||
#else
|
||||
#define HWY_NATIVE_I64MULLO
|
||||
#endif
|
||||
|
||||
// ------------------------------ MulHigh
|
||||
HWY_SVE_FOREACH_UI16(HWY_SVE_RETV_ARGPVV, MulHigh, mulh)
|
||||
namespace detail {
|
||||
@@ -1497,11 +1504,18 @@ HWY_API svint32_t DemoteTo(Simd<int32_t, N, kPow2> d, const svfloat64_t v) {
|
||||
// ------------------------------ ConvertTo F
|
||||
|
||||
#define HWY_SVE_CONVERT(BASE, CHAR, BITS, HALF, NAME, OP) \
|
||||
/* signed integers */ \
|
||||
template <size_t N, int kPow2> \
|
||||
HWY_API HWY_SVE_V(BASE, BITS) \
|
||||
NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, HWY_SVE_V(int, BITS) v) { \
|
||||
return sv##OP##_##CHAR##BITS##_s##BITS##_x(HWY_SVE_PTRUE(BITS), v); \
|
||||
} \
|
||||
/* unsigned integers */ \
|
||||
template <size_t N, int kPow2> \
|
||||
HWY_API HWY_SVE_V(BASE, BITS) \
|
||||
NAME(HWY_SVE_D(BASE, BITS, N, kPow2) /* d */, HWY_SVE_V(uint, BITS) v) { \
|
||||
return sv##OP##_##CHAR##BITS##_u##BITS##_x(HWY_SVE_PTRUE(BITS), v); \
|
||||
} \
|
||||
/* Truncates (rounds toward zero). */ \
|
||||
template <size_t N, int kPow2> \
|
||||
HWY_API HWY_SVE_V(int, BITS) \
|
||||
@@ -2248,9 +2262,9 @@ HWY_API svuint64_t CompressBlocksNot(svuint64_t v, svbool_t mask) {
|
||||
#endif
|
||||
#if HWY_TARGET == HWY_SVE_256 || HWY_IDE
|
||||
uint64_t bits = 0; // predicate reg is 32-bit
|
||||
CopyBytes<4>(&mask, &bits);
|
||||
CopyBytes<4>(&mask, &bits); // not same size - 64-bit more efficient
|
||||
// Concatenate LSB for upper and lower blocks, pre-scale by 4 for table idx.
|
||||
const size_t offset = ((bits & 1) ? 4 : 0) + ((bits & 0x10000) ? 8 : 0);
|
||||
const size_t offset = ((bits & 1) ? 4u : 0u) + ((bits & 0x10000) ? 8u : 0u);
|
||||
// See CompressIsPartition. Manually generated; flip halves if mask = [0, 1].
|
||||
alignas(16) static constexpr uint64_t table[4 * 4] = {0, 1, 2, 3, 2, 3, 0, 1,
|
||||
0, 1, 2, 3, 0, 1, 2, 3};
|
||||
@@ -2680,7 +2694,7 @@ HWY_INLINE svbool_t LoadMaskBits(D /* tag */,
|
||||
// Max 2048 bits = 32 lanes = 32 input bits; replicate those into each lane.
|
||||
// The "at least 8 byte" guarantee in quick_reference ensures this is safe.
|
||||
uint32_t mask_bits;
|
||||
CopyBytes<4>(bits, &mask_bits);
|
||||
CopyBytes<4>(bits, &mask_bits); // copy from bytes
|
||||
const auto vbits = Set(du, mask_bits);
|
||||
|
||||
// 2 ^ {0,1, .., 31}, will not have more lanes than that.
|
||||
|
||||
98
third_party/highway/hwy/ops/emu128-inl.h
vendored
98
third_party/highway/hwy/ops/emu128-inl.h
vendored
@@ -101,9 +101,7 @@ using TFromV = TFromD<DFromV<V>>;
|
||||
template <typename T, size_t N, typename FromT, size_t FromN>
|
||||
HWY_API Vec128<T, N> BitCast(Simd<T, N, 0> /* tag */, Vec128<FromT, FromN> v) {
|
||||
Vec128<T, N> to;
|
||||
static_assert(sizeof(T) * N == sizeof(FromT) * FromN,
|
||||
"Casting does not change size");
|
||||
CopyBytes<sizeof(T) * N>(v.raw, to.raw);
|
||||
CopySameSize(&v, &to);
|
||||
return to;
|
||||
}
|
||||
|
||||
@@ -285,8 +283,7 @@ template <typename TFrom, typename TTo, size_t N>
|
||||
HWY_API Mask128<TTo, N> RebindMask(Simd<TTo, N, 0> /*tag*/,
|
||||
Mask128<TFrom, N> mask) {
|
||||
Mask128<TTo, N> to;
|
||||
static_assert(sizeof(TTo) * N == sizeof(TFrom) * N, "Must have same size");
|
||||
CopyBytes<sizeof(TTo) * N>(mask.bits, to.bits);
|
||||
CopySameSize(&mask, &to);
|
||||
return to;
|
||||
}
|
||||
|
||||
@@ -294,15 +291,14 @@ HWY_API Mask128<TTo, N> RebindMask(Simd<TTo, N, 0> /*tag*/,
|
||||
template <typename T, size_t N>
|
||||
HWY_API Mask128<T, N> MaskFromVec(const Vec128<T, N> v) {
|
||||
Mask128<T, N> mask;
|
||||
static_assert(sizeof(v) == sizeof(mask), "Must have same size");
|
||||
CopyBytes<sizeof(T) * N>(v.raw, mask.bits);
|
||||
CopySameSize(&v, &mask);
|
||||
return mask;
|
||||
}
|
||||
|
||||
template <typename T, size_t N>
|
||||
Vec128<T, N> VecFromMask(const Mask128<T, N> mask) {
|
||||
Vec128<T, N> v;
|
||||
CopyBytes<sizeof(T) * N>(mask.bits, v.raw);
|
||||
CopySameSize(&mask, &v);
|
||||
return v;
|
||||
}
|
||||
|
||||
@@ -926,10 +922,10 @@ HWY_API Vec128<float, N> ApproximateReciprocalSqrt(Vec128<float, N> v) {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const float half = v.raw[i] * 0.5f;
|
||||
uint32_t bits;
|
||||
CopyBytes<4>(&v.raw[i], &bits);
|
||||
CopySameSize(&v.raw[i], &bits);
|
||||
// Initial guess based on log2(f)
|
||||
bits = 0x5F3759DF - (bits >> 1);
|
||||
CopyBytes<4>(&bits, &v.raw[i]);
|
||||
CopySameSize(&bits, &v.raw[i]);
|
||||
// One Newton-Raphson iteration
|
||||
v.raw[i] = v.raw[i] * (1.5f - (half * v.raw[i] * v.raw[i]));
|
||||
}
|
||||
@@ -1039,7 +1035,7 @@ Vec128<Float, N> Ceil(Vec128<Float, N> v) {
|
||||
const bool positive = v.raw[i] > Float(0.0);
|
||||
|
||||
Bits bits;
|
||||
CopyBytes<sizeof(Bits)>(&v.raw[i], &bits);
|
||||
CopySameSize(&v.raw[i], &bits);
|
||||
|
||||
const int exponent =
|
||||
static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
|
||||
@@ -1059,7 +1055,7 @@ Vec128<Float, N> Ceil(Vec128<Float, N> v) {
|
||||
if (positive) bits += (kMantissaMask + 1) >> exponent;
|
||||
bits &= ~mantissa_mask;
|
||||
|
||||
CopyBytes<sizeof(Bits)>(&bits, &v.raw[i]);
|
||||
CopySameSize(&bits, &v.raw[i]);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
@@ -1077,7 +1073,7 @@ Vec128<Float, N> Floor(Vec128<Float, N> v) {
|
||||
const bool negative = v.raw[i] < Float(0.0);
|
||||
|
||||
Bits bits;
|
||||
CopyBytes<sizeof(Bits)>(&v.raw[i], &bits);
|
||||
CopySameSize(&v.raw[i], &bits);
|
||||
|
||||
const int exponent =
|
||||
static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
|
||||
@@ -1097,7 +1093,7 @@ Vec128<Float, N> Floor(Vec128<Float, N> v) {
|
||||
if (negative) bits += (kMantissaMask + 1) >> exponent;
|
||||
bits &= ~mantissa_mask;
|
||||
|
||||
CopyBytes<sizeof(Bits)>(&bits, &v.raw[i]);
|
||||
CopySameSize(&bits, &v.raw[i]);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
@@ -1110,7 +1106,7 @@ HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
// std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
|
||||
MakeUnsigned<T> bits;
|
||||
memcpy(&bits, &v.raw[i], sizeof(T));
|
||||
CopySameSize(&v.raw[i], &bits);
|
||||
bits += bits;
|
||||
bits >>= 1; // clear sign bit
|
||||
// NaN if all exponent bits are set and the mantissa is not zero.
|
||||
@@ -1278,7 +1274,7 @@ template <typename T, size_t N>
|
||||
HWY_API Vec128<T, N> Load(Simd<T, N, 0> /* tag */,
|
||||
const T* HWY_RESTRICT aligned) {
|
||||
Vec128<T, N> v;
|
||||
CopyBytes<sizeof(T) * N>(aligned, v.raw);
|
||||
CopyBytes<sizeof(T) * N>(aligned, v.raw); // copy from array
|
||||
return v;
|
||||
}
|
||||
|
||||
@@ -1305,7 +1301,7 @@ HWY_API Vec128<T, N> LoadDup128(Simd<T, N, 0> d,
|
||||
template <typename T, size_t N>
|
||||
HWY_API void Store(const Vec128<T, N> v, Simd<T, N, 0> /* tag */,
|
||||
T* HWY_RESTRICT aligned) {
|
||||
CopyBytes<sizeof(T) * N>(v.raw, aligned);
|
||||
CopyBytes<sizeof(T) * N>(v.raw, aligned); // copy to array
|
||||
}
|
||||
|
||||
template <typename T, size_t N>
|
||||
@@ -1434,7 +1430,7 @@ HWY_API void ScatterOffset(Vec128<T, N> v, Simd<T, N, 0> /* tag */, T* base,
|
||||
static_assert(sizeof(T) == sizeof(Offset), "Must match for portability");
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
uint8_t* const base8 = reinterpret_cast<uint8_t*>(base) + offset.raw[i];
|
||||
CopyBytes<sizeof(T)>(&v.raw[i], base8);
|
||||
CopyBytes<sizeof(T)>(&v.raw[i], base8); // copy to bytes
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1457,7 +1453,7 @@ HWY_API Vec128<T, N> GatherOffset(Simd<T, N, 0> /* tag */, const T* base,
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const uint8_t* base8 =
|
||||
reinterpret_cast<const uint8_t*>(base) + offset.raw[i];
|
||||
CopyBytes<sizeof(T)>(base8, &v.raw[i]);
|
||||
CopyBytes<sizeof(T)>(base8, &v.raw[i]); // copy from bytes
|
||||
}
|
||||
return v;
|
||||
}
|
||||
@@ -1556,12 +1552,12 @@ namespace detail {
|
||||
|
||||
HWY_INLINE void StoreU16ToF16(const uint16_t val,
|
||||
hwy::float16_t* HWY_RESTRICT to) {
|
||||
CopyBytes<2>(&val, to);
|
||||
CopySameSize(&val, to);
|
||||
}
|
||||
|
||||
HWY_INLINE uint16_t U16FromF16(const hwy::float16_t* HWY_RESTRICT from) {
|
||||
uint16_t bits16;
|
||||
CopyBytes<2>(from, &bits16);
|
||||
CopySameSize(from, &bits16);
|
||||
return bits16;
|
||||
}
|
||||
|
||||
@@ -1590,7 +1586,7 @@ HWY_API Vec128<float, N> PromoteTo(Simd<float, N, 0> /* tag */,
|
||||
const uint32_t biased_exp32 = biased_exp + (127 - 15);
|
||||
const uint32_t mantissa32 = mantissa << (23 - 10);
|
||||
const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
|
||||
CopyBytes<4>(&bits32, &ret.raw[i]);
|
||||
CopySameSize(&bits32, &ret.raw[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -1611,7 +1607,7 @@ HWY_API Vec128<float16_t, N> DemoteTo(Simd<float16_t, N, 0> /* tag */,
|
||||
Vec128<float16_t, N> ret;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
uint32_t bits32;
|
||||
CopyBytes<4>(&v.raw[i], &bits32);
|
||||
CopySameSize(&v.raw[i], &bits32);
|
||||
const uint32_t sign = bits32 >> 31;
|
||||
const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
|
||||
const uint32_t mantissa32 = bits32 & 0x7FFFFF;
|
||||
@@ -2446,62 +2442,6 @@ HWY_INLINE Vec128<uint64_t> MulOdd(const Vec128<uint64_t> a,
|
||||
return Load(Full128<uint64_t>(), mul);
|
||||
}
|
||||
|
||||
// ================================================== Operator wrapper
|
||||
|
||||
// Each wrapper below is an unconstrained template over V that forwards to the
// corresponding built-in operator, giving the operators a named-function
// spelling.
// NOTE(review): the hunk header just before this span reads
// "-2446,62 +2442,6", which suggests the whole section is on the removed
// side of the change.
// Add(a, b) returns a + b.
template <class V>
|
||||
HWY_API V Add(V a, V b) {
|
||||
return a + b;
|
||||
}
|
||||
// Sub(a, b) returns a - b.
template <class V>
|
||||
HWY_API V Sub(V a, V b) {
|
||||
return a - b;
|
||||
}
|
||||
|
||||
// Mul(a, b) returns a * b.
template <class V>
|
||||
HWY_API V Mul(V a, V b) {
|
||||
return a * b;
|
||||
}
|
||||
// Div(a, b) returns a / b.
template <class V>
|
||||
HWY_API V Div(V a, V b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// Shl(a, b) returns a << b.
// NOTE(review): unlike the neighbouring wrappers, Shl and Shr are declared
// without HWY_API.
template <class V>
|
||||
V Shl(V a, V b) {
|
||||
return a << b;
|
||||
}
|
||||
// Shr(a, b) returns a >> b.
template <class V>
|
||||
V Shr(V a, V b) {
|
||||
return a >> b;
|
||||
}
|
||||
|
||||
// Comparison wrappers. All six declare their return type as
// decltype(a == b), i.e. whatever operator== yields for V, even where the
// body uses a different comparison operator.
// Eq(a, b) returns a == b.
template <class V>
|
||||
HWY_API auto Eq(V a, V b) -> decltype(a == b) {
|
||||
return a == b;
|
||||
}
|
||||
// Ne(a, b) returns a != b.
template <class V>
|
||||
HWY_API auto Ne(V a, V b) -> decltype(a == b) {
|
||||
return a != b;
|
||||
}
|
||||
// Lt(a, b) returns a < b.
template <class V>
|
||||
HWY_API auto Lt(V a, V b) -> decltype(a == b) {
|
||||
return a < b;
|
||||
}
|
||||
|
||||
// Gt(a, b) returns a > b.
template <class V>
|
||||
HWY_API auto Gt(V a, V b) -> decltype(a == b) {
|
||||
return a > b;
|
||||
}
|
||||
// Ge(a, b) returns a >= b.
template <class V>
|
||||
HWY_API auto Ge(V a, V b) -> decltype(a == b) {
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
// Le(a, b) returns a <= b.
template <class V>
|
||||
HWY_API auto Le(V a, V b) -> decltype(a == b) {
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace hwy
|
||||
|
||||
102
third_party/highway/hwy/ops/generic_ops-inl.h
vendored
102
third_party/highway/hwy/ops/generic_ops-inl.h
vendored
@@ -1209,7 +1209,8 @@ HWY_API V PopulationCount(V v) {
|
||||
// RVV has a specialization that avoids the Set().
|
||||
#if HWY_TARGET != HWY_RVV
|
||||
// Slower fallback for capped vectors.
|
||||
template <typename V, class D = DFromV<V>, HWY_IF_LANE_SIZE_D(D, 1), HWY_IF_LT128_D(D)>
|
||||
template <typename V, class D = DFromV<V>, HWY_IF_LANE_SIZE_D(D, 1),
|
||||
HWY_IF_LT128_D(D)>
|
||||
HWY_API V PopulationCount(V v) {
|
||||
static_assert(IsSame<TFromD<D>, uint8_t>(), "V must be u8");
|
||||
const D d;
|
||||
@@ -1251,6 +1252,105 @@ HWY_API V PopulationCount(V v) {
|
||||
|
||||
#endif // HWY_NATIVE_POPCNT
|
||||
|
||||
// operator* for vectors whose lanes are 8 bytes wide and whose descriptor D is
// below 128 bits (see the HWY_IF_LANE_SIZE_D / HWY_IF_LT128_D constraints).
// Presumably such a vector holds a single 64-bit lane - TODO confirm - so the
// product is computed on the scalar values returned by GetLane and the result
// vector is rebuilt from that scalar with Set.
template <class V, class D = DFromV<V>, HWY_IF_LANE_SIZE_D(D, 8),
|
||||
HWY_IF_LT128_D(D)>
|
||||
HWY_API V operator*(V x, V y) {
|
||||
return Set(D(), GetLane(x) * GetLane(y));
|
||||
}
|
||||
|
||||
// "Include guard": skip if native 64-bit mul instructions are available.
|
||||
#if (defined(HWY_NATIVE_I64MULLO) == defined(HWY_TARGET_TOGGLE))
|
||||
#ifdef HWY_NATIVE_I64MULLO
|
||||
#undef HWY_NATIVE_I64MULLO
|
||||
#else
|
||||
#define HWY_NATIVE_I64MULLO
|
||||
#endif
|
||||
|
||||
// Lane-wise multiply (low 64 bits of each product) for unsigned 8-byte lanes
// on vectors of at least 128 bits, assembled from 32-bit multiplications.
// Writing x = xh * 2^32 + xl and y = yh * 2^32 + yl, the product modulo 2^64 is
//   xl * yl + ((xl * yh + xh * yl) << 32)
// so the xh * yh term is never computed.
// Assumes MulEven returns the double-width product of the even 32-bit lanes
// and that those even lanes are the low halves of each 64-bit lane - TODO
// confirm for any target that picks up this fallback.
template <class V, class D64 = DFromV<V>, typename T = LaneType<V>,
|
||||
HWY_IF_LANE_SIZE(T, 8), HWY_IF_UNSIGNED(T), HWY_IF_GE128_D(D64)>
|
||||
HWY_API V operator*(V x, V y) {
|
||||
// Descriptor used to view the 64-bit lanes as narrower (32-bit) lanes.
RepartitionToNarrow<D64> d32;
|
||||
auto x32 = BitCast(d32, x);
|
||||
auto y32 = BitCast(d32, y);
|
||||
// xl * yl.
auto lolo = BitCast(d32, MulEven(x32, y32));
|
||||
// xl * yh: shifting y right by 32 moves its high half into the low position.
auto lohi = BitCast(d32, MulEven(x32, BitCast(d32, ShiftRight<32>(y))));
|
||||
// xh * yl.
auto hilo = BitCast(d32, MulEven(BitCast(d32, ShiftRight<32>(x)), y32));
|
||||
// Cross terms are summed as d32 vectors; only the low 32 bits of each sum
// survive the 64-bit left shift, so a carry lost between halves is harmless.
auto hi = BitCast(d32, ShiftLeft<32>(BitCast(D64{}, lohi + hilo)));
|
||||
// The low half of every 64-bit lane of hi is zero after the shift, so this
// d32 addition needs no carry from the low half into the high half.
return BitCast(D64{}, lolo + hi);
|
||||
}
|
||||
// Signed counterpart of the 64-bit lane multiply for vectors of at least 128
// bits: both operands are reinterpreted as unsigned lanes, multiplied with the
// unsigned operator*, and the result is reinterpreted back as signed. No
// separate signed algorithm is needed because the low 64 bits of a product are
// the same for signed (two's complement) and unsigned operands.
template <class V, class DI64 = DFromV<V>, typename T = LaneType<V>,
|
||||
HWY_IF_LANE_SIZE(T, 8), HWY_IF_SIGNED(T), HWY_IF_GE128_D(DI64)>
|
||||
HWY_API V operator*(V x, V y) {
|
||||
RebindToUnsigned<DI64> du64;
|
||||
return BitCast(DI64{}, BitCast(du64, x) * BitCast(du64, y));
|
||||
}
|
||||
|
||||
#endif // HWY_NATIVE_I64MULLO
|
||||
|
||||
// ================================================== Operator wrapper
|
||||
|
||||
// These targets currently cannot define operators and have already defined
|
||||
// (only) the corresponding functions such as Add.
|
||||
#if HWY_TARGET != HWY_RVV && HWY_TARGET != HWY_SVE && \
|
||||
HWY_TARGET != HWY_SVE2 && HWY_TARGET != HWY_SVE_256 && \
|
||||
HWY_TARGET != HWY_SVE2_128
|
||||
|
||||
// Named wrapper that forwards to operator+ of the vector type V.
template <class V>
|
||||
HWY_API V Add(V a, V b) {
|
||||
return a + b;
|
||||
}
|
||||
// Named wrapper that forwards to operator- of the vector type V.
template <class V>
|
||||
HWY_API V Sub(V a, V b) {
|
||||
return a - b;
|
||||
}
|
||||
|
||||
// Named wrapper that forwards to operator* of the vector type V.
template <class V>
|
||||
HWY_API V Mul(V a, V b) {
|
||||
return a * b;
|
||||
}
|
||||
// Named wrapper that forwards to operator/ of the vector type V.
template <class V>
|
||||
HWY_API V Div(V a, V b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// Named wrapper that forwards to operator<< with a vector shift count `b`.
// NOTE(review): declared without HWY_API, unlike Add/Sub/Mul/Div in this group.
template <class V>
|
||||
V Shl(V a, V b) {
|
||||
return a << b;
|
||||
}
|
||||
// Named wrapper that forwards to operator>> with a vector shift count `b`.
// NOTE(review): declared without HWY_API, unlike Add/Sub/Mul/Div in this group.
template <class V>
|
||||
V Shr(V a, V b) {
|
||||
return a >> b;
|
||||
}
|
||||
|
||||
// Named wrapper that forwards to operator==; the return type is whatever
// `a == b` yields for V.
template <class V>
|
||||
HWY_API auto Eq(V a, V b) -> decltype(a == b) {
|
||||
return a == b;
|
||||
}
|
||||
// Named wrapper that forwards to operator!=. The return type is spelled as
// decltype(a == b), i.e. it is taken from the equality comparison.
template <class V>
|
||||
HWY_API auto Ne(V a, V b) -> decltype(a == b) {
|
||||
return a != b;
|
||||
}
|
||||
// Named wrapper that forwards to operator<. The return type is spelled as
// decltype(a == b), i.e. it is taken from the equality comparison.
template <class V>
|
||||
HWY_API auto Lt(V a, V b) -> decltype(a == b) {
|
||||
return a < b;
|
||||
}
|
||||
|
||||
// Named wrapper that forwards to operator>. The return type is spelled as
// decltype(a == b), i.e. it is taken from the equality comparison.
template <class V>
|
||||
HWY_API auto Gt(V a, V b) -> decltype(a == b) {
|
||||
return a > b;
|
||||
}
|
||||
// Named wrapper that forwards to operator>=. The return type is spelled as
// decltype(a == b), i.e. it is taken from the equality comparison.
template <class V>
|
||||
HWY_API auto Ge(V a, V b) -> decltype(a == b) {
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
// Named wrapper that forwards to operator<=. The return type is spelled as
// decltype(a == b), i.e. it is taken from the equality comparison.
template <class V>
|
||||
HWY_API auto Le(V a, V b) -> decltype(a == b) {
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
#endif // HWY_TARGET for operators
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace hwy
|
||||
|
||||
25
third_party/highway/hwy/ops/rvv-inl.h
vendored
25
third_party/highway/hwy/ops/rvv-inl.h
vendored
@@ -949,16 +949,16 @@ HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Max, fmax, _ALL)
|
||||
|
||||
// ------------------------------ Mul
|
||||
|
||||
// Only for internal use (Highway only promises Mul for 16/32-bit inputs).
|
||||
// Used by MulLower.
|
||||
namespace detail {
|
||||
HWY_RVV_FOREACH_U64(HWY_RVV_RETV_ARGVV, Mul, mul, _ALL)
|
||||
} // namespace detail
|
||||
|
||||
HWY_RVV_FOREACH_UI16(HWY_RVV_RETV_ARGVV, Mul, mul, _ALL)
|
||||
HWY_RVV_FOREACH_UI32(HWY_RVV_RETV_ARGVV, Mul, mul, _ALL)
|
||||
HWY_RVV_FOREACH_UI163264(HWY_RVV_RETV_ARGVV, Mul, mul, _ALL)
|
||||
HWY_RVV_FOREACH_F(HWY_RVV_RETV_ARGVV, Mul, fmul, _ALL)
|
||||
|
||||
// Per-target flag to prevent generic_ops-inl.h from defining i64 operator*.
|
||||
#ifdef HWY_NATIVE_I64MULLO
|
||||
#undef HWY_NATIVE_I64MULLO
|
||||
#else
|
||||
#define HWY_NATIVE_I64MULLO
|
||||
#endif
|
||||
|
||||
// ------------------------------ MulHigh
|
||||
|
||||
// Only for internal use (Highway only promises MulHigh for 16-bit inputs).
|
||||
@@ -2019,6 +2019,11 @@ HWY_API VFromD<Simd<uint16_t, N, kPow2>> DemoteTo(
|
||||
HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(int, SEW, LMUL) v) { \
|
||||
return vfcvt_f_x_v_f##SEW##LMUL(v, Lanes(d)); \
|
||||
} \
|
||||
template <size_t N> \
|
||||
HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo( \
|
||||
HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(uint, SEW, LMUL) v) {\
|
||||
return vfcvt_f_xu_v_f##SEW##LMUL(v, Lanes(d)); \
|
||||
} \
|
||||
/* Truncates (rounds toward zero). */ \
|
||||
template <size_t N> \
|
||||
HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(int, SEW, N, SHIFT) d, \
|
||||
@@ -3069,14 +3074,14 @@ HWY_API VFromD<DW> MulEven(const V a, const V b) {
|
||||
// There is no 64x64 vwmul.
|
||||
template <class V, HWY_IF_LANE_SIZE_V(V, 8)>
|
||||
HWY_INLINE V MulEven(const V a, const V b) {
|
||||
const auto lo = detail::Mul(a, b);
|
||||
const auto lo = Mul(a, b);
|
||||
const auto hi = detail::MulHigh(a, b);
|
||||
return OddEven(detail::Slide1Up(hi), lo);
|
||||
}
|
||||
|
||||
template <class V, HWY_IF_LANE_SIZE_V(V, 8)>
|
||||
HWY_INLINE V MulOdd(const V a, const V b) {
|
||||
const auto lo = detail::Mul(a, b);
|
||||
const auto lo = Mul(a, b);
|
||||
const auto hi = detail::MulHigh(a, b);
|
||||
return OddEven(hi, detail::Slide1Down(lo));
|
||||
}
|
||||
|
||||
96
third_party/highway/hwy/ops/scalar-inl.h
vendored
96
third_party/highway/hwy/ops/scalar-inl.h
vendored
@@ -102,7 +102,7 @@ template <typename T, typename FromT>
|
||||
HWY_API Vec1<T> BitCast(Sisd<T> /* tag */, Vec1<FromT> v) {
|
||||
static_assert(sizeof(T) <= sizeof(FromT), "Promoting is undefined");
|
||||
T to;
|
||||
CopyBytes<sizeof(FromT)>(&v.raw, &to);
|
||||
CopyBytes<sizeof(FromT)>(&v.raw, &to); // not same size - ok to shrink
|
||||
return Vec1<T>(to);
|
||||
}
|
||||
|
||||
@@ -260,21 +260,21 @@ HWY_API Mask1<TTo> RebindMask(Sisd<TTo> /*tag*/, Mask1<TFrom> m) {
|
||||
template <typename T>
|
||||
HWY_API Mask1<T> MaskFromVec(const Vec1<T> v) {
|
||||
Mask1<T> mask;
|
||||
CopyBytes<sizeof(mask.bits)>(&v.raw, &mask.bits);
|
||||
CopySameSize(&v, &mask);
|
||||
return mask;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Vec1<T> VecFromMask(const Mask1<T> mask) {
|
||||
Vec1<T> v;
|
||||
CopyBytes<sizeof(v.raw)>(&mask.bits, &v.raw);
|
||||
CopySameSize(&mask, &v);
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Vec1<T> VecFromMask(Sisd<T> /* tag */, const Mask1<T> mask) {
|
||||
Vec1<T> v;
|
||||
CopyBytes<sizeof(v.raw)>(&mask.bits, &v.raw);
|
||||
CopySameSize(&mask, &v);
|
||||
return v;
|
||||
}
|
||||
|
||||
@@ -697,10 +697,10 @@ HWY_API Vec1<float> ApproximateReciprocalSqrt(const Vec1<float> v) {
|
||||
float f = v.raw;
|
||||
const float half = f * 0.5f;
|
||||
uint32_t bits;
|
||||
CopyBytes<4>(&f, &bits);
|
||||
CopySameSize(&f, &bits);
|
||||
// Initial guess based on log2(f)
|
||||
bits = 0x5F3759DF - (bits >> 1);
|
||||
CopyBytes<4>(&bits, &f);
|
||||
CopySameSize(&bits, &f);
|
||||
// One Newton-Raphson iteration
|
||||
return Vec1<float>(f * (1.5f - (half * f * f)));
|
||||
}
|
||||
@@ -778,7 +778,7 @@ V Ceiling(const V v) {
|
||||
const bool positive = f > Float(0.0);
|
||||
|
||||
Bits bits;
|
||||
CopyBytes<sizeof(Bits)>(&v, &bits);
|
||||
CopySameSize(&v, &bits);
|
||||
|
||||
const int exponent =
|
||||
static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
|
||||
@@ -795,7 +795,7 @@ V Ceiling(const V v) {
|
||||
if (positive) bits += (kMantissaMask + 1) >> exponent;
|
||||
bits &= ~mantissa_mask;
|
||||
|
||||
CopyBytes<sizeof(Bits)>(&bits, &f);
|
||||
CopySameSize(&bits, &f);
|
||||
return V(f);
|
||||
}
|
||||
|
||||
@@ -810,7 +810,7 @@ V Floor(const V v) {
|
||||
const bool negative = f < Float(0.0);
|
||||
|
||||
Bits bits;
|
||||
CopyBytes<sizeof(Bits)>(&v, &bits);
|
||||
CopySameSize(&v, &bits);
|
||||
|
||||
const int exponent =
|
||||
static_cast<int>(((bits >> kMantissaBits) & kExponentMask) - kBias);
|
||||
@@ -827,7 +827,7 @@ V Floor(const V v) {
|
||||
if (negative) bits += (kMantissaMask + 1) >> exponent;
|
||||
bits &= ~mantissa_mask;
|
||||
|
||||
CopyBytes<sizeof(Bits)>(&bits, &f);
|
||||
CopySameSize(&bits, &f);
|
||||
return V(f);
|
||||
}
|
||||
|
||||
@@ -889,7 +889,7 @@ template <typename T>
|
||||
HWY_API Mask1<T> IsNaN(const Vec1<T> v) {
|
||||
// std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
|
||||
MakeUnsigned<T> bits;
|
||||
memcpy(&bits, &v, sizeof(v));
|
||||
CopySameSize(&v, &bits);
|
||||
bits += bits;
|
||||
bits >>= 1; // clear sign bit
|
||||
// NaN if all exponent bits are set and the mantissa is not zero.
|
||||
@@ -929,7 +929,7 @@ HWY_API Mask1<double> IsFinite(const Vec1<double> v) {
|
||||
template <typename T>
|
||||
HWY_API Vec1<T> Load(Sisd<T> /* tag */, const T* HWY_RESTRICT aligned) {
|
||||
T t;
|
||||
CopyBytes<sizeof(T)>(aligned, &t);
|
||||
CopySameSize(aligned, &t);
|
||||
return Vec1<T>(t);
|
||||
}
|
||||
|
||||
@@ -955,7 +955,7 @@ HWY_API Vec1<T> LoadDup128(Sisd<T> d, const T* HWY_RESTRICT aligned) {
|
||||
template <typename T>
|
||||
HWY_API void Store(const Vec1<T> v, Sisd<T> /* tag */,
|
||||
T* HWY_RESTRICT aligned) {
|
||||
CopyBytes<sizeof(T)>(&v.raw, aligned);
|
||||
CopySameSize(&v.raw, aligned);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -1119,7 +1119,7 @@ HWY_API Vec1<ToT> DemoteTo(Sisd<ToT> /* tag */, Vec1<FromT> from) {
|
||||
|
||||
HWY_API Vec1<float> PromoteTo(Sisd<float> /* tag */, const Vec1<float16_t> v) {
|
||||
uint16_t bits16;
|
||||
CopyBytes<2>(&v.raw, &bits16);
|
||||
CopySameSize(&v.raw, &bits16);
|
||||
const uint32_t sign = static_cast<uint32_t>(bits16 >> 15);
|
||||
const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
|
||||
const uint32_t mantissa = bits16 & 0x3FF;
|
||||
@@ -1136,7 +1136,7 @@ HWY_API Vec1<float> PromoteTo(Sisd<float> /* tag */, const Vec1<float16_t> v) {
|
||||
const uint32_t mantissa32 = mantissa << (23 - 10);
|
||||
const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
|
||||
float out;
|
||||
CopyBytes<4>(&bits32, &out);
|
||||
CopySameSize(&bits32, &out);
|
||||
return Vec1<float>(out);
|
||||
}
|
||||
|
||||
@@ -1147,7 +1147,7 @@ HWY_API Vec1<float> PromoteTo(Sisd<float> d, const Vec1<bfloat16_t> v) {
|
||||
HWY_API Vec1<float16_t> DemoteTo(Sisd<float16_t> /* tag */,
|
||||
const Vec1<float> v) {
|
||||
uint32_t bits32;
|
||||
CopyBytes<4>(&v.raw, &bits32);
|
||||
CopySameSize(&v.raw, &bits32);
|
||||
const uint32_t sign = bits32 >> 31;
|
||||
const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
|
||||
const uint32_t mantissa32 = bits32 & 0x7FFFFF;
|
||||
@@ -1158,7 +1158,7 @@ HWY_API Vec1<float16_t> DemoteTo(Sisd<float16_t> /* tag */,
|
||||
Vec1<float16_t> out;
|
||||
if (exp < -24) {
|
||||
const uint16_t zero = 0;
|
||||
CopyBytes<2>(&zero, &out.raw);
|
||||
CopySameSize(&zero, &out.raw);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -1182,7 +1182,7 @@ HWY_API Vec1<float16_t> DemoteTo(Sisd<float16_t> /* tag */,
|
||||
const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
|
||||
HWY_DASSERT(bits16 < 0x10000);
|
||||
const uint16_t narrowed = static_cast<uint16_t>(bits16); // big-endian safe
|
||||
CopyBytes<2>(&narrowed, &out.raw);
|
||||
CopySameSize(&narrowed, &out.raw);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -1379,7 +1379,7 @@ HWY_API Vec1<TI> TableLookupBytes(const Vec1<T> in, const Vec1<TI> indices) {
|
||||
uint8_t in_bytes[sizeof(T)];
|
||||
uint8_t idx_bytes[sizeof(T)];
|
||||
uint8_t out_bytes[sizeof(T)];
|
||||
CopyBytes<sizeof(T)>(&in, &in_bytes);
|
||||
CopyBytes<sizeof(T)>(&in, &in_bytes); // copy to bytes
|
||||
CopyBytes<sizeof(T)>(&indices, &idx_bytes);
|
||||
for (size_t i = 0; i < sizeof(T); ++i) {
|
||||
out_bytes[i] = in_bytes[idx_bytes[i]];
|
||||
@@ -1394,7 +1394,7 @@ HWY_API Vec1<TI> TableLookupBytesOr0(const Vec1<T> in, const Vec1<TI> indices) {
|
||||
uint8_t in_bytes[sizeof(T)];
|
||||
uint8_t idx_bytes[sizeof(T)];
|
||||
uint8_t out_bytes[sizeof(T)];
|
||||
CopyBytes<sizeof(T)>(&in, &in_bytes);
|
||||
CopyBytes<sizeof(T)>(&in, &in_bytes); // copy to bytes
|
||||
CopyBytes<sizeof(T)>(&indices, &idx_bytes);
|
||||
for (size_t i = 0; i < sizeof(T); ++i) {
|
||||
out_bytes[i] = idx_bytes[i] & 0x80 ? 0 : in_bytes[idx_bytes[i]];
|
||||
@@ -1546,62 +1546,6 @@ HWY_API Vec1<T> MaxOfLanes(Sisd<T> /* tag */, const Vec1<T> v) {
|
||||
return v;
|
||||
}
|
||||
|
||||
// ================================================== Operator wrapper
|
||||
|
||||
template <class V>
|
||||
HWY_API V Add(V a, V b) {
|
||||
return a + b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API V Sub(V a, V b) {
|
||||
return a - b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API V Mul(V a, V b) {
|
||||
return a * b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API V Div(V a, V b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
V Shl(V a, V b) {
|
||||
return a << b;
|
||||
}
|
||||
template <class V>
|
||||
V Shr(V a, V b) {
|
||||
return a >> b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Eq(V a, V b) -> decltype(a == b) {
|
||||
return a == b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Ne(V a, V b) -> decltype(a == b) {
|
||||
return a != b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Lt(V a, V b) -> decltype(a == b) {
|
||||
return a < b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Gt(V a, V b) -> decltype(a == b) {
|
||||
return a > b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Ge(V a, V b) -> decltype(a == b) {
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Le(V a, V b) -> decltype(a == b) {
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace hwy
|
||||
|
||||
113
third_party/highway/hwy/ops/wasm_128-inl.h
vendored
113
third_party/highway/hwy/ops/wasm_128-inl.h
vendored
@@ -3367,6 +3367,11 @@ HWY_API Vec128<float, N> ConvertTo(Simd<float, N, 0> /* tag */,
|
||||
const Vec128<int32_t, N> v) {
|
||||
return Vec128<float, N>{wasm_f32x4_convert_i32x4(v.raw)};
|
||||
}
|
||||
// Converts unsigned 32-bit integer lanes to float by passing the raw vector to
// the wasm_f32x4_convert_u32x4 intrinsic. The descriptor argument is unnamed
// and serves only to select this overload.
template <size_t N>
|
||||
HWY_API Vec128<float, N> ConvertTo(Simd<float, N, 0> /* tag */,
|
||||
const Vec128<uint32_t, N> v) {
|
||||
return Vec128<float, N>{wasm_f32x4_convert_u32x4(v.raw)};
|
||||
}
|
||||
// Truncates (rounds toward zero).
|
||||
template <size_t N>
|
||||
HWY_API Vec128<int32_t, N> ConvertTo(Simd<int32_t, N, 0> /* tag */,
|
||||
@@ -4348,26 +4353,52 @@ HWY_INLINE Vec128<T> MaxOfLanes(hwy::SizeTag<8> /* tag */,
|
||||
return Max(v10, v01);
|
||||
}
|
||||
|
||||
// u16/i16
|
||||
template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2), HWY_IF_GE32(T, N)>
|
||||
HWY_API Vec128<T, N> MinOfLanes(hwy::SizeTag<2> /* tag */, Vec128<T, N> v) {
|
||||
const DFromV<decltype(v)> d;
|
||||
const Repartition<int32_t, decltype(d)> d32;
|
||||
template <size_t N, HWY_IF_GE32(uint16_t, N)>
|
||||
HWY_API Vec128<uint16_t, N> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<uint16_t, N> v) {
|
||||
const Simd<uint16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(d, Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2), HWY_IF_GE32(T, N)>
|
||||
HWY_API Vec128<T, N> MaxOfLanes(hwy::SizeTag<2> /* tag */, Vec128<T, N> v) {
|
||||
const DFromV<decltype(v)> d;
|
||||
const Repartition<int32_t, decltype(d)> d32;
|
||||
// Minimum across int16_t lanes, computed on the widened view d32 (presumably
// 32-bit signed lanes, each covering two adjacent int16 lanes - TODO confirm).
// The even and odd 16-bit halves are extracted as sign-extended values,
// combined with Min, and the SizeTag<4> overload finishes the reduction.
template <size_t N, HWY_IF_GE32(int16_t, N)>
|
||||
HWY_API Vec128<int16_t, N> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<int16_t, N> v) {
|
||||
const Simd<int16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
// Shift left then right by 16 so the low half is widened with its sign; the
// high half only needs the right shift.
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
// The copy shifted left by 16 carries the result in the upper 16-bit half;
// OddEven presumably takes odd lanes from its first argument and even lanes
// from its second - TODO confirm.
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
template <size_t N, HWY_IF_GE32(uint16_t, N)>
|
||||
HWY_API Vec128<uint16_t, N> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<uint16_t, N> v) {
|
||||
const Simd<uint16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(d, Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
// Maximum across int16_t lanes, computed on the widened view d32 (presumably
// 32-bit signed lanes, each covering two adjacent int16 lanes - TODO confirm).
// The even and odd 16-bit halves are extracted as sign-extended values,
// combined with Max, and the SizeTag<4> overload finishes the reduction.
template <size_t N, HWY_IF_GE32(int16_t, N)>
|
||||
HWY_API Vec128<int16_t, N> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<int16_t, N> v) {
|
||||
const Simd<int16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
// Shift left then right by 16 so the low half is widened with its sign; the
// high half only needs the right shift.
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
// Despite its name, `min` holds the maximum here.
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
// The copy shifted left by 16 carries the result in the upper 16-bit half;
// OddEven presumably takes odd lanes from its first argument and even lanes
// from its second - TODO confirm.
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
@@ -4463,62 +4494,6 @@ HWY_INLINE VFromD<D> Max128Upper(D d, const VFromD<D> a, const VFromD<D> b) {
|
||||
return IfThenElse(Lt128Upper(d, b, a), a, b);
|
||||
}
|
||||
|
||||
// ================================================== Operator wrapper
|
||||
|
||||
template <class V>
|
||||
HWY_API V Add(V a, V b) {
|
||||
return a + b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API V Sub(V a, V b) {
|
||||
return a - b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API V Mul(V a, V b) {
|
||||
return a * b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API V Div(V a, V b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
V Shl(V a, V b) {
|
||||
return a << b;
|
||||
}
|
||||
template <class V>
|
||||
V Shr(V a, V b) {
|
||||
return a >> b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Eq(V a, V b) -> decltype(a == b) {
|
||||
return a == b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Ne(V a, V b) -> decltype(a == b) {
|
||||
return a != b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Lt(V a, V b) -> decltype(a == b) {
|
||||
return a < b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Gt(V a, V b) -> decltype(a == b) {
|
||||
return a > b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Ge(V a, V b) -> decltype(a == b) {
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Le(V a, V b) -> decltype(a == b) {
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace hwy
|
||||
|
||||
42
third_party/highway/hwy/ops/wasm_256-inl.h
vendored
42
third_party/highway/hwy/ops/wasm_256-inl.h
vendored
@@ -592,7 +592,7 @@ HWY_API Vec256<int16_t> MulHigh(const Vec256<int16_t> a,
|
||||
}
|
||||
|
||||
HWY_API Vec256<int16_t> MulFixedPoint15(Vec256<int16_t>, Vec256<int16_t>) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// Multiplies even lanes (0, 2 ..) and returns the double-width result.
|
||||
@@ -1043,7 +1043,7 @@ HWY_API Vec256<T> IfThenZeroElse(Mask256<T> mask, Vec256<T> no) {
|
||||
template <typename T>
|
||||
HWY_API Vec256 <
|
||||
T IfNegativeThenElse(Vec256<T> v, Vec256<T> yes, Vec256<T> no) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
template <typename T, HWY_IF_FLOAT(T)>
|
||||
@@ -1333,13 +1333,13 @@ HWY_API Vec256<T> GatherIndex(const Full256<T> d, const T* HWY_RESTRICT base,
|
||||
// ------------------------------ ExtractLane
|
||||
template <typename T, size_t N>
|
||||
HWY_API T ExtractLane(const Vec128<T, N> v, size_t i) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ InsertLane
|
||||
template <typename T, size_t N>
|
||||
HWY_API Vec128<T, N> InsertLane(const Vec128<T, N> v, size_t i, T t) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ GetLane
|
||||
@@ -1846,21 +1846,21 @@ HWY_API Vec256<T> Reverse(Full256<T> d, const Vec256<T> v) {
|
||||
|
||||
template <typename T>
|
||||
HWY_API Vec256<T> Reverse2(Full256<T> d, const Vec256<T> v) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ Reverse4
|
||||
|
||||
template <typename T>
|
||||
HWY_API Vec256<T> Reverse4(Full256<T> d, const Vec256<T> v) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ Reverse8
|
||||
|
||||
template <typename T>
|
||||
HWY_API Vec256<T> Reverse8(Full256<T> d, const Vec256<T> v) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ InterleaveLower
|
||||
@@ -2065,13 +2065,13 @@ HWY_API Vec256<T> ConcatEven(Full256<T> /* tag */, Vec256<T> hi, Vec256<T> lo) {
|
||||
// ------------------------------ DupEven
|
||||
template <typename T>
|
||||
HWY_API Vec256<T> DupEven(Vec256<T> v) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ DupOdd
|
||||
template <typename T>
|
||||
HWY_API Vec256<T> DupOdd(Vec256<T> v) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ OddEven
|
||||
@@ -2354,6 +2354,10 @@ HWY_API Vec256<float> ConvertTo(Full256<float> /* tag */,
|
||||
const Vec256<int32_t> v) {
|
||||
return Vec256<float>{wasm_f32x4_convert_i32x4(v.raw)};
|
||||
}
|
||||
// Converts unsigned 32-bit integer lanes to float by passing v.raw to the
// wasm_f32x4_convert_u32x4 intrinsic; the descriptor argument is unused.
// NOTE(review): the intrinsic name indicates a four-lane (f32x4) conversion,
// while the argument is a Vec256; the layout of Vec256::raw is not visible
// here - confirm that all lanes are covered.
HWY_API Vec256<float> ConvertTo(Full256<float> /* tag */,
|
||||
const Vec256<uint32_t> v) {
|
||||
return Vec256<float>{wasm_f32x4_convert_u32x4(v.raw)};
|
||||
}
|
||||
// Truncates (rounds toward zero).
|
||||
HWY_API Vec256<int32_t> ConvertTo(Full256<int32_t> /* tag */,
|
||||
const Vec256<float> v) {
|
||||
@@ -2811,7 +2815,7 @@ HWY_API Vec256<T> Compress(Vec256<T> v, const Mask256<T> mask) {
|
||||
// ------------------------------ CompressBlocksNot
|
||||
HWY_API Vec256<uint64_t> CompressBlocksNot(Vec256<uint64_t> v,
|
||||
Mask256<uint64_t> mask) {
|
||||
HWY_ASSERT(0);
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
// ------------------------------ CompressBits
|
||||
@@ -2968,22 +2972,12 @@ HWY_INLINE Vec256<T> MaxOfLanes(hwy::SizeTag<8> /* tag */,
|
||||
|
||||
// u16/i16
|
||||
template <typename T, HWY_IF_LANE_SIZE(T, 2)>
|
||||
HWY_API Vec256<T> MinOfLanes(hwy::SizeTag<2> /* tag */, Vec256<T> v) {
|
||||
const Repartition<int32_t, Full256<T>> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Full256<T>(), Or(min, ShiftLeft<16>(min)));
|
||||
HWY_API Vec256<T> MinOfLanes(hwy::SizeTag<2> /* tag */, Vec256<T> /*v*/) {
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
template <typename T, HWY_IF_LANE_SIZE(T, 2)>
|
||||
HWY_API Vec256<T> MaxOfLanes(hwy::SizeTag<2> /* tag */, Vec256<T> v) {
|
||||
const Repartition<int32_t, Full256<T>> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Full256<T>(), Or(min, ShiftLeft<16>(min)));
|
||||
HWY_API Vec256<T> MaxOfLanes(hwy::SizeTag<2> /* tag */, Vec256<T> /*v*/) {
|
||||
HWY_ASSERT(0); // Not implemented
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
208
third_party/highway/hwy/ops/x86_128-inl.h
vendored
208
third_party/highway/hwy/ops/x86_128-inl.h
vendored
@@ -17,6 +17,17 @@
|
||||
// operations when compiling for those targets.
|
||||
// External include guard in highway.h - see comment there.
|
||||
|
||||
// Must come before HWY_DIAGNOSTICS and HWY_COMPILER_GCC_ACTUAL
|
||||
#include "hwy/base.h"
|
||||
|
||||
// Avoid uninitialized warnings in GCC's emmintrin.h - see
|
||||
// https://github.com/google/highway/issues/710 and pull/902)
|
||||
HWY_DIAGNOSTICS(push)
|
||||
#if HWY_COMPILER_GCC_ACTUAL
|
||||
HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wuninitialized")
|
||||
HWY_DIAGNOSTICS_OFF(disable : 4703 6001 26494, ignored "-Wmaybe-uninitialized")
|
||||
#endif
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <stdio.h>
|
||||
#if HWY_TARGET == HWY_SSSE3
|
||||
@@ -27,8 +38,8 @@
|
||||
#endif
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include "hwy/base.h"
|
||||
#include "hwy/ops/shared-inl.h"
|
||||
|
||||
#if HWY_IS_MSAN
|
||||
@@ -1910,7 +1921,7 @@ template <typename T>
|
||||
HWY_API Vec64<T> Load(Full64<T> /* tag */, const T* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
__m128i v = _mm_setzero_si128();
|
||||
CopyBytes<8>(p, &v);
|
||||
CopyBytes<8>(p, &v); // not same size
|
||||
return Vec64<T>{v};
|
||||
#else
|
||||
return Vec64<T>{_mm_loadl_epi64(reinterpret_cast<const __m128i*>(p))};
|
||||
@@ -1921,7 +1932,7 @@ HWY_API Vec128<float, 2> Load(Full64<float> /* tag */,
|
||||
const float* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
__m128 v = _mm_setzero_ps();
|
||||
CopyBytes<8>(p, &v);
|
||||
CopyBytes<8>(p, &v); // not same size
|
||||
return Vec128<float, 2>{v};
|
||||
#else
|
||||
const __m128 hi = _mm_setzero_ps();
|
||||
@@ -1933,7 +1944,7 @@ HWY_API Vec64<double> Load(Full64<double> /* tag */,
|
||||
const double* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
__m128d v = _mm_setzero_pd();
|
||||
CopyBytes<8>(p, &v);
|
||||
CopyBytes<8>(p, &v); // not same size
|
||||
return Vec64<double>{v};
|
||||
#else
|
||||
return Vec64<double>{_mm_load_sd(p)};
|
||||
@@ -1944,7 +1955,7 @@ HWY_API Vec128<float, 1> Load(Full32<float> /* tag */,
|
||||
const float* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
__m128 v = _mm_setzero_ps();
|
||||
CopyBytes<4>(p, &v);
|
||||
CopyBytes<4>(p, &v); // not same size
|
||||
return Vec128<float, 1>{v};
|
||||
#else
|
||||
return Vec128<float, 1>{_mm_load_ss(p)};
|
||||
@@ -1957,11 +1968,11 @@ HWY_API Vec128<T, N> Load(Simd<T, N, 0> /* tag */, const T* HWY_RESTRICT p) {
|
||||
constexpr size_t kSize = sizeof(T) * N;
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
__m128 v = _mm_setzero_ps();
|
||||
CopyBytes<kSize>(p, &v);
|
||||
CopyBytes<kSize>(p, &v); // not same size
|
||||
return Vec128<T, N>{v};
|
||||
#else
|
||||
int32_t bits = 0;
|
||||
CopyBytes<kSize>(p, &bits);
|
||||
CopyBytes<kSize>(p, &bits); // not same size
|
||||
return Vec128<T, N>{_mm_cvtsi32_si128(bits)};
|
||||
#endif
|
||||
}
|
||||
@@ -2111,7 +2122,7 @@ HWY_API void StoreU(const Vec128<double> v, Full128<double> /* tag */,
|
||||
template <typename T>
|
||||
HWY_API void Store(Vec64<T> v, Full64<T> /* tag */, T* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
CopyBytes<8>(&v, p);
|
||||
CopyBytes<8>(&v, p); // not same size
|
||||
#else
|
||||
_mm_storel_epi64(reinterpret_cast<__m128i*>(p), v.raw);
|
||||
#endif
|
||||
@@ -2119,7 +2130,7 @@ HWY_API void Store(Vec64<T> v, Full64<T> /* tag */, T* HWY_RESTRICT p) {
|
||||
HWY_API void Store(const Vec128<float, 2> v, Full64<float> /* tag */,
|
||||
float* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
CopyBytes<8>(&v, p);
|
||||
CopyBytes<8>(&v, p); // not same size
|
||||
#else
|
||||
_mm_storel_pi(reinterpret_cast<__m64*>(p), v.raw);
|
||||
#endif
|
||||
@@ -2127,7 +2138,7 @@ HWY_API void Store(const Vec128<float, 2> v, Full64<float> /* tag */,
|
||||
HWY_API void Store(const Vec64<double> v, Full64<double> /* tag */,
|
||||
double* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
CopyBytes<8>(&v, p);
|
||||
CopyBytes<8>(&v, p); // not same size
|
||||
#else
|
||||
_mm_storel_pd(p, v.raw);
|
||||
#endif
|
||||
@@ -2136,12 +2147,12 @@ HWY_API void Store(const Vec64<double> v, Full64<double> /* tag */,
|
||||
// Any <= 32 bit except <float, 1>
|
||||
template <typename T, size_t N, HWY_IF_LE32(T, N)>
|
||||
HWY_API void Store(Vec128<T, N> v, Simd<T, N, 0> /* tag */, T* HWY_RESTRICT p) {
|
||||
CopyBytes<sizeof(T) * N>(&v, p);
|
||||
CopyBytes<sizeof(T) * N>(&v, p); // not same size
|
||||
}
|
||||
HWY_API void Store(const Vec128<float, 1> v, Full32<float> /* tag */,
|
||||
float* HWY_RESTRICT p) {
|
||||
#if HWY_SAFE_PARTIAL_LOAD_STORE
|
||||
CopyBytes<4>(&v, p);
|
||||
CopyBytes<4>(&v, p); // not same size
|
||||
#else
|
||||
_mm_store_ss(p, v.raw);
|
||||
#endif
|
||||
@@ -2172,7 +2183,7 @@ HWY_API void ScalarMaskedStore(Vec128<T, N> v, Mask128<T, N> m, Simd<T, N, 0> d,
|
||||
Store(BitCast(di, VecFromMask(d, m)), di, mask);
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
if (mask[i]) {
|
||||
CopyBytes<sizeof(T)>(buf + i, p + i);
|
||||
CopySameSize(buf + i, p + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3635,9 +3646,9 @@ HWY_INLINE float ExtractLane(const Vec128<float, N> v) {
|
||||
return lanes[kLane];
|
||||
#else
|
||||
// Bug in the intrinsic, returns int but should be float.
|
||||
const int bits = _mm_extract_ps(v.raw, kLane);
|
||||
const int32_t bits = _mm_extract_ps(v.raw, kLane);
|
||||
float ret;
|
||||
CopyBytes<4>(&bits, &ret);
|
||||
CopySameSize(&bits, &ret);
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
@@ -3814,7 +3825,7 @@ HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
|
||||
return Load(d, lanes);
|
||||
#else
|
||||
MakeSigned<T> ti;
|
||||
CopyBytes<sizeof(T)>(&t, &ti); // don't just cast because T might be float.
|
||||
CopySameSize(&t, &ti); // don't just cast because T might be float.
|
||||
return Vec128<T, N>{_mm_insert_epi32(v.raw, ti, kLane)};
|
||||
#endif
|
||||
}
|
||||
@@ -3830,7 +3841,7 @@ HWY_INLINE Vec128<T, N> InsertLane(const Vec128<T, N> v, T t) {
|
||||
return Load(d, lanes);
|
||||
#else
|
||||
MakeSigned<T> ti;
|
||||
CopyBytes<sizeof(T)>(&t, &ti); // don't just cast because T might be float.
|
||||
CopySameSize(&t, &ti); // don't just cast because T might be float.
|
||||
return Vec128<T, N>{_mm_insert_epi64(v.raw, ti, kLane)};
|
||||
#endif
|
||||
}
|
||||
@@ -5582,6 +5593,26 @@ HWY_API Vec128<float, N> ConvertTo(Simd<float, N, 0> /* tag */,
|
||||
return Vec128<float, N>{_mm_cvtepi32_ps(v.raw)};
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
HWY_API Vec128<float, N> ConvertTo(HWY_MAYBE_UNUSED Simd<float, N, 0> df,
|
||||
const Vec128<uint32_t, N> v) {
|
||||
#if HWY_TARGET <= HWY_AVX3
|
||||
return Vec128<float, N>{_mm_cvtepu32_ps(v.raw)};
|
||||
#else
|
||||
// Based on wim's approach (https://stackoverflow.com/questions/34066228/)
|
||||
const RebindToUnsigned<decltype(df)> du32;
|
||||
const RebindToSigned<decltype(df)> d32;
|
||||
|
||||
const auto msk_lo = Set(du32, 0xFFFF);
|
||||
const auto cnst2_16_flt = Set(df, 65536.0f); // 2^16
|
||||
|
||||
// Extract the 16 lowest/highest significant bits of v and cast to signed int
|
||||
const auto v_lo = BitCast(d32, And(v, msk_lo));
|
||||
const auto v_hi = BitCast(d32, ShiftRight<16>(v));
|
||||
return MulAdd(cnst2_16_flt, ConvertTo(df, v_hi), ConvertTo(df, v_lo));
|
||||
#endif
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
HWY_API Vec128<double, N> ConvertTo(Simd<double, N, 0> dd,
|
||||
const Vec128<int64_t, N> v) {
|
||||
@@ -5606,6 +5637,33 @@ HWY_API Vec128<double, N> ConvertTo(Simd<double, N, 0> dd,
|
||||
#endif
|
||||
}
|
||||
|
||||
template <size_t N>
|
||||
HWY_API Vec128<double, N> ConvertTo(HWY_MAYBE_UNUSED Simd<double, N, 0> dd,
|
||||
const Vec128<uint64_t, N> v) {
|
||||
#if HWY_TARGET <= HWY_AVX3
|
||||
return Vec128<double, N>{_mm_cvtepu64_pd(v.raw)};
|
||||
#else
|
||||
// Based on wim's approach (https://stackoverflow.com/questions/41144668/)
|
||||
const RebindToUnsigned<decltype(dd)> d64;
|
||||
using VU = VFromD<decltype(d64)>;
|
||||
|
||||
const VU msk_lo = Set(d64, 0xFFFFFFFF);
|
||||
const auto cnst2_32_dbl = Set(dd, 4294967296.0); // 2^32
|
||||
|
||||
// Extract the 32 lowest/highest significant bits of v
|
||||
const VU v_lo = And(v, msk_lo);
|
||||
const VU v_hi = ShiftRight<32>(v);
|
||||
|
||||
auto uint64_to_double128_fast = [&dd](VU w) HWY_ATTR {
|
||||
w = Or(w, VU{detail::BitCastToInteger(Set(dd, 0x0010000000000000).raw)});
|
||||
return BitCast(dd, w) - Set(dd, 0x0010000000000000);
|
||||
};
|
||||
|
||||
const auto v_lo_dbl = uint64_to_double128_fast(v_lo);
|
||||
return MulAdd(cnst2_32_dbl, uint64_to_double128_fast(v_hi), v_lo_dbl);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Truncates (rounds toward zero).
|
||||
template <size_t N>
|
||||
HWY_API Vec128<int32_t, N> ConvertTo(const Simd<int32_t, N, 0> di,
|
||||
@@ -5959,8 +6017,8 @@ HWY_API size_t StoreMaskBits(const Simd<T, N, 0> /* tag */,
|
||||
|
||||
// Non-full byte, need to clear the undefined upper bits.
|
||||
if (N < 8) {
|
||||
const int mask = (1 << N) - 1;
|
||||
bits[0] = static_cast<uint8_t>(bits[0] & mask);
|
||||
const int mask_bits = (1 << N) - 1;
|
||||
bits[0] = static_cast<uint8_t>(bits[0] & mask_bits);
|
||||
}
|
||||
|
||||
return kNumBytes;
|
||||
@@ -7103,24 +7161,52 @@ HWY_INLINE Vec128<T> MaxOfLanes(hwy::SizeTag<8> /* tag */,
|
||||
return Max(v10, v01);
|
||||
}
|
||||
|
||||
// u16/i16
|
||||
template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2), HWY_IF_GE32(T, N)>
|
||||
HWY_API Vec128<T, N> MinOfLanes(hwy::SizeTag<2> /* tag */, Vec128<T, N> v) {
|
||||
const Repartition<int32_t, Simd<T, N, 0>> d32;
|
||||
template <size_t N, HWY_IF_GE32(uint16_t, N)>
|
||||
HWY_API Vec128<uint16_t, N> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<uint16_t, N> v) {
|
||||
const Simd<uint16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Simd<T, N, 0>(), Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
template <typename T, size_t N, HWY_IF_LANE_SIZE(T, 2), HWY_IF_GE32(T, N)>
|
||||
HWY_API Vec128<T, N> MaxOfLanes(hwy::SizeTag<2> /* tag */, Vec128<T, N> v) {
|
||||
const Repartition<int32_t, Simd<T, N, 0>> d32;
|
||||
template <size_t N, HWY_IF_GE32(int16_t, N)>
|
||||
HWY_API Vec128<int16_t, N> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<int16_t, N> v) {
|
||||
const Simd<int16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
template <size_t N, HWY_IF_GE32(uint16_t, N)>
|
||||
HWY_API Vec128<uint16_t, N> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<uint16_t, N> v) {
|
||||
const Simd<uint16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Simd<T, N, 0>(), Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
template <size_t N, HWY_IF_GE32(int16_t, N)>
|
||||
HWY_API Vec128<int16_t, N> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec128<int16_t, N> v) {
|
||||
const Simd<int16_t, N, 0> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
@@ -7237,65 +7323,11 @@ HWY_API V Max128Upper(D d, const V a, const V b) {
|
||||
return IfVecThenElse(detail::Lt128UpperVec(d, b, a), a, b);
|
||||
}
|
||||
|
||||
// ================================================== Operator wrapper
|
||||
|
||||
// These apply to all x86_*-inl.h because there are no restrictions on V.
|
||||
|
||||
template <class V>
|
||||
HWY_API V Add(V a, V b) {
|
||||
return a + b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API V Sub(V a, V b) {
|
||||
return a - b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API V Mul(V a, V b) {
|
||||
return a * b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API V Div(V a, V b) {
|
||||
return a / b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
V Shl(V a, V b) {
|
||||
return a << b;
|
||||
}
|
||||
template <class V>
|
||||
V Shr(V a, V b) {
|
||||
return a >> b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Eq(V a, V b) -> decltype(a == b) {
|
||||
return a == b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Ne(V a, V b) -> decltype(a == b) {
|
||||
return a != b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Lt(V a, V b) -> decltype(a == b) {
|
||||
return a < b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Gt(V a, V b) -> decltype(a == b) {
|
||||
return a > b;
|
||||
}
|
||||
template <class V>
|
||||
HWY_API auto Ge(V a, V b) -> decltype(a == b) {
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
HWY_API auto Le(V a, V b) -> decltype(a == b) {
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace hwy
|
||||
HWY_AFTER_NAMESPACE();
|
||||
|
||||
// Note that the GCC warnings are not suppressed if we only wrap the *intrin.h -
|
||||
// the warning seems to be issued at the call site of intrinsics, i.e. our code.
|
||||
HWY_DIAGNOSTICS(pop)
|
||||
|
||||
100
third_party/highway/hwy/ops/x86_256-inl.h
vendored
100
third_party/highway/hwy/ops/x86_256-inl.h
vendored
@@ -49,6 +49,7 @@ HWY_DIAGNOSTICS_OFF(disable : 4703 6001 26494, ignored "-Wmaybe-uninitialized")
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#if HWY_IS_MSAN
|
||||
#include <sanitizer/msan_interface.h>
|
||||
@@ -2368,7 +2369,7 @@ HWY_API void BlendedStore(Vec256<T> v, Mask256<T> m, Full256<T> d,
|
||||
Store(BitCast(du, VecFromMask(d, m)), du, mask);
|
||||
for (size_t i = 0; i < 32 / sizeof(T); ++i) {
|
||||
if (mask[i]) {
|
||||
CopyBytes<sizeof(T)>(buf + i, p + i);
|
||||
CopySameSize(buf + i, p + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4207,6 +4208,53 @@ HWY_API Vec256<double> ConvertTo(Full256<double> dd, const Vec256<int64_t> v) {
|
||||
#endif
|
||||
}
|
||||
|
||||
HWY_API Vec256<float> ConvertTo(HWY_MAYBE_UNUSED Full256<float> df,
|
||||
const Vec256<uint32_t> v) {
|
||||
#if HWY_TARGET <= HWY_AVX3
|
||||
return Vec256<float>{_mm256_cvtepu32_ps(v.raw)};
|
||||
#else
|
||||
// Based on wim's approach (https://stackoverflow.com/questions/34066228/)
|
||||
const RebindToUnsigned<decltype(df)> du32;
|
||||
const RebindToSigned<decltype(df)> d32;
|
||||
|
||||
const auto msk_lo = Set(du32, 0xFFFF);
|
||||
const auto cnst2_16_flt = Set(df, 65536.0f); // 2^16
|
||||
|
||||
// Extract the 16 lowest/highest significant bits of v and cast to signed int
|
||||
const auto v_lo = BitCast(d32, And(v, msk_lo));
|
||||
const auto v_hi = BitCast(d32, ShiftRight<16>(v));
|
||||
|
||||
return MulAdd(cnst2_16_flt, ConvertTo(df, v_hi), ConvertTo(df, v_lo));
|
||||
#endif
|
||||
}
|
||||
|
||||
HWY_API Vec256<double> ConvertTo(HWY_MAYBE_UNUSED Full256<double> dd,
|
||||
const Vec256<uint64_t> v) {
|
||||
#if HWY_TARGET <= HWY_AVX3
|
||||
return Vec256<double>{_mm256_cvtepu64_pd(v.raw)};
|
||||
#else
|
||||
// Based on wim's approach (https://stackoverflow.com/questions/41144668/)
|
||||
const RebindToUnsigned<decltype(dd)> d64;
|
||||
using VU = VFromD<decltype(d64)>;
|
||||
|
||||
const VU msk_lo = Set(d64, 0xFFFFFFFFULL);
|
||||
const auto cnst2_32_dbl = Set(dd, 4294967296.0); // 2^32
|
||||
|
||||
// Extract the 32 lowest significant bits of v
|
||||
const VU v_lo = And(v, msk_lo);
|
||||
const VU v_hi = ShiftRight<32>(v);
|
||||
|
||||
auto uint64_to_double256_fast = [&dd](Vec256<uint64_t> w) HWY_ATTR {
|
||||
w = Or(w, Vec256<uint64_t>{
|
||||
detail::BitCastToInteger(Set(dd, 0x0010000000000000).raw)});
|
||||
return BitCast(dd, w) - Set(dd, 0x0010000000000000);
|
||||
};
|
||||
|
||||
const auto v_lo_dbl = uint64_to_double256_fast(v_lo);
|
||||
return MulAdd(cnst2_32_dbl, uint64_to_double256_fast(v_hi), v_lo_dbl);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Truncates (rounds toward zero).
|
||||
HWY_API Vec256<int32_t> ConvertTo(Full256<int32_t> d, const Vec256<float> v) {
|
||||
return detail::FixConversionOverflow(d, v, _mm256_cvttps_epi32(v.raw));
|
||||
@@ -4396,8 +4444,8 @@ HWY_API size_t StoreMaskBits(const Full256<T> /* tag */, const Mask256<T> mask,
|
||||
|
||||
// Non-full byte, need to clear the undefined upper bits.
|
||||
if (N < 8) {
|
||||
const int mask = static_cast<int>((1ull << N) - 1);
|
||||
bits[0] = static_cast<uint8_t>(bits[0] & mask);
|
||||
const int mask_bits = static_cast<int>((1ull << N) - 1);
|
||||
bits[0] = static_cast<uint8_t>(bits[0] & mask_bits);
|
||||
}
|
||||
return kNumBytes;
|
||||
}
|
||||
@@ -5381,24 +5429,48 @@ HWY_INLINE Vec256<T> MaxOfLanes(hwy::SizeTag<8> /* tag */,
|
||||
return Max(v10, v01);
|
||||
}
|
||||
|
||||
// u16/i16
|
||||
template <typename T, HWY_IF_LANE_SIZE(T, 2)>
|
||||
HWY_API Vec256<T> MinOfLanes(hwy::SizeTag<2> /* tag */, Vec256<T> v) {
|
||||
const Repartition<int32_t, Full256<T>> d32;
|
||||
HWY_API Vec256<uint16_t> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec256<uint16_t> v) {
|
||||
const Full256<uint16_t> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Full256<T>(), Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
template <typename T, HWY_IF_LANE_SIZE(T, 2)>
|
||||
HWY_API Vec256<T> MaxOfLanes(hwy::SizeTag<2> /* tag */, Vec256<T> v) {
|
||||
const Repartition<int32_t, Full256<T>> d32;
|
||||
HWY_API Vec256<int16_t> MinOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec256<int16_t> v) {
|
||||
const Full256<int16_t> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(hwy::SizeTag<4>(), Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
HWY_API Vec256<uint16_t> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec256<uint16_t> v) {
|
||||
const Full256<uint16_t> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(Full256<T>(), Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
HWY_API Vec256<int16_t> MaxOfLanes(hwy::SizeTag<2> /* tag */,
|
||||
Vec256<int16_t> v) {
|
||||
const Full256<int16_t> d;
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(hwy::SizeTag<4>(), Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
68
third_party/highway/hwy/ops/x86_512-inl.h
vendored
68
third_party/highway/hwy/ops/x86_512-inl.h
vendored
@@ -1164,6 +1164,22 @@ HWY_API Vec512<uint16_t> operator*(Vec512<uint16_t> a, Vec512<uint16_t> b) {
|
||||
HWY_API Vec512<uint32_t> operator*(Vec512<uint32_t> a, Vec512<uint32_t> b) {
|
||||
return Vec512<uint32_t>{_mm512_mullo_epi32(a.raw, b.raw)};
|
||||
}
|
||||
HWY_API Vec512<uint64_t> operator*(Vec512<uint64_t> a, Vec512<uint64_t> b) {
|
||||
return Vec512<uint64_t>{_mm512_mullo_epi64(a.raw, b.raw)};
|
||||
}
|
||||
HWY_API Vec256<uint64_t> operator*(Vec256<uint64_t> a, Vec256<uint64_t> b) {
|
||||
return Vec256<uint64_t>{_mm256_mullo_epi64(a.raw, b.raw)};
|
||||
}
|
||||
HWY_API Vec128<uint64_t> operator*(Vec128<uint64_t> a, Vec128<uint64_t> b) {
|
||||
return Vec128<uint64_t>{_mm_mullo_epi64(a.raw, b.raw)};
|
||||
}
|
||||
|
||||
// Per-target flag to prevent generic_ops-inl.h from defining i64 operator*.
|
||||
#ifdef HWY_NATIVE_I64MULLO
|
||||
#undef HWY_NATIVE_I64MULLO
|
||||
#else
|
||||
#define HWY_NATIVE_I64MULLO
|
||||
#endif
|
||||
|
||||
// Signed
|
||||
HWY_API Vec512<int16_t> operator*(Vec512<int16_t> a, Vec512<int16_t> b) {
|
||||
@@ -1172,7 +1188,15 @@ HWY_API Vec512<int16_t> operator*(Vec512<int16_t> a, Vec512<int16_t> b) {
|
||||
HWY_API Vec512<int32_t> operator*(Vec512<int32_t> a, Vec512<int32_t> b) {
|
||||
return Vec512<int32_t>{_mm512_mullo_epi32(a.raw, b.raw)};
|
||||
}
|
||||
|
||||
HWY_API Vec512<int64_t> operator*(Vec512<int64_t> a, Vec512<int64_t> b) {
|
||||
return Vec512<int64_t>{_mm512_mullo_epi64(a.raw, b.raw)};
|
||||
}
|
||||
HWY_API Vec256<int64_t> operator*(Vec256<int64_t> a, Vec256<int64_t> b) {
|
||||
return Vec256<int64_t>{_mm256_mullo_epi64(a.raw, b.raw)};
|
||||
}
|
||||
HWY_API Vec128<int64_t> operator*(Vec128<int64_t> a, Vec128<int64_t> b) {
|
||||
return Vec128<int64_t>{_mm_mullo_epi64(a.raw, b.raw)};
|
||||
}
|
||||
// Returns the upper 16 bits of a * b in each lane.
|
||||
HWY_API Vec512<uint16_t> MulHigh(Vec512<uint16_t> a, Vec512<uint16_t> b) {
|
||||
return Vec512<uint16_t>{_mm512_mulhi_epu16(a.raw, b.raw)};
|
||||
@@ -3399,6 +3423,16 @@ HWY_API Vec512<double> ConvertTo(Full512<double> /* tag */,
|
||||
return Vec512<double>{_mm512_cvtepi64_pd(v.raw)};
|
||||
}
|
||||
|
||||
HWY_API Vec512<float> ConvertTo(Full512<float> /* tag*/,
|
||||
const Vec512<uint32_t> v) {
|
||||
return Vec512<float>{_mm512_cvtepu32_ps(v.raw)};
|
||||
}
|
||||
|
||||
HWY_API Vec512<double> ConvertTo(Full512<double> /* tag*/,
|
||||
const Vec512<uint64_t> v) {
|
||||
return Vec512<double>{_mm512_cvtepu64_pd(v.raw)};
|
||||
}
|
||||
|
||||
// Truncates (rounds toward zero).
|
||||
HWY_API Vec512<int32_t> ConvertTo(Full512<int32_t> d, const Vec512<float> v) {
|
||||
return detail::FixConversionOverflow(d, v, _mm512_cvttps_epi32(v.raw));
|
||||
@@ -4231,14 +4265,22 @@ HWY_API Vec512<float> MinOfLanes(Full512<float> d, Vec512<float> v) {
|
||||
HWY_API Vec512<double> MinOfLanes(Full512<double> d, Vec512<double> v) {
|
||||
return Set(d, _mm512_reduce_min_pd(v.raw));
|
||||
}
|
||||
template <typename T, HWY_IF_LANE_SIZE(T, 2)>
|
||||
HWY_API Vec512<T> MinOfLanes(Full512<T> d, Vec512<T> v) {
|
||||
const Repartition<int32_t, decltype(d)> d32;
|
||||
HWY_API Vec512<uint16_t> MinOfLanes(Full512<uint16_t> d, Vec512<uint16_t> v) {
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(d, Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
HWY_API Vec512<int16_t> MinOfLanes(Full512<int16_t> d, Vec512<int16_t> v) {
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MinOfLanes(d32, Min(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
// Returns the maximum in each lane.
|
||||
@@ -4260,14 +4302,22 @@ HWY_API Vec512<float> MaxOfLanes(Full512<float> d, Vec512<float> v) {
|
||||
HWY_API Vec512<double> MaxOfLanes(Full512<double> d, Vec512<double> v) {
|
||||
return Set(d, _mm512_reduce_max_pd(v.raw));
|
||||
}
|
||||
template <typename T, HWY_IF_LANE_SIZE(T, 2)>
|
||||
HWY_API Vec512<T> MaxOfLanes(Full512<T> d, Vec512<T> v) {
|
||||
const Repartition<int32_t, decltype(d)> d32;
|
||||
HWY_API Vec512<uint16_t> MaxOfLanes(Full512<uint16_t> d, Vec512<uint16_t> v) {
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
const auto even = And(BitCast(d32, v), Set(d32, 0xFFFF));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return BitCast(d, Or(min, ShiftLeft<16>(min)));
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
HWY_API Vec512<int16_t> MaxOfLanes(Full512<int16_t> d, Vec512<int16_t> v) {
|
||||
const RepartitionToWide<decltype(d)> d32;
|
||||
// Sign-extend
|
||||
const auto even = ShiftRight<16>(ShiftLeft<16>(BitCast(d32, v)));
|
||||
const auto odd = ShiftRight<16>(BitCast(d32, v));
|
||||
const auto min = MaxOfLanes(d32, Max(even, odd));
|
||||
// Also broadcast into odd lanes.
|
||||
return OddEven(BitCast(d, ShiftLeft<16>(min)), BitCast(d, min));
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
|
||||
1
third_party/highway/hwy/print-inl.h
vendored
1
third_party/highway/hwy/print-inl.h
vendored
@@ -15,7 +15,6 @@
|
||||
|
||||
// Print() function
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "hwy/aligned_allocator.h"
|
||||
|
||||
3
third_party/highway/hwy/print.cc
vendored
3
third_party/highway/hwy/print.cc
vendored
@@ -15,6 +15,9 @@
|
||||
|
||||
#include "hwy/print.h"
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
5
third_party/highway/hwy/targets.cc
vendored
5
third_party/highway/hwy/targets.cc
vendored
@@ -15,6 +15,9 @@
|
||||
|
||||
#include "hwy/targets.h"
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h> // PRIx64
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
@@ -23,7 +26,7 @@
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "hwy/per_target.h"
|
||||
#include "hwy/per_target.h" // VectorBytes
|
||||
|
||||
#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
|
||||
#include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
#define HWY_TARGET_INCLUDE "tests/blockwise_shift_test.cc"
|
||||
@@ -63,17 +63,17 @@ struct TestShiftBytes {
|
||||
auto expected = AllocateAligned<T>(N);
|
||||
uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
|
||||
|
||||
const size_t kBlockSize = HWY_MIN(N8, 16);
|
||||
for (size_t block = 0; block < N8; block += kBlockSize) {
|
||||
const size_t block_size = HWY_MIN(N8, 16);
|
||||
for (size_t block = 0; block < N8; block += block_size) {
|
||||
expected_bytes[block] = 0;
|
||||
memcpy(expected_bytes + block + 1, in_bytes + block, kBlockSize - 1);
|
||||
memcpy(expected_bytes + block + 1, in_bytes + block, block_size - 1);
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(v));
|
||||
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(d, v));
|
||||
|
||||
for (size_t block = 0; block < N8; block += kBlockSize) {
|
||||
memcpy(expected_bytes + block, in_bytes + block + 1, kBlockSize - 1);
|
||||
expected_bytes[block + kBlockSize - 1] = 0;
|
||||
for (size_t block = 0; block < N8; block += block_size) {
|
||||
memcpy(expected_bytes + block, in_bytes + block + 1, block_size - 1);
|
||||
expected_bytes[block + block_size - 1] = 0;
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightBytes<1>(d, v));
|
||||
#else
|
||||
@@ -152,7 +152,7 @@ template <int kBytes>
|
||||
struct TestCombineShiftRightBytes {
|
||||
template <class T, class D>
|
||||
HWY_NOINLINE void operator()(T, D d) {
|
||||
const size_t kBlockSize = 16;
|
||||
constexpr size_t kBlockSize = 16;
|
||||
static_assert(kBytes < kBlockSize, "Shift count is per block");
|
||||
const Repartition<uint8_t, D> d8;
|
||||
const size_t N8 = Lanes(d8);
|
||||
@@ -170,6 +170,7 @@ struct TestCombineShiftRightBytes {
|
||||
lo_bytes[i] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
|
||||
}
|
||||
for (size_t i = 0; i < N8; i += kBlockSize) {
|
||||
// Arguments are not the same size.
|
||||
CopyBytes<kBlockSize>(&lo_bytes[i], combined);
|
||||
CopyBytes<kBlockSize>(&hi_bytes[i], combined + kBlockSize);
|
||||
CopyBytes<kBlockSize>(combined + kBytes, &expected_bytes[i]);
|
||||
@@ -194,7 +195,7 @@ struct TestCombineShiftRightLanes {
|
||||
auto hi_bytes = AllocateAligned<uint8_t>(N8);
|
||||
auto lo_bytes = AllocateAligned<uint8_t>(N8);
|
||||
auto expected_bytes = AllocateAligned<uint8_t>(N8);
|
||||
const size_t kBlockSize = 16;
|
||||
constexpr size_t kBlockSize = 16;
|
||||
uint8_t combined[2 * kBlockSize];
|
||||
|
||||
// Random inputs in each lane
|
||||
@@ -205,6 +206,7 @@ struct TestCombineShiftRightLanes {
|
||||
lo_bytes[i] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
|
||||
}
|
||||
for (size_t i = 0; i < N8; i += kBlockSize) {
|
||||
// Arguments are not the same size.
|
||||
CopyBytes<kBlockSize>(&lo_bytes[i], combined);
|
||||
CopyBytes<kBlockSize>(&hi_bytes[i], combined + kBlockSize);
|
||||
CopyBytes<kBlockSize>(combined + kLanes * sizeof(T),
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
#define HWY_TARGET_INCLUDE "tests/combine_test.cc"
|
||||
|
||||
32
third_party/highway/hwy/tests/compress_test.cc
vendored
32
third_party/highway/hwy/tests/compress_test.cc
vendored
@@ -13,7 +13,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <inttypes.h> // PRIu64
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h> // memset
|
||||
@@ -44,19 +43,16 @@ void CheckStored(D d, DI di, size_t expected_pos, size_t actual_pos,
|
||||
const AlignedFreeUniquePtr<T[]>& expected, const T* actual_u,
|
||||
int line) {
|
||||
if (expected_pos != actual_pos) {
|
||||
hwy::Abort(
|
||||
__FILE__, line,
|
||||
"Size mismatch for %s: expected %" PRIu64 ", actual %" PRIu64 "\n",
|
||||
TypeName(T(), Lanes(d)).c_str(), static_cast<uint64_t>(expected_pos),
|
||||
static_cast<uint64_t>(actual_pos));
|
||||
hwy::Abort(__FILE__, line, "Size mismatch for %s: expected %d, actual %d\n",
|
||||
TypeName(T(), Lanes(d)).c_str(), static_cast<int>(expected_pos),
|
||||
static_cast<int>(actual_pos));
|
||||
}
|
||||
// Modified from AssertVecEqual - we may not be checking all lanes.
|
||||
for (size_t i = 0; i < num_to_check; ++i) {
|
||||
if (!IsEqual(expected[i], actual_u[i])) {
|
||||
const size_t N = Lanes(d);
|
||||
fprintf(stderr, "Mismatch at i=%" PRIu64 " of %" PRIu64 ", line %d:\n\n",
|
||||
static_cast<uint64_t>(i), static_cast<uint64_t>(num_to_check),
|
||||
line);
|
||||
fprintf(stderr, "Mismatch at i=%d of %d, line %d:\n\n",
|
||||
static_cast<int>(i), static_cast<int>(num_to_check), line);
|
||||
Print(di, "mask", Load(di, mask_lanes.get()), 0, N);
|
||||
Print(d, "in", Load(d, in.get()), 0, N);
|
||||
Print(d, "expect", Load(d, expected.get()), 0, N);
|
||||
@@ -97,7 +93,7 @@ struct TestCompress {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const uint64_t bits = Random32(&rng);
|
||||
in_lanes[i] = T(); // cannot initialize float16_t directly.
|
||||
CopyBytes<sizeof(T)>(&bits, &in_lanes[i]);
|
||||
CopyBytes<sizeof(T)>(&bits, &in_lanes[i]); // not same size
|
||||
mask_lanes[i] = (Random32(&rng) & 1024) ? TI(1) : TI(0);
|
||||
if (mask_lanes[i] > 0) {
|
||||
expected[expected_pos++] = in_lanes[i];
|
||||
@@ -203,8 +199,8 @@ struct TestCompressBlocks {
|
||||
for (size_t i = 0; i < N; i += 2) {
|
||||
const uint64_t bits = Random32(&rng);
|
||||
in_lanes[i + 1] = in_lanes[i] = T(); // cannot set float16_t directly.
|
||||
CopyBytes<sizeof(T)>(&bits, &in_lanes[i]);
|
||||
CopyBytes<sizeof(T)>(&bits, &in_lanes[i + 1]);
|
||||
CopyBytes<sizeof(T)>(&bits, &in_lanes[i]); // not same size
|
||||
CopyBytes<sizeof(T)>(&bits, &in_lanes[i + 1]); // not same size
|
||||
mask_lanes[i + 1] = mask_lanes[i] = TI{(Random32(&rng) & 8) ? 1 : 0};
|
||||
if (mask_lanes[i] > 0) {
|
||||
expected[expected_pos++] = in_lanes[i];
|
||||
@@ -598,8 +594,7 @@ void PrintCompress32x4Tables() {
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) {
|
||||
printf("%" PRIu64 ",",
|
||||
static_cast<uint64_t>(sizeof(T) * indices[i] + idx_byte));
|
||||
printf("%d,", static_cast<int>(sizeof(T) * indices[i] + idx_byte));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -630,8 +625,7 @@ void PrintCompressNot32x4Tables() {
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) {
|
||||
printf("%" PRIu64 ",",
|
||||
static_cast<uint64_t>(sizeof(T) * indices[i] + idx_byte));
|
||||
printf("%d,", static_cast<int>(sizeof(T) * indices[i] + idx_byte));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -662,8 +656,7 @@ void PrintCompress64x2Tables() {
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) {
|
||||
printf("%" PRIu64 ",",
|
||||
static_cast<uint64_t>(sizeof(T) * indices[i] + idx_byte));
|
||||
printf("%d,", static_cast<int>(sizeof(T) * indices[i] + idx_byte));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -694,8 +687,7 @@ void PrintCompressNot64x2Tables() {
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
for (size_t idx_byte = 0; idx_byte < sizeof(T); ++idx_byte) {
|
||||
printf("%" PRIu64 ",",
|
||||
static_cast<uint64_t>(sizeof(T) * indices[i] + idx_byte));
|
||||
printf("%d,", static_cast<int>(sizeof(T) * indices[i] + idx_byte));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
76
third_party/highway/hwy/tests/convert_test.cc
vendored
76
third_party/highway/hwy/tests/convert_test.cc
vendored
@@ -16,6 +16,9 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cmath> // std::isfinite
|
||||
|
||||
#include "hwy/base.h"
|
||||
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
@@ -155,7 +158,7 @@ struct TestPromoteTo {
|
||||
for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const uint64_t bits = rng();
|
||||
memcpy(&from[i], &bits, sizeof(T));
|
||||
CopyBytes<sizeof(T)>(&bits, &from[i]); // not same size
|
||||
expected[i] = from[i];
|
||||
}
|
||||
|
||||
@@ -235,13 +238,19 @@ AlignedFreeUniquePtr<float[]> F16TestCases(D d, size_t& padded) {
|
||||
-2.00390625f, -3.99609375f,
|
||||
// No infinity/NaN - implementation-defined due to ARM.
|
||||
};
|
||||
const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
|
||||
constexpr size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
|
||||
const size_t N = Lanes(d);
|
||||
HWY_ASSERT(N != 0);
|
||||
padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors
|
||||
auto in = AllocateAligned<float>(padded);
|
||||
auto expected = AllocateAligned<float>(padded);
|
||||
std::copy(test_cases, test_cases + kNumTestCases, in.get());
|
||||
std::fill(in.get() + kNumTestCases, in.get() + padded, 0.0f);
|
||||
size_t i = 0;
|
||||
for (; i < kNumTestCases; ++i) {
|
||||
in[i] = test_cases[i];
|
||||
}
|
||||
for (; i < padded; ++i) {
|
||||
in[i] = 0.0f;
|
||||
}
|
||||
return in;
|
||||
}
|
||||
|
||||
@@ -250,10 +259,11 @@ struct TestF16 {
|
||||
HWY_NOINLINE void operator()(TF32 /*t*/, DF32 d32) {
|
||||
#if HWY_HAVE_FLOAT16
|
||||
size_t padded;
|
||||
const size_t N = Lanes(d32); // same count for f16
|
||||
HWY_ASSERT(N != 0);
|
||||
auto in = F16TestCases(d32, padded);
|
||||
using TF16 = float16_t;
|
||||
const Rebind<TF16, DF32> d16;
|
||||
const size_t N = Lanes(d32); // same count for f16
|
||||
auto temp16 = AllocateAligned<TF16>(N);
|
||||
|
||||
for (size_t i = 0; i < padded; i += N) {
|
||||
@@ -289,13 +299,19 @@ AlignedFreeUniquePtr<float[]> BF16TestCases(D d, size_t& padded) {
|
||||
// negative +/- delta
|
||||
-2.015625f, -3.984375f,
|
||||
};
|
||||
const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
|
||||
constexpr size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
|
||||
const size_t N = Lanes(d);
|
||||
HWY_ASSERT(N != 0);
|
||||
padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors
|
||||
auto in = AllocateAligned<float>(padded);
|
||||
auto expected = AllocateAligned<float>(padded);
|
||||
std::copy(test_cases, test_cases + kNumTestCases, in.get());
|
||||
std::fill(in.get() + kNumTestCases, in.get() + padded, 0.0f);
|
||||
size_t i = 0;
|
||||
for (; i < kNumTestCases; ++i) {
|
||||
in[i] = test_cases[i];
|
||||
}
|
||||
for (; i < padded; ++i) {
|
||||
in[i] = 0.0f;
|
||||
}
|
||||
return in;
|
||||
}
|
||||
|
||||
@@ -387,10 +403,13 @@ HWY_NOINLINE void TestAllTruncate() {
|
||||
struct TestIntFromFloatHuge {
|
||||
template <typename TF, class DF>
|
||||
HWY_NOINLINE void operator()(TF /*unused*/, const DF df) {
|
||||
// Still does not work, although ARMv7 manual says that float->int
|
||||
// saturates, i.e. chooses the nearest representable value. Also causes
|
||||
// out-of-memory for MSVC.
|
||||
#if HWY_TARGET != HWY_NEON && !HWY_COMPILER_MSVC
|
||||
// The ARMv7 manual says that float->int saturates, i.e. chooses the
|
||||
// nearest representable value. This works correctly on armhf with GCC, but
|
||||
// not with clang. For reasons unknown, MSVC also runs into an out-of-memory
|
||||
// error here.
|
||||
#if HWY_COMPILER_CLANG || HWY_COMPILER_MSVC
|
||||
(void)df;
|
||||
#else
|
||||
using TI = MakeSigned<TF>;
|
||||
const Rebind<TI, DF> di;
|
||||
|
||||
@@ -406,8 +425,6 @@ struct TestIntFromFloatHuge {
|
||||
// Huge negative
|
||||
Store(Set(di, LimitsMin<TI>()), di, expected.get());
|
||||
HWY_ASSERT_VEC_EQ(di, expected.get(), ConvertTo(di, Set(df, TF(-1E20))));
|
||||
#else
|
||||
(void)df;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
@@ -451,7 +468,7 @@ class TestIntFromFloat {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
do {
|
||||
const uint64_t bits = rng();
|
||||
memcpy(&from[i], &bits, sizeof(TF));
|
||||
CopyBytes<sizeof(TF)>(&bits, &from[i]); // not same size
|
||||
} while (!std::isfinite(from[i]));
|
||||
if (from[i] >= max) {
|
||||
expected[i] = LimitsMax<TI>();
|
||||
@@ -532,6 +549,34 @@ HWY_NOINLINE void TestAllFloatFromInt() {
|
||||
ForFloatTypes(ForPartialVectors<TestFloatFromInt>());
|
||||
}
|
||||
|
||||
struct TestFloatFromUint {
|
||||
template <typename TF, class DF>
|
||||
HWY_NOINLINE void operator()(TF /*unused*/, const DF df) {
|
||||
using TU = MakeUnsigned<TF>;
|
||||
const RebindToUnsigned<DF> du;
|
||||
|
||||
// Integer positive
|
||||
HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), ConvertTo(df, Iota(du, TU(4))));
|
||||
HWY_ASSERT_VEC_EQ(df, Iota(df, TF(65535.0)),
|
||||
ConvertTo(df, Iota(du, 65535))); // 2^16-1
|
||||
if (sizeof(TF) > 4) {
|
||||
HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4294967295.0)),
|
||||
ConvertTo(df, Iota(du, 4294967295ULL))); // 2^32-1
|
||||
}
|
||||
|
||||
// Max positive
|
||||
HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMax<TU>())),
|
||||
ConvertTo(df, Set(du, LimitsMax<TU>())));
|
||||
|
||||
// Zero
|
||||
HWY_ASSERT_VEC_EQ(df, Zero(df), ConvertTo(df, Zero(du)));
|
||||
}
|
||||
};
|
||||
|
||||
// Wraps TestFloatFromUint in ForPartialVectors and hands it to ForFloatTypes.
HWY_NOINLINE void TestAllFloatFromUint() {
  const ForPartialVectors<TestFloatFromUint> test_float_from_uint;
  ForFloatTypes(test_float_from_uint);
}
|
||||
|
||||
struct TestI32F64 {
|
||||
template <typename TF, class DF>
|
||||
HWY_NOINLINE void operator()(TF /*unused*/, const DF df) {
|
||||
@@ -591,6 +636,7 @@ HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllConvertU8);
|
||||
HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllTruncate);
|
||||
HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllIntFromFloat);
|
||||
HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllFloatFromInt);
|
||||
HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllFloatFromUint);
|
||||
HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllI32F64);
|
||||
} // namespace hwy
|
||||
|
||||
|
||||
4
third_party/highway/hwy/tests/crypto_test.cc
vendored
4
third_party/highway/hwy/tests/crypto_test.cc
vendored
@@ -492,8 +492,8 @@ struct TestCLMul {
|
||||
const size_t padded = RoundUpTo(kCLMulNum, N);
|
||||
auto expected_lower = AllocateAligned<T>(padded);
|
||||
auto expected_upper = AllocateAligned<T>(padded);
|
||||
memcpy(expected_lower.get(), kCLMulLower, kCLMulNum * sizeof(T));
|
||||
memcpy(expected_upper.get(), kCLMulUpper, kCLMulNum * sizeof(T));
|
||||
CopyBytes<kCLMulNum * sizeof(T)>(kCLMulLower, expected_lower.get());
|
||||
CopyBytes<kCLMulNum * sizeof(T)>(kCLMulUpper, expected_upper.get());
|
||||
const size_t padding_size = (padded - kCLMulNum) * sizeof(T);
|
||||
memset(expected_lower.get() + kCLMulNum, 0, padding_size);
|
||||
memset(expected_upper.get() + kCLMulNum, 0, padding_size);
|
||||
|
||||
5
third_party/highway/hwy/tests/demote_test.cc
vendored
5
third_party/highway/hwy/tests/demote_test.cc
vendored
@@ -15,7 +15,6 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
#define HWY_TARGET_INCLUDE "tests/demote_test.cc"
|
||||
@@ -66,7 +65,7 @@ struct TestDemoteTo {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
do {
|
||||
const uint64_t bits = rng();
|
||||
memcpy(&from[i], &bits, sizeof(T));
|
||||
CopyBytes<sizeof(T)>(&bits, &from[i]); // not same size
|
||||
} while (!value_ok(from[i]));
|
||||
expected[i] = static_cast<ToT>(HWY_MIN(HWY_MAX(min, from[i]), max));
|
||||
}
|
||||
@@ -116,7 +115,7 @@ struct TestDemoteToFloat {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
do {
|
||||
const uint64_t bits = rng();
|
||||
memcpy(&from[i], &bits, sizeof(T));
|
||||
CopyBytes<sizeof(T)>(&bits, &from[i]); // not same size
|
||||
} while (!IsFiniteT(from[i]));
|
||||
const T magn = std::abs(from[i]);
|
||||
const T max_abs = HighestValue<ToT>();
|
||||
|
||||
6
third_party/highway/hwy/tests/float_test.cc
vendored
6
third_party/highway/hwy/tests/float_test.cc
vendored
@@ -15,7 +15,6 @@
|
||||
|
||||
// Tests some ops specific to floating-point types (Div, Round etc.)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -113,9 +112,8 @@ struct TestReciprocalSquareRoot {
|
||||
float err = lanes[i] - 0.090166f;
|
||||
if (err < 0.0f) err = -err;
|
||||
if (err >= 4E-4f) {
|
||||
HWY_ABORT("Lane %" PRIu64 "(%" PRIu64 "): actual %f err %f\n",
|
||||
static_cast<uint64_t>(i), static_cast<uint64_t>(N), lanes[i],
|
||||
err);
|
||||
HWY_ABORT("Lane %d (%d): actual %f err %f\n", static_cast<int>(i),
|
||||
static_cast<int>(N), lanes[i], err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS // before inttypes.h
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
1
third_party/highway/hwy/tests/mask_test.cc
vendored
1
third_party/highway/hwy/tests/mask_test.cc
vendored
@@ -13,7 +13,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h> // memcmp
|
||||
|
||||
7
third_party/highway/hwy/tests/mul_test.cc
vendored
7
third_party/highway/hwy/tests/mul_test.cc
vendored
@@ -13,7 +13,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -58,7 +57,7 @@ struct TestUnsignedMul {
|
||||
HWY_ASSERT_VEC_EQ(d, vmax, Mul(v1, vmax));
|
||||
|
||||
const size_t bits = sizeof(T) * 8;
|
||||
const uint64_t mask = (1ull << bits) - 1;
|
||||
const uint64_t mask = bits==64 ? (~uint64_t{0}) : (1ull << bits) - 1;
|
||||
const T max2 = (static_cast<uint64_t>(max) * max) & mask;
|
||||
HWY_ASSERT_VEC_EQ(d, Set(d, max2), Mul(vmax, vmax));
|
||||
}
|
||||
@@ -97,13 +96,13 @@ HWY_NOINLINE void TestAllMul() {
|
||||
// No u8.
|
||||
test_unsigned(uint16_t());
|
||||
test_unsigned(uint32_t());
|
||||
// No u64.
|
||||
test_unsigned(uint64_t());
|
||||
|
||||
const ForPartialVectors<TestSignedMul> test_signed;
|
||||
// No i8.
|
||||
test_signed(int16_t());
|
||||
test_signed(int32_t());
|
||||
// No i64.
|
||||
test_signed(int64_t());
|
||||
}
|
||||
|
||||
struct TestMulHigh {
|
||||
|
||||
59
third_party/highway/hwy/tests/reduction_test.cc
vendored
59
third_party/highway/hwy/tests/reduction_test.cc
vendored
@@ -13,7 +13,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -80,6 +79,35 @@ struct TestMinOfLanes {
|
||||
min = HWY_MIN(min, in_lanes[i]);
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get())));
|
||||
|
||||
// Bug #910: also check negative values
|
||||
min = HighestValue<T>();
|
||||
const T input_copy[] = {static_cast<T>(-1),
|
||||
static_cast<T>(-2),
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14};
|
||||
size_t i = 0;
|
||||
for (; i < HWY_MIN(N, sizeof(input_copy) / sizeof(T)); ++i) {
|
||||
in_lanes[i] = input_copy[i];
|
||||
min = HWY_MIN(min, input_copy[i]);
|
||||
}
|
||||
// Pad with neutral element to full vector (so we can load)
|
||||
for (; i < N; ++i) {
|
||||
in_lanes[i] = min;
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get())));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -105,6 +133,35 @@ struct TestMaxOfLanes {
|
||||
max = HWY_MAX(max, in_lanes[i]);
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get())));
|
||||
|
||||
// Bug #910: also check negative values
|
||||
max = LowestValue<T>();
|
||||
const T input_copy[] = {static_cast<T>(-1),
|
||||
static_cast<T>(-2),
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14};
|
||||
size_t i = 0;
|
||||
for (; i < HWY_MIN(N, sizeof(input_copy) / sizeof(T)); ++i) {
|
||||
in_lanes[i] = input_copy[i];
|
||||
max = HWY_MAX(max, in_lanes[i]);
|
||||
}
|
||||
// Pad with neutral element to full vector (so we can load)
|
||||
for (; i < N; ++i) {
|
||||
in_lanes[i] = max;
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get())));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
9
third_party/highway/hwy/tests/shift_test.cc
vendored
9
third_party/highway/hwy/tests/shift_test.cc
vendored
@@ -13,7 +13,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -243,7 +242,7 @@ T RightShiftNegative(T val) {
|
||||
// seen divisions replaced with shifts, so resort to bit operations.
|
||||
using TU = hwy::MakeUnsigned<T>;
|
||||
TU bits;
|
||||
CopyBytes<sizeof(T)>(&val, &bits);
|
||||
CopySameSize(&val, &bits);
|
||||
|
||||
const TU shifted = TU(bits >> kAmount);
|
||||
|
||||
@@ -252,7 +251,7 @@ T RightShiftNegative(T val) {
|
||||
const TU sign_extended = static_cast<TU>((all << num_zero) & LimitsMax<TU>());
|
||||
|
||||
bits = shifted | sign_extended;
|
||||
CopyBytes<sizeof(T)>(&bits, &val);
|
||||
CopySameSize(&bits, &val);
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -356,7 +355,7 @@ struct TestVariableSignedRightShifts {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const size_t amount = i & kMaxShift;
|
||||
const TU shifted = ~((1ull << (kMaxShift - amount)) - 1);
|
||||
CopyBytes<sizeof(T)>(&shifted, &expected[i]);
|
||||
CopySameSize(&shifted, &expected[i]);
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(Set(d, kMin), small_shifts));
|
||||
|
||||
@@ -364,7 +363,7 @@ struct TestVariableSignedRightShifts {
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
const size_t amount = kMaxShift - (i & kMaxShift);
|
||||
const TU shifted = ~((1ull << (kMaxShift - amount)) - 1);
|
||||
CopyBytes<sizeof(T)>(&shifted, &expected[i]);
|
||||
CopySameSize(&shifted, &expected[i]);
|
||||
}
|
||||
HWY_ASSERT_VEC_EQ(d, expected.get(), Shr(Set(d, kMin), large_shifts));
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
|
||||
// Target-specific helper functions for use by *_test.cc.
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "hwy/base.h"
|
||||
@@ -97,8 +96,8 @@ HWY_NOINLINE void AssertMaskEqual(D d, VecArg<Mask<D>> a, VecArg<Mask<D>> b,
|
||||
// First check whole bytes (if that many elements are still valid)
|
||||
for (; i < N / 8; ++i) {
|
||||
if (bits_a[i] != bits_b[i]) {
|
||||
fprintf(stderr, "Mismatch in byte %" PRIu64 ": %d != %d\n",
|
||||
static_cast<uint64_t>(i), bits_a[i], bits_b[i]);
|
||||
fprintf(stderr, "Mismatch in byte %d: %d != %d\n", static_cast<int>(i),
|
||||
bits_a[i], bits_b[i]);
|
||||
Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
|
||||
Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
|
||||
hwy::Abort(filename, line, "Masks not equal");
|
||||
@@ -111,8 +110,8 @@ HWY_NOINLINE void AssertMaskEqual(D d, VecArg<Mask<D>> a, VecArg<Mask<D>> b,
|
||||
const int valid_a = bits_a[i] & mask;
|
||||
const int valid_b = bits_b[i] & mask;
|
||||
if (valid_a != valid_b) {
|
||||
fprintf(stderr, "Mismatch in last byte %" PRIu64 ": %d != %d\n",
|
||||
static_cast<uint64_t>(i), valid_a, valid_b);
|
||||
fprintf(stderr, "Mismatch in last byte %d: %d != %d\n",
|
||||
static_cast<int>(i), valid_a, valid_b);
|
||||
Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
|
||||
Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
|
||||
hwy::Abort(filename, line, "Masks not equal");
|
||||
|
||||
11
third_party/highway/hwy/tests/test_util.cc
vendored
11
third_party/highway/hwy/tests/test_util.cc
vendored
@@ -15,7 +15,6 @@
|
||||
|
||||
#include "hwy/tests/test_util.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -71,8 +70,7 @@ HWY_TEST_DLLEXPORT bool IsEqual(const TypeInfo& info, const void* expected_ptr,
|
||||
CopyBytes<8>(actual_ptr, &actual);
|
||||
return ComputeUlpDelta(expected, actual) <= 1;
|
||||
} else {
|
||||
HWY_ABORT("Unexpected float size %" PRIu64 "\n",
|
||||
static_cast<uint64_t>(info.sizeof_t));
|
||||
HWY_ABORT("Unexpected float size %d\n", static_cast<int>(info.sizeof_t));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -88,10 +86,9 @@ HWY_TEST_DLLEXPORT HWY_NORETURN void PrintMismatchAndAbort(
|
||||
char actual_str[100];
|
||||
ToString(info, actual_ptr, actual_str);
|
||||
Abort(filename, line,
|
||||
"%s, %sx%" PRIu64 " lane %" PRIu64
|
||||
" mismatch: expected '%s', got '%s'.\n",
|
||||
target_name, type_name, static_cast<uint64_t>(num_lanes),
|
||||
static_cast<uint64_t>(lane), expected_str, actual_str);
|
||||
"%s, %sx%d lane %d mismatch: expected '%s', got '%s'.\n", target_name,
|
||||
type_name, static_cast<int>(num_lanes), static_cast<int>(lane),
|
||||
expected_str, actual_str);
|
||||
}
|
||||
|
||||
HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo& info,
|
||||
|
||||
4
third_party/highway/hwy/tests/test_util.h
vendored
4
third_party/highway/hwy/tests/test_util.h
vendored
@@ -105,8 +105,8 @@ TU ComputeUlpDelta(const T expected, const T actual) {
|
||||
// Compute the difference in units of last place. We do not need to check for
|
||||
// differing signs; they will result in large differences, which is fine.
|
||||
TU ux, uy;
|
||||
CopyBytes<sizeof(T)>(&expected, &ux);
|
||||
CopyBytes<sizeof(T)>(&actual, &uy);
|
||||
CopySameSize(&expected, &ux);
|
||||
CopySameSize(&actual, &uy);
|
||||
|
||||
// Avoid unsigned->signed cast: 2's complement is only guaranteed by C++20.
|
||||
const TU ulp = HWY_MAX(ux, uy) - HWY_MIN(ux, uy);
|
||||
|
||||
@@ -32,7 +32,7 @@ jobs:
|
||||
with:
|
||||
repository: libjxl/conformance
|
||||
# TODO(eustas): move ref to a global variable / file?
|
||||
ref: a6a44bbbd69830e1dc862174599ce5738a0a414f
|
||||
ref: 43d8135b50e53167ee6fee0b842b9eb15cfc4aa2
|
||||
path: conformance
|
||||
- name: Cache
|
||||
uses: actions/cache@v2
|
||||
@@ -161,7 +161,7 @@ jobs:
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
repository: libjxl/conformance
|
||||
ref: a6a44bbbd69830e1dc862174599ce5738a0a414f
|
||||
ref: 43d8135b50e53167ee6fee0b842b9eb15cfc4aa2
|
||||
path: conformance
|
||||
- name: Cache
|
||||
uses: actions/cache@v2
|
||||
|
||||
1
third_party/jpeg-xl/AUTHORS
vendored
1
third_party/jpeg-xl/AUTHORS
vendored
@@ -48,6 +48,7 @@ roland-rollo
|
||||
Samuel Leong <wvvwvvvvwvvw@gmail.com>
|
||||
Sandro <sandro.jaeckel@gmail.com>
|
||||
Stephan T. Lavavej <stl@nuwen.net>
|
||||
Thomas Bonfort <thomas.bonfort@airbus.com>
|
||||
Vincent Torri <vincent.torri@gmail.com>
|
||||
xiota
|
||||
Yonatan Nebenzhal <yonatan.nebenzhl@gmail.com>
|
||||
|
||||
7
third_party/jpeg-xl/CHANGELOG.md
vendored
7
third_party/jpeg-xl/CHANGELOG.md
vendored
@@ -7,9 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
- encoder API: new function `JxlEncoderSetFrameBitDepth` to set the bit depth
|
||||
of the input buffer.
|
||||
- decoder API: new function `JxlDecoderSetImageOutBitDepth` to set the bit depth
|
||||
of the output buffer.
|
||||
|
||||
## [0.7] - 2022-07-21
|
||||
|
||||
### Added
|
||||
- Export version information in headers.
|
||||
- decoder API: Ability to decode the content of metadata boxes:
|
||||
`JXL_DEC_BOX`, `JXL_DEC_BOX_NEED_MORE_OUTPUT`, `JxlDecoderSetBoxBuffer`,
|
||||
`JxlDecoderGetBoxType`, `JxlDecoderGetBoxSizeRaw` and
|
||||
|
||||
6
third_party/jpeg-xl/ci.sh
vendored
6
third_party/jpeg-xl/ci.sh
vendored
@@ -1394,10 +1394,8 @@ cmd_bump_version() {
|
||||
fi
|
||||
fi
|
||||
|
||||
newver="${major}.${minor}"
|
||||
if [[ "${patch}" != "0" ]]; then
|
||||
newver="${newver}.${patch}"
|
||||
fi
|
||||
newver="${major}.${minor}.${patch}"
|
||||
|
||||
echo "Bumping version to ${newver} (${major}.${minor}.${patch})"
|
||||
sed -E \
|
||||
-e "s/(set\\(JPEGXL_MAJOR_VERSION) [0-9]+\\)/\\1 ${major})/" \
|
||||
|
||||
50
third_party/jpeg-xl/lib/extras/codec_test.cc
vendored
50
third_party/jpeg-xl/lib/extras/codec_test.cc
vendored
@@ -51,9 +51,9 @@ std::string ExtensionFromCodec(Codec codec, const bool is_gray,
|
||||
case Codec::kPNG:
|
||||
return ".png";
|
||||
case Codec::kPNM:
|
||||
if (bits_per_sample == 32) return ".pfm";
|
||||
if (has_alpha) return ".pam";
|
||||
if (is_gray) return ".pgm";
|
||||
return (bits_per_sample == 32) ? ".pfm" : ".ppm";
|
||||
return is_gray ? ".pgm" : ".ppm";
|
||||
case Codec::kGIF:
|
||||
return ".gif";
|
||||
case Codec::kEXR:
|
||||
@@ -173,10 +173,11 @@ struct TestImageParams {
|
||||
bool is_gray;
|
||||
bool add_alpha;
|
||||
bool big_endian;
|
||||
bool add_extra_channels;
|
||||
|
||||
bool ShouldTestRoundtrip() const {
|
||||
if (codec == Codec::kPNG) {
|
||||
return true;
|
||||
return bits_per_sample <= 16;
|
||||
} else if (codec == Codec::kPNM) {
|
||||
// TODO(szabadka) Make PNM encoder endianness-aware.
|
||||
return ((bits_per_sample <= 16 && big_endian) ||
|
||||
@@ -213,7 +214,7 @@ struct TestImageParams {
|
||||
std::string DebugString() const {
|
||||
std::ostringstream os;
|
||||
os << "bps:" << bits_per_sample << " gr:" << is_gray << " al:" << add_alpha
|
||||
<< " be: " << big_endian;
|
||||
<< " be: " << big_endian << " ec: " << add_extra_channels;
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
@@ -233,6 +234,19 @@ void CreateTestImage(const TestImageParams& params, PackedPixelFile* ppf) {
|
||||
|
||||
PackedFrame frame(params.xsize, params.ysize, params.PixelFormat());
|
||||
FillPackedImage(params.bits_per_sample, &frame.color);
|
||||
if (params.add_extra_channels) {
|
||||
for (size_t i = 0; i < 7; ++i) {
|
||||
JxlPixelFormat ec_format = params.PixelFormat();
|
||||
ec_format.num_channels = 1;
|
||||
PackedImage ec(params.xsize, params.ysize, ec_format);
|
||||
FillPackedImage(params.bits_per_sample, &ec);
|
||||
frame.extra_channels.emplace_back(std::move(ec));
|
||||
PackedExtraChannel pec;
|
||||
pec.ec_info.bits_per_sample = params.bits_per_sample;
|
||||
pec.ec_info.type = static_cast<JxlExtraChannelType>(i);
|
||||
ppf->extra_channels_info.emplace_back(std::move(pec));
|
||||
}
|
||||
}
|
||||
ppf->frames.emplace_back(std::move(frame));
|
||||
}
|
||||
|
||||
@@ -254,8 +268,13 @@ void TestRoundTrip(const TestImageParams& params, ThreadPool* pool) {
|
||||
ASSERT_EQ(encoded.bitstreams.size(), 1);
|
||||
|
||||
PackedPixelFile ppf_out;
|
||||
ColorHints color_hints;
|
||||
if (params.codec == Codec::kPNM || params.codec == Codec::kPGX) {
|
||||
color_hints.Add("color_space",
|
||||
params.is_gray ? "Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG");
|
||||
}
|
||||
ASSERT_TRUE(DecodeBytes(Span<const uint8_t>(encoded.bitstreams[0]),
|
||||
ColorHints(), SizeConstraints(), &ppf_out));
|
||||
color_hints, SizeConstraints(), &ppf_out));
|
||||
|
||||
if (params.codec != Codec::kPNM && params.codec != Codec::kPGX &&
|
||||
params.codec != Codec::kEXR) {
|
||||
@@ -263,9 +282,21 @@ void TestRoundTrip(const TestImageParams& params, ThreadPool* pool) {
|
||||
}
|
||||
|
||||
ASSERT_EQ(ppf_out.frames.size(), 1);
|
||||
VerifySameImage(ppf_in.frames[0].color, ppf_in.info.bits_per_sample,
|
||||
ppf_out.frames[0].color, ppf_out.info.bits_per_sample,
|
||||
const auto& frame_in = ppf_in.frames[0];
|
||||
const auto& frame_out = ppf_out.frames[0];
|
||||
VerifySameImage(frame_in.color, ppf_in.info.bits_per_sample, frame_out.color,
|
||||
ppf_out.info.bits_per_sample,
|
||||
/*lossless=*/params.codec != Codec::kJPG);
|
||||
ASSERT_EQ(frame_in.extra_channels.size(), frame_out.extra_channels.size());
|
||||
ASSERT_EQ(ppf_out.extra_channels_info.size(),
|
||||
frame_out.extra_channels.size());
|
||||
for (size_t i = 0; i < frame_in.extra_channels.size(); ++i) {
|
||||
VerifySameImage(frame_in.extra_channels[i], ppf_in.info.bits_per_sample,
|
||||
frame_out.extra_channels[i], ppf_out.info.bits_per_sample,
|
||||
/*lossless=*/true);
|
||||
EXPECT_EQ(ppf_out.extra_channels_info[i].ec_info.type,
|
||||
ppf_in.extra_channels_info[i].ec_info.type);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CodecTest, TestRoundTrip) {
|
||||
@@ -285,7 +316,12 @@ TEST(CodecTest, TestRoundTrip) {
|
||||
params.is_gray = is_gray;
|
||||
params.add_alpha = add_alpha;
|
||||
params.big_endian = big_endian;
|
||||
params.add_extra_channels = false;
|
||||
TestRoundTrip(params, &pool);
|
||||
if (codec == Codec::kPNM && add_alpha) {
|
||||
params.add_extra_channels = true;
|
||||
TestRoundTrip(params, &pool);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
63
third_party/jpeg-xl/lib/extras/dec/jxl.cc
vendored
63
third_party/jpeg-xl/lib/extras/dec/jxl.cc
vendored
@@ -68,6 +68,39 @@ struct BoxProcessor {
|
||||
}
|
||||
};
|
||||
|
||||
// Writes the sample bit depth and exponent bit count implied by `data_type`
// into the two output pointers. A `data_type` that matches none of the cases
// below leaves both outputs unmodified.
void SetBitDepthFromDataType(JxlDataType data_type, uint32_t* bits_per_sample,
                             uint32_t* exponent_bits_per_sample) {
  // Stores both outputs together so each case is a single line.
  const auto store = [&](uint32_t bits, uint32_t exponent_bits) {
    *bits_per_sample = bits;
    *exponent_bits_per_sample = exponent_bits;
  };
  switch (data_type) {
    case JXL_TYPE_UINT8:
      store(8, 0);
      break;
    case JXL_TYPE_UINT16:
      store(16, 0);
      break;
    case JXL_TYPE_FLOAT16:
      store(16, 5);
      break;
    case JXL_TYPE_FLOAT:
      store(32, 8);
      break;
  }
}
|
||||
|
||||
template <typename T>
|
||||
void UpdateBitDepth(JxlBitDepth bit_depth, JxlDataType data_type, T* info) {
|
||||
if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
|
||||
SetBitDepthFromDataType(data_type, &info->bits_per_sample,
|
||||
&info->exponent_bits_per_sample);
|
||||
} else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) {
|
||||
info->bits_per_sample = bit_depth.bits_per_sample;
|
||||
info->exponent_bits_per_sample = bit_depth.exponent_bits_per_sample;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
|
||||
@@ -185,8 +218,12 @@ bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
|
||||
}
|
||||
break;
|
||||
}
|
||||
size_t released_size = JxlDecoderReleaseInput(dec);
|
||||
fprintf(stderr,
|
||||
"Input file is truncated and allow_partial_input was disabled.");
|
||||
"Input file is truncated (total bytes: %" PRIuS
|
||||
", processed bytes: %" PRIuS
|
||||
") and allow_partial_input was disabled.",
|
||||
bytes_size, bytes_size - released_size);
|
||||
return false;
|
||||
} else if (status == JXL_DEC_BOX) {
|
||||
boxes.FinalizeOutput();
|
||||
@@ -254,9 +291,11 @@ bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
|
||||
if (!have_alpha) {
|
||||
// Mark in the basic info that alpha channel was dropped.
|
||||
ppf->info.alpha_bits = 0;
|
||||
} else if (dparams.unpremultiply_alpha) {
|
||||
// Mark in the basic info that alpha was unpremultiplied.
|
||||
ppf->info.alpha_premultiplied = false;
|
||||
} else {
|
||||
if (dparams.unpremultiply_alpha) {
|
||||
// Mark in the basic info that alpha was unpremultiplied.
|
||||
ppf->info.alpha_premultiplied = false;
|
||||
}
|
||||
}
|
||||
bool alpha_found = false;
|
||||
for (uint32_t i = 0; i < ppf->info.num_extra_channels; ++i) {
|
||||
@@ -421,9 +460,21 @@ bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (JXL_DEC_SUCCESS !=
|
||||
JxlDecoderSetImageOutBitDepth(dec, &dparams.output_bitdepth)) {
|
||||
fprintf(stderr, "JxlDecoderSetImageOutBitDepth failed\n");
|
||||
return false;
|
||||
}
|
||||
UpdateBitDepth(dparams.output_bitdepth, format.data_type, &ppf->info);
|
||||
bool have_alpha = (format.num_channels == 2 || format.num_channels == 4);
|
||||
if (have_alpha) {
|
||||
// An interleaved alpha channel has the same bit depth as the color channels.
|
||||
ppf->info.alpha_bits = ppf->info.bits_per_sample;
|
||||
ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample;
|
||||
}
|
||||
JxlPixelFormat ec_format = format;
|
||||
ec_format.num_channels = 1;
|
||||
for (const auto& eci : ppf->extra_channels_info) {
|
||||
for (auto& eci : ppf->extra_channels_info) {
|
||||
frame.extra_channels.emplace_back(jxl::extras::PackedImage(
|
||||
ppf->info.xsize, ppf->info.ysize, ec_format));
|
||||
auto& ec = frame.extra_channels.back();
|
||||
@@ -446,6 +497,8 @@ bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
|
||||
fprintf(stderr, "JxlDecoderSetExtraChannelBuffer failed\n");
|
||||
return false;
|
||||
}
|
||||
UpdateBitDepth(dparams.output_bitdepth, ec_format.data_type,
|
||||
&eci.ec_info);
|
||||
}
|
||||
} else if (status == JXL_DEC_SUCCESS) {
|
||||
// Decoding finished successfully.
|
||||
|
||||
3
third_party/jpeg-xl/lib/extras/dec/jxl.h
vendored
3
third_party/jpeg-xl/lib/extras/dec/jxl.h
vendored
@@ -53,6 +53,9 @@ struct JXLDecompressParams {
|
||||
bool use_image_callback = true;
|
||||
// Whether to unpremultiply colors for associated alpha channels.
|
||||
bool unpremultiply_alpha = false;
|
||||
|
||||
// Controls the effective bit depth of the output pixels.
|
||||
JxlBitDepth output_bitdepth = {JXL_BIT_DEPTH_FROM_PIXEL_FORMAT, 0, 0};
|
||||
};
|
||||
|
||||
bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
|
||||
|
||||
76
third_party/jpeg-xl/lib/extras/dec/pnm.cc
vendored
76
third_party/jpeg-xl/lib/extras/dec/pnm.cc
vendored
@@ -24,6 +24,7 @@ struct HeaderPNM {
|
||||
size_t bits_per_sample;
|
||||
bool floating_point;
|
||||
bool big_endian;
|
||||
std::vector<JxlExtraChannelType> ec_types; // PAM
|
||||
};
|
||||
|
||||
class Parser {
|
||||
@@ -183,16 +184,20 @@ class Parser {
|
||||
Status ParseHeaderPAM(HeaderPNM* header, const uint8_t** pos) {
|
||||
size_t depth = 3;
|
||||
size_t max_val = 255;
|
||||
JXL_RETURN_IF_ERROR(SkipWhitespace());
|
||||
while (!MatchString("ENDHDR", /*skipws=*/false)) {
|
||||
JXL_RETURN_IF_ERROR(SkipWhitespace());
|
||||
if (MatchString("WIDTH")) {
|
||||
JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
|
||||
JXL_RETURN_IF_ERROR(SkipWhitespace());
|
||||
} else if (MatchString("HEIGHT")) {
|
||||
JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
|
||||
JXL_RETURN_IF_ERROR(SkipWhitespace());
|
||||
} else if (MatchString("DEPTH")) {
|
||||
JXL_RETURN_IF_ERROR(ParseUnsigned(&depth));
|
||||
JXL_RETURN_IF_ERROR(SkipWhitespace());
|
||||
} else if (MatchString("MAXVAL")) {
|
||||
JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
|
||||
JXL_RETURN_IF_ERROR(SkipWhitespace());
|
||||
} else if (MatchString("TUPLTYPE")) {
|
||||
if (MatchString("RGB_ALPHA")) {
|
||||
header->has_alpha = true;
|
||||
@@ -209,6 +214,20 @@ class Parser {
|
||||
} else if (MatchString("BLACKANDWHITE")) {
|
||||
header->is_gray = true;
|
||||
max_val = 1;
|
||||
} else if (MatchString("Alpha")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_ALPHA);
|
||||
} else if (MatchString("Depth")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_DEPTH);
|
||||
} else if (MatchString("SpotColor")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_SPOT_COLOR);
|
||||
} else if (MatchString("SelectionMask")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_SELECTION_MASK);
|
||||
} else if (MatchString("Black")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_BLACK);
|
||||
} else if (MatchString("CFA")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_CFA);
|
||||
} else if (MatchString("Thermal")) {
|
||||
header->ec_types.push_back(JXL_CHANNEL_THERMAL);
|
||||
} else {
|
||||
return JXL_FAILURE("PAM: unknown TUPLTYPE");
|
||||
}
|
||||
@@ -223,7 +242,7 @@ class Parser {
|
||||
}
|
||||
size_t num_channels = header->is_gray ? 1 : 3;
|
||||
if (header->has_alpha) num_channels++;
|
||||
if (num_channels != depth) {
|
||||
if (num_channels + header->ec_types.size() != depth) {
|
||||
return JXL_FAILURE("PAM: bad DEPTH");
|
||||
}
|
||||
if (max_val == 0 || max_val >= 65536) {
|
||||
@@ -341,7 +360,17 @@ Status DecodeImagePNM(const Span<const uint8_t> bytes,
|
||||
ppf->info.alpha_bits = (header.has_alpha ? ppf->info.bits_per_sample : 0);
|
||||
ppf->info.alpha_exponent_bits = 0;
|
||||
ppf->info.num_color_channels = (header.is_gray ? 1 : 3);
|
||||
ppf->info.num_extra_channels = (header.has_alpha ? 1 : 0);
|
||||
uint32_t num_alpha_channels = (header.has_alpha ? 1 : 0);
|
||||
uint32_t num_interleaved_channels =
|
||||
ppf->info.num_color_channels + num_alpha_channels;
|
||||
ppf->info.num_extra_channels = num_alpha_channels + header.ec_types.size();
|
||||
|
||||
for (auto type : header.ec_types) {
|
||||
PackedExtraChannel pec;
|
||||
pec.ec_info.bits_per_sample = ppf->info.bits_per_sample;
|
||||
pec.ec_info.type = type;
|
||||
ppf->extra_channels_info.emplace_back(std::move(pec));
|
||||
}
|
||||
|
||||
JxlDataType data_type;
|
||||
if (header.floating_point) {
|
||||
@@ -356,27 +385,50 @@ Status DecodeImagePNM(const Span<const uint8_t> bytes,
|
||||
}
|
||||
|
||||
const JxlPixelFormat format{
|
||||
/*num_channels=*/ppf->info.num_color_channels +
|
||||
ppf->info.num_extra_channels,
|
||||
/*num_channels=*/num_interleaved_channels,
|
||||
/*data_type=*/data_type,
|
||||
/*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
|
||||
/*align=*/0,
|
||||
};
|
||||
const JxlPixelFormat ec_format{1, format.data_type, format.endianness, 0};
|
||||
ppf->frames.clear();
|
||||
ppf->frames.emplace_back(header.xsize, header.ysize, format);
|
||||
auto* frame = &ppf->frames.back();
|
||||
|
||||
for (size_t i = 0; i < header.ec_types.size(); ++i) {
|
||||
frame->extra_channels.emplace_back(header.xsize, header.ysize, ec_format);
|
||||
}
|
||||
size_t pnm_remaining_size = bytes.data() + bytes.size() - pos;
|
||||
if (pnm_remaining_size < frame->color.pixels_size) {
|
||||
return JXL_FAILURE("PNM file too small");
|
||||
}
|
||||
const bool flipped_y = header.bits_per_sample == 32; // PFMs are flipped
|
||||
|
||||
uint8_t* out = reinterpret_cast<uint8_t*>(frame->color.pixels());
|
||||
for (size_t y = 0; y < header.ysize; ++y) {
|
||||
size_t y_in = flipped_y ? header.ysize - 1 - y : y;
|
||||
const uint8_t* row_in = &pos[y_in * frame->color.stride];
|
||||
uint8_t* row_out = &out[y * frame->color.stride];
|
||||
memcpy(row_out, row_in, frame->color.stride);
|
||||
std::vector<uint8_t*> ec_out(header.ec_types.size());
|
||||
for (size_t i = 0; i < ec_out.size(); ++i) {
|
||||
ec_out[i] = reinterpret_cast<uint8_t*>(frame->extra_channels[i].pixels());
|
||||
}
|
||||
if (ec_out.empty()) {
|
||||
const bool flipped_y = header.bits_per_sample == 32; // PFMs are flipped
|
||||
for (size_t y = 0; y < header.ysize; ++y) {
|
||||
size_t y_in = flipped_y ? header.ysize - 1 - y : y;
|
||||
const uint8_t* row_in = &pos[y_in * frame->color.stride];
|
||||
uint8_t* row_out = &out[y * frame->color.stride];
|
||||
memcpy(row_out, row_in, frame->color.stride);
|
||||
}
|
||||
} else {
|
||||
size_t pwidth = PackedImage::BitsPerChannel(data_type) / 8;
|
||||
for (size_t y = 0; y < header.ysize; ++y) {
|
||||
for (size_t x = 0; x < header.xsize; ++x) {
|
||||
memcpy(out, pos, frame->color.pixel_stride());
|
||||
out += frame->color.pixel_stride();
|
||||
pos += frame->color.pixel_stride();
|
||||
for (auto& p : ec_out) {
|
||||
memcpy(p, pos, pwidth);
|
||||
pos += pwidth;
|
||||
p += pwidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
159
third_party/jpeg-xl/lib/extras/dec_group_jpeg.cc
vendored
Normal file
159
third_party/jpeg-xl/lib/extras/dec_group_jpeg.cc
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "lib/extras/dec_group_jpeg.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
#define HWY_TARGET_INCLUDE "lib/extras/dec_group_jpeg.cc"
|
||||
#include <hwy/foreach_target.h>
|
||||
#include <hwy/highway.h>
|
||||
|
||||
#include "lib/jxl/base/status.h"
|
||||
#include "lib/jxl/dct_scales.h"
|
||||
#include "lib/jxl/dec_transforms-inl.h"
|
||||
#include "lib/jxl/render_pipeline/render_pipeline.h"
|
||||
HWY_BEFORE_NAMESPACE();
|
||||
namespace jxl {
|
||||
namespace HWY_NAMESPACE {
|
||||
|
||||
// These templates are not found via ADL.
|
||||
using hwy::HWY_NAMESPACE::And;
|
||||
using hwy::HWY_NAMESPACE::AndNot;
|
||||
using hwy::HWY_NAMESPACE::ApproximateReciprocal;
|
||||
using hwy::HWY_NAMESPACE::Gt;
|
||||
using hwy::HWY_NAMESPACE::IfThenElse;
|
||||
using hwy::HWY_NAMESPACE::IfThenElseZero;
|
||||
using hwy::HWY_NAMESPACE::Lt;
|
||||
using hwy::HWY_NAMESPACE::Rebind;
|
||||
using hwy::HWY_NAMESPACE::Vec;
|
||||
using hwy::HWY_NAMESPACE::Xor;
|
||||
|
||||
using D = HWY_FULL(float);
|
||||
using DI = HWY_FULL(int32_t);
|
||||
constexpr D d;
|
||||
constexpr DI di;
|
||||
|
||||
template <class DI>
|
||||
HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias(
|
||||
DI di, const size_t c, const Vec<DI> quant_i,
|
||||
const float* HWY_RESTRICT biases) {
|
||||
const Rebind<float, DI> df;
|
||||
|
||||
const auto quant = ConvertTo(df, quant_i);
|
||||
|
||||
// Compare |quant|, keep sign bit for negating result.
|
||||
const auto kSign = BitCast(df, Set(di, INT32_MIN));
|
||||
const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig
|
||||
const auto abs_quant = AndNot(kSign, quant);
|
||||
|
||||
// If |x| is 1, kZeroBias creates a different bias for each channel.
|
||||
// We're implementing the following:
|
||||
// if (quant == 0) return 0;
|
||||
// if (quant == 1) return biases[c];
|
||||
// if (quant == -1) return -biases[c];
|
||||
// return quant - biases[3] / quant;
|
||||
|
||||
// Integer comparison is not helpful because Clang incurs bypass penalties
|
||||
// from unnecessarily mixing integer and float.
|
||||
const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
|
||||
const auto not_0 = Gt(abs_quant, Zero(df));
|
||||
|
||||
// Bitwise logic is faster than quant * biases[c].
|
||||
const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
|
||||
|
||||
// About 2E-5 worse than ReciprocalNR or division.
|
||||
const auto bias =
|
||||
NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
|
||||
|
||||
return IfThenElse(is_01, one_bias, bias);
|
||||
}
|
||||
|
||||
void DequantBlock(const int16_t* JXL_RESTRICT qblock, size_t c,
|
||||
const float* JXL_RESTRICT dequant_matrices,
|
||||
const float* JXL_RESTRICT biases, float* JXL_RESTRICT block) {
|
||||
for (size_t k = 0; k < kDCTBlockSize; k += Lanes(d)) {
|
||||
const auto mul = Load(d, dequant_matrices + c * kDCTBlockSize + k);
|
||||
Rebind<int16_t, DI> di16;
|
||||
Vec<DI> quantized = PromoteTo(di, Load(di16, qblock + k));
|
||||
const auto dequant = Mul(AdjustQuantBias(di, c, quantized, biases), mul);
|
||||
Store(dequant, d, block + k);
|
||||
}
|
||||
}
|
||||
|
||||
// Dequantizes and inverse-transforms all blocks of group `group_idx` for the
// three channels, writing the resulting pixels into the render pipeline input
// buffers. `group_dec_cache` must provide room for two blocks of
// kDCTBlockSize floats (the block itself plus scratch space).
Status DecodeGroupJpeg(const Image3S& coeffs, size_t group_idx,
                       const Rect block_rect, const YCbCrChromaSubsampling& cs,
                       const float* dequant_matrices,
                       float* JXL_RESTRICT group_dec_cache, size_t thread,
                       RenderPipelineInput& render_pipeline_input) {
  static constexpr float kDefaultQuantBias[4] = {
      1.0f - 0.05465007330715401f,
      1.0f - 0.07005449891748593f,
      1.0f - 0.049935103337343655f,
      0.145f,
  };

  HWY_ALIGN float* const block = group_dec_cache;
  HWY_ALIGN float* const scratch_space = block + kDCTBlockSize;

  const size_t hshift[3] = {cs.HShift(0), cs.HShift(1), cs.HShift(2)};
  const size_t vshift[3] = {cs.VShift(0), cs.VShift(1), cs.VShift(2)};

  for (size_t c = 0; c < 3; ++c) {
    ImageF* out_image = render_pipeline_input.GetBuffer(c).first;
    Rect out_rect = render_pipeline_input.GetBuffer(c).second;
    // Number of blocks of this (possibly subsampled) channel in the group.
    const size_t blocks_x = DivCeil(block_rect.xsize(), 1 << hshift[c]);
    const size_t blocks_y = DivCeil(block_rect.ysize(), 1 << vshift[c]);
    for (size_t by = 0; by < blocks_y; ++by) {
      float* JXL_RESTRICT out_row = out_rect.Row(out_image, by * kBlockDim);
      const size_t out_stride = out_image->PixelsPerRow();
      for (size_t bx = 0; bx < blocks_x; ++bx) {
        // Blocks of a group are stored back to back in a single image row.
        const size_t block_index = by * blocks_x + bx;
        const int16_t* qblock =
            &coeffs.PlaneRow(c, group_idx)[block_index * kDCTBlockSize];
        DequantBlock(qblock, c, dequant_matrices, kDefaultQuantBias, block);
        float* JXL_RESTRICT out_pos = out_row + bx * kBlockDim;
        // JPEG XL transposes the DCT, JPEG doesn't.
        Transpose<8, 8>::Run(DCTFrom(block, 8), DCTTo(scratch_space, 8));
        // Inverse DCT into the output buffer; `block` doubles as scratch.
        TransformToPixels(AcStrategy::DCT, scratch_space, out_pos, out_stride,
                          block);
      }
    }
  }
  return true;
}
|
||||
|
||||
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace jxl
|
||||
HWY_AFTER_NAMESPACE();
|
||||
|
||||
#if HWY_ONCE
|
||||
namespace jxl {
|
||||
namespace {
|
||||
HWY_EXPORT(DecodeGroupJpeg);
|
||||
} // namespace
|
||||
|
||||
namespace extras {
|
||||
// Public entry point: forwards every argument unchanged to the
// DecodeGroupJpeg implementation selected by HWY_DYNAMIC_DISPATCH.
Status DecodeGroupJpeg(const Image3S& coeffs, size_t group_idx,
                       const Rect block_rect, const YCbCrChromaSubsampling& cs,
                       const float* dequant_matrices,
                       float* JXL_RESTRICT group_dec_cache, size_t thread,
                       RenderPipelineInput& render_pipeline_input) {
  const Status status = HWY_DYNAMIC_DISPATCH(DecodeGroupJpeg)(
      coeffs, group_idx, block_rect, cs, dequant_matrices, group_dec_cache,
      thread, render_pipeline_input);
  return status;
}
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
#endif // HWY_ONCE
|
||||
31
third_party/jpeg-xl/lib/extras/dec_group_jpeg.h
vendored
Normal file
31
third_party/jpeg-xl/lib/extras/dec_group_jpeg.h
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef LIB_EXTRAS_DEC_GROUP_JPEG_H_
|
||||
#define LIB_EXTRAS_DEC_GROUP_JPEG_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "lib/jxl/base/status.h"
|
||||
#include "lib/jxl/frame_header.h"
|
||||
#include "lib/jxl/image.h"
|
||||
#include "lib/jxl/render_pipeline/render_pipeline.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
||||
Status DecodeGroupJpeg(const Image3S& coeffs, size_t group_idx,
|
||||
const Rect block_rect, const YCbCrChromaSubsampling& cs,
|
||||
const float* dequant_matrices,
|
||||
float* JXL_RESTRICT group_dec_cache, size_t thread,
|
||||
RenderPipelineInput& render_pipeline_input);
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
|
||||
#endif // LIB_EXTRAS_DEC_GROUP_JPEG_H_
|
||||
274
third_party/jpeg-xl/lib/extras/decode_jpeg.cc
vendored
Normal file
274
third_party/jpeg-xl/lib/extras/decode_jpeg.cc
vendored
Normal file
@@ -0,0 +1,274 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "lib/extras/decode_jpeg.h"
|
||||
|
||||
#include "lib/extras/dec_group_jpeg.h"
|
||||
#include "lib/jxl/base/status.h"
|
||||
#include "lib/jxl/color_encoding_internal.h"
|
||||
#include "lib/jxl/common.h"
|
||||
#include "lib/jxl/frame_header.h"
|
||||
#include "lib/jxl/image.h"
|
||||
#include "lib/jxl/jpeg/enc_jpeg_data.h"
|
||||
#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
|
||||
#include "lib/jxl/render_pipeline/render_pipeline.h"
|
||||
#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"
|
||||
#include "lib/jxl/render_pipeline/stage_write.h"
|
||||
#include "lib/jxl/render_pipeline/stage_ycbcr.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the rectangle, in units of 8x8 blocks, covered by group
// `group_index`; the frame's block dimensions are passed to Rect as its
// end bounds.
Rect BlockGroupRect(const FrameDimensions& frame_dim, size_t group_index) {
  // Side length of a group expressed in blocks (group_dim is in pixels).
  const size_t blocks_per_group = frame_dim.group_dim >> 3;
  const size_t group_x = group_index % frame_dim.xsize_groups;
  const size_t group_y = group_index / frame_dim.xsize_groups;
  return Rect(group_x * blocks_per_group, group_y * blocks_per_group,
              blocks_per_group, blocks_per_group, frame_dim.xsize_blocks,
              frame_dim.ysize_blocks);
}
|
||||
|
||||
// Returns the rectangle of DC group `group_index`, with origin and size in
// multiples of group_dim and the frame's block dimensions passed to Rect as
// its end bounds.
Rect DCGroupRect(const FrameDimensions& frame_dim, size_t group_index) {
  const size_t dim = frame_dim.group_dim;
  const size_t group_x = group_index % frame_dim.xsize_dc_groups;
  const size_t group_y = group_index / frame_dim.xsize_dc_groups;
  return Rect(group_x * dim, group_y * dim, dim, dim, frame_dim.xsize_blocks,
              frame_dim.ysize_blocks);
}
|
||||
|
||||
// Initializes `cs` from the sampling factors stored in `jpeg_data`.
// With three components each channel uses its own factors; with a single
// component its factors are replicated to all three channels. Any other
// component count leaves `cs` untouched and reports success.
Status SetChromaSubsamplingFromJpegData(const jpeg::JPEGData& jpeg_data,
                                        YCbCrChromaSubsampling* cs) {
  const size_t num_components = jpeg_data.components.size();
  if (num_components != 3 && num_components != 1) {
    return true;
  }
  uint8_t h_factors[3];
  uint8_t v_factors[3];
  for (size_t c = 0; c < 3; ++c) {
    const auto& source = jpeg_data.components[num_components == 1 ? 0 : c];
    h_factors[c] = source.h_samp_factor;
    v_factors[c] = source.v_samp_factor;
  }
  JXL_RETURN_IF_ERROR(cs->Set(h_factors, v_factors));
  return true;
}
|
||||
|
||||
// Decides whether the JPEG stores YCbCr (true) or RGB (false) samples.
// A file with an APP0 marker (0xE0) is treated as YCbCr. Otherwise an APP14
// 'Adobe' segment decides (transform byte 0 means RGB), and failing that the
// component IDs 'R', 'G', 'B' indicate RGB. Single-component images always
// yield true.
bool IsYCbCrJpeg(const jpeg::JPEGData& jpeg_data) {
  const size_t num_components = jpeg_data.components.size();
  const auto& marker_order = jpeg_data.marker_order;
  bool is_rgb = false;

  const bool has_jfif = std::find(marker_order.begin(), marker_order.end(),
                                  0xE0) != marker_order.end();
  if (!has_jfif) {
    // Walk the markers in file order, tracking which app_data entry belongs
    // to the current APPn marker, until an 'Adobe' segment is found.
    bool found_adobe = false;
    size_t app_index = 0;
    for (size_t m = 0; m < marker_order.size(); ++m) {
      // Only APPn markers (0xE0..0xEF) have an entry in app_data.
      if ((marker_order[m] & 0xF0) != 0xE0) continue;
      JXL_CHECK(app_index < jpeg_data.app_data.size());
      // APP14 marker
      if (marker_order[m] == 0xEE) {
        const auto& payload = jpeg_data.app_data[app_index];
        if (payload.size() == 15 && payload[3] == 'A' && payload[4] == 'd' &&
            payload[5] == 'o' && payload[6] == 'b' && payload[7] == 'e') {
          // 'Adobe' marker.
          is_rgb = payload[14] == 0;
          found_adobe = true;
          break;
        }
      }
      ++app_index;
    }

    if (!found_adobe) {
      // No 'Adobe' marker, guess from component IDs.
      is_rgb = num_components == 3 && jpeg_data.components[0].id == 'R' &&
               jpeg_data.components[1].id == 'G' &&
               jpeg_data.components[2].id == 'B';
    }
  }
  return (!is_rgb || num_components == 1);
}
|
||||
|
||||
// Returns, for each of the three internal channels, the index of the JPEG
// component that supplies it: grayscale maps every channel to component 0,
// YCbCr uses the order {1, 0, 2}, and anything else keeps the file order.
inline std::array<int, 3> JpegOrder(bool is_ycbcr, bool is_gray) {
  if (is_gray) return {{0, 0, 0}};
  if (is_ycbcr) return {{1, 0, 2}};
  return {{0, 1, 2}};
}
|
||||
|
||||
// Fills `dequant` (3 * kDCTBlockSize floats) with the per-channel
// dequantization multipliers: the JPEG quantization table of the component
// mapped to each channel, scaled by 1 / (8 * 255).
void SetDequantWeightsFromJpegData(const jpeg::JPEGData& jpeg_data,
                                   const bool is_ycbcr, float* dequant) {
  const float kDequantScale = 1.0f / (8 * 255);
  const auto component_for_channel =
      JpegOrder(is_ycbcr, jpeg_data.components.size() == 1);
  for (size_t c = 0; c < 3; c++) {
    const size_t component_index = component_for_channel[c];
    const auto& component = jpeg_data.components[component_index];
    const int32_t* table = jpeg_data.quant[component.quant_idx].values.data();
    float* channel_out = dequant + c * kDCTBlockSize;
    for (size_t k = 0; k < kDCTBlockSize; ++k) {
      channel_out[k] = table[k] * kDequantScale;
    }
  }
}
|
||||
|
||||
// Repacks the quantized coefficients of `jpeg_data` into `coeffs`, which gets
// one image row per group with that group's blocks stored back to back.
// For single-component input only channel 1 carries data; the other two
// planes are zero-filled. For non-YCbCr data, 1024 / DC-quant is added to
// the first coefficient of every block slot in the row.
void SetCoefficientsFromJpegData(const jpeg::JPEGData& jpeg_data,
                                 const FrameDimensions& frame_dim,
                                 const YCbCrChromaSubsampling& cs,
                                 const bool is_ycbcr, Image3S* coeffs) {
  const auto component_for_channel =
      JpegOrder(is_ycbcr, jpeg_data.components.size() == 1);
  *coeffs = Image3S(kGroupDim * kGroupDim, frame_dim.num_groups);
  for (size_t c = 0; c < 3; ++c) {
    if (jpeg_data.components.size() == 1 && c != 1) {
      ZeroFillImage(&coeffs->Plane(c));
      continue;
    }
    const auto& comp = jpeg_data.components[component_for_channel[c]];
    const size_t hshift = cs.HShift(c);
    const size_t vshift = cs.VShift(c);
    const int dc_quant = jpeg_data.quant[comp.quant_idx].values.data()[0];
    const int16_t dc_level = 1024 / dc_quant;
    // Number of coefficients in one row of blocks of the source component.
    const size_t src_stride = comp.width_in_blocks * kDCTBlockSize;
    for (size_t g = 0; g < frame_dim.num_groups; g++) {
      const Rect block_rect = BlockGroupRect(frame_dim, g);
      const size_t blocks_x = DivCeil(block_rect.xsize(), 1 << hshift);
      const size_t blocks_y = DivCeil(block_rect.ysize(), 1 << vshift);
      // Coefficients per row of blocks inside this group.
      const size_t dst_stride = blocks_x * kDCTBlockSize;
      const size_t first_bx = block_rect.x0() >> hshift;
      const size_t first_by = block_rect.y0() >> vshift;
      const int16_t* JXL_RESTRICT src =
          comp.coeffs.data() + (first_by * src_stride + first_bx * kDCTBlockSize);
      int16_t* JXL_RESTRICT dst = coeffs->PlaneRow(c, g);
      for (size_t by = 0; by < blocks_y; ++by) {
        memcpy(&dst[by * dst_stride], &src[by * src_stride],
               dst_stride * sizeof(dst[0]));
      }
      if (is_ycbcr) continue;
      // Offset the DC coefficient (index 0 of each block slot).
      for (size_t pos = 0; pos < coeffs->xsize(); pos += kDCTBlockSize) {
        dst[pos] += dc_level;
      }
    }
  }
}
|
||||
|
||||
std::unique_ptr<RenderPipeline> PreparePipeline(
|
||||
const YCbCrChromaSubsampling& cs, const bool is_ycbcr,
|
||||
const FrameDimensions& frame_dim, PackedImage* output) {
|
||||
RenderPipeline::Builder builder(3);
|
||||
if (!cs.Is444()) {
|
||||
for (size_t c = 0; c < 3; c++) {
|
||||
if (cs.HShift(c) != 0) {
|
||||
builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/true));
|
||||
}
|
||||
if (cs.VShift(c) != 0) {
|
||||
builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/false));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_ycbcr) {
|
||||
builder.AddStage(GetYCbCrStage());
|
||||
}
|
||||
ImageOutput main_output;
|
||||
main_output.format = output->format;
|
||||
main_output.bits_per_sample =
|
||||
PackedImage::BitsPerChannel(output->format.data_type);
|
||||
main_output.buffer = reinterpret_cast<uint8_t*>(output->pixels());
|
||||
main_output.buffer_size = output->pixels_size;
|
||||
main_output.stride = output->stride;
|
||||
std::vector<ImageOutput> extra_output;
|
||||
builder.AddStage(GetWriteToOutputStage(
|
||||
main_output, output->xsize, output->ysize,
|
||||
/*has_alpha=*/false,
|
||||
/*unpremul_alpha=*/false,
|
||||
/*alpha_c=*/0, Orientation::kIdentity, extra_output));
|
||||
return std::move(builder).Finalize(frame_dim);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Decodes the JPEG bitstream in `compressed` and appends the result as a new
// frame to `ppf`, with samples of type `output_data_type`.
//
// Also fills in ppf->info (size, number of color channels, bits per sample),
// ppf->icc and ppf->color_encoding from the JPEG data. Groups are decoded in
// parallel on `pool`.
//
// Returns an error if the bitstream cannot be parsed, if the image does not
// have exactly 1 or 3 components, or if pipeline setup or decoding fails.
Status DecodeJpeg(const std::vector<uint8_t>& compressed,
                  JxlDataType output_data_type, ThreadPool* pool,
                  PackedPixelFile* ppf) {
  jpeg::JPEGData jpeg_data;
  JXL_RETURN_IF_ERROR(jpeg::ReadJpeg(compressed.data(), compressed.size(),
                                     jpeg::JpegReadMode::kReadAll, &jpeg_data));
  const size_t xsize = jpeg_data.width;
  const size_t ysize = jpeg_data.height;
  const uint32_t nbcomp = jpeg_data.components.size();
  // The helpers below only handle grayscale and three-component images: the
  // channel mapping indexes components 0..2 whenever there is more than one
  // component, and the render pipeline is built for three channels.
  if (nbcomp != 1 && nbcomp != 3) {
    return JXL_FAILURE("Unsupported number of JPEG components: %u", nbcomp);
  }
  const bool is_ycbcr = IsYCbCrJpeg(jpeg_data);

  ppf->info.xsize = xsize;
  ppf->info.ysize = ysize;
  ppf->info.num_color_channels = nbcomp;
  ppf->info.bits_per_sample = PackedImage::BitsPerChannel(output_data_type);

  ColorEncoding color_encoding;
  JXL_RETURN_IF_ERROR(SetColorEncodingFromJpegData(jpeg_data, &color_encoding));
  PaddedBytes icc = color_encoding.ICC();
  ppf->icc.assign(icc.data(), icc.data() + icc.size());
  ConvertInternalToExternalColorEncoding(color_encoding, &ppf->color_encoding);

  YCbCrChromaSubsampling cs;
  JXL_RETURN_IF_ERROR(SetChromaSubsamplingFromJpegData(jpeg_data, &cs));

  FrameDimensions frame_dim;
  frame_dim.Set(xsize, ysize, /*group_size_shift=*/1, cs.MaxHShift(),
                cs.MaxVShift(),
                /*modular_mode=*/false, /*upsampling=*/1);

  std::vector<float> dequant(3 * kDCTBlockSize);
  SetDequantWeightsFromJpegData(jpeg_data, is_ycbcr, &dequant[0]);

  Image3S coeffs;
  SetCoefficientsFromJpegData(jpeg_data, frame_dim, cs, is_ycbcr, &coeffs);

  JxlPixelFormat format = {nbcomp, output_data_type, JXL_LITTLE_ENDIAN, 0};
  ppf->frames.emplace_back(xsize, ysize, format);
  auto& frame = ppf->frames.back();

  std::unique_ptr<RenderPipeline> render_pipeline =
      PreparePipeline(cs, is_ycbcr, frame_dim, &frame.color);
  JXL_RETURN_IF_ERROR(render_pipeline->IsInitialized());

  // Per-thread scratch: two blocks of kDCTBlockSize floats for each thread.
  hwy::AlignedFreeUniquePtr<float[]> float_memory;
  const auto allocate_storage = [&](const size_t num_threads) -> Status {
    JXL_RETURN_IF_ERROR(
        render_pipeline->PrepareForThreads(num_threads,
                                           /*use_group_ids=*/false));
    float_memory = hwy::AllocateAligned<float>(kDCTBlockSize * 2 * num_threads);
    return true;
  };
  const auto process_group = [&](const uint32_t group_index,
                                 const size_t thread) {
    RenderPipelineInput input =
        render_pipeline->GetInputBuffers(group_index, thread);
    float* group_dec_cache = float_memory.get() + thread * kDCTBlockSize * 2;
    const Rect block_rect = BlockGroupRect(frame_dim, group_index);
    JXL_CHECK(DecodeGroupJpeg(coeffs, group_index, block_rect, cs, &dequant[0],
                              group_dec_cache, thread, input));
    input.Done();
  };
  // Report pool or initialization failures to the caller instead of aborting.
  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, frame_dim.num_groups,
                                allocate_storage, process_group,
                                "Decode Groups"));

  return true;
}
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
26
third_party/jpeg-xl/lib/extras/decode_jpeg.h
vendored
Normal file
26
third_party/jpeg-xl/lib/extras/decode_jpeg.h
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef LIB_EXTRAS_DECODE_JPEG_H_
|
||||
#define LIB_EXTRAS_DECODE_JPEG_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "lib/extras/packed_image.h"
|
||||
#include "lib/jxl/base/data_parallel.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
||||
Status DecodeJpeg(const std::vector<uint8_t>& compressed,
|
||||
JxlDataType output_data_type, ThreadPool* pool,
|
||||
PackedPixelFile* ppf);
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
|
||||
#endif // LIB_EXTRAS_DECODE_JPEG_H_
|
||||
20
third_party/jpeg-xl/lib/extras/enc/apng.cc
vendored
20
third_party/jpeg-xl/lib/extras/enc/apng.cc
vendored
@@ -58,8 +58,10 @@ class APNGEncoder : public Encoder {
|
||||
std::vector<JxlPixelFormat> formats;
|
||||
for (const uint32_t num_channels : {1, 2, 3, 4}) {
|
||||
for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
|
||||
formats.push_back(JxlPixelFormat{num_channels, data_type,
|
||||
JXL_BIG_ENDIAN, /*align=*/0});
|
||||
for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
|
||||
formats.push_back(
|
||||
JxlPixelFormat{num_channels, data_type, endianness, /*align=*/0});
|
||||
}
|
||||
}
|
||||
}
|
||||
return formats;
|
||||
@@ -233,21 +235,7 @@ Status APNGEncoder::EncodePackedPixelFileToAPNG(
|
||||
} else {
|
||||
memcpy(&out[0], in, out_size);
|
||||
}
|
||||
} else if (format.data_type == JXL_TYPE_FLOAT) {
|
||||
float mul = 65535.0;
|
||||
const uint8_t* p_in = in;
|
||||
uint8_t* p_out = out.data();
|
||||
for (size_t i = 0; i < num_samples; ++i, p_in += 4, p_out += 2) {
|
||||
uint32_t val = (format.endianness == JXL_BIG_ENDIAN ? LoadBE32(p_in)
|
||||
: LoadLE32(p_in));
|
||||
float fval;
|
||||
memcpy(&fval, &val, 4);
|
||||
StoreBE16(static_cast<uint32_t>(fval * mul + 0.5), p_out);
|
||||
}
|
||||
} else {
|
||||
return JXL_FAILURE("Unsupported pixel data type");
|
||||
}
|
||||
|
||||
png_structp png_ptr;
|
||||
png_infop info_ptr;
|
||||
|
||||
|
||||
36
third_party/jpeg-xl/lib/extras/enc/encode.cc
vendored
36
third_party/jpeg-xl/lib/extras/enc/encode.cc
vendored
@@ -40,6 +40,34 @@ Status Encoder::VerifyBasicInfo(const JxlBasicInfo& info) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Succeeds when `format` matches one of AcceptedFormats() in channel count
// and data type, and also in endianness unless the data type is
// JXL_TYPE_UINT8.
Status Encoder::VerifyFormat(const JxlPixelFormat& format) const {
  for (auto accepted : AcceptedFormats()) {
    const bool same_layout = accepted.num_channels == format.num_channels &&
                             accepted.data_type == format.data_type;
    if (!same_layout) continue;
    const bool endianness_ok = accepted.data_type == JXL_TYPE_UINT8 ||
                               accepted.endianness == format.endianness;
    if (endianness_ok) return true;
  }
  return JXL_FAILURE("Format is not in the list of accepted formats.");
}
|
||||
|
||||
// Checks that `bits_per_sample` and `exponent_bits` are consistent with
// `data_type`:
//   JXL_TYPE_UINT8:   1..8 bits, no exponent bits
//   JXL_TYPE_UINT16:  9..16 bits, no exponent bits
//   JXL_TYPE_FLOAT16: exactly 16 bits with 5 exponent bits
//   JXL_TYPE_FLOAT:   exactly 32 bits with 8 exponent bits
// Any other data type is accepted without further checks.
Status Encoder::VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample,
                               uint32_t exponent_bits) const {
  bool compatible = true;
  if (data_type == JXL_TYPE_UINT8) {
    compatible =
        bits_per_sample != 0 && bits_per_sample <= 8 && exponent_bits == 0;
  } else if (data_type == JXL_TYPE_UINT16) {
    compatible =
        bits_per_sample > 8 && bits_per_sample <= 16 && exponent_bits == 0;
  } else if (data_type == JXL_TYPE_FLOAT16) {
    compatible = bits_per_sample == 16 && exponent_bits == 5;
  } else if (data_type == JXL_TYPE_FLOAT) {
    compatible = bits_per_sample == 32 && exponent_bits == 8;
  }
  if (!compatible) {
    return JXL_FAILURE(
        "Incompatible data_type %d and bit depth %u with exponent bits %u",
        static_cast<int>(data_type), bits_per_sample, exponent_bits);
  }
  return true;
}
|
||||
|
||||
Status Encoder::VerifyPackedImage(const PackedImage& image,
|
||||
const JxlBasicInfo& info) const {
|
||||
if (image.pixels() == nullptr) {
|
||||
@@ -57,10 +85,10 @@ Status Encoder::VerifyPackedImage(const PackedImage& image,
|
||||
image.format.num_channels != info_num_channels) {
|
||||
return JXL_FAILURE("Frame size does not match image size");
|
||||
}
|
||||
if (info.bits_per_sample >
|
||||
PackedImage::BitsPerChannel(image.format.data_type)) {
|
||||
return JXL_FAILURE("Bit depth does not fit pixel data type");
|
||||
}
|
||||
JXL_RETURN_IF_ERROR(VerifyFormat(image.format));
|
||||
JXL_RETURN_IF_ERROR(VerifyBitDepth(image.format.data_type,
|
||||
info.bits_per_sample,
|
||||
info.exponent_bits_per_sample));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
5
third_party/jpeg-xl/lib/extras/enc/encode.h
vendored
5
third_party/jpeg-xl/lib/extras/enc/encode.h
vendored
@@ -60,6 +60,11 @@ class Encoder {
|
||||
|
||||
Status VerifyBasicInfo(const JxlBasicInfo& info) const;
|
||||
|
||||
Status VerifyFormat(const JxlPixelFormat& format) const;
|
||||
|
||||
Status VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample,
|
||||
uint32_t exponent_bits) const;
|
||||
|
||||
Status VerifyPackedImage(const PackedImage& image,
|
||||
const JxlBasicInfo& info) const;
|
||||
|
||||
|
||||
52
third_party/jpeg-xl/lib/extras/enc/jpg.cc
vendored
52
third_party/jpeg-xl/lib/extras/enc/jpg.cc
vendored
@@ -111,7 +111,7 @@ Status EncodeWithLibJpeg(const PackedImage& image, const JxlBasicInfo& info,
|
||||
const std::vector<uint8_t>& icc,
|
||||
std::vector<uint8_t> exif, size_t quality,
|
||||
const std::string& chroma_subsampling,
|
||||
std::vector<uint8_t>* bytes) {
|
||||
bool progressive, std::vector<uint8_t>* bytes) {
|
||||
if (BITS_IN_JSAMPLE != 8 || sizeof(JSAMPLE) != 1) {
|
||||
return JXL_FAILURE("Only 8 bit JSAMPLE is supported.");
|
||||
}
|
||||
@@ -139,6 +139,9 @@ Status EncodeWithLibJpeg(const PackedImage& image, const JxlBasicInfo& info,
|
||||
jpeg_set_colorspace(&cinfo, JCS_RGB);
|
||||
}
|
||||
jpeg_set_quality(&cinfo, quality, TRUE);
|
||||
if (progressive) {
|
||||
jpeg_simple_progression(&cinfo);
|
||||
}
|
||||
jpeg_start_compress(&cinfo, TRUE);
|
||||
if (!icc.empty()) {
|
||||
WriteICCProfile(&cinfo, icc);
|
||||
@@ -209,7 +212,8 @@ Status EncodeImageJPG(const PackedImage& image, const JxlBasicInfo& info,
|
||||
const std::vector<uint8_t>& icc,
|
||||
std::vector<uint8_t> exif, JpegEncoder encoder,
|
||||
size_t quality, const std::string& chroma_subsampling,
|
||||
ThreadPool* pool, std::vector<uint8_t>* bytes) {
|
||||
bool progressive, ThreadPool* pool,
|
||||
std::vector<uint8_t>* bytes) {
|
||||
if (image.format.data_type != JXL_TYPE_UINT8) {
|
||||
return JXL_FAILURE("Unsupported pixel data type");
|
||||
}
|
||||
@@ -222,9 +226,9 @@ Status EncodeImageJPG(const PackedImage& image, const JxlBasicInfo& info,
|
||||
|
||||
switch (encoder) {
|
||||
case JpegEncoder::kLibJpeg:
|
||||
JXL_RETURN_IF_ERROR(EncodeWithLibJpeg(image, info, color_encoding, icc,
|
||||
std::move(exif), quality,
|
||||
chroma_subsampling, bytes));
|
||||
JXL_RETURN_IF_ERROR(
|
||||
EncodeWithLibJpeg(image, info, color_encoding, icc, std::move(exif),
|
||||
quality, chroma_subsampling, progressive, bytes));
|
||||
break;
|
||||
case JpegEncoder::kSJpeg:
|
||||
JXL_RETURN_IF_ERROR(EncodeWithSJpeg(image, info, icc, std::move(exif),
|
||||
@@ -253,28 +257,26 @@ class JPEGEncoder : public Encoder {
|
||||
Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
|
||||
ThreadPool* pool = nullptr) const override {
|
||||
JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
|
||||
const auto& options = this->options();
|
||||
int quality = 100;
|
||||
auto it_quality = options.find("q");
|
||||
if (it_quality != options.end()) {
|
||||
std::istringstream is(it_quality->second);
|
||||
JXL_RETURN_IF_ERROR(static_cast<bool>(is >> quality));
|
||||
}
|
||||
std::string chroma_subsampling = "444";
|
||||
auto it_chroma_subsampling = options.find("chroma_subsampling");
|
||||
if (it_chroma_subsampling != options.end()) {
|
||||
chroma_subsampling = it_chroma_subsampling->second;
|
||||
}
|
||||
JpegEncoder jpeg_encoder = JpegEncoder::kLibJpeg;
|
||||
auto it_encoder = options.find("jpeg_encoder");
|
||||
if (it_encoder != options.end()) {
|
||||
if (it_encoder->second == "libjpeg") {
|
||||
jpeg_encoder = JpegEncoder::kLibJpeg;
|
||||
} else if (it_encoder->second == "sjpeg") {
|
||||
jpeg_encoder = JpegEncoder::kSJpeg;
|
||||
} else {
|
||||
return JXL_FAILURE("unknown jpeg encoder \"%s\"",
|
||||
it_encoder->second.c_str());
|
||||
bool progressive = false;
|
||||
for (const auto& it : options()) {
|
||||
if (it.first == "q") {
|
||||
std::istringstream is(it.second);
|
||||
JXL_RETURN_IF_ERROR(static_cast<bool>(is >> quality));
|
||||
} else if (it.first == "chroma_subsampling") {
|
||||
chroma_subsampling = it.second;
|
||||
} else if (it.first == "jpeg_encoder") {
|
||||
if (it.second == "libjpeg") {
|
||||
jpeg_encoder = JpegEncoder::kLibJpeg;
|
||||
} else if (it.second == "sjpeg") {
|
||||
jpeg_encoder = JpegEncoder::kSJpeg;
|
||||
} else {
|
||||
return JXL_FAILURE("unknown jpeg encoder \"%s\"", it.second.c_str());
|
||||
}
|
||||
} else if (it.first == "progressive") {
|
||||
progressive = true;
|
||||
}
|
||||
}
|
||||
std::vector<uint8_t> icc;
|
||||
@@ -288,7 +290,7 @@ class JPEGEncoder : public Encoder {
|
||||
encoded_image->bitstreams.emplace_back();
|
||||
JXL_RETURN_IF_ERROR(EncodeImageJPG(
|
||||
frame.color, ppf.info, ppf.color_encoding, icc, ppf.metadata.exif,
|
||||
jpeg_encoder, quality, chroma_subsampling, pool,
|
||||
jpeg_encoder, quality, chroma_subsampling, progressive, pool,
|
||||
&encoded_image->bitstreams.back()));
|
||||
}
|
||||
return true;
|
||||
|
||||
231
third_party/jpeg-xl/lib/extras/enc/jxl.cc
vendored
Normal file
231
third_party/jpeg-xl/lib/extras/enc/jxl.cc
vendored
Normal file
@@ -0,0 +1,231 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "lib/extras/enc/jxl.h"
|
||||
|
||||
#include "jxl/encode_cxx.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
||||
// Applies one option to `settings`, using the float setter for float-valued
// options and the integer setter otherwise.
JxlEncoderStatus SetOption(const JXLOption& opt,
                           JxlEncoderFrameSettings* settings) {
  if (opt.is_float) {
    return JxlEncoderFrameSettingsSetFloatOption(settings, opt.id, opt.fval);
  }
  return JxlEncoderFrameSettingsSetOption(settings, opt.id, opt.ival);
}
|
||||
|
||||
bool SetFrameOptions(const std::vector<JXLOption>& options, size_t frame_index,
|
||||
size_t* option_idx, JxlEncoderFrameSettings* settings) {
|
||||
while (*option_idx < options.size()) {
|
||||
const auto& opt = options[*option_idx];
|
||||
if (opt.frame_index > frame_index) {
|
||||
break;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS != SetOption(opt, settings)) {
|
||||
fprintf(stderr, "Setting option id %d failed.\n", opt.id);
|
||||
return false;
|
||||
}
|
||||
(*option_idx)++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf,
|
||||
const std::vector<uint8_t>* jpeg_bytes,
|
||||
std::vector<uint8_t>* compressed) {
|
||||
auto encoder = JxlEncoderMake(/*memory_manager=*/nullptr);
|
||||
JxlEncoder* enc = encoder.get();
|
||||
|
||||
if (params.runner_opaque != nullptr &&
|
||||
JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc, params.runner,
|
||||
params.runner_opaque)) {
|
||||
fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto settings = JxlEncoderFrameSettingsCreate(enc, nullptr);
|
||||
size_t option_idx = 0;
|
||||
if (!SetFrameOptions(params.options, 0, &option_idx, settings)) {
|
||||
return false;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetFrameDistance(settings, params.distance)) {
|
||||
fprintf(stderr, "Setting frame distance failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool use_container = params.use_container;
|
||||
if (!ppf.metadata.exif.empty() || !ppf.metadata.xmp.empty() ||
|
||||
!ppf.metadata.jumbf.empty() || !ppf.metadata.iptc.empty() ||
|
||||
(jpeg_bytes && params.jpeg_store_metadata)) {
|
||||
use_container = true;
|
||||
}
|
||||
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderUseContainer(enc, static_cast<int>(use_container))) {
|
||||
fprintf(stderr, "JxlEncoderUseContainer failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (jpeg_bytes) {
|
||||
if (params.jpeg_store_metadata &&
|
||||
JXL_ENC_SUCCESS != JxlEncoderStoreJPEGMetadata(enc, JXL_TRUE)) {
|
||||
fprintf(stderr, "Storing JPEG metadata failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS != JxlEncoderAddJPEGFrame(settings, jpeg_bytes->data(),
|
||||
jpeg_bytes->size())) {
|
||||
fprintf(stderr, "JxlEncoderAddJPEGFrame() failed.\n");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
size_t num_alpha_channels = 0; // Adjusted below.
|
||||
JxlBasicInfo basic_info = ppf.info;
|
||||
if (basic_info.alpha_bits > 0) num_alpha_channels = 1;
|
||||
if (params.intensity_target > 0) {
|
||||
basic_info.intensity_target = params.intensity_target;
|
||||
}
|
||||
basic_info.num_extra_channels =
|
||||
std::max<uint32_t>(num_alpha_channels, ppf.info.num_extra_channels);
|
||||
basic_info.num_color_channels = ppf.info.num_color_channels;
|
||||
const bool lossless = params.distance == 0;
|
||||
basic_info.uses_original_profile = lossless;
|
||||
if (params.override_bitdepth != 0) {
|
||||
basic_info.bits_per_sample = params.override_bitdepth;
|
||||
basic_info.exponent_bits_per_sample =
|
||||
params.override_bitdepth == 32 ? 8 : 0;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetCodestreamLevel(enc, params.codestream_level)) {
|
||||
fprintf(stderr, "Setting --codestream_level failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc, &basic_info)) {
|
||||
fprintf(stderr, "JxlEncoderSetBasicInfo() failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetFrameBitDepth(settings, ¶ms.input_bitdepth)) {
|
||||
fprintf(stderr, "JxlEncoderSetFrameBitDepth() failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (lossless &&
|
||||
JXL_ENC_SUCCESS != JxlEncoderSetFrameLossless(settings, JXL_TRUE)) {
|
||||
fprintf(stderr, "JxlEncoderSetFrameLossless() failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (!ppf.icc.empty()) {
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetICCProfile(enc, ppf.icc.data(), ppf.icc.size())) {
|
||||
fprintf(stderr, "JxlEncoderSetICCProfile() failed.\n");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetColorEncoding(enc, &ppf.color_encoding)) {
|
||||
fprintf(stderr, "JxlEncoderSetColorEncoding() failed.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t num_frame = 0; num_frame < ppf.frames.size(); ++num_frame) {
|
||||
const jxl::extras::PackedFrame& pframe = ppf.frames[num_frame];
|
||||
const jxl::extras::PackedImage& pimage = pframe.color;
|
||||
JxlPixelFormat ppixelformat = pimage.format;
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetFrameHeader(settings, &pframe.frame_info)) {
|
||||
fprintf(stderr, "JxlEncoderSetFrameHeader() failed.\n");
|
||||
return false;
|
||||
}
|
||||
if (!SetFrameOptions(params.options, num_frame, &option_idx, settings)) {
|
||||
return false;
|
||||
}
|
||||
if (num_alpha_channels > 0) {
|
||||
JxlExtraChannelInfo extra_channel_info;
|
||||
JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &extra_channel_info);
|
||||
extra_channel_info.bits_per_sample = ppf.info.alpha_bits;
|
||||
extra_channel_info.exponent_bits_per_sample =
|
||||
ppf.info.alpha_exponent_bits;
|
||||
if (params.premultiply != -1) {
|
||||
if (params.premultiply != 0 && params.premultiply != 1) {
|
||||
fprintf(stderr, "premultiply must be one of: -1, 0, 1.\n");
|
||||
return false;
|
||||
}
|
||||
extra_channel_info.alpha_premultiplied = params.premultiply;
|
||||
}
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetExtraChannelInfo(enc, 0, &extra_channel_info)) {
|
||||
fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n");
|
||||
return false;
|
||||
}
|
||||
// We take the extra channel blend info frame_info, but don't do
|
||||
// clamping.
|
||||
JxlBlendInfo extra_channel_blend_info =
|
||||
pframe.frame_info.layer_info.blend_info;
|
||||
extra_channel_blend_info.clamp = JXL_FALSE;
|
||||
JxlEncoderSetExtraChannelBlendInfo(settings, 0,
|
||||
&extra_channel_blend_info);
|
||||
}
|
||||
size_t num_interleaved_alpha =
|
||||
(ppixelformat.num_channels - ppf.info.num_color_channels);
|
||||
// Add extra channel info for the rest of the extra channels.
|
||||
for (size_t i = 0; i < ppf.info.num_extra_channels; ++i) {
|
||||
if (i < ppf.extra_channels_info.size()) {
|
||||
const auto& ec_info = ppf.extra_channels_info[i].ec_info;
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetExtraChannelInfo(enc, num_interleaved_alpha + i,
|
||||
&ec_info)) {
|
||||
fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (JXL_ENC_SUCCESS != JxlEncoderAddImageFrame(settings, &ppixelformat,
|
||||
pimage.pixels(),
|
||||
pimage.pixels_size)) {
|
||||
fprintf(stderr, "JxlEncoderAddImageFrame() failed.\n");
|
||||
return false;
|
||||
}
|
||||
// Only set extra channel buffer if it is provided non-interleaved.
|
||||
for (size_t i = 0; i < pframe.extra_channels.size(); ++i) {
|
||||
if (JXL_ENC_SUCCESS !=
|
||||
JxlEncoderSetExtraChannelBuffer(settings, &ppixelformat,
|
||||
pframe.extra_channels[i].pixels(),
|
||||
pframe.extra_channels[i].stride *
|
||||
pframe.extra_channels[i].ysize,
|
||||
num_interleaved_alpha + i)) {
|
||||
fprintf(stderr, "JxlEncoderSetExtraChannelBuffer() failed.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
JxlEncoderCloseInput(enc);
|
||||
// Reading compressed output
|
||||
compressed->clear();
|
||||
compressed->resize(4096);
|
||||
uint8_t* next_out = compressed->data();
|
||||
size_t avail_out = compressed->size() - (next_out - compressed->data());
|
||||
JxlEncoderStatus result = JXL_ENC_NEED_MORE_OUTPUT;
|
||||
while (result == JXL_ENC_NEED_MORE_OUTPUT) {
|
||||
result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
|
||||
if (result == JXL_ENC_NEED_MORE_OUTPUT) {
|
||||
size_t offset = next_out - compressed->data();
|
||||
compressed->resize(compressed->size() * 2);
|
||||
next_out = compressed->data() + offset;
|
||||
avail_out = compressed->size() - offset;
|
||||
}
|
||||
}
|
||||
compressed->resize(next_out - compressed->data());
|
||||
if (result != JXL_ENC_SUCCESS) {
|
||||
fprintf(stderr, "JxlEncoderProcessOutput failed.\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
73
third_party/jpeg-xl/lib/extras/enc/jxl.h
vendored
Normal file
73
third_party/jpeg-xl/lib/extras/enc/jxl.h
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#ifndef LIB_EXTRAS_ENC_JXL_H_
|
||||
#define LIB_EXTRAS_ENC_JXL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "jxl/encode.h"
|
||||
#include "jxl/parallel_runner.h"
|
||||
#include "jxl/thread_parallel_runner.h"
|
||||
#include "jxl/types.h"
|
||||
#include "lib/extras/packed_image.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
||||
struct JXLOption {
|
||||
JXLOption(JxlEncoderFrameSettingId id, int64_t val, size_t frame_index)
|
||||
: id(id), is_float(false), ival(val), frame_index(frame_index) {}
|
||||
JXLOption(JxlEncoderFrameSettingId id, float val, size_t frame_index)
|
||||
: id(id), is_float(true), fval(val), frame_index(frame_index) {}
|
||||
|
||||
JxlEncoderFrameSettingId id;
|
||||
bool is_float;
|
||||
union {
|
||||
int64_t ival;
|
||||
float fval;
|
||||
};
|
||||
size_t frame_index;
|
||||
};
|
||||
|
||||
struct JXLCompressParams {
|
||||
std::vector<JXLOption> options;
|
||||
// Target butteraugli distance, 0.0 means lossless.
|
||||
float distance = 1.0f;
|
||||
// If set to true, forces container mode.
|
||||
bool use_container = false;
|
||||
// Whether to enable/disable byte-exact jpeg reconstruction for jpeg inputs.
|
||||
bool jpeg_store_metadata = true;
|
||||
// Upper bound on the intensity level present in the image in nits (zero means
|
||||
// that the library chooses a default).
|
||||
float intensity_target = 0;
|
||||
// Overrides for bitdepth, codestream level and alpha premultiply.
|
||||
size_t override_bitdepth = 0;
|
||||
int32_t codestream_level = -1;
|
||||
int32_t premultiply = -1;
|
||||
// Override input buffer interpretation.
|
||||
JxlBitDepth input_bitdepth = {JXL_BIT_DEPTH_FROM_PIXEL_FORMAT, 0, 0};
|
||||
// If runner_opaque is set, the decoder uses this parallel runner.
|
||||
JxlParallelRunner runner = JxlThreadParallelRunner;
|
||||
void* runner_opaque = nullptr;
|
||||
|
||||
void AddOption(JxlEncoderFrameSettingId id, int64_t val) {
|
||||
options.emplace_back(JXLOption(id, val, 0));
|
||||
}
|
||||
void AddFloatOption(JxlEncoderFrameSettingId id, float val) {
|
||||
options.emplace_back(JXLOption(id, val, 0));
|
||||
}
|
||||
};
|
||||
|
||||
bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf,
|
||||
const std::vector<uint8_t>* jpeg_bytes,
|
||||
std::vector<uint8_t>* compressed);
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
|
||||
#endif // LIB_EXTRAS_ENC_JXL_H_
|
||||
320
third_party/jpeg-xl/lib/extras/enc/pnm.cc
vendored
320
third_party/jpeg-xl/lib/extras/enc/pnm.cc
vendored
@@ -32,69 +32,6 @@ namespace {
|
||||
|
||||
constexpr size_t kMaxHeaderSize = 200;
|
||||
|
||||
// Writes the textual header for `image` into `header` (a buffer of at least
// kMaxHeaderSize bytes) and stores the snprintf result in `*chars_written`.
// The flavour is chosen from the channel count and bit depth:
//   - 2 or 4 channels      -> PAM ("P7"), at most 16 bits per sample;
//   - otherwise 32 bits    -> PFM ("Pf"/"PF"), scale sign encodes endianness;
//   - otherwise            -> PGM/PPM ("P5"/"P6"), at most 16 bits per sample.
// Fails if the bit depth is unsupported or the header does not fit.
Status EncodeHeader(const PackedImage& image, size_t bits_per_sample,
                    bool little_endian, char* header, int* chars_written) {
  const size_t channel_count = image.format.num_channels;
  const bool gray = channel_count <= 2;
  const bool with_alpha = (channel_count == 2) || (channel_count == 4);

  if (!with_alpha && bits_per_sample == 32) {  // PFM
    const char type = gray ? 'f' : 'F';
    const double scale = little_endian ? -1.0 : 1.0;
    *chars_written =
        snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%.1f\n",
                 type, image.xsize, image.ysize, scale);
    JXL_RETURN_IF_ERROR(static_cast<unsigned int>(*chars_written) <
                        kMaxHeaderSize);
    return true;
  }

  // Both remaining flavours store integer samples of at most 16 bits.
  if (bits_per_sample > 16) return JXL_FAILURE("PNM cannot have > 16 bits");
  const uint32_t max_val = (1U << bits_per_sample) - 1;

  if (with_alpha) {  // PAM
    *chars_written =
        snprintf(header, kMaxHeaderSize,
                 "P7\nWIDTH %" PRIuS "\nHEIGHT %" PRIuS
                 "\nDEPTH %u\nMAXVAL %u\nTUPLTYPE %s\nENDHDR\n",
                 image.xsize, image.ysize, gray ? 2 : 4, max_val,
                 gray ? "GRAYSCALE_ALPHA" : "RGB_ALPHA");
    JXL_RETURN_IF_ERROR(static_cast<unsigned int>(*chars_written) <
                        kMaxHeaderSize);
  } else {  // PGM/PPM
    const char type = gray ? '5' : '6';
    *chars_written =
        snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%u\n",
                 type, image.xsize, image.ysize, max_val);
    JXL_RETURN_IF_ERROR(static_cast<unsigned int>(*chars_written) <
                        kMaxHeaderSize);
  }
  return true;
}
|
||||
|
||||
// Serializes `image` into `*bytes` as a header (see EncodeHeader) followed by
// the pixel rows. Samples of 16 bits or fewer must be big-endian. When
// bits_per_sample is 32 the output is a PFM, whose rows are written in
// reverse vertical order.
Status EncodeImagePNM(const PackedImage& image, size_t bits_per_sample,
                      std::vector<uint8_t>* bytes) {
  const JxlEndianness endianness = image.format.endianness;
  if (bits_per_sample <= 16 && endianness != JXL_BIG_ENDIAN) {
    return JXL_FAILURE("PPM/PGM requires big-endian pixel format.");
  }
  // Native endianness is resolved against the host byte order.
  const bool is_little_endian =
      endianness == JXL_LITTLE_ENDIAN ||
      (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());

  char header[kMaxHeaderSize];
  int header_size = 0;
  JXL_RETURN_IF_ERROR(EncodeHeader(image, bits_per_sample, is_little_endian,
                                   header, &header_size));
  const size_t header_bytes = static_cast<size_t>(header_size);
  bytes->resize(header_bytes + image.pixels_size);
  memcpy(bytes->data(), header, header_bytes);

  const bool flipped_y = bits_per_sample == 32;  // PFMs are flipped
  const uint8_t* src = reinterpret_cast<const uint8_t*>(image.pixels());
  uint8_t* dst = bytes->data() + header_bytes;
  for (size_t y = 0; y < image.ysize; ++y) {
    const size_t dst_row = flipped_y ? image.ysize - 1 - y : y;
    memcpy(dst + dst_row * image.stride, src + y * image.stride, image.stride);
  }
  return true;
}
|
||||
|
||||
class PNMEncoder : public Encoder {
|
||||
public:
|
||||
Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
|
||||
@@ -110,8 +47,8 @@ class PNMEncoder : public Encoder {
|
||||
for (const auto& frame : ppf.frames) {
|
||||
JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
|
||||
encoded_image->bitstreams.emplace_back();
|
||||
JXL_RETURN_IF_ERROR(EncodeImagePNM(frame.color, ppf.info.bits_per_sample,
|
||||
&encoded_image->bitstreams.back()));
|
||||
JXL_RETURN_IF_ERROR(
|
||||
EncodeFrame(ppf, frame, &encoded_image->bitstreams.back()));
|
||||
}
|
||||
for (size_t i = 0; i < ppf.extra_channels_info.size(); ++i) {
|
||||
const auto& ec_info = ppf.extra_channels_info[i].ec_info;
|
||||
@@ -119,30 +56,61 @@ class PNMEncoder : public Encoder {
|
||||
auto& ec_bitstreams = encoded_image->extra_channel_bitstreams.back();
|
||||
for (const auto& frame : ppf.frames) {
|
||||
ec_bitstreams.emplace_back();
|
||||
JXL_RETURN_IF_ERROR(EncodeImagePNM(frame.extra_channels[i],
|
||||
ec_info.bits_per_sample,
|
||||
&ec_bitstreams.back()));
|
||||
JXL_RETURN_IF_ERROR(EncodeExtraChannel(frame.extra_channels[i],
|
||||
ec_info.bits_per_sample,
|
||||
&ec_bitstreams.back()));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual Status EncodeFrame(const PackedPixelFile& ppf,
|
||||
const PackedFrame& frame,
|
||||
std::vector<uint8_t>* bytes) const = 0;
|
||||
virtual Status EncodeExtraChannel(const PackedImage& image,
|
||||
size_t bits_per_sample,
|
||||
std::vector<uint8_t>* bytes) const = 0;
|
||||
};
|
||||
|
||||
class PPMEncoder : public PNMEncoder {
|
||||
public:
|
||||
std::vector<JxlPixelFormat> AcceptedFormats() const override {
|
||||
std::vector<JxlPixelFormat> formats;
|
||||
for (const uint32_t num_channels : {1, 2, 3, 4}) {
|
||||
for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
|
||||
for (JxlEndianness endianness : {JXL_BIG_ENDIAN}) {
|
||||
formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
|
||||
/*data_type=*/data_type,
|
||||
/*endianness=*/endianness,
|
||||
/*align=*/0});
|
||||
}
|
||||
}
|
||||
}
|
||||
return formats;
|
||||
return {JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0},
|
||||
JxlPixelFormat{3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}};
|
||||
}
|
||||
Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
|
||||
std::vector<uint8_t>* bytes) const override {
|
||||
return EncodeImage(frame.color, ppf.info.bits_per_sample, bytes);
|
||||
}
|
||||
Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
|
||||
std::vector<uint8_t>* bytes) const override {
|
||||
return EncodeImage(image, bits_per_sample, bytes);
|
||||
}
|
||||
|
||||
private:
|
||||
Status EncodeImage(const PackedImage& image, size_t bits_per_sample,
|
||||
std::vector<uint8_t>* bytes) const {
|
||||
uint32_t maxval = (1u << bits_per_sample) - 1;
|
||||
char type = image.format.num_channels == 1 ? '5' : '6';
|
||||
char header[kMaxHeaderSize];
|
||||
size_t header_size =
|
||||
snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%u\n",
|
||||
type, image.xsize, image.ysize, maxval);
|
||||
JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize);
|
||||
bytes->resize(header_size + image.pixels_size);
|
||||
memcpy(bytes->data(), header, header_size);
|
||||
memcpy(bytes->data() + header_size,
|
||||
reinterpret_cast<uint8_t*>(image.pixels()), image.pixels_size);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class PGMEncoder : public PPMEncoder {
|
||||
public:
|
||||
std::vector<JxlPixelFormat> AcceptedFormats() const override {
|
||||
return {JxlPixelFormat{1, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0},
|
||||
JxlPixelFormat{1, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -151,54 +119,168 @@ class PFMEncoder : public PNMEncoder {
|
||||
std::vector<JxlPixelFormat> AcceptedFormats() const override {
|
||||
std::vector<JxlPixelFormat> formats;
|
||||
for (const uint32_t num_channels : {1, 3}) {
|
||||
for (const JxlDataType data_type : {JXL_TYPE_FLOAT16, JXL_TYPE_FLOAT}) {
|
||||
for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
|
||||
formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
|
||||
/*data_type=*/data_type,
|
||||
/*endianness=*/endianness,
|
||||
/*align=*/0});
|
||||
for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
|
||||
formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
|
||||
/*data_type=*/JXL_TYPE_FLOAT,
|
||||
/*endianness=*/endianness,
|
||||
/*align=*/0});
|
||||
}
|
||||
}
|
||||
return formats;
|
||||
}
|
||||
Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
|
||||
std::vector<uint8_t>* bytes) const override {
|
||||
return EncodeImage(frame.color, bytes);
|
||||
}
|
||||
Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
|
||||
std::vector<uint8_t>* bytes) const override {
|
||||
return EncodeImage(image, bytes);
|
||||
}
|
||||
|
||||
private:
|
||||
Status EncodeImage(const PackedImage& image,
|
||||
std::vector<uint8_t>* bytes) const {
|
||||
char type = image.format.num_channels == 1 ? 'f' : 'F';
|
||||
double scale = image.format.endianness == JXL_LITTLE_ENDIAN ? -1.0 : 1.0;
|
||||
char header[kMaxHeaderSize];
|
||||
size_t header_size =
|
||||
snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%.1f\n",
|
||||
type, image.xsize, image.ysize, scale);
|
||||
JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize);
|
||||
bytes->resize(header_size + image.pixels_size);
|
||||
memcpy(bytes->data(), header, header_size);
|
||||
const uint8_t* in = reinterpret_cast<const uint8_t*>(image.pixels());
|
||||
uint8_t* out = bytes->data() + header_size;
|
||||
for (size_t y = 0; y < image.ysize; ++y) {
|
||||
size_t y_out = image.ysize - 1 - y;
|
||||
const uint8_t* row_in = &in[y * image.stride];
|
||||
uint8_t* row_out = &out[y_out * image.stride];
|
||||
memcpy(row_out, row_in, image.stride);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class PAMEncoder : public PNMEncoder {
|
||||
public:
|
||||
std::vector<JxlPixelFormat> AcceptedFormats() const override {
|
||||
std::vector<JxlPixelFormat> formats;
|
||||
for (const uint32_t num_channels : {1, 2, 3, 4}) {
|
||||
for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
|
||||
formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
|
||||
/*data_type=*/data_type,
|
||||
/*endianness=*/JXL_BIG_ENDIAN,
|
||||
/*align=*/0});
|
||||
}
|
||||
}
|
||||
return formats;
|
||||
}
|
||||
Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
|
||||
std::vector<uint8_t>* bytes) const override {
|
||||
const PackedImage& color = frame.color;
|
||||
const auto& ec_info = ppf.extra_channels_info;
|
||||
JXL_RETURN_IF_ERROR(frame.extra_channels.size() == ec_info.size());
|
||||
for (const auto& ec : frame.extra_channels) {
|
||||
if (ec.xsize != color.xsize || ec.ysize != color.ysize) {
|
||||
return JXL_FAILURE("Extra channel and color size mismatch.");
|
||||
}
|
||||
if (ec.format.data_type != color.format.data_type ||
|
||||
ec.format.endianness != color.format.endianness) {
|
||||
return JXL_FAILURE("Extra channel and color format mismatch.");
|
||||
}
|
||||
}
|
||||
if (ppf.info.bits_per_sample != ppf.info.alpha_bits) {
|
||||
return JXL_FAILURE("Alpha bit depth does not match image bit depth");
|
||||
}
|
||||
for (const auto& it : ec_info) {
|
||||
if (it.ec_info.bits_per_sample != ppf.info.bits_per_sample) {
|
||||
return JXL_FAILURE(
|
||||
"Extra channel bit depth does not match image bit depth");
|
||||
}
|
||||
}
|
||||
const char* kColorTypes[4] = {"GRAYSCALE", "GRAYSCALE_ALPHA", "RGB",
|
||||
"RGB_ALPHA"};
|
||||
uint32_t maxval = (1u << ppf.info.bits_per_sample) - 1;
|
||||
uint32_t depth = color.format.num_channels + ec_info.size();
|
||||
char header[kMaxHeaderSize];
|
||||
size_t pos = 0;
|
||||
pos += snprintf(header + pos, kMaxHeaderSize - pos,
|
||||
"P7\nWIDTH %" PRIuS "\nHEIGHT %" PRIuS
|
||||
"\nDEPTH %u\n"
|
||||
"MAXVAL %u\nTUPLTYPE %s\n",
|
||||
color.xsize, color.ysize, depth, maxval,
|
||||
kColorTypes[color.format.num_channels - 1]);
|
||||
JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
|
||||
for (const auto& info : ec_info) {
|
||||
pos += snprintf(header + pos, kMaxHeaderSize - pos, "TUPLTYPE %s\n",
|
||||
ExtraChannelTypeName(info.ec_info.type).c_str());
|
||||
JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
|
||||
}
|
||||
pos += snprintf(header + pos, kMaxHeaderSize - pos, "ENDHDR\n");
|
||||
JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
|
||||
size_t total_size = color.pixels_size;
|
||||
for (const auto& ec : frame.extra_channels) {
|
||||
total_size += ec.pixels_size;
|
||||
}
|
||||
bytes->resize(pos + total_size);
|
||||
memcpy(bytes->data(), header, pos);
|
||||
// If we have no extra channels, just copy color pixel data over.
|
||||
if (frame.extra_channels.empty()) {
|
||||
memcpy(bytes->data() + pos, reinterpret_cast<uint8_t*>(color.pixels()),
|
||||
color.pixels_size);
|
||||
return true;
|
||||
}
|
||||
// Interleave color and extra channels.
|
||||
const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
|
||||
std::vector<const uint8_t*> ec_in(frame.extra_channels.size());
|
||||
for (size_t i = 0; i < frame.extra_channels.size(); ++i) {
|
||||
ec_in[i] =
|
||||
reinterpret_cast<const uint8_t*>(frame.extra_channels[i].pixels());
|
||||
}
|
||||
uint8_t* out = bytes->data() + pos;
|
||||
size_t pwidth = PackedImage::BitsPerChannel(color.format.data_type) / 8;
|
||||
for (size_t y = 0; y < color.ysize; ++y) {
|
||||
for (size_t x = 0; x < color.xsize; ++x) {
|
||||
memcpy(out, in, color.pixel_stride());
|
||||
out += color.pixel_stride();
|
||||
in += color.pixel_stride();
|
||||
for (auto& p : ec_in) {
|
||||
memcpy(out, p, pwidth);
|
||||
out += pwidth;
|
||||
p += pwidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
return formats;
|
||||
return true;
|
||||
}
|
||||
Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
|
||||
std::vector<uint8_t>* bytes) const override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class PGMEncoder : public PPMEncoder {
|
||||
public:
|
||||
std::vector<JxlPixelFormat> AcceptedFormats() const override {
|
||||
std::vector<JxlPixelFormat> formats = PPMEncoder::AcceptedFormats();
|
||||
for (auto it = formats.begin(); it != formats.end();) {
|
||||
if (it->num_channels > 2) {
|
||||
it = formats.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
private:
|
||||
static std::string ExtraChannelTypeName(JxlExtraChannelType type) {
|
||||
switch (type) {
|
||||
case JXL_CHANNEL_ALPHA:
|
||||
return std::string("Alpha");
|
||||
case JXL_CHANNEL_DEPTH:
|
||||
return std::string("Depth");
|
||||
case JXL_CHANNEL_SPOT_COLOR:
|
||||
return std::string("SpotColor");
|
||||
case JXL_CHANNEL_SELECTION_MASK:
|
||||
return std::string("SelectionMask");
|
||||
case JXL_CHANNEL_BLACK:
|
||||
return std::string("Black");
|
||||
case JXL_CHANNEL_CFA:
|
||||
return std::string("CFA");
|
||||
case JXL_CHANNEL_THERMAL:
|
||||
return std::string("Thermal");
|
||||
default:
|
||||
return std::string("UNKNOWN");
|
||||
}
|
||||
return formats;
|
||||
}
|
||||
};
|
||||
|
||||
class PAMEncoder : public PPMEncoder {
|
||||
public:
|
||||
std::vector<JxlPixelFormat> AcceptedFormats() const override {
|
||||
std::vector<JxlPixelFormat> formats = PPMEncoder::AcceptedFormats();
|
||||
for (auto it = formats.begin(); it != formats.end();) {
|
||||
if (it->num_channels != 2 && it->num_channels != 4) {
|
||||
it = formats.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
return formats;
|
||||
}
|
||||
};
|
||||
|
||||
// Wraps the NUL-terminated string `str` in a byte span covering its
// characters up to, but not including, the terminator. The span does not own
// the data.
Span<const uint8_t> MakeSpan(const char* str) {
  const auto* first_byte = reinterpret_cast<const uint8_t*>(str);
  const size_t length = strlen(str);
  return Span<const uint8_t>(first_byte, length);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<Encoder> GetPPMEncoder() {
|
||||
|
||||
111
third_party/jpeg-xl/lib/extras/encode_jpeg.cc
vendored
111
third_party/jpeg-xl/lib/extras/encode_jpeg.cc
vendored
@@ -31,27 +31,42 @@ namespace HWY_NAMESPACE {
|
||||
void ComputeDCTCoefficients(const Image3F& opsin, const ImageF& qf,
|
||||
const FrameDimensions& frame_dim, const float* qm,
|
||||
std::vector<jpeg::JPEGComponent>* components) {
|
||||
int max_samp_factor = 1;
|
||||
for (const auto& c : *components) {
|
||||
JXL_DASSERT(c.h_samp_factor == c.v_samp_factor);
|
||||
max_samp_factor = std::max(c.h_samp_factor, max_samp_factor);
|
||||
}
|
||||
float qfmin, qfmax;
|
||||
ImageMinMax(qf, &qfmin, &qfmax);
|
||||
HWY_ALIGN float scratch_space[2 * kDCTBlockSize];
|
||||
ImageF tmp;
|
||||
for (size_t c = 0; c < 3; c++) {
|
||||
std::vector<jpeg::coeff_t>& coeffs = (*components)[c].coeffs;
|
||||
size_t num_blocks = frame_dim.xsize_blocks * frame_dim.ysize_blocks;
|
||||
coeffs.resize(num_blocks * kDCTBlockSize);
|
||||
auto& comp = (*components)[c];
|
||||
const size_t xsize_blocks = comp.width_in_blocks;
|
||||
const size_t ysize_blocks = comp.height_in_blocks;
|
||||
JXL_DASSERT(max_samp_factor % comp.h_samp_factor == 0);
|
||||
const int factor = max_samp_factor / comp.h_samp_factor;
|
||||
const ImageF* plane = &opsin.Plane(c);
|
||||
if (factor > 1) {
|
||||
tmp = CopyImage(*plane);
|
||||
DownsampleImage(&tmp, factor);
|
||||
plane = &tmp;
|
||||
}
|
||||
std::vector<jpeg::coeff_t>& coeffs = comp.coeffs;
|
||||
coeffs.resize(xsize_blocks * ysize_blocks * kDCTBlockSize);
|
||||
const float* qmc = &qm[c * kDCTBlockSize];
|
||||
for (size_t by = 0, bix = 0; by < frame_dim.ysize_blocks; by++) {
|
||||
for (size_t bx = 0; bx < frame_dim.xsize_blocks; bx++, bix++) {
|
||||
HWY_ALIGN float dct[kDCTBlockSize];
|
||||
TransformFromPixels(AcStrategy::Type::DCT,
|
||||
opsin.PlaneRow(c, 8 * by) + 8 * bx,
|
||||
opsin.PixelsPerRow(), dct, scratch_space);
|
||||
for (size_t by = 0, bix = 0; by < ysize_blocks; by++) {
|
||||
for (size_t bx = 0; bx < xsize_blocks; bx++, bix++) {
|
||||
jpeg::coeff_t* block = &coeffs[bix * kDCTBlockSize];
|
||||
HWY_ALIGN float dct[kDCTBlockSize];
|
||||
TransformFromPixels(AcStrategy::Type::DCT, plane->Row(8 * by) + 8 * bx,
|
||||
plane->PixelsPerRow(), dct, scratch_space);
|
||||
for (size_t iy = 0, i = 0; iy < 8; iy++) {
|
||||
for (size_t ix = 0; ix < 8; ix++, i++) {
|
||||
float coeff = 2040 * dct[i] * qmc[i];
|
||||
// Create more zeros in areas where jpeg xl would have used a lower
|
||||
// quantization multiplier.
|
||||
float zero_bias = 0.5f * qfmax / qf.Row(by)[bx];
|
||||
float zero_bias = 0.5f * qfmax / qf.Row(by * factor)[bx * factor];
|
||||
int cc = std::abs(coeff) < zero_bias ? 0 : std::round(coeff);
|
||||
// If the relative value of the adaptive quantization field is less
|
||||
// than 0.5, we drop the least significant bit.
|
||||
@@ -102,7 +117,7 @@ std::vector<uint8_t> CreateXybICCAppMarker() {
|
||||
return icc_marker;
|
||||
}
|
||||
|
||||
void AddJpegQuantMatrices(const ImageF& qf, float dc_quant,
|
||||
void AddJpegQuantMatrices(const ImageF& qf, float dc_quant, float global_scale,
|
||||
std::vector<jpeg::JPEGQuantTable>* quant_tables,
|
||||
float* qm) {
|
||||
// Create a custom JPEG XL dequant matrix. The quantization weight parameters
|
||||
@@ -127,7 +142,6 @@ void AddJpegQuantMatrices(const ImageF& qf, float dc_quant,
|
||||
// Scale the quant matrix based on the scaled XYB scales and the quant field.
|
||||
float qfmin, qfmax;
|
||||
ImageMinMax(qf, &qfmin, &qfmax);
|
||||
const float global_scale = 0.66f;
|
||||
for (size_t c = 0; c < 3; c++) {
|
||||
const float scale = kScaledXYBScale[c] * global_scale;
|
||||
qm[c * kDCTBlockSize] *= scale;
|
||||
@@ -237,7 +251,9 @@ void AddJpegHuffmanCodes(std::vector<Histogram>& histograms,
|
||||
}
|
||||
|
||||
void FillJPEGData(const Image3F& opsin, const ImageF& qf, float dc_quant,
|
||||
float global_scale, const bool subsample_blue,
|
||||
const FrameDimensions& frame_dim, jpeg::JPEGData* out) {
|
||||
*out = jpeg::JPEGData();
|
||||
// ICC
|
||||
out->marker_order.push_back(0xe2);
|
||||
out->app_data.push_back(CreateXybICCAppMarker());
|
||||
@@ -245,7 +261,7 @@ void FillJPEGData(const Image3F& opsin, const ImageF& qf, float dc_quant,
|
||||
// DQT
|
||||
out->marker_order.emplace_back(0xdb);
|
||||
float qm[3 * kDCTBlockSize];
|
||||
AddJpegQuantMatrices(qf, dc_quant, &out->quant, qm);
|
||||
AddJpegQuantMatrices(qf, dc_quant, global_scale, &out->quant, qm);
|
||||
|
||||
// SOF
|
||||
out->marker_order.emplace_back(0xc2);
|
||||
@@ -255,11 +271,15 @@ void FillJPEGData(const Image3F& opsin, const ImageF& qf, float dc_quant,
|
||||
out->components[0].id = 'R';
|
||||
out->components[1].id = 'G';
|
||||
out->components[2].id = 'B';
|
||||
size_t max_samp_factor = subsample_blue ? 2 : 1;
|
||||
for (size_t c = 0; c < 3; ++c) {
|
||||
out->components[c].h_samp_factor = 1;
|
||||
out->components[c].v_samp_factor = 1;
|
||||
out->components[c].width_in_blocks = frame_dim.xsize_blocks;
|
||||
out->components[c].height_in_blocks = frame_dim.ysize_blocks;
|
||||
const size_t factor = (subsample_blue && c == 2) ? 2 : 1;
|
||||
out->components[c].h_samp_factor = max_samp_factor / factor;
|
||||
out->components[c].v_samp_factor = max_samp_factor / factor;
|
||||
JXL_ASSERT(frame_dim.xsize_blocks % factor == 0);
|
||||
JXL_ASSERT(frame_dim.ysize_blocks % factor == 0);
|
||||
out->components[c].width_in_blocks = frame_dim.xsize_blocks / factor;
|
||||
out->components[c].height_in_blocks = frame_dim.ysize_blocks / factor;
|
||||
out->components[c].quant_idx = c;
|
||||
}
|
||||
HWY_DYNAMIC_DISPATCH(ComputeDCTCoefficients)
|
||||
@@ -271,7 +291,7 @@ void FillJPEGData(const Image3F& opsin, const ImageF& qf, float dc_quant,
|
||||
// SOS
|
||||
std::vector<ProgressiveScan> progressive_mode = {
|
||||
// DC
|
||||
{0, 0, 0, 0, true},
|
||||
{0, 0, 0, 0, !subsample_blue},
|
||||
// AC 1 - highest bits
|
||||
{1, 63, 0, 1, false},
|
||||
// AC 2 - lowest bit
|
||||
@@ -315,18 +335,31 @@ void FillJPEGData(const Image3F& opsin, const ImageF& qf, float dc_quant,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the number of bytes that writing `jpeg_data` with jpeg::WriteJpeg
// hands to its output callback. Nothing is stored: the callback only tallies
// the length of every chunk it is offered. JXL_CHECK enforces that the write
// itself succeeds.
size_t JpegSize(const jpeg::JPEGData& jpeg_data) {
  size_t byte_count = 0;
  // Counting sink: ignores the chunk contents and reports the whole chunk as
  // consumed.
  auto tally = [&byte_count](const uint8_t* /*chunk*/,
                             size_t chunk_len) -> size_t {
    byte_count += chunk_len;
    return chunk_len;
  };
  JXL_CHECK(jpeg::WriteJpeg(jpeg_data, tally));
  return byte_count;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Status EncodeJpeg(const ImageBundle& input, float distance, ThreadPool* pool,
|
||||
std::vector<uint8_t>* compressed) {
|
||||
Status EncodeJpeg(const ImageBundle& input, size_t target_size, float distance,
|
||||
ThreadPool* pool, std::vector<uint8_t>* compressed) {
|
||||
const bool subsample_blue = true;
|
||||
const size_t max_shift = subsample_blue ? 1 : 0;
|
||||
FrameDimensions frame_dim;
|
||||
frame_dim.Set(input.xsize(), input.ysize(), 1, 0, 0, false, 1);
|
||||
frame_dim.Set(input.xsize(), input.ysize(), 1, max_shift, max_shift, false,
|
||||
1);
|
||||
|
||||
// Convert input to XYB colorspace.
|
||||
Image3F opsin(frame_dim.xsize_padded, frame_dim.ysize_padded);
|
||||
opsin.ShrinkTo(frame_dim.xsize, frame_dim.ysize);
|
||||
ToXYB(input, pool, &opsin, GetJxlCms());
|
||||
PadImageToBlockMultipleInPlace(&opsin);
|
||||
PadImageToBlockMultipleInPlace(&opsin, 8 << max_shift);
|
||||
|
||||
// Compute adaptive quant field.
|
||||
ImageF mask;
|
||||
@@ -335,7 +368,39 @@ Status EncodeJpeg(const ImageBundle& input, float distance, ThreadPool* pool,
|
||||
|
||||
// Create jpeg data and optimize Huffman codes.
|
||||
jpeg::JPEGData jpeg_data;
|
||||
FillJPEGData(opsin, qf, InitialQuantDC(distance), frame_dim, &jpeg_data);
|
||||
float global_scale = 0.66f;
|
||||
float dc_quant = InitialQuantDC(distance);
|
||||
FillJPEGData(opsin, qf, dc_quant, global_scale, subsample_blue, frame_dim,
|
||||
&jpeg_data);
|
||||
|
||||
if (target_size != 0) {
|
||||
// Tweak the jpeg data so that the resulting compressed file is
|
||||
// approximately target_size long.
|
||||
size_t prev_size = 0;
|
||||
float best_error = 100.0f;
|
||||
float best_global_scale = global_scale;
|
||||
size_t iter = 0;
|
||||
for (;;) {
|
||||
size_t size = JpegSize(jpeg_data);
|
||||
float error = size * 1.0f / target_size - 1.0f;
|
||||
if (std::abs(error) < std::abs(best_error)) {
|
||||
best_error = error;
|
||||
best_global_scale = global_scale;
|
||||
}
|
||||
if (size == prev_size || std::abs(error) < 0.001f || iter >= 10) {
|
||||
break;
|
||||
}
|
||||
global_scale *= 1.0f + error;
|
||||
FillJPEGData(opsin, qf, dc_quant, global_scale, subsample_blue, frame_dim,
|
||||
&jpeg_data);
|
||||
prev_size = size;
|
||||
++iter;
|
||||
}
|
||||
if (best_global_scale != global_scale) {
|
||||
FillJPEGData(opsin, qf, dc_quant, best_global_scale, subsample_blue,
|
||||
frame_dim, &jpeg_data);
|
||||
}
|
||||
}
|
||||
|
||||
// Write jpeg data to compressed stream.
|
||||
auto write = [&compressed](const uint8_t* buf, size_t len) {
|
||||
|
||||
4
third_party/jpeg-xl/lib/extras/encode_jpeg.h
vendored
4
third_party/jpeg-xl/lib/extras/encode_jpeg.h
vendored
@@ -16,8 +16,8 @@
|
||||
namespace jxl {
|
||||
namespace extras {
|
||||
|
||||
Status EncodeJpeg(const ImageBundle& input, float distance, ThreadPool* pool,
|
||||
std::vector<uint8_t>* compressed);
|
||||
Status EncodeJpeg(const ImageBundle& input, size_t target_size, float distance,
|
||||
ThreadPool* pool, std::vector<uint8_t>* compressed);
|
||||
|
||||
} // namespace extras
|
||||
} // namespace jxl
|
||||
|
||||
32
third_party/jpeg-xl/lib/extras/packed_image.h
vendored
32
third_party/jpeg-xl/lib/extras/packed_image.h
vendored
@@ -33,6 +33,13 @@ class PackedImage {
|
||||
PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format)
|
||||
: PackedImage(xsize, ysize, format, CalcStride(format, xsize)) {}
|
||||
|
||||
// Returns a new PackedImage built from this image's xsize, ysize and format,
// with `pixels_size` bytes of this image's pixel buffer copied into it.
PackedImage Copy() const {
  PackedImage duplicate(xsize, ysize, format);
  // pixels() already yields void*, which memcpy accepts as-is.
  void* const dst = duplicate.pixels();
  const void* const src = pixels();
  memcpy(dst, src, pixels_size);
  return duplicate;
}
|
||||
|
||||
// The interleaved pixels as defined in the storage format.
|
||||
void* pixels() const { return pixels_.get(); }
|
||||
|
||||
@@ -98,6 +105,18 @@ class PackedFrame {
|
||||
template <typename... Args>
|
||||
explicit PackedFrame(Args&&... args) : color(std::forward<Args>(args)...) {}
|
||||
|
||||
// Returns a deep copy of this frame: the color image and every extra channel
// are duplicated via PackedImage::Copy(), and the frame metadata
// (`frame_info`, `name`) is copied by value.
PackedFrame Copy() const {
  // Build the frame directly from the duplicated color image. The forwarding
  // constructor moves it into `color`, so no throw-away pixel buffer is
  // allocated first only to be replaced by a later assignment.
  PackedFrame copy(color.Copy());
  copy.frame_info = frame_info;
  copy.name = name;
  for (const PackedImage& channel : extra_channels) {
    copy.extra_channels.emplace_back(channel.Copy());
  }
  return copy;
}
|
||||
|
||||
// The Frame metadata.
|
||||
JxlFrameHeader frame_info = {};
|
||||
std::string name;
|
||||
@@ -117,17 +136,18 @@ class PackedMetadata {
|
||||
std::vector<uint8_t> xmp;
|
||||
};
|
||||
|
||||
// The extra channel metadata information.
|
||||
struct PackedExtraChannel {
|
||||
JxlExtraChannelInfo ec_info;
|
||||
size_t index;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
// Helper class representing a JXL image file as decoded to pixels from the API.
|
||||
class PackedPixelFile {
|
||||
public:
|
||||
JxlBasicInfo info = {};
|
||||
|
||||
// The extra channel metadata information.
|
||||
struct PackedExtraChannel {
|
||||
JxlExtraChannelInfo ec_info;
|
||||
size_t index;
|
||||
std::string name;
|
||||
};
|
||||
std::vector<PackedExtraChannel> extra_channels_info;
|
||||
|
||||
// Color information of the decoded pixels.
|
||||
|
||||
@@ -58,10 +58,8 @@ Status ConvertPackedFrameToImageBundle(const JxlBasicInfo& info,
|
||||
|
||||
JXL_RETURN_IF_ERROR(ConvertFromExternal(
|
||||
span, frame.color.xsize, frame.color.ysize, io.metadata.m.color_encoding,
|
||||
frame.color.format.num_channels,
|
||||
/*alpha_is_premultiplied=*/info.alpha_premultiplied,
|
||||
frame_bits_per_sample, frame.color.format.endianness, pool, bundle,
|
||||
/*float_in=*/float_in, /*align=*/0));
|
||||
frame_bits_per_sample, frame.color.format, pool, bundle));
|
||||
|
||||
bundle->extra_channels().resize(io.metadata.m.extra_channel_info.size());
|
||||
for (size_t i = 0; i < frame.extra_channels.size(); i++) {
|
||||
@@ -140,8 +138,7 @@ Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
|
||||
io->blobs.xmp = ppf.metadata.xmp;
|
||||
|
||||
// Append all other extra channels.
|
||||
for (const PackedPixelFile::PackedExtraChannel& info :
|
||||
ppf.extra_channels_info) {
|
||||
for (const auto& info : ppf.extra_channels_info) {
|
||||
ExtraChannelInfo out;
|
||||
out.type = static_cast<jxl::ExtraChannel>(info.ec_info.type);
|
||||
out.bit_depth.bits_per_sample = info.ec_info.bits_per_sample;
|
||||
|
||||
36
third_party/jpeg-xl/lib/include/jxl/decode.h
vendored
36
third_party/jpeg-xl/lib/include/jxl/decode.h
vendored
@@ -22,6 +22,7 @@
|
||||
#include "jxl/memory_manager.h"
|
||||
#include "jxl/parallel_runner.h"
|
||||
#include "jxl/types.h"
|
||||
#include "jxl/version.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
@@ -742,14 +743,26 @@ typedef enum {
|
||||
* represented, the ICC profile may be a close approximation. It is also not
|
||||
* always feasible to deduce from an ICC profile which named color space it
|
||||
* exactly represents, if any, as it can represent any arbitrary space.
|
||||
* HDR color spaces such as those using PQ and HLG are also potentially
|
||||
* problematic, in that: while ICC profiles can encode a transfer function
|
||||
* that happens to approximate those of PQ and HLG (HLG for only one given
|
||||
* system gamma at a time, and necessitating a 3D LUT if gamma is to be
|
||||
* different from 1), they cannot (before ICCv4.4) semantically signal that
|
||||
* this is the color space that they represent. Therefore, they will
|
||||
* typically not actually be interpreted as representing an HDR color space.
|
||||
* This is especially detrimental to PQ which will then be interpreted as if
|
||||
* the maximum signal value represented SDR white instead of 10000 cd/m^2,
|
||||
* meaning that the image will be displayed two orders of magnitude (5-7 EV)
|
||||
* too dim.
|
||||
* - The JPEG XL image has an encoded structured color profile, and it
|
||||
* indicates an unknown or xyb color space. In that case, @ref
|
||||
* JxlDecoderGetColorAsICCProfile is not available.
|
||||
*
|
||||
* When rendering an image on a system that supports ICC profiles, @ref
|
||||
* JxlDecoderGetColorAsICCProfile should be used first. When rendering
|
||||
* for a specific color space, possibly indicated in the JPEG XL
|
||||
* image, @ref JxlDecoderGetColorAsEncodedProfile should be used first.
|
||||
* When rendering an image on a system where ICC-based color management is used,
|
||||
* @ref JxlDecoderGetColorAsICCProfile should generally be used first as it will
|
||||
* return a ready-to-use profile (with the aforementioned caveat about HDR).
|
||||
* When knowledge about the nominal color space is desired if available, @ref
|
||||
* JxlDecoderGetColorAsEncodedProfile should be used first.
|
||||
*
|
||||
* @param dec decoder object
|
||||
* @param unused_format deprecated, can be NULL
|
||||
@@ -1437,6 +1450,21 @@ JXL_EXPORT size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec);
|
||||
*/
|
||||
JXL_EXPORT JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec);
|
||||
|
||||
/**
|
||||
* Sets the bit depth of the output buffer or callback.
|
||||
*
|
||||
* Can be called after @ref JxlDecoderSetImageOutBuffer or @ref
|
||||
* JxlDecoderSetImageOutCallback. For float pixel data types, only the default
|
||||
* @ref JXL_BIT_DEPTH_FROM_PIXEL_FORMAT setting is supported.
|
||||
*
|
||||
* @param dec decoder object
|
||||
* @param bit_depth the bit depth setting of the pixel output
|
||||
* @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
|
||||
* incompatible custom bit depth and pixel data type.
|
||||
*/
|
||||
JXL_EXPORT JxlDecoderStatus
|
||||
JxlDecoderSetImageOutBitDepth(JxlDecoder* dec, const JxlBitDepth* bit_depth);
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
42
third_party/jpeg-xl/lib/include/jxl/encode.h
vendored
42
third_party/jpeg-xl/lib/include/jxl/encode.h
vendored
@@ -18,6 +18,7 @@
|
||||
#include "jxl/jxl_export.h"
|
||||
#include "jxl/memory_manager.h"
|
||||
#include "jxl/parallel_runner.h"
|
||||
#include "jxl/version.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
@@ -514,6 +515,22 @@ JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo(
|
||||
JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameName(
|
||||
JxlEncoderFrameSettings* frame_settings, const char* frame_name);
|
||||
|
||||
/**
|
||||
* Sets the bit depth of the input buffer.
|
||||
*
|
||||
* For float pixel formats, only the default JXL_BIT_DEPTH_FROM_PIXEL_FORMAT
|
||||
* setting is allowed, while for unsigned pixel formats,
|
||||
* JXL_BIT_DEPTH_FROM_CODESTREAM setting is also allowed. See the comment on
|
||||
* @ref JxlEncoderAddImageFrame for the effects of the bit depth setting.
|
||||
|
||||
* @param frame_settings set of options and metadata for this frame. Also
|
||||
* includes reference to the encoder object.
|
||||
* @param bit_depth the bit depth setting of the pixel input
|
||||
* @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
|
||||
*/
|
||||
JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameBitDepth(
|
||||
JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth);
|
||||
|
||||
/**
|
||||
* Sets the buffer to read JPEG encoded bytes from for the next frame to encode.
|
||||
*
|
||||
@@ -555,15 +572,22 @@ JxlEncoderAddJPEGFrame(const JxlEncoderFrameSettings* frame_settings,
|
||||
* - JXL_TYPE_FLOAT, with nominal range 0..1
|
||||
*
|
||||
* Note: the sample data type in pixel_format is allowed to be different from
|
||||
* what is described in the JxlBasicInfo. The type in pixel_format describes the
|
||||
* format of the uncompressed pixel buffer. The bits_per_sample and
|
||||
* exponent_bits_per_sample in the JxlBasicInfo describes what will actually be
|
||||
* encoded in the JPEG XL codestream. For example, to encode a 12-bit image, you
|
||||
* would set bits_per_sample to 12, and you could use e.g. JXL_TYPE_UINT16
|
||||
* (where the values are rescaled to 16-bit, i.e. multiplied by 65535/4095) or
|
||||
* JXL_TYPE_FLOAT (where the values are rescaled to 0..1, i.e. multiplied
|
||||
* by 1.f/4095.f). While it is allowed, it is obviously not recommended to use a
|
||||
* pixel_format with lower precision than what is specified in the JxlBasicInfo.
|
||||
* what is described in the JxlBasicInfo. The type in pixel_format, together
|
||||
* with an optional @ref JxlBitDepth parameter set by @ref
|
||||
* JxlEncoderSetFrameBitDepth describes the format of the uncompressed pixel
|
||||
* buffer. The bits_per_sample and exponent_bits_per_sample in the JxlBasicInfo
|
||||
* describe what will actually be encoded in the JPEG XL codestream.
|
||||
* For example, to encode a 12-bit image, you would set bits_per_sample to 12,
|
||||
* while the input frame buffer can be in the following formats:
|
||||
* - if pixel format is in JXL_TYPE_UINT16 with default bit depth setting
|
||||
* (i.e. JXL_BIT_DEPTH_FROM_PIXEL_FORMAT), input sample values are rescaled
|
||||
* to 16-bit, i.e. multiplied by 65535/4095;
|
||||
* - if pixel format is in JXL_TYPE_UINT16 with JXL_BIT_DEPTH_FROM_CODESTREAM
|
||||
* bit depth setting, input sample values are provided unscaled;
|
||||
* - if pixel format is in JXL_TYPE_FLOAT, input sample values are rescaled
|
||||
* to 0..1, i.e. multiplied by 1.f/4095.f.
|
||||
* While it is allowed, it is obviously not recommended to use a pixel_format
|
||||
* with lower precision than what is specified in the JxlBasicInfo.
|
||||
*
|
||||
* We support interleaved channels as described by the JxlPixelFormat:
|
||||
* - single-channel data, e.g. grayscale
|
||||
|
||||
37
third_party/jpeg-xl/lib/include/jxl/types.h
vendored
37
third_party/jpeg-xl/lib/include/jxl/types.h
vendored
@@ -111,6 +111,43 @@ typedef struct {
|
||||
size_t align;
|
||||
} JxlPixelFormat;
|
||||
|
||||
/** Settings for the interpretation of the input and output buffers.
|
||||
*/
|
||||
typedef enum {
|
||||
/** This is the default setting, where the encoder expects the input pixels
|
||||
* to use the full range of the pixel format data type (e.g. for UINT16, the
|
||||
* input range is 0 .. 65535 and the value 65535 is mapped to 1.0 when
|
||||
* converting to float), and the decoder uses the full range to output
|
||||
* pixels. If the bit depth in the basic info is different from this, the
|
||||
* encoder expects the values to be rescaled accordingly (e.g. multiplied by
|
||||
* 65535/4095 for a 12-bit image using UINT16 input data type). */
|
||||
JXL_BIT_DEPTH_FROM_PIXEL_FORMAT = 0,
|
||||
|
||||
/** If this setting is selected, the encoder expects the input pixels to be
|
||||
* in the range defined by the bits_per_sample value of the basic info (e.g.
|
||||
* for 12-bit images using UINT16 input data types, the allowed range is
|
||||
* 0 .. 4095 and the value 4095 is mapped to 1.0 when converting to float),
|
||||
* and the decoder outputs pixels in this range. */
|
||||
JXL_BIT_DEPTH_FROM_CODESTREAM = 1,
|
||||
|
||||
/** This setting can only be used in the decoder to select a custom range for
|
||||
* pixel output */
|
||||
JXL_BIT_DEPTH_CUSTOM = 2,
|
||||
} JxlBitDepthType;
|
||||
|
||||
/** Data type for describing the interpretation of the input and output buffers
|
||||
* in terms of the range of allowed input and output pixel values. */
|
||||
typedef struct {
|
||||
/** Bit depth setting, see comment on @ref JxlBitDepthType */
|
||||
JxlBitDepthType type;
|
||||
|
||||
/** Custom bits per sample */
|
||||
uint32_t bits_per_sample;
|
||||
|
||||
/** Custom exponent bits per sample */
|
||||
uint32_t exponent_bits_per_sample;
|
||||
} JxlBitDepth;
|
||||
|
||||
/** Data type holding the 4-character type name of an ISOBMFF box.
|
||||
*/
|
||||
typedef char JxlBoxType[4];
|
||||
|
||||
3
third_party/jpeg-xl/lib/jxl.cmake
vendored
3
third_party/jpeg-xl/lib/jxl.cmake
vendored
@@ -447,6 +447,9 @@ else ()
|
||||
)
|
||||
endif ()
|
||||
|
||||
# Generate version.h
|
||||
configure_file("jxl/version.h.in" "include/jxl/version.h")
|
||||
|
||||
# Headers for exporting/importing public headers
|
||||
include(GenerateExportHeader)
|
||||
set_target_properties(jxl_dec-obj PROPERTIES
|
||||
|
||||
202
third_party/jpeg-xl/lib/jxl/codec_y4m_testonly.cc
vendored
202
third_party/jpeg-xl/lib/jxl/codec_y4m_testonly.cc
vendored
@@ -1,202 +0,0 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include "lib/jxl/codec_y4m_testonly.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace jxl {
|
||||
namespace test {
|
||||
|
||||
struct HeaderY4M {
|
||||
size_t xsize;
|
||||
size_t ysize;
|
||||
size_t bits_per_sample;
|
||||
int is_yuv; // Y4M: where 1 = 444, 2 = 422, 3 = 420
|
||||
};
|
||||
|
||||
// Decode Y4M images.
|
||||
class Y4MParser {
|
||||
public:
|
||||
explicit Y4MParser(const Span<const uint8_t> bytes)
|
||||
: pos_(bytes.data()), end_(pos_ + bytes.size()) {}
|
||||
|
||||
// TODO(jon): support multi-frame y4m
|
||||
Status ParseHeader(HeaderY4M* header, const uint8_t** pos) {
|
||||
JXL_RETURN_IF_ERROR(ExpectString("YUV4MPEG2", 9));
|
||||
header->is_yuv = 3;
|
||||
// TODO(jon): check if 4:2:0 is indeed the default
|
||||
header->bits_per_sample = 8;
|
||||
// TODO(jon): check if there's a y4m convention for higher bit depths
|
||||
while (pos_ < end_) {
|
||||
char next = 0;
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&next));
|
||||
if (next == 0x0A) break;
|
||||
if (next != ' ') continue;
|
||||
char field = 0;
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&field));
|
||||
switch (field) {
|
||||
case 'W':
|
||||
JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
|
||||
break;
|
||||
case 'H':
|
||||
JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
|
||||
break;
|
||||
case 'I':
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&next));
|
||||
if (next != 'p') {
|
||||
return JXL_FAILURE(
|
||||
"Y4M: only progressive (no frame interlacing) allowed");
|
||||
}
|
||||
break;
|
||||
case 'C': {
|
||||
char c1 = 0;
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&c1));
|
||||
char c2 = 0;
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&c2));
|
||||
char c3 = 0;
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&c3));
|
||||
if (c1 != '4') return JXL_FAILURE("Y4M: invalid C param");
|
||||
if (c2 == '4') {
|
||||
if (c3 != '4') return JXL_FAILURE("Y4M: invalid C param");
|
||||
header->is_yuv = 1; // 444
|
||||
} else if (c2 == '2') {
|
||||
if (c3 == '2') {
|
||||
header->is_yuv = 2; // 422
|
||||
} else if (c3 == '0') {
|
||||
header->is_yuv = 3; // 420
|
||||
} else {
|
||||
return JXL_FAILURE("Y4M: invalid C param");
|
||||
}
|
||||
} else {
|
||||
return JXL_FAILURE("Y4M: invalid C param");
|
||||
}
|
||||
}
|
||||
[[fallthrough]];
|
||||
// no break: fallthrough because this field can have values like
|
||||
// "C420jpeg" (we are ignoring the chroma sample location and treat
|
||||
// everything like C420jpeg)
|
||||
case 'F': // Framerate in fps as numerator:denominator
|
||||
// TODO(jon): actually read this and set corresponding jxl
|
||||
// metadata
|
||||
case 'A': // Pixel aspect ratio (ignoring it, could perhaps adjust
|
||||
// intrinsic dimensions based on this?)
|
||||
case 'X': // Comment, ignore
|
||||
// ignore the field value and go to next one
|
||||
while (pos_ < end_) {
|
||||
if (pos_[0] == ' ' || pos_[0] == 0x0A) break;
|
||||
pos_++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return JXL_FAILURE("Y4M: parse error");
|
||||
}
|
||||
}
|
||||
JXL_RETURN_IF_ERROR(ExpectString("FRAME", 5));
|
||||
while (true) {
|
||||
char next = 0;
|
||||
JXL_RETURN_IF_ERROR(ReadChar(&next));
|
||||
if (next == 0x0A) {
|
||||
*pos = pos_;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Status ExpectString(const char* str, size_t len) {
|
||||
// Unlikely to happen.
|
||||
if (pos_ + len < pos_) return JXL_FAILURE("Y4M: overflow");
|
||||
|
||||
if (pos_ + len > end_ || strncmp(str, (const char*)pos_, len) != 0) {
|
||||
return JXL_FAILURE("Y4M: expected %s", str);
|
||||
}
|
||||
pos_ += len;
|
||||
return true;
|
||||
}
|
||||
|
||||
Status ReadChar(char* out) {
|
||||
// Unlikely to happen.
|
||||
if (pos_ + 1 < pos_) return JXL_FAILURE("Y4M: overflow");
|
||||
|
||||
if (pos_ >= end_) {
|
||||
return JXL_FAILURE("Y4M: unexpected end of input");
|
||||
}
|
||||
*out = *pos_;
|
||||
pos_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
|
||||
|
||||
Status ParseUnsigned(size_t* number) {
|
||||
if (pos_ == end_) return JXL_FAILURE("PNM: reached end before number");
|
||||
if (!IsDigit(*pos_)) return JXL_FAILURE("PNM: expected unsigned number");
|
||||
|
||||
*number = 0;
|
||||
while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
|
||||
*number *= 10;
|
||||
*number += *pos_ - '0';
|
||||
++pos_;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
const uint8_t* pos_;
|
||||
const uint8_t* const end_;
|
||||
};
|
||||
|
||||
Status DecodeImageY4M(const Span<const uint8_t> bytes, CodecInOut* io) {
|
||||
Y4MParser parser(bytes);
|
||||
HeaderY4M header = {};
|
||||
const uint8_t* pos = nullptr;
|
||||
JXL_RETURN_IF_ERROR(parser.ParseHeader(&header, &pos));
|
||||
|
||||
Image3F yuvdata(header.xsize, header.ysize);
|
||||
ImageBundle bundle(&io->metadata.m);
|
||||
const int hshift[3][3] = {{0, 0, 0}, {0, 1, 1}, {0, 1, 1}};
|
||||
const int vshift[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 1, 1}};
|
||||
|
||||
for (size_t c = 0; c < 3; c++) {
|
||||
for (size_t y = 0; y < header.ysize >> vshift[header.is_yuv - 1][c]; ++y) {
|
||||
float* const JXL_RESTRICT row = yuvdata.PlaneRow((c == 2 ? 2 : 1 - c), y);
|
||||
if (pos + (header.xsize >> hshift[header.is_yuv - 1][c]) >
|
||||
bytes.data() + bytes.size())
|
||||
return JXL_FAILURE("Not enough image data");
|
||||
for (size_t x = 0; x < header.xsize >> hshift[header.is_yuv - 1][c];
|
||||
++x) {
|
||||
row[x] = (1.f / 255.f) * ((*pos++) - 128.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
bundle.SetFromImage(std::move(yuvdata), io->metadata.m.color_encoding);
|
||||
bundle.color_transform = ColorTransform::kYCbCr;
|
||||
|
||||
YCbCrChromaSubsampling subsampling;
|
||||
uint8_t cssh[3] = {
|
||||
2, static_cast<uint8_t>(hshift[header.is_yuv - 1][1] ? 1 : 2),
|
||||
static_cast<uint8_t>(hshift[header.is_yuv - 1][2] ? 1 : 2)};
|
||||
uint8_t cssv[3] = {
|
||||
2, static_cast<uint8_t>(vshift[header.is_yuv - 1][1] ? 1 : 2),
|
||||
static_cast<uint8_t>(vshift[header.is_yuv - 1][2] ? 1 : 2)};
|
||||
|
||||
JXL_RETURN_IF_ERROR(subsampling.Set(cssh, cssv));
|
||||
bundle.chroma_subsampling = subsampling;
|
||||
io->Main() = std::move(bundle);
|
||||
|
||||
JXL_RETURN_IF_ERROR(io->metadata.m.color_encoding.SetSRGB(ColorSpace::kRGB));
|
||||
io->metadata.m.SetUintSamples(header.bits_per_sample);
|
||||
io->metadata.m.SetAlphaBits(0);
|
||||
io->dec_pixels = header.xsize * header.ysize;
|
||||
|
||||
io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth();
|
||||
io->SetSize(header.xsize, header.ysize);
|
||||
SetIntensityTarget(io);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace jxl
|
||||
18
third_party/jpeg-xl/lib/jxl/codec_y4m_testonly.h
vendored
18
third_party/jpeg-xl/lib/jxl/codec_y4m_testonly.h
vendored
@@ -1,18 +0,0 @@
|
||||
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "lib/jxl/base/padded_bytes.h"
|
||||
#include "lib/jxl/base/status.h"
|
||||
#include "lib/jxl/codec_in_out.h"
|
||||
|
||||
namespace jxl {
|
||||
namespace test {
|
||||
|
||||
Status DecodeImageY4M(const Span<const uint8_t> bytes, CodecInOut* io);
|
||||
|
||||
} // namespace test
|
||||
} // namespace jxl
|
||||
115
third_party/jpeg-xl/lib/jxl/color_management.cc
vendored
115
third_party/jpeg-xl/lib/jxl/color_management.cc
vendored
@@ -249,7 +249,7 @@ Status CreateICCHeader(const ColorEncoding& c,
|
||||
|
||||
WriteICCUint32(0, 0, header); // size, correct value filled in at end
|
||||
WriteICCTag(kCmm, 4, header);
|
||||
WriteICCUint32(0x04300000u, 8, header);
|
||||
WriteICCUint32(0x04400000u, 8, header);
|
||||
const char* profile_type =
|
||||
c.GetColorSpace() == ColorSpace::kXYB ? "scnr" : "mntr";
|
||||
WriteICCTag(profile_type, 12, header);
|
||||
@@ -339,6 +339,44 @@ Status CreateICCChadTag(float chad[9], PaddedBytes* JXL_RESTRICT tags) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void MaybeCreateICCCICPTag(const ColorEncoding& c,
|
||||
PaddedBytes* JXL_RESTRICT tags, size_t* offset,
|
||||
size_t* size, PaddedBytes* JXL_RESTRICT tagtable,
|
||||
std::vector<size_t>* offsets) {
|
||||
if (c.GetColorSpace() != ColorSpace::kRGB) {
|
||||
return;
|
||||
}
|
||||
uint8_t primaries = 0;
|
||||
if (c.primaries == Primaries::kP3) {
|
||||
if (c.white_point == WhitePoint::kD65) {
|
||||
primaries = 12;
|
||||
} else if (c.white_point == WhitePoint::kDCI) {
|
||||
primaries = 11;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
} else if (c.primaries != Primaries::kCustom &&
|
||||
c.white_point == WhitePoint::kD65) {
|
||||
primaries = static_cast<uint8_t>(c.primaries);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
if (c.tf.IsUnknown() || c.tf.IsGamma()) {
|
||||
return;
|
||||
}
|
||||
WriteICCTag("cicp", tags->size(), tags);
|
||||
WriteICCUint32(0, tags->size(), tags);
|
||||
WriteICCUint8(primaries, tags->size(), tags);
|
||||
WriteICCUint8(static_cast<uint8_t>(c.tf.GetTransferFunction()), tags->size(),
|
||||
tags);
|
||||
// Matrix
|
||||
WriteICCUint8(0, tags->size(), tags);
|
||||
// Full range
|
||||
WriteICCUint8(1, tags->size(), tags);
|
||||
FinalizeICCTag(tags, offset, size);
|
||||
AddToICCTagTable("cicp", *offset, *size, tagtable, offsets);
|
||||
}
|
||||
|
||||
void CreateICCCurvCurvTag(const std::vector<uint16_t>& curve,
|
||||
PaddedBytes* JXL_RESTRICT tags) {
|
||||
size_t pos = tags->size();
|
||||
@@ -351,6 +389,7 @@ void CreateICCCurvCurvTag(const std::vector<uint16_t>& curve,
|
||||
}
|
||||
}
|
||||
|
||||
// Writes 12 + 4*params.size() bytes
|
||||
Status CreateICCCurvParaTag(std::vector<float> params, size_t curve_type,
|
||||
PaddedBytes* JXL_RESTRICT tags) {
|
||||
WriteICCTag("para", tags->size(), tags);
|
||||
@@ -365,30 +404,50 @@ Status CreateICCCurvParaTag(std::vector<float> params, size_t curve_type,
|
||||
|
||||
Status CreateICCLutAtoBTagForXYB(PaddedBytes* JXL_RESTRICT tags) {
|
||||
WriteICCTag("mAB ", tags->size(), tags);
|
||||
// 4 reserved bytes set to 0
|
||||
WriteICCUint32(0, tags->size(), tags);
|
||||
// number of input channels
|
||||
WriteICCUint8(3, tags->size(), tags);
|
||||
// number of output channels
|
||||
WriteICCUint8(3, tags->size(), tags);
|
||||
// 2 reserved bytes for padding
|
||||
WriteICCUint16(0, tags->size(), tags);
|
||||
WriteICCUint32(316, tags->size(), tags);
|
||||
WriteICCUint32(268, tags->size(), tags);
|
||||
WriteICCUint32(148, tags->size(), tags);
|
||||
WriteICCUint32(80, tags->size(), tags);
|
||||
// offset to first B curve
|
||||
WriteICCUint32(32, tags->size(), tags);
|
||||
// offset to matrix
|
||||
WriteICCUint32(244, tags->size(), tags);
|
||||
// offset to first M curve
|
||||
WriteICCUint32(148, tags->size(), tags);
|
||||
// offset to CLUT
|
||||
WriteICCUint32(80, tags->size(), tags);
|
||||
// offset to first A curve
|
||||
// (reuse linear B curves)
|
||||
WriteICCUint32(32, tags->size(), tags);
|
||||
|
||||
// offset = 32
|
||||
// no-op curves
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
|
||||
// offset = 80
|
||||
// number of grid points for each input channel
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
WriteICCUint8(i < 3 ? 2 : 0, tags->size(), tags);
|
||||
}
|
||||
// precision = 2
|
||||
WriteICCUint8(2, tags->size(), tags);
|
||||
WriteICCUint8(2, tags->size(), tags);
|
||||
WriteICCUint8(2, tags->size(), tags);
|
||||
WriteICCUint8(0, tags->size(), tags);
|
||||
WriteICCUint32(0, tags->size(), tags);
|
||||
WriteICCUint32(0, tags->size(), tags);
|
||||
WriteICCUint32(0, tags->size(), tags);
|
||||
WriteICCUint8(2, tags->size(), tags);
|
||||
// 3 bytes of padding
|
||||
WriteICCUint8(0, tags->size(), tags);
|
||||
WriteICCUint16(0, tags->size(), tags);
|
||||
const float kOffsets[3] = {0.015387, 0.028101, 0.277706};
|
||||
const float kScaling[3] = {1.125, 1.125, 1. / 1.511027};
|
||||
const float kOffsets[3] = {
|
||||
kScaledXYBOffset[0] + kScaledXYBOffset[1],
|
||||
kScaledXYBOffset[1] - kScaledXYBOffset[0] + 1.0f / kScaledXYBScale[0],
|
||||
kScaledXYBOffset[1] + kScaledXYBOffset[2]};
|
||||
const float kScaling[3] = {
|
||||
1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]),
|
||||
1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]),
|
||||
1.0f / (1.0f / kScaledXYBScale[1] + 1.0f / kScaledXYBScale[2])};
|
||||
// 2*2*2*3 entries of 2 bytes each = 48 bytes
|
||||
for (size_t ix = 0; ix < 2; ++ix) {
|
||||
for (size_t iy = 0; iy < 2; ++iy) {
|
||||
for (size_t ib = 0; ib < 2; ++ib) {
|
||||
@@ -414,6 +473,8 @@ Status CreateICCLutAtoBTagForXYB(PaddedBytes* JXL_RESTRICT tags) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// offset = 148
|
||||
// 3 curves with 5 parameters = 3 * (12 + 5 * 4) = 96 bytes
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
const float b =
|
||||
-kOffsets[i] - std::cbrt(jxl::kNegOpsinAbsorbanceBiasRGB[i]);
|
||||
@@ -423,23 +484,24 @@ Status CreateICCLutAtoBTagForXYB(PaddedBytes* JXL_RESTRICT tags) {
|
||||
b,
|
||||
0, // unused
|
||||
std::max(0.f, -b * kScaling[i]), // make skcms happy
|
||||
jxl::kNegOpsinAbsorbanceBiasRGB[i],
|
||||
0, // unused
|
||||
};
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(params, 4, tags));
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(params, 3, tags));
|
||||
}
|
||||
// offset = 244
|
||||
const double matrix[] = {1.5170095, -1.1065225, 0.071623,
|
||||
-0.050022, 0.5683655, -0.018344,
|
||||
-1.387676, 1.1145555, 0.6857255};
|
||||
// 12 * 4 = 48 bytes
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(matrix[i], tags->size(), tags));
|
||||
}
|
||||
JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(0.0f, tags->size(), tags));
|
||||
JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(0.0f, tags->size(), tags));
|
||||
JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(0.0f, tags->size(), tags));
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
|
||||
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
float intercept = 0;
|
||||
for (size_t j = 0; j < 3; ++j) {
|
||||
intercept += matrix[i * 3 + j] * jxl::kNegOpsinAbsorbanceBiasRGB[j];
|
||||
}
|
||||
JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(intercept, tags->size(), tags));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
@@ -481,9 +543,7 @@ Status MaybeCreateProfile(const ColorEncoding& c,
|
||||
FinalizeICCTag(&tags, &tag_offset, &tag_size);
|
||||
AddToICCTagTable("desc", tag_offset, tag_size, &tagtable, &offsets);
|
||||
|
||||
const std::string copyright =
|
||||
"Copyright 2019 Google LLC, CC-BY-SA 3.0 Unported "
|
||||
"license(https://creativecommons.org/licenses/by-sa/3.0/legalcode)";
|
||||
const std::string copyright = "CC0";
|
||||
CreateICCMlucTag(copyright, &tags);
|
||||
FinalizeICCTag(&tags, &tag_offset, &tag_size);
|
||||
AddToICCTagTable("cprt", tag_offset, tag_size, &tagtable, &offsets);
|
||||
@@ -511,6 +571,9 @@ Status MaybeCreateProfile(const ColorEncoding& c,
|
||||
}
|
||||
|
||||
if (c.GetColorSpace() == ColorSpace::kRGB) {
|
||||
MaybeCreateICCCICPTag(c, &tags, &tag_offset, &tag_size, &tagtable,
|
||||
&offsets);
|
||||
|
||||
const PrimariesCIExy primaries = c.GetPrimaries();
|
||||
float m[9];
|
||||
JXL_RETURN_IF_ERROR(CreateICCRGBMatrix(primaries.r, primaries.g,
|
||||
|
||||
@@ -392,7 +392,7 @@ TEST_F(ColorManagementTest, XYBProfile) {
|
||||
}
|
||||
}
|
||||
}
|
||||
static float kMaxError[3] = {8.5e-4, 4e-4, 5e-4};
|
||||
static float kMaxError[3] = {9e-4, 4e-4, 5e-4};
|
||||
printf("Maximum errors:\n");
|
||||
for (size_t c = 0; c < 3; ++c) {
|
||||
debug_print_color(max_err_i[c]);
|
||||
|
||||
17
third_party/jpeg-xl/lib/jxl/dec_cache.cc
vendored
17
third_party/jpeg-xl/lib/jxl/dec_cache.cc
vendored
@@ -144,10 +144,11 @@ Status PassesDecoderState::PreparePipeline(ImageBundle* decoded,
|
||||
frame_header.save_before_color_transform);
|
||||
JXL_ASSERT(!options.render_spotcolors ||
|
||||
!decoded->metadata()->Find(ExtraChannel::kSpotColor));
|
||||
bool is_rgba = (format.num_channels == 4);
|
||||
uint8_t* rgb_output = reinterpret_cast<uint8_t*>(image_buffer);
|
||||
builder.AddStage(GetFastXYBTosRGB8Stage(rgb_output, stride, width, height,
|
||||
is_rgba, has_alpha, alpha_c));
|
||||
bool is_rgba = (main_output.format.num_channels == 4);
|
||||
uint8_t* rgb_output = reinterpret_cast<uint8_t*>(main_output.buffer);
|
||||
builder.AddStage(GetFastXYBTosRGB8Stage(rgb_output, main_output.stride,
|
||||
width, height, is_rgba, has_alpha,
|
||||
alpha_c));
|
||||
} else {
|
||||
bool linear = false;
|
||||
if (frame_header.color_transform == ColorTransform::kYCbCr) {
|
||||
@@ -212,10 +213,10 @@ Status PassesDecoderState::PreparePipeline(ImageBundle* decoded,
|
||||
linear = false;
|
||||
}
|
||||
|
||||
if (pixel_callback.IsPresent() || image_buffer) {
|
||||
builder.AddStage(GetWriteToOutputStage(
|
||||
pixel_callback, image_buffer, width, height, stride, format,
|
||||
has_alpha, unpremul_alpha, alpha_c, undo_orientation));
|
||||
if (main_output.callback.IsPresent() || main_output.buffer) {
|
||||
builder.AddStage(GetWriteToOutputStage(main_output, width, height,
|
||||
has_alpha, unpremul_alpha, alpha_c,
|
||||
undo_orientation, extra_output));
|
||||
} else {
|
||||
builder.AddStage(GetWriteToImageBundleStage(
|
||||
decoded, output_encoding_info.color_encoding));
|
||||
|
||||
29
third_party/jpeg-xl/lib/jxl/dec_cache.h
vendored
29
third_party/jpeg-xl/lib/jxl/dec_cache.h
vendored
@@ -56,6 +56,20 @@ struct PixelCallback {
|
||||
void* init_opaque = nullptr;
|
||||
};
|
||||
|
||||
struct ImageOutput {
|
||||
// Pixel format of the output pixels, used for buffer and callback output.
|
||||
JxlPixelFormat format;
|
||||
// Output bit depth for unsigned data types, used for float to int conversion.
|
||||
size_t bits_per_sample;
|
||||
// Callback for line-by-line output.
|
||||
PixelCallback callback;
|
||||
// Pixel buffer for image output.
|
||||
void* buffer;
|
||||
size_t buffer_size;
|
||||
// Length of a row of image_buffer in bytes (based on oriented width).
|
||||
size_t stride;
|
||||
};
|
||||
|
||||
// Per-frame decoder state. All the images here should be accessed through a
|
||||
// group rect (either with block units or pixel units).
|
||||
struct PassesDecoderState {
|
||||
@@ -77,17 +91,11 @@ struct PassesDecoderState {
|
||||
// Sigma values for EPF.
|
||||
ImageF sigma;
|
||||
|
||||
// Pixel buffer for image output.
|
||||
void* image_buffer;
|
||||
// Image dimensions before applying undo_orientation.
|
||||
size_t width;
|
||||
size_t height;
|
||||
// Length of a row of image_buffer in bytes (based on oriented width).
|
||||
size_t stride;
|
||||
// Callback for line-by-line output.
|
||||
PixelCallback pixel_callback;
|
||||
// Pixel format of the output pixels, used for buffer and callback output.
|
||||
JxlPixelFormat format;
|
||||
ImageOutput main_output;
|
||||
std::vector<ImageOutput> extra_output;
|
||||
|
||||
// Whether to use int16 float-XYB-to-uint8-srgb conversion.
|
||||
bool fast_xyb_srgb8_conversion;
|
||||
@@ -134,8 +142,9 @@ struct PassesDecoderState {
|
||||
b_dm_multiplier =
|
||||
std::pow(1 / (1.25f), shared->frame_header.b_qm_scale - 2.0f);
|
||||
|
||||
pixel_callback = PixelCallback();
|
||||
image_buffer = nullptr;
|
||||
main_output.callback = PixelCallback();
|
||||
main_output.buffer = nullptr;
|
||||
extra_output.clear();
|
||||
|
||||
fast_xyb_srgb8_conversion = false;
|
||||
unpremul_alpha = false;
|
||||
|
||||
52
third_party/jpeg-xl/lib/jxl/dec_frame.h
vendored
52
third_party/jpeg-xl/lib/jxl/dec_frame.h
vendored
@@ -173,28 +173,22 @@ class FrameDecoder {
|
||||
}
|
||||
|
||||
// Sets the pixel callback or image buffer where the pixels will be decoded.
|
||||
// This is not supported for all images. If it succeeds, HasRGBBuffer() will
|
||||
// return true.
|
||||
// If it does not succeed, the image is decoded to the ImageBundle passed to
|
||||
// InitFrame instead.
|
||||
//
|
||||
// @param undo_orientation: if true, indicates the frame decoder should apply
|
||||
// the exif orientation to bring the image to the intended display
|
||||
// orientation.
|
||||
void SetImageOutput(const PixelCallback& pixel_callback, void* image_buffer,
|
||||
size_t xsize, size_t ysize, JxlPixelFormat format,
|
||||
size_t image_buffer_size, size_t xsize, size_t ysize,
|
||||
JxlPixelFormat format, size_t bits_per_sample,
|
||||
bool unpremul_alpha, bool undo_orientation) const {
|
||||
dec_state_->pixel_callback = pixel_callback;
|
||||
dec_state_->image_buffer = image_buffer;
|
||||
dec_state_->width = xsize;
|
||||
dec_state_->height = ysize;
|
||||
dec_state_->format = format;
|
||||
dec_state_->stride =
|
||||
(xsize * BytesPerChannel(format.data_type) * format.num_channels);
|
||||
if (format.align > 1) {
|
||||
dec_state_->stride =
|
||||
(jxl::DivCeil(dec_state_->stride, format.align) * format.align);
|
||||
}
|
||||
dec_state_->main_output.format = format;
|
||||
dec_state_->main_output.bits_per_sample = bits_per_sample;
|
||||
dec_state_->main_output.callback = pixel_callback;
|
||||
dec_state_->main_output.buffer = image_buffer;
|
||||
dec_state_->main_output.buffer_size = image_buffer_size;
|
||||
dec_state_->main_output.stride = GetStride(xsize, format);
|
||||
const jxl::ExtraChannelInfo* alpha =
|
||||
decoded_->metadata()->Find(jxl::ExtraChannel::kAlpha);
|
||||
if (alpha && alpha->alpha_associated && unpremul_alpha) {
|
||||
@@ -206,9 +200,11 @@ class FrameDecoder {
|
||||
std::swap(dec_state_->width, dec_state_->height);
|
||||
}
|
||||
}
|
||||
dec_state_->extra_output.clear();
|
||||
#if !JXL_HIGH_PRECISION
|
||||
if (dec_state_->image_buffer && (format.data_type == JXL_TYPE_UINT8) &&
|
||||
(format.num_channels >= 3) && !dec_state_->unpremul_alpha &&
|
||||
if (dec_state_->main_output.buffer &&
|
||||
(format.data_type == JXL_TYPE_UINT8) && (format.num_channels >= 3) &&
|
||||
!dec_state_->unpremul_alpha &&
|
||||
(dec_state_->undo_orientation == Orientation::kIdentity) &&
|
||||
decoded_->metadata()->xyb_encoded &&
|
||||
dec_state_->output_encoding_info.color_encoding.IsSRGB() &&
|
||||
@@ -221,12 +217,15 @@ class FrameDecoder {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns true if the rgb output buffer passed by MaybeSetRGB8OutputBuffer
|
||||
// has been/will be populated by Flush() / FinalizeFrame(), or if a pixel
|
||||
// callback has been used.
|
||||
bool HasRGBBuffer() const {
|
||||
return dec_state_->image_buffer != nullptr ||
|
||||
dec_state_->pixel_callback.IsPresent();
|
||||
void AddExtraChannelOutput(void* buffer, size_t buffer_size, size_t xsize,
|
||||
JxlPixelFormat format, size_t bits_per_sample) {
|
||||
ImageOutput out;
|
||||
out.format = format;
|
||||
out.bits_per_sample = bits_per_sample;
|
||||
out.buffer = buffer;
|
||||
out.buffer_size = buffer_size;
|
||||
out.stride = GetStride(xsize, format);
|
||||
dec_state_->extra_output.push_back(out);
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -273,6 +272,15 @@ class FrameDecoder {
|
||||
: 2u);
|
||||
}
|
||||
|
||||
static size_t GetStride(const size_t xsize, JxlPixelFormat format) {
|
||||
size_t stride =
|
||||
(xsize * BytesPerChannel(format.data_type) * format.num_channels);
|
||||
if (format.align > 1) {
|
||||
stride = (jxl::DivCeil(stride, format.align) * format.align);
|
||||
}
|
||||
return stride;
|
||||
}
|
||||
|
||||
PassesDecoderState* dec_state_;
|
||||
ThreadPool* pool_;
|
||||
std::vector<TocEntry> toc_;
|
||||
|
||||
@@ -65,12 +65,13 @@ class Rec2408ToneMapper {
|
||||
Mul(Set(df_, 10000), TF_PQ().DisplayFromEncoded(df_, e4))));
|
||||
|
||||
const V ratio = Div(new_luminance, luminance);
|
||||
|
||||
const V inv_target_peak = Set(df_, inv_target_peak_);
|
||||
const V normalizer = Set(df_, normalizer_);
|
||||
const V multiplier = Mul(ratio, normalizer);
|
||||
for (V* const val : {red, green, blue}) {
|
||||
*val = Mul(IfThenElse(Le(luminance, Set(df_, 1e-6f)), new_luminance,
|
||||
Mul(*val, ratio)),
|
||||
normalizer);
|
||||
*val = IfThenElse(Le(luminance, Set(df_, 1e-6f)),
|
||||
Mul(new_luminance, inv_target_peak),
|
||||
Mul(*val, multiplier));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,8 +99,8 @@ class Rec2408ToneMapper {
|
||||
ks,
|
||||
MulAdd(Add(t_b_3, MulAdd(Set(df_, -2), t_b_2, t_b)),
|
||||
Sub(Set(df_, 1), ks),
|
||||
MulAdd(Set(df_, -2), t_b_3,
|
||||
Mul(Mul(Set(df_, 3), t_b_2), max_lum))));
|
||||
Mul(MulAdd(Set(df_, -2), t_b_3, Mul(Set(df_, 3), t_b_2)),
|
||||
max_lum)));
|
||||
}
|
||||
|
||||
D df_;
|
||||
@@ -125,6 +126,7 @@ class Rec2408ToneMapper {
|
||||
const float inv_one_minus_ks_ = 1.0f / std::max(1e-6f, 1.0f - ks_);
|
||||
|
||||
const float normalizer_ = source_range_.second / target_range_.second;
|
||||
const float inv_target_peak_ = 1.f / target_range_.second;
|
||||
};
|
||||
|
||||
class HlgOOTF {
|
||||
|
||||
168
third_party/jpeg-xl/lib/jxl/decode.cc
vendored
168
third_party/jpeg-xl/lib/jxl/decode.cc
vendored
@@ -144,6 +144,20 @@ size_t BitsPerChannel(JxlDataType data_type) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
|
||||
JxlPixelFormat format) {
|
||||
if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
|
||||
return BitsPerChannel(format.data_type);
|
||||
} else if (bit_depth.type == JXL_BIT_DEPTH_FROM_CODESTREAM) {
|
||||
return metadata.bit_depth.bits_per_sample;
|
||||
} else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) {
|
||||
return bit_depth.bits_per_sample;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
enum class DecoderStage : uint32_t {
|
||||
kInited, // Decoder created, no JxlDecoderProcessInput called yet
|
||||
kStarted, // Running JxlDecoderProcessInput calls
|
||||
@@ -415,6 +429,7 @@ struct JxlDecoderStruct {
|
||||
size_t image_out_size;
|
||||
|
||||
JxlPixelFormat image_out_format;
|
||||
JxlBitDepth image_out_bit_depth;
|
||||
|
||||
// For extra channels. Empty if no extra channels are requested, and they are
|
||||
// reset each frame
|
||||
@@ -701,6 +716,7 @@ void JxlDecoderRewindDecodingState(JxlDecoder* dec) {
|
||||
dec->image_out_destroy_callback = nullptr;
|
||||
dec->image_out_init_opaque = nullptr;
|
||||
dec->image_out_size = 0;
|
||||
dec->image_out_bit_depth.type = JXL_BIT_DEPTH_FROM_PIXEL_FORMAT;
|
||||
dec->extra_channel_output.clear();
|
||||
dec->dec_pixels = 0;
|
||||
dec->next_in = 0;
|
||||
@@ -1072,93 +1088,6 @@ JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec) {
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
static size_t GetStride(const JxlDecoder* dec, const JxlPixelFormat& format) {
|
||||
size_t xsize, ysize;
|
||||
GetCurrentDimensions(dec, xsize, ysize);
|
||||
size_t stride = xsize * (BitsPerChannel(format.data_type) *
|
||||
format.num_channels / jxl::kBitsPerByte);
|
||||
if (format.align > 1) {
|
||||
stride = jxl::DivCeil(stride, format.align) * format.align;
|
||||
}
|
||||
return stride;
|
||||
}
|
||||
|
||||
// Internal wrapper around jxl::ConvertToExternal which converts the stride,
|
||||
// format and orientation and allows to choose whether to get all RGB(A)
|
||||
// channels or alternatively get a single extra channel.
|
||||
// If want_extra_channel, a valid index to a single extra channel must be
|
||||
// given, the output must be single-channel, and format.num_channels is ignored
|
||||
// and treated as if it is 1.
|
||||
static JxlDecoderStatus ConvertImageInternal(
|
||||
const JxlDecoder* dec, const jxl::ImageBundle& frame,
|
||||
const JxlPixelFormat& format, bool want_extra_channel,
|
||||
size_t extra_channel_index, void* out_image, size_t out_size,
|
||||
const PixelCallback& out_callback) {
|
||||
// TODO(lode): handle mismatch of RGB/grayscale color profiles and pixel data
|
||||
// color/grayscale format
|
||||
const size_t stride = GetStride(dec, format);
|
||||
|
||||
bool float_format = format.data_type == JXL_TYPE_FLOAT ||
|
||||
format.data_type == JXL_TYPE_FLOAT16;
|
||||
|
||||
jxl::Orientation undo_orientation = dec->keep_orientation
|
||||
? jxl::Orientation::kIdentity
|
||||
: dec->metadata.m.GetOrientation();
|
||||
|
||||
jxl::Status status(true);
|
||||
if (want_extra_channel) {
|
||||
JXL_ASSERT(extra_channel_index < frame.extra_channels().size());
|
||||
status = jxl::ConvertToExternal(frame.extra_channels()[extra_channel_index],
|
||||
BitsPerChannel(format.data_type),
|
||||
float_format, format.endianness, stride,
|
||||
dec->thread_pool.get(), out_image, out_size,
|
||||
out_callback, undo_orientation);
|
||||
} else {
|
||||
status = jxl::ConvertToExternal(
|
||||
frame, BitsPerChannel(format.data_type), float_format,
|
||||
format.num_channels, format.endianness, stride, dec->thread_pool.get(),
|
||||
out_image, out_size, out_callback, undo_orientation,
|
||||
dec->unpremul_alpha);
|
||||
}
|
||||
|
||||
return status ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
|
||||
}
|
||||
|
||||
// Outputs the preview or full image (including extra channels) in the internal
|
||||
// image bundle to the image buffers and/or image callback provided through the
|
||||
// API.
|
||||
// TODO(szabadka) Handle all these cases in the low-memory code-path and remove
|
||||
// this function.
|
||||
JxlDecoderStatus JxlDecoderOutputImage(JxlDecoder* dec) {
|
||||
if (!dec->frame_dec->HasRGBBuffer()) {
|
||||
JxlDecoderStatus status = ConvertImageInternal(
|
||||
dec, *dec->ib, dec->image_out_format,
|
||||
/*want_extra_channel=*/false,
|
||||
/*extra_channel_index=*/0, dec->image_out_buffer, dec->image_out_size,
|
||||
PixelCallback{dec->image_out_init_callback, dec->image_out_run_callback,
|
||||
dec->image_out_destroy_callback,
|
||||
dec->image_out_init_opaque});
|
||||
if (status != JXL_DEC_SUCCESS) return status;
|
||||
}
|
||||
bool has_ec = !dec->ib->extra_channels().empty();
|
||||
for (size_t i = 0; i < dec->extra_channel_output.size(); ++i) {
|
||||
void* buffer = dec->extra_channel_output[i].buffer;
|
||||
// buffer nullptr indicates this extra channel is not requested
|
||||
if (!buffer) continue;
|
||||
if (!has_ec) {
|
||||
JXL_WARNING("Extra channels are not supported when callback is used");
|
||||
return JXL_DEC_ERROR;
|
||||
}
|
||||
const JxlPixelFormat* format = &dec->extra_channel_output[i].format;
|
||||
JxlDecoderStatus status = ConvertImageInternal(
|
||||
dec, *dec->ib, *format,
|
||||
/*want_extra_channel=*/true, /*extra_channel_index=*/i, buffer,
|
||||
dec->extra_channel_output[i].buffer_size, /*out_callback=*/{});
|
||||
if (status != JXL_DEC_SUCCESS) return status;
|
||||
}
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
JxlDecoderStatus JxlDecoderProcessSections(JxlDecoder* dec) {
|
||||
Span<const uint8_t> span;
|
||||
JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
|
||||
@@ -1463,15 +1392,27 @@ JxlDecoderStatus JxlDecoderProcessCodestream(JxlDecoder* dec) {
|
||||
}
|
||||
}
|
||||
|
||||
if (dec->image_out_buffer_set && dec->extra_channel_output.empty()) {
|
||||
if (dec->image_out_buffer_set) {
|
||||
size_t xsize, ysize;
|
||||
GetCurrentDimensions(dec, xsize, ysize);
|
||||
size_t bits_per_sample = GetBitDepth(
|
||||
dec->image_out_bit_depth, dec->metadata.m, dec->image_out_format);
|
||||
dec->frame_dec->SetImageOutput(
|
||||
PixelCallback{
|
||||
dec->image_out_init_callback, dec->image_out_run_callback,
|
||||
dec->image_out_destroy_callback, dec->image_out_init_opaque},
|
||||
reinterpret_cast<uint8_t*>(dec->image_out_buffer), xsize, ysize,
|
||||
dec->image_out_format, dec->unpremul_alpha, !dec->keep_orientation);
|
||||
reinterpret_cast<uint8_t*>(dec->image_out_buffer),
|
||||
dec->image_out_size, xsize, ysize, dec->image_out_format,
|
||||
bits_per_sample, dec->unpremul_alpha, !dec->keep_orientation);
|
||||
for (size_t i = 0; i < dec->extra_channel_output.size(); ++i) {
|
||||
const auto& extra = dec->extra_channel_output[i];
|
||||
size_t ec_bits_per_sample =
|
||||
GetBitDepth(dec->image_out_bit_depth,
|
||||
dec->metadata.m.extra_channel_info[i], extra.format);
|
||||
dec->frame_dec->AddExtraChannelOutput(extra.buffer, extra.buffer_size,
|
||||
xsize, extra.format,
|
||||
ec_bits_per_sample);
|
||||
}
|
||||
}
|
||||
|
||||
size_t next_num_passes_to_pause = dec->frame_dec->NextNumPassesToPause();
|
||||
@@ -1527,9 +1468,6 @@ JxlDecoderStatus JxlDecoderProcessCodestream(JxlDecoder* dec) {
|
||||
}
|
||||
|
||||
if (dec->preview_frame || dec->is_last_of_still) {
|
||||
if (dec->image_out_buffer_set) {
|
||||
JXL_API_RETURN_IF_ERROR(JxlDecoderOutputImage(dec));
|
||||
}
|
||||
dec->image_out_buffer_set = false;
|
||||
dec->extra_channel_output.clear();
|
||||
}
|
||||
@@ -2347,11 +2285,7 @@ JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec) {
|
||||
return JXL_DEC_ERROR;
|
||||
}
|
||||
|
||||
if (dec->jpeg_decoder.IsOutputSet() && dec->ib->jpeg_data != nullptr) {
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
return jxl::JxlDecoderOutputImage(dec);
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
|
||||
@@ -2809,3 +2743,41 @@ JxlDecoderStatus JxlDecoderSetProgressiveDetail(JxlDecoder* dec,
|
||||
dec->prog_detail = detail;
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T>
|
||||
JxlDecoderStatus VerifyOutputBitDepth(JxlBitDepth bit_depth, const T& metadata,
|
||||
JxlPixelFormat format) {
|
||||
if ((format.data_type == JXL_TYPE_FLOAT ||
|
||||
format.data_type == JXL_TYPE_FLOAT16) &&
|
||||
bit_depth.type != JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
|
||||
return JXL_API_ERROR(
|
||||
"Only JXL_BIT_DEPTH_FROM_PIXEL_FORMAT is implemented "
|
||||
"for float types.");
|
||||
}
|
||||
uint32_t bits_per_sample = GetBitDepth(bit_depth, metadata, format);
|
||||
if (format.data_type == JXL_TYPE_UINT8 &&
|
||||
(bits_per_sample == 0 || bits_per_sample > 8)) {
|
||||
return JXL_API_ERROR("Inavlid bit depth %u for uint8 output",
|
||||
bits_per_sample);
|
||||
} else if (format.data_type == JXL_TYPE_UINT16 &&
|
||||
(bits_per_sample == 0 || bits_per_sample > 16)) {
|
||||
return JXL_API_ERROR("Inavlid bit depth %u for uint16 output",
|
||||
bits_per_sample);
|
||||
}
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
JxlDecoderStatus JxlDecoderSetImageOutBitDepth(JxlDecoder* dec,
|
||||
const JxlBitDepth* bit_depth) {
|
||||
if (!dec->image_out_buffer_set) {
|
||||
return JXL_API_ERROR("No image out buffer was set.");
|
||||
}
|
||||
JXL_API_RETURN_IF_ERROR(
|
||||
VerifyOutputBitDepth(*bit_depth, dec->metadata.m, dec->image_out_format));
|
||||
dec->image_out_bit_depth = *bit_depth;
|
||||
return JXL_DEC_SUCCESS;
|
||||
}
|
||||
|
||||
180
third_party/jpeg-xl/lib/jxl/decode_test.cc
vendored
180
third_party/jpeg-xl/lib/jxl/decode_test.cc
vendored
@@ -259,13 +259,15 @@ PaddedBytes CreateTestJXLCodestream(Span<const uint8_t> pixels, size_t xsize,
|
||||
if (params.intensity_target != 0) {
|
||||
io.metadata.m.SetIntensityTarget(params.intensity_target);
|
||||
}
|
||||
JxlPixelFormat format = {static_cast<uint32_t>(num_channels), JXL_TYPE_UINT16,
|
||||
JXL_BIG_ENDIAN, 0};
|
||||
// Make the grayscale-ness of the io metadata color_encoding and the packed
|
||||
// image match.
|
||||
io.metadata.m.color_encoding = color_encoding;
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
pixels, xsize, ysize, color_encoding, num_channels,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, JXL_BIG_ENDIAN,
|
||||
&pool, &io.Main(), /*float_in=*/false, /*align=*/0));
|
||||
EXPECT_TRUE(ConvertFromExternal(pixels, xsize, ysize, color_encoding,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, format, &pool,
|
||||
&io.Main()));
|
||||
jxl::PaddedBytes jpeg_data;
|
||||
if (params.jpeg_codestream != nullptr) {
|
||||
#if JPEGXL_ENABLE_JPEG
|
||||
@@ -1334,11 +1336,9 @@ TEST_P(DecodeTestParam, PixelTest) {
|
||||
io.SetSize(config.xsize, config.ysize);
|
||||
|
||||
EXPECT_TRUE(ConvertFromExternal(bytes, config.xsize, config.ysize,
|
||||
color_encoding, orig_channels,
|
||||
color_encoding,
|
||||
/*alpha_is_premultiplied=*/false, 16,
|
||||
JXL_BIG_ENDIAN, nullptr, &io.Main(),
|
||||
/*float_in=*/false,
|
||||
/*align=*/0));
|
||||
format_orig, nullptr, &io.Main()));
|
||||
|
||||
for (size_t i = 0; i < pixels.size(); i++) pixels[i] = 0;
|
||||
EXPECT_TRUE(ConvertToExternal(
|
||||
@@ -1448,8 +1448,6 @@ std::vector<PixelTestConfig> GeneratePixelTests() {
|
||||
// Test previews.
|
||||
for (int preview_mode = 0; preview_mode < jxl::kNumPreviewModes;
|
||||
preview_mode++) {
|
||||
// if (preview_mode == jxl::kBigPreview &&
|
||||
// ch_info[0].output_channels != 3) continue;
|
||||
make_test(ch_info[0], 77, 33, (jxl::PreviewMode)preview_mode,
|
||||
/*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
|
||||
JXL_ORIENT_IDENTITY,
|
||||
@@ -1664,12 +1662,10 @@ TEST(DecodeTest, PixelTestWithICCProfileLossy) {
|
||||
jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
|
||||
jxl::CodecInOut io0;
|
||||
io0.SetSize(xsize, ysize);
|
||||
EXPECT_TRUE(
|
||||
ConvertFromExternal(span0, xsize, ysize, color_encoding0, /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, format_orig.endianness,
|
||||
/*pool=*/nullptr, &io0.Main(), /*float_in=*/false,
|
||||
/*align=*/0));
|
||||
EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, format_orig,
|
||||
/*pool=*/nullptr, &io0.Main()));
|
||||
|
||||
jxl::ColorEncoding color_encoding1;
|
||||
EXPECT_TRUE(color_encoding1.SetICC(std::move(icc)));
|
||||
@@ -1677,15 +1673,14 @@ TEST(DecodeTest, PixelTestWithICCProfileLossy) {
|
||||
jxl::CodecInOut io1;
|
||||
io1.SetSize(xsize, ysize);
|
||||
EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
|
||||
channels, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/32, format.endianness,
|
||||
/*pool=*/nullptr, &io1.Main(),
|
||||
/*float_in=*/true, /*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/32, format,
|
||||
/*pool=*/nullptr, &io1.Main()));
|
||||
|
||||
jxl::ButteraugliParams ba;
|
||||
EXPECT_THAT(ButteraugliDistance(io0, io1, ba, jxl::GetJxlCms(),
|
||||
/*distmap=*/nullptr, nullptr),
|
||||
IsSlightlyBelow(0.785f));
|
||||
IsSlightlyBelow(0.85f));
|
||||
|
||||
JxlDecoderDestroy(dec);
|
||||
}
|
||||
@@ -1722,21 +1717,25 @@ double ButteraugliDistance(size_t xsize, size_t ysize,
|
||||
jxl::CodecInOut in;
|
||||
in.metadata.m.color_encoding = color_in;
|
||||
in.metadata.m.SetIntensityTarget(intensity_in);
|
||||
JxlPixelFormat format_in = {static_cast<uint32_t>(color_in.Channels()),
|
||||
JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
|
||||
EXPECT_TRUE(jxl::ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(pixels_in.data(), pixels_in.size()), xsize,
|
||||
ysize, color_in, color_in.Channels(),
|
||||
ysize, color_in,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, JXL_BIG_ENDIAN,
|
||||
/*pool=*/nullptr, &in.Main(), /*float_in=*/false, /*align=*/0));
|
||||
/*bits_per_sample=*/16, format_in,
|
||||
/*pool=*/nullptr, &in.Main()));
|
||||
jxl::CodecInOut out;
|
||||
out.metadata.m.color_encoding = color_out;
|
||||
out.metadata.m.SetIntensityTarget(intensity_out);
|
||||
JxlPixelFormat format_out = {static_cast<uint32_t>(color_out.Channels()),
|
||||
JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
|
||||
EXPECT_TRUE(jxl::ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(pixels_out.data(), pixels_out.size()), xsize,
|
||||
ysize, color_out, color_out.Channels(),
|
||||
ysize, color_out,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, JXL_BIG_ENDIAN,
|
||||
/*pool=*/nullptr, &out.Main(), /*float_in=*/false, /*align=*/0));
|
||||
/*bits_per_sample=*/16, format_out,
|
||||
/*pool=*/nullptr, &out.Main()));
|
||||
return ButteraugliDistance(in, out, jxl::ButteraugliParams(),
|
||||
jxl::GetJxlCms(), nullptr, nullptr);
|
||||
}
|
||||
@@ -1926,22 +1925,18 @@ TEST(DecodeTest, PixelTestOpaqueSrgbLossy) {
|
||||
jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
|
||||
jxl::CodecInOut io0;
|
||||
io0.SetSize(xsize, ysize);
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
span0, xsize, ysize, color_encoding0, /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
format_orig.endianness,
|
||||
/*pool=*/nullptr, &io0.Main(), /*float_in=*/false,
|
||||
/*align=*/0));
|
||||
EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, format_orig,
|
||||
/*pool=*/nullptr, &io0.Main()));
|
||||
|
||||
jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
|
||||
jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
|
||||
jxl::CodecInOut io1;
|
||||
EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
|
||||
channels, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, format.endianness,
|
||||
/*pool=*/nullptr, &io1.Main(),
|
||||
/*float_in=*/false,
|
||||
/*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, format,
|
||||
/*pool=*/nullptr, &io1.Main()));
|
||||
|
||||
jxl::ButteraugliParams ba;
|
||||
EXPECT_THAT(ButteraugliDistance(io0, io1, ba, jxl::GetJxlCms(),
|
||||
@@ -1982,22 +1977,18 @@ TEST(DecodeTest, PixelTestOpaqueSrgbLossyNoise) {
|
||||
jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
|
||||
jxl::CodecInOut io0;
|
||||
io0.SetSize(xsize, ysize);
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
span0, xsize, ysize, color_encoding0, /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
format_orig.endianness,
|
||||
/*pool=*/nullptr, &io0.Main(), /*float_in=*/false,
|
||||
/*align=*/0));
|
||||
EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, format_orig,
|
||||
/*pool=*/nullptr, &io0.Main()));
|
||||
|
||||
jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
|
||||
jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
|
||||
jxl::CodecInOut io1;
|
||||
EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
|
||||
channels, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, format.endianness,
|
||||
/*pool=*/nullptr, &io1.Main(),
|
||||
/*float_in=*/false,
|
||||
/*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, format,
|
||||
/*pool=*/nullptr, &io1.Main()));
|
||||
|
||||
jxl::ButteraugliParams ba;
|
||||
EXPECT_THAT(ButteraugliDistance(io0, io1, ba, jxl::GetJxlCms(),
|
||||
@@ -2362,7 +2353,7 @@ TEST(DecodeTest, DCNotGettableTest) {
|
||||
TEST(DecodeTest, PreviewTest) {
|
||||
size_t xsize = 77, ysize = 120;
|
||||
std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
|
||||
|
||||
JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
|
||||
for (jxl::PreviewMode mode : {jxl::kSmallPreview, jxl::kBigPreview}) {
|
||||
jxl::TestCodestreamParams params;
|
||||
params.preview_mode = mode;
|
||||
@@ -2393,9 +2384,8 @@ TEST(DecodeTest, PreviewTest) {
|
||||
jxl::CodecInOut io0;
|
||||
EXPECT_TRUE(jxl::ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
|
||||
c_srgb, 3, /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &io0.Main(),
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
c_srgb, /*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
format_orig, /*pool=*/nullptr, &io0.Main()));
|
||||
GeneratePreview(params.preview_mode, &io0.Main());
|
||||
|
||||
size_t xsize_preview = io0.Main().xsize();
|
||||
@@ -2416,9 +2406,9 @@ TEST(DecodeTest, PreviewTest) {
|
||||
jxl::CodecInOut io1;
|
||||
EXPECT_TRUE(jxl::ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(preview.data(), preview.size()), xsize_preview,
|
||||
ysize_preview, c_srgb, 3, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, JXL_LITTLE_ENDIAN,
|
||||
/*pool=*/nullptr, &io1.Main(), /*float_in=*/false, /*align=*/0));
|
||||
ysize_preview, c_srgb, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, format,
|
||||
/*pool=*/nullptr, &io1.Main()));
|
||||
|
||||
jxl::ButteraugliParams ba;
|
||||
// TODO(lode): this ButteraugliDistance silently returns 0 (dangerous for
|
||||
@@ -2492,10 +2482,9 @@ TEST(DecodeTest, AnimationTest) {
|
||||
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.duration = frame_durations[i];
|
||||
io.frames.push_back(std::move(bundle));
|
||||
}
|
||||
@@ -2596,10 +2585,9 @@ TEST(DecodeTest, AnimationTestStreaming) {
|
||||
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.duration = frame_durations[i];
|
||||
io.frames.push_back(std::move(bundle));
|
||||
}
|
||||
@@ -2815,10 +2803,9 @@ TEST(DecodeTest, SkipCurrentFrameTest) {
|
||||
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.duration = frame_durations[i];
|
||||
io.frames.push_back(std::move(bundle));
|
||||
}
|
||||
@@ -2930,10 +2917,9 @@ TEST(DecodeTest, SkipFrameTest) {
|
||||
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.duration = frame_durations[i];
|
||||
io.frames.push_back(std::move(bundle));
|
||||
}
|
||||
@@ -3067,10 +3053,8 @@ TEST(DecodeTest, SkipFrameWithBlendingTest) {
|
||||
jxl::Span<const uint8_t>(frame_internal.data(),
|
||||
frame_internal.size()),
|
||||
xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle_internal,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle_internal));
|
||||
bundle_internal.duration = 0;
|
||||
bundle_internal.use_for_next_frame = true;
|
||||
io.frames.push_back(std::move(bundle_internal));
|
||||
@@ -3083,10 +3067,9 @@ TEST(DecodeTest, SkipFrameWithBlendingTest) {
|
||||
jxl::ImageBundle bundle(&io.metadata.m);
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frame.data(), frame.size()), xsize, ysize,
|
||||
jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*channels=*/3,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.duration = frame_durations[i];
|
||||
// Create some variation in which frames depend on which.
|
||||
if (i != 3 && i != 9 && i != 10) {
|
||||
@@ -3294,10 +3277,8 @@ TEST(DecodeTest, SkipFrameWithAlphaBlendingTest) {
|
||||
jxl::Span<const uint8_t>(frame_internal.data(),
|
||||
frame_internal.size()),
|
||||
xsize / 2, ysize / 2, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*channels=*/4,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle_internal,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle_internal));
|
||||
bundle_internal.duration = 0;
|
||||
bundle_internal.use_for_next_frame = true;
|
||||
bundle_internal.origin = {13, 17};
|
||||
@@ -3315,10 +3296,9 @@ TEST(DecodeTest, SkipFrameWithAlphaBlendingTest) {
|
||||
jxl::ImageBundle bundle(&io.metadata.m);
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frame.data(), frame.size()), cropxsize,
|
||||
cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), /*channels=*/4,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.duration = 5 + i;
|
||||
frame_durations_nc.push_back(5 + i);
|
||||
frame_durations_c.push_back(5 + i);
|
||||
@@ -3579,10 +3559,8 @@ TEST(DecodeTest, OrientedCroppedFrameTest) {
|
||||
EXPECT_TRUE(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(frame.data(), frame.size()), cropxsize,
|
||||
cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
|
||||
/*channels=*/4,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN, /*pool=*/nullptr, &bundle,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &bundle));
|
||||
bundle.origin = {cropx0, cropy0};
|
||||
bundle.use_for_next_frame = true;
|
||||
io.frames.push_back(std::move(bundle));
|
||||
@@ -4659,14 +4637,15 @@ TEST_P(DecodeProgressiveTest, ProgressiveEventTest) {
|
||||
}
|
||||
std::vector<uint8_t> pixels =
|
||||
jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
|
||||
JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
|
||||
jxl::ColorEncoding color_encoding = jxl::ColorEncoding::SRGB(false);
|
||||
jxl::CodecInOut io;
|
||||
EXPECT_TRUE(jxl::ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
|
||||
color_encoding, num_channels,
|
||||
color_encoding,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, JXL_BIG_ENDIAN,
|
||||
/*pool=*/nullptr, &io.Main(), /*float_in=*/false, /*align=*/0));
|
||||
/*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &io.Main()));
|
||||
jxl::TestCodestreamParams params;
|
||||
if (lossless) {
|
||||
params.cparams.SetLossless();
|
||||
@@ -4681,7 +4660,6 @@ TEST_P(DecodeProgressiveTest, ProgressiveEventTest) {
|
||||
jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
|
||||
jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
|
||||
num_channels, params);
|
||||
JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
|
||||
|
||||
for (size_t increment : {(size_t)1, data.size()}) {
|
||||
printf(
|
||||
@@ -4782,11 +4760,9 @@ TEST_P(DecodeProgressiveTest, ProgressiveEventTest) {
|
||||
jxl::CodecInOut io1;
|
||||
EXPECT_TRUE(jxl::ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(passes[p].data(), passes[p].size()), xsize,
|
||||
ysize, color_encoding, num_channels,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16,
|
||||
JXL_BIG_ENDIAN,
|
||||
/*pool=*/nullptr, &io1.Main(), /*float_in=*/false,
|
||||
/*align=*/0));
|
||||
ysize, color_encoding,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format,
|
||||
/*pool=*/nullptr, &io1.Main()));
|
||||
distances[p] = ButteraugliDistance(io, io1, ba, jxl::GetJxlCms(),
|
||||
nullptr, nullptr);
|
||||
if (p == kNumPasses) break;
|
||||
@@ -4800,7 +4776,7 @@ TEST_P(DecodeProgressiveTest, ProgressiveEventTest) {
|
||||
// Verify that the returned pass image is actually not the
|
||||
// same as the next pass image, by checking that it has a bit
|
||||
// worse butteraugli score.
|
||||
EXPECT_LT(distances[next_p] * 1.2f, distances[p]);
|
||||
EXPECT_LT(distances[next_p] * 1.1f, distances[p]);
|
||||
p = next_p;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -733,8 +733,8 @@ ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin,
|
||||
return tile_distmap;
|
||||
}
|
||||
|
||||
constexpr float kDcQuantPow = 0.57f;
|
||||
static const float kDcQuant = 1.12f;
|
||||
constexpr float kDcQuantPow = 0.66f;
|
||||
static const float kDcQuant = 1.0f;
|
||||
static const float kAcQuant = 0.8294f;
|
||||
|
||||
void FindBestQuantization(const ImageBundle& linear, const Image3F& opsin,
|
||||
@@ -1037,7 +1037,7 @@ void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
|
||||
}
|
||||
|
||||
float InitialQuantDC(float butteraugli_target) {
|
||||
const float kDcMul = 2.9; // Butteraugli target where non-linearity kicks in.
|
||||
const float kDcMul = 1.5; // Butteraugli target where non-linearity kicks in.
|
||||
const float butteraugli_target_dc = std::max<float>(
|
||||
0.5f * butteraugli_target,
|
||||
std::min<float>(butteraugli_target,
|
||||
|
||||
4
third_party/jpeg-xl/lib/jxl/enc_cache.cc
vendored
4
third_party/jpeg-xl/lib/jxl/enc_cache.cc
vendored
@@ -179,9 +179,7 @@ Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms,
|
||||
} else {
|
||||
auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
|
||||
modular_frame_encoder->AddVarDCTDC(
|
||||
dc, group_index,
|
||||
enc_state->cparams.butteraugli_distance >= 2.0f &&
|
||||
enc_state->cparams.speed_tier < SpeedTier::kFalcon,
|
||||
dc, group_index, enc_state->cparams.speed_tier < SpeedTier::kFalcon,
|
||||
enc_state, /*jpeg_transcode=*/false);
|
||||
};
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups,
|
||||
|
||||
279
third_party/jpeg-xl/lib/jxl/enc_external_image.cc
vendored
279
third_party/jpeg-xl/lib/jxl/enc_external_image.cc
vendored
@@ -84,42 +84,54 @@ void JXL_INLINE LoadFloatRow(float* JXL_RESTRICT row_out, const uint8_t* in,
|
||||
|
||||
uint32_t JXL_INLINE Load8(const uint8_t* p) { return *p; }
|
||||
|
||||
Status PixelFormatToExternal(const JxlPixelFormat& pixel_format,
|
||||
size_t* bitdepth, bool* float_in) {
|
||||
if (pixel_format.data_type == JXL_TYPE_FLOAT) {
|
||||
*bitdepth = 32;
|
||||
*float_in = true;
|
||||
} else if (pixel_format.data_type == JXL_TYPE_FLOAT16) {
|
||||
*bitdepth = 16;
|
||||
*float_in = true;
|
||||
} else if (pixel_format.data_type == JXL_TYPE_UINT8) {
|
||||
*bitdepth = 8;
|
||||
*float_in = false;
|
||||
} else if (pixel_format.data_type == JXL_TYPE_UINT16) {
|
||||
*bitdepth = 16;
|
||||
*float_in = false;
|
||||
} else {
|
||||
return JXL_FAILURE("unsupported pixel format data type");
|
||||
size_t JxlDataTypeBytes(JxlDataType data_type) {
|
||||
switch (data_type) {
|
||||
case JXL_TYPE_UINT8:
|
||||
return 1;
|
||||
case JXL_TYPE_UINT16:
|
||||
return 2;
|
||||
case JXL_TYPE_FLOAT16:
|
||||
return 2;
|
||||
case JXL_TYPE_FLOAT:
|
||||
return 4;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
size_t ysize, size_t bits_per_sample,
|
||||
JxlEndianness endianness, ThreadPool* pool,
|
||||
ImageF* channel, bool float_in, size_t align) {
|
||||
// TODO(firsching): Avoid code duplication with the function below.
|
||||
JXL_CHECK(float_in ? bits_per_sample == 16 || bits_per_sample == 32
|
||||
: bits_per_sample > 0 && bits_per_sample <= 16);
|
||||
const size_t bytes_per_pixel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
|
||||
JxlPixelFormat format, size_t c, ThreadPool* pool,
|
||||
ImageF* channel) {
|
||||
if (format.data_type == JXL_TYPE_UINT8) {
|
||||
JXL_RETURN_IF_ERROR(bits_per_sample > 0 && bits_per_sample <= 8);
|
||||
} else if (format.data_type == JXL_TYPE_UINT16) {
|
||||
JXL_RETURN_IF_ERROR(bits_per_sample > 8 && bits_per_sample <= 16);
|
||||
} else if (format.data_type == JXL_TYPE_FLOAT16) {
|
||||
JXL_RETURN_IF_ERROR(bits_per_sample == 16);
|
||||
} else if (format.data_type == JXL_TYPE_FLOAT) {
|
||||
JXL_RETURN_IF_ERROR(bits_per_sample == 32);
|
||||
} else {
|
||||
JXL_FAILURE("unsupported pixel format data type %d", format.data_type);
|
||||
}
|
||||
size_t bytes_per_channel = JxlDataTypeBytes(format.data_type);
|
||||
size_t bytes_per_pixel = format.num_channels * bytes_per_channel;
|
||||
size_t pixel_offset = c * bytes_per_channel;
|
||||
|
||||
const size_t last_row_size = xsize * bytes_per_pixel;
|
||||
const size_t align = format.align;
|
||||
const size_t row_size =
|
||||
(align > 1 ? jxl::DivCeil(last_row_size, align) * align : last_row_size);
|
||||
const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size;
|
||||
if (xsize == 0 || ysize == 0) return JXL_FAILURE("Empty image");
|
||||
if (bytes.size() < bytes_to_read) {
|
||||
return JXL_FAILURE("Buffer size is too small");
|
||||
return JXL_FAILURE("Buffer size is too small, expected: %" PRIuS
|
||||
" got: %" PRIuS " (Image: %" PRIuS "x%" PRIuS
|
||||
"x%u, bytes_per_channel: %" PRIuS ")",
|
||||
bytes_to_read, bytes.size(), xsize, ysize,
|
||||
format.num_channels, bytes_per_channel);
|
||||
}
|
||||
JXL_ASSERT(channel->xsize() == xsize);
|
||||
JXL_ASSERT(channel->ysize() == ysize);
|
||||
@@ -130,18 +142,19 @@ Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
}
|
||||
|
||||
const bool little_endian =
|
||||
endianness == JXL_LITTLE_ENDIAN ||
|
||||
(endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
|
||||
format.endianness == JXL_LITTLE_ENDIAN ||
|
||||
(format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
|
||||
|
||||
const uint8_t* const in = bytes.data();
|
||||
if (float_in) {
|
||||
if (format.data_type == JXL_TYPE_FLOAT ||
|
||||
format.data_type == JXL_TYPE_FLOAT16) {
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const size_t y = task;
|
||||
size_t i = row_size * task;
|
||||
size_t i = row_size * task + pixel_offset;
|
||||
float* JXL_RESTRICT row_out = channel->Row(y);
|
||||
if (bits_per_sample == 16) {
|
||||
if (format.data_type == JXL_TYPE_FLOAT16) {
|
||||
if (little_endian) {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadLEFloat16(in + i);
|
||||
@@ -174,9 +187,9 @@ Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const size_t y = task;
|
||||
size_t i = row_size * task;
|
||||
size_t i = row_size * task + pixel_offset;
|
||||
float* JXL_RESTRICT row_out = channel->Row(y);
|
||||
if (bits_per_sample <= 8) {
|
||||
if (format.data_type == JXL_TYPE_UINT8) {
|
||||
LoadFloatRow<Load8>(row_out, in + i, mul, xsize, bytes_per_pixel);
|
||||
} else {
|
||||
if (little_endian) {
|
||||
@@ -195,187 +208,36 @@ Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
}
|
||||
Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
size_t ysize, const ColorEncoding& c_current,
|
||||
size_t channels, bool alpha_is_premultiplied,
|
||||
size_t bits_per_sample, JxlEndianness endianness,
|
||||
ThreadPool* pool, ImageBundle* ib, bool float_in,
|
||||
size_t align) {
|
||||
JXL_CHECK(float_in ? bits_per_sample == 16 || bits_per_sample == 32
|
||||
: bits_per_sample > 0 && bits_per_sample <= 16);
|
||||
|
||||
bool alpha_is_premultiplied, size_t bits_per_sample,
|
||||
JxlPixelFormat format, ThreadPool* pool,
|
||||
ImageBundle* ib) {
|
||||
const size_t color_channels = c_current.Channels();
|
||||
bool has_alpha = channels == 2 || channels == 4;
|
||||
if (channels < color_channels) {
|
||||
bool has_alpha = format.num_channels == 2 || format.num_channels == 4;
|
||||
if (format.num_channels < color_channels) {
|
||||
return JXL_FAILURE("Expected %" PRIuS
|
||||
" color channels, received only %" PRIuS " channels",
|
||||
color_channels, channels);
|
||||
" color channels, received only %u channels",
|
||||
color_channels, format.num_channels);
|
||||
}
|
||||
|
||||
const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
|
||||
const size_t bytes_per_pixel = channels * bytes_per_channel;
|
||||
if (bits_per_sample > 16 && bits_per_sample < 32) {
|
||||
return JXL_FAILURE("not supported, try bits_per_sample=32");
|
||||
}
|
||||
|
||||
const size_t last_row_size = xsize * bytes_per_pixel;
|
||||
const size_t row_size =
|
||||
(align > 1 ? jxl::DivCeil(last_row_size, align) * align : last_row_size);
|
||||
const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size;
|
||||
if (xsize == 0 || ysize == 0) return JXL_FAILURE("Empty image");
|
||||
if (bytes.size() < bytes_to_read) {
|
||||
return JXL_FAILURE(
|
||||
"Buffer size is too small: expected at least %" PRIuS
|
||||
" bytes (= %" PRIuS " * %" PRIuS " * %" PRIuS "), got %" PRIuS " bytes",
|
||||
bytes_to_read, xsize, ysize, bytes_per_pixel, bytes.size());
|
||||
}
|
||||
// Too large buffer is likely an application bug, so also fail for that.
|
||||
// Do allow padding to stride in last row though.
|
||||
if (bytes.size() > row_size * ysize) {
|
||||
return JXL_FAILURE(
|
||||
"Buffer size is too large: expected at most %" PRIuS " bytes (= %" PRIuS
|
||||
" * %" PRIuS " * %" PRIuS "), got %" PRIuS " bytes",
|
||||
row_size * ysize, xsize, ysize, bytes_per_pixel, bytes.size());
|
||||
}
|
||||
const bool little_endian =
|
||||
endianness == JXL_LITTLE_ENDIAN ||
|
||||
(endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
|
||||
|
||||
const uint8_t* const in = bytes.data();
|
||||
|
||||
Image3F color(xsize, ysize);
|
||||
|
||||
if (float_in) {
|
||||
for (size_t c = 0; c < color_channels; ++c) {
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const size_t y = task;
|
||||
size_t i =
|
||||
row_size * task + (c * bits_per_sample / jxl::kBitsPerByte);
|
||||
float* JXL_RESTRICT row_out = color.PlaneRow(c, y);
|
||||
if (bits_per_sample == 16) {
|
||||
if (little_endian) {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadLEFloat16(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
} else {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadBEFloat16(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (little_endian) {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadLEFloat(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
} else {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadBEFloat(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"ConvertRGBFloat"));
|
||||
}
|
||||
} else {
|
||||
// Multiplier to convert from the integer range to floating point 0-1 range.
|
||||
float mul = 1. / ((1ull << bits_per_sample) - 1);
|
||||
for (size_t c = 0; c < color_channels; ++c) {
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const size_t y = task;
|
||||
size_t i = row_size * task + c * bytes_per_channel;
|
||||
float* JXL_RESTRICT row_out = color.PlaneRow(c, y);
|
||||
if (bits_per_sample <= 8) {
|
||||
LoadFloatRow<Load8>(row_out, in + i, mul, xsize, bytes_per_pixel);
|
||||
} else {
|
||||
if (little_endian) {
|
||||
LoadFloatRow<LoadLE16>(row_out, in + i, mul, xsize,
|
||||
bytes_per_pixel);
|
||||
} else {
|
||||
LoadFloatRow<LoadBE16>(row_out, in + i, mul, xsize,
|
||||
bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
},
|
||||
"ConvertRGBUint"));
|
||||
}
|
||||
for (size_t c = 0; c < color_channels; ++c) {
|
||||
JXL_RETURN_IF_ERROR(ConvertFromExternal(bytes, xsize, ysize,
|
||||
bits_per_sample, format, c, pool,
|
||||
&color.Plane(c)));
|
||||
}
|
||||
|
||||
if (color_channels == 1) {
|
||||
CopyImageTo(color.Plane(0), &color.Plane(1));
|
||||
CopyImageTo(color.Plane(0), &color.Plane(2));
|
||||
}
|
||||
|
||||
ib->SetFromImage(std::move(color), c_current);
|
||||
|
||||
// Passing an interleaved image with an alpha channel to an image that doesn't
|
||||
// have alpha channel just discards the passed alpha channel.
|
||||
if (has_alpha && ib->HasAlpha()) {
|
||||
ImageF alpha(xsize, ysize);
|
||||
|
||||
if (float_in) {
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const size_t y = task;
|
||||
size_t i = row_size * task +
|
||||
((channels - 1) * bits_per_sample / jxl::kBitsPerByte);
|
||||
float* JXL_RESTRICT row_out = alpha.Row(y);
|
||||
if (bits_per_sample == 16) {
|
||||
if (little_endian) {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadLEFloat16(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
} else {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadBEFloat16(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (little_endian) {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadLEFloat(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
} else {
|
||||
for (size_t x = 0; x < xsize; ++x) {
|
||||
row_out[x] = LoadBEFloat(in + i);
|
||||
i += bytes_per_pixel;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"ConvertAlphaFloat"));
|
||||
} else {
|
||||
float mul = 1. / ((1ull << bits_per_sample) - 1);
|
||||
JXL_RETURN_IF_ERROR(RunOnPool(
|
||||
pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
|
||||
[&](const uint32_t task, size_t /*thread*/) {
|
||||
const size_t y = task;
|
||||
size_t i = row_size * task + (channels - 1) * bytes_per_channel;
|
||||
float* JXL_RESTRICT row_out = alpha.Row(y);
|
||||
if (bits_per_sample <= 8) {
|
||||
LoadFloatRow<Load8>(row_out, in + i, mul, xsize, bytes_per_pixel);
|
||||
} else {
|
||||
if (little_endian) {
|
||||
LoadFloatRow<LoadLE16>(row_out, in + i, mul, xsize,
|
||||
bytes_per_pixel);
|
||||
} else {
|
||||
LoadFloatRow<LoadBE16>(row_out, in + i, mul, xsize,
|
||||
bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
},
|
||||
"ConvertAlphaUint"));
|
||||
}
|
||||
|
||||
JXL_RETURN_IF_ERROR(
|
||||
ConvertFromExternal(bytes, xsize, ysize, bits_per_sample, format,
|
||||
format.num_channels - 1, pool, &alpha));
|
||||
ib->SetAlpha(std::move(alpha), alpha_is_premultiplied);
|
||||
} else if (!has_alpha && ib->HasAlpha()) {
|
||||
// if alpha is not passed, but it is expected, then assume
|
||||
@@ -391,18 +253,10 @@ Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize,
|
||||
size_t ysize, const void* buffer, size_t size,
|
||||
ThreadPool* pool, ImageF* channel) {
|
||||
size_t bitdepth;
|
||||
bool float_in;
|
||||
|
||||
JXL_RETURN_IF_ERROR(
|
||||
PixelFormatToExternal(pixel_format, &bitdepth, &float_in));
|
||||
|
||||
JXL_RETURN_IF_ERROR(ConvertFromExternal(
|
||||
size_t bitdepth = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte;
|
||||
return ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(static_cast<const uint8_t*>(buffer), size),
|
||||
xsize, ysize, bitdepth, pixel_format.endianness, pool, channel, float_in,
|
||||
pixel_format.align));
|
||||
|
||||
return true;
|
||||
xsize, ysize, bitdepth, pixel_format, 0, pool, channel);
|
||||
}
|
||||
|
||||
Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
|
||||
@@ -410,16 +264,11 @@ Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
|
||||
jxl::ThreadPool* pool,
|
||||
const jxl::ColorEncoding& c_current,
|
||||
jxl::ImageBundle* ib) {
|
||||
size_t bitdepth;
|
||||
bool float_in;
|
||||
JXL_RETURN_IF_ERROR(
|
||||
PixelFormatToExternal(pixel_format, &bitdepth, &float_in));
|
||||
|
||||
size_t bitdepth = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte;
|
||||
JXL_RETURN_IF_ERROR(ConvertFromExternal(
|
||||
jxl::Span<const uint8_t>(static_cast<const uint8_t*>(buffer), size),
|
||||
xsize, ysize, c_current, pixel_format.num_channels,
|
||||
/*alpha_is_premultiplied=*/false, bitdepth, pixel_format.endianness, pool,
|
||||
ib, float_in, pixel_format.align));
|
||||
xsize, ysize, c_current,
|
||||
/*alpha_is_premultiplied=*/false, bitdepth, pixel_format, pool, ib));
|
||||
ib->VerifyMetadata();
|
||||
|
||||
return true;
|
||||
|
||||
11
third_party/jpeg-xl/lib/jxl/enc_external_image.h
vendored
11
third_party/jpeg-xl/lib/jxl/enc_external_image.h
vendored
@@ -23,17 +23,16 @@
|
||||
namespace jxl {
|
||||
Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
size_t ysize, size_t bits_per_sample,
|
||||
JxlEndianness endianness, ThreadPool* pool,
|
||||
ImageF* channel, bool float_in, size_t align);
|
||||
JxlPixelFormat format, size_t c, ThreadPool* pool,
|
||||
ImageF* channel);
|
||||
|
||||
// Convert an interleaved pixel buffer to the internal ImageBundle
|
||||
// representation. This is the opposite of ConvertToExternal().
|
||||
Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
|
||||
size_t ysize, const ColorEncoding& c_current,
|
||||
size_t channels, bool alpha_is_premultiplied,
|
||||
size_t bits_per_sample, JxlEndianness endianness,
|
||||
ThreadPool* pool, ImageBundle* ib, bool float_in,
|
||||
size_t align);
|
||||
bool alpha_is_premultiplied, size_t bits_per_sample,
|
||||
JxlPixelFormat format, ThreadPool* pool,
|
||||
ImageBundle* ib);
|
||||
Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize,
|
||||
size_t ysize, const void* buffer, size_t size,
|
||||
ThreadPool* pool, ImageF* channel);
|
||||
|
||||
@@ -21,17 +21,16 @@ void BM_EncExternalImage_ConvertImageRGBA(benchmark::State& state) {
|
||||
ImageBundle ib(&im);
|
||||
|
||||
std::vector<uint8_t> interleaved(xsize * ysize * 4);
|
||||
|
||||
JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
|
||||
for (auto _ : state) {
|
||||
for (size_t i = 0; i < kNumIter; ++i) {
|
||||
JXL_CHECK(ConvertFromExternal(
|
||||
Span<const uint8_t>(interleaved.data(), interleaved.size()), xsize,
|
||||
ysize,
|
||||
/*c_current=*/ColorEncoding::SRGB(),
|
||||
/*channels=*/4,
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, JXL_NATIVE_ENDIAN,
|
||||
/*pool=*/nullptr, &ib, /*float_in=*/false, /*align=*/0));
|
||||
/*bits_per_sample=*/8, format,
|
||||
/*pool=*/nullptr, &ib));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,23 +25,23 @@ TEST(ExternalImageTest, InvalidSize) {
|
||||
im.SetAlphaBits(8);
|
||||
ImageBundle ib(&im);
|
||||
|
||||
JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
|
||||
const uint8_t buf[10 * 100 * 8] = {};
|
||||
EXPECT_FALSE(ConvertFromExternal(
|
||||
Span<const uint8_t>(buf, 10), /*xsize=*/10, /*ysize=*/100,
|
||||
/*c_current=*/ColorEncoding::SRGB(), /*channels=*/4,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, JXL_BIG_ENDIAN,
|
||||
nullptr, &ib, /*float_in=*/false, /*align=*/0));
|
||||
/*c_current=*/ColorEncoding::SRGB(),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format, nullptr,
|
||||
&ib));
|
||||
EXPECT_FALSE(ConvertFromExternal(
|
||||
Span<const uint8_t>(buf, sizeof(buf) - 1), /*xsize=*/10, /*ysize=*/100,
|
||||
/*c_current=*/ColorEncoding::SRGB(), /*channels=*/4,
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, JXL_BIG_ENDIAN,
|
||||
nullptr, &ib, /*float_in=*/false, /*align=*/0));
|
||||
/*c_current=*/ColorEncoding::SRGB(),
|
||||
/*alpha_is_premultiplied=*/false, /*bits_per_sample=*/16, format, nullptr,
|
||||
&ib));
|
||||
EXPECT_TRUE(
|
||||
ConvertFromExternal(Span<const uint8_t>(buf, sizeof(buf)), /*xsize=*/10,
|
||||
/*ysize=*/100, /*c_current=*/ColorEncoding::SRGB(),
|
||||
/*channels=*/4, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, JXL_BIG_ENDIAN, nullptr, &ib,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/16, format, nullptr, &ib));
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -54,14 +54,14 @@ TEST(ExternalImageTest, AlphaMissing) {
|
||||
const size_t ysize = 20;
|
||||
const uint8_t buf[xsize * ysize * 4] = {};
|
||||
|
||||
JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0};
|
||||
// has_alpha is true but the ImageBundle has no alpha. Alpha channel should
|
||||
// be ignored.
|
||||
EXPECT_TRUE(
|
||||
ConvertFromExternal(Span<const uint8_t>(buf, sizeof(buf)), xsize, ysize,
|
||||
/*c_current=*/ColorEncoding::SRGB(),
|
||||
/*channels=*/4, /*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, JXL_BIG_ENDIAN, nullptr, &ib,
|
||||
/*float_in=*/false, /*align=*/0));
|
||||
EXPECT_TRUE(ConvertFromExternal(Span<const uint8_t>(buf, sizeof(buf)), xsize,
|
||||
ysize,
|
||||
/*c_current=*/ColorEncoding::SRGB(),
|
||||
/*alpha_is_premultiplied=*/false,
|
||||
/*bits_per_sample=*/8, format, nullptr, &ib));
|
||||
EXPECT_FALSE(ib.HasAlpha());
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user