Bug 1953601 - Support encoding frames that require scaling with the OpenH264 platform encoder. r=media-playback-reviewers,chunmin

This patch teaches ConvertToI420 how to scale supported formats in
addition to performing the conversion to I420. It scales early when
downscaling and late when upscaling, so that the format conversion runs
at the smaller of the two resolutions. This is not always possible,
because libyuv doesn't support scaling for all of our supported formats;
in those cases, we always scale late, as I420.

If the format is already I420, then we can avoid using an intermediate
buffer for the scaling, and scale directly into the destination.

Additionally, FFmpegVideoEncoder now takes advantage of this unified
convert/scale operation, so its own internal scaling logic can be
removed.

Differential Revision: https://phabricator.services.mozilla.com/D241694
Andrew Osmond
2025-03-18 18:43:21 +00:00
parent a452fefae1
commit 53532e454e
9 changed files with 591 additions and 135 deletions
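
A minimal, self-contained sketch of the cost argument behind the
early/late choice (not the patch code; Size, PixelCount and
ConversionCost are hypothetical names for illustration):

#include <cstdint>

struct Size {
  int32_t width;
  int32_t height;
};

static int64_t PixelCount(const Size& aSize) {
  return int64_t(aSize.width) * aSize.height;
}

// Early scale (downscaling): shrink in the source format first, then
// convert the already-small frame. Late scale (upscaling): convert the
// small source first, then scale the I420 result up. Either way, the
// format conversion only ever touches min(src, dst) pixels.
static int64_t ConversionCost(const Size& aSrc, const Size& aDst) {
  const bool earlyScale = PixelCount(aSrc) > PixelCount(aDst);
  return earlyScale ? PixelCount(aDst) : PixelCount(aSrc);
}

Formats that libyuv cannot scale natively (NV21 and RGB565 in this
patch) forfeit the early-scale option and always take the late path.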


@@ -80,92 +80,378 @@ already_AddRefed<SourceSurface> GetSourceSurface(Image* aImage) {
nsresult ConvertToI420(Image* aImage, uint8_t* aDestY, int aDestStrideY,
uint8_t* aDestU, int aDestStrideU, uint8_t* aDestV,
- int aDestStrideV) {
int aDestStrideV, const IntSize& aDestSize) {
if (!aImage->IsValid()) {
return NS_ERROR_INVALID_ARG;
}
- if (const PlanarYCbCrData* data = GetPlanarYCbCrData(aImage)) {
const IntSize imageSize = aImage->GetSize();
auto srcPixelCount = CheckedInt<int32_t>(imageSize.width) * imageSize.height;
auto dstPixelCount = CheckedInt<int32_t>(aDestSize.width) * aDestSize.height;
if (!srcPixelCount.isValid() || !dstPixelCount.isValid()) {
MOZ_ASSERT_UNREACHABLE("Bad input or output sizes");
return NS_ERROR_INVALID_ARG;
}
// If we are downscaling, we prefer an early scale. If we are upscaling, we
// prefer a late scale. This minimizes the number of pixel manipulations.
// Depending on the input format, we may be forced to do a late scale after
// conversion to I420, because we don't support scaling the input format.
const bool needsScale = imageSize != aDestSize;
bool earlyScale = srcPixelCount.value() > dstPixelCount.value();
Maybe<DataSourceSurface::ScopedMap> surfaceMap;
SurfaceFormat surfaceFormat = SurfaceFormat::UNKNOWN;
const PlanarYCbCrData* data = GetPlanarYCbCrData(aImage);
Maybe<dom::ImageBitmapFormat> format;
if (data) {
const ImageUtils imageUtils(aImage);
- Maybe<dom::ImageBitmapFormat> format = imageUtils.GetFormat();
format = imageUtils.GetFormat();
if (format.isNothing()) {
MOZ_ASSERT_UNREACHABLE("YUV format conversion not implemented");
return NS_ERROR_NOT_IMPLEMENTED;
}
switch (format.value()) {
case ImageBitmapFormat::YUV420P:
// Since the input and output formats match, we can copy or scale
// directly to the output buffer.
if (needsScale) {
return MapRv(libyuv::I420Scale(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride,
imageSize.width, imageSize.height, aDestY, aDestStrideY, aDestU,
aDestStrideU, aDestV, aDestStrideV, aDestSize.width,
aDestSize.height, libyuv::FilterMode::kFilterBox));
}
return MapRv(libyuv::I420ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride, aDestY,
aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
- aImage->GetSize().width, aImage->GetSize().height));
aDestSize.width, aDestSize.height));
case ImageBitmapFormat::YUV422P:
- return MapRv(libyuv::I422ToI420(
- data->mYChannel, data->mYStride, data->mCbChannel,
- data->mCbCrStride, data->mCrChannel, data->mCbCrStride, aDestY,
- aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
- aImage->GetSize().width, aImage->GetSize().height));
if (!needsScale) {
return MapRv(libyuv::I422ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride, aDestY,
aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height));
}
break;
case ImageBitmapFormat::YUV444P:
- return MapRv(libyuv::I444ToI420(
- data->mYChannel, data->mYStride, data->mCbChannel,
- data->mCbCrStride, data->mCrChannel, data->mCbCrStride, aDestY,
- aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
- aImage->GetSize().width, aImage->GetSize().height));
if (!needsScale) {
return MapRv(libyuv::I444ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride, aDestY,
aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height));
}
break;
case ImageBitmapFormat::YUV420SP_NV12:
- return MapRv(libyuv::NV12ToI420(
- data->mYChannel, data->mYStride, data->mCbChannel,
- data->mCbCrStride, aDestY, aDestStrideY, aDestU, aDestStrideU,
- aDestV, aDestStrideV, aImage->GetSize().width,
- aImage->GetSize().height));
if (!needsScale) {
return MapRv(libyuv::NV12ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, aDestY, aDestStrideY, aDestU, aDestStrideU,
aDestV, aDestStrideV, aDestSize.width, aDestSize.height));
}
break;
case ImageBitmapFormat::YUV420SP_NV21:
- return MapRv(libyuv::NV21ToI420(
- data->mYChannel, data->mYStride, data->mCrChannel,
- data->mCbCrStride, aDestY, aDestStrideY, aDestU, aDestStrideU,
- aDestV, aDestStrideV, aImage->GetSize().width,
- aImage->GetSize().height));
if (!needsScale) {
return MapRv(libyuv::NV21ToI420(
data->mYChannel, data->mYStride, data->mCrChannel,
data->mCbCrStride, aDestY, aDestStrideY, aDestU, aDestStrideU,
aDestV, aDestStrideV, aDestSize.width, aDestSize.height));
}
earlyScale = false;
break;
default:
MOZ_ASSERT_UNREACHABLE("YUV format conversion not implemented");
return NS_ERROR_NOT_IMPLEMENTED;
}
} else {
RefPtr<SourceSurface> surface = GetSourceSurface(aImage);
if (!surface) {
return NS_ERROR_FAILURE;
}
RefPtr<DataSourceSurface> dataSurface = surface->GetDataSurface();
if (!dataSurface) {
return NS_ERROR_FAILURE;
}
surfaceMap.emplace(dataSurface, DataSourceSurface::READ);
if (!surfaceMap->IsMapped()) {
return NS_ERROR_FAILURE;
}
surfaceFormat = dataSurface->GetFormat();
switch (surfaceFormat) {
case SurfaceFormat::B8G8R8A8:
case SurfaceFormat::B8G8R8X8:
if (!needsScale) {
return MapRv(
libyuv::ARGBToI420(static_cast<uint8_t*>(surfaceMap->GetData()),
surfaceMap->GetStride(), aDestY, aDestStrideY,
aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height));
}
break;
case SurfaceFormat::R8G8B8A8:
case SurfaceFormat::R8G8B8X8:
if (!needsScale) {
return MapRv(
libyuv::ABGRToI420(static_cast<uint8_t*>(surfaceMap->GetData()),
surfaceMap->GetStride(), aDestY, aDestStrideY,
aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height));
}
break;
case SurfaceFormat::R5G6B5_UINT16:
if (!needsScale) {
return MapRv(libyuv::RGB565ToI420(
static_cast<uint8_t*>(surfaceMap->GetData()),
surfaceMap->GetStride(), aDestY, aDestStrideY, aDestU,
aDestStrideU, aDestV, aDestStrideV, aDestSize.width,
aDestSize.height));
}
earlyScale = false;
break;
default:
MOZ_ASSERT_UNREACHABLE("Surface format conversion not implemented");
return NS_ERROR_NOT_IMPLEMENTED;
}
}
MOZ_DIAGNOSTIC_ASSERT(needsScale);
// We have to scale, and we are unable to scale directly, so we need a
// temporary buffer to hold the scaled result in the input format, or the
// unscaled result in the output format.
IntSize tempBufSize;
IntSize tempBufCbCrSize;
if (earlyScale) {
// Early scaling means we are scaling from the input buffer to a temporary
// buffer of the same format.
tempBufSize = aDestSize;
if (data) {
tempBufCbCrSize = gfx::ChromaSize(tempBufSize, data->mChromaSubsampling);
}
} else {
// Late scaling means we are scaling from a temporary I420 buffer to the
// destination I420 buffer.
tempBufSize = imageSize;
tempBufCbCrSize = gfx::ChromaSize(
tempBufSize, gfx::ChromaSubsampling::HALF_WIDTH_AND_HEIGHT);
}
MOZ_ASSERT(!tempBufSize.IsEmpty());
// Make sure we can allocate the temporary buffer.
gfx::AlignedArray<uint8_t> tempBuf;
uint8_t* tempBufY = nullptr;
uint8_t* tempBufU = nullptr;
uint8_t* tempBufV = nullptr;
int32_t tempRgbStride = 0;
if (!tempBufCbCrSize.IsEmpty()) {
// Our temporary buffer is represented as a YUV format.
auto tempBufYLen =
CheckedInt<size_t>(tempBufSize.width) * tempBufSize.height;
auto tempBufCbCrLen =
CheckedInt<size_t>(tempBufCbCrSize.width) * tempBufCbCrSize.height;
auto tempBufLen = tempBufYLen + 2 * tempBufCbCrLen;
if (!tempBufLen.isValid()) {
MOZ_ASSERT_UNREACHABLE("Bad buffer size!");
return NS_ERROR_FAILURE;
}
tempBuf.Realloc(tempBufLen.value());
if (!tempBuf) {
return NS_ERROR_OUT_OF_MEMORY;
}
tempBufY = tempBuf;
tempBufU = tempBufY + tempBufYLen.value();
tempBufV = tempBufU + tempBufCbCrLen.value();
} else {
// The temporary buffer is represented as a RGBA/BGRA format.
auto tempStride = CheckedInt<int32_t>(tempBufSize.width) * 4;
auto tempBufLen = tempStride * tempBufSize.height;
if (!tempStride.isValid() || !tempBufLen.isValid()) {
MOZ_ASSERT_UNREACHABLE("Bad buffer size!");
return NS_ERROR_FAILURE;
}
tempBuf.Realloc(tempBufLen.value());
if (!tempBuf) {
return NS_ERROR_OUT_OF_MEMORY;
}
tempRgbStride = tempStride.value();
}
nsresult rv;
if (!earlyScale) {
// First convert whatever the input format is to I420 into the temp buffer.
if (data) {
switch (format.value()) {
case ImageBitmapFormat::YUV422P:
rv = MapRv(libyuv::I422ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride, tempBufY,
tempBufSize.width, tempBufU, tempBufCbCrSize.width, tempBufV,
tempBufCbCrSize.width, tempBufSize.width, tempBufSize.height));
break;
case ImageBitmapFormat::YUV444P:
rv = MapRv(libyuv::I444ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride, tempBufY,
tempBufSize.width, tempBufU, tempBufCbCrSize.width, tempBufV,
tempBufCbCrSize.width, tempBufSize.width, tempBufSize.height));
break;
case ImageBitmapFormat::YUV420SP_NV12:
rv = MapRv(libyuv::NV12ToI420(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, tempBufY, tempBufSize.width, tempBufU,
tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height));
break;
case ImageBitmapFormat::YUV420SP_NV21:
rv = MapRv(libyuv::NV21ToI420(
data->mYChannel, data->mYStride, data->mCrChannel,
data->mCbCrStride, tempBufY, tempBufSize.width, tempBufU,
tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height));
break;
default:
MOZ_ASSERT_UNREACHABLE("YUV format conversion not implemented");
return NS_ERROR_UNEXPECTED;
}
} else {
switch (surfaceFormat) {
case SurfaceFormat::B8G8R8A8:
case SurfaceFormat::B8G8R8X8:
rv = MapRv(libyuv::ARGBToI420(
static_cast<uint8_t*>(surfaceMap->GetData()),
surfaceMap->GetStride(), tempBufY, tempBufSize.width, tempBufU,
tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height));
break;
case SurfaceFormat::R8G8B8A8:
case SurfaceFormat::R8G8B8X8:
rv = MapRv(libyuv::ABGRToI420(
static_cast<uint8_t*>(surfaceMap->GetData()),
surfaceMap->GetStride(), tempBufY, tempBufSize.width, tempBufU,
tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height));
break;
case SurfaceFormat::R5G6B5_UINT16:
rv = MapRv(libyuv::RGB565ToI420(
static_cast<uint8_t*>(surfaceMap->GetData()),
surfaceMap->GetStride(), tempBufY, tempBufSize.width, tempBufU,
tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height));
break;
default:
MOZ_ASSERT_UNREACHABLE("Surface format conversion not implemented");
return NS_ERROR_NOT_IMPLEMENTED;
}
}
if (NS_FAILED(rv)) {
return rv;
}
// Now do the scale in I420 to the output buffer.
return MapRv(libyuv::I420Scale(
tempBufY, tempBufSize.width, tempBufU, tempBufCbCrSize.width, tempBufV,
tempBufCbCrSize.width, tempBufSize.width, tempBufSize.height, aDestY,
aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height, libyuv::FilterMode::kFilterBox));
}
if (data) {
// First scale in the input format to the desired size into temp buffer, and
// then convert that into the final I420 result.
switch (format.value()) {
case ImageBitmapFormat::YUV422P:
rv = MapRv(libyuv::I422Scale(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride,
imageSize.width, imageSize.height, tempBufY, tempBufSize.width,
tempBufU, tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height,
libyuv::FilterMode::kFilterBox));
if (NS_FAILED(rv)) {
return rv;
}
return MapRv(libyuv::I422ToI420(
tempBufY, tempBufSize.width, tempBufU, tempBufCbCrSize.width,
tempBufV, tempBufCbCrSize.width, aDestY, aDestStrideY, aDestU,
aDestStrideU, aDestV, aDestStrideV, aDestSize.width,
aDestSize.height));
case ImageBitmapFormat::YUV444P:
rv = MapRv(libyuv::I444Scale(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, data->mCrChannel, data->mCbCrStride,
imageSize.width, imageSize.height, tempBufY, tempBufSize.width,
tempBufU, tempBufCbCrSize.width, tempBufV, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height,
libyuv::FilterMode::kFilterBox));
if (NS_FAILED(rv)) {
return rv;
}
return MapRv(libyuv::I444ToI420(
tempBufY, tempBufSize.width, tempBufU, tempBufCbCrSize.width,
tempBufV, tempBufCbCrSize.width, aDestY, aDestStrideY, aDestU,
aDestStrideU, aDestV, aDestStrideV, aDestSize.width,
aDestSize.height));
case ImageBitmapFormat::YUV420SP_NV12:
rv = MapRv(libyuv::NV12Scale(
data->mYChannel, data->mYStride, data->mCbChannel,
data->mCbCrStride, imageSize.width, imageSize.height, tempBufY,
tempBufSize.width, tempBufU, tempBufCbCrSize.width,
tempBufSize.width, tempBufSize.height,
libyuv::FilterMode::kFilterBox));
if (NS_FAILED(rv)) {
return rv;
}
return MapRv(libyuv::NV12ToI420(
tempBufY, tempBufSize.width, tempBufU, tempBufCbCrSize.width,
aDestY, aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height));
default:
MOZ_ASSERT_UNREACHABLE("YUV format conversion not implemented");
return NS_ERROR_NOT_IMPLEMENTED;
}
}
- RefPtr<SourceSurface> surf = GetSourceSurface(aImage);
- if (!surf) {
- return NS_ERROR_FAILURE;
MOZ_DIAGNOSTIC_ASSERT(surfaceFormat == SurfaceFormat::B8G8R8X8 ||
surfaceFormat == SurfaceFormat::B8G8R8A8 ||
surfaceFormat == SurfaceFormat::R8G8B8X8 ||
surfaceFormat == SurfaceFormat::R8G8B8A8);
// We can use the same scaling method for either BGRA or RGBA since the
// channel orders don't matter to the scaling algorithm.
rv = MapRv(libyuv::ARGBScale(
surfaceMap->GetData(), surfaceMap->GetStride(), imageSize.width,
imageSize.height, tempBuf, tempRgbStride, tempBufSize.width,
tempBufSize.height, libyuv::FilterMode::kFilterBox));
if (NS_FAILED(rv)) {
return rv;
}
- RefPtr<DataSourceSurface> data = surf->GetDataSurface();
- if (!data) {
- return NS_ERROR_FAILURE;
// Now convert the scale result to I420.
if (surfaceFormat == SurfaceFormat::B8G8R8A8 ||
surfaceFormat == SurfaceFormat::B8G8R8X8) {
return MapRv(libyuv::ARGBToI420(
tempBuf, tempRgbStride, aDestY, aDestStrideY, aDestU, aDestStrideU,
aDestV, aDestStrideV, aDestSize.width, aDestSize.height));
}
- DataSourceSurface::ScopedMap map(data, DataSourceSurface::READ);
- if (!map.IsMapped()) {
- return NS_ERROR_FAILURE;
- }
- switch (surf->GetFormat()) {
- case SurfaceFormat::B8G8R8A8:
- case SurfaceFormat::B8G8R8X8:
- return MapRv(libyuv::ARGBToI420(
- static_cast<uint8_t*>(map.GetData()), map.GetStride(), aDestY,
- aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
- aImage->GetSize().width, aImage->GetSize().height));
- case SurfaceFormat::R8G8B8A8:
- case SurfaceFormat::R8G8B8X8:
- return MapRv(libyuv::ABGRToI420(
- static_cast<uint8_t*>(map.GetData()), map.GetStride(), aDestY,
- aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
- aImage->GetSize().width, aImage->GetSize().height));
- case SurfaceFormat::R5G6B5_UINT16:
- return MapRv(libyuv::RGB565ToI420(
- static_cast<uint8_t*>(map.GetData()), map.GetStride(), aDestY,
- aDestStrideY, aDestU, aDestStrideU, aDestV, aDestStrideV,
- aImage->GetSize().width, aImage->GetSize().height));
- default:
- MOZ_ASSERT_UNREACHABLE("Surface format conversion not implemented");
- return NS_ERROR_NOT_IMPLEMENTED;
- }
return MapRv(libyuv::ABGRToI420(tempBuf, tempRgbStride, aDestY, aDestStrideY,
aDestU, aDestStrideU, aDestV, aDestStrideV,
aDestSize.width, aDestSize.height));
}
static int32_t CeilingOfHalf(int32_t aValue) {

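An aside on the temporary buffer math above: the patch carves a single
allocation into three tightly packed planes (stride == width). A minimal
sketch of that layout, assuming std::vector in place of gfx::AlignedArray
and plain arithmetic in place of CheckedInt:

#include <cstdint>
#include <vector>

// One allocation, three planes: full-size Y followed by quarter-size Cb
// and Cr. Chroma dimensions round up so odd sizes still cover every pixel.
struct I420Buffer {
  std::vector<uint8_t> storage;
  uint8_t* y = nullptr;
  uint8_t* u = nullptr;
  uint8_t* v = nullptr;
  int32_t yStride = 0;
  int32_t uvStride = 0;
};

static I420Buffer AllocateI420(int32_t aWidth, int32_t aHeight) {
  const int32_t chromaW = (aWidth + 1) / 2;
  const int32_t chromaH = (aHeight + 1) / 2;
  const size_t yLen = size_t(aWidth) * size_t(aHeight);
  const size_t uvLen = size_t(chromaW) * size_t(chromaH);
  I420Buffer buf;
  buf.storage.resize(yLen + 2 * uvLen);
  buf.y = buf.storage.data();
  buf.u = buf.y + yLen;
  buf.v = buf.u + uvLen;
  buf.yStride = aWidth;
  buf.uvStride = chromaW;
  return buf;
}

The real code additionally overflow-checks the products with CheckedInt,
and for the RGB intermediate it instead sizes a single plane with a
width * 4 stride.
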

@@ -31,7 +31,7 @@ already_AddRefed<gfx::SourceSurface> GetSourceSurface(layers::Image* aImage);
*/
nsresult ConvertToI420(layers::Image* aImage, uint8_t* aDestY, int aDestStrideY,
uint8_t* aDestU, int aDestStrideU, uint8_t* aDestV,
- int aDestStrideV);
int aDestStrideV, const gfx::IntSize& aDestSize);
/**
* Converts aImage to an NV12 image and writes it to the given buffers.

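A hedged usage sketch of the extended signature (ConvertScaled and its
buffer parameters are hypothetical; only ConvertToI420 comes from this
patch). The caller allocates a tightly packed I420 destination at the
target size and lets the conversion scale into it:

// Converts, and if aImage->GetSize() != aDestSize also scales, into I420.
// Strides here assume tightly packed planes.
nsresult ConvertScaled(mozilla::layers::Image* aImage,
                       const mozilla::gfx::IntSize& aDestSize,
                       uint8_t* aY, uint8_t* aU, uint8_t* aV) {
  const int yStride = aDestSize.width;
  const int uvStride = (aDestSize.width + 1) / 2;
  return mozilla::ConvertToI420(aImage, aY, yStride, aU, uvStride, aV,
                                uvStride, aDestSize);
}

Passing aImage->GetSize() as aDestSize reproduces the old no-scale
behavior.
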

@@ -353,7 +353,7 @@ class VideoFrameConverterImpl : public rtc::AdaptedVideoTrackSource {
nsresult rv = ConvertToI420(aFrame.mImage, buffer->MutableDataY(),
buffer->StrideY(), buffer->MutableDataU(),
buffer->StrideU(), buffer->MutableDataV(),
- buffer->StrideV());
buffer->StrideV(), aFrame.mSize);
if (NS_FAILED(rv)) {
LOG(LogLevel::Warning,


@@ -674,7 +674,7 @@ nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk& aChunk) {
mVPXImageWrapper.planes[VPX_PLANE_U],
mVPXImageWrapper.stride[VPX_PLANE_U],
mVPXImageWrapper.planes[VPX_PLANE_V],
- mVPXImageWrapper.stride[VPX_PLANE_V]);
mVPXImageWrapper.stride[VPX_PLANE_V], imgSize);
if (NS_FAILED(rv)) {
VP8LOG(LogLevel::Error, "Converting to I420 failed");
return rv;


@@ -0,0 +1,235 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "mozilla/RefPtr.h"
#include "mozilla/dom/ImageBitmapBinding.h"
#include "mozilla/dom/ImageUtils.h"
#include "ImageConversion.h"
#include "ImageContainer.h"
#include "SourceSurfaceRawData.h"
using mozilla::ConvertToI420;
using mozilla::MakeAndAddRef;
using mozilla::MakeRefPtr;
using mozilla::Maybe;
using mozilla::Nothing;
using mozilla::Some;
using mozilla::dom::ImageBitmapFormat;
using mozilla::gfx::ChromaSubsampling;
using mozilla::gfx::DataSourceSurface;
using mozilla::gfx::IntSize;
using mozilla::gfx::SourceSurfaceAlignedRawData;
using mozilla::gfx::SurfaceFormat;
using mozilla::layers::PlanarYCbCrImage;
using mozilla::layers::SourceSurfaceImage;
class TestRedPlanarYCbCrImage2x2 final : public PlanarYCbCrImage {
public:
explicit TestRedPlanarYCbCrImage2x2(ImageBitmapFormat aFormat) {
mSize = IntSize(2, 2);
mBufferSize = sizeof(mY) + sizeof(mU) + sizeof(mV);
mData.mPictureRect = mozilla::gfx::IntRect(mozilla::gfx::IntPoint(), mSize);
mData.mYChannel = mY;
mData.mYStride = 2;
switch (aFormat) {
case ImageBitmapFormat::YUV420P:
mData.mChromaSubsampling = ChromaSubsampling::HALF_WIDTH_AND_HEIGHT;
mData.mCbChannel = mU;
mData.mCrChannel = mV;
mData.mCbCrStride = 1;
break;
case ImageBitmapFormat::YUV422P:
mData.mChromaSubsampling = ChromaSubsampling::HALF_WIDTH;
mData.mCbChannel = mU;
mData.mCrChannel = mV;
mData.mCbCrStride = 1;
break;
case ImageBitmapFormat::YUV444P:
mData.mChromaSubsampling = ChromaSubsampling::FULL;
mData.mCbChannel = mU;
mData.mCrChannel = mV;
mData.mCbCrStride = 2;
break;
case ImageBitmapFormat::YUV420SP_NV12:
mData.mChromaSubsampling = ChromaSubsampling::HALF_WIDTH_AND_HEIGHT;
mData.mCbChannel = mU;
mData.mCrChannel = mData.mCbChannel + 1;
mData.mCbCrStride = 1;
mData.mCrSkip = 1;
mData.mCbSkip = 1;
mU[1] = mV[0];
mU[3] = mV[1];
break;
case ImageBitmapFormat::YUV420SP_NV21:
mData.mChromaSubsampling = ChromaSubsampling::HALF_WIDTH_AND_HEIGHT;
mData.mCrChannel = mU;
mData.mCbChannel = mData.mCrChannel + 1;
mData.mCbCrStride = 1;
mData.mCrSkip = 1;
mData.mCbSkip = 1;
mU[0] = mV[0];
mU[2] = mV[1];
break;
default:
MOZ_CRASH("Unsupported ImageBitmapFormat!");
break;
}
}
nsresult CopyData(const Data& aData) override {
return NS_ERROR_NOT_IMPLEMENTED;
}
size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const {
return 0;
}
private:
uint8_t mY[4] = {0x52, 0x52, 0x52, 0x52};
uint8_t mU[4] = {0x5A, 0x5A, 0x5A, 0x5A};
uint8_t mV[4] = {0xEF, 0xEF, 0xEF, 0xEF};
};
static already_AddRefed<SourceSurfaceImage> CreateRedSurfaceImage2x2(
SurfaceFormat aFormat) {
uint8_t redPixel[4] = {};
switch (aFormat) {
case SurfaceFormat::R8G8B8A8:
case SurfaceFormat::R8G8B8X8:
redPixel[0] = 0xFF;
redPixel[3] = 0xFF;
break;
case SurfaceFormat::B8G8R8A8:
case SurfaceFormat::B8G8R8X8:
redPixel[2] = 0xFF;
redPixel[3] = 0xFF;
break;
case SurfaceFormat::R5G6B5_UINT16:
redPixel[1] = 0xF8;
break;
default:
MOZ_ASSERT_UNREACHABLE("Unsupported format!");
return nullptr;
}
const IntSize size(2, 2);
auto surface = MakeRefPtr<SourceSurfaceAlignedRawData>();
if (NS_WARN_IF(!surface->Init(size, aFormat, /* aClearMem */ false, 0, 0))) {
return nullptr;
}
DataSourceSurface::ScopedMap map(surface, DataSourceSurface::WRITE);
if (NS_WARN_IF(!map.IsMapped())) {
return nullptr;
}
const uint32_t bpp = BytesPerPixel(aFormat);
MOZ_ASSERT(bpp <= sizeof(redPixel));
uint8_t* rowPtr = map.GetData();
for (int32_t row = 0; row < size.height; ++row) {
for (int32_t col = 0; col < size.width; ++col) {
for (uint32_t i = 0; i < bpp; ++i) {
rowPtr[col * bpp + i] = redPixel[i];
}
}
rowPtr += map.GetStride();
}
return MakeAndAddRef<SourceSurfaceImage>(size, surface);
}
TEST(MediaImageConversion, ConvertToI420)
{
uint8_t y[20] = {};
uint8_t u[20] = {};
uint8_t v[20] = {};
auto checkBuf = [&](const uint8_t* aY, const uint8_t* aU, const uint8_t* aV) {
for (size_t i = 0; i < sizeof(y); ++i) {
EXPECT_EQ(y[i], aY[i]);
}
for (size_t i = 0; i < sizeof(u); ++i) {
EXPECT_EQ(u[i], aU[i]);
}
for (size_t i = 0; i < sizeof(v); ++i) {
EXPECT_EQ(v[i], aV[i]);
}
memset(y, 0, sizeof(y));
memset(u, 0, sizeof(u));
memset(v, 0, sizeof(v));
};
static constexpr uint8_t yRed1x1[20] = {0x52};
static constexpr uint8_t yRed2x2[20] = {0x52, 0x52, 0x52, 0x52};
static constexpr uint8_t yRed4x4[20] = {0x52, 0x52, 0x52, 0x52, 0x52, 0x52,
0x52, 0x52, 0x52, 0x52, 0x52, 0x52,
0x52, 0x52, 0x52, 0x52};
static constexpr uint8_t uRed1x1[20] = {0x5A};
static constexpr uint8_t uRed2x2[20] = {0x5A, 0x5A, 0x5A, 0x5A};
static constexpr uint8_t vRed1x1[20] = {0xEF};
static constexpr uint8_t vRed2x2[20] = {0xEF, 0xEF, 0xEF, 0xEF};
auto checkImage = [&](mozilla::layers::Image* aImage,
const Maybe<ImageBitmapFormat>& aFormat) {
ASSERT_TRUE(!!aImage);
mozilla::dom::ImageUtils utils(aImage);
Maybe<ImageBitmapFormat> format = utils.GetFormat();
ASSERT_EQ(format.isSome(), aFormat.isSome());
if (format.isSome()) {
ASSERT_EQ(format.value(), aFormat.value());
}
EXPECT_TRUE(
NS_SUCCEEDED(ConvertToI420(aImage, y, 2, u, 1, v, 1, IntSize(2, 2))));
checkBuf(yRed2x2, uRed1x1, vRed1x1);
EXPECT_TRUE(
NS_SUCCEEDED(ConvertToI420(aImage, y, 1, u, 1, v, 1, IntSize(1, 1))));
checkBuf(yRed1x1, uRed1x1, vRed1x1);
EXPECT_TRUE(
NS_SUCCEEDED(ConvertToI420(aImage, y, 4, u, 2, v, 2, IntSize(4, 4))));
checkBuf(yRed4x4, uRed2x2, vRed2x2);
};
RefPtr<SourceSurfaceImage> imgRgba =
CreateRedSurfaceImage2x2(SurfaceFormat::R8G8B8A8);
checkImage(imgRgba, Some(ImageBitmapFormat::RGBA32));
RefPtr<SourceSurfaceImage> imgBgra =
CreateRedSurfaceImage2x2(SurfaceFormat::B8G8R8A8);
checkImage(imgBgra, Some(ImageBitmapFormat::BGRA32));
RefPtr<SourceSurfaceImage> imgRgb565 =
CreateRedSurfaceImage2x2(SurfaceFormat::R5G6B5_UINT16);
checkImage(imgRgb565, Nothing());
auto imgYuv420p =
MakeRefPtr<TestRedPlanarYCbCrImage2x2>(ImageBitmapFormat::YUV420P);
checkImage(imgYuv420p, Some(ImageBitmapFormat::YUV420P));
auto imgYuv422p =
MakeRefPtr<TestRedPlanarYCbCrImage2x2>(ImageBitmapFormat::YUV422P);
checkImage(imgYuv422p, Some(ImageBitmapFormat::YUV422P));
auto imgYuv444p =
MakeRefPtr<TestRedPlanarYCbCrImage2x2>(ImageBitmapFormat::YUV444P);
checkImage(imgYuv444p, Some(ImageBitmapFormat::YUV444P));
auto imgYuvNv12 =
MakeRefPtr<TestRedPlanarYCbCrImage2x2>(ImageBitmapFormat::YUV420SP_NV12);
checkImage(imgYuvNv12, Some(ImageBitmapFormat::YUV420SP_NV12));
auto imgYuvNv21 =
MakeRefPtr<TestRedPlanarYCbCrImage2x2>(ImageBitmapFormat::YUV420SP_NV21);
checkImage(imgYuvNv21, Some(ImageBitmapFormat::YUV420SP_NV21));
}
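
A note on the constants used throughout this test: 0x52/0x5A/0xEF are
red (255, 0, 0) pushed through the BT.601 limited-range matrix that
libyuv approximates in fixed point. A rough check with the standard
float coefficients (libyuv's integer rounding lands one LSB lower on Cr,
hence 0xEF rather than 0xF0):

#include <cstdio>

// BT.601 limited-range RGB -> YCbCr, rounded to nearest.
static void Bt601(int r, int g, int b) {
  const int y = static_cast<int>(16 + 0.257 * r + 0.504 * g + 0.098 * b + 0.5);
  const int cb = static_cast<int>(128 - 0.148 * r - 0.291 * g + 0.439 * b + 0.5);
  const int cr = static_cast<int>(128 + 0.439 * r - 0.368 * g - 0.071 * b + 0.5);
  printf("Y=0x%02X Cb=0x%02X Cr=0x%02X\n", y, cb, cr);
}

int main() {
  Bt601(0xFF, 0x00, 0x00);  // prints Y=0x52 Cb=0x5A Cr=0xF0
  return 0;
}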


@@ -47,6 +47,7 @@ UNIFIED_SOURCES += [
"TestDriftCompensation.cpp",
"TestGMPUtils.cpp",
"TestGroupId.cpp",
"TestImageConversion.cpp",
"TestIntervalSet.cpp",
"TestKeyValueStorage.cpp",
"TestMediaCodecsSupport.cpp",


@@ -218,24 +218,11 @@ RefPtr<MediaDataEncoder::EncodePromise> GMPVideoEncoder::Encode(
const VideoData* sample(aSample->As<const VideoData>());
const uint64_t timestamp = sample->mTime.ToMicroseconds();
- gfx::IntSize ySize;
- gfx::IntSize cbCrSize;
- int32_t yStride;
- int32_t cbCrStride;
- if (const layers::PlanarYCbCrImage* planarImage =
- sample->mImage->AsPlanarYCbCrImage()) {
- const layers::PlanarYCbCrData* yuv = planarImage->GetData();
- ySize = yuv->YDataSize();
- cbCrSize = yuv->CbCrDataSize();
- yStride = yuv->mYStride;
- cbCrStride = yuv->mCbCrStride;
- } else {
- ySize = sample->mImage->GetSize();
- cbCrSize = gfx::ChromaSize(ySize, gfx::ChromaSubsampling::HALF_WIDTH);
- yStride = ySize.width;
- cbCrStride = cbCrSize.width;
- }
const gfx::IntSize ySize = mConfig.mSize;
const gfx::IntSize cbCrSize =
gfx::ChromaSize(ySize, gfx::ChromaSubsampling::HALF_WIDTH_AND_HEIGHT);
const int32_t yStride = ySize.width;
const int32_t cbCrStride = cbCrSize.width;
GMP_LOG_DEBUG(
"[%p] GMPVideoEncoder::Encode -- request encode of frame @ %" PRIu64
@@ -257,7 +244,7 @@ RefPtr<MediaDataEncoder::EncodePromise> GMPVideoEncoder::Encode(
uint8_t* vDest = frame->Buffer(GMPPlaneType::kGMPVPlane);
nsresult rv = ConvertToI420(sample->mImage, yDest, yStride, uDest, cbCrStride,
- vDest, cbCrStride);
vDest, cbCrStride, ySize);
if (NS_WARN_IF(NS_FAILED(rv))) {
GMP_LOG_ERROR("[%p] GMPVideoEncoder::Encode -- failed to convert to I420",
this);

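Since ConvertToI420 now scales, the GMP frame geometry above comes
entirely from the encoder configuration rather than from the incoming
image. A small sketch of that plane math, under the assumption that
gfx::ChromaSize with HALF_WIDTH_AND_HEIGHT rounds up like the
CeilingOfHalf helper in ImageConversion.cpp (the 640x360 size is
hypothetical):

#include <cassert>
#include <cstdint>

struct IntSize {
  int32_t width;
  int32_t height;
};

// 4:2:0 chroma is half size in both dimensions, rounded up.
static IntSize ChromaHalf(const IntSize& aSize) {
  return IntSize{(aSize.width + 1) / 2, (aSize.height + 1) / 2};
}

int main() {
  const IntSize configured{640, 360};  // stand-in for mConfig.mSize
  const IntSize cbCr = ChromaHalf(configured);
  assert(cbCr.width == 320 && cbCr.height == 180);
  // Tightly packed strides for the destination planes.
  const int32_t yStride = configured.width;
  const int32_t cbCrStride = cbCr.width;
  assert(yStride == 640 && cbCrStride == 320);
  return 0;
}
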

@@ -511,48 +511,6 @@ nsresult FFmpegVideoEncoder<LIBAV_VER>::InitSpecific() {
return NS_OK;
}
- bool FFmpegVideoEncoder<LIBAV_VER>::ScaleInputFrame() {
- AVFrame* source = mFrame;
- mFrame = nullptr;
- // Allocate AVFrame.
- if (!PrepareFrame()) {
- FFMPEGV_LOG("failed to allocate frame");
- return false;
- }
- // Set AVFrame properties for its internal data allocation. For now, we always
- // convert into ffmpeg's buffer.
- mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
- mFrame->width = static_cast<int>(mConfig.mSize.Width());
- mFrame->height = static_cast<int>(mConfig.mSize.Height());
- // Allocate AVFrame data.
- if (int ret = mLib->av_frame_get_buffer(mFrame, 16); ret < 0) {
- FFMPEGV_LOG("failed to allocate frame data: %s",
- MakeErrorString(mLib, ret).get());
- return false;
- }
- // Make sure AVFrame is writable.
- if (int ret = mLib->av_frame_make_writable(mFrame); ret < 0) {
- FFMPEGV_LOG("failed to make frame writable: %s",
- MakeErrorString(mLib, ret).get());
- return false;
- }
- int rv = I420Scale(source->data[0], source->linesize[0], source->data[1],
- source->linesize[1], source->data[2], source->linesize[2],
- source->width, source->height, mFrame->data[0],
- mFrame->linesize[0], mFrame->data[1], mFrame->linesize[1],
- mFrame->data[2], mFrame->linesize[2], mFrame->width,
- mFrame->height, libyuv::FilterMode::kFilterBox);
- if (!rv) {
- FFMPEGV_LOG("YUV scale error");
- }
- mLib->av_frame_unref(source);
- mLib->av_frame_free(&source);
- return true;
- }
// avcodec_send_frame and avcodec_receive_packet were introduced in version 58.
#if LIBAVCODEC_VERSION_MAJOR >= 58
Result<MediaDataEncoder::EncodedData, nsresult> FFmpegVideoEncoder<
@@ -585,8 +543,8 @@ Result<MediaDataEncoder::EncodedData, nsresult> FFmpegVideoEncoder<
// Set AVFrame properties for its internal data allocation. For now, we always
// convert into ffmpeg's buffer.
mFrame->format = ffmpeg::FFMPEG_PIX_FMT_YUV420P;
- mFrame->width = static_cast<int>(sample->mImage->GetSize().width);
- mFrame->height = static_cast<int>(sample->mImage->GetSize().height);
mFrame->width = static_cast<int>(mConfig.mSize.width);
mFrame->height = static_cast<int>(mConfig.mSize.height);
mFrame->pict_type =
sample->mKeyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE;
@@ -608,23 +566,13 @@ Result<MediaDataEncoder::EncodedData, nsresult> FFmpegVideoEncoder<
nsresult rv = ConvertToI420(
sample->mImage, mFrame->data[0], mFrame->linesize[0], mFrame->data[1],
- mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2]);
mFrame->linesize[1], mFrame->data[2], mFrame->linesize[2], mConfig.mSize);
if (NS_FAILED(rv)) {
FFMPEGV_LOG("Conversion error!");
return Result<MediaDataEncoder::EncodedData, nsresult>(
NS_ERROR_DOM_MEDIA_FATAL_ERR);
}
- // Scale the YUV input frame if needed -- the encoded frame will have the
- // dimensions configured at encoded initialization.
- if (mFrame->width != mConfig.mSize.Width() ||
- mFrame->height != mConfig.mSize.Height()) {
- if (!ScaleInputFrame()) {
- return Result<MediaDataEncoder::EncodedData, nsresult>(
- NS_ERROR_DOM_MEDIA_FATAL_ERR);
- }
- }
// Set presentation timestamp and duration of the AVFrame. The unit of pts is
// time_base.
// TODO(bug 1869560): The recommended time_base is the reciprocal of the frame

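With ScaleInputFrame gone, the destination AVFrame is simply allocated at
the configured encode size and ConvertToI420 converts and scales into it
in one pass. A self-contained sketch of that allocation pattern against
plain libavutil (no Mozilla wrappers; error handling condensed):

extern "C" {
#include <libavutil/frame.h>
#include <libavutil/pixfmt.h>
}

// Allocate a writable YUV420P frame at the encoder's configured size; the
// conversion step can then write (and scale) directly into frame->data[].
static AVFrame* AllocEncodeFrame(int aWidth, int aHeight) {
  AVFrame* frame = av_frame_alloc();
  if (!frame) {
    return nullptr;
  }
  frame->format = AV_PIX_FMT_YUV420P;
  frame->width = aWidth;
  frame->height = aHeight;
  if (av_frame_get_buffer(frame, 16) < 0 ||
      av_frame_make_writable(frame) < 0) {
    av_frame_free(&frame);
    return nullptr;
  }
  return frame;
}
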

@@ -42,7 +42,6 @@ class FFmpegVideoEncoder<LIBAV_VER> : public FFmpegDataEncoder<LIBAV_VER> {
Result<EncodedData, nsresult> EncodeInputWithModernAPIs(
RefPtr<const MediaData> aSample) override;
#endif
- bool ScaleInputFrame();
virtual RefPtr<MediaRawData> ToMediaRawData(AVPacket* aPacket) override;
Result<already_AddRefed<MediaByteBuffer>, nsresult> GetExtraData(
AVPacket* aPacket) override;