Bug 1944302 - Add profiler markers around gemmology operations. r=sfink

We could have gone with putting this under the macro but I wanted custom marker
messages for each call so decided to simply add a marker to each call site.

Differential Revision: https://phabricator.services.mozilla.com/D239786
This commit is contained in:
Paul Adenot
2025-03-04 12:51:02 +00:00
parent ebf6af14e9
commit 00a110d4a7
2 changed files with 65 additions and 2 deletions

View File

@@ -114,6 +114,7 @@ included_inclnames_to_ignore = set(
"wasm/WasmBuiltinModuleGenerated.h", # generated in $OBJDIR" "wasm/WasmBuiltinModuleGenerated.h", # generated in $OBJDIR"
"zydis/ZydisAPI.h", # Zydis "zydis/ZydisAPI.h", # Zydis
"xsum/xsum.h", # xsum "xsum/xsum.h", # xsum
"fmt/format.h", # {fmt} main header
] ]
) )

View File

@@ -10,12 +10,16 @@
#include "mozilla/CheckedInt.h" #include "mozilla/CheckedInt.h"
#include "mozilla/IntegerPrintfMacros.h" #include "mozilla/IntegerPrintfMacros.h"
#include "mozilla/TimeStamp.h"
#include <gemmology_fwd.h> #include <gemmology_fwd.h>
#include "fmt/format.h"
#include "js/ErrorReport.h" #include "js/ErrorReport.h"
#include "js/HeapAPI.h" #include "js/HeapAPI.h"
#include "vm/ArrayBufferObject.h" #include "vm/ArrayBufferObject.h"
#include "vm/GeckoProfiler.h"
#include "vm/JSContext.h"
#include "wasm/WasmBuiltins.h" #include "wasm/WasmBuiltins.h"
#include "wasm/WasmInstance.h" #include "wasm/WasmInstance.h"
#include "wasm/WasmLog.h" #include "wasm/WasmLog.h"
@@ -70,7 +74,42 @@
return gemmology::Engine<decltype(arch)>::FUNC(args...); \ return gemmology::Engine<decltype(arch)>::FUNC(args...); \
}) })
struct JSContext; template <size_t TextLength = 512, typename CharT = char>
struct AutoProfilerMarker {
AutoProfilerMarker(js::GeckoProfilerRuntime& profiler, const CharT* name)
: profiler(profiler), name(name) {
if (profiler.enabled()) {
startTime = mozilla::TimeStamp::Now();
}
}
template <typename... Args>
AutoProfilerMarker(js::GeckoProfilerRuntime& profiler, const CharT* name,
fmt::format_string<Args...> aFormatStr, Args&&... aArgs)
: profiler(profiler), name(name) {
if (profiler.enabled()) {
startTime = mozilla::TimeStamp::Now();
auto [out, size] = fmt::vformat_to_n(
text, sizeof(text) - 1, aFormatStr,
fmt::make_format_args<fmt::buffered_context<CharT>>(aArgs...));
MOZ_ASSERT(size > sizeof(text) - 1,
"Truncated marker, consider increasing the buffer");
*out = 0;
}
}
~AutoProfilerMarker() {
if (profiler.enabled()) {
profiler.markInterval(name, startTime, text,
JS::ProfilingCategoryPair::JS);
}
}
js::GeckoProfilerRuntime& profiler;
const char* name;
char text[TextLength]{};
mozilla::TimeStamp startTime;
};
static constexpr uint32_t ARRAY_ALIGNMENT = 64; static constexpr uint32_t ARRAY_ALIGNMENT = 64;
static constexpr uint32_t ROWS_A_MULTIPLIER = 1; static constexpr uint32_t ROWS_A_MULTIPLIER = 1;
@@ -169,6 +208,9 @@ int32_t js::intgemm::IntrI8PrepareB(wasm::Instance* instance,
const float* inputMatrixBPtr = const float* inputMatrixBPtr =
reinterpret_cast<const float*>(&memBase[inputMatrixB]); reinterpret_cast<const float*>(&memBase[inputMatrixB]);
int8_t* outputMatrixBPtr = reinterpret_cast<int8_t*>(&memBase[outputMatrixB]); int8_t* outputMatrixBPtr = reinterpret_cast<int8_t*>(&memBase[outputMatrixB]);
AutoProfilerMarker marker(cx->runtime()->geckoProfiler(), "integemm::PreparB",
FMT_STRING("rowsB: {} colsB: {} sizeB: {}"), rowsB,
colsB, sizeB);
GEMMOLOGY_DISPATCH(PrepareB) GEMMOLOGY_DISPATCH(PrepareB)
(inputMatrixBPtr, outputMatrixBPtr, (inputMatrixBPtr, outputMatrixBPtr,
scale, // Quant Mult scale, // Quant Mult
@@ -212,6 +254,9 @@ int32_t js::intgemm::IntrI8PrepareBFromTransposed(
const float* inputMatrixBTransposedPtr = const float* inputMatrixBTransposedPtr =
reinterpret_cast<const float*>(&memBase[inputMatrixBTransposed]); reinterpret_cast<const float*>(&memBase[inputMatrixBTransposed]);
int8_t* outputMatrixBPtr = reinterpret_cast<int8_t*>(&memBase[outputMatrixB]); int8_t* outputMatrixBPtr = reinterpret_cast<int8_t*>(&memBase[outputMatrixB]);
AutoProfilerMarker marker(
cx->runtime()->geckoProfiler(), "intgemm::PreparBTransposed",
FMT_STRING("rowsB: {} colsB: {} sizeB: {}"), rowsB, colsB, sizeB);
GEMMOLOGY_DISPATCH(PrepareBTransposed) GEMMOLOGY_DISPATCH(PrepareBTransposed)
(inputMatrixBTransposedPtr, outputMatrixBPtr, (inputMatrixBTransposedPtr, outputMatrixBPtr,
scale, // Quant Mult scale, // Quant Mult
@@ -255,6 +300,9 @@ int32_t js::intgemm::IntrI8PrepareBFromQuantizedTransposed(
reinterpret_cast<const int8_t*>( reinterpret_cast<const int8_t*>(
&memBase[inputMatrixBQuantizedTransposed]); &memBase[inputMatrixBQuantizedTransposed]);
int8_t* outputMatrixBPtr = reinterpret_cast<int8_t*>(&memBase[outputMatrixB]); int8_t* outputMatrixBPtr = reinterpret_cast<int8_t*>(&memBase[outputMatrixB]);
AutoProfilerMarker marker(cx->runtime()->geckoProfiler(),
"intgemm::PrepareBQuantizedTransposed",
FMT_STRING("rowsB: {}, colsB: {}"), rowsB, colsB);
GEMMOLOGY_DISPATCH(PrepareBQuantizedTransposed) GEMMOLOGY_DISPATCH(PrepareBQuantizedTransposed)
(inputMatrixBQuantizedTransposedPtr, outputMatrixBPtr, rowsB, colsB); (inputMatrixBQuantizedTransposedPtr, outputMatrixBPtr, rowsB, colsB);
return 0; return 0;
@@ -268,7 +316,6 @@ int32_t js::intgemm::IntrI8PrepareA(wasm::Instance* instance,
MOZ_ASSERT(wasm::SASigIntrI8PrepareA.failureMode == MOZ_ASSERT(wasm::SASigIntrI8PrepareA.failureMode ==
wasm::FailureMode::FailOnNegI32); wasm::FailureMode::FailOnNegI32);
JSContext* cx = instance->cx(); JSContext* cx = instance->cx();
// Size checks for matricies // Size checks for matricies
if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) || if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) ||
!CheckMatrixDimension(cx, colsA, COLUMNS_A_MULTIPLIER)) { !CheckMatrixDimension(cx, colsA, COLUMNS_A_MULTIPLIER)) {
@@ -296,6 +343,8 @@ int32_t js::intgemm::IntrI8PrepareA(wasm::Instance* instance,
const float* inputMatrixAPtr = const float* inputMatrixAPtr =
reinterpret_cast<const float*>(&memBase[inputMatrixA]); reinterpret_cast<const float*>(&memBase[inputMatrixA]);
uint8_t* outputMatrixAPtr = &memBase[outputMatrixA]; uint8_t* outputMatrixAPtr = &memBase[outputMatrixA];
AutoProfilerMarker marker(cx->runtime()->geckoProfiler(), "intgemm::PrepareA",
FMT_STRING("rowsA: {}, colsA: {}"), rowsA, colsA);
GEMMOLOGY_DISPATCH(Shift::PrepareA) GEMMOLOGY_DISPATCH(Shift::PrepareA)
(inputMatrixAPtr, outputMatrixAPtr, scale, rowsA, colsA); (inputMatrixAPtr, outputMatrixAPtr, scale, rowsA, colsA);
return 0; return 0;
@@ -350,11 +399,17 @@ int32_t js::intgemm::IntrI8PrepareBias(
} }
const float* inputBiasPtr = reinterpret_cast<float*>(&memBase[inputBias]); const float* inputBiasPtr = reinterpret_cast<float*>(&memBase[inputBias]);
AutoProfilerMarker marker(
cx->runtime()->geckoProfiler(), "intgemm::PrepareBias w/ input bias",
FMT_STRING("rowsB: {} colsB: {} sizeB: {}"), rowsB, colsB, sizeB);
GEMMOLOGY_DISPATCH(Shift::PrepareBias) GEMMOLOGY_DISPATCH(Shift::PrepareBias)
(inputMatrixBPreparedPtr, rowsB, colsB, (inputMatrixBPreparedPtr, rowsB, colsB,
gemmology::callbacks::UnquantizeAndAddBiasAndWrite( gemmology::callbacks::UnquantizeAndAddBiasAndWrite(
unquantFactor, inputBiasPtr, outputPtr)); unquantFactor, inputBiasPtr, outputPtr));
} else { } else {
AutoProfilerMarker marker(
cx->runtime()->geckoProfiler(), "intgemm::PrepareBias",
FMT_STRING("rowsB: {} colsB: {} sizeB: {}"), rowsB, colsB, sizeB);
GEMMOLOGY_DISPATCH(Shift::PrepareBias) GEMMOLOGY_DISPATCH(Shift::PrepareBias)
(inputMatrixBPreparedPtr, rowsB, colsB, (inputMatrixBPreparedPtr, rowsB, colsB,
gemmology::callbacks::UnquantizeAndWrite(unquantFactor, outputPtr)); gemmology::callbacks::UnquantizeAndWrite(unquantFactor, outputPtr));
@@ -415,6 +470,9 @@ int32_t js::intgemm::IntrI8MultiplyAndAddBias(
float* outputPtr = reinterpret_cast<float*>(&memBase[output]); float* outputPtr = reinterpret_cast<float*>(&memBase[output]);
float unquantFactor = unquantMultiplier / (scaleA * scaleB); float unquantFactor = unquantMultiplier / (scaleA * scaleB);
AutoProfilerMarker marker(
cx->runtime()->geckoProfiler(), "intgemm::Shift::Multiply",
FMT_STRING("rowsA: {}, width: {}, colsA: {}"), rowsA, width, colsB);
GEMMOLOGY_DISPATCH(Shift::Multiply) GEMMOLOGY_DISPATCH(Shift::Multiply)
(inputMatrixAPreparedPtr, inputMatrixBPreparedPtr, rowsA, width, colsB, (inputMatrixAPreparedPtr, inputMatrixBPreparedPtr, rowsA, width, colsB,
gemmology::callbacks::UnquantizeAndAddBiasAndWrite( gemmology::callbacks::UnquantizeAndAddBiasAndWrite(
@@ -469,6 +527,10 @@ int32_t js::intgemm::IntrI8SelectColumnsOfB(wasm::Instance* instance,
const uint32_t* colIndexListPtr = const uint32_t* colIndexListPtr =
reinterpret_cast<const uint32_t*>(&memBase[colIndexList]); reinterpret_cast<const uint32_t*>(&memBase[colIndexList]);
int8_t* outputPtr = reinterpret_cast<int8_t*>(&memBase[output]); int8_t* outputPtr = reinterpret_cast<int8_t*>(&memBase[output]);
AutoProfilerMarker marker(
cx->runtime()->geckoProfiler(), "integemm::SelectColumnsB",
FMT_STRING("rowsB: {} colsB: {} sizecolList: {}, sizeB: {}"), rowsB,
colsB, sizeColIndexList, sizeB);
GEMMOLOGY_DISPATCH(SelectColumnsB) GEMMOLOGY_DISPATCH(SelectColumnsB)
(inputMatrixBPreparedPtr, outputPtr, rowsB, colIndexListPtr, (inputMatrixBPreparedPtr, outputPtr, rowsB, colIndexListPtr,
colIndexListPtr + sizeColIndexList); colIndexListPtr + sizeColIndexList);