Bug 1924533: Run macrobenchmark test on CI to measure Baseline Profile impact on performance r=releng-reviewers,android-reviewers,bhearsum,calu
This patch adds a job called `run-macrobenchmark-firebase-fenix` that runs a macrobenchmark on a physical device on Firebase Test Lab to benchmark the impact of the baseline profile on app startup.

Differential Revision: https://phabricator.services.mozilla.com/D229019
committed by tthibaud@mozilla.com
parent f2dca3c70d
commit abe18b0bd1
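For context, the job reports startup impact as the percentage improvement of the profiled run over the unprofiled run, mirroring calculate_improvements() in the new compute-macrobenchmark-results.py further down. A minimal sketch of the arithmetic, with made-up numbers:

# Hypothetical values in ms; the formula mirrors calculate_improvements() below.
none_median = 420.0      # cold startup median without a baseline profile
profiled_median = 360.0  # cold startup median with the baseline profile
improvement = (none_median - profiled_median) / none_median * 100
print(f"{improvement:.2f}%")  # -> 14.29%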
@@ -0,0 +1,39 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Google Cloud Documentation: https://cloud.google.com/sdk/gcloud/reference/firebase/test/android/run
# Flank Documentation: https://flank.github.io/flank/
gcloud:
  results-bucket: fenix_test_artifacts
  record-video: true
  timeout: 15m
  async: false
  num-flaky-test-attempts: 1

  app: /app/path
  test: /test/path

  auto-google-login: false
  use-orchestrator: false
  environment-variables:
    clearPackageData: true
  directories-to-pull:
    - /sdcard/Download
    - /sdcard/Android/media/org.mozilla.fenix.benchmark/
  performance-metrics: true

  test-targets:
    - class org.mozilla.fenix.benchmark.BaselineProfilesStartupBenchmark

  device:
    - model: shiba
      version: 34
      locale: en_US

flank:
  project: GOOGLE_PROJECT
  max-test-shards: 1
  num-test-runs: 1
  output-style: compact
  full-junit-result: true
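This Flank config drives the Firebase Test Lab run. A minimal local sanity check, assuming PyYAML is installed and the file is saved as flank-macrobenchmark.yml (a hypothetical name, since the file path is not shown in this view):

# Sketch: load the Flank config and check the fields the benchmark run relies on.
import yaml

with open("flank-macrobenchmark.yml") as f:  # hypothetical filename
    config = yaml.safe_load(f)

gcloud = config["gcloud"]
assert gcloud["performance-metrics"] is True
assert any("BaselineProfilesStartupBenchmark" in t for t in gcloud["test-targets"])
assert "/sdcard/Android/media/org.mozilla.fenix.benchmark/" in gcloud["directories-to-pull"]
# In Flank configs the device list sits under the gcloud block.
print("config looks sane:", gcloud["device"][0])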
@@ -66,5 +66,6 @@ class BaselineProfilesStartupBenchmark {
        },
    ) {
        startActivityAndWait()
        killProcess()
    }
}
@@ -890,3 +890,11 @@ Run baseline profile generation for Android on Firebase TestLab.
update
------------
Run tests to see if the executable can be updated to the latest release.

run-macrobenchmark-firebase
---------------------------
Run Macrobenchmark for Android on Firebase TestLab.

instrumented-build-macrobenchmark-apk
-------------------------------------
Generate instrumented apks used to run Macrobenchmark for Android apps.
@@ -0,0 +1,54 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
---
loader: taskgraph.loader.transform:loader

transforms:
    - android_taskgraph.transforms.build_android_app:transforms
    - gecko_taskgraph.transforms.job:transforms
    - gecko_taskgraph.transforms.task:transforms

kind-dependencies:
    - toolchain
    - build-fat-aar
    - generate-baseline-profile-firebase

task-defaults:
    attributes:
        retrigger: true
    fetches:
        toolchain:
            - linux64-android-sdk-linux-repack
            - linux64-jdk-repack
            - linux64-node
    run:
        using: gradlew
    treeherder:
        kind: test
        tier: 1

tasks:
    fenix:
        description: 'Generate macrobenchmark apks for fenix with baseline profile.'
        attributes:
            shipping-product: fenix
        dependencies:
            build-fat-aar: build-fat-aar-android-geckoview-fat-aar/opt
            baseline-profile: generate-baseline-profile-firebase-fenix
        source-project-name: "fenix"
        run:
            gradle-package-command: ":benchmark:assembleBenchmark :app:assembleBenchmark"
            baseline-profile-path: /builds/worker/fetches/baselineProfiles
        treeherder:
            symbol: fenix(instr)
            platform: fenix-android-all/opt
        worker:
            artifacts:
                - name: "public/build/target.arm64-v8a.apk"
                  path: "/builds/worker/workspace/obj-build/gradle/build/mobile/android/fenix/app/outputs/apk/fenix/benchmark/app-fenix-arm64-v8a-benchmark.apk"
                  type: "file"
                - name: "public/build/target.noarch.apk"
                  path: "/builds/worker/workspace/obj-build/gradle/build/mobile/android/fenix/benchmark/outputs/apk/benchmark/benchmark-benchmark.apk"
                  type: "file"
            chain-of-trust: true
taskcluster/kinds/run-macrobenchmark-firebase/kind.yml (new file, 81 lines)
@@ -0,0 +1,81 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
---
loader: taskgraph.loader.transform:loader

transforms:
    - gecko_taskgraph.transforms.test_apk:transforms
    # While not a build, the build optimization works well for these tasks.
    - gecko_taskgraph.transforms.build_schedules:transforms
    - gecko_taskgraph.transforms.job:transforms
    - gecko_taskgraph.transforms.task:transforms

kind-dependencies:
    - toolchain
    - instrumented-build-macrobenchmark-apk

task-defaults:
    attributes:
        build_platform: android
        build-type: debug
        retrigger: true
    fetches:
        toolchain:
            - android-sdk-linux
    optimization:
        skip-unless-backstop: null
    worker-type: b-linux-medium-gcp
    worker:
        docker-image: {in-tree: android-ui-tests}
        max-run-time: 7200
        env:
            GOOGLE_APPLICATION_CREDENTIALS: '.firebase_token.json'
            ANDROID_SDK_ROOT: /builds/worker/fetches/android-sdk-linux
        artifacts:
            - name: public
              path: /builds/worker/artifacts
              type: directory
        retry-exit-status: [20]
    treeherder:
        kind: test
        tier: 1
    run:
        use-caches: false
        using: run-commands
        dummy-secrets:
            - content: "faketoken"
              path: .adjust_token

tasks:
    fenix:
        attributes:
            build-type: fenix-debug-firebase
            shipping-product: fenix
        description: Run macrobenchmark for Fenix
        dependencies:
            instrumented-apk: instrumented-build-macrobenchmark-apk-fenix
        fetches:
            instrumented-apk:
                - artifact: target.arm64-v8a.apk
                  extract: false
                - artifact: target.noarch.apk
                  extract: false
        run:
            secrets:
                - name: project/mobile/firefox-android/fenix/firebase
                  key: firebaseToken
                  path: .firebase_token.json
                  json: true
            pre-commands:
                - ["cd", "mobile/android/fenix"]
            commands:
                - [python3, ../../../taskcluster/scripts/tests/test-lab.py, arm64-v8a-macrobenchmark, /builds/worker/fetches/target.arm64-v8a.apk, --apk_test, /builds/worker/fetches/target.noarch.apk]
                - [python3, ../../../taskcluster/scripts/tests/copy-artifacts-from-ftl.py, "macrobenchmark"]
                - [python3, ../../../taskcluster/scripts/tests/compute-macrobenchmark-results.py, "/builds/worker/artifacts/build/macrobenchmark.json", "/builds/worker/artifacts/build/macrobenchmark.txt"]
        treeherder:
            platform: fenix-android-all/opt
            symbol: fenix(run)
        worker:
            env:
                GOOGLE_PROJECT: moz-fenix
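The three commands form the pipeline: run the benchmark on Firebase Test Lab, pull the resulting benchmarkData.json artifact, then convert it to Perfherder format. The conversion script below prints a PERFHERDER_DATA: line that performance tooling scrapes out of the task log; a minimal sketch of that consumer side, assuming only the prefix convention used by the script:

# Sketch: pull the Perfherder payload back out of a task log line.
import json

def parse_perfherder_line(log_line):
    prefix = "PERFHERDER_DATA: "
    if not log_line.startswith(prefix):
        raise ValueError("not a Perfherder data line")
    return json.loads(log_line[len(prefix):])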
taskcluster/scripts/tests/compute-macrobenchmark-results.py (new file, 100 lines)
@@ -0,0 +1,100 @@
import json
import sys


def read_benchmark_data(file_path):
    """Reads the JSON file and returns the benchmark results as a dictionary."""
    with open(file_path) as file:
        data = json.load(file)

    # Extract benchmarks data
    benchmarks = data["benchmarks"]
    results = {}
    for benchmark in benchmarks:
        name = benchmark["name"]
        time_metrics = benchmark["metrics"]["timeToInitialDisplayMs"]
        results[name] = {
            "median": time_metrics["median"],
            "minimum": time_metrics["minimum"],
            "maximum": time_metrics["maximum"],
        }
    return results


def calculate_improvements(results):
    """Calculates percentage improvements between startup with and without baseline profiles."""
    improvements = {
        "median": f"{((results['startupNone']['median'] - results['startupPartialWithBaselineProfiles']['median']) / results['startupNone']['median']) * 100:.2f}%",
        "minimum": f"{((results['startupNone']['minimum'] - results['startupPartialWithBaselineProfiles']['minimum']) / results['startupNone']['minimum']) * 100:.2f}%",
        "maximum": f"{((results['startupNone']['maximum'] - results['startupPartialWithBaselineProfiles']['maximum']) / results['startupNone']['maximum']) * 100:.2f}%",
    }
    return improvements


def format_output_content(results):
    """Formats the output content into the specified JSON structure."""
    # Map to transform result names to subtest entries
    baseline_map = {
        "startupPartialWithBaselineProfiles": "baseline",
        "startupNone": "no_baseline",
    }

    # Construct the subtests list
    subtests = []
    for result_name, metrics in results.items():
        baseline_mode = baseline_map.get(result_name, "unknown")
        for metric_name, value in metrics.items():
            subtest = {
                "name": f"cold_startup.{baseline_mode}.{metric_name}",
                "lowerIsBetter": True,
                "value": value,
                "unit": "ms",
            }
            subtests.append(subtest)

    # Define the base JSON structure using the subtests list
    output_json = {
        "framework": {"name": "mozperftest"},
        "application": {"name": "fenix"},
        "suites": [
            {
                "name": "baseline-profile:fenix",
                "type": "coldstart",
                "unit": "ms",
                "extraOptions": [],
                "lowerIsBetter": True,
                "subtests": subtests,
            }
        ],
    }

    return output_json


def output_results(output_json, output_file_path):
    """Writes the output JSON to a specified file and prints it in a compacted format to the console."""
    # Convert JSON structure to a compacted one-line string
    compact_json = json.dumps(output_json)

    # Print in the specified format
    print(f"PERFHERDER_DATA: {compact_json}")

    # Write the pretty-formatted JSON to the file
    with open(output_file_path, "w") as output_file:
        output_file.write(json.dumps(output_json, indent=3))
    print(f"Results have been written to {output_file_path}")


# Main script logic
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python script.py <input_json_path> <output_file_path>")
    else:
        input_json_path = sys.argv[1]
        output_file_path = sys.argv[2]

        # Process the benchmark data
        results = read_benchmark_data(input_json_path)
        improvements = calculate_improvements(results)
        output_json = format_output_content(results)
        output_results(output_json, output_file_path)
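A quick way to exercise this script end to end is to feed it a Macrobenchmark-shaped JSON file. The structure below is an assumption inferred from read_benchmark_data(), not an official schema, and it assumes the script is in the current directory:

# Sketch: generate a minimal input and run the script against it.
import json
import subprocess

sample = {
    "benchmarks": [
        {
            "name": "startupNone",
            "metrics": {
                "timeToInitialDisplayMs": {"median": 420.0, "minimum": 400.0, "maximum": 480.0}
            },
        },
        {
            "name": "startupPartialWithBaselineProfiles",
            "metrics": {
                "timeToInitialDisplayMs": {"median": 360.0, "minimum": 340.0, "maximum": 410.0}
            },
        },
    ]
}

with open("macrobenchmark.json", "w") as f:
    json.dump(sample, f)

# Prints PERFHERDER_DATA: {...} and writes the pretty JSON to macrobenchmark.txt.
subprocess.run(
    ["python3", "compute-macrobenchmark-results.py", "macrobenchmark.json", "macrobenchmark.txt"],
    check=True,
)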
@@ -58,6 +58,7 @@ class Worker(Enum):

    RESULTS_DIR = "/builds/worker/artifacts/results"
    BASELINE_PROFILE_DEST = "/builds/worker/artifacts/build/baseline-prof.txt"
    MACROBENCHMARK_DEST = "/builds/worker/artifacts/build/macrobenchmark.json"
    ARTIFACTS_DIR = "/builds/worker/artifacts"


@@ -70,6 +71,9 @@ class ArtifactType(Enum):
        "artifacts/sdcard/Android/media/org.mozilla.fenix.benchmark/*-baseline-prof.txt"
    )
    CRASH_LOG = "data_app_crash*.txt"
    MACROBENCHMARK = (
        "artifacts/sdcard/Android/media/org.mozilla.fenix.benchmark/*benchmarkData.json"
    )
    MATRIX_IDS = "matrix_ids.json"

@@ -247,6 +251,8 @@ def process_artifacts(artifact_type):

    if artifact_type == ArtifactType.BASELINE_PROFILE:
        return process_baseline_profile_artifact(root_gcs_path, device_names)
    elif artifact_type == ArtifactType.MACROBENCHMARK:
        return process_macrobenchmark_artifact(root_gcs_path, device_names)
    else:
        return process_crash_artifacts(root_gcs_path, device_names)

@@ -262,6 +268,17 @@ def process_baseline_profile_artifact(root_gcs_path, device_names):
    gsutil_cp(artifact, Worker.BASELINE_PROFILE_DEST.value)


def process_macrobenchmark_artifact(root_gcs_path, device_names):
    device = device_names[0]
    artifact = fetch_artifacts(
        root_gcs_path, device, ArtifactType.MACROBENCHMARK.value
    )[0]
    if not artifact:
        exit_with_error(f"No artifacts found for device: {device}")

    gsutil_cp(artifact, Worker.MACROBENCHMARK_DEST.value)


def process_crash_artifacts(root_gcs_path, failed_device_names):
    crashes_reported = 0
    for device in failed_device_names:

@@ -295,6 +312,8 @@ def main():
    artifact_type_arg = sys.argv[1]
    if artifact_type_arg == "baseline_profile":
        process_artifacts(ArtifactType.BASELINE_PROFILE)
    elif artifact_type_arg == "macrobenchmark":
        process_artifacts(ArtifactType.MACROBENCHMARK)
    elif artifact_type_arg == "crash_log":
        process_artifacts(ArtifactType.CRASH_LOG)
    else: