From cf00b30288c4c81b2c6a5af01c38f236148777a0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 28 Mar 2023 06:13:36 +0900 Subject: [PATCH] Revert "[Passes][VectorCombine] enable early run generally and try load folds" This reverts commit 163bb6d64e5f1220777c3ec2a8b58c0666a74d91. It causes various reftest regressions. --- llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 ++------ llvm/test/Other/new-pm-defaults.ll | 2 +- .../Other/new-pm-thinlto-postlink-defaults.ll | 1 - .../Other/new-pm-thinlto-postlink-pgo-defaults.ll | 1 - .../new-pm-thinlto-postlink-samplepgo-defaults.ll | 1 - .../Other/new-pm-thinlto-prelink-pgo-defaults.ll | 1 - .../new-pm-thinlto-prelink-samplepgo-defaults.ll | 1 - .../PhaseOrdering/X86/vec-load-combine.ll | 15 +++++++++++---- 9 files changed, 18 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 07db107325f0..40045629af06 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -729,9 +729,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Delete small array after loop unroll. FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); - // Try vectorization/scalarization transforms that are both improvements - // themselves and can allow further folds with GVN and InstCombine. - FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); + // The matrix extension can introduce large vector operations early, which can + // benefit from running vector-combine early on. + if (EnableMatrix) + FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); // Eliminate redundancies. FPM.addPass(MergedLoadStoreMotionPass()); diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 019d79567b4a..b3e0f4cf0514 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3500,23 +3500,6 @@ bool VectorCombine::run() { LLVM_DEBUG(dbgs() << "VC: Visiting: " << I << '\n'); - // These folds should be beneficial regardless of when this pass is run - // in the optimization pipeline. - // The type checking is for run-time efficiency. We can avoid wasting time - // dispatching to folding functions if there's no chance of matching. - if (IsFixedVectorType) { - switch (Opcode) { - case Instruction::InsertElement: - MadeChange |= vectorizeLoadInsert(I); - break; - case Instruction::ShuffleVector: - MadeChange |= widenSubvectorLoad(I); - break; - default: - break; - } - } - // This transform works with scalable and fixed vectors // TODO: Identify and allow other scalable transforms if (IsVectorType) { @@ -3540,11 +3523,13 @@ bool VectorCombine::run() { if (IsFixedVectorType) { switch (Opcode) { case Instruction::InsertElement: + MadeChange |= vectorizeLoadInsert(I); MadeChange |= foldInsExtFNeg(I); MadeChange |= foldInsExtBinop(I); MadeChange |= foldInsExtVectorToShuffle(I); break; case Instruction::ShuffleVector: + MadeChange |= widenSubvectorLoad(I); MadeChange |= foldPermuteOfBinops(I); MadeChange |= foldShuffleOfBinops(I); MadeChange |= foldShuffleOfSelects(I); diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index c554fdbf4c79..e02bbec7312e 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -198,7 +198,7 @@ ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Running pass: SROAPass on foo -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass +; CHECK-MATRIX: Running pass: VectorCombinePass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index ed13402e1c4b..5d8712841801 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -124,7 +124,6 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Running pass: SROAPass on foo -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index c82c34f7ff01..c6f95b393170 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -110,7 +110,6 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Running pass: SROAPass on foo -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index d375747547d6..b17556bc528a 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -119,7 +119,6 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Running pass: SROAPass on foo -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f6a940659680..8551ea2bc998 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -159,7 +159,6 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Running pass: SROAPass on foo -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 48a9433d2499..531dc0361873 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -123,7 +123,6 @@ ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: SROAPass on foo -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll index 85f6fceb5bdb..2d20250dfd81 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll @@ -12,13 +12,20 @@ $getAt = comdat any define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 { ; SSE-LABEL: @ConvertVectors_ByRef( ; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> -; SSE-NEXT: ret <4 x float> [[TMP3]] +; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1 +; SSE-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP5]], <4 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP5]], <4 x i32> +; SSE-NEXT: ret <4 x float> [[TMP7]] ; ; AVX-LABEL: @ConvertVectors_ByRef( ; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> -; AVX-NEXT: ret <4 x float> [[TMP3]] +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2 +; AVX-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 8 +; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i64 2 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP4]], i64 3 +; AVX-NEXT: ret <4 x float> [[TMP6]] ; %2 = alloca ptr, align 8 %3 = alloca <4 x float>, align 16 -- 2.47.0.1.g59ce1bf855