[InstCombine] Fold (trunc X to i1) & !iszero(X & Pow2)) -> (X & (1 | Pow2)) == (1 | Pow2) #119196
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Andreas Jonson (andjo403)

Changes

Folds the patterns:
not(trunc X to i1) | iszero(X & Pow2) -> (X & (1 | Pow2)) != (1 | Pow2)
(trunc X to i1) & !iszero(X & Pow2) -> (X & (1 | Pow2)) == (1 | Pow2)

This is the same pattern that foldAndOrOfICmpsOfAndWithPow2 handles, but one where !iszero(X & 1) has been folded to trunc X to i1, or iszero(X & 1) has been folded to not(trunc X to i1).

proof: https://alive2.llvm.org/ce/z/ofzAyQ

Full diff: https://github.com/llvm/llvm-project/pull/119196.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b4033fc2a418a1..da58bedbce8b34 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -805,6 +805,40 @@ Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
return nullptr;
}
+// Fold not(trunc X to i1) | iszero(X & Pow2)
+// -> (X & (1 | Pow2)) != (1 | Pow2)
+// Fold (trunc X to i1) & !iszero(X & Pow2))
+// -> (X & (1 | Pow2)) == (1 | Pow2)
+static Value *foldTruncAndOrICmpOfAndWithPow2(InstCombiner::BuilderTy &Builder,
+ Value *LHS, Value *RHS,
+ bool IsAnd, bool IsLogical,
+ const SimplifyQuery &Q) {
+ CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+
+ if (isa<ICmpInst>(LHS))
+ std::swap(LHS, RHS);
+
+ Value *X, *Pow2;
+
+ if ((IsAnd ? match(LHS, m_Trunc(m_Value(X)))
+ : match(LHS, m_Not(m_Trunc(m_Value(X))))) &&
+ match(RHS, m_SpecificICmp(Pred, m_c_And(m_Specific(X), m_Value(Pow2)),
+ m_ZeroInt())) &&
+ isKnownToBeAPowerOfTwo(Pow2, Q.DL, /*OrZero=*/false, /*Depth=*/0, Q.AC,
+ Q.CxtI, Q.DT)) {
+ // If this is a logical and/or, then we must prevent propagation of a
+ // poison value from the RHS by inserting freeze.
+ if (IsLogical)
+ Pow2 = Builder.CreateFreeze(Pow2);
+ Value *Mask = Builder.CreateOr(ConstantInt::get(Pow2->getType(), 1), Pow2);
+ Value *Masked = Builder.CreateAnd(X, Mask);
+ auto NewPred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
+ return Builder.CreateICmp(NewPred, Masked, Mask);
+ }
+
+ return nullptr;
+}
+
/// General pattern:
/// X & Y
///
@@ -3541,6 +3575,10 @@ Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS,
if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd))
return Res;
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
+ if (Value *Res = foldTruncAndOrICmpOfAndWithPow2(Builder, LHS, RHS, IsAnd,
+ IsLogical, Q))
+ return Res;
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll
index 2e57597455c2cd..e4586a231535a3 100644
--- a/llvm/test/Transforms/InstCombine/onehot_merge.ll
+++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll
@@ -1143,3 +1143,295 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_not_pwr2_logical(i32 %
%or = select i1 %t2, i1 true, i1 %t4
ret i1 %or
}
+
+define i1 @trunc_or_icmp_consts(i8 %k) {
+; CHECK-LABEL: @trunc_or_icmp_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 9
+; CHECK-NEXT: [[OR:%.*]] = icmp ne i8 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %and = and i8 %k, 8
+ %icmp = icmp eq i8 %and, 0
+ %ret = or i1 %not, %icmp
+ ret i1 %ret
+}
+
+define i1 @icmp_or_trunc_consts(i8 %k) {
+; CHECK-LABEL: @icmp_or_trunc_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 9
+; CHECK-NEXT: [[OR:%.*]] = icmp ne i8 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %and = and i8 %k, 8
+ %icmp = icmp eq i8 %and, 0
+ %ret = or i1 %icmp, %not
+ ret i1 %ret
+}
+
+define i1 @trunc_or_icmp(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_or_icmp(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[T]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[K:%.*]], [[TMP1]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp eq i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %ret = or i1 %icmp, %not
+ ret i1 %ret
+}
+
+define i1 @trunc_logical_or_icmp(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_logical_or_icmp(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[T]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[K:%.*]], [[TMP1]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp eq i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %ret = select i1 %icmp, i1 true, i1 %not
+ ret i1 %ret
+}
+
+define i1 @icmp_logical_or_trunc(i8 %k, i8 %c1) {
+; CHECK-LABEL: @icmp_logical_or_trunc(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i8 [[T]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[K:%.*]], [[TMP2]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp eq i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %ret = select i1 %not, i1 true, i1 %icmp
+ ret i1 %ret
+}
+
+define <2 x i1> @trunc_or_icmp_vec(<2 x i8> %k, <2 x i8> %c1) {
+; CHECK-LABEL: @trunc_or_icmp_vec(
+; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i8> splat (i8 1), [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[T]], splat (i8 1)
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[K:%.*]], [[TMP1]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %t = shl <2 x i8> <i8 1, i8 1>, %c1
+ %t1 = and <2 x i8> %t, %k
+ %icmp = icmp eq <2 x i8> %t1, zeroinitializer
+ %trunc = trunc <2 x i8> %k to <2 x i1>
+ %not = xor <2 x i1> %trunc, <i1 true, i1 true>
+ %ret = or <2 x i1> %icmp, %not
+ ret <2 x i1> %ret
+}
+
+define i1 @neg_trunc_or_icmp_not_pow2(i8 %k, i8 %c1) {
+; CHECK-LABEL: @neg_trunc_or_icmp_not_pow2(
+; CHECK-NEXT: [[T1:%.*]] = and i8 [[C1:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[T1]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[K]] to i1
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[TRUNC]], true
+; CHECK-NEXT: [[RET:%.*]] = or i1 [[ICMP]], [[NOT]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t1 = and i8 %c1, %k
+ %icmp = icmp eq i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %ret = or i1 %icmp, %not
+ ret i1 %ret
+}
+
+define i1 @neg_trunc_or_icmp_not_trunc(i8 %k, i8 %c1) {
+; CHECK-LABEL: @neg_trunc_or_icmp_not_trunc(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = and i8 [[T]], [[K:%.*]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[T1]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[K]] to i1
+; CHECK-NEXT: [[RET:%.*]] = or i1 [[ICMP]], [[TRUNC]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp eq i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %ret = or i1 %icmp, %trunc
+ ret i1 %ret
+}
+
+define i1 @neg_trunc_or_icmp_ne(i8 %k, i8 %c1) {
+; CHECK-LABEL: @neg_trunc_or_icmp_ne(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = and i8 [[T]], [[K:%.*]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i8 [[T1]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[K]] to i1
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[TRUNC]], true
+; CHECK-NEXT: [[RET:%.*]] = or i1 [[ICMP]], [[NOT]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp ne i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %ret = or i1 %icmp, %not
+ ret i1 %ret
+}
+
+define i1 @trunc_and_icmp_consts(i8 %k) {
+; CHECK-LABEL: @trunc_and_icmp_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 9
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %trunc = trunc i8 %k to i1
+ %and = and i8 %k, 8
+ %icmp = icmp ne i8 %and, 0
+ %ret = and i1 %trunc, %icmp
+ ret i1 %ret
+}
+
+define i1 @icmp_and_trunc_consts(i8 %k) {
+; CHECK-LABEL: @icmp_and_trunc_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 9
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %trunc = trunc i8 %k to i1
+ %and = and i8 %k, 8
+ %icmp = icmp ne i8 %and, 0
+ %ret = and i1 %icmp, %trunc
+ ret i1 %ret
+}
+
+define i1 @trunc_and_icmp(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_and_icmp(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[T]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[K:%.*]], [[TMP1]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp ne i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %ret = and i1 %icmp, %trunc
+ ret i1 %ret
+}
+
+define i1 @trunc_logical_and_icmp(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_logical_and_icmp(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[T]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[K:%.*]], [[TMP1]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp ne i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %ret = select i1 %icmp, i1 %trunc, i1 false
+ ret i1 %ret
+}
+
+define i1 @icmp_logical_and_trunc(i8 %k, i8 %c1) {
+; CHECK-LABEL: @icmp_logical_and_trunc(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i8 [[T]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[K:%.*]], [[TMP2]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp ne i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %ret = select i1 %trunc, i1 %icmp, i1 false
+ ret i1 %ret
+}
+
+define <2 x i1> @trunc_and_icmp_vec(<2 x i8> %k, <2 x i8> %c1) {
+; CHECK-LABEL: @trunc_and_icmp_vec(
+; CHECK-NEXT: [[T:%.*]] = shl nuw <2 x i8> splat (i8 1), [[C1:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[T]], splat (i8 1)
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[K:%.*]], [[TMP1]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %t = shl <2 x i8> <i8 1, i8 1>, %c1
+ %t1 = and <2 x i8> %t, %k
+ %icmp = icmp ne <2 x i8> %t1, zeroinitializer
+ %trunc = trunc <2 x i8> %k to <2 x i1>
+ %ret = and <2 x i1> %icmp, %trunc
+ ret <2 x i1> %ret
+}
+
+define i1 @trunc_and_icmp_not_pow2(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_and_icmp_not_pow2(
+; CHECK-NEXT: [[T1:%.*]] = and i8 [[C1:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i8 [[T1]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[K]] to i1
+; CHECK-NEXT: [[RET:%.*]] = and i1 [[ICMP]], [[TRUNC]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t1 = and i8 %c1, %k
+ %icmp = icmp ne i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %ret = and i1 %icmp, %trunc
+ ret i1 %ret
+}
+
+define i1 @trunc_and_icmp_not_trunc(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_and_icmp_not_trunc(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = and i8 [[T]], [[K:%.*]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i8 [[T1]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[K]] to i1
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[TRUNC]], true
+; CHECK-NEXT: [[RET:%.*]] = and i1 [[ICMP]], [[NOT]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp ne i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %not = xor i1 %trunc, true
+ %ret = and i1 %icmp, %not
+ ret i1 %ret
+}
+
+define i1 @trunc_and_icmp_eq(i8 %k, i8 %c1) {
+; CHECK-LABEL: @trunc_and_icmp_eq(
+; CHECK-NEXT: [[T:%.*]] = shl nuw i8 1, [[C1:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = and i8 [[T]], [[K:%.*]]
+; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[T1]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[K]] to i1
+; CHECK-NEXT: [[RET:%.*]] = and i1 [[ICMP]], [[TRUNC]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t = shl i8 1, %c1
+ %t1 = and i8 %t, %k
+ %icmp = icmp eq i8 %t1, 0
+ %trunc = trunc i8 %k to i1
+ %ret = and i1 %icmp, %trunc
+ ret i1 %ret
+}
This fixes a regression caused by #84628.
Can you adjust foldLogOpOfMaskedICmps to convert trunc X to i1 back into (X & 1) != 0?
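For illustration, a minimal sketch of what that conversion could look like using InstCombine's pattern-match helpers; the variable names and placement are assumptions for illustration, not the actual patch:

// Sketch only; assumes the usual InstCombine context:
//   using namespace llvm; using namespace llvm::PatternMatch;
// Treat `trunc X to i1` as the bit test (X & 1) != 0 so that the
// masked-icmp folds can see it. `Cond` is an i1 Value.
Value *X;
if (match(Cond, m_Trunc(m_Value(X)))) {
  Value *One = ConstantInt::get(X->getType(), 1);
  Value *And = Builder.CreateAnd(X, One);  // X & 1
  Cond = Builder.CreateICmpNE(And, Constant::getNullValue(X->getType()));
}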
BTW, I skimmed through the comments in #84628. It seems to cause many performance regressions. It would be better to revert #84628 and find another way to unblock #83829.
cc @nikic @YanWQ-monad @dianqk @goldsteinn
I do not know what you mean by this.
I mean that we should pass a Value instead of an ICmpInst into foldLogOpOfMaskedICmps. BTW, I recommend reverting #84628. Let's wait for input from other folks.
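Concretely, that suggests a signature change along these lines (a sketch of the suggestion with an abridged parameter list, not a committed interface):

// Current shape: only real icmps can participate in the masked folds.
Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                              bool IsLogical, InstCombiner::BuilderTy &Builder);
// Suggested shape: accept any i1 Value, so the trunc/not(trunc) forms can be
// decomposed into bit tests inside the function itself.
Value *foldLogOpOfMaskedICmps(Value *LHS, Value *RHS, bool IsAnd,
                              bool IsLogical, InstCombiner::BuilderTy &Builder);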
I know a little about instcombine. Would it be possible to canonicalize trunc X to i1 back into (X & 1) != 0?
This fold is not covered by that, though. One downside if #84628 is reverted is that it will be harder to find this type of missing fold when that change is applied again after the regressions are handled.
I think that I can close this PR, as it looks better to move the fold done in foldAndOrOfICmpsOfAndWithPow2 into foldLogOpOfMaskedICmps. But the question still is whether I shall try to find/fix regressions due to the removed pattern.
Ping. See the summary in the last comment.
@nikic Any thoughts?
To have some better information, I have done some searching in the code to see how many places need to handle the trunc pattern. One reason for adding handling of the trunc pattern is that vectors are already folded to trunc, so those will be better optimized. I found about 13 places that I think need some work:

- computeKnownBitsFromContext for assumes in ValueTracking: handled in #118406, but it has the ephemeral-value problem for the not(trunc) pattern.
- getValueFromCondition in LazyValueInfo
- simplifySelectWithFakeICmpEq in InstructionSimplify
- foldSelectICmpAnd in InstCombineSelect
- foldSelectICmpAndAnd in InstCombineSelect: looks like it will result in more instructions, so maybe not; maybe that is also the reason decomposeBitTestICmp is not handled in this function.
- foldSelectICmpAndZeroShl in InstCombineSelect: unclear why decomposeBitTestICmp is not handled in this function, as it looks like it would save instructions.
- foldSelectICmpAndBinOp in InstCombineSelect: maybe there should be a common foldSelectICmpBitTest function that holds the bit-test code so it is not repeated.
- fold to alignment assume bundle in InstCombineCalls
- getMaskedTypeForICmpPair in InstCombineAndOrXor: if foldAndOrOfICmpsOfAndWithPow2 is moved into foldLogOpOfMaskedICmps, that is the only fold in this file that needs updating.
- matchLeftShift in HexagonLoopIdiomRecognition
- detectShiftUntilBitTestIdiom in LoopIdiomRecognize

Many of these places already use decomposeBitTestICmp, so it may be good to make a decomposeBitTest that takes only a Value as input, handles trunc, and then calls decomposeBitTestICmp for icmp.
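A minimal sketch of what such a decomposeBitTest helper could look like, deferring to the existing decomposeBitTestICmp from CmpInstAnalysis; the name, signature, and the exact decomposeBitTestICmp parameter order are assumptions for illustration, not the eventual patch:

// Sketch only; assumes the usual LLVM context:
//   using namespace llvm; using namespace llvm::PatternMatch;
// Decompose a boolean Cond (assumed to be i1) into the bit test
// "(X & Mask) Pred 0", handling the canonical trunc-to-i1 forms directly
// and falling back to the existing icmp-based decomposition.
static bool decomposeBitTest(Value *Cond, Value *&X, CmpInst::Predicate &Pred,
                             APInt &Mask) {
  // not(trunc X to i1)  ->  (X & 1) == 0
  if (match(Cond, m_Not(m_Trunc(m_Value(X))))) {
    Pred = CmpInst::ICMP_EQ;
    Mask = APInt(X->getType()->getScalarSizeInBits(), 1);
    return true;
  }
  // trunc X to i1  ->  (X & 1) != 0
  if (match(Cond, m_Trunc(m_Value(X)))) {
    Pred = CmpInst::ICMP_NE;
    Mask = APInt(X->getType()->getScalarSizeInBits(), 1);
    return true;
  }
  // Fall back to the existing icmp decomposition (assumed signature).
  if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
    Pred = Cmp->getPredicate();
    return decomposeBitTestICmp(Cmp->getOperand(0), Cmp->getOperand(1), Pred,
                                X, Mask);
  }
  return false;
}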
Thank you for the detailed investigation! I agree that we can continue to migrate to the trunc pattern.
Agree.
Good, then I will continue with new PRs.
Folds the patterns:
not(trunc X to i1) | iszero(X & Pow2) -> (X & (1 | Pow2)) != (1 | Pow2)
(trunc X to i1) & !iszero(X & Pow2) -> (X & (1 | Pow2)) == (1 | Pow2)

This is the same pattern that foldAndOrOfICmpsOfAndWithPow2 handles, but one where !iszero(X & 1) has been folded to trunc X to i1, or iszero(X & 1) has been folded to not(trunc X to i1).

Around 100 files are updated in the llvm-opt-benchmark, with roughly 6 files showing some regressions, like https://alive2.llvm.org/ce/z/SQLjda and https://alive2.llvm.org/ce/z/_Vu4yB. I do not know how to handle the regressions; it feels like for every fold I add to handle one regression, I get new regressions.

proof: https://alive2.llvm.org/ce/z/ofzAyQ
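As a quick sanity check of the equivalence (independent of the Alive2 proof), here is a small standalone C++ program that exhaustively verifies both folds for i8 and every power-of-two mask; it is only an illustration, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x < 256; ++x) {
    for (int p = 0; p < 8; ++p) {
      uint8_t X = static_cast<uint8_t>(x);
      uint8_t Pow2 = static_cast<uint8_t>(1u << p); // a power of two, as the fold requires
      uint8_t Mask = 1u | Pow2;                     // the combined mask 1 | Pow2

      // (trunc X to i1) & !iszero(X & Pow2)  ==  (X & (1 | Pow2)) == (1 | Pow2)
      bool andLHS = ((X & 1u) != 0) && ((X & Pow2) != 0);
      bool andRHS = (X & Mask) == Mask;
      assert(andLHS == andRHS);

      // not(trunc X to i1) | iszero(X & Pow2)  ==  (X & (1 | Pow2)) != (1 | Pow2)
      bool orLHS = ((X & 1u) == 0) || ((X & Pow2) == 0);
      bool orRHS = (X & Mask) != Mask;
      assert(orLHS == orRHS);
    }
  }
  return 0;
}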