
[VectorCombine] Shrink loads used in shufflevector rebroadcasts #128938
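This patch teaches VectorCombine to narrow a vector load whose only users are shufflevector instructions referencing a leading subset of its lanes, rewriting the shuffles against the smaller vector. Below is a minimal sketch of the intended rewrite in LLVM IR, modeled on the preserve_vec3.cl test updates in this diff; the function names are illustrative only and do not appear in the patch.

```llvm
; Before: the full <4 x float> is loaded, but the shuffle only reads lanes 0-2.
define <4 x float> @shrink_example_before(ptr %p) {
  %wide = load <4 x float>, ptr %p, align 16
  %res = shufflevector <4 x float> %wide, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x float> %res
}

; After: the load is shrunk to <3 x float> and the mask is applied to the narrower source vector.
define <4 x float> @shrink_example_after(ptr %p) {
  %narrow = load <3 x float>, ptr %p, align 16
  %res = shufflevector <3 x float> %narrow, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x float> %res
}
```

The transform is gated by a TargetTransformInfo cost check: the narrow load plus the remapped shuffles must be no more expensive than the original load and shuffles, otherwise the IR is left untouched.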

Open · wants to merge 18 commits into main
22 changes: 11 additions & 11 deletions clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -11,8 +11,8 @@ typedef float float4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local spir_kernel void @foo(
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: ret void
//
@@ -23,8 +23,8 @@ void kernel foo(global float3 *a, global float3 *b) {
// CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3(
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
// CHECK-NEXT: ret void
//
@@ -35,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
// CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
// CHECK-NEXT: ret void
//
@@ -47,9 +47,9 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
// CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
// CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
// CHECK-NEXT: ret void
//
void kernel float3_to_double2(global float3 *a, global double2 *b) {
@@ -59,8 +59,8 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
// CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3(
// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
// CHECK-NEXT: ret void
//
126 changes: 126 additions & 0 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -32,8 +33,10 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <numeric>
#include <optional>
#include <queue>
#include <set>
#include <tuple>

#define DEBUG_TYPE "vector-combine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
@@ -131,6 +134,7 @@ class VectorCombine {
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);

  void replaceValue(Value &Old, Value &New) {
    LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
@@ -3483,6 +3487,125 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  return true;
}

// Attempt to shrink loads that are only used by shufflevector instructions.
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  auto *OldLoad = dyn_cast<LoadInst>(&I);
  if (!OldLoad || !OldLoad->isSimple())
    return false;

  auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType());
  if (!OldLoadTy)
    return false;

  unsigned const OldNumElements = OldLoadTy->getNumElements();

  // Search all uses of load. If all uses are shufflevector instructions, and
  // the second operands are all poison values, find the minimum and maximum
  // indices of the vector elements referenced by all shuffle masks.
  // Otherwise return `std::nullopt`.
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (auto &Use : I.uses()) {
      // Ensure all uses match the required pattern.
      User *Shuffle = Use.getUser();
      ArrayRef<int> Mask;

      if (!match(Shuffle,
                 m_Shuffle(m_Specific(OldLoad), m_Undef(), m_Mask(Mask))))
        return std::nullopt;

      // Ignore shufflevector instructions that have no uses.
      if (Shuffle->use_empty())
        continue;

      // Find the min and max indices used by the shufflevector instruction.
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
        }
      }
    }

    if (OutputRange.second < OutputRange.first)
      return std::nullopt;

    return OutputRange;
  };

  // Get the range of vector elements used by shufflevector instructions.
  if (auto Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;

    // If the range of vector elements is smaller than the full load, attempt
    // to create a smaller load.
    if (NewNumElements < OldNumElements) {
      auto Builder = IRBuilder(&I);
      Builder.SetCurrentDebugLocation(I.getDebugLoc());

      // Calculate costs of old and new ops.
      Type *ElemTy = OldLoadTy->getElementType();
      FixedVectorType *NewLoadTy =
          FixedVectorType::get(ElemTy, NewNumElements);
      Value *PtrOp = OldLoad->getPointerOperand();

      InstructionCost OldCost = TTI.getMemoryOpCost(
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
      InstructionCost NewCost =
          TTI.getMemoryOpCost(Instruction::Load, NewLoadTy, OldLoad->getAlign(),
                              OldLoad->getPointerAddressSpace(), CostKind);

      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      auto NewUses = SmallVector<UseEntry, 4u>();
      auto SizeDiff = OldNumElements - NewNumElements;

      for (auto &Use : I.uses()) {
        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
        auto OldMask = Shuffle->getShuffleMask();

        // Create entry for new use.
        NewUses.push_back({Shuffle, {}});
        auto &NewMask = NewUses.back().second;
        for (auto Index : OldMask)
          NewMask.push_back(Index >= static_cast<int>(OldNumElements)
                                ? Index - SizeDiff
                                : Index);

        // Update costs.
        OldCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, OldLoadTy,
                                      OldMask, CostKind);
        NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, NewLoadTy,
                                      NewMask, CostKind);
      }

      if (OldCost < NewCost || !NewCost.isValid())
        return false;

      // Create new load of smaller vector.
      auto *NewLoad = cast<LoadInst>(
          Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign()));
      NewLoad->copyMetadata(I);

      // Replace all uses.
      for (auto &Use : NewUses) {
        auto *Shuffle = Use.first;
        auto &NewMask = Use.second;

        Builder.SetInsertPoint(Shuffle);
        Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
        auto *NewShuffle = Builder.CreateShuffleVector(
            NewLoad, PoisonValue::get(NewLoadTy), NewMask);

        replaceValue(*Shuffle, *NewShuffle);
      }

      return true;
    }
  }
  return false;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -3558,6 +3681,9 @@ bool VectorCombine::run() {
      MadeChange |= foldSelectShuffle(I);
      MadeChange |= foldShuffleToIdentity(I);
      break;
    case Instruction::Load:
      MadeChange |= shrinkLoadForShuffles(I);
      break;
    case Instruction::BitCast:
      MadeChange |= foldBitcastShuffle(I);
      break;
8 changes: 4 additions & 4 deletions llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
@@ -11,13 +11,13 @@ $getAt = comdat any

define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 {
; SSE-LABEL: @ConvertVectors_ByRef(
; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; SSE-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; SSE-NEXT: ret <4 x float> [[TMP3]]
;
; AVX-LABEL: @ConvertVectors_ByRef(
; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; AVX-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; AVX-NEXT: ret <4 x float> [[TMP3]]
;
%2 = alloca ptr, align 8
6 changes: 2 additions & 4 deletions llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -252,8 +252,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync {
define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %p) nofree nosync {
; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
@@ -341,8 +340,7 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {
; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
; CHECK-NEXT: ret <8 x i16> [[R]]
;
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
4 changes: 2 additions & 2 deletions llvm/test/Transforms/VectorCombine/X86/load-widening.ll
@@ -443,8 +443,8 @@ define <8 x float> @load_v2f32_v8f32_hwasan(ptr dereferenceable(32) %p) sanitize

define <4 x i32> @load_v2i32_v4i32_asan(ptr dereferenceable(16) %p) sanitize_address {
; CHECK-LABEL: @load_v2i32_v4i32_asan(
; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT: [[S:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
%l = load <2 x i32>, ptr %p, align 1
24 changes: 9 additions & 15 deletions llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
@@ -47,21 +47,12 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
; broadcast loads are free on AVX (and blends are much cheaper than general 2-operand shuffles)

define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
; SSE-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
; SSE-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
; SSE-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
; SSE-NEXT: ret <4 x double> [[BLEND]]
;
; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
; AVX-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
; AVX-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
; AVX-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
; AVX-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
; AVX-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; AVX-NEXT: ret <4 x double> [[BLEND]]
; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64(
; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 32
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 32
; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: ret <4 x double> [[BLEND]]
;
%ld0 = load <4 x double>, ptr %p0, align 32
%ld1 = load <4 x double>, ptr %p1, align 32
@@ -81,3 +72,6 @@ define <2 x float> @PR86068(<2 x float> %a0, <2 x float> %a1) {
%s2 = shufflevector <2 x float> %s1, <2 x float> %a0, <2 x i32> <i32 0, i32 3>
ret <2 x float> %s2
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}
; SSE: {{.*}}