Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 17 additions & 35 deletions src/CodeGen_GPU_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,27 +144,16 @@ void CodeGen_GPU_C::visit(const Shuffle *op) {
if (op->type.is_scalar()) {
CodeGen_C::visit(op);
} else {
// Vector shuffle with arbitrary number of lanes per arg
internal_assert(!op->vectors.empty());
for (size_t i = 1; i < op->vectors.size(); i++) {
internal_assert(op->vectors[0].type() == op->vectors[i].type());
}
internal_assert(op->type.lanes() == (int)op->indices.size());
// We need to construct the mapping between shuffled-index,
// and source-vector-index and source-element-index-within-the-vector.
// To start, we'll figure out what the first shuffle-index is per
// source-vector. Also let's compute the total number of
// source-elements the to be able to assert that all of the
// shuffle-indices are within range.
std::vector<int> vector_first_index;
int max_index = 0;
for (const Expr &v : op->vectors) {
vector_first_index.push_back(max_index);
max_index += v.type().lanes();
}
for (int i : op->indices) {
internal_assert(i >= 0 && i < max_index);
}

// Construct the mapping for each shuffled element to find
// the corresponding vector-index to use and which lane-index
// of the selected vector.
auto vector_lane_indices = op->vector_and_lane_indices();

// Traverse all the vector args
std::vector<std::string> vecs;
for (const Expr &v : op->vectors) {
vecs.push_back(print_expr(v));
Expand All @@ -184,23 +173,15 @@ void CodeGen_GPU_C::visit(const Shuffle *op) {
rhs << "{";
break;
}
int elem_num = 0;
for (int i : op->indices) {
size_t vector_idx;
int lane_idx = -1;
// Find in which source vector this shuffle-index "i" falls:
for (vector_idx = 0; vector_idx < op->vectors.size(); ++vector_idx) {
const int first_index = vector_first_index[vector_idx];
if (i >= first_index &&
i < first_index + op->vectors[vector_idx].type().lanes()) {
lane_idx = i - first_index;
break;
}
}
internal_assert(lane_idx != -1) << "Shuffle lane index not found: i=" << i;
internal_assert(vector_idx < op->vectors.size()) << "Shuffle vector index not found: i=" << i << ", lane=" << lane_idx;

int element_idx = 0;
for (auto element_mapping : vector_lane_indices) {
int vector_idx = element_mapping.first;
int lane_idx = element_mapping.second;

// Print the vector in which we will index.
rhs << vecs[vector_idx];

// In case we are dealing with an actual vector instead of scalar,
// print out the required indexing syntax.
if (op->vectors[vector_idx].type().lanes() > 1) {
Expand All @@ -216,11 +197,12 @@ void CodeGen_GPU_C::visit(const Shuffle *op) {
}

// Elements of a vector are comma separated.
if (elem_num < (int)(op->indices.size() - 1)) {
if (element_idx < (int)(op->indices.size() - 1)) {
rhs << ", ";
}
elem_num++;
element_idx++;
}

switch (vector_declaration_style) {
case VectorDeclarationStyle::OpenCLSyntax:
rhs << ")";
Expand Down
120 changes: 47 additions & 73 deletions src/CodeGen_Vulkan_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2003,18 +2003,28 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Evaluate *op) {
}

void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Shuffle *op) {
std::cout << " CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Shuffle): "
<< "type=" << op->type << " "
<< "vectors=" << (uint32_t)op->vectors.size() << " "
<< "is_interleave=" << (op->is_interleave() ? "true" : "false") << " "
<< "is_extract_element=" << (op->is_extract_element() ? "true" : "false") << "\n";
debug(2) << " CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Shuffle): "
Comment thread
derek-gerstmann marked this conversation as resolved.
<< "type=" << op->type << " "
<< "vectors=" << (uint32_t)op->vectors.size() << " "
<< "is_interleave=" << (op->is_interleave() ? "true" : "false") << " "
<< "is_extract_element=" << (op->is_extract_element() ? "true" : "false") << "\n";

internal_assert(!op->vectors.empty());
internal_assert(op->type.lanes() == (int)op->indices.size());

// The Shuffle operator supports any combination of vector width for its
// arguments, as long as the indices match the number of lanes for the result
// type. This means the arguments can be a mixed combination of vectors with
// any number of lanes (or a scalar). We special case interleave and extract,
// and then use the vector and lane index mapping to determine which values to
// use from the arguments to do the shuffle.

// Traverse all the arg vectors
// First, traverse all the arg vectors
uint32_t arg_idx = 0;
SpvFactory::Operands arg_ids;
arg_ids.reserve(op->vectors.size());
for (const Expr &e : op->vectors) {
debug(2) << " CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Shuffle): Arg[" << arg_idx++ << "] => " << e << "\n";
debug(3) << " CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Shuffle): Arg[" << arg_idx++ << "] => " << e << "\n";
e.accept(this);
arg_ids.push_back(builder.current_id());
}
Expand All @@ -2024,11 +2034,11 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Shuffle *op) {
internal_assert(!arg_ids.empty());
int arg_lanes = op->vectors[0].type().lanes();

std::cout << " vector interleave x" << (uint32_t)op->vectors.size() << " : ";
debug(3) << " vector interleave x" << (uint32_t)op->vectors.size() << " : ";
for (int idx : op->indices) {
std::cout << idx << " ";
debug(3) << idx << " ";
}
std::cout << "\n";
debug(3) << "\n";

if (arg_ids.size() == 1) {

Expand Down Expand Up @@ -2096,77 +2106,41 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Shuffle *op) {
SpvId result_id = cast_type(op->type, op->vectors[0].type(), arg_ids[0]);
builder.update_id(result_id);
}
} else if (op->type.is_scalar()) {
// Deduce which vector we need. Apparently it's not required
// that all vectors have identical lanes, so a loop is required.
// Since idx of -1 means "don't care", we'll treat it as 0 to simplify.
SpvId result_id = SpvInvalidId;
int idx = std::max(0, op->indices[0]);
for (size_t vec_idx = 0; vec_idx < op->vectors.size(); vec_idx++) {
const int vec_lanes = op->vectors[vec_idx].type().lanes();
if (idx < vec_lanes) {
if (op->vectors[vec_idx].type().is_vector()) {
SpvFactory::Indices indices = {(uint32_t)idx};
SpvId type_id = builder.declare_type(op->type);
result_id = builder.reserve_id(SpvResultId);
builder.append(SpvFactory::composite_extract(type_id, result_id, arg_ids[vec_idx], indices));
} else {
result_id = arg_ids[vec_idx];
}
break;
}
idx -= vec_lanes;
}

} else {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm not overlooking anything, it seems there is no specialized branch for simply joining (i.e., concatenating) vectors. Either put a TODO comment, make an issue, or add that branch. Falling back on the generic vector shuffle, below, seems like a performance waste.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tracking this here: #8622

// Shuffle with arbitrary number of lanes per arg

// vector shuffle ... not interleaving
int op_lanes = op->type.lanes();
int num_vectors = (int)op->vectors.size();
// Construct the mapping for each shuffled element to find
// the corresponding vector-index to use and which lane-index
// of the selected vector.
auto vector_lane_indices = op->vector_and_lane_indices();

std::cout << " vector shuffle x" << num_vectors << " : ";
for (int idx : op->indices) {
std::cout << idx << " ";
}
std::cout << "\n";
SpvId type_id = builder.declare_type(op->type);
SpvId result_id = builder.reserve_id(SpvResultId);

if (num_vectors == 1) {
// 1 argument, just do a simple assignment via a cast
SpvId result_id = cast_type(op->type, op->vectors[0].type(), arg_ids[0]);
builder.update_id(result_id);
SpvFactory::Components constituents;
debug(3) << " Shuffle Composite(" << op->type << ") => ";
for (auto element_mapping : vector_lane_indices) {
int arg_idx = element_mapping.first;
int lane_idx = element_mapping.second;

} else if (num_vectors == 2) {
if (op->vectors[arg_idx].type().lanes() > 1) {
SpvFactory::Indices indices = {(uint32_t)lane_idx};
SpvId scalar_type_id = builder.declare_type(op->vectors[arg_idx].type().element_of());
SpvId scalar_id = builder.reserve_id(SpvResultId);
builder.append(SpvFactory::composite_extract(scalar_type_id, scalar_id, arg_ids[arg_idx], indices));

// 2 arguments, use the builtin vector shuffle that takes a pair of vectors
SpvFactory::Indices indices;
indices.reserve(op->indices.size());
indices.insert(indices.end(), op->indices.begin(), op->indices.end());
SpvId type_id = builder.declare_type(op->type);
SpvId result_id = builder.reserve_id(SpvResultId);
builder.append(SpvFactory::vector_shuffle(type_id, result_id, arg_ids[0], arg_ids[1], indices));
builder.update_id(result_id);
} else {
std::vector<SpvFactory::Components> vector_component_ids(num_vectors);
for (uint32_t i = 0; i < (uint32_t)arg_ids.size(); ++i) {
if (op->vectors[i].type().is_vector()) {
vector_component_ids[i] = split_vector(op->vectors[i].type(), arg_ids[i]);
} else {
vector_component_ids[i] = {arg_ids[i]};
}
}

SpvFactory::Components result_component_ids(op_lanes);
for (int i = 0; i < op_lanes && i < (int)op->indices.size(); i++) {
int idx = op->indices[i];
int arg = idx % num_vectors;
int arg_idx = idx / num_vectors;
internal_assert(arg_idx <= (int)vector_component_ids[arg].size());
result_component_ids[i] = vector_component_ids[arg][arg_idx];
debug(3) << arg_ids[arg_idx] << "(v" << op->vectors[arg_idx].type().lanes() << "[" << lane_idx << "]) ";
constituents.push_back(scalar_id); // insert a component from a vector
} else {
debug(3) << arg_ids[arg_idx] << " ";
SpvId scalar_id = cast_type(op->type.element_of(), op->vectors[arg_idx].type(), arg_ids[arg_idx]);
constituents.push_back(scalar_id); // inserting a scalar
}

SpvId result_id = join_vector(op->type, result_component_ids);
builder.update_id(result_id);
}

debug(3) << "\n";
builder.append(SpvFactory::composite_construct(type_id, result_id, constituents));
builder.update_id(result_id);
}
}

Expand Down
19 changes: 19 additions & 0 deletions src/IR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,25 @@ bool Shuffle::is_interleave() const {
return true;
}

// Resolve every entry of `indices` into a (vector index, lane index) pair:
// `first` is the position in `vectors` of the argument to read from, and
// `second` is the lane to read within that argument. The shuffle indices
// address the lanes of all arguments laid end to end, in argument order.
// The returned vector has one pair per entry of `indices`, in the same order.
//
// NOTE(review): each index is used unchecked as a subscript, so it is assumed
// to lie in [0, total number of argument lanes) -- presumably guaranteed when
// the Shuffle node is constructed; confirm before calling on unvalidated nodes.
std::vector<std::pair<int, int>> Shuffle::vector_and_lane_indices() const {
    // Count the lanes over all arguments first, so the lookup table below is
    // allocated once at its final size. (The table holds one entry per source
    // lane, which is generally not the same as indices.size().)
    int total_lanes = 0;
    for (const auto &v : vectors) {
        total_lanes += v.type().lanes();
    }

    // Lookup table: flattened lane position -> (vector index, lane index).
    std::vector<std::pair<int, int>> all_indices;
    all_indices.reserve((size_t)total_lanes);
    for (int i = 0; i < (int)vectors.size(); i++) {
        const int lanes = vectors[i].type().lanes();
        for (int j = 0; j < lanes; j++) {
            all_indices.emplace_back(i, j);
        }
    }

    // Translate each shuffle index through the table, preserving order.
    std::vector<std::pair<int, int>> result;
    result.reserve(indices.size());
    for (int i : indices) {
        result.push_back(all_indices[i]);
    }
    return result;
}

Stmt Atomic::make(const std::string &producer_name,
const std::string &mutex_name,
Stmt body) {
Expand Down
4 changes: 4 additions & 0 deletions src/IR.h
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,10 @@ struct Shuffle : public ExprNode<Shuffle> {
* arguments. */
bool is_extract_element() const;

/** Returns, for each entry of the shuffle's indices (in order), a pair
* whose first member is the index of the argument vector to read from
* and whose second member is the lane to read within that vector. The
* result has one pair per shuffle index. */
std::vector<std::pair<int, int>> vector_and_lane_indices() const;

static const IRNodeType _node_type = IRNodeType::Shuffle;
};

Expand Down
5 changes: 0 additions & 5 deletions test/correctness/vector_shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,8 @@ using namespace Halide;

int main(int argc, char **argv) {
Target target = get_jit_target_from_environment();
if (target.has_feature(Target::Feature::Vulkan)) {
std::printf("[SKIP] Vulkan seems to be not working.\n");
return 0;
}

Var x{"x"}, y{"y"};

Func f0{"f0"}, f1{"f1"}, g{"g"};
f0(x, y) = x * (y + 1);
f1(x, y) = x * (y + 3);
Expand Down
Loading