Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,50 +455,55 @@ void CodeGen_Hexagon::compile_func(const LoweredFunc &f,

Stmt body = f.body;

debug(1) << "Unpredicating loads and stores...\n";
debug(1) << "Hexagon: Unpredicating loads and stores...\n";
// Replace dense vector predicated loads with sloppy scalarized
// predicates, and scalarize predicated stores
body = sloppy_unpredicate_loads_and_stores(body);

debug(2) << "Lowering after unpredicating loads/stores:\n"
debug(2) << "Hexagon: Lowering after unpredicating loads/stores:\n"
<< body << "\n\n";

if (is_hvx_v65_or_later()) {
// Generate vscatter-vgathers before optimize_hexagon_shuffles.
debug(1) << "Looking for vscatter-vgather...\n";
debug(1) << "Hexagon: Looking for vscatter-vgather...\n";
body = scatter_gather_generator(body);
debug(2) << "Hexagon: Lowering after vscatter-vgather:\n"
<< body << "\n\n";
}

debug(1) << "Optimizing shuffles...\n";
debug(1) << "Hexagon: Optimizing shuffles...\n";
// vlut always indexes 64 bytes of the LUT at a time, even in 128 byte mode.
const int lut_alignment = 64;
body = optimize_hexagon_shuffles(body, lut_alignment);
debug(2) << "Lowering after optimizing shuffles:\n"
debug(2) << "Hexagon: Lowering after optimizing shuffles:\n"
<< body << "\n\n";

debug(1) << "Aligning loads for HVX....\n";
debug(1) << "Hexagon: Aligning loads for HVX....\n";
body = align_loads(body, target.natural_vector_size(Int(8)), 8);
body = common_subexpression_elimination(body);
// Don't simplify here, otherwise it will re-collapse the loads we
// want to carry across loop iterations.
debug(2) << "Lowering after aligning loads:\n"
debug(2) << "Hexagon: Lowering after aligning loads:\n"
<< body << "\n\n";

debug(1) << "Carrying values across loop iterations...\n";
debug(1) << "Hexagon: Carrying values across loop iterations...\n";
// Use at most 16 vector registers for carrying values.
body = loop_carry(body, 16);
body = simplify(body);
debug(2) << "Lowering after forwarding stores:\n"
debug(2) << "Hexagon: Lowering after forwarding stores:\n"
<< body << "\n\n";

// Optimize the IR for Hexagon.
debug(1) << "Optimizing Hexagon instructions...\n";
debug(1) << "Hexagon: Optimizing Hexagon instructions...\n";
body = optimize_hexagon_instructions(body, target);
debug(2) << "Hexagon: Lowering after optimizing Hexagon instructions:\n"
<< body << "\n\n";

debug(1) << "Adding calls to qurt_hvx_lock, if necessary...\n";
debug(1) << "Hexagon: Adding calls to qurt_hvx_lock, if necessary...\n";
body = inject_hvx_lock_unlock(body, target);
debug(2) << "Hexagon: Lowering after adding calls to qurt_hvx_lock:\n"
<< body << "\n\n";

debug(1) << "Hexagon function body:\n";
debug(1) << "Hexagon: function body for " << simple_name << " :\n";
debug(1) << body << "\n";

body.accept(this);
Expand Down