Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/HelloWasm/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
js/*
build/
bin/
2 changes: 1 addition & 1 deletion apps/HelloWasm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ clean:

$(BIN)/$(HL_TARGET)/run: $(BIN)/$(HL_TARGET)/reaction_diffusion_init.a $(BIN)/$(HL_TARGET)/reaction_diffusion_update.a $(BIN)/$(HL_TARGET)/reaction_diffusion_render.a $(BIN)/$(HL_TARGET)/runtime.a
mkdir -p $(@D)
$(CXX) $(HALIDE_DISTRIB_PATH)/tools/RunGenMain.cpp $(BIN)/$(HL_TARGET)/reaction_diffusion_*.registration.cpp $^ -o $@ -I $(HALIDE_DISTRIB_PATH)/include $(IMAGE_IO_FLAGS) $(LDFLAGS)
$(CXX) $(CXXFLAGS) $(HALIDE_DISTRIB_PATH)/tools/RunGenMain.cpp $(BIN)/$(HL_TARGET)/reaction_diffusion_*.registration.cpp $^ -o $@ -I $(HALIDE_DISTRIB_PATH)/include $(IMAGE_IO_FLAGS) $(LDFLAGS)

benchmark_native: $(BIN)/$(HL_TARGET)/run
$(BIN)/$(HL_TARGET)/run --benchmarks=all --benchmark_min_time=1 --name=reaction_diffusion_init --output_extents=[1024,1024] --parsable_output
Expand Down
2 changes: 1 addition & 1 deletion apps/HelloWasm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ To try it out,

- Build with `make all`

- Run a local webserver using, e.g.: python3 -m http.server 8080 &
- Run a local webserver: `python3 ../serve_with_cross_origin_isolation.py`. (This is a trivial wrapper around Python 3's `http.server` that also sets the headers necessary for "cross-origin isolation", which allows threading to work in recent browsers; see https://developer.chrome.com/blog/enabling-shared-array-buffer/ for more information.)

- Load Google Chrome (at least version 84), go to chrome://flags, and turn on all the experimental WebAssembly features (e.g. threads, SIMD). If you don't do this, only the single-threaded scalar variant will work (at the time of writing).

Expand Down
22 changes: 22 additions & 0 deletions apps/HelloWasm/serve_with_cross_origin_isolation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3

from http.server import HTTPServer, SimpleHTTPRequestHandler
import sys

class MyRequestHandler(SimpleHTTPRequestHandler):
    """A SimpleHTTPRequestHandler that adds the COOP/COEP response headers
    required for "cross-origin isolation" (needed so SharedArrayBuffer and
    therefore wasm threading work in recent browsers).
    """

    def end_headers(self):
        # Inject the isolation headers on every response, just before the
        # header block is terminated.
        self.send_header('Cross-Origin-Embedder-Policy', 'require-corp')
        self.send_header('Cross-Origin-Opener-Policy', 'same-origin')
        # Python-3-only script (see shebang), so use zero-argument super().
        return super().end_headers()

    def do_OPTIONS(self):
        # Minimal preflight support: reply 200; end_headers() adds the
        # isolation headers.
        self.send_response(200)
        self.end_headers()

# Command line: [host] port — with two args they are host then port; with a
# single arg it is taken as the port; with none, defaults below are used.
# NOTE(review): a single non-numeric arg (a bare host) makes int() raise —
# presumably callers always pass a port; confirm before changing semantics.
host = sys.argv[1] if len(sys.argv) > 2 else '127.0.0.1'
port = int(sys.argv[-1]) if len(sys.argv) > 1 else 8080

print("Listening on {}:{}".format(host, port))
httpd = HTTPServer((host, port), MyRequestHandler)
httpd.serve_forever()  # blocks until interrupted
14 changes: 10 additions & 4 deletions apps/hannk/interpreter/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,17 @@ OpPtr lower_tflite_fullyconnected(const TensorPtr &input, const TensorPtr &filte
assert(i.min == 0);
}
#endif
int c_extent = 1;
int b_extent = bounds.back().extent();
for (size_t i = 0; i + 1 < bounds.size(); i++) {
c_extent *= bounds[i].extent();
// It's important that we preserve the invariants:
// input_reshaped[0] == filter[0]
// input_reshaped[1] = input->number_of_elements()/input_reshaped[0]
int c_extent = filter->bounds()[0].extent();
int num_elems = 1;
for (size_t i = 0; i < bounds.size(); i++) {
num_elems *= bounds[i].extent();
}
const int b_extent = num_elems / c_extent;
HCHECK(c_extent * b_extent == num_elems);

Box reshaped_bounds = {{0, c_extent - 1}, {0, b_extent - 1}};
TensorPtr input_reshaped =
std::make_shared<Tensor>(input->name() + ".reshaped", input->type(), std::move(reshaped_bounds), input->quantization());
Expand Down
4 changes: 4 additions & 0 deletions apps/hannk/interpreter/ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ std::string dims_to_string(const halide_buffer_t *buf) {
oss << "}";
return oss.str();
}

// Convenience overload: formats the dimensions of a typed HalideBuffer by
// delegating to the halide_buffer_t* variant above.
std::string dims_to_string(const HalideBuffer<void> &buf) {
    return dims_to_string(buf.raw_buffer());
}
#endif

// Split a dimension d into two new dimensions. Dim d will have min 0
Expand Down
219 changes: 164 additions & 55 deletions apps/hannk/interpreter/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,23 @@ TensorStoragePtr Tensor::storage() {
void Tensor::set_external_buffer(HalideBuffer<void> external_buffer) {
assert(!is_dynamic());
assert(is_external());
assert(storage_ == nullptr);

// No: it's ok to set this to different values over time,
// so don't assert that host is currently null (or already equal to the new value)
// assert(!is_allocated());

for (int i = 0; i < buffer_.dimensions(); i++) {
assert(external_buffer.dim(i).min() == buffer_.dim(i).min());
assert(external_buffer.dim(i).extent() == buffer_.dim(i).extent());
storage()->buffer = std::move(external_buffer);
finish_buffer_allocation();

if (aliases_ != nullptr) {
for (const auto &weak : *aliases_) {
TensorPtr tp = weak.lock(); // null if the weak_ptr has expired
if (tp != nullptr && tp.get() != this) {
assert(!tp->is_external());
tp->finish_buffer_allocation();
}
}
}
buffer_ = std::move(external_buffer);
}

void Tensor::allocate_from_arena_pointer(void *host) {
Expand Down Expand Up @@ -117,7 +123,7 @@ void Tensor::finish_buffer_allocation() {
halide_buffer_t *raw_storage_buffer = storage_buffer.raw_buffer();
assert(raw_storage_buffer->host);

if (is_reshape_alias_) {
if (alias_type_ == AliasType::Reshaped) {
assert(raw_storage_buffer->number_of_elements() == buffer_.number_of_elements());
assert(raw_storage_buffer->type == buffer_.type());
assert(storage_offset_.empty());
Expand Down Expand Up @@ -191,67 +197,162 @@ void Tensor::resize_dynamic(const Box &new_shape) {
storage_ = nullptr;
}

void Tensor::set_alias_of(const TensorPtr &t, const SmallVector<int, max_rank> &storage_offset, bool is_reshape) {
assert(!is_dynamic());
assert(!is_external());
assert(!is_alias());
bool Tensor::has_external_alias() const {
if (aliases_ != nullptr) {
for (const auto &weak : *aliases_) {
TensorPtr tp = weak.lock(); // null if the weak_ptr has expired
if (tp != nullptr && tp->is_external()) {
return true;
}
}
}
return false;
}

// Return true iff 'this' can be an alias of 'source' of the given type.
bool Tensor::can_alias(const TensorPtr &source, AliasType alias_type) const {
if (alias_type == AliasType::None || this == source.get()) {
return false; // Bad inputs, just say no
}

if (this->is_allocated()) {
// Can't alias a tensor that already has an allocation.
return false;
}

// No: 't' may (or may not) already have is_alias_ = true,
// but both will be considered an alias after this call.
// assert(!t->is_alias_);
// If either tensor is dynamic, can't alias them.
if (this->is_dynamic() || source->is_dynamic()) {
return false;
}

storage_ = t->storage();
storage_offset_ = storage_offset;
if (this->type().bytes() != source->type().bytes()) {
// We can't alias tensors with types of different size.
return false;
}

if (is_reshape) {
is_reshape_alias_ = true;
if (this->alias_type() != AliasType::None || this->aliases_ != nullptr) {
// Can't alias a tensor multiple times.
return false;
}

// No: assume that t->storage() matches the rank+dimensions expected by t
// (or, if not, that t->is_reshape_alias_ is already set to true).
//
// t->is_reshape_alias_ = true;
if (source->alias_type() != AliasType::None && source->alias_type() != alias_type) {
// The source of the aliasing must either be unaliased or of the type we want
return false;
}

#ifndef NDEBUG
if (is_reshape) {
assert(storage_offset_.empty());
assert(this->buffer().type() == t->buffer().type());
assert(this->buffer().number_of_elements() == t->buffer().number_of_elements());
} else {
// Reality-check.
Box offset_bounds = bounds();
for (int i = 0; i < (int)storage_offset_.size(); i++) {
offset_bounds[i] += storage_offset_[i];
if (alias_type == AliasType::Offset) {
// AliasType::Offset allows for NO external tensors in the alias group
if (this->is_external() || source->is_external()) {
return false;
}
auto &shared_buffer = storage_->buffer;
assert(shared_buffer.type().bytes() == type().bytes());
assert(shared_buffer.dimensions() == (int)offset_bounds.size());
assert(!shared_buffer.data());

// Check that the storage is big enough for this buffer.
for (int i = 0; i < shared_buffer.dimensions(); i++) {
assert(offset_bounds[i].min >= shared_buffer.dim(i).min());
assert(offset_bounds[i].max <= shared_buffer.dim(i).max());

if (this->rank() != source->rank()) {
// AliasType::Offset can't alias tensors with different rank.
return false;
}
} else if (alias_type == AliasType::Reshaped) {
// AliasType::Reshaped allows for at most one external tensor in the alias group
const bool this_external = this->is_external() || this->has_external_alias();
const bool source_external = source->is_external() || source->has_external_alias();
if (this_external && source_external) {
return false;
}
}

return true;
}

// Makes 'alias' share 'source''s TensorStorage, with 'alias' positioned at
// 'storage_offset' within the shared buffer. Both tensors join the same
// alias group (aliases_) with AliasType::Offset.
// Precondition: alias->can_alias(source, AliasType::Offset).
/*static*/ void Tensor::make_offset_alias(TensorPtr alias, TensorPtr source, const TensorOffset &storage_offset) {
    assert(alias->can_alias(source, AliasType::Offset));

    // Lazily create the alias group on first aliasing; the group includes
    // the source itself so walks over aliases_ see every member.
    if (source->aliases_ == nullptr) {
        source->aliases_ = std::make_shared<std::vector<std::weak_ptr<Tensor>>>(1, source);
        source->alias_type_ = AliasType::Offset;
    }

    // 'alias' must not already belong to a group; share the source's group.
    assert(alias->aliases_ == nullptr);
    alias->aliases_ = source->aliases_;
    alias->aliases_->push_back(alias);

    assert(alias->alias_type_ == AliasType::None);
    alias->alias_type_ = AliasType::Offset;

    // Adopt the source's storage and remember where this tensor sits in it.
    assert(alias->storage_ == nullptr);
    alias->storage_ = source->storage();
    assert(alias->storage_offset_.empty());
    alias->storage_offset_ = storage_offset;

#ifndef NDEBUG
    // Reality-check.
    Box offset_bounds = alias->bounds();
    for (int i = 0; i < (int)alias->storage_offset_.size(); i++) {
        offset_bounds[i] += alias->storage_offset_[i];
    }
    auto &shared_buffer = alias->storage_->buffer;
    assert(shared_buffer.type().bytes() == alias->type().bytes());
    assert(shared_buffer.dimensions() == (int)offset_bounds.size());
    // NOTE(review): asserting no host data implies aliases must be created
    // before the shared storage is allocated — confirm against callers.
    assert(!shared_buffer.data());

    // Check that the storage is big enough for this buffer.
    for (int i = 0; i < shared_buffer.dimensions(); i++) {
        assert(offset_bounds[i].min >= shared_buffer.dim(i).min());
        assert(offset_bounds[i].max <= shared_buffer.dim(i).max());
    }
#endif
}

/*static*/ void Tensor::make_reshape_alias(TensorPtr alias, TensorPtr source) {
assert(alias->can_alias(source, AliasType::Reshaped));

if (alias->is_external()) {
assert(!source->has_external_alias());
} else if (source->is_external()) {
assert(!alias->has_external_alias());
}

if (source->aliases_ == nullptr) {
source->aliases_ = std::make_shared<std::vector<std::weak_ptr<Tensor>>>(1, source);
assert(source->alias_type_ == AliasType::None);
source->alias_type_ = AliasType::Reshaped;
} else {
assert(source->alias_type_ == AliasType::Reshaped);
}

is_alias_ = true;
t->is_alias_ = true;
assert(alias->aliases_ == nullptr);
alias->aliases_ = source->aliases_;
alias->aliases_->push_back(alias);

assert(alias->alias_type_ == AliasType::None);
alias->alias_type_ = AliasType::Reshaped;

assert(alias->storage_ == nullptr);
alias->storage_ = source->storage();
assert(alias->storage_offset_.empty());

#ifndef NDEBUG
assert(alias->storage_offset_.empty());
assert(alias->buffer().type().bytes() == source->buffer().type().bytes());
assert(alias->buffer().number_of_elements() == source->buffer().number_of_elements());
#endif
}

void Tensor::dump(std::ostream &os) const {
os << " " << buffer_.type() << " x ";
os << " \"" << name() << "\" this:@" << (const void *)this;

os << " " << buffer_.type() << " x ";

const auto *b = buffer_.raw_buffer();
os << '{';
for (int i = 0; i < b->dimensions; i++) {
if (i > 0) {
os << ", ";
const auto dump_dims = [&os](const halide_buffer_t *b) {
os << '{';
for (int i = 0; i < b->dimensions; i++) {
if (i > 0) {
os << ", ";
}
os << b->dim[i];
}
os << b->dim[i];
}
os << '}';
os << '}';
};

dump_dims(buffer_.raw_buffer());

if (is_allocated()) {
os << " allocated";
Expand All @@ -267,9 +368,13 @@ void Tensor::dump(std::ostream &os) const {
if (is_dynamic()) {
os << " dynamic";
}
if (is_alias()) {
os << " alias";
os << " storage_offset{";
if (alias_type_ != AliasType::None) {
os << (alias_type_ == AliasType::Offset ? " alias_offset{" : " alias_reshaped{");
for (const auto &weak : *aliases_) {
TensorPtr tp = weak.lock(); // null if the weak_ptr has expired
os << " " << (void *)tp.get();
}
os << " } storage_offset{";
for (size_t i = 0; i < storage_offset_.size(); i++) {
if (i > 0) {
os << ", ";
Expand All @@ -285,8 +390,12 @@ void Tensor::dump(std::ostream &os) const {
}

os << " storage:@" << (void *)storage_.get();
if (storage_) {
os << ' ';
dump_dims(storage_->buffer.raw_buffer());
}

os << " " << name() << " this:@" << (const void *)this << std::endl;
os << std::endl;
}

} // namespace hannk
Loading