Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/HelloWasm/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
js/*
build/
bin/
2 changes: 1 addition & 1 deletion apps/HelloWasm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ clean:

$(BIN)/$(HL_TARGET)/run: $(BIN)/$(HL_TARGET)/reaction_diffusion_init.a $(BIN)/$(HL_TARGET)/reaction_diffusion_update.a $(BIN)/$(HL_TARGET)/reaction_diffusion_render.a $(BIN)/$(HL_TARGET)/runtime.a
mkdir -p $(@D)
$(CXX) $(HALIDE_DISTRIB_PATH)/tools/RunGenMain.cpp $(BIN)/$(HL_TARGET)/reaction_diffusion_*.registration.cpp $^ -o $@ -I $(HALIDE_DISTRIB_PATH)/include $(IMAGE_IO_FLAGS) $(LDFLAGS)
$(CXX) $(CXXFLAGS) $(HALIDE_DISTRIB_PATH)/tools/RunGenMain.cpp $(BIN)/$(HL_TARGET)/reaction_diffusion_*.registration.cpp $^ -o $@ -I $(HALIDE_DISTRIB_PATH)/include $(IMAGE_IO_FLAGS) $(LDFLAGS)

benchmark_native: $(BIN)/$(HL_TARGET)/run
$(BIN)/$(HL_TARGET)/run --benchmarks=all --benchmark_min_time=1 --name=reaction_diffusion_init --output_extents=[1024,1024] --parsable_output
Expand Down
2 changes: 1 addition & 1 deletion apps/HelloWasm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ To try it out,

- Build with `make all`

- Run a local webserver using, e.g.: python3 -m http.server 8080 &
- Run a local webserver: `python3 ../serve_with_cross_origin_isolation.py`. (This is a trivial wrapper around Python 3's `http.server` that also sets the headers necessary for "cross-origin isolation", which allows threading to work in recent browsers; see https://developer.chrome.com/blog/enabling-shared-array-buffer/ for more information.)

- Load Google Chrome (at least version 84), go to chrome://flags, and turn on all the experimental WebAssembly features (e.g. threads, SIMD). If you don't do this, only the single-threaded scalar variant will work (at the time of writing).

Expand Down
22 changes: 22 additions & 0 deletions apps/HelloWasm/serve_with_cross_origin_isolation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3

from http.server import HTTPServer, SimpleHTTPRequestHandler
import sys

class MyRequestHandler(SimpleHTTPRequestHandler):
    """A SimpleHTTPRequestHandler that adds the COOP/COEP response headers
    required for "cross-origin isolation" (needed so SharedArrayBuffer and
    therefore wasm threading work in recent browsers).
    """

    def end_headers(self):
        # Inject the isolation headers on every response, just before the
        # header block is terminated.
        self.send_header('Cross-Origin-Embedder-Policy', 'require-corp')
        self.send_header('Cross-Origin-Opener-Policy', 'same-origin')
        # Python-3-only script (see shebang), so use zero-argument super().
        return super().end_headers()

    def do_OPTIONS(self):
        # Minimal preflight support: reply 200; end_headers() adds the
        # isolation headers.
        self.send_response(200)
        self.end_headers()

# Command line: [host] port — with two args they are host then port; with a
# single arg it is taken as the port; with none, defaults below are used.
# NOTE(review): a single non-numeric arg (a bare host) makes int() raise —
# presumably callers always pass a port; confirm before changing semantics.
host = sys.argv[1] if len(sys.argv) > 2 else '127.0.0.1'
port = int(sys.argv[-1]) if len(sys.argv) > 1 else 8080

print("Listening on {}:{}".format(host, port))
httpd = HTTPServer((host, port), MyRequestHandler)
httpd.serve_forever()  # blocks until interrupted
14 changes: 10 additions & 4 deletions apps/hannk/interpreter/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,17 @@ OpPtr lower_tflite_fullyconnected(const TensorPtr &input, const TensorPtr &filte
assert(i.min == 0);
}
#endif
int c_extent = 1;
int b_extent = bounds.back().extent();
for (size_t i = 0; i + 1 < bounds.size(); i++) {
c_extent *= bounds[i].extent();
// It's important that we preserve the invariants:
// input_reshaped[0] == filter[0]
// input_reshaped[1] = input->number_of_elements()/input_reshaped[0]
int c_extent = filter->bounds()[0].extent();
int num_elems = 1;
for (size_t i = 0; i < bounds.size(); i++) {
num_elems *= bounds[i].extent();
}
const int b_extent = num_elems / c_extent;
HCHECK(c_extent * b_extent == num_elems);

Box reshaped_bounds = {{0, c_extent - 1}, {0, b_extent - 1}};
TensorPtr input_reshaped =
std::make_shared<Tensor>(input->name() + ".reshaped", input->type(), std::move(reshaped_bounds), input->quantization());
Expand Down
4 changes: 4 additions & 0 deletions apps/hannk/interpreter/ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ std::string dims_to_string(const halide_buffer_t *buf) {
oss << "}";
return oss.str();
}

// Convenience overload: formats the dimensions of a typed HalideBuffer by
// delegating to the halide_buffer_t* variant above.
std::string dims_to_string(const HalideBuffer<void> &buf) {
    return dims_to_string(buf.raw_buffer());
}
#endif

// Split a dimension d into two new dimensions. Dim d will have min 0
Expand Down
219 changes: 164 additions & 55 deletions apps/hannk/interpreter/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,23 @@ TensorStoragePtr Tensor::storage() {
void Tensor::set_external_buffer(HalideBuffer<void> external_buffer) {
assert(!is_dynamic());
assert(is_external());
assert(storage_ == nullptr);

// No: it's ok to set this to different values over time,
// so don't assert that host is currently null (or already equal to the new value)
// assert(!is_allocated());

for (int i = 0; i < buffer_.dimensions(); i++) {
assert(external_buffer.dim(i).min() == buffer_.dim(i).min());
assert(external_buffer.dim(i).extent() == buffer_.dim(i).extent());
storage()->buffer = std::move(external_buffer);
finish_buffer_allocation();

if (aliases_ != nullptr) {
for (const auto &weak : *aliases_) {
TensorPtr tp = weak.lock(); // null if the weak_ptr has expired
if (tp != nullptr && tp.get() != this) {
assert(!tp->is_external());
tp->finish_buffer_allocation();
}
}
}
buffer_ = std::move(external_buffer);
}

void Tensor::allocate_from_arena_pointer(void *host) {
Expand Down Expand Up @@ -117,7 +123,7 @@ void Tensor::finish_buffer_allocation() {
halide_buffer_t *raw_storage_buffer = storage_buffer.raw_buffer();
assert(raw_storage_buffer->host);

if (is_reshape_alias_) {
if (alias_type_ == AliasType::Reshaped) {
assert(raw_storage_buffer->number_of_elements() == buffer_.number_of_elements());
assert(raw_storage_buffer->type == buffer_.type());
assert(storage_offset_.empty());
Expand Down Expand Up @@ -191,67 +197,162 @@ void Tensor::resize_dynamic(const Box &new_shape) {
storage_ = nullptr;
}

void Tensor::set_alias_of(const TensorPtr &t, const SmallVector<int, max_rank> &storage_offset, bool is_reshape) {
assert(!is_dynamic());
assert(!is_external());
assert(!is_alias());
bool Tensor::has_external_alias() const {
if (aliases_ != nullptr) {
for (const auto &weak : *aliases_) {
TensorPtr tp = weak.lock(); // null if the weak_ptr has expired
if (tp != nullptr && tp->is_external()) {
return true;
}
}
}
return false;
}

// Return true iff 'this' can be an alias of 'source' of the given type.
bool Tensor::can_alias(const TensorPtr &source, AliasType alias_type) const {
if (alias_type == AliasType::None || this == source.get()) {
return false; // Bad inputs, just say no
}

if (this->is_allocated()) {
// Can't alias a tensor that already has an allocation.
return false;
}

// No: 't' may (or may not) already have is_alias_ = true,
// but both will be considered an alias after this call.
// assert(!t->is_alias_);
// If either tensor is dynamic, can't alias them.
if (this->is_dynamic() || source->is_dynamic()) {
return false;
}

storage_ = t->storage();
storage_offset_ = storage_offset;
if (this->type().bytes() != source->type().bytes()) {
// We can't alias tensors with types of different size.
return false;
}

if (is_reshape) {
is_reshape_alias_ = true;
if (this->alias_type() != AliasType::None || this->aliases_ != nullptr) {
// Can't alias a tensor multiple times.
return false;
}

// No: assume that t->storage() matches the rank+dimensions expected by t
// (or, if not, that t->is_reshape_alias_ is already set to true).
//
// t->is_reshape_alias_ = true;
if (source->alias_type() != AliasType::None && source->alias_type() != alias_type) {
// The source of the aliasing must either be unaliased or of the type we want
return false;
}

#ifndef NDEBUG
if (is_reshape) {
assert(storage_offset_.empty());
assert(this->buffer().type() == t->buffer().type());
assert(this->buffer().number_of_elements() == t->buffer().number_of_elements());
} else {
// Reality-check.
Box offset_bounds = bounds();
for (int i = 0; i < (int)storage_offset_.size(); i++) {
offset_bounds[i] += storage_offset_[i];
if (alias_type == AliasType::Offset) {
// AliasType::Offset allows for NO external tensors in the alias group
if (this->is_external() || source->is_external()) {
return false;
}
auto &shared_buffer = storage_->buffer;
assert(shared_buffer.type().bytes() == type().bytes());
assert(shared_buffer.dimensions() == (int)offset_bounds.size());
assert(!shared_buffer.data());

// Check that the storage is big enough for this buffer.
for (int i = 0; i < shared_buffer.dimensions(); i++) {
assert(offset_bounds[i].min >= shared_buffer.dim(i).min());
assert(offset_bounds[i].max <= shared_buffer.dim(i).max());

if (this->rank() != source->rank()) {
// AliasType::Offset can't alias tensors with different rank.
return false;
}
} else if (alias_type == AliasType::Reshaped) {
// AliasType::Reshaped allows for at most one external tensor in the alias group
const bool this_external = this->is_external() || this->has_external_alias();
const bool source_external = source->is_external() || source->has_external_alias();
if (this_external && source_external) {
return false;
}
}

return true;
}

// Makes 'alias' share 'source''s TensorStorage, with 'alias' positioned at
// 'storage_offset' within the shared buffer. Both tensors join the same
// alias group (aliases_) with AliasType::Offset.
// Precondition: alias->can_alias(source, AliasType::Offset).
/*static*/ void Tensor::make_offset_alias(TensorPtr alias, TensorPtr source, const TensorOffset &storage_offset) {
    assert(alias->can_alias(source, AliasType::Offset));

    // Lazily create the alias group on first aliasing; the group includes
    // the source itself so walks over aliases_ see every member.
    if (source->aliases_ == nullptr) {
        source->aliases_ = std::make_shared<std::vector<std::weak_ptr<Tensor>>>(1, source);
        source->alias_type_ = AliasType::Offset;
    }

    // 'alias' must not already belong to a group; share the source's group.
    assert(alias->aliases_ == nullptr);
    alias->aliases_ = source->aliases_;
    alias->aliases_->push_back(alias);

    assert(alias->alias_type_ == AliasType::None);
    alias->alias_type_ = AliasType::Offset;

    // Adopt the source's storage and remember where this tensor sits in it.
    assert(alias->storage_ == nullptr);
    alias->storage_ = source->storage();
    assert(alias->storage_offset_.empty());
    alias->storage_offset_ = storage_offset;

#ifndef NDEBUG
    // Reality-check.
    Box offset_bounds = alias->bounds();
    for (int i = 0; i < (int)alias->storage_offset_.size(); i++) {
        offset_bounds[i] += alias->storage_offset_[i];
    }
    auto &shared_buffer = alias->storage_->buffer;
    assert(shared_buffer.type().bytes() == alias->type().bytes());
    assert(shared_buffer.dimensions() == (int)offset_bounds.size());
    // NOTE(review): asserting no host data implies aliases must be created
    // before the shared storage is allocated — confirm against callers.
    assert(!shared_buffer.data());

    // Check that the storage is big enough for this buffer.
    for (int i = 0; i < shared_buffer.dimensions(); i++) {
        assert(offset_bounds[i].min >= shared_buffer.dim(i).min());
        assert(offset_bounds[i].max <= shared_buffer.dim(i).max());
    }
#endif
}

/*static*/ void Tensor::make_reshape_alias(TensorPtr alias, TensorPtr source) {
assert(alias->can_alias(source, AliasType::Reshaped));

if (alias->is_external()) {
assert(!source->has_external_alias());
} else if (source->is_external()) {
assert(!alias->has_external_alias());
}

if (source->aliases_ == nullptr) {
source->aliases_ = std::make_shared<std::vector<std::weak_ptr<Tensor>>>(1, source);
assert(source->alias_type_ == AliasType::None);
source->alias_type_ = AliasType::Reshaped;
} else {
assert(source->alias_type_ == AliasType::Reshaped);
}

is_alias_ = true;
t->is_alias_ = true;
assert(alias->aliases_ == nullptr);
alias->aliases_ = source->aliases_;
alias->aliases_->push_back(alias);

assert(alias->alias_type_ == AliasType::None);
alias->alias_type_ = AliasType::Reshaped;

assert(alias->storage_ == nullptr);
alias->storage_ = source->storage();
assert(alias->storage_offset_.empty());

#ifndef NDEBUG
assert(alias->storage_offset_.empty());
assert(alias->buffer().type().bytes() == source->buffer().type().bytes());
assert(alias->buffer().number_of_elements() == source->buffer().number_of_elements());
#endif
}

void Tensor::dump(std::ostream &os) const {
os << " " << buffer_.type() << " x ";
os << " \"" << name() << "\" this:@" << (const void *)this;

os << " " << buffer_.type() << " x ";

const auto *b = buffer_.raw_buffer();
os << '{';
for (int i = 0; i < b->dimensions; i++) {
if (i > 0) {
os << ", ";
const auto dump_dims = [&os](const halide_buffer_t *b) {
os << '{';
for (int i = 0; i < b->dimensions; i++) {
if (i > 0) {
os << ", ";
}
os << b->dim[i];
}
os << b->dim[i];
}
os << '}';
os << '}';
};

dump_dims(buffer_.raw_buffer());

if (is_allocated()) {
os << " allocated";
Expand All @@ -267,9 +368,13 @@ void Tensor::dump(std::ostream &os) const {
if (is_dynamic()) {
os << " dynamic";
}
if (is_alias()) {
os << " alias";
os << " storage_offset{";
if (alias_type_ != AliasType::None) {
os << (alias_type_ == AliasType::Offset ? " alias_offset{" : " alias_reshaped{");
for (const auto &weak : *aliases_) {
TensorPtr tp = weak.lock(); // null if the weak_ptr has expired
os << " " << (void *)tp.get();
}
os << " } storage_offset{";
for (size_t i = 0; i < storage_offset_.size(); i++) {
if (i > 0) {
os << ", ";
Expand All @@ -285,8 +390,12 @@ void Tensor::dump(std::ostream &os) const {
}

os << " storage:@" << (void *)storage_.get();
if (storage_) {
os << ' ';
dump_dims(storage_->buffer.raw_buffer());
}

os << " " << name() << " this:@" << (const void *)this << std::endl;
os << std::endl;
}

} // namespace hannk
Loading