From ec162e46fb65615f90b2c7c62e84c6c7095a367c Mon Sep 17 00:00:00 2001 From: Dmitri Prime Date: Sun, 13 Sep 2020 23:47:51 -0700 Subject: [PATCH 1/2] Implement simple common subexpression elimination. --- compiler/back_end/cpp/BUILD | 9 ++ .../back_end/cpp/generated_code_templates | 27 ++-- compiler/back_end/cpp/header_generator.py | 135 +++++++++++++----- .../cpp/testcode/complex_offset_test.cc | 39 +++++ .../back_end/cpp/testcode/uint_sizes_test.cc | 7 + doc/cpp-reference.md | 31 ++++ runtime/cpp/emboss_array_view.h | 10 ++ runtime/cpp/emboss_memory_util.h | 28 ++++ runtime/cpp/test/emboss_memory_util_test.cc | 20 +++ testdata/BUILD | 7 + testdata/complex_offset.emb | 34 +++++ 11 files changed, 306 insertions(+), 41 deletions(-) create mode 100644 compiler/back_end/cpp/testcode/complex_offset_test.cc create mode 100644 testdata/complex_offset.emb diff --git a/compiler/back_end/cpp/BUILD b/compiler/back_end/cpp/BUILD index b54d3abd..52840f12 100644 --- a/compiler/back_end/cpp/BUILD +++ b/compiler/back_end/cpp/BUILD @@ -313,3 +313,12 @@ emboss_cc_test( "@com_google_googletest//:gtest_main", ], ) + +emboss_cc_test( + name = "complex_offset_test", + srcs = ["testcode/complex_offset_test.cc"], + deps = [ + "//testdata:complex_offset_emboss", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/compiler/back_end/cpp/generated_code_templates b/compiler/back_end/cpp/generated_code_templates index dcc82b6a..7ed9198d 100644 --- a/compiler/back_end/cpp/generated_code_templates +++ b/compiler/back_end/cpp/generated_code_templates @@ -468,16 +468,24 @@ inline typename $_type_reader_$ Generic$_parent_type_$View::$_name_$() // call those methods at all. Similarly, if the end of the field would come // before the start, we provide a null storage, though arguably we should // not. - if ($_parameters_known_$ has_$_name_$().ValueOr(false) && $_size_$.Known() && - $_size_$.ValueOr(0) >= 0 && $_offset_$.Known() && - $_offset_$.ValueOr(0) >= 0) { - return $_type_reader_$( - $_parameter_values_$ backing_ - .template GetOffsetStorage<$_alignment_$, $_static_offset_$>( - $_offset_$.ValueOrDefault(), $_size_$.ValueOrDefault())); - } else { - return $_type_reader_$(); +$_parameter_subexpressions_$ + if ($_parameters_known_$ has_$_name_$().ValueOr(false)) { +$_size_and_offset_subexpressions_$ + auto emboss_reserved_local_size = $_size_$; + auto emboss_reserved_local_offset = $_offset_$; + if (emboss_reserved_local_size.Known() && + emboss_reserved_local_size.ValueOr(0) >= 0 && + emboss_reserved_local_offset.Known() && + emboss_reserved_local_offset.ValueOr(0) >= 0) { + return $_type_reader_$( + $_parameter_values_$ backing_ + .template GetOffsetStorage<$_alignment_$, + $_static_offset_$>( + emboss_reserved_local_offset.ValueOrDefault(), + emboss_reserved_local_size.ValueOrDefault())); + } } + return $_type_reader_$(); } template @@ -617,6 +625,7 @@ $_write_methods_$ private: ::emboss::support::Maybe MaybeRead() const { +$_read_subexpressions_$ return $_read_value_$; } diff --git a/compiler/back_end/cpp/header_generator.py b/compiler/back_end/cpp/header_generator.py index 109686c4..a10adf98 100644 --- a/compiler/back_end/cpp/header_generator.py +++ b/compiler/back_end/cpp/header_generator.py @@ -443,16 +443,18 @@ def _cpp_integer_type_for_range(min_val, max_val): return None -def _render_builtin_operation(expression, ir, field_reader): +def _render_builtin_operation(expression, ir, field_reader, subexpressions): """Renders a built-in operation (+, -, &&, etc.) into C++ code.""" assert expression.function.function not in ( ir_pb2.Function.UPPER_BOUND, ir_pb2.Function.LOWER_BOUND), ( "UPPER_BOUND and LOWER_BOUND should be constant.") if expression.function.function == ir_pb2.Function.PRESENCE: - return field_reader.render_existence(expression.function.args[0]) + return field_reader.render_existence(expression.function.args[0], + subexpressions) args = expression.function.args rendered_args = [ - _render_expression(arg, ir, field_reader).rendered for arg in args] + _render_expression(arg, ir, field_reader, subexpressions).rendered + for arg in args] minimum_integers = [] maximum_integers = [] enum_types = set() @@ -506,18 +508,25 @@ def _render_builtin_operation(expression, ir, field_reader): class _FieldRenderer(object): """Base class for rendering field reads.""" - def render_field_read_with_context(self, expression, ir, prefix): + def render_field_read_with_context(self, expression, ir, prefix, + subexpressions): + field = ( + prefix + + _render_variable(ir_util.hashable_form_of_field_reference( + expression.field_reference))) + if subexpressions is None: + field_expression = field + else: + field_expression = subexpressions.add(field) expression_cpp_type = _cpp_basic_type_for_expression(expression, ir) - return ("({0}{1}.Ok()" - " ? {2}(static_cast({0}{1}.UncheckedRead()))" - " : {2}())".format( - prefix, - _render_variable(ir_util.hashable_form_of_field_reference( - expression.field_reference)), + return ("({0}.Ok()" + " ? {1}(static_cast({0}.UncheckedRead()))" + " : {1}())".format( + field_expression, _maybe_type(expression_cpp_type), expression_cpp_type)) - def render_existence_with_context(self, expression, prefix): + def render_existence_with_context(self, expression, prefix, subexpressions): return "{1}{0}".format( _render_variable( ir_util.hashable_form_of_field_reference( @@ -529,28 +538,51 @@ def render_existence_with_context(self, expression, prefix): class _DirectFieldRenderer(_FieldRenderer): """Renderer for fields read from inside a structure's View type.""" - def render_field(self, expression, ir): - return self.render_field_read_with_context(expression, ir, "") + def render_field(self, expression, ir, subexpressions): + return self.render_field_read_with_context( + expression, ir, "", subexpressions) - def render_existence(self, expression): - return self.render_existence_with_context(expression, "") + def render_existence(self, expression, subexpressions): + return self.render_existence_with_context(expression, "", subexpressions) class _VirtualViewFieldRenderer(_FieldRenderer): """Renderer for field reads from inside a virtual field's View.""" - def render_existence(self, expression): - return self.render_existence_with_context(expression, "view_.") + def render_existence(self, expression, subexpressions): + return self.render_existence_with_context( + expression, "view_.", subexpressions) + + def render_field(self, expression, ir, subexpressions): + return self.render_field_read_with_context( + expression, ir, "view_.", subexpressions) + + +class _SubexpressionStore: + """Holder for subexpressions to be assigned to local variables.""" - def render_field(self, expression, ir): - return self.render_field_read_with_context(expression, ir, "view_.") + def __init__(self, prefix): + self._prefix = prefix + self._subexpr_to_name = {} + self._index_to_subexpr = [] + + def add(self, subexpr): + if subexpr not in self._subexpr_to_name: + self._index_to_subexpr.append(subexpr) + self._subexpr_to_name[subexpr] = ( + self._prefix + str(len(self._index_to_subexpr))) + return self._subexpr_to_name[subexpr] + + def subexprs(self): + return [(self._subexpr_to_name[subexpr], subexpr) + for subexpr in self._index_to_subexpr] _ExpressionResult = collections.namedtuple("ExpressionResult", ["rendered", "is_constant"]) -def _render_expression(expression, ir, field_reader=None): +def _render_expression(expression, ir, field_reader=None, subexpressions=None): """Renders an expression into C++ code. Arguments: @@ -558,6 +590,8 @@ def _render_expression(expression, ir, field_reader=None): ir: The IR in which to look up references. field_reader: An object with render_existence and render_field methods appropriate for the C++ context of the expression. + subexpressions: A _SubexpressionStore in which to put subexpressions, or + None if subexpressions should be inline. Returns: A tuple of (rendered_text, is_constant), where rendered_text is C++ code @@ -591,26 +625,33 @@ def _render_expression(expression, ir, field_reader=None): assert False, "Unhandled expression type {}".format( expression.type.WhichOneof("type")) + result = None # Otherwise, render the operation. if expression.WhichOneof("expression") == "function": - return _ExpressionResult( - _render_builtin_operation(expression, ir, field_reader), False) + result = _render_builtin_operation( + expression, ir, field_reader, subexpressions) elif expression.WhichOneof("expression") == "field_reference": - return _ExpressionResult(field_reader.render_field(expression, ir), False) + result = field_reader.render_field(expression, ir, subexpressions) elif (expression.WhichOneof("expression") == "builtin_reference" and expression.builtin_reference.canonical_name.object_path[-1] == "$logical_value"): return _ExpressionResult( _maybe_type("decltype(emboss_reserved_local_value)") + "(emboss_reserved_local_value)", False) + # Any of the constant expression types should have been handled in the # previous section. + assert result is not None, "Unable to render expression {}".format( + str(expression)) - assert False, "Unable to render expression {}".format(str(expression)) + if subexpressions is None: + return _ExpressionResult(result, False) + else: + return _ExpressionResult(subexpressions.add(result), False) -def _render_existence_test(field, ir): - return _render_expression(field.existence_condition, ir) +def _render_existence_test(field, ir, subexpressions=None): + return _render_expression(field.existence_condition, ir, subexpressions) def _alignment_of_location(location): @@ -663,11 +704,11 @@ def _generate_custom_validator_expression_for(field_ir, ir): class _ValidatorFieldReader(object): """A "FieldReader" that translates the current field to `value`.""" - def render_existence(self, expression): + def render_existence(self, expression, subexpressions): del expression # Unused. assert False, "Shouldn't be here." - def render_field(self, expression, ir): + def render_field(self, expression, ir, subexpressions): assert len(expression.field_reference.path) == 1 assert (expression.field_reference.path[0].canonical_name == field_ir.name.canonical_name) @@ -708,13 +749,16 @@ def _generate_structure_virtual_field_methods(enclosing_type_name, field_ir, if field_ir.write_method.WhichOneof("method") == "alias": return _generate_field_indirection(field_ir, enclosing_type_name, ir) + read_subexpressions = _SubexpressionStore("emboss_reserved_local_subexpr_") read_value = _render_expression( field_ir.read_transform, ir, - field_reader=_VirtualViewFieldRenderer()) + field_reader=_VirtualViewFieldRenderer(), + subexpressions=read_subexpressions) field_exists = _render_existence_test(field_ir, ir) logical_type = _cpp_basic_type_for_expression(field_ir.read_transform, ir) if read_value.is_constant and field_exists.is_constant: + assert not read_subexpressions.subexprs() declaration_template = ( _TEMPLATES.structure_single_const_virtual_field_method_declarations) definition_template = ( @@ -766,6 +810,10 @@ def _generate_structure_virtual_field_methods(enclosing_type_name, field_ir, name=name, virtual_view_type_name=virtual_view_type_name, logical_type=logical_type, + read_subexpressions="".join( + [" const auto {} = {};".format(name, subexpr) + for name, subexpr in read_subexpressions.subexprs()] + ), read_value=read_value.rendered, write_to_text_stream_function=write_to_text_stream_function, parent_type=enclosing_type_name, @@ -828,15 +876,36 @@ def _generate_structure_physical_field_methods(enclosing_type_name, field_ir, _get_cpp_type_reader_of_field(field_ir, ir, "Storage", validator_type, parent_addressable_unit)) + field_name = field_ir.name.canonical_name.object_path[-1] + + subexpressions = _SubexpressionStore("emboss_reserved_local_subexpr_") parameter_values = [] parameters_known = [] for parameter in parameter_expressions: - parameter_cpp_expr = _render_expression(parameter, ir) + parameter_cpp_expr = _render_expression( + parameter, ir, subexpressions=subexpressions) parameter_values.append( "{}.ValueOrDefault(), ".format(parameter_cpp_expr.rendered)) parameters_known.append( "{}.Known() && ".format(parameter_cpp_expr.rendered)) + parameter_subexpressions = "".join( + [" const auto {} = {};\n".format(name, subexpr) + for name, subexpr in subexpressions.subexprs()] + ) + + first_size_and_offset_subexpr = len(subexpressions.subexprs()) + offset = _render_expression( + field_ir.location.start, ir, subexpressions=subexpressions).rendered + size = _render_expression( + field_ir.location.size, ir, subexpressions=subexpressions).rendered + size_and_offset_subexpressions = "".join( + [" const auto {} = {};\n".format(name, subexpr) + for name, subexpr in subexpressions.subexprs()[ + first_size_and_offset_subexpr:]] + ) + + field_alignment, field_offset = _alignment_of_location(field_ir.location) declaration = code_template.format_template( _TEMPLATES.structure_single_field_method_declarations, @@ -848,12 +917,14 @@ def _generate_structure_physical_field_methods(enclosing_type_name, field_ir, parent_type=enclosing_type_name, name=field_name, type_reader=type_reader, - offset=_render_expression(field_ir.location.start, ir).rendered, - size=_render_expression(field_ir.location.size, ir).rendered, + offset=offset, + size=size, + size_and_offset_subexpressions=size_and_offset_subexpressions, field_exists=_render_existence_test(field_ir, ir).rendered, alignment=field_alignment, parameters_known="".join(parameters_known), parameter_values="".join(parameter_values), + parameter_subexpressions=parameter_subexpressions, static_offset=field_offset) return validator_declaration, declaration, definition diff --git a/compiler/back_end/cpp/testcode/complex_offset_test.cc b/compiler/back_end/cpp/testcode/complex_offset_test.cc new file mode 100644 index 00000000..af894647 --- /dev/null +++ b/compiler/back_end/cpp/testcode/complex_offset_test.cc @@ -0,0 +1,39 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Tests of packed field support. +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "testdata/complex_offset.emb.h" + +namespace emboss { +namespace test { +namespace { + +TEST(PackedFields, PerformanceOfOk) { + ::std::array values = {0}; + const auto view = MakePackedFieldsView(&values); + EXPECT_TRUE(view.Ok()); + EXPECT_TRUE(view.length6().Ok()); + EXPECT_TRUE(view.data6().Ok()); +} + +} // namespace +} // namespace test +} // namespace emboss diff --git a/compiler/back_end/cpp/testcode/uint_sizes_test.cc b/compiler/back_end/cpp/testcode/uint_sizes_test.cc index 2513dffa..d3b8b684 100644 --- a/compiler/back_end/cpp/testcode/uint_sizes_test.cc +++ b/compiler/back_end/cpp/testcode/uint_sizes_test.cc @@ -393,6 +393,13 @@ TEST(SizesView, CanReadArraySizes) { EXPECT_EQ(8U, sizeof(view.eight_byte()[0].Read())); } +TEST(SizesView, ToString) { + ::std::array buf = {'a', 'b'}; + auto view = MakeArraySizesView(&buf); + + EXPECT_EQ(view.one_byte().ToString(), "ab"); +} + TEST(SizesView, CopyFrom) { ::std::array buf_x = {}; ::std::array buf_y = {}; diff --git a/doc/cpp-reference.md b/doc/cpp-reference.md index 98c70b83..46ecdd0d 100644 --- a/doc/cpp-reference.md +++ b/doc/cpp-reference.md @@ -1074,6 +1074,37 @@ bool IsComplete() const; to hold the entire array. +### `ToString` method + +```c++ +template +String ToString() const; +``` + +Intended usage: + +```c++ +// Makes a copy of view's backing storage. +auto str = view.ToString(); + +// Points to view's backing storage. +auto str_view = view.ToString(); +``` + +`ToString()` returns a string type constructed from the backing storage of the +array. Note that `ToString()` is only enabled for arrays of 1-byte values, +such as `UInt:8[]`, and only when the array view's underlying storage is +contiguous. + +Although it is intended for use with `std::string` and `std::string_view`, +`ToString()` can work with any C++ type that: + +1. Has a `data()` method that returns a pointer to the string's underlying + data as a `char` type. +2. Has a constructor that accepts a `const declval(data())` pointer and a + `size_t` length. + + ### `UpdateFromTextStream` method ```c++ diff --git a/runtime/cpp/emboss_array_view.h b/runtime/cpp/emboss_array_view.h index ccfef2cf..e2224364 100644 --- a/runtime/cpp/emboss_array_view.h +++ b/runtime/cpp/emboss_array_view.h @@ -250,6 +250,16 @@ class GenericArrayView final { BufferType BackingStorage() const { return buffer_; } + // Forwards to BufferType's ToString(), if any, but only if ElementView is a + // 1-byte type. + template + typename ::std::enable_if::type + ToString() const { + EMBOSS_CHECK(Ok()); + return BackingStorage().template ToString(); + } + private: // This uses the same technique to select the correct definition of // SizeOfBuffer() as in the SizeInBits()/SizeInBytes() selection above. diff --git a/runtime/cpp/emboss_memory_util.h b/runtime/cpp/emboss_memory_util.h index 8b52b33a..5ff56179 100644 --- a/runtime/cpp/emboss_memory_util.h +++ b/runtime/cpp/emboss_memory_util.h @@ -616,6 +616,34 @@ class ContiguousBuffer final { Byte *begin() const { return bytes_; } Byte *end() const { return bytes_ + size_; } + // Constructs a string type from the underlying data; mostly intended to be + // called as: + // + // buffer.ToString(); + // + // or: + // + // buffer.ToString(); + // + // ... but it should also work with any similar-enough classes, such as + // std::basic_string_view or Google's absl::string_view. + // + // Note that this may or may not make a copy of the underlying data, + // depending on the behavior of the given string type. + template + typename ::std::enable_if< + IsChar().data())>::type>::value, + String>::type + ToString() const { + return String( + reinterpret_cast< + const typename ::std::remove_reference().data())>::type>::type *>( + bytes_), + size_); + } + private: Byte *bytes_ = nullptr; ::std::size_t size_ = 0; diff --git a/runtime/cpp/test/emboss_memory_util_test.cc b/runtime/cpp/test/emboss_memory_util_test.cc index eec43fa0..54b8ffe0 100644 --- a/runtime/cpp/test/emboss_memory_util_test.cc +++ b/runtime/cpp/test/emboss_memory_util_test.cc @@ -12,6 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#if __cplusplus >= 201402L +#include +#endif // __cplusplus >= 201402L +#include + #include "runtime/cpp/emboss_memory_util.h" #include "gtest/gtest.h" @@ -465,6 +470,21 @@ TEST(ContiguousBuffer, ConstructionFromCompatibleContiguousBuffers) { EXPECT_EQ(aligned_buffer.data(), reinterpret_cast(data + 3)); } +TEST(ContiguousBuffer, ToString) { + const ::std::vector bytes = { + {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}}; + const auto buffer = ReadOnlyContiguousBuffer{bytes.data(), bytes.size() - 4}; + auto str = buffer.ToString(); + EXPECT_TRUE((::std::is_same::value)); + EXPECT_EQ(str, "abcd"); +#if __cplusplus >= 201402L + auto str_view = buffer.ToString(); + EXPECT_TRUE( + (::std::is_same::value)); + EXPECT_EQ(str_view, "abcd"); +#endif // __cplusplus >= 201402L +} + TEST(LittleEndianByteOrderer, Methods) { ::std::vector bytes = { {21, 22, 1, 2, 3, 4, 5, 6, 7, 8, 23, 24}}; diff --git a/testdata/BUILD b/testdata/BUILD index d757da7b..1791d66f 100644 --- a/testdata/BUILD +++ b/testdata/BUILD @@ -284,3 +284,10 @@ emboss_cc_library( "virtual_field.emb", ], ) + +emboss_cc_library( + name = "complex_offset_emboss", + srcs = [ + "complex_offset.emb", + ], +) diff --git a/testdata/complex_offset.emb b/testdata/complex_offset.emb new file mode 100644 index 00000000..686150e3 --- /dev/null +++ b/testdata/complex_offset.emb @@ -0,0 +1,34 @@ +-- Test .emb with a struct with several consecutive packed fields. +-- +-- This particular case stresses generated code; common subexpression +-- elimination is needed in the code generator or else the generated code is +-- very, *very* slow when compiled without optimizations. + +[$default byte_order: "BigEndian"] +[(cpp) namespace: "emboss::test"] + +struct Length: + 0 [+1] UInt length + +struct Data: + 0 [+1] Length length + 1 [+length.length] UInt:8[] data + +struct PackedFields: + 0 [+1] Length length1 (l1) + 0 [+l1.length+1] Data data1 (d1) + let o1 = d1.$size_in_bytes + o1 [+1] Length length2 (l2) + o1 [+l2.length+1] Data data2 (d2) + let o2 = o1 + d2.$size_in_bytes + o2 [+1] Length length3 (l3) + o2 [+l3.length+1] Data data3 (d3) + let o3 = o2 + d3.$size_in_bytes + o3 [+1] Length length4 (l4) + o3 [+l4.length+1] Data data4 (d4) + let o4 = o3 + d4.$size_in_bytes + o4 [+1] Length length5 (l5) + o4 [+l5.length+1] Data data5 (d5) + let o5 = o4 + d5.$size_in_bytes + o5 [+1] Length length6 (l6) + o5 [+l6.length+1] Data data6 (d6) From 0f8af11e9a832adeb73e3f94e882aa9cc3eb9ad9 Mon Sep 17 00:00:00 2001 From: Dmitri Prime Date: Sun, 13 Sep 2020 23:49:41 -0700 Subject: [PATCH 2/2] Add missing license header. --- testdata/complex_offset.emb | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/testdata/complex_offset.emb b/testdata/complex_offset.emb index 686150e3..ef04fc74 100644 --- a/testdata/complex_offset.emb +++ b/testdata/complex_offset.emb @@ -1,3 +1,17 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + -- Test .emb with a struct with several consecutive packed fields. -- -- This particular case stresses generated code; common subexpression