-
Notifications
You must be signed in to change notification settings - Fork 4.1k
GH-38718: [Go][Format][Integration] Add StringView/BinaryView to Go implementation #35769
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d88fc91
a0eb736
daf1796
0491fd2
0ba9d56
46e7034
2a85125
0b141d8
7e19d39
15888ac
76e9bbc
3595e5d
92a8362
704bf82
dcdc1b1
c15b7ba
d6bbd35
8dbcf52
d0e03bb
646b1e2
24fb628
306ee94
5dc1d51
b620e45
0b10bed
a009c47
bccebbe
1792a98
5cfc237
829a850
a560917
a84ee2e
be3b3a5
e2bbe6f
cab4899
9db6557
460c06e
8e41840
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ import ( | |
| "unsafe" | ||
|
|
||
| "github.com/apache/arrow/go/v15/arrow" | ||
| "github.com/apache/arrow/go/v15/arrow/memory" | ||
| "github.com/apache/arrow/go/v15/internal/json" | ||
| ) | ||
|
|
||
|
|
@@ -318,6 +319,126 @@ func arrayEqualLargeBinary(left, right *LargeBinary) bool { | |
| return true | ||
| } | ||
|
|
||
| type ViewLike interface { | ||
| arrow.Array | ||
| ValueHeader(int) *arrow.ViewHeader | ||
| } | ||
|
|
||
| type BinaryView struct { | ||
| array | ||
| values []arrow.ViewHeader | ||
| dataBuffers []*memory.Buffer | ||
| } | ||
|
|
||
| func NewBinaryViewData(data arrow.ArrayData) *BinaryView { | ||
| a := &BinaryView{} | ||
| a.refCount = 1 | ||
| a.setData(data.(*Data)) | ||
| return a | ||
| } | ||
|
|
||
| func (a *BinaryView) setData(data *Data) { | ||
| if len(data.buffers) < 2 { | ||
| panic("len(data.buffers) < 2") | ||
| } | ||
| a.array.setData(data) | ||
|
|
||
| if valueData := data.buffers[1]; valueData != nil { | ||
| a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) | ||
| } | ||
|
|
||
| a.dataBuffers = data.buffers[2:] | ||
| } | ||
|
|
||
| func (a *BinaryView) ValueHeader(i int) *arrow.ViewHeader { | ||
| if i < 0 || i >= a.array.data.length { | ||
| panic("arrow/array: index out of range") | ||
| } | ||
| return &a.values[a.array.data.offset+i] | ||
| } | ||
|
|
||
| func (a *BinaryView) Value(i int) []byte { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since there is no compiler guarantee in Go that the slice returned by this method will remain unchanged (whether intentionally or unintentionally) by users of this API, I would suggest adding a comment to this method. This comment should clearly specify that it's unsafe to make any kind of changes to the returned slice.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a concern present everywhere in Arrow though, so a comment here could be understood as implying that places without a comment like this allow buffers to be mutated.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is probably a reflex from my Rust experience where this slice will be immutable. In the context of Go, I agree with you that sporadically adding a comment might be counterproductive if we do not apply this globally.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the top level, there do already exist comments that state that it is intended that all Arrow Arrays be immutable. I agree with the concern that adding a comment here specifically could be counterproductive. If you can think of a good place to put such a comment that would be more universal, I'd be more than happy to do so. |
||
| s := a.ValueHeader(i) | ||
| if s.IsInline() { | ||
| return s.InlineBytes() | ||
| } | ||
| start := s.BufferOffset() | ||
| buf := a.dataBuffers[s.BufferIndex()] | ||
| return buf.Bytes()[start : start+int32(s.Len())] | ||
| } | ||
|
|
||
| // ValueString returns the value at index i as a string instead of | ||
| // a byte slice, without copying the underlying data. | ||
| func (a *BinaryView) ValueString(i int) string { | ||
| b := a.Value(i) | ||
| return *(*string)(unsafe.Pointer(&b)) | ||
| } | ||
|
|
||
| func (a *BinaryView) String() string { | ||
| var o strings.Builder | ||
| o.WriteString("[") | ||
| for i := 0; i < a.Len(); i++ { | ||
| if i > 0 { | ||
| o.WriteString(" ") | ||
| } | ||
| switch { | ||
| case a.IsNull(i): | ||
| o.WriteString(NullValueStr) | ||
| default: | ||
| fmt.Fprintf(&o, "%q", a.ValueString(i)) | ||
| } | ||
| } | ||
| o.WriteString("]") | ||
| return o.String() | ||
| } | ||
|
|
||
| // ValueStr is paired with AppendValueFromString in that it returns | ||
| // the value at index i as a string: Semantically this means that for | ||
| // a null value it will return the string "(null)", otherwise it will | ||
| // return the value as a base64 encoded string suitable for CSV/JSON. | ||
| // | ||
| // This is always going to be less performant than just using ValueString | ||
| // and exists to fulfill the Array interface to provide a method which | ||
| // can produce a human readable string for a given index. | ||
| func (a *BinaryView) ValueStr(i int) string { | ||
|
zeroshade marked this conversation as resolved.
Outdated
zeroshade marked this conversation as resolved.
|
||
| if a.IsNull(i) { | ||
| return NullValueStr | ||
| } | ||
| return base64.StdEncoding.EncodeToString(a.Value(i)) | ||
| } | ||
|
|
||
| func (a *BinaryView) GetOneForMarshal(i int) interface{} { | ||
| if a.IsNull(i) { | ||
| return nil | ||
| } | ||
| return a.Value(i) | ||
| } | ||
|
|
||
| func (a *BinaryView) MarshalJSON() ([]byte, error) { | ||
|
bkietz marked this conversation as resolved.
|
||
| vals := make([]interface{}, a.Len()) | ||
| for i := 0; i < a.Len(); i++ { | ||
| vals[i] = a.GetOneForMarshal(i) | ||
| } | ||
| // golang marshal standard says that []byte will be marshalled | ||
| // as a base64-encoded string | ||
| return json.Marshal(vals) | ||
| } | ||
|
|
||
| func arrayEqualBinaryView(left, right *BinaryView) bool { | ||
| leftBufs, rightBufs := left.dataBuffers, right.dataBuffers | ||
| for i := 0; i < left.Len(); i++ { | ||
| if left.IsNull(i) { | ||
|
zeroshade marked this conversation as resolved.
Outdated
|
||
| continue | ||
| } | ||
| if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { | ||
| return false | ||
| } | ||
| } | ||
| return true | ||
| } | ||
|
|
||
| var ( | ||
| _ arrow.Array = (*Binary)(nil) | ||
| _ arrow.Array = (*LargeBinary)(nil) | ||
| _ arrow.Array = (*BinaryView)(nil) | ||
| ) | ||
Uh oh!
There was an error while loading. Please reload this page.