|
| 1 | +/* |
| 2 | + * SPDX-License-Identifier: BSD-2-Clause |
| 3 | + * |
| 4 | + * Copyright 2010-2024, Tarantool AUTHORS, please see AUTHORS file. |
| 5 | + */ |
| 6 | +#include "arrow_ipc.h" |
| 7 | + |
| 8 | +#include "diag.h" |
| 9 | +#include "error.h" |
| 10 | +#include "small/region.h" |
| 11 | +#include "nanoarrow/nanoarrow_ipc.h" |
| 12 | + |
| 13 | +int |
| 14 | +arrow_ipc_encode(struct ArrowArray *array, struct ArrowSchema *schema, |
| 15 | + struct region *region, const char **ret_data, |
| 16 | + const char **ret_data_end) |
| 17 | +{ |
| 18 | + ArrowErrorCode rc; |
| 19 | + struct ArrowError error; |
| 20 | + struct ArrowBuffer buffer; |
| 21 | + ArrowBufferInit(&buffer); |
| 22 | + |
| 23 | + struct ArrowArrayView array_view; |
| 24 | + rc = ArrowArrayViewInitFromSchema(&array_view, schema, &error); |
| 25 | + if (rc != NANOARROW_OK) { |
| 26 | + diag_set(ClientError, ER_ARROW_IPC_ENCODE, |
| 27 | + "ArrowArrayViewInitFromSchema", error.message); |
| 28 | + return -1; |
| 29 | + } |
| 30 | + |
| 31 | + /* Set buffer sizes and data pointers from an array. */ |
| 32 | + rc = ArrowArrayViewSetArray(&array_view, array, &error); |
| 33 | + if (rc != NANOARROW_OK) { |
| 34 | + diag_set(ClientError, ER_ARROW_IPC_ENCODE, |
| 35 | + "ArrowArrayViewSetArray", error.message); |
| 36 | + goto error1; |
| 37 | + } |
| 38 | + |
| 39 | + /* All bytes written to the stream will be appended to the buffer. */ |
| 40 | + struct ArrowIpcOutputStream stream; |
| 41 | + rc = ArrowIpcOutputStreamInitBuffer(&stream, &buffer); |
| 42 | + if (rc != NANOARROW_OK) { |
| 43 | + diag_set(ClientError, ER_ARROW_IPC_ENCODE, |
| 44 | + "ArrowIpcOutputStreamInitBuffer", NULL); |
| 45 | + goto error1; |
| 46 | + } |
| 47 | + |
| 48 | + /* |
| 49 | + * A stream writer which encodes schema and array into an IPC byte |
| 50 | + * stream. The writer takes ownership of the output byte stream. |
| 51 | + */ |
| 52 | + struct ArrowIpcWriter writer; |
| 53 | + rc = ArrowIpcWriterInit(&writer, &stream); |
| 54 | + if (rc != NANOARROW_OK) { |
| 55 | + diag_set(ClientError, ER_ARROW_IPC_ENCODE, "ArrowIpcWriterInit", |
| 56 | + NULL); |
| 57 | + stream.release(&stream); |
| 58 | + goto error1; |
| 59 | + } |
| 60 | + |
| 61 | + rc = ArrowIpcWriterWriteSchema(&writer, schema, &error); |
| 62 | + if (rc != NANOARROW_OK) { |
| 63 | + diag_set(ClientError, ER_ARROW_IPC_ENCODE, |
| 64 | + "ArrowIpcWriterWriteSchema", error.message); |
| 65 | + goto error2; |
| 66 | + } |
| 67 | + |
| 68 | + rc = ArrowIpcWriterWriteArrayView(&writer, &array_view, &error); |
| 69 | + if (rc != NANOARROW_OK) { |
| 70 | + diag_set(ClientError, ER_ARROW_IPC_ENCODE, |
| 71 | + "ArrowIpcWriterWriteArrayView", error.message); |
| 72 | + goto error2; |
| 73 | + } |
| 74 | + |
| 75 | + /* |
| 76 | + * TODO: It is possible to avoid extra `memcpy()' by switching |
| 77 | + * `ArrowBuffer' to `region_realloc()'. |
| 78 | + */ |
| 79 | + char *data = xregion_alloc(region, buffer.size_bytes); |
| 80 | + memcpy(data, buffer.data, buffer.size_bytes); |
| 81 | + *ret_data = data; |
| 82 | + *ret_data_end = data + buffer.size_bytes; |
| 83 | + |
| 84 | + ArrowIpcWriterReset(&writer); |
| 85 | + ArrowArrayViewReset(&array_view); |
| 86 | + ArrowBufferReset(&buffer); |
| 87 | + return 0; |
| 88 | +error2: |
| 89 | + ArrowIpcWriterReset(&writer); |
| 90 | +error1: |
| 91 | + ArrowArrayViewReset(&array_view); |
| 92 | + ArrowBufferReset(&buffer); |
| 93 | + return -1; |
| 94 | +} |
| 95 | + |
| 96 | +int |
| 97 | +arrow_ipc_decode(struct ArrowArray *array, struct ArrowSchema *schema, |
| 98 | + const char *data, const char *data_end) |
| 99 | +{ |
| 100 | + ssize_t size = data_end - data; |
| 101 | + if (size <= 0) { |
| 102 | + diag_set(ClientError, ER_ARROW_IPC_DECODE, NULL, |
| 103 | + "Unexpected data size"); |
| 104 | + return -1; |
| 105 | + } |
| 106 | + |
| 107 | + ArrowErrorCode rc; |
| 108 | + struct ArrowError error; |
| 109 | + struct ArrowBuffer buffer; |
| 110 | + ArrowBufferInit(&buffer); |
| 111 | + |
| 112 | + rc = ArrowBufferAppend(&buffer, data, size); |
| 113 | + if (rc != NANOARROW_OK) { |
| 114 | + diag_set(ClientError, ER_ARROW_IPC_DECODE, "ArrowBufferAppend", |
| 115 | + NULL); |
| 116 | + ArrowBufferReset(&buffer); |
| 117 | + return -1; |
| 118 | + } |
| 119 | + |
| 120 | + /* |
| 121 | + * Create an input stream from a buffer. |
| 122 | + * The stream takes ownership of the buffer and reads bytes from it. |
| 123 | + */ |
| 124 | + struct ArrowIpcInputStream input_stream; |
| 125 | + rc = ArrowIpcInputStreamInitBuffer(&input_stream, &buffer); |
| 126 | + if (rc != NANOARROW_OK) { |
| 127 | + diag_set(ClientError, ER_ARROW_IPC_DECODE, |
| 128 | + "ArrowIpcInputStreamInitBuffer", NULL); |
| 129 | + ArrowBufferReset(&buffer); |
| 130 | + return -1; |
| 131 | + } |
| 132 | + |
| 133 | + /* |
| 134 | + * Initialize an array stream from an input stream of bytes. |
| 135 | + * The array_stream takes ownership of input_stream. |
| 136 | + */ |
| 137 | + struct ArrowArrayStream array_stream; |
| 138 | + rc = ArrowIpcArrayStreamReaderInit(&array_stream, &input_stream, NULL); |
| 139 | + if (rc != NANOARROW_OK) { |
| 140 | + diag_set(ClientError, ER_ARROW_IPC_DECODE, |
| 141 | + "ArrowIpcArrayStreamReaderInit", NULL); |
| 142 | + input_stream.release(&input_stream); |
| 143 | + return -1; |
| 144 | + } |
| 145 | + |
| 146 | + rc = ArrowArrayStreamGetSchema(&array_stream, schema, &error); |
| 147 | + if (rc != NANOARROW_OK) { |
| 148 | + diag_set(ClientError, ER_ARROW_IPC_DECODE, |
| 149 | + "ArrowArrayStreamGetSchema", error.message); |
| 150 | + goto error; |
| 151 | + } |
| 152 | + |
| 153 | + rc = ArrowArrayStreamGetNext(&array_stream, array, &error); |
| 154 | + if (rc != NANOARROW_OK) { |
| 155 | + diag_set(ClientError, ER_ARROW_IPC_DECODE, |
| 156 | + "ArrowArrayStreamGetNext", error.message); |
| 157 | + schema->release(schema); |
| 158 | + goto error; |
| 159 | + } |
| 160 | + |
| 161 | + ArrowArrayStreamRelease(&array_stream); |
| 162 | + return 0; |
| 163 | +error: |
| 164 | + ArrowArrayStreamRelease(&array_stream); |
| 165 | + return -1; |
| 166 | +} |
0 commit comments