/Users/andrewlamb/Software/arrow-rs/arrow-avro/src/reader/cursor.rs
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | use crate::reader::vlq::read_varint; |
19 | | use arrow_schema::ArrowError; |
20 | | |
/// Cursor over a borrowed byte slice that performs low-level decoding of
/// Avro-encoded values.
///
/// See <https://avro.apache.org/docs/1.11.1/specification/#encodings>
#[derive(Debug)]
pub(crate) struct AvroCursor<'a> {
    /// Bytes that have not been consumed yet.
    buf: &'a [u8],
    /// Length of the slice the cursor was created with; the current position
    /// is derived as `start_len - buf.len()`.
    start_len: usize,
}
29 | | |
30 | | impl<'a> AvroCursor<'a> { |
31 | 189 | pub(crate) fn new(buf: &'a [u8]) -> Self { |
32 | 189 | Self { |
33 | 189 | buf, |
34 | 189 | start_len: buf.len(), |
35 | 189 | } |
36 | 189 | } |
37 | | |
38 | | /// Returns the current cursor position |
39 | | #[inline] |
40 | 144 | pub(crate) fn position(&self) -> usize { |
41 | 144 | self.start_len - self.buf.len() |
42 | 144 | } |
43 | | |
44 | | /// Read a single `u8` |
45 | | #[inline] |
46 | 311 | pub(crate) fn get_u8(&mut self) -> Result<u8, ArrowError> { |
47 | 311 | match self.buf.first().copied() { |
48 | 311 | Some(x) => { |
49 | 311 | self.buf = &self.buf[1..]; |
50 | 311 | Ok(x) |
51 | | } |
52 | 0 | None => Err(ArrowError::ParseError("Unexpected EOF".to_string())), |
53 | | } |
54 | 311 | } |
55 | | |
56 | | #[inline] |
57 | 311 | pub(crate) fn get_bool(&mut self) -> Result<bool, ArrowError> { |
58 | 311 | Ok(self.get_u8()?0 != 0) |
59 | 311 | } |
60 | | |
61 | 7.39k | pub(crate) fn read_vlq(&mut self) -> Result<u64, ArrowError> { |
62 | 7.39k | let (val7.39k , offset7.39k ) = read_varint(self.buf) |
63 | 7.39k | .ok_or_else(|| ArrowError::ParseError("bad varint"1 .to_string1 ()))?1 ; |
64 | 7.39k | self.buf = &self.buf[offset..]; |
65 | 7.39k | Ok(val) |
66 | 7.39k | } |
67 | | |
68 | | #[inline] |
69 | 1.43k | pub(crate) fn get_int(&mut self) -> Result<i32, ArrowError> { |
70 | 1.43k | let varint1.43k = self.read_vlq()?1 ; |
71 | 1.43k | let val: u32 = varint |
72 | 1.43k | .try_into() |
73 | 1.43k | .map_err(|_| ArrowError::ParseError("varint overflow"0 .to_string0 ()))?0 ; |
74 | 1.43k | Ok((val >> 1) as i32 ^ -((val & 1) as i32)) |
75 | 1.43k | } |
76 | | |
77 | | #[inline] |
78 | 1.74k | pub(crate) fn get_long(&mut self) -> Result<i64, ArrowError> { |
79 | 1.74k | let val = self.read_vlq()?0 ; |
80 | 1.74k | Ok((val >> 1) as i64 ^ -((val & 1) as i64)) |
81 | 1.74k | } |
82 | | |
83 | 734 | pub(crate) fn get_bytes(&mut self) -> Result<&'a [u8], ArrowError> { |
84 | 734 | let len: usize = self.get_long()?0 .try_into().map_err(|_| {0 |
85 | 0 | ArrowError::ParseError("offset overflow reading avro bytes".to_string()) |
86 | 0 | })?; |
87 | | |
88 | 734 | if (self.buf.len() < len) { |
89 | 0 | return Err(ArrowError::ParseError( |
90 | 0 | "Unexpected EOF reading bytes".to_string(), |
91 | 0 | )); |
92 | 734 | } |
93 | 734 | let ret = &self.buf[..len]; |
94 | 734 | self.buf = &self.buf[len..]; |
95 | 734 | Ok(ret) |
96 | 734 | } |
97 | | |
98 | | #[inline] |
99 | 309 | pub(crate) fn get_float(&mut self) -> Result<f32, ArrowError> { |
100 | 309 | if (self.buf.len() < 4) { |
101 | 0 | return Err(ArrowError::ParseError( |
102 | 0 | "Unexpected EOF reading float".to_string(), |
103 | 0 | )); |
104 | 309 | } |
105 | 309 | let ret = f32::from_le_bytes(self.buf[..4].try_into().unwrap()); |
106 | 309 | self.buf = &self.buf[4..]; |
107 | 309 | Ok(ret) |
108 | 309 | } |
109 | | |
110 | | #[inline] |
111 | 317 | pub(crate) fn get_double(&mut self) -> Result<f64, ArrowError> { |
112 | 317 | if (self.buf.len() < 8) { |
113 | 0 | return Err(ArrowError::ParseError( |
114 | 0 | "Unexpected EOF reading float".to_string(), |
115 | 0 | )); |
116 | 317 | } |
117 | 317 | let ret = f64::from_le_bytes(self.buf[..8].try_into().unwrap()); |
118 | 317 | self.buf = &self.buf[8..]; |
119 | 317 | Ok(ret) |
120 | 317 | } |
121 | | |
122 | | /// Read exactly `n` bytes from the buffer (e.g. for Avro `fixed`). |
123 | 218 | pub(crate) fn get_fixed(&mut self, n: usize) -> Result<&'a [u8], ArrowError> { |
124 | 218 | if self.buf.len() < n { |
125 | 0 | return Err(ArrowError::ParseError( |
126 | 0 | "Unexpected EOF reading fixed".to_string(), |
127 | 0 | )); |
128 | 218 | } |
129 | 218 | let ret = &self.buf[..n]; |
130 | 218 | self.buf = &self.buf[n..]; |
131 | 218 | Ok(ret) |
132 | 218 | } |
133 | | } |