Skip to content

Commit e7ac697

Browse files
authored
Add methods to_tuples() and to_dict() (#1439)
This PR adds two new Frame methods, `.to_tuples()` and `.to_dict()`, and replaces the Python implementation of `.to_list()` with one implemented in C++. `Frame.to_tuples()` converts the frame into a list of tuples, where each tuple represents one row. We already have the ability to convert a list of tuples into a Frame; this is just the inverse of that operation: `list_of_tuples == dt.Frame(list_of_tuples).to_tuples()`. `Frame.to_dict()` is similar to `.to_list()`, except that it returns a dictionary of columns (keyed by column name) instead of a list. This operation is likewise the inverse of creating a Frame from a dictionary: `dict_of_lists == dt.Frame(dict_of_lists).to_dict()`. Closes #1400
1 parent 370d430 commit e7ac697

File tree

6 files changed

+351
-29
lines changed

6 files changed

+351
-29
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1111

1212
### [Unreleased](https://github.com/h2oai/datatable/compare/HEAD...v0.7.0)
1313

14+
#### Added
15+
- methods `Frame.to_tuples()` and `Frame.to_dict()` (#1400).
16+
1417
#### Fixed
1518
- crash when an int-column row selector is applied to a Frame which already
1619
had another row filter applied (#1437).

c/frame/py_frame.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ void Frame::Type::init_methods_and_getsets(Methods& mm, GetSetters& gs)
100100
mm.add<&Frame::copy, args_copy>();
101101
mm.add<&Frame::replace, args_replace>();
102102
mm.add<&Frame::_repr_html_, args__repr_html_>();
103+
mm.add<&Frame::to_dict, args_to_dict>();
104+
mm.add<&Frame::to_list, args_to_list>();
105+
mm.add<&Frame::to_tuples, args_to_tuples>();
103106
}
104107

105108

c/frame/py_frame.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ class Frame : public PyObject {
5050
static PKArgs args_colindex;
5151
static PKArgs args_replace;
5252
static NoArgs args_copy;
53+
static NoArgs args_to_dict;
54+
static NoArgs args_to_list;
55+
static NoArgs args_to_tuples;
5356
static const char* classname();
5457
static const char* classdoc();
5558
static bool is_subclassable() { return true; }
@@ -88,6 +91,9 @@ class Frame : public PyObject {
8891
oobj colindex(const PKArgs&);
8992
oobj copy(const NoArgs&);
9093
void replace(const PKArgs&);
94+
oobj to_dict(const NoArgs&);
95+
oobj to_list(const NoArgs&);
96+
oobj to_tuples(const NoArgs&);
9197

9298
private:
9399
static bool internal_construction;

c/frame/to_python.cc

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
//------------------------------------------------------------------------------
2+
// Copyright 2018 H2O.ai
3+
//
4+
// Permission is hereby granted, free of charge, to any person obtaining a
5+
// copy of this software and associated documentation files (the "Software"),
6+
// to deal in the Software without restriction, including without limitation
7+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
// and/or sell copies of the Software, and to permit persons to whom the
9+
// Software is furnished to do so, subject to the following conditions:
10+
//
11+
// The above copyright notice and this permission notice shall be included in
12+
// all copies or substantial portions of the Software.
13+
//
14+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20+
// IN THE SOFTWARE.
21+
//------------------------------------------------------------------------------
22+
#include "frame/py_frame.h"
23+
#include "python/_all.h"
24+
#include "python/args.h"
25+
#include "python/string.h"
26+
#include "python/tuple.h"
27+
28+
namespace py {
29+
30+
31+
//------------------------------------------------------------------------------
32+
// converters for various stypes
33+
//------------------------------------------------------------------------------
34+
35+
class converter {
36+
public:
37+
virtual ~converter();
38+
virtual oobj to_oobj(int64_t row) const = 0;
39+
};
40+
using convptr = std::unique_ptr<converter>;
41+
42+
converter::~converter() {}
43+
44+
45+
46+
class bool8_converter : public converter {
47+
private:
48+
const int8_t* values;
49+
public:
50+
bool8_converter(const Column*);
51+
oobj to_oobj(int64_t row) const override;
52+
};
53+
54+
bool8_converter::bool8_converter(const Column* col) {
55+
values = dynamic_cast<const BoolColumn*>(col)->elements_r();
56+
}
57+
58+
oobj bool8_converter::to_oobj(int64_t row) const {
59+
int8_t x = values[row];
60+
return x == 0? py::False() : x == 1? py::True() : py::None();
61+
}
62+
63+
64+
65+
template <typename T>
66+
class int_converter : public converter {
67+
private:
68+
const T* values;
69+
public:
70+
int_converter(const Column*);
71+
oobj to_oobj(int64_t row) const override;
72+
};
73+
74+
template <typename T>
75+
int_converter<T>::int_converter(const Column* col) {
76+
values = dynamic_cast<const IntColumn<T>*>(col)->elements_r();
77+
}
78+
79+
template <typename T>
80+
oobj int_converter<T>::to_oobj(int64_t row) const {
81+
T x = values[row];
82+
return ISNA<T>(x)? py::None() : oint(x);
83+
}
84+
85+
86+
87+
template <typename T>
88+
class float_converter : public converter {
89+
private:
90+
const T* values;
91+
public:
92+
float_converter(const Column*);
93+
oobj to_oobj(int64_t row) const override;
94+
};
95+
96+
template <typename T>
97+
float_converter<T>::float_converter(const Column* col) {
98+
values = dynamic_cast<const RealColumn<T>*>(col)->elements_r();
99+
}
100+
101+
template <typename T>
102+
oobj float_converter<T>::to_oobj(int64_t row) const {
103+
T x = values[row];
104+
return ISNA<T>(x)? py::None() : ofloat(x);
105+
}
106+
107+
108+
109+
template <typename T>
110+
class string_converter : public converter {
111+
private:
112+
const char* strdata;
113+
const T* offsets;
114+
public:
115+
string_converter(const Column*);
116+
oobj to_oobj(int64_t row) const override;
117+
};
118+
119+
template <typename T>
120+
string_converter<T>::string_converter(const Column* col) {
121+
auto scol = dynamic_cast<const StringColumn<T>*>(col);
122+
strdata = scol->strdata();
123+
offsets = scol->offsets();
124+
}
125+
126+
template <typename T>
127+
oobj string_converter<T>::to_oobj(int64_t row) const {
128+
T end = offsets[row];
129+
if (ISNA<T>(end)) return py::None();
130+
T start = offsets[row - 1] & ~GETNA<T>();
131+
return ostring(strdata + start, end - start);
132+
}
133+
134+
135+
136+
class pyobj_converter : public converter {
137+
private:
138+
const PyObject* const* values;
139+
public:
140+
pyobj_converter(const Column*);
141+
oobj to_oobj(int64_t row) const override;
142+
};
143+
144+
pyobj_converter::pyobj_converter(const Column* col) {
145+
values = dynamic_cast<const PyObjectColumn*>(col)->elements_r();
146+
}
147+
148+
oobj pyobj_converter::to_oobj(int64_t row) const {
149+
return oobj(values[row]);
150+
}
151+
152+
153+
154+
// Create the converter that matches the stype of `col`.
//
// Returns an owning pointer to a freshly allocated converter bound to `col`.
// Throws ValueError when the column's stype has no converter.
static convptr make_converter(const Column* col) {
  SType stype = col->stype();
  switch (stype) {
    case SType::BOOL:    return convptr(new bool8_converter(col));
    case SType::INT8:    return convptr(new int_converter<int8_t>(col));
    case SType::INT16:   return convptr(new int_converter<int16_t>(col));
    case SType::INT32:   return convptr(new int_converter<int32_t>(col));
    case SType::INT64:   return convptr(new int_converter<int64_t>(col));
    case SType::FLOAT32: return convptr(new float_converter<float>(col));
    case SType::FLOAT64: return convptr(new float_converter<double>(col));
    case SType::STR32:   return convptr(new string_converter<uint32_t>(col));
    case SType::STR64:   return convptr(new string_converter<uint64_t>(col));
    case SType::OBJ:     return convptr(new pyobj_converter(col));
    default:
      // The converters produce python objects, not strings, so the message
      // describes that operation.
      throw ValueError() << "Cannot convert column of type " << stype
                         << " into python objects";
  }
}
171+
172+
173+
174+
//------------------------------------------------------------------------------
175+
// Frame's API
176+
//------------------------------------------------------------------------------
177+
178+
NoArgs Frame::Type::args_to_tuples("to_tuples",
179+
R"(to_tuples(self)
180+
--
181+
182+
Convert the Frame into a list of tuples, by rows.
183+
184+
Returns a list having `nrows` tuples, where each tuple has length `ncols` and
185+
contains data from each respective row of the Frame.
186+
187+
Examples
188+
--------
189+
>>> DT = dt.Frame(A=[1, 2, 3], B=["aye", "nay", "tain"])
190+
>>> DT.to_tuples()
191+
[(1, "aye"), (2, "nay"), (3, "tain")]
192+
)");
193+
194+
195+
oobj Frame::to_tuples(const NoArgs&) {
196+
std::vector<py::otuple> list_of_tuples;
197+
for (size_t i = 0; i < dt->nrows; ++i) {
198+
list_of_tuples.push_back(py::otuple(dt->ncols));
199+
}
200+
for (size_t j = 0; j < dt->ncols; ++j) {
201+
const Column* col = dt->columns[j];
202+
const RowIndex& ri = col->rowindex();
203+
auto conv = make_converter(col);
204+
ri.strided_loop2(0, static_cast<int64_t>(dt->nrows), 1,
205+
[&](int64_t i, int64_t ii) {
206+
oobj x = ii >= 0? conv->to_oobj(ii) : py::None();
207+
list_of_tuples[static_cast<size_t>(i)].set(j, std::move(x));
208+
});
209+
}
210+
py::olist res(dt->nrows);
211+
for (size_t i = 0; i < dt->nrows; ++i) {
212+
res.set(i, std::move(list_of_tuples[i]));
213+
}
214+
return std::move(res);
215+
}
216+
217+
218+
219+
NoArgs Frame::Type::args_to_list("to_list",
220+
R"(to_list(self)
221+
--
222+
223+
Convert the Frame into a list of lists, by columns.
224+
225+
Returns a list of `ncols` lists, each inner list representing one column of
226+
the Frame.
227+
228+
Examples
229+
--------
230+
>>> DT = dt.Frame(A=[1, 2, 3], B=["aye", "nay", "tain"])
231+
>>> DT.to_list()
232+
[[1, 2, 3], ["aye", "nay", "tain"]]
233+
)");
234+
235+
oobj Frame::to_list(const NoArgs&) {
236+
py::olist res(dt->ncols);
237+
for (size_t j = 0; j < dt->ncols; ++j) {
238+
py::olist pycol(dt->nrows);
239+
const Column* col = dt->columns[j];
240+
const RowIndex& ri = col->rowindex();
241+
auto conv = make_converter(col);
242+
ri.strided_loop2(0, static_cast<int64_t>(dt->nrows), 1,
243+
[&](int64_t i, int64_t ii) {
244+
oobj x = ii >= 0? conv->to_oobj(ii) : py::None();
245+
pycol.set(i, std::move(x));
246+
});
247+
res.set(j, std::move(pycol));
248+
}
249+
return std::move(res);
250+
}
251+
252+
253+
254+
NoArgs Frame::Type::args_to_dict("to_dict",
255+
R"(to_dict(self)
256+
--
257+
258+
Convert the Frame into a dictionary of lists, by columns.
259+
260+
Returns a dictionary with `ncols` entries, each being the `colname: coldata`
261+
pair, where `colname` is a string, and `coldata` is an array of column's data.
262+
263+
Examples
264+
--------
265+
>>> DT = dt.Frame(A=[1, 2, 3], B=["aye", "nay", "tain"])
266+
>>> DT.to_dict()
267+
{"A": [1, 2, 3], "B": ["aye", "nay", "tain"]}
268+
)");
269+
270+
oobj Frame::to_dict(const NoArgs&) {
271+
py::otuple names = dt->get_pynames();
272+
py::odict res;
273+
for (size_t j = 0; j < dt->ncols; ++j) {
274+
py::olist pycol(dt->nrows);
275+
const Column* col = dt->columns[j];
276+
const RowIndex& ri = col->rowindex();
277+
auto conv = make_converter(col);
278+
ri.strided_loop2(0, static_cast<int64_t>(dt->nrows), 1,
279+
[&](int64_t i, int64_t ii) {
280+
oobj x = ii >= 0? conv->to_oobj(ii) : py::None();
281+
pycol.set(i, std::move(x));
282+
});
283+
res.set(names[j], pycol);
284+
}
285+
return std::move(res);
286+
}
287+
288+
289+
290+
};

datatable/frame.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -483,15 +483,10 @@ def to_numpy(self, stype=None):
483483
return res
484484

485485

486-
def to_list(self):
487-
"""
488-
Convert the Frame into a python list-of-lists.
489-
"""
490-
return self._dt.window(0, self.nrows, 0, self.ncols).data
491-
486+
def topython(self): # DEPRECATED
487+
# Deprecated alias kept under the old name: it simply forwards to `to_list()`.
return self.to_list()
492488

493489
# Old names
494-
topython = to_list
495490
topandas = to_pandas
496491
tonumpy = to_numpy
497492

0 commit comments

Comments
 (0)