Skip to content

Commit 930d9e8

Browse files
authored
Implement C API for datatable (#1469)
This PR adds an external C API to datatable. The user of this API will have to include the file "datatable.h" located in the directory "datatable/include/". This file will also be included in the `datatable` source and binary distributions.

The following API functions are currently implemented:
- `size_t DtABIVersion()` Check the ABI version of the current datatable package. This method allows you to verify that the runtime ABI version of datatable matches your compile-time expectations.
- `int DtFrame_Check(PyObject* ob)` Check whether the python object is a datatable Frame.
- `size_t DtFrame_NRows(PyObject* pydt)`, `size_t DtFrame_NColumns(PyObject* pydt)` Get the shape of the Frame.
- `int DtFrame_ColumnStype(PyObject* pydt, size_t i)` Get the stype of column `i`.
- `PyObject* DtFrame_ColumnRowindex(PyObject* pydt, size_t i)` Get the Rowindex of column `i`.
- `const void* DtFrame_ColumnDataR(PyObject* pydt, size_t i)`, `void* DtFrame_ColumnDataW(PyObject* pydt, size_t i)`, `const char* DtFrame_ColumnStringDataR(PyObject* pydt, size_t i)` Get the data pointer(s) of column `i`.
- `int DtRowindex_Check(PyObject* ob)` Check whether the python object is a datatable Rowindex.
- `int DtRowindex_Type(PyObject* pyri)` Find the type of the Rowindex.
- `size_t DtRowindex_Size(PyObject* pyri)` Get the number of elements in the Rowindex.
- `int DtRowindex_UnpackSlice(PyObject* pyri, size_t* start, size_t* length, size_t* step)` Get the fields of a slice Rowindex.
- `const void* DtRowindex_ArrayData(PyObject* pyri)` Get the data of an array Rowindex.

Closes #1382
1 parent 1b59d1b commit 930d9e8

21 files changed

+506
-126
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
3939
- function `dt.repeat(frame, n)` that creates a Frame by row-binding `n`
4040
copies of the `frame`.
4141

42+
- `datatable` now exposes a C API, to allow other C/C++ libraries to interact
43+
with datatable Frames directly (#1469).
44+
4245

4346
#### Fixed
4447

MANIFEST.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ include CHANGELOG.md
44
include setup.py
55
include setup.cfg
66
include ci/setup_utils.py
7-
recursive-include c *.c *.cc *.h
8-
recursive-include datatable *.py
7+
recursive-include c *.cc *.h
8+
recursive-include datatable *.py *.h
99
recursive-include tests *.py
1010

1111
exclude datatable/lib/*.*

c/api.cc

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
//------------------------------------------------------------------------------
2+
// Copyright 2018 H2O.ai
3+
//
4+
// Permission is hereby granted, free of charge, to any person obtaining a
5+
// copy of this software and associated documentation files (the "Software"),
6+
// to deal in the Software without restriction, including without limitation
7+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8+
// and/or sell copies of the Software, and to permit persons to whom the
9+
// Software is furnished to do so, subject to the following conditions:
10+
//
11+
// The above copyright notice and this permission notice shall be included in
12+
// all copies or substantial portions of the Software.
13+
//
14+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20+
// IN THE SOFTWARE.
21+
//------------------------------------------------------------------------------
22+
#include "../datatable/include/datatable.h"
23+
#include "datatable.h"
24+
#include "frame/py_frame.h"
25+
#include "rowindex.h"
26+
#include "py_rowindex.h"
27+
extern "C" {
28+
29+
30+
//------------------------------------------------------------------------------
31+
// Helper functions
32+
//------------------------------------------------------------------------------
33+
34+
// Check that `i` is a valid column index for `dt`.
// Returns 0 when `i < dt->ncols`. Otherwise sets a Python IndexError and
// returns -1.
static int _column_index_oob(DataTable* dt, size_t i) {
  const bool out_of_range = !(i < dt->ncols);
  if (out_of_range) {
    PyErr_Format(PyExc_IndexError, "Column %zu does not exist in the Frame", i);
    return -1;
  }
  return 0;
}
39+
40+
// Treat `pydt` as a py::Frame and return whatever its get_datatable()
// reports. No type check is performed here.
static DataTable* _extract_dt(PyObject* pydt) {
  auto frame = static_cast<py::Frame*>(pydt);
  return frame->get_datatable();
}
43+
44+
// Return the RowIndex pointer stored in the `ref` field of a pyrowindex::obj.
// Python None maps to nullptr. No type check is performed on other objects.
static RowIndex* _extract_ri(PyObject* pyri) {
  if (pyri != Py_None) {
    auto wrapper = static_cast<pyrowindex::obj*>(pyri);
    return wrapper->ref;
  }
  return nullptr;
}
48+
49+
50+
// Report the ABI version implemented by this file (currently 1).
size_t DtABIVersion() {
  const size_t abi_version = 1;
  return abi_version;
}
53+
54+
55+
56+
//------------------------------------------------------------------------------
57+
// Frame
58+
//------------------------------------------------------------------------------
59+
60+
// Return the result of PyObject_IsInstance(ob, Frame type) for a non-null
// `ob`, and 0 when `ob` is null. If the instance check itself fails (-1),
// the Python error is cleared and 0 is returned, so this function never
// leaves an exception set.
int DtFrame_Check(PyObject* ob) {
  if (!ob) return 0;
  PyObject* frame_type = reinterpret_cast<PyObject*>(&py::Frame::Type::type);
  const int is_frame = PyObject_IsInstance(ob, frame_type);
  if (is_frame != -1) return is_frame;
  PyErr_Clear();
  return 0;
}
70+
71+
72+
// Return the `ncols` field of the DataTable behind `pydt`.
// `pydt` is not type-checked here.
size_t DtFrame_NColumns(PyObject* pydt) {
  return _extract_dt(pydt)->ncols;
}
76+
77+
// Return the `nrows` field of the DataTable behind `pydt`.
// `pydt` is not type-checked here.
size_t DtFrame_NRows(PyObject* pydt) {
  return _extract_dt(pydt)->nrows;
}
81+
82+
83+
// Return the stype of column `i` converted to int.
// On an out-of-range index, a Python IndexError is set and -1 is returned.
int DtFrame_ColumnStype(PyObject* pydt, size_t i) {
  DataTable* dt = _extract_dt(pydt);
  if (_column_index_oob(dt, i) != 0) return -1;
  const SType st = dt->columns[i]->stype();  // stype() is noexcept
  return static_cast<int>(st);
}
88+
89+
90+
// Return the rowindex of column `i` as a Python object: the result of
// pyrowindex::wrap() when the column's RowIndex tests true, otherwise
// Python None. On an out-of-range index, a Python IndexError is set and
// nullptr is returned.
PyObject* DtFrame_ColumnRowindex(PyObject* pydt, size_t i) {
  DataTable* dt = _extract_dt(pydt);
  if (_column_index_oob(dt, i) != 0) return nullptr;
  const RowIndex& ri = dt->columns[i]->rowindex();  // rowindex() is noexcept
  PyObject* result = ri ? pyrowindex::wrap(ri) : py::None().release();
  return result;
}
96+
97+
98+
// Return the read-only data pointer of column `i`, as given by the column's
// data() method.
// Returns nullptr with a Python error set when the index is out of range or
// when data() throws a std::exception.
const void* DtFrame_ColumnDataR(PyObject* pydt, size_t i) {
  DataTable* dt = _extract_dt(pydt);
  if (_column_index_oob(dt, i) != 0) return nullptr;
  const void* data = nullptr;
  try {
    data = dt->columns[i]->data();
  } catch (const std::exception& e) {
    // Convert the C++ exception into a Python error; it must not leave
    // this extern "C" function.
    exception_to_python(e);
    data = nullptr;
  }
  return data;
}
108+
109+
// Return the writable data pointer of column `i`, as given by the column's
// data_w() method.
// Returns nullptr with a Python error set when the index is out of range or
// when data_w() throws a std::exception.
void* DtFrame_ColumnDataW(PyObject* pydt, size_t i) {
  DataTable* dt = _extract_dt(pydt);
  if (_column_index_oob(dt, i) != 0) return nullptr;
  void* data = nullptr;
  try {
    data = dt->columns[i]->data_w();
  } catch (const std::exception& e) {
    // Convert the C++ exception into a Python error; it must not leave
    // this extern "C" function.
    exception_to_python(e);
    data = nullptr;
  }
  return data;
}
119+
120+
121+
// Return the strdata() pointer of string column `i`.
// Returns nullptr with a Python error set when:
//   - the index is out of range (IndexError);
//   - the column's stype is neither STR32 nor STR64 (TypeError);
//   - strdata() throws a std::exception.
const char* DtFrame_ColumnStringDataR(PyObject* pydt, size_t i) {
  DataTable* dt = _extract_dt(pydt);
  if (_column_index_oob(dt, i) != 0) return nullptr;

  auto col = dt->columns[i];
  const SType st = col->stype();
  const bool is_str32 = (st == SType::STR32);
  const bool is_str64 = (st == SType::STR64);
  if (!is_str32 && !is_str64) {
    PyErr_Format(PyExc_TypeError, "Column %zu is not of string type", i);
    return nullptr;
  }

  try {
    // The offset width (32 or 64 bit) selects the concrete column class.
    if (is_str32) {
      return static_cast<StringColumn<uint32_t>*>(col)->strdata();
    }
    return static_cast<StringColumn<uint64_t>*>(col)->strdata();
  } catch (const std::exception& e) {
    exception_to_python(e);
    return nullptr;
  }
}
141+
142+
143+
144+
//------------------------------------------------------------------------------
145+
// Rowindex
146+
//------------------------------------------------------------------------------
147+
148+
// Report whether `ob` can be used as a rowindex:
//   - null pointer -> 0;
//   - Python None  -> 1;
//   - otherwise the result of PyObject_IsInstance(ob, pyrowindex::type).
// If the instance check itself fails (-1), the Python error is cleared and
// 0 is returned.
int DtRowindex_Check(PyObject* ob) {
  if (!ob) return 0;
  if (ob == Py_None) return 1;
  PyObject* ri_type = reinterpret_cast<PyObject*>(&pyrowindex::type);
  const int is_rowindex = PyObject_IsInstance(ob, ri_type);
  if (is_rowindex != -1) return is_rowindex;
  PyErr_Clear();
  return 0;
}
159+
160+
161+
// Return the rowindex's type() converted to int, or 0 when no RowIndex is
// attached (for example when `pyri` is Python None).
int DtRowindex_Type(PyObject* pyri) {
  RowIndex* ri = _extract_ri(pyri);
  if (ri) {
    return static_cast<int>(ri->type());
  }
  return 0;
}
166+
167+
168+
// Return the rowindex's size(), or 0 when no RowIndex is attached (for
// example when `pyri` is Python None).
size_t DtRowindex_Size(PyObject* pyri) {
  RowIndex* ri = _extract_ri(pyri);
  if (ri) {
    return ri->size();
  }
  return 0;
}
173+
174+
175+
// Retrieve the fields of a slice rowindex.
//
// On success returns 0 and stores the slice's start, size and step into
// `*start`, `*length` and `*step` respectively. Any of the three output
// pointers may be NULL, in which case that field is simply not reported
// (previously a NULL output pointer was dereferenced).
//
// If `pyri` carries no RowIndex (e.g. it is None) or the RowIndex is not of
// SLICE type, a Python TypeError is set, -1 is returned, and no output is
// written.
int DtRowindex_UnpackSlice(
    PyObject* pyri, size_t* start, size_t* length, size_t* step)
{
  RowIndex* ri = _extract_ri(pyri);
  if (!ri || ri->type() != RowIndexType::SLICE) {
    PyErr_Format(PyExc_TypeError, "expected a slice rowindex");
    return -1;
  }
  if (start)  *start  = ri->slice_start();
  if (length) *length = ri->size();
  if (step)   *step   = ri->slice_step();
  return 0;
}
188+
189+
190+
// Return the index array of an array rowindex: indices32() for an ARR32
// rowindex, indices64() for an ARR64 one.
// For anything else (including a missing RowIndex) a Python TypeError is
// set and nullptr is returned.
const void* DtRowindex_ArrayData(PyObject* pyri) {
  RowIndex* ri = _extract_ri(pyri);
  if (ri) {
    const auto kind = ri->type();
    if (kind == RowIndexType::ARR32) return ri->indices32();
    if (kind == RowIndexType::ARR64) return ri->indices64();
  }
  PyErr_Format(PyExc_TypeError, "expected an array rowindex");
  return nullptr;
}
201+
202+
203+
204+
//------------------------------------------------------------------------------
205+
// Deprecated
206+
//------------------------------------------------------------------------------
207+
208+
// Deprecated: return the writable data pointer of column `column` of the
// DataTable passed as an opaque pointer.
//
// Returns nullptr when `dt_` is NULL, when `column` is not smaller than the
// table's `ncols`, or when data_w() throws. The exception is swallowed here
// because it must not propagate out of an extern "C" function, and this
// legacy signature has no channel to report an error.
void* datatable_get_column_data(void* dt_, size_t column) {
  auto dt = static_cast<DataTable*>(dt_);
  if (dt == nullptr || column >= dt->ncols) return nullptr;
  try {
    return dt->columns[column]->data_w();
  } catch (...) {
    return nullptr;
  }
}
212+
213+
// Deprecated: store the slice start and step of the DataTable's rowindex
// into `*start` and `*step`.
// NOTE(review): the rowindex type is not checked here; presumably the caller
// must ensure it is a slice rowindex -- confirm.
void datatable_unpack_slicerowindex(void* dt_, size_t* start, size_t* step) {
  auto dt = static_cast<DataTable*>(dt_);
  RowIndex ri(dt->rowindex);
  const auto first = ri.slice_start();
  const auto stride = ri.slice_step();
  *start = first;
  *step = stride;
}
219+
220+
// Deprecated: store the 32-bit index array of the DataTable's rowindex into
// `*indices`, with constness cast away.
// NOTE(review): the rowindex type is not checked here; presumably the caller
// must ensure it is a 32-bit array rowindex -- confirm.
void datatable_unpack_arrayrowindex(void *dt_, void **indices) {
  auto dt = static_cast<DataTable*>(dt_);
  RowIndex ri(dt->rowindex);
  const int32_t* readonly = ri.indices32();
  *indices = const_cast<int32_t*>(readonly);
}
225+
226+
227+
} // extern "C"

c/capi.cc

Lines changed: 0 additions & 38 deletions
This file was deleted.

c/capi.h

Lines changed: 0 additions & 26 deletions
This file was deleted.

c/column.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ void Column::verify_integrity(const std::string& name) const {
424424

425425
VoidColumn::VoidColumn() {}
426426
VoidColumn::VoidColumn(size_t nrows) : Column(nrows) {}
427-
SType VoidColumn::stype() const { return SType::VOID; }
427+
SType VoidColumn::stype() const noexcept { return SType::VOID; }
428428
size_t VoidColumn::elemsize() const { return 0; }
429429
bool VoidColumn::is_fixedwidth() const { return true; }
430430
size_t VoidColumn::data_nrows() const { return nrows; }

c/column.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,11 @@ class Column
115115
virtual void replace_buffer(MemoryRange&&);
116116
virtual void replace_buffer(MemoryRange&&, MemoryRange&&);
117117

118-
virtual SType stype() const = 0;
118+
virtual SType stype() const noexcept = 0;
119119
virtual size_t elemsize() const = 0;
120120
virtual bool is_fixedwidth() const = 0;
121121

122-
const RowIndex& rowindex() const { return ri; }
122+
const RowIndex& rowindex() const noexcept { return ri; }
123123
RowIndex remove_rowindex();
124124
void replace_rowindex(const RowIndex& newri);
125125

@@ -396,7 +396,7 @@ class BoolColumn : public FwColumn<int8_t>
396396
{
397397
public:
398398
using FwColumn<int8_t>::FwColumn;
399-
SType stype() const override;
399+
SType stype() const noexcept override;
400400

401401
int8_t min() const;
402402
int8_t max() const;
@@ -448,7 +448,7 @@ template <typename T> class IntColumn : public FwColumn<T>
448448
{
449449
public:
450450
using FwColumn<T>::FwColumn;
451-
virtual SType stype() const override;
451+
virtual SType stype() const noexcept override;
452452

453453
T min() const;
454454
T max() const;
@@ -515,7 +515,7 @@ template <typename T> class RealColumn : public FwColumn<T>
515515
{
516516
public:
517517
using FwColumn<T>::FwColumn;
518-
virtual SType stype() const override;
518+
virtual SType stype() const noexcept override;
519519

520520
T min() const;
521521
T max() const;
@@ -594,7 +594,7 @@ class PyObjectColumn : public FwColumn<PyObject*>
594594
public:
595595
PyObjectColumn(size_t nrows);
596596
PyObjectColumn(size_t nrows, MemoryRange&&);
597-
virtual SType stype() const override;
597+
virtual SType stype() const noexcept override;
598598
PyObjectStats* get_stats() const override;
599599

600600
py::oobj get_value_at_index(size_t i) const override;
@@ -642,7 +642,7 @@ template <typename T> class StringColumn : public Column
642642
WritableBuffer::Strategy strategy) override;
643643
void replace_buffer(MemoryRange&&, MemoryRange&&) override;
644644

645-
SType stype() const override;
645+
SType stype() const noexcept override;
646646
size_t elemsize() const override;
647647
bool is_fixedwidth() const override;
648648

@@ -714,7 +714,7 @@ extern template class StringColumn<uint64_t>;
714714
class VoidColumn : public Column {
715715
public:
716716
VoidColumn(size_t nrows);
717-
SType stype() const override;
717+
SType stype() const noexcept override;
718718
size_t elemsize() const override;
719719
bool is_fixedwidth() const override;
720720
size_t data_nrows() const override;

0 commit comments

Comments
 (0)