#include "openvino/whisper-openvino-encoder.h"
#include "ggml.h"
#include <openvino/openvino.hpp>
#include <iostream>

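// Per-model OpenVINO state: holds the infer request created from the compiled
// encoder model, reused for every call to whisper_openvino_encode().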
struct whisper_openvino_context {
    ov::InferRequest inferRequest;
};

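// Initialize the OpenVINO encoder: read the encoder IR (.xml/.bin) from path_model,
// compile it for the requested device, optionally cache compiled blobs in cache_dir,
// and create an infer request. Returns nullptr on failure.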
struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
    const char* device,
    const char* cache_dir)
{
    if (!path_model || !device) {
        fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
        return nullptr;
    }

    fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
        __func__, path_model, device, cache_dir ? cache_dir : "(not set)");

    whisper_openvino_context *context = new whisper_openvino_context;
    try {
        ov::Core core;

        if (cache_dir) {
            // enables caching of device-specific 'blobs' during core.compile_model
            // routine. This speeds up calls to compile_model for successive runs.
            core.set_property(ov::cache_dir(cache_dir));
        }

        // Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
        std::shared_ptr<ov::Model> model = core.read_model(path_model);

        // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
        auto compiledModel = core.compile_model(model, device);

        // From the compiled model object, create an infer request. This is the object
        // we will use later on to trigger inference execution.
        context->inferRequest = compiledModel.create_infer_request();
    }
    catch (const std::exception& error) {
        std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
        delete context;
        context = nullptr;
    }

    return context;
}

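// Free the context created by whisper_openvino_init. Passing a null pointer is a no-op.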
void whisper_openvino_free(struct whisper_openvino_context * ctx) {
    if (ctx) {
        delete ctx;
    }
}

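// Run the OpenVINO encoder: wrap the 2D mel ggml_tensor as the input and the 2D out
// ggml_tensor as the output of the infer request (no copies are made), then run inference.
// Returns 1 on success, 0 on failure.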
int whisper_openvino_encode(
    whisper_openvino_context* ctx,
    ggml_tensor* mel,
    ggml_tensor* out) {

    if (!ctx || !mel || !out) {
        fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
        return 0;
    }

    if (mel->n_dims != 2) {
        fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
            __func__, mel->n_dims);
        return 0;
    }

    if (out->n_dims != 2) {
        fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
            __func__, out->n_dims);
        return 0;
    }

    try {
        // Wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set it as
        // the input tensor of the infer request.
        {
            // Note: shape & stride dimensions are populated in the opposite order from how
            // they are listed in the ne / nb arrays, since ggml's ne[0] / nb[0] describe the
            // innermost (contiguous) dimension while ov::Shape lists dimensions outermost-first.
            ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
            ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
            ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
            ctx->inferRequest.set_input_tensor(input_tensor);
        }

        // Wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set it as
        // the output tensor of the infer request.
        {
            // Note: same dimension-order convention as for the input tensor above.
            ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
            ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
            ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
            ctx->inferRequest.set_output_tensor(out_tensor);
        }

        // run inference
        ctx->inferRequest.infer();
    }
    catch (const std::exception& error) {
        std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
        return 0;
    }

    return 1;
}
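
// Minimal usage sketch (illustrative only: the model path, device, and cache directory
// below are assumptions; the caller must supply 2D mel/out ggml tensors):
//
//   struct whisper_openvino_context * ctx =
//       whisper_openvino_init("whisper-encoder-openvino.xml", "CPU", "openvino-cache");
//   if (ctx) {
//       if (!whisper_openvino_encode(ctx, mel, out)) {
//           // inference failed; fall back to the default ggml encoder path
//       }
//       whisper_openvino_free(ctx);
//   }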