|
4 | 4 | <titleabbrev>Script</titleabbrev> |
5 | 5 | ++++ |
6 | 6 |
|
7 | | -Allows inline and stored scripts to be executed within ingest pipelines. |
| 7 | +Runs an inline or stored <<modules-scripting,script>> on incoming documents. The |
| 8 | +script runs in the {painless}/painless-ingest-processor-context.html[`ingest`] |
| 9 | +context. |
8 | 10 |
|
9 | | -See <<modules-scripting-using, How to use scripts>> to learn more about writing scripts. The Script Processor |
10 | | -leverages caching of compiled scripts for improved performance. Since the |
11 | | -script specified within the processor is potentially re-compiled per document, it is important |
12 | | -to understand how script caching works. To learn more about |
13 | | -caching see <<scripts-and-search-speed, Script Caching>>. |
| 11 | +The script processor uses the <<scripts-and-search-speed,script cache>> to avoid |
| 12 | +recompiling the script for each incoming document. To improve performance, |
| 13 | +ensure the script cache is properly sized before using a script processor in |
| 14 | +production. |
14 | 15 |
|
15 | 16 | [[script-options]] |
16 | | -.Script Options |
| 17 | +.Script options |
17 | 18 | [options="header"] |
18 | 19 | |====== |
19 | | -| Name | Required | Default | Description |
20 | | -| `lang` | no | "painless" | The scripting language |
21 | | -| `id` | no | - | The stored script id to refer to |
22 | | -| `source` | no | - | An inline script to be executed |
23 | | -| `params` | no | - | Script Parameters |
| 20 | +| Name | Required | Default | Description |
| 21 | +| `lang` | no | "painless" | <<scripting-available-languages,Script language>>. |
| 22 | +| `id` | no | - | ID of a <<create-stored-script-api,stored script>>. |
| 23 | + If no `source` is specified, this parameter is required. |
| 24 | +| `source` | no | - | Inline script. |
| 25 | + If no `id` is specified, this parameter is required. |
| 26 | +| `params` | no | - | Object containing parameters for the script. |
24 | 27 | include::common-options.asciidoc[] |
25 | 28 | |====== |
26 | 29 |
|
27 | | -One of `id` or `source` options must be provided in order to properly reference a script to execute. |
| 30 | +[discrete] |
| 31 | +[[script-processor-access-source-fields]] |
| 32 | +==== Access source fields |
28 | 33 |
|
29 | | -You can access the current ingest document from within the script context by using the `ctx` variable. |
| 34 | +The script processor parses each incoming document's JSON source fields into a |
| 35 | +set of maps, lists, and primitives. To access these fields with a Painless |
| 36 | +script, use the |
| 37 | +{painless}/painless-operators-reference.html#map-access-operator[map access |
| 38 | +operator]: `ctx['my-field']`. You can also use the shorthand `ctx.<my-field>` |
| 39 | +syntax. |
30 | 40 |
|
31 | | -The following example sets a new field called `field_a_plus_b_times_c` to be the sum of two existing |
32 | | -numeric fields `field_a` and `field_b` multiplied by the parameter `param_c`: |
| 41 | +NOTE: The script processor does not support the `ctx['_source']['my-field']` or |
| 42 | +`ctx._source.<my-field>` syntaxes. |
33 | 43 |
|
34 | | -[source,js] |
35 | | --------------------------------------------------- |
| 44 | +The following processor uses a Painless script to extract the `tags` field from |
| 45 | +the `env` source field. |
| 46 | + |
| 47 | +[source,console] |
| 48 | +---- |
| 49 | +POST _ingest/pipeline/_simulate |
36 | 50 | { |
37 | | - "script": { |
38 | | - "lang": "painless", |
39 | | - "source": "ctx.field_a_plus_b_times_c = (ctx.field_a + ctx.field_b) * params.param_c", |
40 | | - "params": { |
41 | | - "param_c": 10 |
| 51 | + "pipeline": { |
| 52 | + "processors": [ |
| 53 | + { |
| 54 | + "script": { |
| 55 | + "description": "Extract 'tags' from 'env' field", |
| 56 | + "lang": "painless", |
| 57 | + "source": """ |
| 58 | + String[] envSplit = ctx['env'].splitOnToken(params['delimiter']); |
| 59 | + ArrayList tags = new ArrayList(); |
| 60 | + tags.add(envSplit[params['position']].trim()); |
| 61 | + ctx['tags'] = tags; |
| 62 | + """, |
| 63 | + "params": { |
| 64 | + "delimiter": "-", |
| 65 | + "position": 1 |
| 66 | + } |
| 67 | + } |
| 68 | + } |
| 69 | + ] |
| 70 | + }, |
| 71 | + "docs": [ |
| 72 | + { |
| 73 | + "_source": { |
| 74 | + "env": "es01-prod" |
| 75 | + } |
42 | 76 | } |
43 | | - } |
| 77 | + ] |
44 | 78 | } |
45 | | --------------------------------------------------- |
46 | | -// NOTCONSOLE |
| 79 | +---- |
47 | 80 |
|
48 | | -It is possible to use the Script Processor to manipulate document metadata like `_index` and `_type` during |
49 | | -ingestion. Here is an example of an Ingest Pipeline that sets the index to `my-index` and the type to `_doc`, no matter what |
50 | | -was provided in the original index request: |
| 81 | +The processor produces: |
51 | 82 |
|
52 | | -[source,console] |
53 | | --------------------------------------------------- |
54 | | -PUT _ingest/pipeline/my-index |
| 83 | +[source,console-result] |
| 84 | +---- |
55 | 85 | { |
56 | | - "description": "use index:my-index", |
57 | | - "processors": [ |
| 86 | + "docs": [ |
58 | 87 | { |
59 | | - "script": { |
60 | | - "source": """ |
61 | | - ctx._index = 'my-index'; |
62 | | - ctx._type = '_doc'; |
63 | | - """ |
| 88 | + "doc": { |
| 89 | + ... |
| 90 | + "_source": { |
| 91 | + "env": "es01-prod", |
| 92 | + "tags": [ |
| 93 | + "prod" |
| 94 | + ] |
| 95 | + } |
64 | 96 | } |
65 | 97 | } |
66 | 98 | ] |
67 | 99 | } |
68 | | --------------------------------------------------- |
| 100 | +---- |
| 101 | +// TESTRESPONSE[s/\.\.\./"_index":"_index","_id":"_id","_type":"_doc","_ingest":{"timestamp":$body.docs.0.doc._ingest.timestamp},/] |
69 | 102 |
|
70 | | -Using the above pipeline, we can attempt to index a document into the `any-index` index. |
| 103 | + |
| 104 | +[discrete] |
| 105 | +[[script-processor-access-metadata-fields]] |
| 106 | +==== Access metadata fields |
| 107 | + |
| 108 | +You can also use a script processor to access metadata fields. The following |
| 109 | +processor uses a Painless script to set an incoming document's `_index`. |
71 | 110 |
|
72 | 111 | [source,console] |
73 | | --------------------------------------------------- |
74 | | -PUT any-index/_doc/1?pipeline=my-index |
| 112 | +---- |
| 113 | +POST _ingest/pipeline/_simulate |
75 | 114 | { |
76 | | - "message": "text" |
| 115 | + "pipeline": { |
| 116 | + "processors": [ |
| 117 | + { |
| 118 | + "script": { |
| 119 | + "description": "Set index based on `lang` field and `dataset` param", |
| 120 | + "lang": "painless", |
| 121 | + "source": """ |
| 122 | + ctx['_index'] = ctx['lang'] + '-' + params['dataset']; |
| 123 | + """, |
| 124 | + "params": { |
| 125 | + "dataset": "catalog" |
| 126 | + } |
| 127 | + } |
| 128 | + } |
| 129 | + ] |
| 130 | + }, |
| 131 | + "docs": [ |
| 132 | + { |
| 133 | + "_index": "generic-index", |
| 134 | + "_source": { |
| 135 | + "lang": "fr" |
| 136 | + } |
| 137 | + } |
| 138 | + ] |
77 | 139 | } |
78 | | --------------------------------------------------- |
79 | | -// TEST[continued] |
| 140 | +---- |
80 | 141 |
|
81 | | -The response from the above index request: |
| 142 | +The processor changes the document's `_index` from `generic-index` to |
| 143 | +`fr-catalog`. |
82 | 144 |
|
83 | 145 | [source,console-result] |
84 | | --------------------------------------------------- |
| 146 | +---- |
85 | 147 | { |
86 | | - "_index": "my-index", |
87 | | - "_type": "_doc", |
88 | | - "_id": "1", |
89 | | - "_version": 1, |
90 | | - "result": "created", |
91 | | - "_shards": { |
92 | | - "total": 2, |
93 | | - "successful": 1, |
94 | | - "failed": 0 |
95 | | - }, |
96 | | - "_seq_no": 89, |
97 | | - "_primary_term": 1, |
| 148 | + "docs": [ |
| 149 | + { |
| 150 | + "doc": { |
| 151 | + ... |
| 152 | + "_index": "fr-catalog", |
| 153 | + "_source": { |
| 154 | + "lang": "fr" |
| 155 | + } |
| 156 | + } |
| 157 | + } |
| 158 | + ] |
98 | 159 | } |
99 | | --------------------------------------------------- |
100 | | -// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] |
101 | | - |
102 | | -In the above response, you can see that our document was actually indexed into `my-index` instead of |
103 | | -`any-index`. This type of manipulation is often convenient in pipelines that have various branches of transformation, |
104 | | -and depending on the progress made, indexed into different indices. |
| 160 | +---- |
| 161 | +// TESTRESPONSE[s/\.\.\./"_id":"_id","_type":"_doc","_ingest":{"timestamp":$body.docs.0.doc._ingest.timestamp},/] |
0 commit comments