From 028fb587f91e97a8d6a4fff59f465bfafaf49c2f Mon Sep 17 00:00:00 2001 From: Delacrobix Date: Wed, 20 Aug 2025 08:52:58 -0500 Subject: [PATCH] notebook and dataset --- .../dataset/contract-1.txt | 21 + .../dataset/contract-10.txt | 21 + .../dataset/contract-2.txt | 20 + .../dataset/contract-3.txt | 21 + .../dataset/contract-4.txt | 21 + .../dataset/contract-5.txt | 21 + .../dataset/contract-6.txt | 21 + .../dataset/contract-7.txt | 21 + .../dataset/contract-8.txt | 21 + .../dataset/contract-9.txt | 21 + .../langextract_and_elasticsearch.ipynb | 494 ++++++++++++++++++ 11 files changed, 703 insertions(+) create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-1.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-10.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-2.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-3.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-4.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-5.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-6.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-7.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-8.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-9.txt create mode 100644 supporting-blog-content/using-langextract-and-elasticsearch/langextract_and_elasticsearch.ipynb diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-1.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-1.txt new file mode 100644 index 00000000..f93e365d --- /dev/null +++ 
b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-1.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on January 15, 2025, by and between: +* Contractor: TechNova Solutions Ltd. + +* Contractee: Sarah Mitchell + +Purpose: Web development services for redesigning the corporate website. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $12,500 for completion of the project. + + 2. The Contractee agrees to deliver the website within 90 days of this Agreement. + + 3. This Agreement expires on April 15, 2025. + + 4. This Agreement shall be governed by the laws of New York State. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +TechNova Solutions Ltd. +Sarah Mitchell \ No newline at end of file diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-10.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-10.txt new file mode 100644 index 00000000..a135d4c6 --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-10.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on February 12, 2025, by and between: +* Contractor: FinanceWise Consulting Group + +* Contractee: Rachel Turner + +Purpose: Financial planning and investment strategy development for retirement portfolio. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $7,350 for completion of the project. + + 2. The Contractee agrees to deliver the financial plan within 21 days of this Agreement. + + 3. This Agreement expires on March 5, 2025. + + 4. This Agreement shall be governed by the laws of Nevada. + + 5. Both parties acknowledge and agree to these terms. 
+ +Signed: +FinanceWise Consulting Group +Rachel Turner diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-2.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-2.txt new file mode 100644 index 00000000..92e91ada --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-2.txt @@ -0,0 +1,20 @@ +This Contract Agreement ("Agreement") is made and entered into on February 2, 2025, by and between: +* Contractor: GreenLeaf Landscaping Co. + +* Contractee: Robert Jenkins + +Purpose: Garden maintenance and landscaping for private residence. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the sum of $3,200 for the services. + + 2. The Contractee agrees to provide landscaping and maintenance services for a period of 3 months. + + 3. This Agreement shall terminate on May 2, 2025. + + 4. This Agreement shall be governed by the laws of California. + + 5. Both parties accept the conditions stated herein. + +Signed: +GreenLeaf Landscaping Co. +Robert Jenkins \ No newline at end of file diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-3.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-3.txt new file mode 100644 index 00000000..fd7b2bac --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-3.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on March 10, 2025, by and between: +* Contractor: DataFlow Analytics Inc. + +* Contractee: Maria Rodriguez + +Purpose: Database optimization and data migration services for retail management system. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $8,750 for completion of the project. + + 2. The Contractee agrees to deliver the optimized database within 60 days of this Agreement. + + 3. 
This Agreement expires on May 10, 2025. + + 4. This Agreement shall be governed by the laws of Texas. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +DataFlow Analytics Inc. +Maria Rodriguez diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-4.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-4.txt new file mode 100644 index 00000000..062e515c --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-4.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on January 22, 2025, by and between: +* Contractor: BlueWave Marketing Agency + +* Contractee: David Thompson + +Purpose: Social media marketing campaign and brand development for startup company. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $15,600 for completion of the project. + + 2. The Contractee agrees to deliver the marketing campaign within 120 days of this Agreement. + + 3. This Agreement expires on May 22, 2025. + + 4. This Agreement shall be governed by the laws of Florida. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +BlueWave Marketing Agency +David Thompson diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-5.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-5.txt new file mode 100644 index 00000000..451a3968 --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-5.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on February 18, 2025, by and between: +* Contractor: SecureNet Cybersecurity Ltd. + +* Contractee: Jennifer Adams + +Purpose: Network security audit and implementation of security protocols for small business. +Terms and Conditions: + 1. 
The Contractor agrees to pay the Contractee the amount of $6,900 for completion of the project. + + 2. The Contractee agrees to deliver the security implementation within 45 days of this Agreement. + + 3. This Agreement expires on April 5, 2025. + + 4. This Agreement shall be governed by the laws of Illinois. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +SecureNet Cybersecurity Ltd. +Jennifer Adams diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-6.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-6.txt new file mode 100644 index 00000000..428e48e6 --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-6.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on March 5, 2025, by and between: +* Contractor: CreativeDesign Studios + +* Contractee: Michael Chen + +Purpose: Logo design and complete brand identity package for restaurant chain. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $4,800 for completion of the project. + + 2. The Contractee agrees to deliver the brand package within 30 days of this Agreement. + + 3. This Agreement expires on April 5, 2025. + + 4. This Agreement shall be governed by the laws of Oregon. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +CreativeDesign Studios +Michael Chen diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-7.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-7.txt new file mode 100644 index 00000000..20b241db --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-7.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on February 28, 2025, by and between: +* Contractor: CloudTech Solutions Inc. 
+ +* Contractee: Amanda Foster + +Purpose: Cloud infrastructure migration and setup for e-commerce platform. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $22,400 for completion of the project. + + 2. The Contractee agrees to deliver the cloud migration within 75 days of this Agreement. + + 3. This Agreement expires on May 15, 2025. + + 4. This Agreement shall be governed by the laws of Washington. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +CloudTech Solutions Inc. +Amanda Foster diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-8.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-8.txt new file mode 100644 index 00000000..6291584e --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-8.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on January 8, 2025, by and between: +* Contractor: MobileDev Innovations + +* Contractee: Christopher Lee + +Purpose: Mobile application development for fitness tracking and personal training. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $18,200 for completion of the project. + + 2. The Contractee agrees to deliver the mobile application within 100 days of this Agreement. + + 3. This Agreement expires on April 18, 2025. + + 4. This Agreement shall be governed by the laws of Colorado. + + 5. Both parties acknowledge and agree to these terms. 
+ +Signed: +MobileDev Innovations +Christopher Lee diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-9.txt b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-9.txt new file mode 100644 index 00000000..4718d52b --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/dataset/contract-9.txt @@ -0,0 +1,21 @@ +CONTRACT AGREEMENT +This Contract Agreement ("Agreement") is made and entered into on March 1, 2025, by and between: +* Contractor: EcoFriendly Construction Corp. + +* Contractee: Lisa Wang + +Purpose: Home renovation with sustainable materials and energy-efficient installations. +Terms and Conditions: + 1. The Contractor agrees to pay the Contractee the amount of $35,750 for completion of the project. + + 2. The Contractee agrees to deliver the renovation within 180 days of this Agreement. + + 3. This Agreement expires on August 28, 2025. + + 4. This Agreement shall be governed by the laws of Arizona. + + 5. Both parties acknowledge and agree to these terms. + +Signed: +EcoFriendly Construction Corp. +Lisa Wang diff --git a/supporting-blog-content/using-langextract-and-elasticsearch/langextract_and_elasticsearch.ipynb b/supporting-blog-content/using-langextract-and-elasticsearch/langextract_and_elasticsearch.ipynb new file mode 100644 index 00000000..81c18a8a --- /dev/null +++ b/supporting-blog-content/using-langextract-and-elasticsearch/langextract_and_elasticsearch.ipynb @@ -0,0 +1,494 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "C3EFzwK9YbaP", + "metadata": { + "id": "C3EFzwK9YbaP" + }, + "source": [ + "# Using LangExtract and Elasticsearch\n", + "\n", + "This notebook demonstrates how to use LangExtract to extract structured data from text documents and index it into Elasticsearch for searching and analysis. 
This notebook is based on the article [Using LangExtract and Elasticsearch](https://www.elastic.co/search-labs/blog/using-langextract-and-elasticsearch)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74bb30f2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "74bb30f2", + "outputId": "185bf00e-67fb-4504-e56c-30f8c8872f83" + }, + "outputs": [], + "source": [ + "%pip install langextract elasticsearch -q" + ] + }, + { + "cell_type": "markdown", + "id": "TgEea48gYnp5", + "metadata": { + "id": "TgEea48gYnp5" + }, + "source": [ + "## Installing dependencies and importing packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4668548", + "metadata": { + "id": "b4668548" + }, + "outputs": [], + "source": [ + "import langextract as lx\n", + "import json\n", + "import os\n", + "import glob\n", + "\n", + "\n", + "from google.colab import files # only for Google Colab notebooks\n", + "from elasticsearch import Elasticsearch, helpers\n", + "from IPython.display import HTML\n", + "from getpass import getpass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01aab023", + "metadata": { + "id": "01aab023" + }, + "outputs": [], + "source": [ + "os.environ[\"ELASTICSEARCH_API_KEY\"] = getpass(\"Enter your Elasticsearch API key: \")\n", + "os.environ[\"ELASTICSEARCH_URL\"] = getpass(\"Enter your Elasticsearch URL: \")\n", + "os.environ[\"LANGEXTRACT_API_KEY\"] = getpass(\n", + " \"Enter your LangExtract API key: \"\n", + ") # Gemini APIKey\n", + "\n", + "\n", + "INDEX_NAME = \"contracts\"" + ] + }, + { + "cell_type": "markdown", + "id": "tjbXck_gc9lY", + "metadata": { + "id": "tjbXck_gc9lY" + }, + "source": [ + "## Elasticsearch Python client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cf922ab", + "metadata": { + "id": "8cf922ab" + }, + "outputs": [], + "source": [ + "es_client = Elasticsearch(\n", + " os.environ[\"ELASTICSEARCH_URL\"], 
api_key=os.environ[\"ELASTICSEARCH_API_KEY\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "iRX6_LuXdBy1", + "metadata": { + "id": "iRX6_LuXdBy1" + }, + "source": [ + "## Index setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fc74243", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3fc74243", + "outputId": "d0f171e6-fcb4-48be-b9e6-11682410cf4b" + }, + "outputs": [], + "source": [ + "try:\n", + " mapping = {\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"contract_date\": {\"type\": \"date\", \"format\": \"MM/dd/yyyy\"},\n", + " \"end_contract_date\": {\"type\": \"date\", \"format\": \"MM/dd/yyyy\"},\n", + " \"service_provider\": {\n", + " \"type\": \"text\",\n", + " \"fields\": {\"keyword\": {\"type\": \"keyword\"}},\n", + " },\n", + " \"client\": {\"type\": \"text\", \"fields\": {\"keyword\": {\"type\": \"keyword\"}}},\n", + " \"service_type\": {\"type\": \"keyword\"},\n", + " \"payment_amount\": {\"type\": \"float\"},\n", + " \"delivery_time_days\": {\"type\": \"integer\"},\n", + " \"governing_law\": {\"type\": \"keyword\"},\n", + " \"raw_contract\": {\"type\": \"text\"},\n", + " }\n", + " }\n", + " }\n", + "\n", + " es_client.indices.create(index=INDEX_NAME, body=mapping)\n", + " print(f\"Index {INDEX_NAME} created successfully\")\n", + "except Exception as e:\n", + " print(f\"Error creating index: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "zgiafql5dD8F", + "metadata": { + "id": "zgiafql5dD8F" + }, + "source": [ + "## LangExtract\n", + "\n", + "### Providing Context examples\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d66307a0", + "metadata": { + "id": "d66307a0" + }, + "outputs": [], + "source": [ + "contract_prompt_description = \"Extract contract information including dates, parties (contractor and contractee), purpose/services, payment amounts, timelines, and governing law in the order they appear in the text.\"\n", + "\n", 
+ "# Define contract-specific example data to help the model understand what to extract\n", + "contract_examples = [\n", + " lx.data.ExampleData(\n", + " text=\"Service Agreement dated March 10, 2024, between ABC Corp (Service Provider) and John Doe (Client) for consulting services. Payment: $5,000. Delivery: 30 days. Contract ends June 10, 2024. Governed by California law.\",\n", + " extractions=[\n", + " lx.data.Extraction(\n", + " extraction_class=\"contract_date\", extraction_text=\"03/10/2024\"\n", + " ),\n", + " lx.data.Extraction(\n", + " extraction_class=\"end_contract_date\", extraction_text=\"06/10/2024\"\n", + " ),\n", + " lx.data.Extraction(\n", + " extraction_class=\"service_provider\", extraction_text=\"ABC Corp\"\n", + " ),\n", + " lx.data.Extraction(extraction_class=\"client\", extraction_text=\"John Doe\"),\n", + " lx.data.Extraction(\n", + " extraction_class=\"service_type\", extraction_text=\"consulting services\"\n", + " ),\n", + " lx.data.Extraction(\n", + " extraction_class=\"payment_amount\", extraction_text=\"5000\"\n", + " ),\n", + " lx.data.Extraction(\n", + " extraction_class=\"delivery_time_days\", extraction_text=\"30\"\n", + " ),\n", + " lx.data.Extraction(\n", + " extraction_class=\"governing_law\", extraction_text=\"California\"\n", + " ),\n", + " ],\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "cjjqggi8aEM_", + "metadata": { + "id": "cjjqggi8aEM_" + }, + "source": [ + "### Uploading contract files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "Z-B9e7LgaDlL", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 216 + }, + "id": "Z-B9e7LgaDlL", + "outputId": "a7c0a653-2d97-4f52-b58c-1cb3a03a779b" + }, + "outputs": [], + "source": [ + "files.upload()" + ] + }, + { + "cell_type": "markdown", + "id": "OU5yMGisYyBD", + "metadata": { + "id": "OU5yMGisYyBD" + }, + "source": [ + "### Extracting content using LangExtract" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "id": "8ee40df6", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8ee40df6", + "outputId": "4f31d696-5dd8-49f7-8312-eaabd5aeb273" + }, + "outputs": [], + "source": [ + "contract_files = glob.glob(\"*.txt\")\n", + "\n", + "print(f\"Found {len(contract_files)} contract files:\")\n", + "\n", + "for i, file_path in enumerate(contract_files, 1):\n", + " filename = os.path.basename(file_path)\n", + " print(f\"\\t{i}. {filename}\")\n", + "\n", + "results = []\n", + "\n", + "for file_path in contract_files:\n", + " filename = os.path.basename(file_path)\n", + "\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " content = file.read()\n", + "\n", + " # Run the extraction\n", + " contract_result = lx.extract(\n", + " text_or_documents=content,\n", + " prompt_description=contract_prompt_description,\n", + " examples=contract_examples,\n", + " model_id=\"gemini-2.5-flash\",\n", + " )\n", + "\n", + " results.append(contract_result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "Fjhi_XEfl4MX", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Fjhi_XEfl4MX", + "outputId": "6bc31395-d5b2-4354-b07f-39853fe33d12" + }, + "outputs": [], + "source": [ + "NDJSON_FILE = \"extraction_results.jsonl\"\n", + "\n", + "# Save the results to a JSONL file\n", + "lx.io.save_annotated_documents(results, output_name=NDJSON_FILE, output_dir=\".\")\n", + "\n", + "# Generate the visualization from the file\n", + "html_content = lx.visualize(NDJSON_FILE)" + ] + }, + { + "cell_type": "markdown", + "id": "ZrO7Gbw1Y6Kr", + "metadata": { + "id": "ZrO7Gbw1Y6Kr" + }, + "source": [ + "### Rendering html visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "_Gt-kRtpY4uM", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 541 + }, + "id": "_Gt-kRtpY4uM", + "outputId": "e70cd434-a5ab-4f94-bca2-15ba0a0511ec" + 
}, + "outputs": [], + "source": [ + "# save HTML to file\n", + "with open(\"visualization.html\", \"w\", encoding=\"utf-8\") as f:\n", + " f.write(html_content.data)\n", + "\n", + "HTML(html_content.data)" + ] + }, + { + "cell_type": "markdown", + "id": "sBDsH4VCZNBX", + "metadata": { + "id": "sBDsH4VCZNBX" + }, + "source": [ + "## Ingesting data to Elasticsearch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "vuPAJRXLY_-J", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vuPAJRXLY_-J", + "outputId": "29b17505-0150-4ea3-b4d0-cf5fd192d1b6" + }, + "outputs": [], + "source": [ + "def build_data(ndjson_file, index_name):\n", + " with open(ndjson_file, \"r\") as f:\n", + " for line in f:\n", + " doc = json.loads(line)\n", + "\n", + " contract_doc = {}\n", + "\n", + " for extraction in doc[\"extractions\"]:\n", + " extraction_class = extraction[\"extraction_class\"]\n", + " extraction_text = extraction[\"extraction_text\"]\n", + "\n", + " contract_doc[extraction_class] = extraction_text\n", + "\n", + " contract_doc[\"raw_contract\"] = doc[\"text\"]\n", + "\n", + " yield {\"_index\": index_name, \"_source\": contract_doc}\n", + "\n", + "\n", + "try:\n", + " success, errors = helpers.bulk(es_client, build_data(NDJSON_FILE, INDEX_NAME))\n", + " print(f\"{success} documents indexed successfully\")\n", + "\n", + " if errors:\n", + " print(\"Errors during indexing:\", errors)\n", + "except Exception as e:\n", + " print(f\"Error: {str(e)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "gZD2xqc-ZW88", + "metadata": { + "id": "gZD2xqc-ZW88" + }, + "source": [ + "## Querying data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "R54-_-0IZITC", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "R54-_-0IZITC", + "outputId": "a243c9d5-1687-449e-f12a-7d82a0f592b3" + }, + "outputs": [], + "source": [ + "try:\n", + " response = es_client.search(\n", + " 
index=INDEX_NAME,\n", + " source_excludes=[\"raw_contract\"],\n", + " body={\n", + " \"query\": {\n", + " \"bool\": {\n", + " \"filter\": [\n", + " {\"range\": {\"payment_amount\": {\"gte\": 15000}}},\n", + " {\"range\": {\"end_contract_date\": {\"lte\": \"now\"}}},\n", + " ]\n", + " }\n", + " }\n", + " },\n", + " )\n", + "\n", + " print(f\"\\nTotal hits: {response['hits']['total']['value']}\")\n", + "\n", + " for hit in response[\"hits\"][\"hits\"]:\n", + " doc = hit[\"_source\"]\n", + "\n", + " print(json.dumps(doc, indent=4))\n", + "\n", + "except Exception as e:\n", + " print(f\"Error searching index: {str(e)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "C2WKnj8UZa7g", + "metadata": { + "id": "C2WKnj8UZa7g" + }, + "source": [ + "## Deleting\n", + "\n", + "Delete the index created in this notebook so that it stops consuming cluster resources." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "TgqFHEhPZfAd", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TgqFHEhPZfAd", + "outputId": "675e161b-f7e3-43f8-d3cc-f9e7fa72f6aa" + }, + "outputs": [], + "source": [ + "# Cleanup - Delete Index\n", + "es_client.indices.delete(index=INDEX_NAME, ignore=[400, 404])" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "3.12.2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}