diff --git a/languages/python/jupyter_notebook/CipherStash-Getting-Started.ipynb b/languages/python/jupyter_notebook/CipherStash-Getting-Started.ipynb index f7b642a8..bf9a5180 100644 --- a/languages/python/jupyter_notebook/CipherStash-Getting-Started.ipynb +++ b/languages/python/jupyter_notebook/CipherStash-Getting-Started.ipynb @@ -170,9 +170,7 @@ "cell_type": "code", "execution_count": null, "id": "b082598f-acc8-47bc-b561-c0f07d059f4b", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "%env CS_WORKSPACE_ID=" @@ -182,9 +180,7 @@ "cell_type": "code", "execution_count": null, "id": "e01757fa-0d70-4d53-b33d-88bf02449d3c", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "%env CS_CLIENT_ACCESS_KEY=" @@ -194,9 +190,7 @@ "cell_type": "code", "execution_count": null, "id": "348e6430-dae7-4183-8eab-46315b622f1c", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "%env CS_ENCRYPTION__CLIENT_ID=" @@ -206,9 +200,7 @@ "cell_type": "code", "execution_count": null, "id": "e8ac364e-98a1-4687-b11e-4b935b6d8b70", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "%env CS_ENCRYPTION__CLIENT_KEY=" @@ -270,11 +262,13 @@ "cell_type": "code", "execution_count": null, "id": "4e7763c1-4155-4fe3-ab40-ef359adb7ca8", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "%env PGPASSWORD=postgres\n", - "! curl -L https://github.com/cipherstash/encrypt-query-language/releases/download/eql-0.4.0/cipherstash-encrypt.sql | psql -h localhost -p 5432 -U postgres cipherstash_getting_started" + "! 
curl -L https://github.com/cipherstash/encrypt-query-language/releases/download/eql-0.4.2/cipherstash-encrypt.sql | psql -h localhost -p 5432 -U postgres cipherstash_getting_started" ] }, { @@ -308,7 +302,9 @@ "cell_type": "code", "execution_count": null, "id": "56e04d28-4ebc-463a-b7d0-1ea3cf843f1b", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "%env PGPASSWORD=postgres\n", @@ -418,6 +414,7 @@ "source": [ "from cs_types import *\n", "from psycopg2.extras import RealDictCursor\n", + "import psycopg2\n", "\n", "print(\"Importing done.\")" ] @@ -438,9 +435,7 @@ "cell_type": "code", "execution_count": null, "id": "698c6970-62f0-4cfb-b779-7a5dbbe11d3a", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "from pprint import pprint\n", @@ -470,14 +465,14 @@ }, { "cell_type": "markdown", - "id": "524c68bc-be47-4887-881b-1af0b365e259", + "id": "2f24b9c8-8537-4f44-93f5-925a37bf65b1", "metadata": {}, "source": [ "This should insert a single row in the encrypted `examples` table as:\n", "\n", "|encrypted_int|encrypted_boolean|encrypted_date|encrypted_float|encrypted_utf8_str|\n", "|---|-----|--------------|----|------------|\n", - "|-51|false||-0.5|hello, world|\n", + "|-51|false|2024-11-01 |-0.5|hello, world|\n", "\n", "You can check what it looks like from both regular PostgreSQL running on port 5432 and CipherStash Proxy running on port 6432.\n", "To look at the data through CipherStash Proxy, run the following:" @@ -531,6 +526,7 @@ "### Converting to Python types\n", "\n", "By querying the proxy, you will see the JSONB values as seen above (decrypted version in the Proxy example, not the PostgreSQL example).\n", + "\n", "The values should then be converted to types that can be used in Python using class methods for each type:" ] }, @@ -541,6 +537,8 @@ "metadata": {}, "outputs": [], "source": [ + "from IPython.display import display, Markdown\n", + "\n", "cur.execute(\"select * from examples\")\n", 
"\n", "records = cur.fetchall()\n", @@ -548,11 +546,18 @@ "record0 = records[0]\n", "\n", "# `from_parsed_json` methods convert the values into the corresponding Python types\n", - "print(f\"int: {CsInt.from_parsed_json(record0['encrypted_int'])}\")\n", - "print(f\"boolean: {CsBool.from_parsed_json(record0['encrypted_boolean'])}\")\n", - "print(f\"datetime: {CsDate.from_parsed_json(record0['encrypted_date'])}\")\n", - "print(f\"float: {CsFloat.from_parsed_json(record0['encrypted_float'])}\")\n", - "print(f\"text: {CsText.from_parsed_json(record0['encrypted_utf8_str'])}\")" + "content = f\"\"\"\n", + "### Values in the record\n", + "| | |\n", + "|--|--|\n", + "|int | {CsInt.from_parsed_json(record0['encrypted_int'])}|\n", + "|boolean | {CsBool.from_parsed_json(record0['encrypted_boolean'])}|\n", + "|datetime | {CsDate.from_parsed_json(record0['encrypted_date'])}|\n", + "|float | {CsFloat.from_parsed_json(record0['encrypted_float'])}|\n", + "|text | {CsText.from_parsed_json(record0['encrypted_utf8_str'])}|\n", + "\"\"\"\n", + "\n", + "display(Markdown(content))\n" ] }, { @@ -600,20 +605,6 @@ " )\n", ")\n", "\n", - "# data for JSONB containment queries\n", - "cur.execute(\"INSERT INTO examples (encrypted_jsonb) VALUES (%s)\",\n", - " (\n", - " CsJsonb({\"top\": {\"level1\": {\"level2\": [\"a\", \"b\", \"c\"]}}}, \"examples\", \"encrypted_jsonb\").to_db_format(),\n", - " )\n", - ")\n", - "\n", - "cur.execute(\"INSERT INTO examples (encrypted_jsonb) VALUES (%s)\",\n", - " (\n", - " CsJsonb({\"top\": {\"level1\": {\"another_key\": [\"a\"]}}}, \"examples\", \"encrypted_jsonb\").to_db_format(),\n", - " )\n", - ")\n", - "\n", - "\n", "conn.commit()\n", "\n", "print(\"created data for MATCH and ORE queries\")" @@ -626,14 +617,12 @@ "source": [ "The example code above should insert rows like these in the examples table:\n", "\n", - "| | encrypted_utf_data | encrypted_float| encrypted_jsonb | |\n", - "|--|---|---|---|---|\n", - "| |hello, python| | | |\n", - "| |hello, 
jupyter| | | |\n", - "| | | 100.1 | | |\n", - "| | | 100.2 | | |\n", - "| | | | {\"top\": {\"level1\": {\"level2\": [\"a\", \"b\", \"c\"]}}} | |\n", - "| | | | {\"top\": {\"level1\": {\"another_key\": [\"a\"]}}} | |\n" + "| | encrypted_utf8_str | encrypted_float||\n", + "|--|---|---|---|\n", + "| |hello, python| | |\n", + "| |hello, jupyter| | |\n", + "| | | 100.1 | |\n", + "| | | 100.2 | |\n" ] }, { @@ -675,9 +664,7 @@ "cell_type": "code", "execution_count": null, "id": "834ce829-9d12-4d14-bdac-20abb2e76148", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "# UNIQUE\n", @@ -694,7 +681,7 @@ "source": [ "### ORE queries\n", "\n", - "Finally, a query for a record with `encrypted_float` that is larger than `100.15` which should match `100.2`:" + "With ORE, you can run a query for a record with `encrypted_float` that is larger than `100.15` which should match `100.2`:" ] }, { @@ -711,6 +698,95 @@ "print(f\"Float inside the found record: {CsFloat.from_parsed_json(found['encrypted_float'])}\")\n" ] }, + { + "cell_type": "markdown", + "id": "e9b9ee3e-6919-4a44-b98c-6964b06c3c87", + "metadata": {}, + "source": [ + "### Updating records\n", + "\n", + "Encrypted fields can be updated too.\n", + "The interface is similar to creating and querying:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9daad342-3bd7-47a0-9744-1aba1c955506", + "metadata": {}, + "outputs": [], + "source": [ + "cur.execute(\"SELECT * FROM examples WHERE cs_unique_v1(encrypted_utf8_str) = cs_unique_v1(%s)\", (CsText(\"hello, jupyter\", \"examples\", \"encrypted_utf8_str\").to_db_format(),))\n", + "found = cur.fetchall()[0]\n", + "record_id = found['id']\n", + "\n", + "cur.execute(\n", + " \"UPDATE examples SET encrypted_utf8_str = %s WHERE id = %s\",\n", + " (CsText(\"UPDATED TEXT\", \"examples\", \"encrypted_utf8_str\").to_db_format(), record_id) # Replace 'column_name' and 'new_value' with actual column and value\n", + ")\n", + 
"cur.execute(\"SELECT * FROM examples WHERE id = %s\", (record_id,))\n", + "found = cur.fetchall()[0]\n", + "print(f\"Updated row: {CsRow(found).row}\")" + ] + }, + { + "cell_type": "markdown", + "id": "c8c82996-b91d-4536-a141-1316d293c044", + "metadata": {}, + "source": [ + "### JSONB queries and operations\n", + "\n", + "There are multiple types of JSONB queries and operations supported.\n", + "Here, we introduce:\n", + "\n", + "* Containment query\n", + "* Field extraction\n", + "* WHERE with field extraction\n", + "* ORDER BY with field extraction\n", + "* GROUP BY with field extraction\n", + "\n", + "First, create the data for JSONB queries. The following queries will create records with JSONB values as:\n", + "\n", + "| encrypted_jsonb |\n", + "|---------|\n", + "| {\"num\": 1, \"category\": \"a\", \"top\": {\"nested\": [\"a\", \"b\", \"c\"]}} |\n", + "| {\"num\": 2, \"category\": \"b\", \"top\": {\"nested\": [\"a\"]}} |\n", + "| {\"num\": 3, \"category\": \"b\", \"top\": {\"nested\": [\"z\"]}} |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebdf4fe5-1265-484e-8549-cbfdeab5296f", + "metadata": {}, + "outputs": [], + "source": [ + "cur.execute(\"DELETE FROM examples;\")\n", + "\n", + "# data for JSONB queries\n", + "cur.execute(\"INSERT INTO examples (encrypted_jsonb) VALUES (%s)\",\n", + " (\n", + " CsJsonb({\"num\": 1, \"category\": \"a\", \"top\": {\"nested\": [\"a\", \"b\", \"c\"]}}, \"examples\", \"encrypted_jsonb\").to_db_format(),\n", + " )\n", + ")\n", + "\n", + "cur.execute(\"INSERT INTO examples (encrypted_jsonb) VALUES (%s)\",\n", + " (\n", + " CsJsonb({\"num\": 2, \"category\": \"b\", \"top\": {\"nested\": [\"a\"]}}, \"examples\", \"encrypted_jsonb\").to_db_format(),\n", + " )\n", + ")\n", + "\n", + "cur.execute(\"INSERT INTO examples (encrypted_jsonb) VALUES (%s)\",\n", + " (\n", + " CsJsonb({\"num\": 3, \"category\": \"b\", \"top\": {\"nested\": [\"z\"]}}, \"examples\", \"encrypted_jsonb\").to_db_format(),\n", + " )\n", 
+ ")\n", + "\n", + "conn.commit()\n", + "\n", + "print(\"Data for JSONB queries created\")" + ] + }, { "cell_type": "markdown", "id": "c1f857e8-8fcb-46f0-b178-e17b580e7538", @@ -719,8 +795,7 @@ "### JSONB containment queries\n", "\n", "A record can be found using the JSONB containment.\n", - "This only works with a containment from the root with no missing nodes in the middle.\n", - "The following matches the JSONB field containing keys `top`, `level1` and `level2`:" + "The following matches the JSONB field containing keys `top` and `nested` with the `@>` operator:" ] }, { @@ -730,52 +805,170 @@ "metadata": {}, "outputs": [], "source": [ - "# JSONB\n", - "cur.execute(\"SELECT * from examples WHERE cs_ste_vec_v1(encrypted_jsonb) @> cs_ste_vec_v1(%s)\", (CsJsonb({'top': { 'level1': {'level2': [] } } }, \"examples\", \"encrypted_jsonb\").to_db_format(),))\n", - "found = cur.fetchall()[0]\n", - "print(f\"Record Found with JSONB query: {CsRow(found).row}\\n\")\n", - "print(f\"JSONB inside the found record: {CsJsonb.from_parsed_json(found['encrypted_jsonb'])}\")" + "# JSONB containment 1\n", + "cur.execute(\"SELECT * from examples WHERE cs_ste_vec_v1(encrypted_jsonb) @> cs_ste_vec_v1(%s)\", (CsJsonb({'top': { 'nested': ['a', 'b'] } }, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\"),))\n", + "found = cur.fetchall()\n", + "for f in found:\n", + " print(f\"Record Found with JSONB query: {CsRow(f).row}\\n\")\n", + " print(f\"JSONB inside the found record: {CsJsonb.from_parsed_json(f['encrypted_jsonb'])}\\n\")" ] }, { "cell_type": "markdown", - "id": "e9b9ee3e-6919-4a44-b98c-6964b06c3c87", + "id": "97b4897b-713a-473b-9d97-25f80f263a76", "metadata": {}, "source": [ - "### Updating records\n", + "Also, the reverse operator (`<@`) is available to the above.\n", + "This query matches the JSONB field that's contained in the query:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e68a18f-f482-473b-b05a-dd0f4291e031", + "metadata": {}, + 
"outputs": [], + "source": [ + "# JSONB containment 2\n", + "cur.execute(\"SELECT * from examples WHERE cs_ste_vec_v1(encrypted_jsonb) <@ cs_ste_vec_v1(%s)\", (CsJsonb({\"num\": 2, \"category\": \"b\", 'top': {'nested': ['a'] } }, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\"),))\n", + "found = cur.fetchall()\n", + "print(\"Record Found with JSONB query:\")\n", + "for f in found:\n", + " print(f\" {CsRow(f).row}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f8ff4434-ddad-40a1-9a28-b5be04b2d4b3", + "metadata": {}, + "source": [ + "### JSONB field extraction\n", "\n", - "Encrypted fields can be updated too.\n", - "The interface is similar to creating and querying:" + "Specific JSONB fields can be accessed in queries.\n", + "\n", + "The following finds all values in `$.top.nested`:\n", + "\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "9daad342-3bd7-47a0-9744-1aba1c955506", + "id": "909f4645-8c6a-42d2-92ea-f24181232155", "metadata": {}, "outputs": [], "source": [ - "cur.execute(\"SELECT * FROM examples WHERE cs_unique_v1(encrypted_utf8_str) = cs_unique_v1(%s)\", (CsText(\"hello, jupyter\", \"examples\", \"encrypted_utf8_str\").to_db_format(),))\n", - "found = cur.fetchall()[0]\n", - "record_id = found['id']\n", + "query = \"SELECT cs_ste_vec_value_v1(encrypted_jsonb, %s) AS val FROM examples\"\n", + "\n", + "results = cur.execute(query, (CsJsonb(\"$.top.nested\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\"),))\n", + "\n", + "found = cur.fetchall()\n", + "print(\"values from '$.top.nested':\")\n", + "for f in found:\n", + " print(f\" {CsRow(f).row.get('val', {}).get('p')}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ff4a571a-c00e-4e95-9554-c82a5daf9352", + "metadata": {}, + "source": [ + "### JSONB field in WHERE query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fae30cc-c3a4-4c03-8779-4efdc0c4483c", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"SELECT * FROM 
examples WHERE cs_ste_vec_term_v1(encrypted_jsonb, %s) > cs_ste_vec_term_v1(%s)\"\n", "\n", "cur.execute(\n", - " \"UPDATE examples SET encrypted_utf8_str = %s WHERE id = %s\",\n", - " (CsText(\"UPDATED TEXT\", \"examples\", \"encrypted_utf8_str\").to_db_format(), record_id) # Replace 'column_name' and 'new_value' with actual column and value\n", + " query,\n", + " (CsJsonb(\"$.num\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\"),\n", + " CsJsonb(1, \"examples\", \"encrypted_jsonb\").to_db_format(\"ste_vec\")\n", + " )\n", ")\n", - "cur.execute(\"SELECT * FROM examples WHERE id = %s\", (record_id,))\n", - "found = cur.fetchall()[0]\n", - "print(f\"Updated row: {CsRow(found).row}\")" + "\n", + "found = cur.fetchall()\n", + "\n", + "for f in found:\n", + " print(f\"Record Found with JSONB query: {CsRow(f).row}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "4690460d-bbe8-4aaa-ae27-d013cb73b690", + "metadata": {}, + "source": [ + "### JSONB field in ORDER BY\n", + "\n", + "Field extraction can also be used to order the results.\n", + "The following will demonstrate ordering by the 'num' field in descending order:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04afc9d6-4ff3-484d-a500-205236a2c06b", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"SELECT * FROM examples ORDER BY cs_ste_vec_term_v1(encrypted_jsonb, %s) DESC\"\n", + "\n", + "cur.execute(\n", + " query,\n", + " (CsJsonb(\"$.num\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\"),)\n", + ")\n", + "found = cur.fetchall();\n", + "print(\"JSONB contents, ordered by 'num' desc:\")\n", + "for f in found:\n", + " print(f\" {CsRow(f).row.get('encrypted_jsonb')}\")" + ] + }, + { + "cell_type": "markdown", + "id": "6c18cefe-f8a1-49cc-b10c-0a455e4908b2", + "metadata": {}, + "source": [ + "### JSONB field in GROUP BY\n", + "\n", + "Field extraction can also be used in GROUP BY.\n", + "This example demonstrates how to group the count 
results by the 'category' column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f3b1cd0-a82b-487b-bf32-6eeaf11c6cb3", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"SELECT cs_grouped_value_v1(cs_ste_vec_value_v1(encrypted_jsonb, %s)) AS category, COUNT(*) FROM examples GROUP BY cs_ste_vec_term_v1(encrypted_jsonb, %s)\"\n", + "\n", + "cur.execute(\n", + " query,\n", + " (CsJsonb(\"$.category\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\"),\n", + " CsJsonb(\"$.category\", \"examples\", \"encrypted_jsonb\").to_db_format(\"ejson_path\"))\n", + ")\n", + "\n", + "found = cur.fetchall()\n", + "\n", + "print(\"count, grouped by category:\")\n", + "for f in found:\n", + " row = CsRow(f).row\n", + " print(f\" category: {row.get('category', {}).get('p')}, count: {row.get('count')}\")\n" ] }, { "cell_type": "markdown", - "id": "e24df4fc-11c6-489e-8b6f-45faa47bab8e", + "id": "f2279c68-06ab-47cb-bd86-dfaf63a7ebb4", "metadata": {}, "source": [ - "### Roll back\n", + "### Clean up\n", "\n", - "Free up the database connection so we can delete the table contentsup by rolling back before going to the next section:" + "Clean up the data before going to the next section:" ] }, { @@ -785,7 +978,8 @@ "metadata": {}, "outputs": [], "source": [ - "conn.rollback()" + "cur.execute(\"DELETE FROM examples\")\n", + "conn.commit()" ] }, { @@ -1063,7 +1257,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/languages/python/jupyter_notebook/cs_models.py b/languages/python/jupyter_notebook/cs_models.py index 2ec8186f..c58b4c64 100644 --- a/languages/python/jupyter_notebook/cs_models.py +++ b/languages/python/jupyter_notebook/cs_models.py @@ -2,16 +2,17 @@ from sqlalchemy.types import TypeDecorator, String, Integer, Date, Boolean, Float from sqlalchemy import create_engine, select, text from sqlalchemy.exc import IntegrityError 
+from datetime import datetime import json import sys import os class CsTypeDecorator(TypeDecorator): - def __init__(self, table_name, column_name): + def __init__(self, table, column): super().__init__() - self.table_name = table_name - self.column_name = column_name + self.table = table + self.column = column def process_bind_param(self, value, dialect): if value is not None: @@ -19,10 +20,11 @@ def process_bind_param(self, value, dialect): "k": "pt", "p": str(value), "i": { - "t": self.table_name, - "c": self.column_name + "t": self.table, + "c": self.column }, "v": 1, + "q": None } value = json.dumps(value_dict) return value @@ -35,43 +37,51 @@ def process_result_value(self, value, dialect): class EncryptedInt(CsTypeDecorator): impl = String - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def process_result_value(self, value, dialect): + if value is None: + return None + return int(value['p']) + class EncryptedBoolean(CsTypeDecorator): impl = String - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - def process_bind_param(self, value, dialect): if value is not None: value = str(value).lower() return super().process_bind_param(value, dialect) + def process_result_value(self, value, dialect): + if value is None: + return None + return value['p'] == 'true' + class EncryptedDate(CsTypeDecorator): impl = String - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def process_result_value(self, value, dialect): + if value is None: + return None + return datetime.fromisoformat(value['p']).date() class EncryptedFloat(CsTypeDecorator): impl = String - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def process_result_value(self, value, dialect): + if value is None: + return None + return float(value['p']) class EncryptedUtf8Str(CsTypeDecorator): impl = String - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - class 
EncryptedJsonb(CsTypeDecorator): impl = String - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def process_result_value(self, value, dialect): + if value is None: + return None + return json.loads(value['p']) class BaseModel(DeclarativeBase): pass @@ -96,4 +106,12 @@ def __init__(self, e_utf8_str=None, e_jsonb=None, e_int=None, e_float=None, e_da self.encrypted_boolean = e_bool def __repr__(self): - return f"" + return "" diff --git a/languages/python/jupyter_notebook/cs_models_test.py b/languages/python/jupyter_notebook/cs_models_test.py new file mode 100644 index 00000000..700f5ba9 --- /dev/null +++ b/languages/python/jupyter_notebook/cs_models_test.py @@ -0,0 +1,58 @@ +import unittest +from datetime import date + +from cs_models import * + +class TestExampleModel(unittest.TestCase): + pg_password = os.getenv('PGPASSWORD', 'postgres') + pg_user = os.getenv('PGUSER', 'postgres') + pg_host = os.getenv('PGHOST', 'localhost') + pg_port = os.getenv('PGPORT', '6432') + pg_db = os.getenv('PGDATABASE', 'cs_test_db') + + def setUp(self): + self.engine = create_engine(f'postgresql://{self.pg_user}:{self.pg_password}@{self.pg_host}:{self.pg_port}/{self.pg_db}') + Session = sessionmaker(bind=self.engine) + self.session = Session() + BaseModel.metadata.create_all(self.engine) + + self.session.query(Example).delete() + self.example = Example( + e_int=1, e_utf8_str="str", e_jsonb='{"key": "value"}', e_float=1.1, e_date=date(2024, 1, 1), e_bool=True + ) + self.session.add(self.example) + self.session.commit() + + def test_encrypted_int(self): + found = self.session.query(Example).filter(Example.id == self.example.id).one() + self.assertEqual(found.encrypted_int, 1) + + def test_encrypted_boolean(self): + found = self.session.query(Example).filter(Example.id == self.example.id).one() + self.assertEqual(found.encrypted_boolean, True) + + def test_encrypted_date(self): + found = self.session.query(Example).filter(Example.id == self.example.id).one() + 
self.assertEqual(found.encrypted_date, date(2024, 1, 1)) + + def test_encrypted_float(self): + found = self.session.query(Example).filter(Example.id == self.example.id).one() + self.assertEqual(found.encrypted_float, 1.1) + + def test_encrypted_utf8_str(self): + found = self.session.query(Example).filter(Example.id == self.example.id).one() + self.assertEqual(found.encrypted_utf8_str, "str") + + def test_encrypted_jsonb(self): + found = self.session.query(Example).filter(Example.id == self.example.id).one() + self.assertEqual(found.encrypted_jsonb, {"key": "value"}) + + def test_example_prints_value(self): + self.example.id = 1 + self.assertEqual( + str(self.example), + "" + ) + +if __name__ == '__main__': + unittest.main() diff --git a/languages/python/jupyter_notebook/cs_types.py b/languages/python/jupyter_notebook/cs_types.py index f969c0fb..c71e127a 100755 --- a/languages/python/jupyter_notebook/cs_types.py +++ b/languages/python/jupyter_notebook/cs_types.py @@ -1,8 +1,4 @@ -#!/usr/bin/env python - -import psycopg2 from psycopg2.extras import RealDictCursor -from pprint import pprint from datetime import datetime import json @@ -12,79 +8,74 @@ def __init__(self, v, t: str, c: str): self.table = t self.column = c - def to_db_format(self): + def to_db_format(self, query_type = None): data = { "k": "pt", - "p": self.value_in_db_format(), + "p": self._value_in_db_format(query_type), "i": { "t": str(self.table), "c": str(self.column) }, "v": 1, + "q": query_type, } return json.dumps(data) - # TODO: Unused at the moment - @classmethod - def from_json_str(cls, json_str): - parsed = json.loads(json_str) - return cls.from_parsed_json(parsed) - @classmethod def from_parsed_json(cls, parsed): - return cls.value_from_db_format(parsed["p"]) + return cls._value_from_db_format(parsed["p"]) class CsInt(CsValue): - def value_in_db_format(self): + def _value_in_db_format(self, query_type): return str(self.value) @classmethod - def value_from_db_format(cls, s: str): + def 
_value_from_db_format(cls, s: str): return int(s) class CsBool(CsValue): - def value_in_db_format(self): + def _value_in_db_format(self, query_type): return str(self.value).lower() @classmethod - def value_from_db_format(cls, s: str): + def _value_from_db_format(cls, s: str): return s.lower() == 'true' class CsDate(CsValue): - def value_in_db_format(self): + def _value_in_db_format(self, query_type): return self.value.isoformat() @classmethod - def value_from_db_format(cls, s: str): - return datetime.fromisoformat(s) + def _value_from_db_format(cls, s: str): + return datetime.fromisoformat(s).date() class CsFloat(CsValue): - def value_in_db_format(self): + def _value_in_db_format(self, query_type): return str(self.value) @classmethod - def value_from_db_format(cls, s: str): + def _value_from_db_format(cls, s: str): return float(s) class CsText(CsValue): - def value_in_db_format(self): + def _value_in_db_format(self, query_type): return self.value @classmethod - def value_from_db_format(cls, s: str): + def _value_from_db_format(cls, s: str): return s class CsJsonb(CsValue): - def value_in_db_format(self): - return json.dumps(self.value) + def _value_in_db_format(self, query_type): + if query_type == "ejson_path": + return self.value + else: + return json.dumps(self.value) @classmethod - def value_from_db_format(cls, s: str): + def _value_from_db_format(cls, s: str): return json.loads(s) -def id_map(x): - return x - class CsRow: column_function_mapping = { 'encrypted_int': CsInt.from_parsed_json, @@ -95,9 +86,15 @@ class CsRow: 'encrypted_jsonb': CsText.from_parsed_json } + @staticmethod + def id_map(x): + return x + def __init__(self, row): self.row = {} for k, v in row.items(): - self.row[k] = None if v == None else self.column_function_mapping.get(k, id_map)(v) - - + if v == None: + self.row[k] = None + else: + mapping = self.column_function_mapping.get(k, self.id_map) + self.row[k] = mapping(v) diff --git a/languages/python/jupyter_notebook/cs_types_test.py 
b/languages/python/jupyter_notebook/cs_types_test.py new file mode 100644 index 00000000..9cfb3d5d --- /dev/null +++ b/languages/python/jupyter_notebook/cs_types_test.py @@ -0,0 +1,152 @@ +import unittest +import json +from datetime import date +from cs_types import * + +class EqlTest(unittest.TestCase): + def setUp(self): + self.template_dict = json.loads('{"k": "pt", "p": "1", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}') + + def test(self): + self.assertTrue(True) + + def test_to_db_format(self): + self.assertEqual( + CsInt(1, "table", "column").to_db_format(), + '{"k": "pt", "p": "1", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}' + ) + + def test_from_parsed_json_uses_p_value(self): + self.template_dict["p"] = "1" + self.assertEqual( + CsInt.from_parsed_json(self.template_dict), + 1 + ) + + def test_cs_int_to_db_format(self): + cs_int = CsInt(123, "table", "column") + self.assertEqual( + '{"k": "pt", "p": "123", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}', + cs_int.to_db_format() + ) + + def test_cs_int_from_parsed_json(self): + self.template_dict["p"] = "123" + self.assertEqual( + CsInt.from_parsed_json(self.template_dict), + 123 + ) + + def test_cs_bool_to_db_format_true(self): + cs_bool = CsBool(True, "table", "column") + self.assertEqual( + '{"k": "pt", "p": "true", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}', + cs_bool.to_db_format() + ) + + def test_cs_bool_to_db_format_false(self): + cs_bool = CsBool(False, "table", "column") + self.assertEqual( + '{"k": "pt", "p": "false", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}', + cs_bool.to_db_format() + ) + + def test_cs_bool_from_parsed_json_true(self): + self.template_dict["p"] = "true" + self.assertEqual( + CsBool.from_parsed_json(self.template_dict), + True + ) + + def test_cs_bool_from_parsed_json_false(self): + self.template_dict["p"] = "false" + self.assertEqual( + CsBool.from_parsed_json(self.template_dict), + False + ) + + def 
test_cs_date_to_db_format(self): + cs_date = CsDate(date(2024, 11, 1), "table", "column") + self.assertEqual( + '{"k": "pt", "p": "2024-11-01", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}', + cs_date.to_db_format() + ) + + def test_cs_date_from_parsed_json(self): + self.template_dict["p"] = "2024-11-01" + self.assertEqual( + CsDate.from_parsed_json(self.template_dict), + date(2024, 11, 1) + ) + + def test_cs_float_to_db_format(self): + cs_float = CsFloat(1.1, "table", "column") + self.assertEqual( + '{"k": "pt", "p": "1.1", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}', + cs_float.to_db_format() + ) + + def test_cs_float_from_parsed_json(self): + self.template_dict["p"] = "1.1" + self.assertEqual( + CsFloat.from_parsed_json(self.template_dict), + 1.1 + ) + + def test_cs_text_to_db_format(self): + cs_text = CsText("text", "table", "column") + self.assertEqual( + '{"k": "pt", "p": "text", "i": {"t": "table", "c": "column"}, "v": 1, "q": null}', + cs_text.to_db_format() + ) + + def test_cs_text_from_parsed_json(self): + self.template_dict["p"] = "text" + self.assertEqual( + CsText.from_parsed_json(self.template_dict), + "text" + ) + + def test_cs_jsonb_prints_json_string(self): + cs_jsonb = CsJsonb({"a": 1}, "table", "column") + self.assertEqual( + cs_jsonb._value_in_db_format("ste_vec"), + '{"a": 1}' + ) + + def test_cs_jsonb_prints_value_for_ejson_path(self): + cs_jsonb = CsJsonb("$.a.b", "table", "column") + self.assertEqual( + cs_jsonb._value_in_db_format("ejson_path"), + '$.a.b' + ) + + def test_cs_jsonb_returns_value(self): + self.assertEqual( + CsJsonb._value_from_db_format('{"a": 1}'), + {"a": 1} + ) + + def test_cs_row_makes_row(self): + cs_row = CsRow( + {"encrypted_int": json.loads(CsInt(1, "table", "column").to_db_format()), + "encrypted_boolean": json.loads(CsBool(True, "table", "column").to_db_format()), + "encrypted_date": json.loads(CsDate(date(2024, 11, 1), "table", "column").to_db_format()), + "encrypted_float": 
json.loads(CsFloat(1.1, "table", "column").to_db_format()), + "encrypted_utf8_str": json.loads(CsText("text", "table", "column").to_db_format()), + "encrypted_jsonb": json.loads(CsJsonb('{"a": 1}', "table", "column").to_db_format()) + }) + + self.assertEqual( + cs_row.row, + {"encrypted_int": 1, + "encrypted_boolean": True, + "encrypted_date": date(2024, 11, 1), + "encrypted_float": 1.1, + "encrypted_utf8_str": "text", + "encrypted_jsonb": '"{\\"a\\": 1}"' + } + ) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/languages/python/jupyter_notebook/docker-compose.yml b/languages/python/jupyter_notebook/docker-compose.yml index 1daefb74..7bc0dbbe 100644 --- a/languages/python/jupyter_notebook/docker-compose.yml +++ b/languages/python/jupyter_notebook/docker-compose.yml @@ -13,7 +13,7 @@ services: - cipherstash_getting_started_nw proxy: container_name: cipherstash_getting_started_proxy - image: cipherstash/cipherstash-proxy:cipherstash-proxy-v0.3.1 + image: cipherstash/cipherstash-proxy:cipherstash-proxy-v0.3.4 depends_on: - postgres ports: @@ -31,8 +31,7 @@ services: CS_DATABASE__PASSWORD: postgres CS_DATABASE__NAME: cipherstash_getting_started CS_DATABASE__HOST: cipherstash_getting_started_pg - # Enable this to get verbose debugging from Proxy - # CS_UNSAFE_LOGGING: "true" + CS_UNSAFE_LOGGING: "true" networks: - cipherstash_getting_started_nw