Authorized Mode - [Party 1]#

import crandas as cd
import pandas as pd
from crandas.base import session
from pathlib import Path
import plotly.express as px

session.authorization_file = None
#session.query_signing_key = '../clientsign.sk'

session.base_path = Path('../../../../vdl-instance/vdl/secrets')
session.endpoint = 'https://localhost:9820/api/v1'
cd.base.execute({"mpcinfo": "yes"})
{'buildinfo': 'Build: v1.3.1, build date: 2023-03-21, compiler family: clang',
 'maxint': '9409569905028393238',
 'number_of_parties': 1,
 'threshold': 0}

Step 1: Upload table to the Virtual Data Lake#

party1_table_vdl = cd.read_csv('../data/pharma_data/party1_dummy.csv', name = 'party1_table_vdl', add_nonce=True)
party1_table_vdl.name = 'party1_table_vdl'
>>> party1_table_vdl
    Name: party1_table_vdl
    Size: 9899 rows x 12 columns
    CIndex([Col("Unnamed: 0", "i", 1), Col("year", "i", 1), Col("month", "i", 1), Col("day", "i", 1), Col("article_nr", "i", 1), Col("condition_x", "i", 1), Col("category", "s", 8), Col("batch_nr", "i", 1), Col("smokes", "i", 1), Col("prescription", "i", 1), Col("quantity", "i", 1), Col("packaging", "i", 1)])

Step 2: Connect to external table from Party 2#

Note

We assume that a different party upload their data in a table called 'party2_table_vdl according to the steps in Authorized Mode - [Party 2].

party2_table_vdl = cd.get_table('party2_table_vdl')
party2_table_vdl.name = 'party2_table_vdl'
>>> party2_table_vdl
    Name: party2_table_vdl
    Size: 9931 rows x 8 columns
    CIndex([Col("Unnamed: 0", "i", 1), Col("year", "i", 1), Col("month", "i", 1), Col("day", "i", 1), Col("article_nr", "i", 1), Col("batch_nr", "i", 1), Col("smokes", "i", 1), Col("condition_y", "i", 1)])

Option 1: Join tables and retrieve statistics#

# Option 1 - a simple join; that requires specific requires for queries afterwards
merged = cd.merge(tab1=party1_table_vdl, tab2=party2_table_vdl, how='inner', on=['year', 'month', 'day', 'article_nr', 'batch_nr', 'smokes'])
>>> print(repr(merged))
    Name: 753FA9FCD9AF4BFE8E0E4F66957C0CA76A52A883BCBD34B3F3A63570125A023E
    Size: 130 rows x 14 columns
    CIndex([Col("Unnamed: 0_x", "i", 1), Col("year", "i", 1), Col("month", "i", 1), Col("day", "i", 1), Col("article_nr", "i", 1), Col("condition_x", "i", 1), Col("category", "s", 8), Col("batch_nr", "i", 1), Col("smokes", "i", 1), Col("prescription", "i", 1), Col("quantity", "i", 1), Col("packaging", "i", 1), Col("Unnamed: 0_y", "i", 1), Col("condition_y", "i", 1)])

Retrieve statistics#

>>> merged['condition_y'].mean()
    0.49230769230769234
merged.open()
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+----------+-----------+---------------+-------------+
|     | Unnamed: 0_x  | year | month | day | article_nr| condition_x | category | batch_nr| smokes       | prescription| quantity  | packaging     | condition_y |
+=====+===============+======+=======+=====+===========+=============+==========+========+==============+============+===========+===============+=============+
|  0  | 6644          | 2021 | 1     | 16  | 14039494  | 39935444    | N68YG35  | 6       | 0            | 46         | 55        | 15            | 1           |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
|  1  | 4637          | 2021 | 5     | 10  | 14039939  | 91225531    | C09UV03  | 5       | 0            | 34         | 56        | 44            | 0           |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
|  2  | 886           | 2021 | 7     | 22  | 14039446  | 50160592    | G40OU38  | 6       | 0            | 80         | 49        | 24            | 0           |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
|  3  | 1219          | 2021 | 1     | 2   | 14039916  | 41916861    | M60ZA74  | 5       | 0            | 14         | 15        | 3             | 0           |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
|  4  | 7956          | 2021 | 1     | 4   | 14039518  | 25503155    | J20UH66  | 3       | 0            | 40         | 46        | 54            | 1           |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
| ... | ...           | ...  | ...   | ... | ...       | ...         | ...      | ...     | ...          | ...        | ...       | ...           | ...         |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------|

130 rows × 14 columns
>>> merged[(merged["smokes"]==1).with_threshold(10)]["condition_y"].mean()
    0.44642857142857145
def compute_sum(month):
    try:
        result = merged[((merged["condition_y"]==1) & (merged['month']==month)).with_threshold(3)]["condition_y"].sum()
        return result             # computes means only if there are 3 cases for the month
    except:
        return None                                                                                  # if not, no output is given

dic={"Month": ['Jan', 'Feb', 'March'], "Sum of Astma": [
     compute_sum(1),compute_sum(2),compute_sum(3)]}

plot=px.bar(dic, x="Month", y="Sum of Astma")
plot.show()
../../_images/pharma_plot.png

Option 2: Join tables and create a restricted output#

with cd.Transaction():
    joined=cd.merge(tab1=party1_table_vdl, tab2=party2_table_vdl, how='inner', on=['year', 'month', 'day', 'article_nr', 'batch_nr', 'smokes'])
    joined_restricted = joined[['year', 'month', 'condition_x', 'smokes', 'condition_y']]
    opened_result = joined_restricted.open()
>>> print(joined_restricted.result)
    1AEA9CDB5435E66D2E36F687A237C4AD...[130x5]
opened_result.result
| year | month | condition_x | smokes | condition_y |
|------|-------|-------------|--------|-------------|
| 2021 | 1     | 39935444    | 0      | 1           |
| 2021 | 5     | 91225531    | 0      | 0           |
| 2021 | 7     | 50160592    | 0      | 0           |
| 2021 | 1     | 41916861    | 0      | 0           |
| 2021 | 1     | 25503155    | 0      | 1           |
| ...  | ...   | ...         | ...    | ...         |
| 2021 | 1     | 87459732    | 1      | 0           |
| 2021 | 5     | 31635324    | 1      | 0           |
| 2021 | 7     | 49820865    | 0      | 1           |
| 2021 | 9     | 33004168    | 0      | 0           |
| 2021 | 3     | 91658203    | 1      | 0           |

130 rows × 5 columns