Authorized Mode - [Party 1]#
import crandas as cd
import pandas as pd
from crandas.base import session
from pathlib import Path
import plotly.express as px
session.authorization_file = None
#session.query_signing_key = '../clientsign.sk'
session.base_path = Path('../../../../vdl-instance/vdl/secrets')
session.endpoint = 'https://localhost:9820/api/v1'
cd.base.execute({"mpcinfo": "yes"})
{'buildinfo': 'Build: v1.3.1, build date: 2023-03-21, compiler family: clang',
'maxint': '9409569905028393238',
'number_of_parties': 1,
'threshold': 0}
Step 1: Upload table to the Virtual Data Lake#
party1_table_vdl = cd.read_csv('../data/pharma_data/party1_dummy.csv', name = 'party1_table_vdl', add_nonce=True)
party1_table_vdl.name = 'party1_table_vdl'
>>> party1_table_vdl
Name: party1_table_vdl
Size: 9899 rows x 12 columns
CIndex([Col("Unnamed: 0", "i", 1), Col("year", "i", 1), Col("month", "i", 1), Col("day", "i", 1), Col("article_nr", "i", 1), Col("condition_x", "i", 1), Col("category", "s", 8), Col("batch_nr", "i", 1), Col("smokes", "i", 1), Col("prescription", "i", 1), Col("quantity", "i", 1), Col("packaging", "i", 1)])
Step 2: Connect to external table from Party 2#
Note
We assume that a different party upload their data in a table called 'party2_table_vdl
according to the steps in Authorized Mode - [Party 2].
party2_table_vdl = cd.get_table('party2_table_vdl')
party2_table_vdl.name = 'party2_table_vdl'
>>> party2_table_vdl
Name: party2_table_vdl
Size: 9931 rows x 8 columns
CIndex([Col("Unnamed: 0", "i", 1), Col("year", "i", 1), Col("month", "i", 1), Col("day", "i", 1), Col("article_nr", "i", 1), Col("batch_nr", "i", 1), Col("smokes", "i", 1), Col("condition_y", "i", 1)])
Option 1: Join tables and retrieve statistics#
# Option 1 - a simple join; that requires specific requires for queries afterwards
merged = cd.merge(tab1=party1_table_vdl, tab2=party2_table_vdl, how='inner', on=['year', 'month', 'day', 'article_nr', 'batch_nr', 'smokes'])
>>> print(repr(merged))
Name: 753FA9FCD9AF4BFE8E0E4F66957C0CA76A52A883BCBD34B3F3A63570125A023E
Size: 130 rows x 14 columns
CIndex([Col("Unnamed: 0_x", "i", 1), Col("year", "i", 1), Col("month", "i", 1), Col("day", "i", 1), Col("article_nr", "i", 1), Col("condition_x", "i", 1), Col("category", "s", 8), Col("batch_nr", "i", 1), Col("smokes", "i", 1), Col("prescription", "i", 1), Col("quantity", "i", 1), Col("packaging", "i", 1), Col("Unnamed: 0_y", "i", 1), Col("condition_y", "i", 1)])
Retrieve statistics#
>>> merged['condition_y'].mean()
0.49230769230769234
merged.open()
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+----------+-----------+---------------+-------------+
| | Unnamed: 0_x | year | month | day | article_nr| condition_x | category | batch_nr| smokes | prescription| quantity | packaging | condition_y |
+=====+===============+======+=======+=====+===========+=============+==========+========+==============+============+===========+===============+=============+
| 0 | 6644 | 2021 | 1 | 16 | 14039494 | 39935444 | N68YG35 | 6 | 0 | 46 | 55 | 15 | 1 |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
| 1 | 4637 | 2021 | 5 | 10 | 14039939 | 91225531 | C09UV03 | 5 | 0 | 34 | 56 | 44 | 0 |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
| 2 | 886 | 2021 | 7 | 22 | 14039446 | 50160592 | G40OU38 | 6 | 0 | 80 | 49 | 24 | 0 |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
| 3 | 1219 | 2021 | 1 | 2 | 14039916 | 41916861 | M60ZA74 | 5 | 0 | 14 | 15 | 3 | 0 |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
| 4 | 7956 | 2021 | 1 | 4 | 14039518 | 25503155 | J20UH66 | 3 | 0 | 40 | 46 | 54 | 1 |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------+
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
+-----+---------------+------+-------+-----+-----------+-------------+----------+--------+--------------+------------+-----------+---------------+-------------|
130 rows × 14 columns
>>> merged[(merged["smokes"]==1).with_threshold(10)]["condition_y"].mean()
0.44642857142857145
def compute_sum(month):
try:
result = merged[((merged["condition_y"]==1) & (merged['month']==month)).with_threshold(3)]["condition_y"].sum()
return result # computes means only if there are 3 cases for the month
except:
return None # if not, no output is given
dic={"Month": ['Jan', 'Feb', 'March'], "Sum of Astma": [
compute_sum(1),compute_sum(2),compute_sum(3)]}
plot=px.bar(dic, x="Month", y="Sum of Astma")
plot.show()

Option 2: Join tables and create a restricted output#
with cd.Transaction():
joined=cd.merge(tab1=party1_table_vdl, tab2=party2_table_vdl, how='inner', on=['year', 'month', 'day', 'article_nr', 'batch_nr', 'smokes'])
joined_restricted = joined[['year', 'month', 'condition_x', 'smokes', 'condition_y']]
opened_result = joined_restricted.open()
>>> print(joined_restricted.result)
1AEA9CDB5435E66D2E36F687A237C4AD...[130x5]
opened_result.result
| year | month | condition_x | smokes | condition_y |
|------|-------|-------------|--------|-------------|
| 2021 | 1 | 39935444 | 0 | 1 |
| 2021 | 5 | 91225531 | 0 | 0 |
| 2021 | 7 | 50160592 | 0 | 0 |
| 2021 | 1 | 41916861 | 0 | 0 |
| 2021 | 1 | 25503155 | 0 | 1 |
| ... | ... | ... | ... | ... |
| 2021 | 1 | 87459732 | 1 | 0 |
| 2021 | 5 | 31635324 | 1 | 0 |
| 2021 | 7 | 49820865 | 0 | 1 |
| 2021 | 9 | 33004168 | 0 | 0 |
| 2021 | 3 | 91658203 | 1 | 0 |
130 rows × 5 columns