Part 6 BCLink

import carrot
import glob
# Register every file found in the part-1 data directory as an input.
# glob.glob() returns its matches in arbitrary (filesystem-dependent) order,
# so the list is sorted to keep the registration order reproducible between
# runs and machines.
# NOTE(review): the loader's own log reports nrows as the chunk size used when
# reading each file ("Using a chunksize of '1000' nrows").
inputs = carrot.tools.load_csv(sorted(glob.glob('../data/part1/*')), nrows=1000)
# Bare expression so the notebook displays the collection object.
inputs
2022-06-17 14:50:33 - LocalDataCollection - INFO - DataCollection Object Created
2022-06-17 14:50:33 - LocalDataCollection - INFO - Using a chunksize of '1000' nrows
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  Blood_Test.csv [<carrot.io.common.DataBrick object at 0x10477c760>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  Demographics.csv [<carrot.io.common.DataBrick object at 0x1047fc2e0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  GP_Records.csv [<carrot.io.common.DataBrick object at 0x1047fc5e0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  Hospital_Visit.csv [<carrot.io.common.DataBrick object at 0x1084745b0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  Serology.csv [<carrot.io.common.DataBrick object at 0x1084748b0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  Symptoms.csv [<carrot.io.common.DataBrick object at 0x1084742e0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  Vaccinations.csv [<carrot.io.common.DataBrick object at 0x108474e20>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering  pks.csv [<carrot.io.common.DataBrick object at 0x108474d90>]

<carrot.io.plugins.local.LocalDataCollection at 0x1047fc040>
# Build the BCLink output collection.
#   - first argument: BCLink settings; 'dry_run' is switched on here
#     (presumably so BCLink commands are only logged, not executed -- TODO confirm).
#   - output_folder: directory the intermediate CSV files are written to
#     before being handed to the BCLink loader.
#   - write_separate: passed through unchanged (semantics defined by carrot).
outputs = carrot.io.BCLinkDataCollection(
    {'dry_run': True},
    output_folder="./cache/",
    write_separate=True,
)
# Bare expression so the notebook displays the collection object.
outputs
2022-06-17 14:50:33 - BCLinkDataCollection - INFO - setup bclink collection
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'condition_occurrence' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - condition_occurrence (condition_occurrence) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'death' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - death (death) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'drug_exposure' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - drug_exposure (drug_exposure) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'measurement' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - measurement (measurement) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'observation' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - observation (observation) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'person' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - person (person) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'procedure_occurrence' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - procedure_occurrence (procedure_occurrence) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'specimen' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - specimen (specimen) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'visit_occurrence' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - visit_occurrence (visit_occurrence) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'person_ids' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - person_ids (person_ids) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM condition_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM death bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM drug_exposure bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM measurement bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM observation bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM person bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM procedure_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM specimen bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM visit_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM person_ids bclink
2022-06-17 14:50:33 - BCLinkDataCollection - INFO - DataCollection Object Created

<carrot.io.plugins.bclink.BCLinkDataCollection at 0x108494820>
# Load the mapping rules that describe how input columns map onto CDM tables.
rules = carrot.tools.load_json("../data/rules.json")

# Build the CommonDataModel from the rules, wiring it to the input collection
# created above and to the BCLink output collection.
cdm = carrot.cdm.CommonDataModel.from_rules(
    rules,
    inputs=inputs,
    outputs=outputs,
)

# Run the transformation; progress for each destination table is logged.
cdm.process()
2022-06-17 14:50:33 - CommonDataModel - INFO - CommonDataModel (5.3.1) created with co-connect-tools version 0.0.0
2022-06-17 14:50:33 - CommonDataModel - INFO - Running with an DataCollection object
2022-06-17 14:50:33 - CommonDataModel - INFO - Turning on automatic cdm column filling
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT * FROM person_ids  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM condition_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'condition_occurrence' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM condition_occurrence ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM death bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'death' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM death ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM drug_exposure bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'drug_exposure' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM drug_exposure ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM measurement bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'measurement' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM measurement ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM observation bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'observation' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM observation ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM person bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'person' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM person ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM procedure_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'procedure_occurrence' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM procedure_occurrence ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM specimen bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'specimen' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM specimen ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM visit_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'visit_occurrence' LIMIT 1  bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM visit_occurrence ORDER BY -person_id LIMIT 1;  bclink
2022-06-17 14:50:33 - CommonDataModel - INFO - Added MALE 3025 of type person
2022-06-17 14:50:33 - CommonDataModel - INFO - Added FEMALE 3026 of type person
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Antibody 3027 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added H/O: heart failure 3043 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added 2019-nCoV 3044 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Cancer 3045 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Headache 3028 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Fatigue 3029 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Dizziness 3030 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Cough 3031 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Fever 3032 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Muscle pain 3033 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Pneumonia 3042 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Mental health problem 3046 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Mental disorder 3047 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Type 2 diabetes mellitus 3048 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Ischemic heart disease 3049 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Hypertensive disorder 3050 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added COVID-19 vaccine 3034 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added COVID-19 vaccine 3035 of type drug_exposure

2022-06-17 14:50:34 - CommonDataModel - INFO - Added COVID-19 vaccine 3036 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added SARS-CoV-2 (COVID-19) vaccine, mRNA-1273 0.2 MG/ML Injectable Suspension 3040 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added SARS-CoV-2 (COVID-19) vaccine, mRNA-BNT162b2 0.1 MG/ML Injectable Suspension 3041 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Starting processing in order: ['person', 'observation', 'condition_occurrence', 'drug_exposure']
2022-06-17 14:50:34 - CommonDataModel - INFO - Number of objects to process for each table...
{
      "person": 2,
      "observation": 4,
      "condition_occurrence": 12,
      "drug_exposure": 5
}
2022-06-17 14:50:34 - CommonDataModel - INFO - for person: found 2 objects
2022-06-17 14:50:34 - CommonDataModel - INFO - working on person
2022-06-17 14:50:34 - CommonDataModel - INFO - starting on MALE 3025
2022-06-17 14:50:34 - Person - INFO - Called apply_rules
2022-06-17 14:50:34 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Demographics.csv' for the first time
2022-06-17 14:50:34 - Person - INFO - Mapped birth_datetime
2022-06-17 14:50:34 - Person - INFO - Mapped gender_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_value
2022-06-17 14:50:34 - Person - INFO - Mapped person_id

could not convert string to float: 'na'

2022-06-17 14:50:34 - Person - WARNING - Requiring non-null values in gender_concept_id removed 438 rows, leaving 562 rows.
2022-06-17 14:50:34 - Person - WARNING - Requiring non-null values in birth_datetime removed 1 rows, leaving 561 rows.
2022-06-17 14:50:34 - Person - INFO - Automatically formatting data columns.
2022-06-17 14:50:34 - Person - INFO - created df (0x1084e3790)[MALE_3025]
2022-06-17 14:50:34 - CommonDataModel - INFO - finished MALE 3025 (0x1084e3790) ... 1/2 completed, 561 rows
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - saving person_ids.0x1084e3f10.2022-06-17T135034 to ./cache//person_ids.0x1084e3f10.2022-06-17T135034.csv
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - dataset_tool --load --table=person_ids --user=data --data_file=./cache//person_ids.0x1084e3f10.2022-06-17T135034.csv --support --bcqueue bclink
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=person_ids --user=data --database=bclink
2022-06-17 14:50:34 - CommonDataModel - INFO - starting on FEMALE 3026
2022-06-17 14:50:34 - Person - INFO - Called apply_rules
2022-06-17 14:50:34 - Person - INFO - Mapped birth_datetime
2022-06-17 14:50:34 - Person - INFO - Mapped gender_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_value
2022-06-17 14:50:34 - Person - INFO - Mapped person_id
2022-06-17 14:50:34 - Person - WARNING - Requiring non-null values in gender_concept_id removed 565 rows, leaving 435 rows.
2022-06-17 14:50:34 - Person - INFO - Automatically formatting data columns.
2022-06-17 14:50:34 - Person - INFO - created df (0x1084e3c10)[FEMALE_3026]
2022-06-17 14:50:34 - CommonDataModel - INFO - finished FEMALE 3026 (0x1084e3c10) ... 2/2 completed, 435 rows

could not convert string to float: 'na'

2022-06-17 14:50:34 - BCLinkDataCollection - INFO - saving person_ids.0x1085161f0.2022-06-17T135034 to ./cache//person_ids.0x1085161f0.2022-06-17T135034.csv
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - dataset_tool --load --table=person_ids --user=data --data_file=./cache//person_ids.0x1085161f0.2022-06-17T135034.csv --support --bcqueue bclink
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=person_ids --user=data --database=bclink
2022-06-17 14:50:34 - CommonDataModel - INFO - saving dataframe (0x10854c7c0) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - saving person.0x10854c7c0.2022-06-17T135034 to ./cache//person.0x10854c7c0.2022-06-17T135034.csv
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - dataset_tool --load --table=person --user=data --data_file=./cache//person.0x10854c7c0.2022-06-17T135034.csv --support --bcqueue bclink
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=person --user=data --database=bclink
2022-06-17 14:50:35 - CommonDataModel - INFO - finalised person on iteration 0 producing 996 rows from 2 tables
2022-06-17 14:50:35 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:35 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Demographics.csv'
2022-06-17 14:50:35 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:35 - LocalDataCollection - INFO - All input files for this object have now been used.
2022-06-17 14:50:35 - LocalDataCollection - INFO - resetting used bricks
2022-06-17 14:50:35 - CommonDataModel - INFO - for observation: found 4 objects
2022-06-17 14:50:35 - CommonDataModel - INFO - working on observation
2022-06-17 14:50:35 - CommonDataModel - INFO - starting on Antibody 3027
2022-06-17 14:50:35 - Observation - INFO - Called apply_rules
2022-06-17 14:50:35 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Serology.csv' for the first time
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:35 - Observation - INFO - Mapped person_id
2022-06-17 14:50:35 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:35 - Observation - INFO - created df (0x108592670)[Antibody_3027]
2022-06-17 14:50:35 - CommonDataModel - INFO - finished Antibody 3027 (0x108592670) ... 1/4 completed, 413 rows
2022-06-17 14:50:35 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:35 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:35 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:35 - CommonDataModel - ERROR - 410/413 were good, 3 studies are removed.
2022-06-17 14:50:35 - CommonDataModel - INFO - starting on H/O: heart failure 3043
2022-06-17 14:50:35 - Observation - INFO - Called apply_rules
2022-06-17 14:50:35 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Hospital_Visit.csv' for the first time
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:35 - Observation - INFO - Mapped person_id
2022-06-17 14:50:35 - Observation - WARNING - Requiring non-null values in observation_concept_id removed 781 rows, leaving 219 rows.
2022-06-17 14:50:35 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:35 - Observation - INFO - created df (0x108837760)[H_O_heart_failure_3043]
2022-06-17 14:50:35 - CommonDataModel - INFO - finished H/O: heart failure 3043 (0x108837760) ... 2/4 completed, 219 rows
2022-06-17 14:50:35 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:35 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:35 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:35 - CommonDataModel - ERROR - 218/219 were good, 1 studies are removed.
2022-06-17 14:50:35 - CommonDataModel - INFO - starting on 2019-nCoV 3044
2022-06-17 14:50:35 - Observation - INFO - Called apply_rules
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:35 - Observation - INFO - Mapped person_id
2022-06-17 14:50:35 - Observation - WARNING - Requiring non-null values in observation_concept_id removed 847 rows, leaving 153 rows.
2022-06-17 14:50:35 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:36 - Observation - INFO - created df (0x108857700)[2019_nCoV_3044]
2022-06-17 14:50:36 - CommonDataModel - INFO - finished 2019-nCoV 3044 (0x108857700) ... 3/4 completed, 153 rows
2022-06-17 14:50:36 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:36 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:36 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:36 - CommonDataModel - ERROR - 152/153 were good, 1 studies are removed.
2022-06-17 14:50:36 - CommonDataModel - INFO - starting on Cancer 3045

2022-06-17 14:50:36 - Observation - INFO - Called apply_rules
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:36 - Observation - INFO - Mapped person_id
2022-06-17 14:50:36 - Observation - WARNING - Requiring non-null values in observation_concept_id removed 708 rows, leaving 292 rows.
2022-06-17 14:50:36 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:36 - Observation - INFO - created df (0x108857190)[Cancer_3045]
2022-06-17 14:50:36 - CommonDataModel - INFO - finished Cancer 3045 (0x108857190) ... 4/4 completed, 292 rows
2022-06-17 14:50:36 - CommonDataModel - ERROR - Removed 1 row(s) due to duplicates found when merging observation
2022-06-17 14:50:36 - CommonDataModel - WARNING - Example duplicates...
2022-06-17 14:50:36 - CommonDataModel - WARNING -                 person_id  observation_concept_id observation_date  \
observation_id                                                       
440                 110.0                 4059317       2019-07-07   
441                 110.0                 4059317       2019-07-07   

                      observation_datetime observation_source_value  \
observation_id                                                        
440             2019-07-07 00:00:00.000000             Heart Attack   
441             2019-07-07 00:00:00.000000             Heart Attack   

                observation_source_concept_id  
observation_id                                 
440                                   4059317  
441                                   4059317  
2022-06-17 14:50:36 - CommonDataModel - INFO - saving dataframe (0x108887880) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:36 - BCLinkDataCollection - INFO - saving observation.0x108887880.2022-06-17T135036 to ./cache//observation.0x108887880.2022-06-17T135036.csv
2022-06-17 14:50:36 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:36 - BCLinkHelpers - NOTICE - dataset_tool --load --table=observation --user=data --data_file=./cache//observation.0x108887880.2022-06-17T135036.csv --support --bcqueue bclink
2022-06-17 14:50:36 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=observation --user=data --database=bclink
2022-06-17 14:50:36 - CommonDataModel - INFO - finalised observation on iteration 0 producing 1072 rows from 4 tables
2022-06-17 14:50:36 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:36 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Hospital_Visit.csv'
2022-06-17 14:50:36 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:36 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Serology.csv'
2022-06-17 14:50:36 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:36 - LocalDataCollection - INFO - All input files for this object have now been used.
2022-06-17 14:50:36 - LocalDataCollection - INFO - resetting used bricks
2022-06-17 14:50:36 - CommonDataModel - INFO - for condition_occurrence: found 12 objects
2022-06-17 14:50:36 - CommonDataModel - INFO - working on condition_occurrence
2022-06-17 14:50:36 - CommonDataModel - INFO - starting on Headache 3028
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:36 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Symptoms.csv' for the first time
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:36 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 55 rows, leaving 275 rows.
2022-06-17 14:50:36 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 274 rows.
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088a7ee0)[Headache_3028]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Headache 3028 (0x1088a7ee0) ... 1/12 completed, 274 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Fatigue 3029
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 95 rows, leaving 235 rows.
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 234 rows.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088e2ac0)[Fatigue_3029]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Fatigue 3029 (0x1088e2ac0) ... 2/12 completed, 234 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Dizziness 3030
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value

2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 195 rows, leaving 135 rows.
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 134 rows.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088e2760)[Dizziness_3030]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Dizziness 3030 (0x1088e2760) ... 3/12 completed, 134 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Cough 3031
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 100 rows, leaving 230 rows.
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 229 rows.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088f2100)[Cough_3031]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Cough 3031 (0x1088f2100) ... 4/12 completed, 229 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Fever 3032
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 265 rows, leaving 65 rows.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - created df (0x1088facd0)[Fever_3032]
2022-06-17 14:50:38 - CommonDataModel - INFO - finished Fever 3032 (0x1088facd0) ... 5/12 completed, 65 rows
2022-06-17 14:50:38 - CommonDataModel - INFO - starting on Muscle pain 3033
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 295 rows, leaving 35 rows.
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 34 rows.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - created df (0x1089076a0)[Muscle_pain_3033]
2022-06-17 14:50:38 - CommonDataModel - INFO - finished Muscle pain 3033 (0x1089076a0) ... 6/12 completed, 34 rows
2022-06-17 14:50:38 - CommonDataModel - INFO - starting on Pneumonia 3042
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:38 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Hospital_Visit.csv' for the first time
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 866 rows, leaving 134 rows.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x1088b4130)[Pneumonia_3042]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Pneumonia 3042 (0x1088b4130) ... 7/12 completed, 134 rows
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Mental health problem 3046
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - LocalDataCollection - INFO - Retrieving initial dataframe for 'GP_Records.csv' for the first time
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:39 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 781 rows, leaving 219 rows.

2022-06-17 14:50:39 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x1089595e0)[Mental_health_problem_3046]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Mental health problem 3046 (0x1089595e0) ... 8/12 completed, 219 rows
2022-06-17 14:50:39 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:39 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:39 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:39 - CommonDataModel - ERROR - 217/219 were good, 2 studies are removed.
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Mental disorder 3047
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:39 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 781 rows, leaving 219 rows.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x10898e880)[Mental_disorder_3047]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Mental disorder 3047 (0x10898e880) ... 9/12 completed, 219 rows
2022-06-17 14:50:39 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:39 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:39 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:39 - CommonDataModel - ERROR - 217/219 were good, 2 studies are removed.
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Type 2 diabetes mellitus 3048
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:39 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 861 rows, leaving 139 rows.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x10841bee0)[Type_2_diabetes_mellitus_3048]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Type 2 diabetes mellitus 3048 (0x10841bee0) ... 10/12 completed, 139 rows
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Ischemic heart disease 3049
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:40 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 897 rows, leaving 103 rows.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - created df (0x108990760)[Ischemic_heart_disease_3049]
2022-06-17 14:50:40 - CommonDataModel - INFO - finished Ischemic heart disease 3049 (0x108990760) ... 11/12 completed, 103 rows
2022-06-17 14:50:40 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:40 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:40 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:40 - CommonDataModel - ERROR - 102/103 were good, 1 studies are removed.
2022-06-17 14:50:40 - CommonDataModel - INFO - starting on Hypertensive disorder 3050
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:40 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 938 rows, leaving 62 rows.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - created df (0x1089a82b0)[Hypertensive_disorder_3050]
2022-06-17 14:50:40 - CommonDataModel - INFO - finished Hypertensive disorder 3050 (0x1089a82b0) ... 12/12 completed, 62 rows
2022-06-17 14:50:40 - CommonDataModel - ERROR - Removed 2 row(s) due to duplicates found when merging condition_occurrence

2022-06-17 14:50:40 - CommonDataModel - WARNING - Example duplicates...
2022-06-17 14:50:40 - CommonDataModel - WARNING -                          person_id  condition_concept_id condition_start_date  \
condition_occurrence_id                                                         
38                           125.0                378253           2020-04-11   
40                           125.0                378253           2020-04-11   
308                          125.0               4223659           2020-04-11   
310                          125.0               4223659           2020-04-11   

                           condition_start_datetime condition_end_date  \
condition_occurrence_id                                                  
38                       2020-04-11 00:00:00.000000         2020-04-11   
40                       2020-04-11 00:00:00.000000         2020-04-11   
308                      2020-04-11 00:00:00.000000         2020-04-11   
310                      2020-04-11 00:00:00.000000         2020-04-11   

                             condition_end_datetime condition_source_value  \
condition_occurrence_id                                                      
38                       2020-04-11 00:00:00.000000                    Yes   
40                       2020-04-11 00:00:00.000000                    Yes   
308                      2020-04-11 00:00:00.000000                    Yes   
310                      2020-04-11 00:00:00.000000                    Yes   

                         condition_source_concept_id  
condition_occurrence_id                               
38                                            378253  
40                                            378253  
308                                          4223659  
310                                          4223659  
2022-06-17 14:50:40 - CommonDataModel - INFO - saving dataframe (0x108887bb0) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:40 - BCLinkDataCollection - INFO - saving condition_occurrence.0x108887bb0.2022-06-17T135040 to ./cache//condition_occurrence.0x108887bb0.2022-06-17T135040.csv
2022-06-17 14:50:40 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:40 - BCLinkHelpers - NOTICE - dataset_tool --load --table=condition_occurrence --user=data --data_file=./cache//condition_occurrence.0x108887bb0.2022-06-17T135040.csv --support --bcqueue bclink
2022-06-17 14:50:40 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=condition_occurrence --user=data --database=bclink
2022-06-17 14:50:40 - CommonDataModel - INFO - finalised condition_occurrence on iteration 0 producing 1841 rows from 12 tables
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'GP_Records.csv'
2022-06-17 14:50:40 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Hospital_Visit.csv'
2022-06-17 14:50:40 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Symptoms.csv'
2022-06-17 14:50:40 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:40 - LocalDataCollection - INFO - All input files for this object have now been used.
2022-06-17 14:50:40 - LocalDataCollection - INFO - resetting used bricks
2022-06-17 14:50:40 - CommonDataModel - INFO - for drug_exposure: found 5 objects
2022-06-17 14:50:40 - CommonDataModel - INFO - working on drug_exposure
2022-06-17 14:50:40 - CommonDataModel - INFO - starting on COVID-19 vaccine 3034
2022-06-17 14:50:40 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:40 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Vaccinations.csv' for the first time
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:40 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 475 rows, leaving 245 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089b1e50)[COVID_19_vaccine_3034]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished COVID-19 vaccine 3034 (0x1089b1e50) ... 1/5 completed, 245 rows
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on COVID-19 vaccine 3035
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 494 rows, leaving 226 rows.
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_exposure_start_datetime removed 1 rows, leaving 225 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089bf070)[COVID_19_vaccine_3035]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished COVID-19 vaccine 3035 (0x1089bf070) ... 2/5 completed, 225 rows
2022-06-17 14:50:41 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:41 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:41 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:41 - CommonDataModel - ERROR - 224/225 were good, 1 studies are removed.
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on COVID-19 vaccine 3036
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id

2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 471 rows, leaving 249 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089bfb80)[COVID_19_vaccine_3036]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished COVID-19 vaccine 3036 (0x1089bfb80) ... 3/5 completed, 249 rows
2022-06-17 14:50:41 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:41 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:41 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:41 - CommonDataModel - ERROR - 248/249 were good, 1 studies are removed.
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on SARS-CoV-2 (COVID-19) vaccine, mRNA-1273 0.2 MG/ML Injectable Suspension 3040
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 475 rows, leaving 245 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089f46d0)[SARS_CoV_2_COVID_19_vaccine_mRNA_1273_0_2_MG_ML_Injectable_Suspension_3040]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished SARS-CoV-2 (COVID-19) vaccine, mRNA-1273 0.2 MG/ML Injectable Suspension 3040 (0x1089f46d0) ... 4/5 completed, 245 rows
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on SARS-CoV-2 (COVID-19) vaccine, mRNA-BNT162b2 0.1 MG/ML Injectable Suspension 3041
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 471 rows, leaving 249 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:42 - DrugExposure - INFO - created df (0x108a0aa60)[SARS_CoV_2_COVID_19_vaccine_mRNA_BNT162b2_0_1_MG_ML_Injectable_Suspension_3041]
2022-06-17 14:50:42 - CommonDataModel - INFO - finished SARS-CoV-2 (COVID-19) vaccine, mRNA-BNT162b2 0.1 MG/ML Injectable Suspension 3041 (0x108a0aa60) ... 5/5 completed, 249 rows
2022-06-17 14:50:42 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:42 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table, 
2022-06-17 14:50:42 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:42 - CommonDataModel - ERROR - 248/249 were good, 1 studies are removed.
2022-06-17 14:50:42 - CommonDataModel - INFO - saving dataframe (0x1088874c0) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:42 - BCLinkDataCollection - INFO - saving drug_exposure.0x1088874c0.2022-06-17T135042 to ./cache//drug_exposure.0x1088874c0.2022-06-17T135042.csv
2022-06-17 14:50:42 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:42 - BCLinkHelpers - NOTICE - dataset_tool --load --table=drug_exposure --user=data --data_file=./cache//drug_exposure.0x1088874c0.2022-06-17T135042.csv --support --bcqueue bclink
2022-06-17 14:50:42 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=drug_exposure --user=data --database=bclink
2022-06-17 14:50:42 - CommonDataModel - INFO - finalised drug_exposure on iteration 0 producing 1210 rows from 5 tables
2022-06-17 14:50:42 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:42 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Vaccinations.csv'
2022-06-17 14:50:42 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:42 - LocalDataCollection - INFO - All input files for this object have now been used.

cdm['person'].dropna(axis=1)
gender_concept_id year_of_birth month_of_birth day_of_birth birth_datetime gender_source_value gender_source_concept_id
person_id
1 8507 1963 7 16 1963-07-16 00:00:00.000000 Male 8507
2 8507 1969 7 14 1969-07-14 00:00:00.000000 Male 8507
3 8507 1956 7 17 1956-07-17 00:00:00.000000 Male 8507
4 8507 1960 7 16 1960-07-16 00:00:00.000000 Male 8507
5 8507 1962 7 16 1962-07-16 00:00:00.000000 Male 8507
... ... ... ... ... ... ... ...
992 8532 1995 7 8 1995-07-08 00:00:00.000000 Female 8532
993 8532 1956 7 17 1956-07-17 00:00:00.000000 Female 8532
994 8532 1944 7 20 1944-07-20 00:00:00.000000 Female 8532
995 8532 1966 7 15 1966-07-15 00:00:00.000000 Female 8532
996 8532 1974 7 13 1974-07-13 00:00:00.000000 Female 8532

996 rows × 7 columns

cdm['observation'].dropna(axis=1)
person_id observation_concept_id observation_date observation_datetime observation_source_value observation_source_concept_id
observation_id
1 357 4288455 2020-10-03 2020-10-03 00:00:00.000000 17.172114692899758 4288455
2 258 4288455 2020-11-02 2020-11-02 00:00:00.000000 201.93861878809216 4288455
4 556 4288455 2021-07-26 2021-07-26 00:00:00.000000 11.506250956970998 4288455
5 380 4288455 2021-10-29 2021-10-29 00:00:00.000000 2.6594057121417487 4288455
6 415 4288455 2021-09-07 2021-09-07 00:00:00.000000 40.844873593089126 4288455
... ... ... ... ... ... ...
1068 469 40757663 2021-03-04 2021-03-04 00:00:00.000000 Cancer 40757663
1069 936 40757663 2020-07-17 2020-07-17 00:00:00.000000 Cancer 40757663
1070 472 40757663 2019-10-25 2019-10-25 00:00:00.000000 Cancer 40757663
1071 944 40757663 2018-08-12 2018-08-12 00:00:00.000000 Cancer 40757663
1072 944 40757663 2019-11-12 2019-11-12 00:00:00.000000 Cancer 40757663

1071 rows × 6 columns