Part 6 BCLink
import carrot
import glob
# Collect every path under the part1 data folder, then hand the list to
# carrot's CSV loader. nrows=1000 makes the loader read in chunks of 1000 rows.
csv_paths = glob.glob('../data/part1/*')
inputs = carrot.tools.load_csv(csv_paths, nrows=1000)
# Bare expression so the notebook displays the resulting collection object.
inputs
2022-06-17 14:50:33 - LocalDataCollection - INFO - DataCollection Object Created
2022-06-17 14:50:33 - LocalDataCollection - INFO - Using a chunksize of '1000' nrows
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering Blood_Test.csv [<carrot.io.common.DataBrick object at 0x10477c760>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering Demographics.csv [<carrot.io.common.DataBrick object at 0x1047fc2e0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering GP_Records.csv [<carrot.io.common.DataBrick object at 0x1047fc5e0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering Hospital_Visit.csv [<carrot.io.common.DataBrick object at 0x1084745b0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering Serology.csv [<carrot.io.common.DataBrick object at 0x1084748b0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering Symptoms.csv [<carrot.io.common.DataBrick object at 0x1084742e0>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering Vaccinations.csv [<carrot.io.common.DataBrick object at 0x108474e20>]
2022-06-17 14:50:33 - LocalDataCollection - INFO - Registering pks.csv [<carrot.io.common.DataBrick object at 0x108474d90>]
<carrot.io.plugins.local.LocalDataCollection at 0x1047fc040>
# Settings passed as the first positional argument to the BCLink collection.
# NOTE(review): 'dry_run' presumably means BCLink commands are only logged,
# not executed -- confirm against the carrot documentation.
bclink_settings = {'dry_run': True}
# Output collection that targets BCLink; files are staged under ./cache/.
outputs = carrot.io.BCLinkDataCollection(
    bclink_settings,
    output_folder="./cache/",
    write_separate=True,
)
# Bare expression so the notebook displays the resulting collection object.
outputs
2022-06-17 14:50:33 - BCLinkDataCollection - INFO - setup bclink collection
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'condition_occurrence' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - condition_occurrence (condition_occurrence) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'death' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - death (death) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'drug_exposure' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - drug_exposure (drug_exposure) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'measurement' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - measurement (measurement) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'observation' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - observation (observation) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'person' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - person (person) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'procedure_occurrence' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - procedure_occurrence (procedure_occurrence) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'specimen' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - specimen (specimen) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'visit_occurrence' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - visit_occurrence (visit_occurrence) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'person_ids' ) bclink
2022-06-17 14:50:33 - BCLinkHelpers - INFO - person_ids (person_ids) already exists --> all good
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM condition_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM death bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM drug_exposure bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM measurement bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM observation bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM person bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM procedure_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM specimen bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM visit_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM person_ids bclink
2022-06-17 14:50:33 - BCLinkDataCollection - INFO - DataCollection Object Created
<carrot.io.plugins.bclink.BCLinkDataCollection at 0x108494820>
# Load the mapping rules from the JSON file.
rules = carrot.tools.load_json("../data/rules.json")
# Build the common data model from the rules, wiring in the input collection
# loaded from CSV and the BCLink output collection created above.
cdm = carrot.cdm.CommonDataModel.from_rules(
    rules,
    inputs=inputs,
    outputs=outputs,
)
# Run the processing of all tables defined by the rules.
cdm.process()
2022-06-17 14:50:33 - CommonDataModel - INFO - CommonDataModel (5.3.1) created with co-connect-tools version 0.0.0
2022-06-17 14:50:33 - CommonDataModel - INFO - Running with an DataCollection object
2022-06-17 14:50:33 - CommonDataModel - INFO - Turning on automatic cdm column filling
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT * FROM person_ids bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM condition_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'condition_occurrence' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM condition_occurrence ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM death bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'death' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM death ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM drug_exposure bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'drug_exposure' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM drug_exposure ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM measurement bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'measurement' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM measurement ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM observation bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'observation' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM observation ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM person bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'person' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM person ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM procedure_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'procedure_occurrence' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM procedure_occurrence ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM specimen bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'specimen' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM specimen ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT count(*) FROM visit_occurrence bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT column_name FROM INFORMATION_SCHEMA. COLUMNS WHERE table_name = 'visit_occurrence' LIMIT 1 bclink
2022-06-17 14:50:33 - BCLinkHelpers - NOTICE - bc_sqlselect --user=bclink --query=SELECT person_id FROM visit_occurrence ORDER BY -person_id LIMIT 1; bclink
2022-06-17 14:50:33 - CommonDataModel - INFO - Added MALE 3025 of type person
2022-06-17 14:50:33 - CommonDataModel - INFO - Added FEMALE 3026 of type person
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Antibody 3027 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added H/O: heart failure 3043 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added 2019-nCoV 3044 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Cancer 3045 of type observation
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Headache 3028 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Fatigue 3029 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Dizziness 3030 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Cough 3031 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Fever 3032 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Muscle pain 3033 of type condition_occurrence
2022-06-17 14:50:33 - CommonDataModel - INFO - Added Pneumonia 3042 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Mental health problem 3046 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Mental disorder 3047 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Type 2 diabetes mellitus 3048 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Ischemic heart disease 3049 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added Hypertensive disorder 3050 of type condition_occurrence
2022-06-17 14:50:34 - CommonDataModel - INFO - Added COVID-19 vaccine 3034 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added COVID-19 vaccine 3035 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added COVID-19 vaccine 3036 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added SARS-CoV-2 (COVID-19) vaccine, mRNA-1273 0.2 MG/ML Injectable Suspension 3040 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Added SARS-CoV-2 (COVID-19) vaccine, mRNA-BNT162b2 0.1 MG/ML Injectable Suspension 3041 of type drug_exposure
2022-06-17 14:50:34 - CommonDataModel - INFO - Starting processing in order: ['person', 'observation', 'condition_occurrence', 'drug_exposure']
2022-06-17 14:50:34 - CommonDataModel - INFO - Number of objects to process for each table...
{
"person": 2,
"observation": 4,
"condition_occurrence": 12,
"drug_exposure": 5
}
2022-06-17 14:50:34 - CommonDataModel - INFO - for person: found 2 objects
2022-06-17 14:50:34 - CommonDataModel - INFO - working on person
2022-06-17 14:50:34 - CommonDataModel - INFO - starting on MALE 3025
2022-06-17 14:50:34 - Person - INFO - Called apply_rules
2022-06-17 14:50:34 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Demographics.csv' for the first time
2022-06-17 14:50:34 - Person - INFO - Mapped birth_datetime
2022-06-17 14:50:34 - Person - INFO - Mapped gender_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_value
2022-06-17 14:50:34 - Person - INFO - Mapped person_id
could not convert string to float: 'na'
2022-06-17 14:50:34 - Person - WARNING - Requiring non-null values in gender_concept_id removed 438 rows, leaving 562 rows.
2022-06-17 14:50:34 - Person - WARNING - Requiring non-null values in birth_datetime removed 1 rows, leaving 561 rows.
2022-06-17 14:50:34 - Person - INFO - Automatically formatting data columns.
2022-06-17 14:50:34 - Person - INFO - created df (0x1084e3790)[MALE_3025]
2022-06-17 14:50:34 - CommonDataModel - INFO - finished MALE 3025 (0x1084e3790) ... 1/2 completed, 561 rows
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - saving person_ids.0x1084e3f10.2022-06-17T135034 to ./cache//person_ids.0x1084e3f10.2022-06-17T135034.csv
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - dataset_tool --load --table=person_ids --user=data --data_file=./cache//person_ids.0x1084e3f10.2022-06-17T135034.csv --support --bcqueue bclink
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=person_ids --user=data --database=bclink
2022-06-17 14:50:34 - CommonDataModel - INFO - starting on FEMALE 3026
2022-06-17 14:50:34 - Person - INFO - Called apply_rules
2022-06-17 14:50:34 - Person - INFO - Mapped birth_datetime
2022-06-17 14:50:34 - Person - INFO - Mapped gender_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_concept_id
2022-06-17 14:50:34 - Person - INFO - Mapped gender_source_value
2022-06-17 14:50:34 - Person - INFO - Mapped person_id
2022-06-17 14:50:34 - Person - WARNING - Requiring non-null values in gender_concept_id removed 565 rows, leaving 435 rows.
2022-06-17 14:50:34 - Person - INFO - Automatically formatting data columns.
2022-06-17 14:50:34 - Person - INFO - created df (0x1084e3c10)[FEMALE_3026]
2022-06-17 14:50:34 - CommonDataModel - INFO - finished FEMALE 3026 (0x1084e3c10) ... 2/2 completed, 435 rows
could not convert string to float: 'na'
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - saving person_ids.0x1085161f0.2022-06-17T135034 to ./cache//person_ids.0x1085161f0.2022-06-17T135034.csv
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - dataset_tool --load --table=person_ids --user=data --data_file=./cache//person_ids.0x1085161f0.2022-06-17T135034.csv --support --bcqueue bclink
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=person_ids --user=data --database=bclink
2022-06-17 14:50:34 - CommonDataModel - INFO - saving dataframe (0x10854c7c0) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - saving person.0x10854c7c0.2022-06-17T135034 to ./cache//person.0x10854c7c0.2022-06-17T135034.csv
2022-06-17 14:50:34 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - dataset_tool --load --table=person --user=data --data_file=./cache//person.0x10854c7c0.2022-06-17T135034.csv --support --bcqueue bclink
2022-06-17 14:50:34 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=person --user=data --database=bclink
2022-06-17 14:50:35 - CommonDataModel - INFO - finalised person on iteration 0 producing 996 rows from 2 tables
2022-06-17 14:50:35 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:35 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Demographics.csv'
2022-06-17 14:50:35 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:35 - LocalDataCollection - INFO - All input files for this object have now been used.
2022-06-17 14:50:35 - LocalDataCollection - INFO - resetting used bricks
2022-06-17 14:50:35 - CommonDataModel - INFO - for observation: found 4 objects
2022-06-17 14:50:35 - CommonDataModel - INFO - working on observation
2022-06-17 14:50:35 - CommonDataModel - INFO - starting on Antibody 3027
2022-06-17 14:50:35 - Observation - INFO - Called apply_rules
2022-06-17 14:50:35 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Serology.csv' for the first time
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:35 - Observation - INFO - Mapped person_id
2022-06-17 14:50:35 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:35 - Observation - INFO - created df (0x108592670)[Antibody_3027]
2022-06-17 14:50:35 - CommonDataModel - INFO - finished Antibody 3027 (0x108592670) ... 1/4 completed, 413 rows
2022-06-17 14:50:35 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:35 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:35 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:35 - CommonDataModel - ERROR - 410/413 were good, 3 studies are removed.
2022-06-17 14:50:35 - CommonDataModel - INFO - starting on H/O: heart failure 3043
2022-06-17 14:50:35 - Observation - INFO - Called apply_rules
2022-06-17 14:50:35 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Hospital_Visit.csv' for the first time
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:35 - Observation - INFO - Mapped person_id
2022-06-17 14:50:35 - Observation - WARNING - Requiring non-null values in observation_concept_id removed 781 rows, leaving 219 rows.
2022-06-17 14:50:35 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:35 - Observation - INFO - created df (0x108837760)[H_O_heart_failure_3043]
2022-06-17 14:50:35 - CommonDataModel - INFO - finished H/O: heart failure 3043 (0x108837760) ... 2/4 completed, 219 rows
2022-06-17 14:50:35 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:35 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:35 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:35 - CommonDataModel - ERROR - 218/219 were good, 1 studies are removed.
2022-06-17 14:50:35 - CommonDataModel - INFO - starting on 2019-nCoV 3044
2022-06-17 14:50:35 - Observation - INFO - Called apply_rules
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:35 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:35 - Observation - INFO - Mapped person_id
2022-06-17 14:50:35 - Observation - WARNING - Requiring non-null values in observation_concept_id removed 847 rows, leaving 153 rows.
2022-06-17 14:50:35 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:36 - Observation - INFO - created df (0x108857700)[2019_nCoV_3044]
2022-06-17 14:50:36 - CommonDataModel - INFO - finished 2019-nCoV 3044 (0x108857700) ... 3/4 completed, 153 rows
2022-06-17 14:50:36 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:36 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:36 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:36 - CommonDataModel - ERROR - 152/153 were good, 1 studies are removed.
2022-06-17 14:50:36 - CommonDataModel - INFO - starting on Cancer 3045
2022-06-17 14:50:36 - Observation - INFO - Called apply_rules
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_concept_id
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_datetime
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_source_concept_id
2022-06-17 14:50:36 - Observation - INFO - Mapped observation_source_value
2022-06-17 14:50:36 - Observation - INFO - Mapped person_id
2022-06-17 14:50:36 - Observation - WARNING - Requiring non-null values in observation_concept_id removed 708 rows, leaving 292 rows.
2022-06-17 14:50:36 - Observation - INFO - Automatically formatting data columns.
2022-06-17 14:50:36 - Observation - INFO - created df (0x108857190)[Cancer_3045]
2022-06-17 14:50:36 - CommonDataModel - INFO - finished Cancer 3045 (0x108857190) ... 4/4 completed, 292 rows
2022-06-17 14:50:36 - CommonDataModel - ERROR - Removed 1 row(s) due to duplicates found when merging observation
2022-06-17 14:50:36 - CommonDataModel - WARNING - Example duplicates...
2022-06-17 14:50:36 - CommonDataModel - WARNING - person_id observation_concept_id observation_date \
observation_id
440 110.0 4059317 2019-07-07
441 110.0 4059317 2019-07-07
observation_datetime observation_source_value \
observation_id
440 2019-07-07 00:00:00.000000 Heart Attack
441 2019-07-07 00:00:00.000000 Heart Attack
observation_source_concept_id
observation_id
440 4059317
441 4059317
2022-06-17 14:50:36 - CommonDataModel - INFO - saving dataframe (0x108887880) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:36 - BCLinkDataCollection - INFO - saving observation.0x108887880.2022-06-17T135036 to ./cache//observation.0x108887880.2022-06-17T135036.csv
2022-06-17 14:50:36 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:36 - BCLinkHelpers - NOTICE - dataset_tool --load --table=observation --user=data --data_file=./cache//observation.0x108887880.2022-06-17T135036.csv --support --bcqueue bclink
2022-06-17 14:50:36 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=observation --user=data --database=bclink
2022-06-17 14:50:36 - CommonDataModel - INFO - finalised observation on iteration 0 producing 1072 rows from 4 tables
2022-06-17 14:50:36 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:36 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Hospital_Visit.csv'
2022-06-17 14:50:36 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:36 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Serology.csv'
2022-06-17 14:50:36 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:36 - LocalDataCollection - INFO - All input files for this object have now been used.
2022-06-17 14:50:36 - LocalDataCollection - INFO - resetting used bricks
2022-06-17 14:50:36 - CommonDataModel - INFO - for condition_occurrence: found 12 objects
2022-06-17 14:50:36 - CommonDataModel - INFO - working on condition_occurrence
2022-06-17 14:50:36 - CommonDataModel - INFO - starting on Headache 3028
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:36 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Symptoms.csv' for the first time
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:36 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 55 rows, leaving 275 rows.
2022-06-17 14:50:36 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 274 rows.
2022-06-17 14:50:36 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088a7ee0)[Headache_3028]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Headache 3028 (0x1088a7ee0) ... 1/12 completed, 274 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Fatigue 3029
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 95 rows, leaving 235 rows.
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 234 rows.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088e2ac0)[Fatigue_3029]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Fatigue 3029 (0x1088e2ac0) ... 2/12 completed, 234 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Dizziness 3030
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 195 rows, leaving 135 rows.
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 134 rows.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088e2760)[Dizziness_3030]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Dizziness 3030 (0x1088e2760) ... 3/12 completed, 134 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Cough 3031
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 100 rows, leaving 230 rows.
2022-06-17 14:50:37 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 229 rows.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:37 - ConditionOccurrence - INFO - created df (0x1088f2100)[Cough_3031]
2022-06-17 14:50:37 - CommonDataModel - INFO - finished Cough 3031 (0x1088f2100) ... 4/12 completed, 229 rows
2022-06-17 14:50:37 - CommonDataModel - INFO - starting on Fever 3032
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:37 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 265 rows, leaving 65 rows.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - created df (0x1088facd0)[Fever_3032]
2022-06-17 14:50:38 - CommonDataModel - INFO - finished Fever 3032 (0x1088facd0) ... 5/12 completed, 65 rows
2022-06-17 14:50:38 - CommonDataModel - INFO - starting on Muscle pain 3033
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 295 rows, leaving 35 rows.
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_start_datetime removed 1 rows, leaving 34 rows.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - created df (0x1089076a0)[Muscle_pain_3033]
2022-06-17 14:50:38 - CommonDataModel - INFO - finished Muscle pain 3033 (0x1089076a0) ... 6/12 completed, 34 rows
2022-06-17 14:50:38 - CommonDataModel - INFO - starting on Pneumonia 3042
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:38 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Hospital_Visit.csv' for the first time
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:38 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 866 rows, leaving 134 rows.
2022-06-17 14:50:38 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x1088b4130)[Pneumonia_3042]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Pneumonia 3042 (0x1088b4130) ... 7/12 completed, 134 rows
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Mental health problem 3046
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - LocalDataCollection - INFO - Retrieving initial dataframe for 'GP_Records.csv' for the first time
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:39 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 781 rows, leaving 219 rows.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x1089595e0)[Mental_health_problem_3046]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Mental health problem 3046 (0x1089595e0) ... 8/12 completed, 219 rows
2022-06-17 14:50:39 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:39 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:39 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:39 - CommonDataModel - ERROR - 217/219 were good, 2 studies are removed.
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Mental disorder 3047
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:39 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 781 rows, leaving 219 rows.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x10898e880)[Mental_disorder_3047]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Mental disorder 3047 (0x10898e880) ... 9/12 completed, 219 rows
2022-06-17 14:50:39 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:39 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:39 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:39 - CommonDataModel - ERROR - 217/219 were good, 2 studies are removed.
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Type 2 diabetes mellitus 3048
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:39 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 861 rows, leaving 139 rows.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:39 - ConditionOccurrence - INFO - created df (0x10841bee0)[Type_2_diabetes_mellitus_3048]
2022-06-17 14:50:39 - CommonDataModel - INFO - finished Type 2 diabetes mellitus 3048 (0x10841bee0) ... 10/12 completed, 139 rows
2022-06-17 14:50:39 - CommonDataModel - INFO - starting on Ischemic heart disease 3049
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:39 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:40 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 897 rows, leaving 103 rows.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - created df (0x108990760)[Ischemic_heart_disease_3049]
2022-06-17 14:50:40 - CommonDataModel - INFO - finished Ischemic heart disease 3049 (0x108990760) ... 11/12 completed, 103 rows
2022-06-17 14:50:40 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:40 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:40 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:40 - CommonDataModel - ERROR - 102/103 were good, 1 studies are removed.
2022-06-17 14:50:40 - CommonDataModel - INFO - starting on Hypertensive disorder 3050
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Called apply_rules
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_concept_id
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_end_datetime
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_source_concept_id
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_source_value
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped condition_start_datetime
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Mapped person_id
2022-06-17 14:50:40 - ConditionOccurrence - WARNING - Requiring non-null values in condition_concept_id removed 938 rows, leaving 62 rows.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - Automatically formatting data columns.
2022-06-17 14:50:40 - ConditionOccurrence - INFO - created df (0x1089a82b0)[Hypertensive_disorder_3050]
2022-06-17 14:50:40 - CommonDataModel - INFO - finished Hypertensive disorder 3050 (0x1089a82b0) ... 12/12 completed, 62 rows
2022-06-17 14:50:40 - CommonDataModel - ERROR - Removed 2 row(s) due to duplicates found when merging condition_occurrence
2022-06-17 14:50:40 - CommonDataModel - WARNING - Example duplicates...
2022-06-17 14:50:40 - CommonDataModel - WARNING - person_id condition_concept_id condition_start_date \
condition_occurrence_id
38 125.0 378253 2020-04-11
40 125.0 378253 2020-04-11
308 125.0 4223659 2020-04-11
310 125.0 4223659 2020-04-11
condition_start_datetime condition_end_date \
condition_occurrence_id
38 2020-04-11 00:00:00.000000 2020-04-11
40 2020-04-11 00:00:00.000000 2020-04-11
308 2020-04-11 00:00:00.000000 2020-04-11
310 2020-04-11 00:00:00.000000 2020-04-11
condition_end_datetime condition_source_value \
condition_occurrence_id
38 2020-04-11 00:00:00.000000 Yes
40 2020-04-11 00:00:00.000000 Yes
308 2020-04-11 00:00:00.000000 Yes
310 2020-04-11 00:00:00.000000 Yes
condition_source_concept_id
condition_occurrence_id
38 378253
40 378253
308 4223659
310 4223659
2022-06-17 14:50:40 - CommonDataModel - INFO - saving dataframe (0x108887bb0) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:40 - BCLinkDataCollection - INFO - saving condition_occurrence.0x108887bb0.2022-06-17T135040 to ./cache//condition_occurrence.0x108887bb0.2022-06-17T135040.csv
2022-06-17 14:50:40 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:40 - BCLinkHelpers - NOTICE - dataset_tool --load --table=condition_occurrence --user=data --data_file=./cache//condition_occurrence.0x108887bb0.2022-06-17T135040.csv --support --bcqueue bclink
2022-06-17 14:50:40 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=condition_occurrence --user=data --database=bclink
2022-06-17 14:50:40 - CommonDataModel - INFO - finalised condition_occurrence on iteration 0 producing 1841 rows from 12 tables
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'GP_Records.csv'
2022-06-17 14:50:40 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Hospital_Visit.csv'
2022-06-17 14:50:40 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:40 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Symptoms.csv'
2022-06-17 14:50:40 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:40 - LocalDataCollection - INFO - All input files for this object have now been used.
2022-06-17 14:50:40 - LocalDataCollection - INFO - resetting used bricks
2022-06-17 14:50:40 - CommonDataModel - INFO - for drug_exposure: found 5 objects
2022-06-17 14:50:40 - CommonDataModel - INFO - working on drug_exposure
2022-06-17 14:50:40 - CommonDataModel - INFO - starting on COVID-19 vaccine 3034
2022-06-17 14:50:40 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:40 - LocalDataCollection - INFO - Retrieving initial dataframe for 'Vaccinations.csv' for the first time
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:40 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:40 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 475 rows, leaving 245 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089b1e50)[COVID_19_vaccine_3034]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished COVID-19 vaccine 3034 (0x1089b1e50) ... 1/5 completed, 245 rows
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on COVID-19 vaccine 3035
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 494 rows, leaving 226 rows.
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_exposure_start_datetime removed 1 rows, leaving 225 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089bf070)[COVID_19_vaccine_3035]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished COVID-19 vaccine 3035 (0x1089bf070) ... 2/5 completed, 225 rows
2022-06-17 14:50:41 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:41 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:41 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:41 - CommonDataModel - ERROR - 224/225 were good, 1 studies are removed.
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on COVID-19 vaccine 3036
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 471 rows, leaving 249 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089bfb80)[COVID_19_vaccine_3036]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished COVID-19 vaccine 3036 (0x1089bfb80) ... 3/5 completed, 249 rows
2022-06-17 14:50:41 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:41 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:41 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:41 - CommonDataModel - ERROR - 248/249 were good, 1 studies are removed.
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on SARS-CoV-2 (COVID-19) vaccine, mRNA-1273 0.2 MG/ML Injectable Suspension 3040
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 475 rows, leaving 245 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:41 - DrugExposure - INFO - created df (0x1089f46d0)[SARS_CoV_2_COVID_19_vaccine_mRNA_1273_0_2_MG_ML_Injectable_Suspension_3040]
2022-06-17 14:50:41 - CommonDataModel - INFO - finished SARS-CoV-2 (COVID-19) vaccine, mRNA-1273 0.2 MG/ML Injectable Suspension 3040 (0x1089f46d0) ... 4/5 completed, 245 rows
2022-06-17 14:50:41 - CommonDataModel - INFO - starting on SARS-CoV-2 (COVID-19) vaccine, mRNA-BNT162b2 0.1 MG/ML Injectable Suspension 3041
2022-06-17 14:50:41 - DrugExposure - INFO - Called apply_rules
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_end_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_exposure_start_datetime
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_concept_id
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped drug_source_value
2022-06-17 14:50:41 - DrugExposure - INFO - Mapped person_id
2022-06-17 14:50:41 - DrugExposure - WARNING - Requiring non-null values in drug_concept_id removed 471 rows, leaving 249 rows.
2022-06-17 14:50:41 - DrugExposure - INFO - Automatically formatting data columns.
2022-06-17 14:50:42 - DrugExposure - INFO - created df (0x108a0aa60)[SARS_CoV_2_COVID_19_vaccine_mRNA_BNT162b2_0_1_MG_ML_Injectable_Suspension_3041]
2022-06-17 14:50:42 - CommonDataModel - INFO - finished SARS-CoV-2 (COVID-19) vaccine, mRNA-BNT162b2 0.1 MG/ML Injectable Suspension 3041 (0x108a0aa60) ... 5/5 completed, 249 rows
2022-06-17 14:50:42 - CommonDataModel - ERROR - There are person_ids in this table that are not in the output person table!
2022-06-17 14:50:42 - CommonDataModel - ERROR - Either they are not in the original data, or while creating the person table,
2022-06-17 14:50:42 - CommonDataModel - ERROR - studies have been removed due to lack of required fields, such as birthdate.
2022-06-17 14:50:42 - CommonDataModel - ERROR - 248/249 were good, 1 studies are removed.
2022-06-17 14:50:42 - CommonDataModel - INFO - saving dataframe (0x1088874c0) to <carrot.io.plugins.bclink.BCLinkDataCollection object at 0x108494820>
2022-06-17 14:50:42 - BCLinkDataCollection - INFO - saving drug_exposure.0x1088874c0.2022-06-17T135042 to ./cache//drug_exposure.0x1088874c0.2022-06-17T135042.csv
2022-06-17 14:50:42 - BCLinkDataCollection - INFO - finished save to file
2022-06-17 14:50:42 - BCLinkHelpers - NOTICE - dataset_tool --load --table=drug_exposure --user=data --data_file=./cache//drug_exposure.0x1088874c0.2022-06-17T135042.csv --support --bcqueue bclink
2022-06-17 14:50:42 - BCLinkHelpers - NOTICE - datasettool2 list-updates --dataset=drug_exposure --user=data --database=bclink
2022-06-17 14:50:42 - CommonDataModel - INFO - finalised drug_exposure on iteration 0 producing 1210 rows from 5 tables
2022-06-17 14:50:42 - LocalDataCollection - INFO - Getting next chunk of data
2022-06-17 14:50:42 - LocalDataCollection - INFO - Getting the next chunk of size '1000' for 'Vaccinations.csv'
2022-06-17 14:50:42 - LocalDataCollection - INFO - --> Got 0 rows
2022-06-17 14:50:42 - LocalDataCollection - INFO - All input files for this object have now been used.
cdm['person'].dropna(axis=1)
person_id | gender_concept_id | year_of_birth | month_of_birth | day_of_birth | birth_datetime | gender_source_value | gender_source_concept_id |
---|---|---|---|---|---|---|---|
1 | 8507 | 1963 | 7 | 16 | 1963-07-16 00:00:00.000000 | Male | 8507 |
2 | 8507 | 1969 | 7 | 14 | 1969-07-14 00:00:00.000000 | Male | 8507 |
3 | 8507 | 1956 | 7 | 17 | 1956-07-17 00:00:00.000000 | Male | 8507 |
4 | 8507 | 1960 | 7 | 16 | 1960-07-16 00:00:00.000000 | Male | 8507 |
5 | 8507 | 1962 | 7 | 16 | 1962-07-16 00:00:00.000000 | Male | 8507 |
... | ... | ... | ... | ... | ... | ... | ... |
992 | 8532 | 1995 | 7 | 8 | 1995-07-08 00:00:00.000000 | Female | 8532 |
993 | 8532 | 1956 | 7 | 17 | 1956-07-17 00:00:00.000000 | Female | 8532 |
994 | 8532 | 1944 | 7 | 20 | 1944-07-20 00:00:00.000000 | Female | 8532 |
995 | 8532 | 1966 | 7 | 15 | 1966-07-15 00:00:00.000000 | Female | 8532 |
996 | 8532 | 1974 | 7 | 13 | 1974-07-13 00:00:00.000000 | Female | 8532 |
996 rows × 7 columns
cdm['observation'].dropna(axis=1)
observation_id | person_id | observation_concept_id | observation_date | observation_datetime | observation_source_value | observation_source_concept_id |
---|---|---|---|---|---|---|
1 | 357 | 4288455 | 2020-10-03 | 2020-10-03 00:00:00.000000 | 17.172114692899758 | 4288455 |
2 | 258 | 4288455 | 2020-11-02 | 2020-11-02 00:00:00.000000 | 201.93861878809216 | 4288455 |
4 | 556 | 4288455 | 2021-07-26 | 2021-07-26 00:00:00.000000 | 11.506250956970998 | 4288455 |
5 | 380 | 4288455 | 2021-10-29 | 2021-10-29 00:00:00.000000 | 2.6594057121417487 | 4288455 |
6 | 415 | 4288455 | 2021-09-07 | 2021-09-07 00:00:00.000000 | 40.844873593089126 | 4288455 |
... | ... | ... | ... | ... | ... | ... |
1068 | 469 | 40757663 | 2021-03-04 | 2021-03-04 00:00:00.000000 | Cancer | 40757663 |
1069 | 936 | 40757663 | 2020-07-17 | 2020-07-17 00:00:00.000000 | Cancer | 40757663 |
1070 | 472 | 40757663 | 2019-10-25 | 2019-10-25 00:00:00.000000 | Cancer | 40757663 |
1071 | 944 | 40757663 | 2018-08-12 | 2018-08-12 00:00:00.000000 | Cancer | 40757663 |
1072 | 944 | 40757663 | 2019-11-12 | 2019-11-12 00:00:00.000000 | Cancer | 40757663 |
1071 rows × 6 columns