Skip to content

Load bulk data in Python

This example is available in the Jupyter notebook at: github.com/intrahealth/client-registry-docs/docs/notebooks/load_bulk_data_in_python.ipynb

#!/usr/bin/env python3
from pathlib import Path
from requests_pkcs12 import get, post
import pandas as pd
import numpy as np

import recordlinkage

import fhirclient.models.patient as p
import fhirclient.models.humanname as hn
import fhirclient.models.contactpoint as cp
import fhirclient.models.fhirdate as fd
import fhirclient.models.identifier as ident
from fhirclient import client

import json
import time
import itertools

# suppress warning: "Certificate for localhost has no `subjectAltName`, falling back to check for a `commonName` for now"
import urllib3
urllib3.disable_warnings(urllib3.exceptions.SubjectAltNameWarning)
# versions
print("Pandas version: {0}".format(pd.__version__),'\n')
print("Python Record Linkage version: {0}".format(recordlinkage._version.get_versions()['version']),'\n')
print("Numpy version: {0}".format(np.__version__),'\n')
print("FHIR client version: {0}".format(client.__version__),'\n')
Pandas version: 1.0.3 

Python Record Linkage version: 0.14 

Numpy version: 1.18.4 

FHIR client version: 3.2.0 

# path to your git clone of github.com/intrahealth/client-registry
crhome = Path.home() / 'src' / 'github.com' / 'intrahealth' / 'client-registry'
clientcert = crhome / 'server' / 'sampleclientcertificates' / 'openmrs.p12'
servercert = crhome / 'server' / 'certificates' / 'server_cert.pem'
csv_file = crhome / 'tests' / 'uganda_data_v21_20201501.csv'
df_a = pd.read_csv(csv_file)
# df_a = df_a.set_index('rec_id')
print('Number of records :', len(df_a))
print(df_a.head())
Number of records : 5000
         rec_id sex date_of_birth given_name       surname phone_number  \
0  rec-2762-org   f      19671207     zuwena         acile   712 300633   
1  rec-2009-org   f      19761028     zuwena        lusike   772 614594   
2  rec-3269-org   f      19811002     zuwena      mungugeo   772 162632   
3  rec-1609-org   f      19270719    zuraika   akantambira   772 837692   
4  rec-2802-org   m                   zulfas      nyanchwo   782 855101   

       uganda_nin   art_number  
0  CF21927470OWMT   KMC-819708  
1  CF68167355NUZY   KUB-176148  
2  CF50136842UQFQ   MBA-746695  
3  CF68008770HZML   KMC-270901  
4  CM25736526XWGC   KSG-830566  
# some cleaning
df_a['rec_id'] = df_a['rec_id'].str.strip()
df_a['sex'] = df_a['sex'].str.strip()
df_a['given_name'] = df_a['given_name'].str.strip()
df_a['surname'] = df_a['surname'].str.strip()
df_a['date_of_birth'] = df_a['date_of_birth'].str.strip()
df_a['phone_number'] = df_a['phone_number'].str.strip()
df_a['uganda_nin'] = df_a['uganda_nin'].str.strip()
df_a['art_number'] = df_a['art_number'].str.strip()

df_a['sex']= df_a['sex'].replace('f', 'female')
df_a['sex']= df_a['sex'].replace('m', 'male')
print(df_a['sex'].value_counts())

# fhirclient validates and some birthdate fields are empty/improperly formatted
# remove non-digits
df_a['date_of_birth'] = df_a['date_of_birth'].str.extract('(\d+)', expand=False)
# force into datetime (coerce has benefit that it removes anything outside of 8 digits)
df_a['date_of_birth'] =  pd.to_datetime(df_a['date_of_birth'], errors='coerce')
# now back into str or fhirdate will complain
df_a['date_of_birth'] = df_a['date_of_birth'].apply(lambda x: x.strftime('%Y-%m-%d')if not pd.isnull(x) else '')

print(df_a.head())
female    3224
           963
male       809
d            1
r            1
q            1
k            1
Name: sex, dtype: int64
         rec_id     sex date_of_birth given_name      surname phone_number  \
0  rec-2762-org  female    1967-12-07     zuwena        acile   712 300633   
1  rec-2009-org  female    1976-10-28     zuwena       lusike   772 614594   
2  rec-3269-org  female    1981-10-02     zuwena     mungugeo   772 162632   
3  rec-1609-org  female    1927-07-19    zuraika  akantambira   772 837692   
4  rec-2802-org    male                   zulfas     nyanchwo   782 855101   

       uganda_nin  art_number  
0  CF21927470OWMT  KMC-819708  
1  CF68167355NUZY  KUB-176148  
2  CF50136842UQFQ  MBA-746695  
3  CF68008770HZML  KMC-270901  
4  CM25736526XWGC  KSG-830566  
# default server/path
server = "https://localhost:3000/Patient"
# 3 records, modify if more are required
limit = 100
for index, row in itertools.islice(df_a.iterrows(), limit):
# for index, row in df_a.iterrows():
    patient = p.Patient() # not using rec_id as pandas id, leaving empty
    patient.gender = row['sex']
    name = hn.HumanName()
    name.given = [row['given_name']]
    name.family = row['surname']
    name.use = 'official'
    patient.name = [name]
    phone = cp.ContactPoint()
    phone.system = 'phone'
    phone.value = row['phone_number']
    patient.telecom = [phone]
    patient.birthDate = fd.FHIRDate(row['date_of_birth'])
    emr = ident.Identifier()
    emr.system = 'http://clientregistry.org/openmrs'
    emr.value = row['rec_id']
    art = ident.Identifier()
    art.system = 'http://system1/artnumber'
    art.value = row['art_number']
    nin = ident.Identifier()
    nin.system = 'http://system1/nationalid'
    nin.value = row['uganda_nin']
    patient.identifier = [emr, art, nin]
    # print(json.dumps(patient.as_json()))

    headers = {'Content-Type': 'application/json'}
    start = time.time()
    response = post(server, headers=headers, data=json.dumps(patient.as_json()), 
                    pkcs12_filename=clientcert, pkcs12_password='', verify=servercert)
    end = time.time()
    print(index, response.headers['location'], " | ", round((end - start), 1), "ms") # response.headers['Date']
    # print(response.headers)
0 Patient/998bd085-8cac-4d80-a2dc-35dafad28140  |  2.1 ms
1 Patient/7e5bfabb-c77d-403d-b583-14f9c04bcc4f  |  1.6 ms
2 Patient/72a4ad32-4955-4ffc-9b6f-2ba849abcbcf  |  1.8 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
3 Patient/f372452d-d4e5-494d-b87b-da546e2908d7  |  1.3 ms
4 Patient/7c743b88-04f0-4638-b12c-cb45a7604234  |  1.4 ms
5 Patient/6299bc99-a6cf-4a37-8f31-08d7c954bcb6  |  1.5 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
6 Patient/b21337a7-72d2-43a7-8c4b-5c6ddc61e853  |  1.2 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
7 Patient/817c50fd-55fb-4bad-88ae-cfa32150cd72  |  1.4 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
8 Patient/ff310860-450d-49ac-9754-c4b52800258c  |  1.2 ms
9 Patient/f21900bc-b0ac-4145-bfe5-5d2d111ac34c  |  1.0 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
10 Patient/2329cbf5-3bf5-44d2-87dd-a7b75dc083dd  |  1.3 ms
11 Patient/b2f45d9a-be8a-4a98-ae0b-23bae838e2c9  |  1.1 ms
12 Patient/4eec0f3b-39f0-43da-a68c-0f2c68b7c998  |  0.9 ms
13 Patient/8fc2ab00-91dd-4b4b-81a8-25d7af976363  |  1.2 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
14 Patient/dfd9e992-92ba-4f80-839e-05e4f984a57d  |  1.0 ms
15 Patient/c47e5989-166e-44fa-828c-837dfbd73789  |  1.0 ms
16 Patient/cfe6500c-9ddf-466e-b14a-ba9f332ce53e  |  1.3 ms
17 Patient/2e5ef1ad-4e46-4cd5-8af6-e4624e420a5b  |  1.1 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
18 Patient/3bc91e43-e4ae-4a7b-8b38-be31d98b5828  |  0.8 ms
19 Patient/c18671b9-d986-4039-ab81-0a949b9a19a2  |  1.1 ms
20 Patient/208a9966-94c0-4955-8d5c-11b7e22e7289  |  1.0 ms
21 Patient/c21bc854-c1db-4ab3-a189-5d4459c5294e  |  0.8 ms
22 Patient/dc255b83-ca8d-42c9-bc2d-3c6c5b9d0561  |  0.8 ms
23 Patient/dc255b83-ca8d-42c9-bc2d-3c6c5b9d0561  |  1.0 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
24 Patient/85299742-df8a-4917-a16a-d466495b0a78  |  1.0 ms
25 Patient/5411a1bd-2ed8-49e6-bdbf-305d36361cf6  |  1.1 ms
26 Patient/def894a2-f86d-48da-9d1e-a37a0f88d55f  |  1.1 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
27 Patient/1844e0a4-8748-40dd-9914-603d5ef8a065  |  0.9 ms
28 Patient/03ab3f55-c854-42dd-ac2f-458eedadc317  |  0.9 ms
29 Patient/a7bc6c0c-b2a9-4a93-b445-246135c3457c  |  1.2 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
30 Patient/02312cad-0fcf-4af0-b256-0878bbbc8791  |  0.9 ms
31 Patient/70540ad6-f642-4800-9bba-49fe2f723a4d  |  1.2 ms
32 Patient/9507e691-b1c0-48a7-8997-197c40c4129a  |  1.0 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
33 Patient/0c816ae3-a880-4d9e-bbd9-265a6c5ac422  |  0.8 ms
34 Patient/b5e969be-5e18-4316-b59d-3c43bd7fdf85  |  0.8 ms
35 Patient/69901b11-cafc-4311-aa04-9c9a3c9654b7  |  1.1 ms
36 Patient/4973cb93-3111-4d6e-9a10-ce2c6f36e4da  |  1.0 ms
37 Patient/a5996f64-54a7-45a3-9c73-288809305413  |  0.8 ms
38 Patient/731be98f-9cab-40cf-9897-e2a220d95eab  |  1.1 ms
39 Patient/738b656d-1654-4341-893b-ce8743240974  |  0.9 ms
40 Patient/478d11de-006e-43ce-aadc-b2a55e6f7e96  |  1.1 ms
41 Patient/d88cb8fa-4055-4215-8f88-36a65ae41f87  |  0.9 ms
42 Patient/44b448f1-6765-477d-b2f7-3556fb57fc7a  |  0.9 ms
43 Patient/8e42ebd7-3037-4c1a-9bb3-278c70b2dde0  |  1.0 ms
44 Patient/8c97acdb-ccd1-4ccd-a8c8-ed8f827d6dd7  |  1.2 ms
45 Patient/b6c88343-822c-4443-91c1-0f45e5eb3906  |  0.7 ms
46 Patient/cd88762c-e116-472d-b05e-8ba7d6d7c81c  |  0.9 ms
47 Patient/11603220-0e66-4b0e-a5ff-a4635a91f16d  |  1.0 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
48 Patient/b3322a33-3374-486b-96b5-9a88883d4515  |  0.9 ms
49 Patient/a16d8e5d-7304-4b3b-8462-548efc1faac4  |  1.1 ms
50 Patient/92720ee1-2e2e-49c6-a971-1a9bcc669902  |  1.1 ms
51 Patient/30c9d4e9-dd00-476e-beb0-4f4ba95f7cd9  |  0.9 ms
52 Patient/92720ee1-2e2e-49c6-a971-1a9bcc669902  |  0.7 ms
53 Patient/3f399446-e624-4e89-94ac-1f46e2a612c1  |  1.1 ms
54 Patient/e4f323cd-3c01-4b5e-b612-a2bbe188d865  |  0.8 ms
55 Patient/c23c89db-5660-4839-840b-eb0a77e2f49d  |  1.0 ms
56 Patient/d2393b8d-ed7a-4eb9-b1ab-c70460146a4f  |  1.2 ms
57 Patient/b6010023-5162-473c-a4e1-56e1b348087e  |  1.0 ms
58 Patient/3843a23b-4ae7-4592-9b4a-a966e22d8fc9  |  0.9 ms
59 Patient/7212d479-d306-4568-8013-e59ee8913dd7  |  1.1 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
60 Patient/6ff08c0c-d0fb-4eda-96ad-91ece4f207ca  |  0.8 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
61 Patient/f3f62e4d-1b4d-453d-9ea3-c66ce2ad1f20  |  0.9 ms
62 Patient/e321c2dd-96e7-4c6d-ba36-6fa71b63ae0a  |  1.1 ms
63 Patient/92b9eb21-0b30-4b49-b611-a3830c233612  |  0.9 ms
64 Patient/92b9eb21-0b30-4b49-b611-a3830c233612  |  0.7 ms
65 Patient/47318447-2d0a-473d-a31e-6cf92619a4e6  |  1.0 ms
66 Patient/acd8f41e-3285-447d-a4d0-af6a440b329e  |  1.0 ms
67 Patient/4b2414e5-177a-45dd-9820-e0208212b517  |  0.8 ms
68 Patient/2f6365b6-861e-411e-b429-2d579ab5bd04  |  1.0 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
69 Patient/2f6365b6-861e-411e-b429-2d579ab5bd04  |  1.0 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
70 Patient/213c6b3d-5d13-4ad9-b6b4-ab2ceaee324a  |  0.8 ms
71 Patient/3576d266-c676-43ea-9023-bb11c11b2b6b  |  0.9 ms
72 Patient/59369ae7-a50f-4c88-8fa0-7cd30e317d7e  |  0.9 ms
73 Patient/d78ca73d-e8ff-4bd6-a505-ef929161fda6  |  0.7 ms
74 Patient/70277660-d1ec-4fe2-9853-50ad88cc5f7a  |  0.8 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
75 Patient/59369ae7-a50f-4c88-8fa0-7cd30e317d7e  |  0.7 ms
76 Patient/70277660-d1ec-4fe2-9853-50ad88cc5f7a  |  1.2 ms
77 Patient/1261bb98-582e-45cc-8a19-ec172c6c6f4f  |  1.3 ms
78 Patient/9c912e01-5e34-471a-b6c1-b650cc6c2601  |  1.2 ms
79 Patient/d6bc698f-6af2-4578-ad12-ba0bd9d14ed4  |  0.8 ms
80 Patient/1261bb98-582e-45cc-8a19-ec172c6c6f4f  |  1.2 ms
81 Patient/e5426498-f107-447c-ab01-b0b4008732aa  |  0.9 ms
82 Patient/3f6ddd3e-f4fd-45f6-9c60-c16904d9f0aa  |  0.7 ms
83 Patient/a933a54d-e34e-485e-9c95-6cb54f4f2533  |  1.5 ms
84 Patient/609660b5-c1aa-409f-bd9a-cc3d7dac6384  |  1.2 ms
85 Patient/c622e666-fdf6-4d61-a853-3e33d49e2d11  |  0.9 ms
Failed to initialize FHIRDate from "": Unrecognised ISO 8601 date format: ''
86 Patient/86b01ded-7acd-49d8-90e0-63e25662188a  |  1.1 ms
87 Patient/6672ce4d-190a-4be0-9afa-11340b34f9c7  |  1.8 ms
88 Patient/50f359c6-cb99-4a7e-a466-ec97b80225fc  |  0.8 ms
89 Patient/2cf7014c-6a88-42d0-951f-f24375e9c249  |  0.9 ms
90 Patient/f60e5b30-921d-4ca9-b5a8-c00574049ae3  |  1.0 ms
91 Patient/b9e04ae8-0ca2-4c50-b122-5e57448e0d38  |  1.1 ms
92 Patient/4bf4445a-8026-432f-a6ca-f9d277ff7d11  |  1.0 ms
93 Patient/d34127e7-cc18-43df-b45b-12679c286d5d  |  0.8 ms
94 Patient/4bf4445a-8026-432f-a6ca-f9d277ff7d11  |  1.2 ms
95 Patient/a2ed2adf-bb3f-40e5-a05d-c99c89eb3ed5  |  0.9 ms
96 Patient/7fbe823f-5a44-4ef8-9728-98e27f00d038  |  0.8 ms
97 Patient/7506a352-2486-452d-be15-25a40838a48e  |  0.7 ms
98 Patient/54309891-fa19-489e-bb0f-e95360bb7ee0  |  0.8 ms
99 Patient/71e25c0b-1eb3-4739-9264-80e2971dc581  |  0.7 ms


Last update: June 24, 2020