-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathblockchain.py
443 lines (378 loc) · 14.2 KB
/
blockchain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
import os
import json
import re
import fitz
import datetime
import pandas as pd
import hashlib
from flask import Flask, jsonify, request, render_template
from cryptography.fernet import Fernet
from werkzeug.utils import secure_filename
import tkinter as tk
from tkinter import filedialog
# --------------------------
# PDF Processing Functions
# --------------------------
def extract_data_from_pdf(file_path):
    """Extract a subject id, prescriptions and diagnoses from one PDF.

    The text of every page is concatenated and searched for
    ``<label>: <value>`` pairs, trying several label synonyms per field.

    Args:
        file_path: Path of the PDF to open with PyMuPDF.

    Returns:
        A dict with three keys:
        ``"subject_id"`` - int taken from the first matching label, or
        ``None`` when no label matched;
        ``"PRESCRIPTION"`` - list of ``{"drug_name": str}`` dicts;
        ``"DIAGNOSES"`` - list of ``{"diagnosis": str, "icd9_code": None}``
        dicts (the ICD-9 code is never filled in here).
    """
    # Use the document as a context manager so the file handle is released
    # even if text extraction raises.
    with fitz.open(file_path) as doc:
        text = "".join(page.get_text() for page in doc)
    # Expand the single-glyph "fl" ligature (U+FB02) so the labels and
    # values match as plain ASCII text.
    text = text.replace('\ufb02', 'fl')

    data = {
        "subject_id": None,
        "PRESCRIPTION": [],
        "DIAGNOSES": []
    }
    subject_id_synonyms = ["Subject Id", "Patient Id", "Subject Identifier"]
    drug_name_synonyms = ["Drug Name", "Medication Name", "Prescription Name"]
    diagnosis_synonyms = ["Diagnosis", "Diagnosis Name", "Disease Name"]

    # Only the first synonym that matches supplies the subject id.
    for synonym in subject_id_synonyms:
        subject_id_match = re.search(fr'{synonym}:\s*(\d+)', text)
        if subject_id_match:
            data["subject_id"] = int(subject_id_match.group(1))
            break

    # Every occurrence of every synonym contributes one prescription entry;
    # a value ends at the next colon or newline.
    for synonym in drug_name_synonyms:
        for match in re.findall(fr'{synonym}:\s*([^:\n]+)', text):
            data["PRESCRIPTION"].append({"drug_name": match.strip()})

    for synonym in diagnosis_synonyms:
        for match in re.findall(fr'{synonym}:\s*([^:\n]+)', text):
            data["DIAGNOSES"].append({
                "diagnosis": match.strip(),
                "icd9_code": None
            })
    return data
def save_to_json(data, output_file):
    """Serialise *data* as JSON with a 4-space indent into *output_file*.

    The target is opened in ``'w'`` mode, so existing content is replaced.
    """
    with open(output_file, 'w') as handle:
        json.dump(data, handle, indent=4)
def main(selected_pdfs, output_file):
    """Extract a record from each PDF path and save them as one JSON list.

    Args:
        selected_pdfs: Iterable of PDF file paths.
        output_file: Path of the JSON file to write.
    """
    records = [extract_data_from_pdf(pdf_path) for pdf_path in selected_pdfs]
    save_to_json(records, output_file)
def extract_data_from_pdfs():
    """Ask for PDF files and write their extracted data to ``output.json``.

    Does nothing when the selection is empty.
    """
    chosen_paths = select_pdf_files()
    if not chosen_paths:
        return
    main(chosen_paths, 'output.json')
def select_pdf_files():
    """Show a native "open files" dialog restricted to PDFs.

    Returns:
        Whatever ``filedialog.askopenfilenames`` returns for the selection;
        callers in this file only test it for truthiness and iterate it.
    """
    root = tk.Tk()
    try:
        # Hide the empty root window; only the dialog should be visible.
        root.withdraw()
        return filedialog.askopenfilenames(
            title="Select PDF Files", filetypes=[("PDF files", "*.pdf")])
    finally:
        # Tear the hidden root down so repeated calls do not accumulate
        # Tk interpreter instances.
        root.destroy()
# --------------------------
# Blockchain Class & Methods
# --------------------------
class Blockchain:
    """In-memory proof-of-work chain whose blocks are plain dicts.

    Each block holds ``index``, ``timestamp``, ``proof``, ``previous_hash``
    and, optionally, ``patient_data``. A genesis block is created on
    construction.
    """

    # Hex prefix a proof hash must start with to count as solved.
    DIFFICULTY_PREFIX = '0000'

    def __init__(self):
        self.chain = []
        # Genesis block: fixed proof and a placeholder previous hash.
        self.create_blockchain(proof=1, previous_hash='0')

    def create_blockchain(self, proof, previous_hash, patient_data=None):
        """Append a new block to the chain and return it.

        Args:
            proof: Proof-of-work value for the new block.
            previous_hash: Hash of the block this one links to.
            patient_data: Optional payload; stored under ``'patient_data'``
                only when truthy.

        Returns:
            The block dict that was appended.
        """
        block = {
            'index': len(self.chain) + 1,
            'timestamp': str(datetime.datetime.now()),
            'proof': proof,
            'previous_hash': previous_hash
        }
        if patient_data:
            block['patient_data'] = patient_data  # Add patient details to the block
        self.chain.append(block)
        return block

    def get_previous_block(self):
        """Return the most recently appended block."""
        return self.chain[-1]

    @staticmethod
    def _proof_hash(proof, previous_proof):
        """Return the SHA-256 hex digest of ``proof**2 - previous_proof**2``."""
        puzzle = str(proof ** 2 - previous_proof ** 2).encode()
        return hashlib.sha256(puzzle).hexdigest()

    def _is_valid_proof(self, proof, previous_proof):
        """Return True when the proof pair hashes to the difficulty prefix."""
        digest = self._proof_hash(proof, previous_proof)
        return digest.startswith(self.DIFFICULTY_PREFIX)

    def proof_of_work(self, previous_proof):
        """Return the smallest positive integer proof that solves the puzzle.

        Candidates are tried in order starting from 1 until the hash of
        ``candidate**2 - previous_proof**2`` starts with the difficulty
        prefix.
        """
        new_proof = 1
        while not self._is_valid_proof(new_proof, previous_proof):
            new_proof += 1
        return new_proof

    def hash(self, block):
        """Return the SHA-256 hex digest of *block* serialised as JSON.

        Keys are sorted so that equal dicts always produce the same hash.
        """
        encoded_block = json.dumps(block, sort_keys=True).encode()
        return hashlib.sha256(encoded_block).hexdigest()

    def is_chain_valid(self, chain):
        """Check the hash links and proofs of every block after the first.

        Args:
            chain: List of block dicts, oldest first.

        Returns:
            ``False`` for an empty chain (there is no genesis block), for a
            block whose ``previous_hash`` does not equal the hash of its
            predecessor, or for a proof that does not solve the puzzle
            against its predecessor's proof; ``True`` otherwise.
        """
        if not chain:
            return False
        # Walk consecutive (predecessor, block) pairs.
        for previous_block, block in zip(chain, chain[1:]):
            if block["previous_hash"] != self.hash(previous_block):
                return False
            if not self._is_valid_proof(block['proof'], previous_block['proof']):
                return False
        return True
# Single module-level chain instance; the Flask route handlers in this file
# read from it and append blocks to it. It lives in memory only - no
# persistence of the chain is visible in this file.
blockchain = Blockchain()
# --------------------------
# Encryption/Decryption Functions
# --------------------------
def generate_key():
    """Create and return a fresh Fernet key."""
    fresh_key = Fernet.generate_key()
    return fresh_key
def encrypt_data(data, key):
    """Serialise *data* to JSON, encrypt it with *key* and return the token as text."""
    cipher = Fernet(key)
    plaintext = json.dumps(data).encode()
    token = cipher.encrypt(plaintext)
    return token.decode()
def decrypt_data(encrypted_data, key):
    """Decrypt a text token with *key* and return the JSON-decoded payload."""
    cipher = Fernet(key)
    token = encrypted_data.encode()
    plaintext = cipher.decrypt(token)
    return json.loads(plaintext.decode())
# --------------------------
# Flask App Configuration
# --------------------------
# Flask application object; the @app.route decorators in this file register
# their handlers on it.
app = Flask(__name__)
# Directory name for uploaded files, also published in the app config below.
UPLOAD_FOLDER = 'uploads'
# Extensions accepted by allowed_file().
# NOTE(review): /process_file parses uploads as PDFs and does not call
# allowed_file() - confirm whether 'json' is really the intended value.
ALLOWED_EXTENSIONS = {'json'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# --------------------------
# Helper Functions for Flask
# --------------------------
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all means no extension.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
# --------------------------
# Flask Routes
# --------------------------
@app.route('/process_file', methods=['POST'])
def process_file():
    """Ingest uploaded PDFs: extract, encrypt, and mine one block per file.

    For every file under the ``file`` form field the handler saves it to the
    configured upload folder, extracts its data, encrypts the record with a
    freshly generated key, appends ``"<subject_id>: <key>"`` to ``keys.txt``
    and mines a block carrying the encrypted record. All extracted records
    are then checked for suspicious values and written to ``output.json``.

    Returns:
        400 with an ``error`` message when no file was supplied or a file
        name is empty after sanitising; otherwise a JSON success message.

    NOTE(review): decryption keys are appended to ``keys.txt`` in plain text
    next to the subject id - confirm this is acceptable for the deployment.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    uploaded_files = request.files.getlist('file')
    if not uploaded_files or uploaded_files[0].filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Sanitise every name up front so a bad upload is rejected before any
    # key is written or any block is mined.
    named_uploads = [
        (file, secure_filename(file.filename or '')) for file in uploaded_files
    ]
    if any(not filename for _, filename in named_uploads):
        return jsonify({"error": "Invalid file name"}), 400

    # Save into the configured folder and make sure it exists.
    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)

    all_data = []
    # Open keys.txt to append keys and IDs
    with open('keys.txt', 'a') as key_file:
        for file, filename in named_uploads:
            file_path = os.path.join(upload_dir, filename)
            file.save(file_path)
            data = extract_data_from_pdf(file_path)
            all_data.append(data)
            # Generate a key and encrypt the data with it
            key = generate_key()
            encrypted_data = encrypt_data(data, key)
            # Record which key belongs to which subject (None when the PDF
            # carried no recognisable subject id).
            subject_id = data['subject_id']
            key_file.write(f"{subject_id}: {key.decode()}\n")
            # Mine a block with the encrypted data
            previous_block = blockchain.get_previous_block()
            previous_proof = previous_block['proof']
            proof = blockchain.proof_of_work(previous_proof)
            previous_hash = blockchain.hash(previous_block)
            blockchain.create_blockchain(proof, previous_hash, patient_data=encrypted_data)

    flagged_data = flag_suspicious_data(all_data)
    if flagged_data:  # Check if there's any flagged data
        # Flagged records are only reported on stdout; they are still saved.
        print("Suspicious data found:", flagged_data)
    save_to_json(all_data, 'output.json')
    return jsonify({"message": "Files processed, encrypted, blocks mined, and keys saved successfully"})
@app.route('/')
def index():
    """Serve the page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/mine_block', methods=['POST'])
def mine_block():
    """Mine a block that carries the patient details posted as JSON.

    The body must be a JSON object containing every field in
    ``required_fields``.

    Returns:
        400 with a message when the body is missing, not a JSON object, or
        lacks a required field; otherwise 200 with the newly mined block.

    NOTE(review): the details are stored in the block as received
    (unencrypted), and /get_chain returns whole blocks - confirm this is
    intended.
    """
    # silent=True yields None instead of raising on a missing/invalid body,
    # so the request falls through to the 400 response below.
    patient_data = request.get_json(silent=True)
    # Validate if required fields exist
    # TODO: date of birth and prescription are not required yet.
    required_fields = ["subject_id", "gender", "drug", "care_type", "diagnoses"]
    if not isinstance(patient_data, dict) or not all(
            field in patient_data for field in required_fields):
        response = {
            'message': 'Invalid input. All patient fields are required.'
        }
        return jsonify(response), 400
    # Proceed with mining a block with the patient details
    previous_block = blockchain.get_previous_block()
    previous_proof = previous_block['proof']
    proof = blockchain.proof_of_work(previous_proof)
    previous_hash = blockchain.hash(previous_block)
    # Actually append the block; without this the response below had no
    # block to report and nothing was added to the chain.
    block = blockchain.create_blockchain(proof, previous_hash, patient_data=patient_data)
    response = {
        'message': 'Block mined with patient details!',
        'block': block
    }
    return jsonify(response), 200
@app.route('/get_chain', methods=['GET'])
def get_chain():
    """Return every block of the chain together with the chain length."""
    blocks = blockchain.chain
    payload = {'chain': blocks, 'length': len(blocks)}
    return jsonify(payload), 200
@app.route('/get_patient_data', methods=['POST'])
def get_patient_data():
    """Return the decrypted record for a subject id, given its key.

    Form fields:
        key: Fernet key as text.
        subject_id: Integer subject id.

    Every block that has ``patient_data`` is tried with the key; the first
    one that decrypts and whose ``subject_id`` matches is returned.

    Returns:
        200 with the decrypted record, 400 when a form field is missing or
        ``subject_id`` is not an integer, 404 when no block matches.
    """
    key_text = request.form.get('key')
    subject_id_text = request.form.get('subject_id')
    if key_text is None or subject_id_text is None:
        return jsonify({"message": "Both 'key' and 'subject_id' are required"}), 400
    try:
        subject_id = int(subject_id_text)  # Convert SubjectID to integer
    except ValueError:
        return jsonify({"message": "'subject_id' must be an integer"}), 400
    key = key_text.encode()

    # Iterate through the chain to find the patient's data
    for block in blockchain.chain:
        # Check if the block has patient data
        if 'patient_data' not in block:
            continue
        try:
            decrypted_data = decrypt_data(block['patient_data'], key)
            if decrypted_data['subject_id'] == subject_id:
                return jsonify(decrypted_data), 200
        except Exception:
            # Best effort: a wrong key, a payload that is not an encrypted
            # token, or a record of unexpected shape just means this block
            # is not the one requested. Exception (rather than a bare
            # except) leaves SystemExit/KeyboardInterrupt alone.
            continue
    return jsonify({"message": "Data not found or incorrect key"}), 404
# --------------------------
# Flagging Data
# --------------------------
# Reference drug names, loaded when the module is imported: the columns at
# positions 3 and 5 of the product CSV.
# NOTE(review): presumably these are the two drug-name columns of the
# product file - confirm against the CSV header.
known_drug_names = pd.read_csv(
"Datasets/ReferenceDataSets/20220906_product.csv").iloc[:, [3] + [5]]
# Union of the values of both columns, as a set for membership tests.
known_drug_names_set = set(known_drug_names.iloc[:, 0]).union(
set(known_drug_names.iloc[:, 1]))
# Recognised disease / condition names. flag_suspicious_data() checks each
# extracted diagnosis against this list with an exact string comparison, so
# spelling, case and the parenthesised suffixes all matter.
known_diseases_and_conditions = [
"Acute Flaccid Myelitis (AFM)",
"Adenovirus",
"Anthrax",
"Asthma",
"Avian Influenza",
"Botulism",
"Blue-Green Algae",
"Brucellosis",
"Campylobacteriosis",
"Chagas Disease (American trypanosomiasis)",
"Chickenpox",
"Chikungunya",
"Cholera",
"Congenital Rubella Syndrome",
"COVID-19 (Coronavirus Disease 2019)",
"Creutzfeldt-Jakob Disease",
"Cryptosporidiosis",
"Cyclosporiasis",
"Cytomegalovirus",
"Dengue Fever",
"Diphtheria",
"E. coli",
"Ebola Virus Disease",
"Ehrlichiosis",
"Enteroviruses",
"Fifth Disease",
"Giardiasis",
"Haemophilus Influenzae Invasive Disease",
"Hand, Foot, and Mouth Disease",
"Hantavirus",
"Head Lice",
"Heartland Virus",
"Hemolytic Uremic Syndrome",
"Hepatitis A",
"Influenza",
"Legionellosis",
"Leprosy (Hansen's Disease)",
"Leptospirosis",
"Listeriosis",
"Lyme Disease",
"Malaria",
"Marburg Virus Disease",
"Measles (Rubeola)",
"Meningitis",
"Meningococcal Disease",
"Molluscum Contagiosum",
"Mpox",
"Mononucleosis",
"Mumps",
"Norovirus",
"Pertussis",
"Plague",
"Polio (polio myelitis)",
"Primary Amebic Meningoencephalitis (PAM)",
"Psittacosis",
"Q Fever",
"Rabies",
"Reye Syndrome",
"Ringworm",
"Rocky Mountain Spotted Fever",
"Rotavirus",
"Rubella (German Measles)",
"Salmonellosis",
"Scabies",
"Shiga toxin-producing E. coli (STEC)",
"Shigellosis",
"Shingles",
"Smallpox",
"Southern Tick-Associated Rash Illness (STARI)",
"Streptococcus, group A, invasive disease",
"Streptococcus pneumoniae, invasive disease",
"Tetanus",
"Toxoplasmosis",
"Trichinellosis",
"Tuberculosis",
"Tularemia",
"Typhoid Fever",
"Vibrio species",
"West Nile Virus",
"Yellow Fever",
"Zika Virus"
]
def validate_drug_name(drug_name):
    """Return True when *drug_name* is an exact member of the known drug-name set."""
    is_known = drug_name in known_drug_names_set
    return is_known
# Load the records written by an earlier run so they can be re-checked at
# import time. A missing file (first run) or a corrupt one both fall back to
# an empty list instead of aborting the import.
try:
    with open("output.json", "r") as file:
        json_data = json.load(file)
except FileNotFoundError:
    print("Notice: The file 'output.json' was not found; starting with no records.")
    json_data = []
except json.JSONDecodeError:
    print("Error: The file 'output.json' does not contain valid JSON data.")
    json_data = []
def validate_subject_id(subject_id):
    """Return True when *subject_id* is present and non-empty.

    ``None`` is rejected explicitly: the PDF extractor leaves the id as
    ``None`` when no label matched, and ``str(None)`` would otherwise pass
    the length check. Any other value is valid when its string form has at
    least one character.
    """
    if subject_id is None:
        return False
    return len(str(subject_id)) >= 1
def validate_icd9_code(icd9_code):
    """Return False only when *icd9_code* is ``None``; any other value passes."""
    if icd9_code is None:
        return False
    return True
def flag_suspicious_data(data):
    """Collect the records that fail at least one validation check.

    Args:
        data: Iterable of record dicts with ``"subject_id"``,
            ``"PRESCRIPTION"`` and ``"DIAGNOSES"`` keys.

    Returns:
        A list of shallow copies of the failing records, each with an extra
        ``"flags"`` key listing the problems in the order they were found.
    """
    suspicious = []
    for entry in data:
        problems = []

        # Subject id
        if not validate_subject_id(entry["subject_id"]):
            problems.append("Invalid subject_id")

        # Drug names
        problems.extend(
            f"Invalid drug_name: {item['drug_name']}"
            for item in entry["PRESCRIPTION"]
            if not validate_drug_name(item["drug_name"])
        )

        # Diagnoses: missing ICD-9 code first, then unrecognised disease name
        for dx in entry["DIAGNOSES"]:
            if not validate_icd9_code(dx["icd9_code"]):
                problems.append(
                    f"Null icd9_code for diagnosis: {dx['diagnosis']}")
            if dx['diagnosis'] not in known_diseases_and_conditions:
                problems.append(
                    f"Disease not in recognized: {dx['diagnosis']}")

        if not problems:
            continue
        annotated = entry.copy()
        annotated["flags"] = problems
        suspicious.append(annotated)
    return suspicious
# Flagging suspicious data
# Runs when the module is imported: checks whatever records were loaded into
# json_data and prints the flagged ones to stdout.
flagged_data = flag_suspicious_data(json_data)
print(flagged_data)
# Start the Flask server only when this file is executed as a script.
# host='0.0.0.0' makes it listen on all network interfaces.
if __name__ == '__main__':
    app.run(
        host='0.0.0.0',
        port=5000,
    )