Add files via upload

bismillahir rahmanir rahim
mobassir94 · Sep 9, 2022 · b79b314 · b79b314
1 parent 4939d9c
commit b79b314
Show file tree

Hide file tree

Showing 13 changed files with 986 additions and 0 deletions.
diff --git a/Procfile b/Procfile
@@ -0,0 +1 @@
+web: gunicorn app:app --log-file=-
diff --git a/__pycache__/inference_utils.cpython-38.pyc b/__pycache__/inference_utils.cpython-38.pyc
diff --git a/__pycache__/laser_cpu_ddt.cpython-38.pyc b/__pycache__/laser_cpu_ddt.cpython-38.pyc
diff --git a/__pycache__/metrics.cpython-38.pyc b/__pycache__/metrics.cpython-38.pyc
diff --git a/app.py b/app.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Aug 28 11:39:33 2022
+
+@author: MOBASSIR
+"""
+from flask import Flask, render_template, url_for, request
+import pandas as pd
+from inference_utils import Multilingual_Quran_Bible_Search_Engine
+
+# Cleaning the texts
+#import re
+
+# WSGI application object; the Procfile's `gunicorn app:app` command refers to it by this name.
+app = Flask(__name__)
+
+@app.route('/')
+def home():
+    """Render and return the ``home.html`` template for the site root."""
+    landing_page = render_template('home.html')
+    return landing_page
+
+@app.route('/predict',methods=['GET','POST'])
+def predict():
+    """Run a verse search for the submitted form and render ``result.html``.
+
+    Form fields read from the POST body:
+
+    * ``comment`` -- the query text (required; Flask answers 400 if absent).
+    * ``show_top_results`` -- number of result rows; falls back to 10 when
+      missing, non-numeric or smaller than 1.
+    * ``predict`` / ``predict1`` -- submit buttons selecting the ``'dot'``
+      or ``'l2'`` metric respectively; ``'dot'`` is used when neither is
+      present, matching the search engine's own default.
+
+    A GET request carries no form data, so the search form (``home.html``)
+    is rendered instead of attempting a search.
+    """
+    # Previously a GET fell through to code that needed the form fields and
+    # crashed; send the visitor back to the search form instead.
+    if request.method != 'POST':
+        return render_template('home.html')
+
+    query = request.form['comment']
+
+    # `type=int` makes werkzeug return the default when the field is missing
+    # or cannot be converted, instead of raising inside the view.
+    default_pairs = 10
+    n_pairs = request.form.get('show_top_results', default=default_pairs, type=int)
+    if n_pairs < 1:
+        n_pairs = default_pairs
+
+    # `predict` keeps precedence over `predict1`, as in the original if/elif;
+    # the previously unhandled "neither button" case now defaults to 'dot'.
+    if request.form.get('predict1') and not request.form.get('predict'):
+        metric = 'l2'
+    else:
+        metric = 'dot'
+
+    mlt_quran_bible = Multilingual_Quran_Bible_Search_Engine(
+        query, size=n_pairs, language='en', metric=metric)
+
+    return render_template('result.html', prediction=mlt_quran_bible)
+
+if __name__ == '__main__':
+    # Direct execution only: start Flask's built-in server with the debugger
+    # enabled on port 33507.
+    app.run(port=33507, debug=True)
+
+
diff --git a/data_driven_theology/LICENSE b/data_driven_theology/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Mobassir
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/inference_utils.py b/inference_utils.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Aug 29 09:22:11 2022
+
+@author: MOBASSIR
+"""
+
+import numpy as np
+import pandas as pd
+
+from metrics import dot_product_similarity,pairwise_euclidean_dists
+
+import os
+import gdown
+
+# assets folder
+# Google Drive folder holding the model/data assets. `url` is not referenced
+# again in the visible code; the download below uses `id` only.
+url = "https://drive.google.com/drive/folders/1gMDdBPFYcc0ACCVZr5tJ82JoaN-Le0aR?usp=sharing"
+
+# NOTE(review): this module-level name shadows the builtin `id()`.
+id = "1gMDdBPFYcc0ACCVZr5tJ82JoaN-Le0aR"
+
+# Import-time bootstrap: list the local asset directory and download the Drive
+# folder when it holds fewer than five entries. `os.listdir` raises
+# FileNotFoundError if the directory does not exist at all.
+files = os.listdir('./data_driven_theology/')
+if(len(files)<5):
+    print("downloading necessary files....")
+    # NOTE(review): no output path is passed, so the destination is gdown's
+    # default -- confirm it ends up in ./data_driven_theology/.
+    gdown.download_folder(id=id, quiet=True, use_cookies=False)    
+
+# Printed unconditionally, whether or not a download happened.
+print("copy done...")
+# Imported after the download step; presumably laser_cpu_ddt needs the
+# downloaded assets at import/construction time -- TODO confirm.
+from laser_cpu_ddt import Laser
+laser = Laser()
+
+
+# Embedding array for the searchable corpus, loaded once at import time.
+corpus_emb = np.load('./data_driven_theology/mlt_quran_torah_emb.npy')
+
+# Table of verse pairs; the search function looks rows up in it by the
+# positions obtained from ranking `corpus_emb`.
+sim_quran_torah = pd.read_csv('./data_driven_theology/ensemble_preds_most_sim_pairs_quran_and_old_testament.csv')
+
+
+def Multilingual_Quran_Bible_Search_Engine(query,size=10,language = 'en',metric = 'dot'):
+    """Return the ``size`` rows of ``sim_quran_torah`` that best match ``query``.
+
+    The query is embedded with the module-level ``laser`` encoder and scored
+    against every row of ``corpus_emb``.
+
+    Parameters
+    ----------
+    query : passed unchanged to ``laser.embed_sentences``.
+    size : maximum number of rows to return; negative values are treated as 0.
+    language : language code forwarded to the encoder as ``lang``.
+    metric : ``'dot'`` ranks by ``dot_product_similarity`` (largest first);
+        any other value ranks by ``pairwise_euclidean_dists`` (smallest first).
+
+    Returns
+    -------
+    pandas.DataFrame
+        One row per hit, best match first, indexed 0..k-1. The verse columns
+        copied from ``sim_quran_torah`` are converted to ``str``; ``Score``
+        holds the metric value of that same row (a similarity for ``'dot'``,
+        a distance otherwise).
+    """
+    result_columns = [
+        't_citation', 't_book', 't_chapter', 't_verse', 't_text',
+        'q_Name', 'q_Surah', 'q_Ayat', 'q_Verse', 'similarity_score',
+    ]
+
+    # A negative count would turn the slice below into "all but the last k".
+    size = max(int(size), 0)
+
+    query_embedding = laser.embed_sentences(query, lang=language)
+
+    # NOTE(review): both branches assume the metric helper yields one score per
+    # corpus row (1-D after squeezing) -- confirm against metrics.py.
+    if metric == 'dot':
+        query_embedding = np.squeeze(np.asarray(query_embedding))
+        scores = np.asarray(dot_product_similarity(corpus_emb, query_embedding))
+        # Similarity: larger is better, so walk the ascending argsort backwards.
+        ranking = scores.argsort()[::-1]
+    else:
+        scores = pairwise_euclidean_dists(corpus_emb, query_embedding)
+        scores = np.squeeze(np.asarray(scores))
+        scores = np.array(scores, dtype=np.float32)
+        # Distance: smaller is better, so the ascending argsort is the ranking.
+        # (The old slice `[:-(size+1)][:size]` dropped hits on small corpora.)
+        ranking = scores.argsort()
+
+    top_rows = ranking[:size]
+
+    # Label lookup on the CSV's index, matching the original
+    # `sim_quran_torah[col][index]` access.
+    hits = sim_quran_torah.loc[top_rows, result_columns]
+    find = pd.DataFrame(
+        {col: [str(value) for value in hits[col].to_numpy()] for col in result_columns},
+        columns=result_columns,
+    )
+
+    # Take each score from the row it belongs to. The previous code sorted the
+    # score array in place and always reported its largest values, which for
+    # the distance metric were the scores of the *worst* matches.
+    find['Score'] = np.asarray(scores[top_rows], dtype=np.float64)
+
+    return find
+