Course_3_Week_1_Project_3.py
"""
This is is a part of the DeepLearning.AI TensorFlow Developer Professional Certificate offered on Coursera.
All copyrights belong to them. I am sharing this work here to showcase the projects I have worked on
Course: Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning
Week 1: Sentiment in text
Aim: Sarcasm Dataset
"""
import json

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
"""
dataset_url = "https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json"
sarcasm = tf.keras.utils.get_file("sarcasm.json", origin=dataset_url, untar=True)
print(len(sarcasm))
"""
# Load the sarcasm dataset and split it into headlines, labels, and article URLs.
with open("Dataset/sarcasm.json", "r") as file:
    datastore = json.load(file)

sentences = []
labels = []
urls = []
for item in datastore:
    sentences.append(item["headline"])
    labels.append(item["is_sarcastic"])
    urls.append(item["article_link"])
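
# Quick sanity check (my addition, not part of the original script): confirm how many
# headlines were parsed and inspect one record.
print("Number of headlines:", len(sentences))
print("Example headline:", sentences[0], "| is_sarcastic:", labels[0])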
# Tokenize the headlines: fit the tokenizer on the full corpus, then convert each
# headline into a sequence of word indices and pad the sequences to equal length.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)

word_index = tokenizer.word_index
print("Vocabulary size: ", len(word_index))
print("\nWord Index: ", word_index)

sequences = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(sequences, padding="post")
print(padded[0])
print(padded.shape)
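
# Extra illustration (my addition, beyond the original script): the fitted tokenizer maps
# words it has never seen to the "<OOV>" index, and pad_sequences can pad or truncate to a
# fixed maxlen. The test sentences below are just illustrative examples.
test_sentences = [
    "granny starting to fear spiders in the garden might be real",
    "my dog ate my homework and the wifi password",
]
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = pad_sequences(test_sequences, padding="post", maxlen=10)
print(test_sequences)
print(test_padded)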