-
Notifications
You must be signed in to change notification settings - Fork 0
/
Course_3_Week_1_Project_2.py
47 lines (36 loc) · 1.42 KB
/
Course_3_Week_1_Project_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
This is a part of the DeepLearning.AI TensorFlow Developer Professional Certificate offered on Coursera.
All copyrights belong to them. I am sharing this work here to showcase the projects I have worked on.
Course: Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning
Week 1: Sentiment in text
Aim: Tokenization and padding sequences
"""
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Demo script: build a word index from a small corpus, encode the sentences
# as integer sequences, pad them to a fixed length, and then encode sentences
# containing words the tokenizer has never seen.

# Corpus the tokenizer's vocabulary is built from.
sentences = [
    "I am going to Berlin in April",
    "Berlin is a great city. The people are friendly!",
    "I plan to backpack through Europe",
    "This year is going to bring a lot of changes",
    "I am learning TensorFlow",
]

# num_words caps how many of the most frequent words are used when encoding;
# this corpus has far fewer distinct words than 100, so nothing is dropped.
# oov_token makes unseen words encode as "<OOV>" rather than being skipped.
tokenizer = Tokenizer(num_words=100, oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index

sequences = tokenizer.texts_to_sequences(sentences)

# padding/truncating are spelled out (they match the Keras defaults) because
# two of the sentences are longer than 7 tokens: with truncating="pre" those
# lose their leading tokens, while shorter ones are padded at the front.
padded = pad_sequences(sequences, maxlen=7, padding="pre", truncating="pre")

print("\nWord Index: ", word_index)
print("\nSequences: ", sequences)
print("\nPadded Sequences: ")
print(padded)

# Encode sentences containing words that were not in the fitting corpus
# ("so", "happy", "new", "2021"); those words map to the "<OOV>" token.
test_data = [
    "I am so happy!",
    "Happy new year 2021!!",
]
test_seq = tokenizer.texts_to_sequences(test_data)

# NOTE(review): maxlen here (10) differs from the 7 used for the training
# sentences above; if both were fed to the same model they would need to
# match -- confirm the difference is intentional for this demo.
padded_test = pad_sequences(test_seq, maxlen=10, padding="pre", truncating="pre")

print("\nPadded test sequence: ")
print(padded_test)