Train your first Convolutional Neural Network for classifying news articles
Convolutional Neural Networks, or simply convnets, are generally used for Computer Vision problems. Because they operate convolutionally, extracting features from local input patches, they offer representation modularity and data efficiency; coupled with augmentation techniques, convnets can extract a lot of information from a small representative dataset.
The same properties can also help convnets learn representations on sequence data, notably on text. 1-D convnets can be competitive with RNNs on certain sequence processing tasks, usually at a cheaper computational cost.

1-D convnets can recognize local patterns in a sequence because the same transformation is performed on every patch: a pattern learned at one position in a sentence can later be recognized at a different position, which makes 1-D convnets translation invariant.
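To make this concrete, here is a tiny NumPy sketch (not part of the tutorial code) showing that one convolution kernel produces the same peak response to a local pattern no matter where that pattern occurs in the sequence:
import numpy as np

# Two toy sequences containing the same local pattern [1, 2, 1] at different positions.
seq_a = np.array([0, 1, 2, 1, 0, 0, 0, 0], dtype=float)
seq_b = np.array([0, 0, 0, 0, 1, 2, 1, 0], dtype=float)

# One kernel slid over every window of size 3 (cross-correlation, which is
# what a Conv1D layer computes).
kernel = np.array([1, 2, 1], dtype=float)

print(np.correlate(seq_a, kernel, mode='valid'))   # [4. 6. 4. 1. 0. 0] -> peak of 6 near the start
print(np.correlate(seq_b, kernel, mode='valid'))   # [0. 1. 4. 6. 4. 1] -> same peak of 6, later position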
Let's get into the tutorial!
In this tutorial, we will train a model built from 1-D convnet layers on the BBC News dataset, mapping news articles to the categories they belong to. We will also use pre-trained GloVe embeddings to initialize the embedding layer. We will build the model with Keras and train it on Kaggle.
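All of the code below assumes the following imports. Depending on your Keras version, the same classes may need to come from tensorflow.keras instead of keras (and pad_sequences may live under keras.utils in newer releases):
import itertools
from string import punctuation

import numpy as np
from numpy import asarray, zeros
import pandas as pd
import matplotlib.pyplot as plt

import keras
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dropout, Dense
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from sklearn.metrics import confusion_matrix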
Step 1: Import and Preprocess the dataset
def load_data(filename, encoding):
    data = pd.read_csv(filename, encoding=encoding)
    return data

data = load_data('../input/bbc-fulltext-and-category/bbc-text.csv', 'latin1')

words = set()
vocab = {}

# Preview the cleaning steps on the first article: strip punctuation and
# keep only alphabetic tokens longer than two characters.
token = data['text'][0].split()
table = str.maketrans('', '', punctuation)
tokens = [w.translate(table) for w in token]
tokens = [word for word in tokens if word.isalpha()]
tokens = [word for word in tokens if len(word) > 2]

# Encode the category names as integer codes (0-4).
data['category'] = data['category'].astype('category').cat.codes

# The same cleaning steps, wrapped in a function we can apply to every document.
def clean_and_get_tokens(doc):
    tokens = doc.split()
    table = str.maketrans('', '', punctuation)
    tokens = [w.translate(table) for w in tokens]
    tokens = [word for word in tokens if word.isalpha()]
    tokens = [word for word in tokens if len(word) > 2]
    return tokens

# Count token frequencies across all documents, then keep only the words
# that occur more than 5 times as the vocabulary.
documents = data['text']
for doc in documents:
    tokens = clean_and_get_tokens(doc)
    for token in tokens:
        if token in vocab:
            vocab[token] += 1
        else:
            vocab[token] = 1
for word in vocab:
    if vocab[word] > 5:
        words.add(word)
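As an optional sanity check (not part of the original notebook), you can print how much the frequency filter shrinks the vocabulary:
print('raw vocabulary size:', len(vocab))
print('words kept (count > 5):', len(words))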
Step 2: Split the dataset into Training and Testing set
def create_train_test_sets(data, split):
    # Randomly assign roughly `split` of the rows to the training set.
    np.random.seed(0)
    mask = np.random.rand(len(data)) < split
    train_data = data[mask]
    test_data = data[~mask]
    return train_data, test_data

train_data, test_data = create_train_test_sets(data, 0.8)

# Keep only vocabulary words in each training document.
train_documents = []
for doc in train_data['text']:
    tokens = doc.split()
    final_tokens = []
    for token in tokens:
        if token in words:
            final_tokens.append(token)
    final_string = ' '.join(final_tokens)
    train_documents.append(final_string)

# Do the same for the test documents.
test_documents = []
for doc in test_data['text']:
    tokens = doc.split()
    final_tokens = []
    for token in tokens:
        if token in words:
            final_tokens.append(token)
    final_string = ' '.join(final_tokens)
    test_documents.append(final_string)
Step 3: Use a Tokenizer to convert the documents into sequences of integer word indices
# Fit the tokenizer on the training documents and encode them as
# sequences of integer word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_documents)
encoded_docs = tokenizer.texts_to_sequences(train_documents)

# Pad every sequence to the length of the longest training document.
max_length = max(len(s.split()) for s in train_documents)
labels = train_data['category']
train_labels = labels
Xtrain = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
ytrain = keras.utils.to_categorical(labels, num_classes=5)

# Encode and pad the test documents with the same tokenizer and length.
encoded_docs = tokenizer.texts_to_sequences(test_documents)
labels = test_data['category']
Xtest = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
ytest = keras.utils.to_categorical(labels, num_classes=5)
Now, load the pre-trained GloVe embeddings and build an embedding layer from them. vocab_size is the number of words in the tokenizer's vocabulary, plus one for the padding index 0.
def load_embedding(filename, encoding):
    # Each line of the embedding file is a word followed by its vector.
    # Note: the [1:] slice skips the first line, which matters only for files
    # that start with a header row; the glove.6B files have no header.
    file = open(filename, 'r', encoding=encoding)
    lines = file.readlines()[1:]
    file.close()
    embedding = dict()
    for line in lines:
        parts = line.split()
        embedding[parts[0]] = asarray(parts[1:], dtype='float32')
    return embedding

vocab_size = len(tokenizer.word_index) + 1
raw_embedding = load_embedding('../input/glove-global-vectors-for-word-representation/glove.6B.100d.txt', 'utf8')

# Build a weight matrix whose i-th row is the GloVe vector for word index i.
weight_matrix = zeros((vocab_size, 100))
for word, i in tokenizer.word_index.items():
    if word in raw_embedding:
        weight_matrix[i] = raw_embedding[word]

# Embedding layer initialised with the GloVe weights and fine-tuned during training.
embedding_layer = Embedding(vocab_size, 100, weights=[weight_matrix], input_length=max_length, trainable=True)
Step 4: Train the Model
model = Sequential()
# Use the GloVe-initialised embedding layer defined in the previous step.
model.add(embedding_layer)
model.add(Conv1D(filters=16, kernel_size=16, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=32, kernel_size=32, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(5, activation='softmax'))
print(model.summary())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(Xtrain, ytrain, epochs=10, verbose=2, validation_data=(Xtest, ytest))
Step 5: Make predictions on the Test set and plot the Confusion Matrix
# Predict class probabilities for the test set and take the argmax as the
# predicted label.
ypred = model.predict(Xtest)
pred_labels = []
for probs in ypred:
    label = np.argmax(probs, axis=-1)
    pred_labels.append(int(label))
actual_labels = list(labels)

# Plot the confusion matrix.
cm = confusion_matrix(actual_labels, pred_labels)
cmap = plt.cm.Blues
title = "Confusion Matrix"
classes = 5
normalize = False
tick_marks = np.arange(classes)
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
# Write each cell's count into the plot, in white on dark cells.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], fmt),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
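The ticks on this plot are just the integer codes 0-4. If you would rather see the category names, one option (an optional addition, not in the original code) is to capture the names in Step 1 before they are replaced by codes, and pass them to plt.xticks/plt.yticks:
# Part 1 -- in Step 1, BEFORE data['category'] is overwritten with cat.codes:
category_names = list(data['category'].astype('category').cat.categories)

# Part 2 -- in the plotting code above, after plt.colorbar():
plt.xticks(tick_marks, category_names, rotation=45)
plt.yticks(tick_marks, category_names)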

Step 6: Check the accuracy
# Compute the categorical accuracy of the predictions on the test set.
metric = keras.metrics.CategoricalAccuracy()
metric.update_state(ytest, ypred)
print(metric.result().numpy())
The model we just trained has an accuracy of 90.25% on the testing set.
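Since pred_labels and actual_labels were already computed in Step 5, you can cross-check this number with scikit-learn (an equivalent, optional check):
from sklearn.metrics import accuracy_score
print(accuracy_score(actual_labels, pred_labels))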
[Optional]
You can find the public notebook with the entire implementation below:

[Bonus]
Check out the post below on Machine Learning Mastery for an implementation of time series forecasting using convnets:

Cheers!