I have followed this tutorial in Youtube
Here the whole code from my jupyter notebook
import spacy
import fitz
import pickle
import pandas as pd
import random
train_data = pickle.load(open('train_data.pkl', 'rb'))
train_data[0]
The output of train_data[0] is shown here
nlp = spacy.blank('en')
def train_model(train_data):
if 'ner' not in nlp.pipe_names:
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner, last = True)
for _, annotation in train_data:
for ent in annotation['entities']:
ner.add_label(ent[2])
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):
optimizer = nlp.begin_training()
for itn in range(10):
print('Starting iteration' + str(itn))
random.shuffle(train_data)
losses = {}
index = 0
# batch up the examples using spaCy's minibatch
#batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
for text, annotations in train_data:
try:
nlp.update(
[texts], # batch of texts
[annotations],# batch of annotations
sgd=optimizer,
drop=0.5, # dropout - make it harder to memorise data
losses=losses)
except Exception as e:
pass
print("Losses", losses)
train_model(train_data)
What is weird is the output of the function which is :
Starting iteration0
Losses {}
Starting iteration1
Losses {}
Starting iteration2
Losses {}
Starting iteration3
Losses {}
Starting iteration4
Losses {}
Starting iteration5
Losses {}
Starting iteration6
Losses {}
Starting iteration7
Losses {}
Starting iteration8
Losses {}
Starting iteration9
Losses {}
It seems like there is no data entering the model at all even though I can run train_data and get an output !!
spaCy version 2.3.0
Python version 3.7.3
train_data? - Raqib