概述
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
import numpy as np
# Seed NumPy's global RNG so the multinomial sampling used during text
# generation is repeatable between runs.
np.random.seed(13)

# Fetch the corpus through Keras' get_file helper and keep only the first
# 50 lines as a tiny training set.
path = get_file('alice.txt', origin='http://www.gutenberg.org/cache/epub/11/pg11.txt')
# The file is opened in a context manager so the handle is always closed,
# and with an explicit encoding so decoding does not depend on the platform
# default.  NOTE(review): the Gutenberg text is expected to be UTF-8 -- confirm.
with open(path, encoding='utf-8') as corpus_file:
    doc = corpus_file.readlines()[0:50]

# Build the vocabulary from those lines, then replace every line by its
# list of integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(doc)
doc = tokenizer.texts_to_sequences(doc)

# A line needs at least two tokens to provide one (context, next word) pair.
doc = [line for line in doc if len(line) > 1]

# Total number of (context, next word) training pairs in the corpus.
words_size = sum(len(words) - 1 for words in doc)
# Longest context that has to be fed to the network.
maxlen = max(len(x) - 1 for x in doc)
# One extra slot because id 0 is not assigned to any word in word_index.
vocab_size = len(tokenizer.word_index) + 1
def generate_data(X, maxlen, V):
    """Yield one training batch per sentence for next-word prediction.

    For a sentence ``[w0, w1, ..., wn]`` the batch contains ``n`` samples:
    sample ``i`` has the prefix ``[w0, ..., w(i-1)]`` as input and ``wi`` as
    target.

    Args:
        X: iterable of sentences, each a sequence of integer word ids.
        maxlen: width of the padded input matrix.  Shorter prefixes are
            left-padded with 0; longer ones keep only their last ``maxlen``
            ids.  ``None`` means "as wide as the longest prefix of the
            current sentence".
        V: number of classes (vocabulary size) for the one-hot targets.
            ``None`` means "largest target id of the current sentence + 1".

    Yields:
        ``(inputs_sequence, y)`` where ``inputs_sequence`` is an int32 array
        of shape ``(n, maxlen)`` and ``y`` is a float32 one-hot array of
        shape ``(n, V)``.

    Sentences with fewer than two tokens contain no (context, target) pair
    and are skipped instead of producing an empty batch.
    """
    for sentence in X:
        n_pairs = len(sentence) - 1
        if n_pairs < 1:
            # Nothing to predict from an empty or single-word sentence.
            continue

        targets = np.asarray(sentence[1:], dtype=np.int64)
        width = maxlen if maxlen is not None else n_pairs
        num_classes = V if V is not None else int(targets.max()) + 1

        # Row r holds the prefix sentence[0:r + 1], right-aligned so that the
        # padding zeros sit on the left; over-long prefixes lose their oldest
        # ids.
        inputs_sequence = np.zeros((n_pairs, width), dtype=np.int32)
        for row in range(n_pairs):
            prefix = sentence[max(0, row + 1 - width):row + 1]
            if len(prefix):
                inputs_sequence[row, width - len(prefix):] = prefix

        # One-hot encode the word that follows each prefix.
        y = np.zeros((n_pairs, num_classes), dtype=np.float32)
        y[np.arange(n_pairs), targets] = 1.0

        yield (inputs_sequence, y)
def sample(p):
    """Draw one index at random, with probability proportional to ``p``.

    Args:
        p: 1-D sequence of non-negative weights (e.g. a softmax output).
            It does not have to be normalised and is left unmodified.

    Returns:
        The sampled index as a Python ``int``.
    """
    # Work on a float64 copy: this keeps the caller's array untouched and
    # avoids float32 rounding pushing the partial sums above 1, which
    # np.random.multinomial rejects.
    probs = np.asarray(p, dtype=np.float64)
    probs = probs / probs.sum()
    # A single multinomial trial gives a one-hot count vector; the position
    # of the 1 is the sampled index.
    return int(np.argmax(np.random.multinomial(1, probs)))
# Next-word prediction network: word ids -> 128-unit embedding -> a single
# LSTM that returns only its final output -> softmax over the vocabulary.
model = Sequential([
    Embedding(vocab_size, 128, input_length=maxlen),
    LSTM(128, return_sequences=False),
    Dense(vocab_size, activation='softmax'),
])
# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer='adadelta', loss='categorical_crossentropy')
# Reverse vocabulary (word id -> word), built once so that each generated
# word is a dictionary lookup rather than a scan over the whole vocabulary.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}


def _generate_text(seed_text, choose_word_id):
    """Extend ``seed_text`` by up to ``maxlen`` words predicted by ``model``.

    Args:
        seed_text: text the generation starts from.
        choose_word_id: callable that receives the predicted probability
            vector for the next word and returns the chosen word id.

    Returns:
        The seed text followed by the generated words, separated by spaces.
    """
    text = seed_text
    for _ in range(maxlen):
        in_sequence = sequence.pad_sequences(
            tokenizer.texts_to_sequences([text]), maxlen=maxlen)
        probabilities = model.predict(in_sequence, verbose=0)[0]
        word = index_to_word.get(int(choose_word_id(probabilities)))
        # Ids without a vocabulary entry (such as the padding id 0) add
        # nothing to the text.
        if word is not None:
            text += ' ' + word
    return text


for i in range(30):
    # One pass over the corpus, one gradient step per sentence batch.
    for x, y in generate_data(doc, maxlen, vocab_size):
        model.train_on_batch(x, y)

    # Show progress after each pass with a randomly sampled continuation.
    in_words = _generate_text("alice's", sample)
    print(i, in_words)

# Final output: always take the most probable next word.  np.argmax over the
# predicted probabilities replaces Sequential.predict_classes, which is no
# longer available in current Keras releases.
in_words = _generate_text("alice's", np.argmax)
print(in_words)
最后
以上就是忧伤宝马为你收集整理的神经网络语言模型 NNLM (Keras实现)的全部内容,希望文章能够帮你解决神经网络语言模型 NNLM (Keras实现)所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复