# 自然语言处理如何检查拼写错误？（Tensorflow实例教程&源代码）

+关注继续查看

Spellin is difficult, whch is wyh you need to study everyday.

Spelling is difficult, which is why you need to study everyday.

The first days of her existence in th country were vrey hard for Dolly.

The first days of her existence in the country were very hard for Dolly.

Thi is really something impressiv thaat we should look into right away!

This is really something impressive that we should look into right away!

def load_book(path):
    """Read an entire text file and return its contents as one string.

    NOTE(review): the scraped source lost the ``def`` line and the
    ``book = f.read()`` statement; this restores the obvious original.

    Args:
        path: path to the book's text file.

    Returns:
        The full file contents as a single string.
    """
    input_file = os.path.join(path)
    with open(input_file) as f:
        book = f.read()
    return book

# Collect the filenames of all books (plain files only) in ./books/.
path = './books/'
book_files = [f for f in listdir(path) if isfile(join(path, f))]
# NOTE(review): dropping the first entry looks like a hack to skip a
# hidden file such as .DS_Store — listdir order is platform-dependent,
# so confirm this doesn't silently discard a real book.
book_files = book_files[1:]

# Load every book into memory.  The loop body ("books.append(...)") was
# lost in the scraped source and is restored here.
books = []
for book in book_files:
    books.append(load_book(path + book))

# Report a rough word count (whitespace-split tokens) per book.
for i, book_text in enumerate(books):
    print("There are {} words in {}.".format(len(book_text.split()), book_files[i]))

def clean_text(text):
    '''Remove unwanted characters and extra spaces from the text.

    Args:
        text: raw book text.

    Returns:
        Cleaned text with newlines flattened, markup characters removed,
        RTF/encoding artifacts repaired, and single spaces guaranteed
        after sentence-ending punctuation.
    '''
    # Newlines become spaces so the later split on '. ' works across lines.
    text = re.sub(r'\n', ' ', text)
    # Strip punctuation/markup characters that carry no spelling signal.
    # (The scraped source broke this character class across two lines.)
    text = re.sub(r'[{}@_*>()\\#%+=]', '', text)
    # 'a0' and the '\'9x' sequences below are RTF/encoding artifacts
    # (non-breaking spaces and curly quotes) left over in the source books.
    text = re.sub('a0', '', text)
    text = re.sub('\'92t', '\'t', text)
    text = re.sub('\'92s', '\'s', text)
    text = re.sub('\'92m', '\'m', text)
    text = re.sub('\'92ll', '\'ll', text)
    text = re.sub('\'91', '', text)
    text = re.sub('\'92', '', text)
    text = re.sub('\'93', '', text)
    text = re.sub('\'94', '', text)
    # Guarantee a space after sentence-ending punctuation (raw strings
    # replace the original invalid '\.'-style escapes; behavior unchanged).
    text = re.sub(r'\.', '. ', text)
    text = re.sub(r'\!', '! ', text)
    text = re.sub(r'\?', '? ', text)
    text = re.sub(' +', ' ', text)  # Removes extra spaces
    return text

The vocabulary contains 78 characters.
[' ', '!', '"', '\$', '&', "'", ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<EOS>', '<GO>', '<PAD>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

Today is a lovely day. I want to go to the beach. (这将被拆分为两个输入句子)

Is today a lovely day? I want to go to the beach. (这将是一个长的输入句子)

# Split every cleaned book into sentences on '. ' and restore the period
# that the split consumed.  (Indentation restored from the scraped flat text;
# '!'/'?' sentence ends stay attached to their sentence since only '. '
# is used as the delimiter.)
sentences = []
for book in clean_books:
    for sentence in book.split('. '):
        sentences.append(sentence + '.')

# Keep only integer-encoded sentences whose length the model can handle:
# long enough to carry signal, short enough to train on efficiently.
max_length = 92
min_length = 10

good_sentences = []

for sentence in int_sentences:
    # Chained comparison replaces the original two-clause `and`;
    # bounds are inclusive on both ends, as in the original.
    if min_length <= len(sentence) <= max_length:
        good_sentences.append(sentence)

# Hold out 15% of the filtered sentences for evaluation; the fixed
# random_state makes the split reproducible across runs.
# NOTE(review): train_test_split comes from sklearn.model_selection —
# presumably imported in a part of the notebook not shown here; verify.
training, testing = train_test_split(good_sentences,
test_size = 0.15,
random_state = 2)

# Group sentences of similar length together so padded batches waste fewer
# <PAD> tokens.  A stable sort by length is equivalent to the original
# bucket-by-length loops (length 10..92, preserving input order within each
# length) because every sentence length already lies in
# [min_length, max_length] after filtering, and Python's sort is stable.
training_sorted = sorted(training, key=len)
testing_sorted = sorted(testing, key=len)

# The 26 lowercase ASCII letters, used when injecting a random character
# into a sentence to simulate a typo.
letters = [chr(code) for code in range(ord('a'), ord('z') + 1)]

def noise_maker(sentence, threshold):
    """Return a copy of `sentence` with random spelling-style errors.

    With probability `threshold` each character id is kept as-is.
    Otherwise one of three mutations is applied, roughly equally likely:
    swap the character with its successor, insert a random letter before
    it, or drop it entirely.

    NOTE(review): indentation was reconstructed from the scraped flat
    source; the structure below matches the original tutorial.

    Args:
        sentence: list of int character ids (from vocab_to_int).
        threshold: float in [0, 1]; higher means fewer injected errors
            (at 1.0 the sentence is returned unchanged).

    Returns:
        A new list of int ids; its length may differ from the input's.
    """
    noisy_sentence = []
    i = 0
    while i < len(sentence):
        random = np.random.uniform(0, 1, 1)
        if random < threshold:
            # Most characters are left untouched.
            noisy_sentence.append(sentence[i])
        else:
            new_random = np.random.uniform(0, 1, 1)
            if new_random > 0.67:
                if i == (len(sentence) - 1):
                    # Nothing to swap with at the end of the sentence;
                    # redraw for this character on the next iteration.
                    continue
                else:
                    # Transpose this character with the next one.
                    noisy_sentence.append(sentence[i + 1])
                    noisy_sentence.append(sentence[i])
                    i += 1
            elif new_random < 0.33:
                # Insert a random letter before the current character.
                random_letter = np.random.choice(letters, 1)[0]
                noisy_sentence.append(vocab_to_int[random_letter])
                noisy_sentence.append(sentence[i])
            else:
                # Drop the character (simulates a deletion typo).
                pass
        i += 1
    return noisy_sentence

def get_batches(sentences, batch_size, threshold):
    # Build noisy-input / clean-target batches for training.
    # NOTE(review): this definition is truncated in the scraped source —
    # the original continues past this point (padding both batches and
    # yielding them); only the visible prefix is documented here.

    # Only full batches are produced; the trailing remainder is dropped.
    for batch_i in range(0, len(sentences)//batch_size):
        start_i = batch_i * batch_size
        sentences_batch = sentences[start_i:start_i + batch_size]

        # Corrupt each sentence with noise_maker to create the model input.
        sentences_batch_noisy = []
        for sentence in sentences_batch:
            sentences_batch_noisy.append(
                noise_maker(sentence, threshold))

        # Append an <EOS> marker to each target sentence.
        # NOTE(review): `sentence.append(...)` mutates the caller's lists
        # in place, so iterating the dataset more than once would append
        # <EOS> repeatedly — confirm against the original notebook.
        sentences_batch_eos = []
        for sentence in sentences_batch:
            sentence.append(vocab_to_int['<EOS>'])
            sentences_batch_eos.append(sentence)

Redis常见超时原因分析
2483 0

3241 0
[推荐]ORACLE PL/SQL编程之五：异常错误处理(知已知彼、百战不殆)

758 0
【软件测试】3、代码检查与Code Review

819 0

725 0
+关注

17142

2569

+ 订阅

《2021云上架构与运维峰会演讲合集》

《零基础CSS入门教程》

《零基础HTML入门教程》