Below is example Python code for a seq2seq-based question-answering system:

```python
import tensorflow as tf
import numpy as np

# Encoder: embeds the input tokens and runs them through a GRU.
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        # Embed the input: (batch, seq_len) -> (batch, seq_len, embedding_dim)
        x = self.embedding(x)
        output, state = self.gru(x, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))


# Decoder with Bahdanau-style additive attention.
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)
        # Attention layers (the original used a nonexistent tf.nn.dense call).
        self.W1 = tf.keras.layers.Dense(dec_units)
        self.W2 = tf.keras.layers.Dense(dec_units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, x, hidden, enc_output):
        # Compute attention weights over the encoder outputs.
        # hidden: (batch, dec_units) -> (batch, 1, dec_units) for broadcasting.
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = self.V(tf.nn.tanh(
            self.W1(hidden_with_time_axis) + self.W2(enc_output)))
        attention_weights = tf.nn.softmax(score, axis=1)
        # Apply attention: weighted sum of the encoder outputs.
        context_vector = tf.reduce_sum(attention_weights * enc_output, axis=1)
        # Embed the input token and prepend the context vector.
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)
        return x, state, attention_weights


# One training step with teacher forcing.
def train_step(inp, targ, enc_hidden):
    loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims(
            [targ_lang.word_index['<start>']] * BATCH_SIZE, 1)
        for t in range(1, targ.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)
            # Teacher forcing: feed the ground-truth token as the next input.
            dec_input = tf.expand_dims(targ[:, t], 1)
    batch_loss = loss / int(targ.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss


# Greedy decoding for a single input sentence.
def evaluate(sentence):
    attention_plot = np.zeros((max_length_targ, max_length_inp))
    sentence = preprocess_sentence(sentence)
    inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
    inputs = tf.keras.preprocessing.sequence.pad_sequences(
        [inputs], maxlen=max_length_inp, padding='post')
    inputs = tf.convert_to_tensor(inputs)
    # Batch size is 1 here, so build the initial state explicitly
    # (initialize_hidden_state would return a BATCH_SIZE-sized state).
    enc_hidden = tf.zeros((1, encoder.enc_units))
    enc_output, enc_hidden = encoder(inputs, enc_hidden)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)
    predicted_ids = []
    for t in range(max_length_targ):
        predictions, dec_hidden, attention_weights = decoder(
            dec_input, dec_hidden, enc_output)
        # Store the attention weights for later visualization.
        attention_plot[t] = tf.reshape(attention_weights, (-1,)).numpy()
        predicted_id = tf.argmax(predictions[0]).numpy()
        if predicted_id == targ_lang.word_index['<end>']:
            break
        predicted_ids.append(predicted_id)
        dec_input = tf.expand_dims([predicted_id], 0)
    return predicted_ids, attention_plot

# Data preprocessing and other code...
```

Summary: this question-answering system uses a seq2seq architecture, with an encoder and a decoder trained together to generate answers. During training, gradient descent optimizes the model parameters. At evaluation time, an answer is generated from the input, and the distribution of attention weights can be inspected.

Points to note:
1. The quality of data preprocessing, including noise removal and tokenization, has a large impact on model performance; a sketch of a cleaning helper follows below.
2. Hyperparameters need to be tuned experimentally for the specific data and task.
3. Batch size and number of epochs should be set to balance training time against model quality.
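
The code above references several names it never defines (`loss_function`, `optimizer`, `BATCH_SIZE`, `encoder`, `decoder`, and the `inp_lang`/`targ_lang` tokenizers), which the original answer elides under "data preprocessing and other code". Below is a minimal sketch of what that setup might look like; the vocabulary sizes and hyperparameter values are assumptions for illustration, not part of the original answer:

```python
import tensorflow as tf

# Hypothetical setup -- the original answer leaves all of this out.
BATCH_SIZE = 64          # assumed value
embedding_dim = 256      # assumed value
units = 1024             # assumed value
vocab_inp_size = 8000    # would come from the fitted input tokenizer
vocab_tar_size = 8000    # would come from the fitted target tokenizer

encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)
optimizer = tf.keras.optimizers.Adam()

# Per-token cross-entropy on logits, with padding positions masked out.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))  # 0 = padding id
    loss_ = loss_object(real, pred)
    loss_ *= tf.cast(mask, loss_.dtype)
    return tf.reduce_mean(loss_)
```

With this in place, training is a loop over batched (question, answer) pairs that calls `train_step(inp, targ, encoder.initialize_hidden_state())` for each batch.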
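
`evaluate` also calls an undefined `preprocess_sentence`. A hypothetical implementation in the spirit of note 1 above (the exact cleaning rules here are assumptions; real data usually needs more careful normalization, and Chinese text would need a proper tokenizer rather than whitespace splitting):

```python
import re

def preprocess_sentence(sentence):
    # Hypothetical cleaner; the original answer does not define this helper.
    sentence = sentence.lower().strip()
    # Separate punctuation from words so each mark becomes its own token.
    sentence = re.sub(r"([?.!,])", r" \1 ", sentence)
    sentence = re.sub(r"\s+", " ", sentence).strip()
    # Add the start/end markers the decoder expects.
    return '<start> ' + sentence + ' <end>'
```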
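
Assuming `inp_lang` and `targ_lang` are fitted `tf.keras.preprocessing.text.Tokenizer` instances, turning the predicted ids back into text might look like this (the question string is made up):

```python
predicted_ids, attention_plot = evaluate('how do i reset my password')
# Tokenizer.index_word maps token ids back to word strings.
answer = ' '.join(targ_lang.index_word[i] for i in predicted_ids)
print(answer)
```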