產品宣傳類網站設計互聯網推廣公司靠譜嗎
我們基于Python代碼,使用PyQt5創建圖形用戶界面(GUI),同時支持中英文兩種語言的文本論文文獻關鍵信息提取。
PyQt5:用于創建GUI應用程序。
jieba:中文分詞庫,用于中文文本的處理。
re:正則表達式模塊,用于文本清理和句子分割。
numpy:提供數值計算能力,如數組操作、矩陣運算等,主要用于TextRank算法的實現。
import sys
import re
import jieba
import numpy as np
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,QHBoxLayout, QTextEdit, QPushButton, QLabel,QMessageBox, QSpinBox, QFileDialog, QComboBox)
from PyQt5.QtCore import Qt


class TextRankSummarizer:
    """Extractive summarizer that ranks sentences with TextRank.

    Sentences are compared with Jaccard similarity over their stop-word
    filtered tokens; the resulting graph is scored with a PageRank-style
    iteration and the best-scoring sentences are returned in document order.
    """

    def __init__(self, language='chinese'):
        """Create a summarizer for ``'chinese'`` text (any other value
        selects the English code paths)."""
        self.language = language
        self.stopwords = self.load_stopwords()
        # Initialize the jieba Chinese tokenizer.
        if language == 'chinese':
            jieba.initialize()

    def load_stopwords(self):
        """Return the built-in stop-word set for the configured language."""
        if self.language == 'chinese':
            return {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人',
                    '都', '一', '一個', '也', '要'}
        # English stop words.
        return {'a', 'an', 'the', 'and', 'or', 'but', 'if', 'is', 'are',
                'of', 'to', 'in', 'on'}

    def preprocess_text(self, text):
        """Clean *text* and split it into a list of sentences.

        Characters other than word characters, whitespace and sentence
        punctuation are removed. Fragments of two characters or fewer
        (after stripping whitespace) are discarded.
        """
        # Keep full-width "," "!" "?" as well: removing them here would glue
        # neighbouring Chinese sentences together before the split below.
        text = re.sub(r'[^\w\s。,.?!,!?]', '', text)
        if self.language == 'chinese':
            fragments = re.split(r'[。!?!?]', text)
        else:
            fragments = re.split(r'[.!?]', text)
        # Strip first so the length filter measures real content.
        stripped = (fragment.strip() for fragment in fragments)
        return [sentence for sentence in stripped if len(sentence) > 2]

    def _tokenize(self, sentence):
        """Return the set of non-blank, non-stop-word tokens of *sentence*."""
        if self.language == 'chinese':
            words = jieba.cut(sentence)
        else:
            words = sentence.lower().split()
        # Blank tokens would otherwise count as shared vocabulary.
        return {w for w in words if w.strip() and w not in self.stopwords}

    @staticmethod
    def _jaccard(words1, words2):
        """Return the Jaccard similarity of two token sets (0 if both empty)."""
        union = len(words1 | words2)
        return len(words1 & words2) / union if union != 0 else 0

    def calculate_similarity(self, sentence, other_sentence):
        """Return the Jaccard similarity between two sentences."""
        return self._jaccard(self._tokenize(sentence),
                             self._tokenize(other_sentence))

    def textrank(self, sentences, top_n=5, damping_factor=0.85, max_iter=100):
        """Rank *sentences* with TextRank.

        Args:
            sentences: list of sentence strings.
            top_n: number of sentences to select.
            damping_factor: PageRank damping factor.
            max_iter: upper bound on the number of score iterations.

        Returns:
            Sorted list of the indices of the ``top_n`` best sentences.
        """
        count = len(sentences)
        if count == 0:
            return []

        # Tokenize every sentence once instead of once per pair.
        token_sets = [self._tokenize(sentence) for sentence in sentences]

        # Jaccard similarity is symmetric, so fill both halves at once.
        similarity_matrix = np.zeros((count, count))
        for i in range(count):
            for j in range(i + 1, count):
                similarity = self._jaccard(token_sets[i], token_sets[j])
                similarity_matrix[i, j] = similarity
                similarity_matrix[j, i] = similarity

        # Row-normalize; a sentence that shares nothing with the others has a
        # zero row sum and must stay all-zero rather than become NaN.
        row_sum = similarity_matrix.sum(axis=1, keepdims=True)
        normalized_matrix = np.divide(
            similarity_matrix,
            row_sum,
            out=np.zeros_like(similarity_matrix),
            where=row_sum != 0,
        )

        # Each sentence j hands out its score along its normalized out-edges,
        # so sentence i collects column i (hence the transpose). Using row i
        # would average the neighbours' scores and leave every score at 1.
        transition = normalized_matrix.T
        scores = np.ones(count)
        for _ in range(max_iter):
            prev_scores = scores
            scores = (1 - damping_factor) + damping_factor * (transition @ prev_scores)
            if np.linalg.norm(scores - prev_scores) < 1e-5:
                break

        # Stable sort on the negated scores: ties go to the earlier sentence.
        ranked_indices = np.argsort(-scores, kind='stable')[:top_n]
        return sorted(int(index) for index in ranked_indices)

    def summarize(self, text, ratio=0.2):
        """Return a summary containing roughly ``ratio`` of the sentences.

        If fewer than three sentences are found, a fixed message is returned
        instead of a summary.
        """
        sentences = self.preprocess_text(text)
        if len(sentences) < 3:
            return "文本過短,無法生成有效摘要"

        top_n = max(1, int(len(sentences) * ratio))
        # textrank() already returns the indices in document order.
        important_indices = self.textrank(sentences, top_n=top_n)
        selected_sentences = [sentences[i] for i in important_indices]

        # Chinese sentences are joined with "。", English ones with ". ".
        if self.language == 'chinese':
            return '。'.join(selected_sentences) + '。'
        return '. '.join(selected_sentences) + '.'


class MainWindow(QMainWindow):
    """Main window: input text, summary controls and the summary output."""

    def __init__(self):
        super().__init__()
        # The summarizer starts in its default (Chinese) mode.
        self.summarizer = TextRankSummarizer()
        self.setup_ui()

    def setup_ui(self):
        """Build the widgets, lay them out and connect the signals."""
        self.setWindowTitle("TextRank文本摘要工具")
        self.setGeometry(100, 100, 1000, 800)

        main_widget = QWidget()
        layout = QVBoxLayout()

        # Input area.
        self.input_text = QTextEdit()
        self.input_text.setPlaceholderText("在此粘貼需要摘要的文本(建議500字以上)...")

        # Control row.
        control_layout = QHBoxLayout()

        self.ratio_spin = QSpinBox()
        self.ratio_spin.setRange(5, 50)
        self.ratio_spin.setValue(20)
        self.ratio_spin.setSuffix("%")

        self.lang_combo = QComboBox()
        self.lang_combo.addItems(["中文", "英文"])

        self.summarize_btn = QPushButton("生成摘要")
        self.import_btn = QPushButton("導入文件")
        self.clear_btn = QPushButton("清空")

        control_layout.addWidget(QLabel("摘要比例:"))
        control_layout.addWidget(self.ratio_spin)
        control_layout.addWidget(QLabel("語言:"))
        control_layout.addWidget(self.lang_combo)
        control_layout.addWidget(self.import_btn)
        control_layout.addWidget(self.summarize_btn)
        control_layout.addWidget(self.clear_btn)

        # Output area.
        self.output_text = QTextEdit()
        self.output_text.setReadOnly(True)

        # Assemble the layout.
        layout.addWidget(QLabel("輸入文本:"))
        layout.addWidget(self.input_text)
        layout.addLayout(control_layout)
        layout.addWidget(QLabel("摘要結果:"))
        layout.addWidget(self.output_text)

        main_widget.setLayout(layout)
        self.setCentralWidget(main_widget)

        # Signal connections.
        self.summarize_btn.clicked.connect(self.generate_summary)
        self.import_btn.clicked.connect(self.import_file)
        self.clear_btn.clicked.connect(self.clear_content)
        self.lang_combo.currentTextChanged.connect(self.change_language)

    def change_language(self, lang):
        """Replace the summarizer with one matching the selected language."""
        self.summarizer = TextRankSummarizer(
            'chinese' if lang == "中文" else 'english')

    def generate_summary(self):
        """Summarize the input text and show the result in the output box."""
        text = self.input_text.toPlainText().strip()
        if not text:
            QMessageBox.warning(self, "輸入錯誤", "請輸入需要摘要的文本")
            return

        # The spin box holds a percentage; summarize() expects a fraction.
        ratio = self.ratio_spin.value() / 100
        summary = self.summarizer.summarize(text, ratio)
        self.output_text.setPlainText(summary)

    def import_file(self):
        """Load a UTF-8 text file chosen by the user into the input box."""
        path, _ = QFileDialog.getOpenFileName(
            self, "打開文本文件", "", "文本文件 (*.txt);;所有文件 (*.*)")
        if not path:
            return
        try:
            with open(path, 'r', encoding='utf-8') as f:
                self.input_text.setPlainText(f.read())
        except Exception as e:
            # UI boundary: report any read/decode failure in a dialog.
            QMessageBox.critical(self, "錯誤", f"文件讀取失敗:\n{e}")

    def clear_content(self):
        """Clear both the input and the output text boxes."""
        self.input_text.clear()
        self.output_text.clear()


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())