什么是 词云
词云其实就是对网络文本中出现频率较高的“关键词”予以视觉上的突出,形成“关键词云层”或“关键词渲染”,从而过滤掉大量的文本信息。
词云也是数据可视化的一种形式:给出一段文本,根据关键词的出现频率生成一幅图像,人们只要扫一眼就能够明白文章的主旨。
词云图:
你学废了嘛
#!/usr/bin/env python
# encoding: utf-8
import jieba
import numpy as np
import PIL.Image as Image
from wordcloud import WordCloud


class wordCloud:
    '''
    Build a word-cloud image from a text file.

    The text is segmented with jieba, rendered by WordCloud using a mask
    image for the shape, written to an image file and then displayed.
    '''

    def __init__(self,
                 bg_img='assets/picture.jpeg',
                 word_path='assets/word.txt',
                 font_path='/System/Library/Fonts/PingFang.ttc',
                 output_path='word_cloud.jpg'):
        '''
        Store the file locations used by run().

        All arguments are optional; the defaults reproduce the original
        hard-coded values, so ``wordCloud()`` behaves as before.

        :param bg_img: path of the image used as the word-cloud mask
        :param word_path: path of the UTF-8 text file to visualise
        :param font_path: path of the font file handed to WordCloud
            (the default is a macOS system font)
        :param output_path: path the rendered image is written to
        '''
        self.bg_img = bg_img
        self.word_path = word_path
        self.font_path = font_path
        self.output_path = output_path

    def hello(self):
        '''
        Print the welcome banner.

        :return: self, so calls can be chained
        '''
        print('*' * 50)
        print(' ' * 20 + '词云制作')
        print(' ' * 5 + 'Author: autofelix  Date: 2022-01-17 13:14')
        print('*' * 50)
        return self

    def run(self):
        '''
        The program entry: read the text, render the word cloud,
        save it to ``self.output_path`` and open it in an image viewer.
        '''
        # Decode explicitly as UTF-8; the platform default encoding is not
        # UTF-8 everywhere and would fail or garble Chinese text.
        with open(self.word_path, 'r', encoding='utf-8') as f:
            word = f.read()

        # Segment the text with jieba and join the tokens with spaces
        # before handing the result to WordCloud.generate().
        cut_word = ' '.join(jieba.cut(word))

        # Open the mask inside a context manager so the image file is
        # closed once its pixels have been copied into the array.
        with Image.open(self.bg_img) as img:
            color_mask = np.array(img)

        word_cloud = WordCloud(
            # Font to render with; without one the text comes out garbled
            font_path=self.font_path,
            # Background colour
            background_color='white',
            # Shape of the word cloud
            mask=color_mask,
            # Maximum number of words
            max_words=120,
            # Largest font size
            max_font_size=2000,
        ).generate(cut_word)

        word_cloud.to_file(self.output_path)
        im = word_cloud.to_image()
        im.show()


if __name__ == '__main__':
    wordCloud().hello().run()