#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Author : {Jack Zhao}
@Time : 2020/1/9 10:26
@Contact : {[email protected]}
@Desc : Test the new fields, add TF-IDF, switch the data source to the London dataset; final version of the code
'''
import math
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
from gensim import corpora, models, similarities
import gensim
import pyLDAvis.gensim
from collections import defaultdict
import nltk
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
#去空
def clean_none():
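# Switching to utf-8 here does not help; it still fails with
# "UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 0-1: invalid ...".
# The CSV file itself has to be re-saved in UTF-8 format instead -- a clumsy workaround that leads to garbled characters.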
# 使用London数据集
df = pd.read_csv("../data/London.csv")
# 去空字段
print("原本数据集共%d条数据\n" % (len(df.values)))
# 这里将衡量字段reviews_per_month放入去空
df = df.dropna(subset = ['id', 'host_abou
Python项目实战-Gensim手动实现LDA算法玩转情感分析
猜你喜欢
转载自blog.csdn.net/weixin_40539952/article/details/108537720
今日推荐
周排行