プログラミング【python】で作る。前に流行ったtweetからwordcloud を作る方法

import tweepy
import datetime
import time
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import MeCab
# Twitter API credentials. The values below are placeholders; replace them
# with the keys and tokens issued for your own developer account.
consumer_key = "xxxxxxxxxxxxxxxxxxxxxx"
consumer_secret = "xxxxxxxxxxxxxxxxxxxxx"
access_token = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
access_token_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Build the OAuth handler from the credentials above and create the tweepy
# API client that get_tweets() receives as its `api` argument.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# NOTE(review): wait_on_rate_limit presumably makes tweepy wait instead of
# failing when the rate limit is reached -- confirm against the tweepy docs.
api = tweepy.API(auth ,wait_on_rate_limit = True)

# `name` is not referenced anywhere else in the visible script.
name="name"
# Account whose timeline is fetched; passed to get_tweets() below.
userID="Twitter ID"
# Module-level date; get_tweets() computes its own local `date` instead.
date=datetime.date.today()
# Shared list that get_tweets() appends collected tweet texts to.
tweets_data=[]

def get_tweets(api,userID,dfile,page_delay=500):
    """Collect a user's tweets from the last 24 hours and save them to a file.

    The timeline is read page by page, newest first. Every tweet younger
    than one day is printed, appended to the module-level ``tweets_data``
    list (one tweet per entry, newline-terminated) and finally written to
    ``<dfile>.txt``. Reading stops at the first tweet that is one day old
    or older, or when the API returns an empty page.

    Args:
        api: Object exposing ``user_timeline(user, page=...)`` (a tweepy
            ``API`` instance in this script).
        userID: Account identifier forwarded to ``user_timeline``.
        dfile: Output file name without extension. Apostrophes are removed
            from the resulting name, as the original code did.
        page_delay: Seconds to sleep between two page requests.

    Returns:
        None. The output file is only written when at least one tweet was
        collected during this call.
    """
    date = datetime.date.today()
    # Equivalent to the original "wrap in quotes, then strip quotes" dance.
    fname = (dfile + ".txt").replace("'", "")
    collected = 0
    page = 1

    while True:
        tweets = api.user_timeline(userID, page=page)
        if not tweets:
            # No more pages: without this check the loop would never end.
            break

        reached_old_tweet = False
        for tweet in tweets:
            created = tweet.created_at
            # Compare in UTC. Older tweepy versions return naive UTC
            # datetimes, newer ones timezone-aware ones; mixing either with
            # local naive time is wrong (offset error or TypeError).
            now = datetime.datetime.now(datetime.timezone.utc)
            if created.tzinfo is None:
                now = now.replace(tzinfo=None)

            if (now - created).days >= 1:
                # Timeline is newest-first, so everything after is older.
                reached_old_tweet = True
                break

            print(date)
            print(tweet.text)
            tweets_data.append(tweet.text + '\n')
            collected += 1

        if reached_old_tweet:
            break

        # Advance once per page (not once per tweet) and pause between
        # requests.
        page += 1
        time.sleep(page_delay)

    if collected:
        # Write once at the end instead of rewriting the file per tweet.
        with open(fname, "w", encoding="utf-8") as f:
            f.writelines(tweets_data)
# Ask for the base file name (without extension) used for the output files.
print('******Enter file name******')
dfile = input('>  ')
# Fetch the recent tweets of `userID` and save them under that file name.
get_tweets(api, userID, dfile)
def analyze_tweet(dfile, font_path=r"C:\WINDOWS\Fonts\HGRGE.TTC", stop_words=None):
    """Build a word cloud image from the tweets stored in ``<dfile>.txt``.

    Each line of the text file is tokenized with MeCab; nouns, verbs,
    adjectives, adverbs and interjections are kept and fed to WordCloud.
    The image is saved as ``<dfile>.png`` and then shown with matplotlib.

    Args:
        dfile: Base file name without extension. Apostrophes are removed
            from the resulting file names, as the original code did.
        font_path: Font file passed to WordCloud. The default is the
            Windows path the script originally hard-coded; pass another
            font file on other systems.
        stop_words: Iterable of words to exclude from the cloud. When
            ``None``, the script's original built-in list is used.
    """
    base = dfile.replace("'", "")
    fname = base + ".txt"
    pct = base + ".png"

    if stop_words is None:
        # Default stop words; adjust them to the account being analysed.
        stop_words = [ 'です' ,'ました','いる','あり','ある','www','ww','そう','する'
            ,'すぎ','https','co','みたい']

    # Parts of speech to keep: noun, verb, adjective, adverb, interjection.
    target_types = ("名詞", "動詞", "形容詞", "副詞", "感動詞")

    # Morphological analysis is done with MeCab.
    mecab = MeCab.Tagger("-Ochasen")

    words = []
    with open(fname, 'r', encoding="utf-8") as f:
        for line in f:
            # Walk the linked list of nodes MeCab returns for this line.
            node = mecab.parseToNode(line)
            while node:
                # The first comma-separated feature field is compared
                # against the part-of-speech names above.
                word_type = node.feature.split(",")[0]
                if word_type in target_types:
                    words.append(node.surface)
                node = node.next

    txt = " ".join(words)

    # Render the cloud on a white background with the stop words removed.
    wordcloud = WordCloud(background_color="white", font_path=font_path,
                          stopwords=set(stop_words),
                          width=800, height=600).generate(txt)

    plt.imshow(wordcloud)
    plt.axis("off")
    # Save before show(): the figure is saved first, then displayed.
    plt.savefig(pct, format='png', dpi=300)
    plt.show()


# Build and display the word cloud for the file name entered above.
analyze_tweet(dfile)

これでツイートを取得してwordcloudを作ることが可能です。

コード内のuserIDを変更することでそのIDのツイートを取得できます。

consumer_key = "xxxxxxxxxxxxxxxxxxxxxx"
consumer_secret = "xxxxxxxxxxxxxxxxxxxxx"
access_token = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
access_token_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

これらの値には、各自で取得したTwitter APIのキーとトークンを設定してください。

参考にしたサイト

Python Twitterからツイートを取得してテキスト分析(wordcloudで見える化) - Qiita

なにか質問があれば私のTwitterのDMでお願いします。@panNakott25

実行結果はこんな感じです。

f:id:panNakotta:20191012201959p:plain — wordcloud

みんな魚でもみて癒そう
🐟🐡🐠 pic.twitter.com/BJSb3zPxDO
— パンナコッタ (@panNaKott25) 2019年10月12日

お風呂に水溜めた人いる？？？やらなきゃ？？
— パンナコッタ (@panNaKott25) 2019年10月12日

今起きたクソ生活
— パンナコッタ (@panNaKott25) 2019年10月12日

誰か宅飲みしよー
— パンナコッタ (@panNaKott25) 2019年10月12日

学生結婚したい(後先考えないクズ
— パンナコッタ (@panNaKott25) 2019年10月12日

パンナの学生生活

地方大学の大学生/プログラミング言語【python】を中心に紹介しています。/日々の脳内をアウトプット/麻雀とワインと日本酒が好き/将来は幸せになりたい

プログラミング【python】で作る。前に流行ったtweetからwordcloud を作る方法

TwitterAPIについて

word cloudについて

MeCabについて

ソースコード