分析コードのまとめ -個別単語の分析- - S-Linguistics

分析コードのまとめ -個別単語の分析-

投稿者: Sho オン 24/06/2024 18/06/2024 BLOG 日本語

分析コードをまとめていく．個別単語の分析編．

前回に続いて，コードをまとめる．

今回は個別単語の基本統計量にフォーカスして分析する．

# Word to analyze. Fill this in before running the analysis below.
# The sentence-level statistics compare it against lower-cased,
# whitespace-split words, so it should be given in lower case.
target_word = ""

import pandas as pd
from collections import Counter

def analyze_target_word(text, target_word):
    """
    Compute basic statistics for a single word in ``text``.

    The overall count is taken from ``tokenize(text)`` (``tokenize`` is
    defined outside this snippet). The per-sentence figures are computed
    independently: ``text`` is split on '.', and each sentence is
    lower-cased and split on whitespace before being compared with
    ``target_word``.

    Args:
        text: Text to analyze.
        target_word: Word to look up. Sentence-level matching compares it
            with lower-cased words, so it should be given in lower case.

    Returns:
        A dict with the following keys:
            "word_count": occurrences of ``target_word`` among the tokens.
            "sentence_word_counts": occurrences in each sentence.
            "sentences_with_target_word": stripped sentences that contain
                the word.
            "sentence_positions": for each sentence, the 1-based word
                positions of the word (an empty list if it is absent).
            "total_sentences": number of non-blank sentences.
            "relative_frequency": ``word_count`` divided by the number of
                tokens, or 0.0 when there are no tokens.
    """
    tokens = tokenize(text)

    # Total number of occurrences among the tokens.
    word_count = Counter(tokens)[target_word]

    # Splitting "a. b." on '.' yields ['a', ' b', '']; blank pieces are not
    # sentences, so drop them instead of counting them.
    sentences = [segment for segment in text.split('.') if segment.strip()]

    # Lower-case and split every sentence once and reuse the result below.
    sentence_words = [sentence.lower().split() for sentence in sentences]

    # Occurrences of the word in each sentence.
    sentence_word_counts = [words.count(target_word) for words in sentence_words]

    # Sentences that contain the word at least once.
    sentences_with_target_word = [
        sentence.strip()
        for sentence, words in zip(sentences, sentence_words)
        if target_word in words
    ]

    # 1-based positions of the word inside each sentence.
    sentence_positions = [
        [position for position, word in enumerate(words, start=1) if word == target_word]
        for words in sentence_words
    ]

    total_sentences = len(sentences)

    # Guard against an empty token list, which would otherwise raise
    # ZeroDivisionError.
    token_total = len(tokens)
    relative_frequency = word_count / token_total if token_total else 0.0

    return {
        "word_count": word_count,
        "sentence_word_counts": sentence_word_counts,
        "sentences_with_target_word": sentences_with_target_word,
        "sentence_positions": sentence_positions,
        "total_sentences": total_sentences,
        "relative_frequency": relative_frequency,
    }

def display_target_word_analysis(analysis, target_word):
    """
    Print the statistics held in ``analysis`` for ``target_word``.

    Output consists of the total count and relative frequency, a table of
    per-sentence occurrence counts, and a table pairing each sentence that
    contains the word with the word's positions in that sentence.

    Args:
        analysis: Dict with the keys "word_count", "relative_frequency",
            "sentence_word_counts", "sentences_with_target_word" and
            "sentence_positions".
        target_word: The analyzed word; used only in the heading line.

    Returns:
        None. Everything is written to standard output.
    """
    # Headline figures: total occurrences and relative frequency.
    print(f"Analysis for the word '{target_word}':")
    print(f"Total Count: {analysis['word_count']}")
    print(f"Relative Frequency: {analysis['relative_frequency']:.4f}")

    # First table: one row per sentence, numbered from 1, with its count.
    per_sentence = analysis['sentence_word_counts']
    counts_table = pd.DataFrame({
        "Sentence": list(range(1, len(per_sentence) + 1)),
        "Word Count": per_sentence,
    })
    print("\nSentence Counts:")
    print(counts_table)

    # Second table: sentences containing the word and where it occurs.
    # Sentences without the word have an empty position list and are skipped
    # so that the rows line up with the list of matching sentences.
    matching_sentences = analysis['sentences_with_target_word']
    position_strings = []
    for positions in analysis['sentence_positions']:
        if positions:
            position_strings.append(' '.join(str(p) for p in positions))
    matches_table = pd.DataFrame({
        "Sentence": matching_sentences,
        "Positions": position_strings,
    })
    print("\nSentences containing the word and positions:")
    print(matches_table)


# Run the analysis on the preprocessed text.
# NOTE(review): processed_text is not defined in this snippet; presumably it
# is produced by an earlier preprocessing step. Confirm before running.
analysis = analyze_target_word(processed_text, target_word)

# Display the analysis results.
display_target_word_analysis(analysis, target_word)

関連

コメントを残すコメントをキャンセル