# Python实现个人消费记录分析与可视化工具：从CSV到理财洞察

一、背景介绍

在数字化时代，我们的消费记录（如账单、支付记录）往往以CSV格式存储，但手动分析这些数据耗时且低效。本文将用Python实现一个个人消费记录分析与可视化工具，帮助用户快速提取关键洞察：总消费、月度趋势、类别占比等，并通过图表直观展示，辅助制定理财计划。该工具基于pandas（数据处理）和matplotlib（可视化），本地即可运行，适合数据分析入门者练习。

二、实现思路

工具的核心流程分为5步：
1. 数据导入：读取CSV格式的消费记录；
2. 数据清洗：处理缺失值、统一格式（日期、类别）；
3. 统计分析：计算总消费、月度趋势、类别占比等；
4. 可视化：生成饼图（类别占比）、折线图（月度趋势）、柱状图（类别月度对比）；
5. 结果导出：保存统计报告（TXT）和图表（PNG）。

三、完整代码实现

以下是可直接运行的代码，包含详细注释：

1. 导入依赖库

import pandas as pd
import matplotlib.pyplot as plt

# Configure matplotlib so Chinese text renders in charts. The fonts are tried
# in order: SimHei is the usual choice on Windows, Arial Unicode MS on macOS;
# adjust the list for your system.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Arial Unicode MS'],
    # Use the ASCII hyphen for negative numbers so the minus sign still
    # displays correctly with the fonts above.
    'axes.unicode_minus': False,
})

2. 数据导入与清洗

def load_and_clean_data(file_path):
    """Load expense records from a CSV file and return a cleaned DataFrame.

    The CSV must contain the columns ``日期`` (date), ``类别`` (category) and
    ``金额`` (amount). Cleaning steps:

    1. Coerce ``金额`` to numbers and ``日期`` to datetimes; values that
       cannot be parsed become missing instead of raising.
    2. Drop rows whose date or amount is missing.
    3. Keep only rows with a strictly positive amount.
    4. Normalise ``类别`` (strip whitespace, upper-case) and label missing or
       blank categories ``未分类`` so those rows are not silently dropped
       from per-category totals later on.

    Args:
        file_path: Path of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        A new DataFrame (an independent copy, safe to modify) containing
        only the valid rows.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If one of the required columns is missing.
    """
    df = pd.read_csv(file_path)

    # A single non-numeric cell makes pandas read the whole column as text,
    # which would make the "> 0" comparison below raise TypeError.
    df['金额'] = pd.to_numeric(df['金额'], errors='coerce')
    df['日期'] = pd.to_datetime(df['日期'], errors='coerce')

    df = df.dropna(subset=['日期', '金额'])

    # .copy() detaches the result from the filtered view so that later column
    # assignments do not trigger SettingWithCopyWarning.
    df = df[df['金额'] > 0].copy()

    raw_categories = df['类别']
    cleaned = raw_categories.astype(str).str.strip().str.upper()
    uncategorised = raw_categories.isna() | (cleaned == '')
    df['类别'] = cleaned.mask(uncategorised, '未分类')

    return df

3. 统计分析函数

def analyze_data(df):
    """Compute the summary statistics used by the charts and the report.

    Args:
        df: Cleaned expense records with a datetime column ``日期``, a
            category column ``类别`` and a numeric column ``金额``.

    Returns:
        A dict with these keys:

        * ``total`` - sum of all amounts.
        * ``count`` - number of records.
        * ``avg`` - mean amount per record (0 when there are no records).
        * ``monthly`` - Series of total amount per month, indexed by
          ``'YYYY-MM'`` strings.
        * ``category`` - Series of total amount per category, sorted in
          descending order.
        * ``category_percent`` - Series with each category's share of the
          total in percent, rounded to one decimal.
        * ``top3`` - the first three entries of ``category``.
        * ``period`` - ``'YYYY-MM ~ YYYY-MM'`` spanning the earliest and
          latest record, or ``'无数据'`` when ``df`` is empty.
    """
    record_count = len(df)

    # With no rows, min()/max() of the date column are NaT and strftime()
    # raises ValueError, so return an explicit empty result instead.
    if record_count == 0:
        no_data = pd.Series(dtype='float64')
        return {
            'total': 0.0,
            'count': 0,
            'avg': 0,
            'monthly': no_data,
            'category': no_data.copy(),
            'category_percent': no_data.copy(),
            'top3': no_data.copy(),
            'period': '无数据',
        }

    total_expense = df['金额'].sum()
    avg_expense = total_expense / record_count

    dates = df['日期']

    # Monthly totals; period labels are converted to plain strings so they
    # can be used directly as chart tick labels and report text.
    monthly_expense = df.groupby(dates.dt.to_period('M'))['金额'].sum()
    monthly_expense.index = monthly_expense.index.astype(str)

    category_expense = (
        df.groupby('类别')['金额'].sum().sort_values(ascending=False)
    )
    if total_expense:
        category_percent = (category_expense / total_expense * 100).round(1)
    else:
        # Avoid NaN/inf shares when the amounts sum to zero.
        category_percent = category_expense * 0.0

    top3_categories = category_expense.head(3)

    start_month = dates.min().strftime('%Y-%m')
    end_month = dates.max().strftime('%Y-%m')

    return {
        'total': total_expense,
        'count': record_count,
        'avg': avg_expense,
        'monthly': monthly_expense,
        'category': category_expense,
        'category_percent': category_percent,
        'top3': top3_categories,
        'period': f"{start_month} ~ {end_month}"
    }

4. 可视化函数

def visualize_data(analysis_result):
    """Draw the category pie chart and the monthly trend line chart.

    Both charts are written to the current working directory at 300 dpi:
    ``category_pie.png`` and ``monthly_trend.png``.

    Only these two charts are produced here. The per-category monthly bar
    chart needs the raw DataFrame, which is not part of ``analysis_result``;
    ``visualize_data_with_df`` takes that DataFrame and draws it.

    Args:
        analysis_result: Mapping with a ``'category'`` Series (amount per
            category) and a ``'monthly'`` Series (amount per month).
    """
    # Chart 1: share of spending per category.
    by_category = analysis_result['category']
    pie_fig, pie_ax = plt.subplots(figsize=(8, 6))
    pie_ax.pie(
        by_category.values,
        labels=by_category.index,
        autopct='%1.1f%%',
        startangle=90,
    )
    pie_ax.set_title('消费类别占比', fontsize=14)
    pie_ax.axis('equal')  # keep the pie circular
    pie_fig.savefig('category_pie.png', dpi=300, bbox_inches='tight')
    plt.close(pie_fig)

    # Chart 2: total spending per month.
    by_month = analysis_result['monthly']
    line_fig, line_ax = plt.subplots(figsize=(10, 6))
    line_ax.plot(
        by_month.index,
        by_month.values,
        marker='o',
        linewidth=2,
        color='b',
    )
    line_ax.set_title('月度消费趋势', fontsize=14)
    line_ax.set_xlabel('月份', fontsize=12)
    line_ax.set_ylabel('消费金额（元）', fontsize=12)
    line_ax.grid(alpha=0.3)
    line_fig.savefig('monthly_trend.png', dpi=300, bbox_inches='tight')
    plt.close(line_fig)
def visualize_data_with_df(df, analysis_result):
    """Generate all three charts and save them as PNG files.

    Writes ``category_pie.png``, ``monthly_trend.png`` and
    ``category_monthly_bar.png`` to the current working directory.

    Args:
        df: Cleaned expense records with a datetime column ``日期``, a
            category column ``类别`` and a numeric column ``金额``. The
            DataFrame is only read, never modified.
        analysis_result: Mapping with ``'category'`` and ``'monthly'``
            Series, as returned by ``analyze_data``.
    """
    # The pie and line charts depend only on the aggregated result, so reuse
    # the existing two-chart function instead of duplicating its code.
    visualize_data(analysis_result)

    # Group by a standalone month-label Series rather than writing a new
    # '月份' column into the caller's DataFrame.
    month_labels = df['日期'].dt.to_period('M').astype(str).rename('月份')
    category_monthly = (
        df.groupby([month_labels, '类别'])['金额']
        .sum()
        .unstack(fill_value=0)
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        category_monthly.plot(kind='bar', ax=ax)
        ax.set_title('各类别月度消费对比', fontsize=14)
        ax.set_xlabel('月份', fontsize=12)
        ax.set_ylabel('消费金额（元）', fontsize=12)
        ax.legend(title='消费类别', loc='upper right')
        ax.grid(axis='y', alpha=0.3)
        fig.savefig('category_monthly_bar.png', dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

5. 导出报告函数

def export_report(analysis_result, file_path='report.txt'):
    """Write the analysis summary to a UTF-8 text file.

    The report has a header (period, total, record count, average), followed
    by the monthly totals, the per-category amounts with their percentage
    share, and the top categories. A confirmation line is printed afterwards.

    Args:
        analysis_result: Mapping with the keys ``period``, ``total``,
            ``count``, ``avg``, ``monthly``, ``category``,
            ``category_percent`` and ``top3``; the last four must support
            ``.items()`` and ``category`` must support lookup by label.
        file_path: Destination path of the report file.
    """
    amount_by_category = analysis_result['category']

    # Collect the report line by line and join once at the end.
    lines = [
        "=== 个人消费分析报告 ===",
        f"统计周期：{analysis_result['period']}",
        f"总消费金额：{analysis_result['total']:.1f}元",
        f"消费记录数：{analysis_result['count']}条",
        f"平均每笔消费：{analysis_result['avg']:.1f}元",
        "",
        "--- 月度消费 ---",
    ]
    lines.extend(
        f"{month}：{spent:.1f}元"
        for month, spent in analysis_result['monthly'].items()
    )

    lines += ["", "--- 类别占比 ---"]
    lines.extend(
        f"{name}：{amount_by_category[name]:.1f}元（{share}%）"
        for name, share in analysis_result['category_percent'].items()
    )

    lines += ["", "--- Top3支出类别 ---"]
    lines.extend(
        f"{rank}. {name}（{spent:.1f}元）"
        for rank, (name, spent) in enumerate(
            analysis_result['top3'].items(), 1
        )
    )

    with open(file_path, 'w', encoding='utf-8') as report_file:
        report_file.write("\n".join(lines) + "\n")
    print(f"报告已导出到：{file_path}")

6. 主函数（整合所有步骤）

def main(csv_path='expenses.csv'):
    """Run the full pipeline: load, analyse, draw charts, export the report.

    Problems with the input file are reported as a one-line message and the
    function returns early instead of letting a traceback escape.

    Args:
        csv_path: Path of the expense CSV file. Defaults to ``expenses.csv``
            in the current working directory; pass your own path to analyse
            a different file.
    """
    # 1. Load and clean. Only the load call sits inside the try block so that
    #    errors from later steps are not mistaken for input-file problems.
    try:
        df = load_and_clean_data(csv_path)
    except FileNotFoundError:
        print(f"错误：未找到文件 {csv_path}")
        return
    except KeyError as err:
        # Raised when one of the required columns is absent.
        print(f"错误：CSV文件缺少必需的列 {err}")
        return
    except UnicodeDecodeError:
        print(f"错误：无法解码文件 {csv_path}，请将其另存为UTF-8编码")
        return
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        print(f"错误：无法解析CSV文件 {csv_path}（{err}）")
        return

    if df.empty:
        print("清洗后的数据为空，请检查输入文件！")
        return

    # 2. Statistics
    analysis_result = analyze_data(df)

    # 3. Charts
    visualize_data_with_df(df, analysis_result)
    print("图表已导出为PNG文件")

    # 4. Text report
    export_report(analysis_result)

    print("分析完成！")


if __name__ == "__main__":
    main()

四、使用说明

1. **准备CSV文件**：按照以下格式准备消费记录并保存为expenses.csv：首行为表头 `日期,类别,金额,备注`，之后每行一条记录（例如 `2024-01-05,餐饮,35.5,午餐`）；
2. **安装依赖**：执行pip install pandas matplotlib；
3. **运行代码**：将代码保存为expense_analyzer.py，与CSV文件同目录，运行python expense_analyzer.py；
4. **查看结果**：生成report.txt（统计报告）和三个PNG图表（类别饼图、月度折线图、类别月度柱状图）。

五、总结与优化方向

本工具实现了消费数据分析的核心功能，但仍可优化：
1. **用户交互**：添加文件选择对话框（用tkinter），支持自定义输出路径；
2. **更多统计维度**：增加周度消费趋势、年度对比、节假日消费分析；
3. **可视化增强**：支持堆叠柱状图、交互式图表（用plotly替代matplotlib）；
4. **报告格式**：导出Excel或PDF报告，包含更详细的统计表格。

通过这个项目，你可以掌握Python数据分析的基础流程：数据清洗→统计分析→可视化→结果导出，为后续更复杂的数据分析项目打下基础。快去试试分析你的消费记录吧！

AI管家

# Python实现个人消费记录分析与可视化工具：从CSV到理财洞察

一、背景介绍

二、实现思路

三、完整代码实现

1. 导入依赖库

2. 数据导入与清洗

3. 统计分析函数

4. 可视化函数

5. 导出报告函数

6. 主函数（整合所有步骤）

四、使用说明

五、总结与优化方向

发表回复取消回复

# Python实现个人消费记录分析与可视化工具：从CSV到理财洞察

一、背景介绍

二、实现思路

三、完整代码实现

1. 导入依赖库

2. 数据导入与清洗

3. 统计分析函数

4. 可视化函数

5. 导出报告函数

6. 主函数（整合所有步骤）

四、使用说明

五、总结与优化方向

发表回复 取消回复

发表回复取消回复