import_machine_temp.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import pandas as pd
  2. from sqlalchemy import create_engine
  3. import time
  4. from datetime import datetime
  5. # 数据库连接配置
  6. DB_CONFIG = "mysql+pymysql://root:root@localhost:3306/recycle"
  7. engine = create_engine(DB_CONFIG)
  8. def process_and_import(file_path):
  9. # 1. 读取 CSV
  10. # df = pd.read_csv(file_path)
  11. df = pd.read_csv(file_path, encoding='gbk')
  12. # 2. 条件转换:type_id 等于 101 时转成 2
  13. # 注意:确保 type_id 在读取时是数值类型,如果是字符串请用 '101'
  14. if 'type_id' in df.columns:
  15. df.loc[df['type_id'] == 101, 'type_id'] = 2
  16. # 3. 数据过滤:machine_id 不为空
  17. # dropna 会删除 machine_id 列中值为 NaN 的行
  18. df_filtered = df.dropna(subset=['machine_id']).copy()
  19. current_now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  20. df_filtered['create_time'] = current_now
  21. df_filtered['update_time'] = current_now
  22. # 4. 筛选目标字段
  23. # 虽然我们要过滤 machine_id 和 type_id,但最后写入表的字段按你要求的来
  24. required_columns = [
  25. 'id', 'machine_id', 'temp_type',
  26. 'estimate_packet', 'create_time', 'update_time'
  27. ]
  28. # 检查字段是否存在,避免报错
  29. final_df = df_filtered[required_columns]
  30. # 5. 数据清洗:处理空值并写入
  31. final_df = final_df.where(pd.notnull(final_df), None)
  32. try:
  33. final_df.to_sql(
  34. name='machine_temp',
  35. con=engine,
  36. if_exists='append',
  37. index=False
  38. )
  39. print(f"导入成功!处理后的有效数据行数: {len(final_df)}")
  40. except Exception as e:
  41. print(f"导入失败: {e}")
  42. if __name__ == "__main__":
  43. process_and_import('t_chx_machine_temp.csv')