# Exploratory pandas analysis of the listings in "成都市二手房交易信息.csv".
import datetime

import numpy as np
import pandas as pd
# Load the listings table; the CSV path is resolved against the current
# working directory.
data = pd.read_csv("成都市二手房交易信息.csv")
print(type(data))

# Count rows per value of the '房屋所属市辖区' column and show the first five
# entries (value_counts orders by descending frequency by default).
district_counts = data["房屋所属市辖区"].value_counts()
print(district_counts.iloc[:5])
# Price bands over the unit-price column: (0, 10000], (10000, 20000] and
# (20000, column maximum]. pd.cut builds right-closed intervals by default,
# so a value of exactly 0 falls outside every band and is labelled NaN.
label = ['心动价', '大众价', '奋斗价']

# Series.max() is vectorised and skips missing values. The builtin max()
# iterates the column in Python and, because comparisons with NaN are always
# false, can hand back NaN as the top bin edge when the column has gaps.
# NOTE(review): pd.cut requires increasing bin edges, so this still assumes
# the column maximum exceeds 20000 — confirm against the data.
bins = [0, 10000, 20000, data['单价(元/平方米)'].max()]
data_price = pd.cut(data['单价(元/平方米)'], bins, labels=label)

# Place the band labels at column position 16.
# NOTE(review): DataFrame.insert raises if the frame has fewer than 16
# columns, or if '价格分布' already exists (e.g. when this cell is re-run).
data.insert(16, column='价格分布', value=data_price)
print(data)
# Group the unit-price column by district and price band, then report the
# maximum, minimum and median of each group.
group_keys = ['房屋所属市辖区', '价格分布']
data_agg = data.groupby(group_keys)['单价(元/平方米)']
price_summary = data_agg.agg(['max', 'min', 'median'])
print(price_summary)

# Show how the '挂牌时间' column is currently typed.
listing_dtype = data['挂牌时间'].dtype
print(listing_dtype)
# Today's date rendered as a "YYYY-MM-DD" string (time of day discarded).
current_time = datetime.datetime.now()
datedata = current_time.strftime("%Y-%m-%d")

# Effective day zero of Excel's 1900 date system. Serial 1 is 1900-01-01 and
# Excel also counts a non-existent 1900-02-29, so for any date after February
# 1900 the real calendar date is 1899-12-30 plus the serial number.
_EXCEL_EPOCH = pd.Timestamp("1899-12-30")


def g_date(i):
    """Convert one raw date cell to a pandas Timestamp.

    Args:
        i: A cell value. Either a date string that ``pd.to_datetime`` can
            parse, or a five-digit day count (string or integer), which is
            treated as an Excel serial date.

    Returns:
        The corresponding ``pd.Timestamp``, or ``pd.NaT`` for a missing cell.

    Raises:
        Whatever ``pd.to_datetime`` raises for text it cannot parse.
    """
    # Missing cells arrive as NaN/None; len() on those would raise.
    if pd.isna(i):
        return pd.NaT

    # Work on text so that integer cells are handled like their string form.
    text = str(i).strip()
    if len(text) == 5 and text.isdecimal():
        return _EXCEL_EPOCH + pd.Timedelta(days=int(text))
    return pd.to_datetime(text)
# Run every cell of the '挂牌时间' column through g_date, then inspect the
# resulting dtype and values.
listing_dates = data['挂牌时间'].apply(g_date)
data['挂牌时间'] = listing_dates
print(data['挂牌时间'].dtype)
print(data['挂牌时间'])

# Difference between today's date (parsed back from its string form) and
# each value of the converted column.
today = pd.to_datetime(datedata)
date_time = today - data['挂牌时间']


def print_days(day):
    """Print the ``days`` attribute of *day*."""
    print(day.days)


# One printed line per row, in row order.
for elapsed in date_time:
    print_days(elapsed)
# Copy of the table indexed by the '挂牌时间' column, used for resampling.
data_new = data.set_index("挂牌时间")
print(data_new)

# Mean unit price per month, grouping on a 'YYYYMM' string key.
month_key = data['挂牌时间'].dt.strftime('%Y%m')
aver = data.groupby(month_key)['单价(元/平方米)']
monthly_by_key = aver.mean()
print(monthly_by_key.round(0))

# Mean unit price per month again, this time by resampling the date index
# with the 'M' frequency alias.
unit_price = data_new['单价(元/平方米)']
avg = unit_price.resample('M').mean()
print(avg.round(0))
# Mean unit price with one row per '房屋所属市辖区' value and one column per
# '价格分布' value, computed two ways.
#
# The aggregation is named by the string "mean" rather than passed as
# np.mean: recent pandas emits a FutureWarning for the NumPy callable and
# plans to change how it is applied, while the string form selects the
# built-in grouped mean on every pandas version.
data_povit = pd.pivot_table(
    data,
    index=['房屋所属市辖区'],
    columns='价格分布',
    values=['单价(元/平方米)'],
    aggfunc="mean",
)
print(data_povit.round(0))

# The same table via crosstab, rounded before it is stored.
data_crosstab = pd.crosstab(
    index=data['房屋所属市辖区'],
    columns=data['价格分布'],
    values=data['单价(元/平方米)'],
    aggfunc="mean",
).round(0)
print(data_crosstab)
# End of extracted listing.