import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost import plot_importance,plot_tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge
    # Read the feature tables and the label tables for the train and test splits.
    df_x_train = pd.read_csv('new/new_my_train.csv')
    df_x_test = pd.read_csv('new/new_my_test.csv')
    df_y_train = pd.read_csv('new/new_label.csv')
    df_y_test = pd.read_csv('new/new_test_label.csv')

    # Remove the 'Id' column from every table, modifying each frame in place.
    df_x_train.drop(columns='Id', inplace=True)
    df_x_test.drop(columns='Id', inplace=True)
    df_y_train.drop(columns='Id', inplace=True)
    df_y_test.drop(columns='Id', inplace=True)

    # Convert the four tables to NumPy arrays for the estimators used later.
    x_train, y_train = np.array(df_x_train), np.array(df_y_train)
    x_test, y_test = np.array(df_x_test), np.array(df_y_test)
def my_loss(st, sp):
    """Return the weighted exponential error of predictions ``sp`` against ``st``.

    Both arguments are indexed as 2-D arrays ``[example][column]``; the file
    calls this as ``my_loss(y_test, y_pred)``, so ``st`` is the reference and
    ``sp`` the prediction.

    Per element the error is ``exp(|st - sp| / 0.012) - 1``.  Each column has a
    lower/upper limit (column 0: 299.85 .. 300.15, every other column:
    199.925 .. 200.075).  The error weight is raised from 1 to 10 when

    * the reference lies inside the limits but the prediction does not, or
    * the reference lies below the lower limit but the prediction does not, or
    * the reference lies above the upper limit but the prediction does not.

    The result is ``sum(w * e * a) / number_of_examples``.

    NOTE(review): ``a`` is not defined in this function nor anywhere in the
    visible part of the file, so the call raises ``NameError`` unless a
    module-level ``a`` exists elsewhere.  ``b`` (distance of the reference
    outside the limits) is computed but never read; presumably ``a`` was meant
    to be derived from it -- TODO confirm the intended formula.
    """
    num_example = sp.shape[0]
    num_size = sp.shape[1]
    w = np.ones(sp.shape)
    b = np.zeros(sp.shape)
    e = np.exp(abs(st - sp) / 0.012) - 1
    for j in range(num_size):
        # Fix: the second pair of limits used to overwrite the first one
        # unconditionally, so the column-0 limits were never applied.
        if j == 0:
            LL = 299.85
            UL = 300.15
        else:
            LL = 199.925
            UL = 200.075
        for i in range(num_example):
            if LL <= st[i][j] <= UL:
                # Reference within limits, prediction outside.
                if sp[i][j] < LL or sp[i][j] > UL:
                    w[i][j] = 10
            elif st[i][j] < LL:
                b[i][j] = abs(st[i][j] - LL)
                # Reference below the lower limit, prediction not below it.
                if sp[i][j] >= LL:
                    w[i][j] = 10
            elif st[i][j] > UL:
                # Fix: this branch used to run inside the ``st < LL`` case,
                # clobbering ``b`` and testing the wrong limit.
                b[i][j] = abs(st[i][j] - UL)
                # Reference above the upper limit, prediction not above it.
                if sp[i][j] <= UL:
                    w[i][j] = 10
    score = np.sum(w * e * a) / float(num_example)
    return score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from scipy.stats import norm, skew, boxcox_normmax
import warnings
from sklearn import decomposition
from scipy.special import boxcox1p

from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb
import lightgbm as lgb
n_folds = 5  # number of cross-validation folds; read by rmsle_cv and the KFold splitter
# Rank the features and remove the useless ones
def rmsle_cv(model):
    """Return the per-fold cross-validated RMSE of *model*.

    The model is evaluated on the module-level ``train`` features and
    ``y_train`` labels with ``n_folds`` shuffled folds (``random_state=42``).
    The scorer is ``neg_mean_squared_error``; its sign is flipped and the
    square root taken, so the returned array holds one RMSE per fold.  No
    logarithm is applied despite the function name.

    The splitter object itself is handed to ``cross_val_score``.  Calling
    ``.get_n_splits(train)`` on it, as was done before, yields a plain integer,
    which makes ``cross_val_score`` build its own default splitter and discard
    the ``shuffle`` / ``random_state`` settings.
    """
    kf = KFold(n_folds, shuffle=True, random_state=42)
    neg_mse = cross_val_score(
        model, train, y_train, scoring="neg_mean_squared_error", cv=kf
    )
    return np.sqrt(-neg_mse)
# Splitter shared by the cross-validated linear models below.
kfolds = KFold(n_splits=n_folds, shuffle=True, random_state=42)

# Candidate regularisation strengths: `alph` for Lasso / ElasticNet, `alph2` for Ridge.
alph = [
    0.01, 0.001, 0.0001, 0.0002, 0.0004, 0.0008, 0.002, 0.004, 0.008,
    1, 2, 4, 6, 8, 10, 12,
]
alph2 = list(range(1, 15))

# Linear models, each preceded by a RobustScaler step.
lasso = make_pipeline(
    RobustScaler(),
    LassoCV(alphas=alph, cv=kfolds, random_state=1),
)
ENet = make_pipeline(
    RobustScaler(),
    ElasticNetCV(alphas=alph, l1_ratio=.9, cv=kfolds, random_state=3),
)
ridge = make_pipeline(
    RobustScaler(),
    RidgeCV(alphas=alph2, cv=kfolds),
)

# Kernel ridge regression with a degree-2 polynomial kernel.
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)

# Gradient boosting with the Huber loss.
GBoost = GradientBoostingRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=4,
    max_features='sqrt',
    min_samples_leaf=15,
    min_samples_split=10,
    loss='huber',
    random_state=5,
)
# XGBoost regressor.
# NOTE(review): the call was left unterminated, which made the following
# statements part of its argument list; it is closed here after the keyword
# arguments that are present.  Further keyword arguments may have been lost
# -- TODO confirm against the original notebook.
model_xgb = xgb.XGBRegressor(
    max_depth=10,
    colsample_bytree=0.6,
    reg_lambda=2,
    seed=1000,
)
# LightGBM regressor.
model_lgb = lgb.LGBMRegressor(
    boosting_type='gbdt',      # boosting type
    objective='regression',    # objective function
    # metric='l2',             # evaluation metric (disabled)
    num_leaves=31,             # number of leaves
    learning_rate=0.1,         # learning rate
    feature_fraction=0.9,      # fraction of features selected when building a tree
    bagging_fraction=0.8,      # fraction of samples drawn when building a tree
    bagging_freq=5,            # k means bagging is performed every k iterations
    verbose=1,                 # <0: fatal only, =0: errors (warnings), >0: info
)
# Stacked ensemble over the ElasticNet, gradient-boosting and kernel-ridge models.
# NOTE(review): the original call was truncated right after `regressors=...,`
# and never closed.  StackingCVRegressor requires a `meta_regressor`;
# `lasso` is used here as an assumption (it is defined in this file and not
# used anywhere else visible) -- TODO confirm the intended meta-model and any
# other arguments that were lost.
stacked_averaged_models = StackingCVRegressor(
    regressors=(ENet, GBoost, KRR),
    meta_regressor=lasso,
)
def rmsle(y, y_pred):
    """Return the square root of the mean squared error between *y* and *y_pred*.

    Despite the name, no logarithm is applied to either argument here, so the
    value is a plain RMSE of whatever is passed in.
    """
    mse = mean_squared_error(y, y_pred)
    return np.sqrt(mse)
# Number of lowest-ranked feature indices taken from each model.
drop_num = 30

# XGBoost: fit on the first label column, rank features by importance
# (ascending, so the least important indices come first) and print the
# training-set error.
model_xgb.fit(x_train, y_train[:, 0])
xgb_train_pred = model_xgb.predict(x_train)
xgb_importance = model_xgb.feature_importances_
xgb_out = np.argsort(xgb_importance)[:drop_num]
print(rmsle(y_train[:, 0], xgb_train_pred))

# LightGBM: same procedure, using split counts as the importance measure.
model_lgb.fit(x_train, y_train[:, 0])
booster = model_lgb.booster_
lgb_importance = booster.feature_importance(importance_type='split')
lgb_out = np.argsort(lgb_importance)[:drop_num]
lgb_train_pred = model_lgb.predict(x_train)
print(rmsle(y_train[:, 0], lgb_train_pred))
print('RMSLE score on train data:')

# Gradient boosting: fit and rank only; its training error is not printed.
GBoost.fit(x_train, y_train[:, 0])
GBoost_train_pred = GBoost.predict(x_train)
GBT_feature = GBoost.feature_importances_
gbt_out = np.argsort(GBT_feature)[:drop_num]

# A column is dropped if any of the three models ranks it among its
# `drop_num` least important features.
drop_feature = list(set(lgb_out).union(xgb_out).union(gbt_out))
# NOTE(review): the list below looks like a pasted notebook output of
# `drop_feature` from an earlier run; it had no effect as a statement.
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38]

# Reduced feature matrices used by the models fitted afterwards.
train = np.delete(x_train, drop_feature, axis=1)
test = np.delete(x_test, drop_feature, axis=1)
def _fit_and_predict_target(col):
    """Refit the three models on label column *col* and predict on ``test``.

    The stacked model is fitted and used for prediction first; LightGBM and
    XGBoost are then fitted and used for prediction.  Returns the
    ``(stacked, xgb, lgb)`` predictions in that order.
    """
    target = y_train[:, col]
    stacked_averaged_models.fit(train, target)
    stacked_pred = stacked_averaged_models.predict(test)
    model_lgb.fit(train, target)
    model_xgb.fit(train, target)
    xgb_pred = model_xgb.predict(test)
    lgb_pred = model_lgb.predict(test)
    return stacked_pred, xgb_pred, lgb_pred


# Each target is a fixed blend: 0.1 * stacked + 0.5 * XGBoost + 0.4 * LightGBM.
# Fix: `xgb_pred1` was used in the first blend without ever being assigned;
# it is now produced the same way as for the other two targets.
stacked_pred1, xgb_pred1, lgb_pred1 = _fit_and_predict_target(0)
y_pred1 = stacked_pred1 * 0.1 + xgb_pred1 * 0.5 + lgb_pred1 * 0.4

stacked_pred2, xgb_pred2, lgb_pred2 = _fit_and_predict_target(1)
y_pred2 = stacked_pred2 * 0.1 + xgb_pred2 * 0.5 + lgb_pred2 * 0.4

stacked_pred3, xgb_pred3, lgb_pred3 = _fit_and_predict_target(2)
y_pred3 = stacked_pred3 * 0.1 + xgb_pred3 * 0.5 + lgb_pred3 * 0.4

# Fix: `y_pred` was passed to my_loss without being defined.  my_loss indexes
# its arguments as [example][column], so the three blends are stacked as columns.
# NOTE(review): assumes y_test has exactly these three label columns in the
# same order -- TODO confirm.
y_pred = np.column_stack((y_pred1, y_pred2, y_pred3))
accuracy = my_loss(y_test, y_pred)
# df = pd.DataFrame(y_pred)
# df.to_csv('new/提交8.csv')
# df = pd.DataFrame(y_pred)
# df.to_csv('new/predict_label3.csv')
# plot_importance(model,importance_type ="weight")
# plt.show()





