import pandas as pd
import numpy as np
import os
import sys
import json
#from predict_prophet_helper import *
from predict_helper import *
import lightgbm as lgb
from sklearn.model_selection import train_test_split


# ToDos:
# - make own models for lightgbm "day" and "interval" models
# - LightGBM model parameters -> WFE frontend
# - Feature engineering: moving averages on y???


# Never truncate the column list when DataFrames are printed to the log.
pd.set_option('display.max_columns', None)

# NOTE(review): the banner says "prophet" although this script trains a
# LightGBM model; the text is left untouched in case log consumers match on it.
print("--start prophet prediction module--")

# predict_initialize() (from predict_helper) returns the run configuration,
# the historical and future frames and the post-processing settings as one
# 10-tuple; unpack it into module-level names used by the rest of the script.
(
    config,
    resultId,
    output_data_file,
    regressors,
    histData,
    futureData,
    cutoff_minimum,
    log_transform,
    anomaly_threshold_n_stddev,
    onlyModelAnalysis,
) = predict_initialize()


# Feature engineering: derive calendar features from the "ds" timestamps.
# Every column added here stays in histData, so it is part of the feature
# matrix once "ds" and "y" are dropped for training.

# Month-of-year seasonality (January -> 0, December -> 11/12), encoded on the
# unit circle so that December and January end up next to each other.
histData["normalized_month"] = (histData["ds"].dt.month - 1) / 12
histData["sin_month"] = np.sin(2 * np.pi * histData["normalized_month"])
histData["cos_month"] = np.cos(2 * np.pi * histData["normalized_month"])

# Day-of-year seasonality, normalised by the real length of the year.
histData["day_of_year"] = histData["ds"].dt.dayofyear
# Vectorised replacement for a row-wise apply(): 366 in leap years, else 365.
histData["days_in_year"] = np.where(histData["ds"].dt.is_leap_year, 366, 365)
histData["sin_day_of_year"] = np.sin(2 * np.pi * histData["day_of_year"] / histData["days_in_year"])
histData["cos_day_of_year"] = np.cos(2 * np.pi * histData["day_of_year"] / histData["days_in_year"])

# Week-of-year seasonality (ISO week number, 1..53).
# isocalendar() returns the nullable UInt32 extension dtype, which would also
# propagate into the sin/cos columns below. Cast to plain float64 so the
# feature matrix only holds NumPy dtypes (missing timestamps become NaN).
# NOTE(review): older LightGBM releases appear to reject pandas extension
# dtypes - confirm against the deployed version.
histData["week_of_year"] = histData["ds"].dt.isocalendar().week.astype("float64")
histData["sin_week_of_year"] = np.sin(2 * np.pi * histData["week_of_year"] / 53)
histData["cos_week_of_year"] = np.cos(2 * np.pi * histData["week_of_year"] / 53)

# Weekday index and weekend flag.
histData["day_of_week"] = histData["ds"].dt.dayofweek  # 0=Monday, 6=Sunday
histData["is_weekend"] = (histData["day_of_week"] >= 5).astype(int)


# Log a sample of the engineered frame for debugging.
print("First 10 rows of the hist dataframe:")
print(histData.head(10))  # Print the first 10 records


# 1. Ensure your data has a datetime column and a value column (the target to predict).
# If you have regressors, include them as additional columns.

# 2. LightGBM requires numerical features, so you need to convert the datetime column into features such as:
#
#    Year, month, day, hour, minute, etc.
#    Cyclical features like sine and cosine transformations for month, day, or hour.
#
#Include lag features (previous values) and rolling statistics (moving averages, rolling standard deviations).
#If you have regressors, process them appropriately (e.g., scaling).

#Encode cyclic time features like month and week into sine and cosine components to handle their periodic nature:
#
#data["sin_month"] = np.sin(2 * np.pi * data["month"] / 12)
#data["cos_month"] = np.cos(2 * np.pi * data["month"] / 12)
#
#Similarly for days of the week or hours.

#Rolling and Aggregated Statistics
#
#These smooth out noise and provide a broader context of recent activity.
#
#    Rolling Mean:
#        Average number of calls over the past n days (Rolling_mean_7, Rolling_mean_30).
#    Rolling Standard Deviation:
#        Standard deviation over the past n days to capture variability (Rolling_std_7, Rolling_std_30).
#    Expanding Features:
#        Cumulative statistics (e.g., cumulative mean or sum).

#Time Since Event:
#    Days_since_last_holiday.
#    Days_since_invoice_sent.

#Time Index:
#    Include a numeric index (t) representing the progression of time to help the model capture long-term trends.

# 3. Split the data into training and validation sets based on time. For time series, use sequential splitting rather than random splitting.

# Chronological hold-out: with shuffle=False the first 80% of the rows are
# used for training and the last 20% for validation (no random sampling).
X = histData.drop(columns=["ds", "y"])
y = histData["y"]
try:
    X_train, X_val, y_train, y_val = train_test_split(
        X,
        y,
        test_size=0.2,
        shuffle=False,
    )
except Exception as split_error:
    # Report the failure through exitWithError with a code per error class:
    # 1 = ValueError, 2 = TypeError, 3 = anything else.
    if isinstance(split_error, ValueError):
        exitWithError(1, f"ValueError: {split_error}")
    elif isinstance(split_error, TypeError):
        exitWithError(2, f"TypeError: {split_error}")
    else:
        exitWithError(3, f"An unexpected error occurred: {split_error}")


# boosting_type: gbdt, dart, goss, rf (default=gbdt) - The type of boosting to use, gbdt is OK for most cases.
# num_leaves (default=31) -> change to 50-150 -> 64
# learning_rate (default=0.1) -> change to 0.03
# n_estimators (default=100) -> change to 1000
# max_depth (default=-1) -> change to 10
# min_child_samples (default=20) -> change to 50 -> 5
# min_child_weight (default=0.001)
# subsample (default=1.0) -> change to 0.75
# colsample_bytree (default=1.0) -> change to 0.75
# subsample_freq (default=0) -> change to 1
# reg_alpha (default=0.0) -> change to 0.5
# reg_lambda (default=0.0) -> change to 0.5
# objective (default=regression)
# importance_type (default=split)
# n_jobs (default=-1)
# verbose (default=0)
# max_bin (default=255) -> change to 384
# min_split_gain (default=0.0) -> change to 0.05 -> 0.1
# early_stopping_rounds (default=0) -> change to 100
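# lambda_l1 (default=0.0) -> 1e-4  (LightGBM's native name for reg_alpha; both are passed to the model below)
# lambda_l2 (default=0.0) -> 1e-4  (LightGBM's native name for reg_lambda; both are passed to the model below)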


# Train LightGBM
# Hyper-parameters are collected in one mapping so each setting sits on its
# own line; they are forwarded unchanged as keyword arguments.
# NOTE(review): reg_alpha/lambda_l1 and reg_lambda/lambda_l2 look like alias
# pairs of the same LightGBM parameters but carry different values here -
# confirm which value is actually applied and drop the other.
lgbm_params = {
    "early_stopping_rounds": 100,
    "verbosity": 1,
    "num_leaves": 64,
    "max_depth": 10,
    "learning_rate": 0.03,
    "n_estimators": 1000,
    "reg_alpha": 0.5,
    "reg_lambda": 0.5,
    "min_child_samples": 5,
    "subsample": 0.75,
    "subsample_freq": 1,
    "colsample_bytree": 0.75,
    "max_bin": 384,
    "min_split_gain": 0.1,
    "lambda_l1": 1e-4,
    "lambda_l2": 1e-4,
}
model = lgb.LGBMRegressor(**lgbm_params)
# Baseline for comparison: lgb.LGBMRegressor(early_stopping_rounds=100, verbosity=1)

# The chronological validation split is the evaluation set that drives
# early stopping.
model.fit(X_train, y_train, eval_set=[(X_val, y_val)])


if not onlyModelAnalysis:

    # Feature engineering for the forecast horizon. This must produce exactly
    # the same calendar columns that were derived for histData before training.

    # Month-of-year seasonality (January -> 0, December -> 11/12).
    futureData["normalized_month"] = (futureData["ds"].dt.month - 1) / 12
    futureData["sin_month"] = np.sin(2 * np.pi * futureData["normalized_month"])
    futureData["cos_month"] = np.cos(2 * np.pi * futureData["normalized_month"])

    # Day-of-year seasonality, normalised by the real length of the year.
    futureData["day_of_year"] = futureData["ds"].dt.dayofyear
    # Vectorised replacement for a row-wise apply(): 366 in leap years, else 365.
    futureData["days_in_year"] = np.where(futureData["ds"].dt.is_leap_year, 366, 365)
    futureData["sin_day_of_year"] = np.sin(2 * np.pi * futureData["day_of_year"] / futureData["days_in_year"])
    futureData["cos_day_of_year"] = np.cos(2 * np.pi * futureData["day_of_year"] / futureData["days_in_year"])

    # Week-of-year seasonality (ISO week number, 1..53).
    # isocalendar() returns the nullable UInt32 extension dtype; cast to plain
    # float64 so the derived columns stay ordinary NumPy floats.
    futureData["week_of_year"] = futureData["ds"].dt.isocalendar().week.astype("float64")
    futureData["sin_week_of_year"] = np.sin(2 * np.pi * futureData["week_of_year"] / 53)
    futureData["cos_week_of_year"] = np.cos(2 * np.pi * futureData["week_of_year"] / 53)

    # Weekday index and weekend flag.
    futureData["day_of_week"] = futureData["ds"].dt.dayofweek  # 0=Monday, 6=Sunday
    futureData["is_weekend"] = (futureData["day_of_week"] >= 5).astype(int)


    # Select the model inputs by name, in the exact column order used for
    # training. Relying on "everything except ds" silently depends on
    # futureData having the same column order and no extra columns; selecting
    # by name fails loudly (KeyError) if a training feature is missing.
    future_features = futureData[X_train.columns]
    futureData["predicted_value"] = model.predict(future_features)
    print(futureData[["ds", "predicted_value"]])


# In analysis-only mode there is no forecast, so the forecast frame and its
# value-column list are passed as None; everything else is identical.
if onlyModelAnalysis:
    forecast_frame, forecast_value_columns = None, None
else:
    forecast_frame, forecast_value_columns = futureData, ["predicted_value"]

# The value returned by the helper is reported as "wMAPE" on exit.
wMAPE = predict_processAfterForecast(
    log_transform,
    cutoff_minimum,
    anomaly_threshold_n_stddev,
    forecast_frame,
    forecast_value_columns,
    None,
    [],
    histData,
    ["y"],
)

print("--end processing--")

if not onlyModelAnalysis:
    # Pairs of [source column in futureData, label handed to predict_saveResults].
    futureColumns = [['predicted_value', 'FC']]
    predict_saveResults(resultId, 'FUTURE', futureData, futureColumns, 0.0)

# Exit code 0 signals success; the error metric travels in the payload.
exitWithError(0, "Success: No errors occurred.", additional_data={"wMAPE": wMAPE})