The ml_df generated earlier will be used to train a model, and the model's performance will be checked against the target properties from the processed data.

The model will be a neural network with two hidden layers, each containing the same number of neurons. K-fold cross-validation with k=5 will be used to reduce overfitting (see the sketch at the end of this section), and the MSE of each fold will serve as the accuracy metric.
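For reference, each fold's error is the mean squared error over its held-out samples,

$$\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2$$

where $y_i$ is the true property value, $\hat{y}_i$ is the model's prediction, and $n$ is the number of test samples; lower is better.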

As before, only the molecules with an atom count of exactly 19 will be used.

In [15]:
# import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from rdkit import Chem
from rdkit.Chem import Draw
from mordred import descriptors, Calculator
from molml.features import CoulombMatrix
import ast
In [16]:
# load dataset
ml_df = pd.read_csv('ml_df.csv')
df_19 = pd.read_csv('df_19.csv')
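The rows of ml_df (features) must line up one-to-one with the rows of df_19 (targets). A quick check, assuming both CSVs were written from the same filtered 19-atom subset:

In [ ]:
# Sanity check (assumption: both CSVs come from the same filtered subset)
assert len(ml_df) == len(df_19), "feature/target row counts do not match"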
In [17]:
# Defining NN model
def nn_model(X, y, neurons=64, epochs=100, test_size=0.2, batch_size=32, lr=0.001, random_state=42):
    # train_test_split
    # X is input features, and y is target values
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Convert to tensor
    X_train = torch.tensor(X_train.values, dtype=torch.float32)
    y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1) 
    # Reshape into a 2D column vector with 1 column and as many rows as needed
    X_test = torch.tensor(X_test.values, dtype=torch.float32)
    y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

    # Build model
    model = nn.Sequential(
        nn.Linear(X_train.shape[1], neurons), # input layer; X_train.shape[1] is the number of input features
        nn.ReLU(), # ReLU activation adds non-linearity between the hidden layers
        nn.Linear(neurons, neurons),
        nn.ReLU(),
        nn.Linear(neurons, 1)
    )

    # Instantiate loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr) # .parameters() returns all learnable parameters of the model

    # Training loop
    for epoch in range(epochs):
        model.train()
        perm = torch.randperm(X_train.size(0)) # Randomly shuffles indices in training data
        for i in range(0, X_train.size(0), batch_size): # Split into batches for efficiency and speed
            idx = perm[i:i+batch_size]
            batch_x, batch_y = X_train[idx], y_train[idx]
            optimizer.zero_grad()
            preds = model(batch_x)
            loss = criterion(preds, batch_y)
            loss.backward()
            optimizer.step()
        
    # Evaluation
    model.eval()
    with torch.no_grad():
        preds = model(X_test)
        mse = mean_squared_error(y_test.numpy(), preds.numpy())
        print(f"Test MSE: {mse:.4f}")

    return model, mse
In [18]:
# Test with 'mu' property
model, mse = nn_model(ml_df, df_19['mu'])
Test MSE: 0.7909
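nn_model above evaluates on a single train/test split; the k=5 cross-validation described at the top of this section is sketched below. kfold_mse is a hypothetical helper, not part of the original notebook: it reuses the same architecture and training loop, trains one fresh model per fold on KFold indices, and reports the per-fold and mean test MSE.

In [ ]:
# Minimal sketch of k=5 cross-validation around the same architecture.
# kfold_mse is a hypothetical helper (assumption, not in the original notebook).
def kfold_mse(X, y, k=5, neurons=64, epochs=100, batch_size=32, lr=0.001, random_state=42):
    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
    fold_mses = []
    for fold, (tr, te) in enumerate(kf.split(X), start=1):
        # Per-fold tensors, converted the same way as in nn_model
        X_train = torch.tensor(X.values[tr], dtype=torch.float32)
        y_train = torch.tensor(y.values[tr], dtype=torch.float32).view(-1, 1)
        X_test = torch.tensor(X.values[te], dtype=torch.float32)
        y_test = torch.tensor(y.values[te], dtype=torch.float32).view(-1, 1)

        # Fresh model per fold so no information leaks between folds
        model = nn.Sequential(
            nn.Linear(X_train.shape[1], neurons),
            nn.ReLU(),
            nn.Linear(neurons, neurons),
            nn.ReLU(),
            nn.Linear(neurons, 1)
        )
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        # Same mini-batch training loop as nn_model
        for epoch in range(epochs):
            model.train()
            perm = torch.randperm(X_train.size(0))
            for i in range(0, X_train.size(0), batch_size):
                idx = perm[i:i+batch_size]
                optimizer.zero_grad()
                loss = criterion(model(X_train[idx]), y_train[idx])
                loss.backward()
                optimizer.step()

        # Evaluate on the held-out fold
        model.eval()
        with torch.no_grad():
            mse = mean_squared_error(y_test.numpy(), model(X_test).numpy())
        fold_mses.append(mse)
        print(f"Fold {fold} MSE: {mse:.4f}")

    print(f"Mean MSE over {k} folds: {np.mean(fold_mses):.4f}")
    return fold_mses

# Hypothetical usage: kfold_mse(ml_df, df_19['mu'])

Training one fresh model per fold is what makes the mean MSE an honest estimate: every sample is held out exactly once, so the average is less sensitive to a lucky or unlucky single split.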