import cx_Oracle
import json
import logging
import os
import sys
from configparser import ConfigParser
from datetime import datetime, date
import re

# Load settings from the config.ini that sits in the same directory as this script.
# ConfigParser.read() skips files it cannot open, so a missing config.ini shows up
# as a KeyError on the first section lookup below rather than as a file error.
config = ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))
# Echo the parsed section names and the Instant Client directory to stdout.
print(config.sections())
print(config['PATHS']['instantclient_dir'])

# Set the Oracle client library path
cx_Oracle.init_oracle_client(lib_dir=config['PATHS']['instantclient_dir'])

# Configuring logging
# The log file is written inside the directory named by [LOGGING] log_dir.
log_dir = config['LOGGING']['log_dir']
log_file_path = os.path.join(log_dir, 'audit_json_to_database_load.log')
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Announce the run on stdout (with the log location) and in the log file itself.
print(f"Loading OAC Audit JSON data into the database. Logging into file: {log_file_path}")
logging.info("OAC Audit JSON data loading process started...")

def parse_datetime_with_milliseconds(date_str):
    """Parse a ``Z``-suffixed ISO-8601 style timestamp into a naive ``datetime``.

    Two layouts are accepted: one with a fractional-seconds part
    (``2023-05-01T12:34:56.789Z``) and one without (``2023-05-01T12:34:56Z``).

    Args:
        date_str: Timestamp text ending in a literal ``Z``.

    Returns:
        A ``datetime`` without tzinfo; the trailing ``Z`` is only matched as a
        literal character.

    Raises:
        ValueError: If the text matches neither layout.
    """
    with_fraction = "%Y-%m-%dT%H:%M:%S.%fZ"
    whole_seconds = "%Y-%m-%dT%H:%M:%SZ"

    try:
        parsed = datetime.strptime(date_str, with_fraction)
    except ValueError:
        # No fractional part in the input: retry with the whole-second layout.
        parsed = datetime.strptime(date_str, whole_seconds)
    return parsed

# Whole-word keywords (case-insensitive); checked in this order, first hit wins.
_ACTION_WORD_RULES = (
    (re.compile(r'\bupdated\b', re.IGNORECASE), 'UPDATE'),
    (re.compile(r'\bcreated\b', re.IGNORECASE), 'CREATE'),
    (re.compile(r'\bdeleted\b', re.IGNORECASE), 'DELETE'),
    (re.compile(r'\brenamed\b', re.IGNORECASE), 'RENAME'),
)

# Plain substrings looked up in the lowercased message; checked after the word rules.
_ACTION_SUBSTRING_RULES = (
    ('exported', 'EXPORT'),
    ('archived', 'ARCHIVE'),
    ('downloaded', 'DOWNLOAD'),
    ('copied', 'COPY'),
)


def determine_action(message):
    """Derive the audit action code from the free-text log message.

    Rules are evaluated in a fixed order and the first match wins:

    1. message starts with ``uploaded``            -> ``'UPLOAD'``
    2. whole word updated/created/deleted/renamed  -> ``'UPDATE'``/``'CREATE'``/
       ``'DELETE'``/``'RENAME'``
    3. substring exported/archived/downloaded/copied -> ``'EXPORT'``/``'ARCHIVE'``/
       ``'DOWNLOAD'``/``'COPY'``
    4. message starts with ``imported``            -> ``'IMPORTED'``

    All comparisons are case-insensitive.

    Args:
        message: Log message text. ``None`` or an empty string is accepted.

    Returns:
        The action code as a string, or ``None`` when no rule matches or the
        message is missing/empty.
    """
    # A JSON null message arrives here as None; treat it like an empty message
    # instead of failing on .lower().
    if not message:
        return None

    lowered = message.lower()

    if lowered.startswith('uploaded'):
        return 'UPLOAD'

    for pattern, action in _ACTION_WORD_RULES:
        if pattern.search(message):
            return action

    for fragment, action in _ACTION_SUBSTRING_RULES:
        if fragment in lowered:
            return action

    if lowered.startswith('imported'):
        # The code is 'IMPORTED' (not 'IMPORT') to stay compatible with values
        # this function has always produced.
        return 'IMPORTED'

    return None

# Keyword -> object type, matched as whole words/phrases, case-insensitively.
# Order matters: the first pattern found in the message decides the type.
_TYPE_KEYWORD_PATTERNS = tuple(
    (re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE), mapped_type)
    for keyword, mapped_type in (
        ('dataset', 'DATASET'),
        ('data flow', 'DATAFLOW'),
        ('sequence', 'SEQUENCE'),
        ('updated connection', 'CONNECTION'),
        ('deleted connection', 'CONNECTION'),
        ('created connection', 'CONNECTION'),
    )
)


def determine_type(existing_type, message):
    """Return the object type for a log entry, inferring it from the message if needed.

    Args:
        existing_type: Type already present on the entry. Any truthy value is
            returned unchanged.
        message: Log message text used for inference when ``existing_type`` is
            falsy. ``None`` or an empty string is accepted.

    Returns:
        ``existing_type`` when truthy; otherwise the mapped type of the first
        keyword found in ``message`` (``'DATASET'``, ``'DATAFLOW'``,
        ``'SEQUENCE'`` or ``'CONNECTION'``); ``None`` when nothing matches or
        the message is missing/empty.
    """
    if existing_type:
        return existing_type

    # A JSON null message arrives as None; re.search would raise TypeError on it.
    if not message:
        return None

    for pattern, mapped_type in _TYPE_KEYWORD_PATTERNS:
        if pattern.search(message):
            return mapped_type
    return None

# Number of records load_json_to_database accumulates before handing them to batch_insert.
BATCH_SIZE = 1000

def fetch_last_load_ts(cursor):
    """Look up the newest ``log_ts`` currently stored in ``AUDIT_LOG_DATA``.

    Args:
        cursor: Open database cursor used to run the query.

    Returns:
        The value of ``MAX(log_ts)``; ``datetime(2023, 1, 1)`` when the query
        yields NULL; ``None`` when anything goes wrong (the error is logged).
    """
    try:
        cursor.execute("SELECT MAX(log_ts) FROM AUDIT_LOG_DATA")
        row = cursor.fetchone()
        newest_log_ts = row[0]
        # NULL result: fall back to a fixed starting point.
        return newest_log_ts if newest_log_ts is not None else datetime(2023, 1, 1)
    except Exception as e:
        logging.error(f"Error fetching last job timestamp: {e}")
        return None

def batch_insert(cursor, records):
    """Insert a batch of audit records into ``AUDIT_LOG_DATA`` with one ``executemany`` call.

    Args:
        cursor: Open database cursor.
        records: Sequence of dicts whose keys match the named bind variables in
            the statement (``format``, ``path``, ``destinationPath``,
            ``action``, ``type``, ``recursive``, ``sourcepath``, ``category``,
            ``ecid``, ``message``, ``user_id``, ``id``, ``source``, ``log_ts``,
            ``log_dt``). ``log_ts`` and ``log_dt`` are passed as text and
            converted by the statement's ``TO_TIMESTAMP`` / ``TO_DATE`` masks.
    """
    insert_statement = """
        INSERT INTO AUDIT_LOG_DATA (export_type, path, destination_path, action, type, recursive, source_path, category, ecid, message, user_name, log_content_id, instance_name, log_ts, log_dt)
        VALUES (:format, :path, :destinationPath, :action, :type, :recursive, :sourcepath, :category, :ecid, :message, :user_id, :id, :source, TO_TIMESTAMP(:log_ts, 'YYYY-MM-DD HH24:MI:SS.FF'), TO_DATE(:log_dt, 'DD/MM/YYYY'))
    """
    cursor.executemany(insert_statement, records)

def _build_audit_record(data):
    """Flatten one audit-log JSON entry into the bind dict expected by ``batch_insert``."""
    # `or {}` also covers keys that are present but explicitly null in the JSON.
    log_content = data.get('logContent') or {}
    data_content = log_content.get('data') or {}
    additional_details = data_content.get('additionalDetails') or {}

    log_time = parse_datetime_with_milliseconds(log_content.get('time'))
    # Normalise a null message to '' so the classifiers below always get a string.
    message = data_content.get('message') or ''

    return {
        'format': additional_details.get('format'),
        'path': additional_details.get('path'),
        'destinationPath': additional_details.get('destinationPath'),
        'type': determine_type(additional_details.get('type'), message),
        'recursive': additional_details.get('recursive'),
        'sourcepath': additional_details.get('sourcePath'),
        'category': data_content.get('category'),
        'ecid': data_content.get('ecid'),
        'message': message,
        'user_id': data_content.get('userId'),
        'id': log_content.get('id'),
        'source': log_content.get('source'),
        # Trim microseconds to milliseconds for the TO_TIMESTAMP(... .FF) bind.
        'log_ts': log_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
        'log_dt': log_time.date().strftime("%d/%m/%Y"),
        'action': determine_action(message),
    }


def _delete_duplicate_rows(connection, last_load_ts, load_start_time):
    """Best-effort removal of duplicate rows whose log_ts lies in the given window; errors are logged."""
    if last_load_ts is None:
        # fetch_last_load_ts failed earlier, so there is no lower bound to work with.
        logging.warning("Skipping duplicate cleanup: the previous load timestamp is unknown.")
        return

    # The statement parses both bounds with TO_TIMESTAMP(<text>, <mask>), so the
    # lower bound must be text in exactly that mask. Binding the datetime object
    # itself would leave the conversion to the session's implicit date format.
    if isinstance(last_load_ts, datetime):
        window_start = last_load_ts.strftime("%Y-%m-%d %H:%M:%S.%f")
    else:
        window_start = last_load_ts

    dedup_statement = """
        DELETE FROM AUDIT_LOG_DATA
        WHERE ROWID NOT IN (
            SELECT MIN(ROWID)
            FROM AUDIT_LOG_DATA
            WHERE log_ts BETWEEN TO_TIMESTAMP(:last_load_ts, 'YYYY-MM-DD HH24:MI:SS.FF')
                             AND TO_TIMESTAMP(:load_start_time, 'DD-MM-YYYY HH24:MI:SS.FF')
            GROUP BY export_type, path, destination_path, type, recursive, source_path,
                     category, ecid, user_name, log_content_id, instance_name, log_ts,
                     log_dt, message
        )
        AND (log_ts BETWEEN TO_TIMESTAMP(:last_load_ts, 'YYYY-MM-DD HH24:MI:SS.FF')
                        AND TO_TIMESTAMP(:load_start_time, 'DD-MM-YYYY HH24:MI:SS.FF'))
    """

    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(dedup_statement, {'load_start_time': load_start_time, 'last_load_ts': window_start})
        deleted_rows = cursor.rowcount
        connection.commit()
        logging.info(f"{deleted_rows} duplicate rows deleted successfully.")
    except Exception as e:
        # Deduplication is deliberately non-fatal: the inserted rows are already committed.
        logging.error(f"Error deleting duplicate rows: {e}")
    finally:
        if cursor is not None:
            cursor.close()


def load_json_to_database(connection, json_file_path):
    """Load one audit-log JSON file into ``AUDIT_LOG_DATA`` and remove duplicates.

    Steps:
      1. Read the current ``MAX(log_ts)`` (via ``fetch_last_load_ts``).
      2. Parse the JSON file (expected to be a list of entries) and insert the
         entries in chunks of ``BATCH_SIZE`` through ``batch_insert``.
      3. Commit, then delete duplicate rows whose ``log_ts`` lies between the
         previous maximum and the time this load started.

    Args:
        connection: Open database connection; it is committed but not closed here.
        json_file_path: Path of the JSON file to load (read as UTF-8).

    Raises:
        OSError, json.JSONDecodeError: If the file cannot be read or parsed.
        Exception: Errors from record building or the batch insert propagate to
            the caller; only the duplicate cleanup swallows (and logs) errors.
    """
    # NOTE(review): this is local wall-clock time, while log_ts values come from
    # 'Z'-suffixed timestamps parsed without tzinfo - confirm the upper bound of
    # the cleanup window is meant to be local time.
    load_start_time = datetime.now().strftime("%d-%m-%Y %H:%M:%S.%f")

    # Fetch last load timestamp
    cursor = connection.cursor()
    try:
        last_load_ts = fetch_last_load_ts(cursor)
    finally:
        cursor.close()

    # Explicit encoding so the result does not depend on the platform default.
    with open(json_file_path, encoding='utf-8') as json_file:
        data_list = json.load(json_file)

    logging.info(f"Number of records in JSON file: {len(data_list)}")

    total_rows_inserted = 0
    cursor = connection.cursor()
    try:
        for batch_start in range(0, len(data_list), BATCH_SIZE):
            records = [
                _build_audit_record(entry)
                for entry in data_list[batch_start:batch_start + BATCH_SIZE]
            ]
            batch_insert(cursor, records)
            total_rows_inserted += len(records)
            logging.info(f"Inserted JSON row {batch_start + len(records)} into the database.")
        connection.commit()
    finally:
        # Release the cursor even when building or inserting a batch fails.
        cursor.close()

    logging.info(f"OAC Audit JSON data loaded successfully into AUDIT_LOG_DATA table. Total rows inserted: {total_rows_inserted}")
    print(f"OAC Audit JSON data has been successfully loaded into the database. {total_rows_inserted} rows inserted.")
    logging.info(f"Deleting duplicate rows where log_ts between {last_load_ts} and {load_start_time}")

    _delete_duplicate_rows(connection, last_load_ts, load_start_time)
        
if __name__ == "__main__":
    # Script entry point: connect, load both JSON exports, and always release the connection.
    connection = None
    try:
        connection = cx_Oracle.connect(config['DATABASE']['username'], config['DATABASE']['password'], config['DATABASE']['service_name'])
        print("Database connection established successfully.")
        # Both files live in the configured data directory and are loaded in this order.
        for json_file_name in ('audit_logs_json_data.json', 'dv_exports_logs_json_data.json'):
            load_json_to_database(connection, os.path.join(config['DATA']['data_dir'], json_file_name))

    except cx_Oracle.DatabaseError:
        print("Failed to connect to the database or execute a query. Please check the log file for details.")
        logging.exception("Database connection or query execution error occurred")
        sys.exit(1)

    except Exception:
        logging.exception("An unexpected error occurred")
        print("An unexpected error occurred. Please check the log file for details.")
        sys.exit(1)

    finally:
        # Runs on success and on the sys.exit() paths above, so the session is never left open.
        if connection is not None:
            try:
                connection.close()
            except cx_Oracle.Error:
                logging.exception("Failed to close the database connection cleanly")