import cx_Oracle
import json
import logging
import os
import sys
from configparser import ConfigParser
from datetime import datetime, date
import re

# Load settings from the config.ini file located in the same directory as this script.
# The sections read in this file are PATHS, LOGGING, DATABASE and DATA.
config = ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), 'config.ini'))

# Point cx_Oracle at the Oracle client libraries directory named in the config.
cx_Oracle.init_oracle_client(lib_dir=config['PATHS']['instantclient_dir'])

# Configuring logging
# INFO-and-above messages go to a fixed-name log file inside the configured log directory.
log_dir = config['LOGGING']['log_dir']
log_file_path = os.path.join(log_dir, f'dv_jobs_json_data_to_database.log')
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Announce the start of the load on the console (with the log file location) and in the log.
print(f"OAC DV Jobs JSON data started loading into DV_JOBS_LOG_DATA. Please check the logfile {log_file_path}")
logging.info("OAC DV Jobs JSON data started loading into DV_JOBS_LOG_DATA. Please wait...")

def parse_datetime_with_milliseconds(date_str):
    """Parse a 'Z'-suffixed ISO-8601 timestamp, with or without fractional seconds.

    The fractional-second layout is tried first; if the text does not match it,
    the whole-second layout is used instead.

    Args:
        date_str: Timestamp text such as "2024-01-31T12:30:45.123Z" or
            "2024-01-31T12:30:45Z".

    Returns:
        datetime: A naive datetime holding the parsed value.

    Raises:
        ValueError: If the text matches neither layout.
    """
    fractional_layout = "%Y-%m-%dT%H:%M:%S.%fZ"
    whole_second_layout = "%Y-%m-%dT%H:%M:%SZ"

    try:
        parsed = datetime.strptime(date_str, fractional_layout)
    except ValueError:
        parsed = datetime.strptime(date_str, whole_second_layout)
    return parsed

def parse_duration(duration_str):
    """Convert a textual duration into a number of seconds.

    The text is scanned for "<number> <unit>" pairs where the unit is one of
    hour, minute, second or millisecond (singular or plural, any letter case),
    e.g. "1 hour 2 minutes 3 seconds 250 milliseconds".

    Args:
        duration_str: Duration text, or a falsy value when no duration is known.

    Returns:
        int | float | None: Total seconds. None is returned when the input is
        empty/None, is not text, or contains no recognizable "<number> <unit>"
        pair (so an unparseable value is not confused with a real 0 seconds).
    """
    if not duration_str:
        return None

    time_units = {'hour': 3600, 'minute': 60, 'second': 1, 'millisecond': 0.001}

    try:
        # The optional fractional part keeps "1.5 seconds" from being read as
        # "5 seconds", which is what a bare \d+ would capture.
        matches = re.findall(
            r'(\d+(?:\.\d+)?)\s*(hour|minute|second|millisecond)s?',
            duration_str,
            re.IGNORECASE,
        )
    except TypeError as e:
        # Non-text input (e.g. a number) cannot be scanned by the regex.
        logging.error(f"Error parsing duration: {e}")
        return None

    if not matches:
        logging.warning(f"No recognizable duration units in: {duration_str!r}")
        return None

    # Whole numbers stay ints so purely integral durations still yield an int.
    return sum(
        (float(value) if '.' in value else int(value)) * time_units[unit.lower()]
        for value, unit in matches
    )

# Number of accumulated records that triggers a flush to the database while loading.
BATCH_SIZE = 1000

def fetch_last_job_ts(cursor):
    """Return the newest job_ts currently stored in DV_JOBS_LOG_DATA.

    Args:
        cursor: An open database cursor used to run the MAX(job_ts) query.

    Returns:
        The MAX(job_ts) value from the table; 2023-01-01 00:00:00 when the
        query yields NULL; or None when the lookup raises (the error is
        logged rather than propagated).
    """
    try:
        cursor.execute("SELECT MAX(job_ts) FROM DV_JOBS_LOG_DATA")
        row = cursor.fetchone()
        newest_ts = row[0]
        if newest_ts is not None:
            return newest_ts
        # MAX() came back NULL: fall back to a fixed starting point.
        return datetime.strptime('2023-01-01 00:00:00.000', '%Y-%m-%d %H:%M:%S.%f')
    except Exception as exc:
        logging.error(f"Error fetching last job timestamp: {exc}")
        return None

def batch_insert(cursor, records):
    """Insert a batch of job-log records into DV_JOBS_LOG_DATA.

    Args:
        cursor: An open database cursor; its executemany() is called once.
        records: A list of dicts keyed by the bind names in the statement
            below (duration, run_id, job_name, ..., log_time, job_dt, job_id).

    Returns:
        None. Any exception raised by the insert is logged and suppressed, so
        the caller is not told whether the batch was stored.
    """
    # log_time and job_dt are sent as text and converted by the database
    # using the format masks embedded in the statement.
    insert_statement = """
            INSERT INTO DV_JOBS_LOG_DATA (duration_sec, run_id, job_name, job_type, job_log, rows_loaded, number_of_tasks,
                                  job_request_type, source_connection, status, task_id, task_log, category,
                                  ecid,message, user_name, log_content_id,instance_name,job_ts,job_dt, job_id)
            VALUES (:duration, :run_id, :job_name, :job_type, :job_log, :rows_loaded, :no_of_tasks,
                    :job_request_type, :source_connection, :status, :task_id, :task_log, :category,
                    :ecid,:message, :user_id, :id,:source,TO_TIMESTAMP(:log_time, 'YYYY-MM-DD HH24:MI:SS.FF'), TO_DATE(:job_dt, 'DD/MM/YYYY'), :job_id)
        """
    try:
        cursor.executemany(insert_statement, records)
    except Exception as exc:
        logging.error(f"Error inserting records: {exc}")

def extract_job_id(job_log, task_log):
    """Pull the identifier that follows the words "job id" out of the log text.

    The job log is searched first and the task log only if the job log yields
    nothing. Matching ignores letter case and allows any run of whitespace
    between "job" and "id"; the identifier may be preceded by a colon and/or
    wrapped in single or double quotes.

    Args:
        job_log: Job log text, or None.
        task_log: Task log text, or None.

    Returns:
        str | None: The identifier (word characters and hyphens), or None when
        neither text contains one.
    """
    # No literal '"job id" in text' pre-check: it was case- and
    # spacing-sensitive, so it rejected inputs such as "Job ID: 42" that the
    # case-insensitive pattern below is written to accept.
    pattern = r'job\s+id\s*:?\'?\"?\s*([\w-]+)\'?\"?'

    for text in (job_log, task_log):
        # Skip missing or non-text values instead of letting re raise on them.
        if not isinstance(text, str):
            continue
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1).strip()

    return None

def _build_record(data):
    """Flatten one entry of the decoded JSON list into the insert's bind dictionary.

    Args:
        data: A dict for one log entry; values are read from its 'logContent'
            object, that object's 'data' object, and the nested
            'additionalDetails' object. Missing objects/keys yield None values.

    Returns:
        dict: Values keyed by the bind names that batch_insert's statement uses.

    Raises:
        TypeError, ValueError: If logContent.time is missing or is not in a
            layout accepted by parse_datetime_with_milliseconds.
    """
    log_content = data.get('logContent', {})
    data_content = log_content.get('data', {})
    additional_details = data_content.get('additionalDetails', {})

    log_time = parse_datetime_with_milliseconds(log_content.get('time'))
    job_log = additional_details.get('Job Log')
    task_log = additional_details.get('Task Log')

    return {
        'duration': parse_duration(additional_details.get('Duration')),
        'run_id': additional_details.get('Job ID'),
        'job_id': extract_job_id(job_log, task_log),
        'job_name': additional_details.get('Name'),
        'job_type': additional_details.get('Job Type'),
        'job_log': job_log,
        'rows_loaded': additional_details.get('Number of Rows Successfully Loaded'),
        'no_of_tasks': additional_details.get('Number of Tasks'),
        'job_request_type': additional_details.get('Request Type'),
        'source_connection': additional_details.get('Source Connection'),
        'status': additional_details.get('Status'),
        'task_id': additional_details.get('Task ID'),
        'task_log': task_log,
        'category': data_content.get('category'),
        'ecid': data_content.get('ecid'),
        'message': data_content.get('message'),
        'user_id': data_content.get('userId'),
        'id': log_content.get('id'),
        'source': log_content.get('source'),
        # Trim microseconds to milliseconds; the insert parses this text with
        # the mask 'YYYY-MM-DD HH24:MI:SS.FF'.
        'log_time': log_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],
        'job_dt': log_time.date().strftime("%d/%m/%Y")
    }


def load_json_to_database(connection, json_file_path):
    """Load the job-log JSON file into DV_JOBS_LOG_DATA and remove duplicates.

    Steps:
      1. Read the newest job_ts already stored (before inserting anything).
      2. Decode the JSON file (a list of log entries) and insert the entries
         in batches of BATCH_SIZE, committing once after the last batch.
      3. Delete duplicate rows whose job_ts lies between the timestamp from
         step 1 and the moment this load started, keeping the row with the
         highest ROWID in each group of identical rows.

    Args:
        connection: An open database connection providing cursor() and commit().
        json_file_path: Path of the JSON file to load.

    Raises:
        OSError, json.JSONDecodeError: If the file cannot be read or decoded.
        TypeError, ValueError: If an entry has a missing or malformed time.
        Errors during duplicate removal are logged, not raised.
    """
    load_start_time = datetime.now().strftime("%d-%m-%Y %H:%M:%S.%f")

    # Fetch last job timestamp
    cursor = connection.cursor()
    try:
        last_job_ts = fetch_last_job_ts(cursor)
    finally:
        cursor.close()

    with open(json_file_path) as json_file:
        data_list = json.load(json_file)

    total_rows_inserted = 0
    cursor = connection.cursor()
    try:
        records = []
        for i, data in enumerate(data_list, start=1):
            records.append(_build_record(data))

            # Flush on a full batch, and once more for the final partial batch.
            if len(records) >= BATCH_SIZE or i == len(data_list):
                batch_insert(cursor, records)
                # NOTE: batch_insert logs and suppresses insert errors, so this
                # counts rows submitted rather than rows confirmed stored.
                total_rows_inserted += len(records)
                logging.info(f"Inserted JSON row {i} into the database.")
                records = []

        connection.commit()
    finally:
        # Release the cursor even when a malformed entry aborts the load.
        cursor.close()

    logging.info(f"OAC DV Jobs JSON data loaded successfully into DV_JOBS_LOG_DATA table. Total rows inserted: {total_rows_inserted}")
    print(f"OAC DV Jobs JSON data has been successfully loaded into the database.Total rows inserted: {total_rows_inserted}")

    if last_job_ts is None:
        # fetch_last_job_ts already logged the reason; without a lower bound
        # the cleanup window is undefined, so do not run the DELETE.
        logging.warning("Skipping duplicate cleanup: last job timestamp is unavailable.")
        return

    logging.info(f"Deleting duplicate rows where job_ts between {last_job_ts} and {load_start_time}")

    # The statement parses :last_job_ts as text with 'YYYY-MM-DD HH24:MI:SS.FF'.
    # Binding a datetime object there would make the database convert it to
    # text using session settings first, which need not match that mask.
    if isinstance(last_job_ts, datetime):
        last_job_ts_text = last_job_ts.strftime("%Y-%m-%d %H:%M:%S.%f")
    else:
        last_job_ts_text = last_job_ts

    try:
        cursor = connection.cursor()
        try:
            cursor.execute("""
            DELETE FROM DV_JOBS_LOG_DATA WHERE ROWID NOT IN (
        SELECT MAX(ROWID) FROM DV_JOBS_LOG_DATA WHERE job_ts BETWEEN TO_TIMESTAMP(:last_job_ts, 'YYYY-MM-DD HH24:MI:SS.FF') AND TO_TIMESTAMP(:load_start_time, 'DD-MM-YYYY HH24:MI:SS.FF')
        GROUP BY ecid, duration_sec, run_id, job_id, task_id, category, source_connection, log_content_id, job_request_type, job_name, job_type, status, user_name, instance_name, job_ts, job_dt, rows_loaded, number_of_tasks
        ) AND job_ts BETWEEN TO_TIMESTAMP(:last_job_ts, 'YYYY-MM-DD HH24:MI:SS.FF') AND TO_TIMESTAMP(:load_start_time, 'DD-MM-YYYY HH24:MI:SS.FF')""", {'load_start_time': load_start_time, 'last_job_ts': last_job_ts_text})
            deleted_rows = cursor.rowcount
            connection.commit()
            logging.info(f"{deleted_rows} duplicate rows deleted successfully.")
        finally:
            cursor.close()
    except Exception as e:
        logging.error(f"Error deleting duplicate rows: {e}")

if __name__ == "__main__":
    # Script entry point: connect using the [DATABASE] settings, load the JSON
    # file found in the [DATA] data_dir, and exit with status 1 on any failure.
    connection = None
    try:
        connection = cx_Oracle.connect(config['DATABASE']['username'], config['DATABASE']['password'], config['DATABASE']['service_name'])
        print("Database connection established successfully.")
        load_json_to_database(connection, config['DATA']['data_dir'] + '/dv_jobs_logs_json_data.json')
    except cx_Oracle.Error:
        logging.exception("An Oracle Database error occurred")
        print("Failed to load JSON data into the database due to Oracle Database error.")
        sys.exit(1)
    except Exception:
        logging.exception("An error occurred")
        print("Failed to load JSON data into the database due to an error.")
        sys.exit(1)
    finally:
        # Always release the database session, including on the sys.exit paths
        # above (SystemExit still runs this block).
        if connection is not None:
            try:
                connection.close()
            except cx_Oracle.Error:
                logging.exception("Failed to close the database connection")