""" SQL query building functions for employee workstation activity tracking. This module contains functions for building SQL queries to calculate working durations. """ from datetime import datetime, timedelta # Added timedelta from flask import current_app # Define timeout constants - consider moving to config if shared across modules # This timeout is for capping session duration in reports if no explicit stop event is found soon after a start. REPORT_QUERY_SESSION_TIMEOUT_SECONDS = 15 * 60 # 15 minutes def calculate_duration_sql( time_period, user_filter=None, start_date_filter=None, end_date_filter=None ): """ Generates the core SQL query to calculate working durations. Uses LEAD() window function to pair 'working' with the next event. Calculates duration in hours using PostgreSQL functions. Args: time_period (str): Time period to group by ('daily', 'weekly', or 'monthly'). user_filter (str, optional): Username to filter results by (exact match). start_date_filter (str, optional): Start date for filtering events (YYYY-MM-DD). end_date_filter (str, optional): End date for filtering events (YYYY-MM-DD). Returns: str: SQL query for calculating durations. dict: Parameters for the SQL query. """ params = {} date_conditions = [] if start_date_filter: date_conditions.append("w_events.ts >= :start_date_param") params["start_date_param"] = start_date_filter if end_date_filter: # Add 1 day to end_date_filter to make it inclusive of the whole day # Or expect end_date_filter to be exclusive upper bound already. # Assuming YYYY-MM-DD means start of that day. # To include the entire end_date_filter day, we query < (end_date_filter + 1 day) # For simplicity, let's assume date filters are applied directly to DATE(ts) # or that ts is already a date for daily grouping if filters apply to period_start. # For now, let's filter the base events. # If end_date_filter is '2023-10-20', events up to '2023-10-20 23:59:59.999' should be included. # So ts < '2023-10-21' try: end_date_dt = datetime.strptime(end_date_filter, "%Y-%m-%d") exclusive_end_date_dt = end_date_dt + timedelta(days=1) params["end_date_param_exclusive"] = exclusive_end_date_dt.strftime( "%Y-%m-%d" ) date_conditions.append("w_events.ts < :end_date_param_exclusive") except ValueError: current_app.logger.warning( f"Invalid end_date_filter format: {end_date_filter}. Ignoring." ) event_selection_where_clause = "" if date_conditions: event_selection_where_clause = "WHERE " + " AND ".join(date_conditions) # For virtual event injection, also apply date filters if possible to limit scope # This part is tricky because virtual events are based on existing events. # If an event is just outside start_date_filter but its virtual stop is inside, how to handle? # For now, primary date filtering is on `public.work_events w_events`. user_where_clause_for_all_events = "" user_group_by_filter_final = "" if user_filter: user_where_clause_for_all_events = f"{'AND' if event_selection_where_clause else 'WHERE'} w_events.\"user\" = :user_param" user_group_by_filter_final = 'WHERE cd."user" = :user_param' params["user_param"] = user_filter # Conditional SQL parts for the 'w' alias in AllEvents UNION user_filter_condition_for_w = "" if user_filter: user_filter_condition_for_w = 'AND w."user" = :user_param' start_date_condition_for_w = "" if "start_date_param" in params: start_date_condition_for_w = "AND w.ts >= :start_date_param" end_date_condition_for_w = "" if "end_date_param_exclusive" in params: end_date_condition_for_w = "AND w.ts < :end_date_param_exclusive" period_grouping_options = { "daily": "DATE_TRUNC('day', start_time)", "weekly": "DATE_TRUNC('week', start_time)", "monthly": "DATE_TRUNC('month', start_time)", } period_grouping = period_grouping_options.get(time_period) if not period_grouping: current_app.logger.warning( f"Invalid time_period '{time_period}' in calculate_duration_sql, defaulting to 'daily'." ) period_grouping = period_grouping_options["daily"] # SQL query structure sql_query = f""" WITH AllEvents AS ( -- Select base events within the date range and for the specific user if provided SELECT w_events.\"user\", w_events.ts, w_events.state FROM public.work_events w_events {event_selection_where_clause} {user_where_clause_for_all_events} UNION ALL -- Add virtual "stopped" events for users who have been inactive for more than REPORT_QUERY_SESSION_TIMEOUT_SECONDS -- This part needs to be mindful of the user_filter and date_filter as well. -- We select from work_events `w` here. If `w` is filtered by user/date, this UNION part is also filtered. SELECT w.\"user\", w.ts + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds' AS ts, 'stopped' AS state FROM public.work_events w -- Ensure the base 'working' event for virtual stop also respects date and user filters -- The primary selection for `w` is implicitly filtered if `w_events` in the top part of UNION is filtered. -- However, to be explicit or if `w` needs its own alias for clarity: -- Let's assume `w` here implicitly respects the outer scope if it was part of a larger subquery, -- but in a UNION, it's a separate SELECT. So, `w` also needs filtering. -- Re-applying filters here to `w` to ensure virtual stops are only for relevant events. WHERE w.state = 'working' {user_filter_condition_for_w} {start_date_condition_for_w} {end_date_condition_for_w} AND NOT EXISTS ( SELECT 1 FROM public.work_events w2 WHERE w2.\"user\" = w.\"user\" AND w2.ts > w.ts AND w2.ts <= w.ts + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds' ) AND NOT EXISTS ( SELECT 1 FROM public.work_events w3 WHERE w3.\"user\" = w.\"user\" AND w3.state = 'stopped' AND w3.ts = w.ts + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds' ) ), EventPairs AS ( -- Pair events using LEAD function SELECT \"user\", ts AS start_time, state, LEAD(ts) OVER (PARTITION BY \"user\" ORDER BY ts) AS next_event_time, LEAD(state) OVER (PARTITION BY \"user\" ORDER BY ts) AS next_event_state FROM AllEvents -- No user filter here, it's applied in AllEvents and final select ORDER BY \"user\", ts ), TimeoutAdjusted AS ( -- Adjust end time for sessions exceeding timeout SELECT \"user\", start_time, state, CASE WHEN state = 'working' AND next_event_time IS NOT NULL AND EXTRACT(EPOCH FROM (next_event_time - start_time)) > {REPORT_QUERY_SESSION_TIMEOUT_SECONDS} THEN start_time + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds' WHEN state = 'working' AND next_event_time IS NULL -- Dangling working event, cap at timeout from start_time or now THEN LEAST(start_time + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds', NOW() AT TIME ZONE 'UTC') ELSE next_event_time END AS adjusted_end_time, next_event_state FROM EventPairs ), CalculatedDurations AS ( -- Calculate duration for each working session and find first login time SELECT \"user\", {period_grouping} AS period_start, SUM( CASE WHEN state = 'working' AND adjusted_end_time IS NOT NULL THEN GREATEST(0, EXTRACT(EPOCH FROM (adjusted_end_time - start_time)) / 3600.0) -- Ensure non-negative ELSE 0 END ) AS total_hours, MIN(CASE WHEN state = 'working' THEN start_time END) AS first_login_time FROM TimeoutAdjusted WHERE state = 'working' GROUP BY \"user\", period_start ) -- Final aggregation and user filtering if applicable SELECT cd.\"user\", cd.period_start, SUM(cd.total_hours) AS total_hours, MIN(cd.first_login_time) AS first_login_time FROM CalculatedDurations cd {user_group_by_filter_final} GROUP BY cd.\"user\", cd.period_start ORDER BY cd.\"user\", cd.period_start DESC; """ current_app.logger.debug(f"Generated SQL query for reports: {sql_query}") current_app.logger.debug(f"SQL query parameters: {params}") return sql_query, params # The filter_sql_by_user function is now obsolete and has been removed.