work-tracing/app/utils/queries.py
2025-05-16 17:55:30 +04:00

213 lines
9.5 KiB
Python

"""
SQL query building functions for employee workstation activity tracking.
This module contains functions for building SQL queries to calculate working durations.
"""
from datetime import datetime, timedelta # Added timedelta
from flask import current_app
# Define timeout constants - consider moving to config if shared across modules
# This timeout is for capping session duration in reports if no explicit stop event is found soon after a start.
REPORT_QUERY_SESSION_TIMEOUT_SECONDS = 15 * 60 # 15 minutes
def calculate_duration_sql(
time_period, user_filter=None, start_date_filter=None, end_date_filter=None
):
"""
Generates the core SQL query to calculate working durations.
Uses LEAD() window function to pair 'working' with the next event.
Calculates duration in hours using PostgreSQL functions.
Args:
time_period (str): Time period to group by ('daily', 'weekly', or 'monthly').
user_filter (str, optional): Username to filter results by (exact match).
start_date_filter (str, optional): Start date for filtering events (YYYY-MM-DD).
end_date_filter (str, optional): End date for filtering events (YYYY-MM-DD).
Returns:
str: SQL query for calculating durations.
dict: Parameters for the SQL query.
"""
params = {}
date_conditions = []
if start_date_filter:
date_conditions.append("w_events.ts >= :start_date_param")
params["start_date_param"] = start_date_filter
if end_date_filter:
# Add 1 day to end_date_filter to make it inclusive of the whole day
# Or expect end_date_filter to be exclusive upper bound already.
# Assuming YYYY-MM-DD means start of that day.
# To include the entire end_date_filter day, we query < (end_date_filter + 1 day)
# For simplicity, let's assume date filters are applied directly to DATE(ts)
# or that ts is already a date for daily grouping if filters apply to period_start.
# For now, let's filter the base events.
# If end_date_filter is '2023-10-20', events up to '2023-10-20 23:59:59.999' should be included.
# So ts < '2023-10-21'
try:
end_date_dt = datetime.strptime(end_date_filter, "%Y-%m-%d")
exclusive_end_date_dt = end_date_dt + timedelta(days=1)
params["end_date_param_exclusive"] = exclusive_end_date_dt.strftime(
"%Y-%m-%d"
)
date_conditions.append("w_events.ts < :end_date_param_exclusive")
except ValueError:
current_app.logger.warning(
f"Invalid end_date_filter format: {end_date_filter}. Ignoring."
)
event_selection_where_clause = ""
if date_conditions:
event_selection_where_clause = "WHERE " + " AND ".join(date_conditions)
# For virtual event injection, also apply date filters if possible to limit scope
# This part is tricky because virtual events are based on existing events.
# If an event is just outside start_date_filter but its virtual stop is inside, how to handle?
# For now, primary date filtering is on `public.work_events w_events`.
user_where_clause_for_all_events = ""
user_group_by_filter_final = ""
if user_filter:
user_where_clause_for_all_events = f"{'AND' if event_selection_where_clause else 'WHERE'} w_events.\"user\" = :user_param"
user_group_by_filter_final = 'WHERE cd."user" = :user_param'
params["user_param"] = user_filter
# Conditional SQL parts for the 'w' alias in AllEvents UNION
user_filter_condition_for_w = ""
if user_filter:
user_filter_condition_for_w = 'AND w."user" = :user_param'
start_date_condition_for_w = ""
if "start_date_param" in params:
start_date_condition_for_w = "AND w.ts >= :start_date_param"
end_date_condition_for_w = ""
if "end_date_param_exclusive" in params:
end_date_condition_for_w = "AND w.ts < :end_date_param_exclusive"
period_grouping_options = {
"daily": "DATE_TRUNC('day', start_time)",
"weekly": "DATE_TRUNC('week', start_time)",
"monthly": "DATE_TRUNC('month', start_time)",
}
period_grouping = period_grouping_options.get(time_period)
if not period_grouping:
current_app.logger.warning(
f"Invalid time_period '{time_period}' in calculate_duration_sql, defaulting to 'daily'."
)
period_grouping = period_grouping_options["daily"]
# SQL query structure
sql_query = f"""
WITH AllEvents AS (
-- Select base events within the date range and for the specific user if provided
SELECT
w_events.\"user\",
w_events.ts,
w_events.state
FROM public.work_events w_events
{event_selection_where_clause}
{user_where_clause_for_all_events}
UNION ALL
-- Add virtual "stopped" events for users who have been inactive for more than REPORT_QUERY_SESSION_TIMEOUT_SECONDS
-- This part needs to be mindful of the user_filter and date_filter as well.
-- We select from work_events `w` here. If `w` is filtered by user/date, this UNION part is also filtered.
SELECT
w.\"user\",
w.ts + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds' AS ts,
'stopped' AS state
FROM public.work_events w
-- Ensure the base 'working' event for virtual stop also respects date and user filters
-- The primary selection for `w` is implicitly filtered if `w_events` in the top part of UNION is filtered.
-- However, to be explicit or if `w` needs its own alias for clarity:
-- Let's assume `w` here implicitly respects the outer scope if it was part of a larger subquery,
-- but in a UNION, it's a separate SELECT. So, `w` also needs filtering.
-- Re-applying filters here to `w` to ensure virtual stops are only for relevant events.
WHERE w.state = 'working'
{user_filter_condition_for_w}
{start_date_condition_for_w}
{end_date_condition_for_w}
AND NOT EXISTS (
SELECT 1
FROM public.work_events w2
WHERE w2.\"user\" = w.\"user\"
AND w2.ts > w.ts
AND w2.ts <= w.ts + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds'
)
AND NOT EXISTS (
SELECT 1
FROM public.work_events w3
WHERE w3.\"user\" = w.\"user\"
AND w3.state = 'stopped'
AND w3.ts = w.ts + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds'
)
),
EventPairs AS (
-- Pair events using LEAD function
SELECT
\"user\",
ts AS start_time,
state,
LEAD(ts) OVER (PARTITION BY \"user\" ORDER BY ts) AS next_event_time,
LEAD(state) OVER (PARTITION BY \"user\" ORDER BY ts) AS next_event_state
FROM AllEvents
-- No user filter here, it's applied in AllEvents and final select
ORDER BY \"user\", ts
),
TimeoutAdjusted AS (
-- Adjust end time for sessions exceeding timeout
SELECT
\"user\",
start_time,
state,
CASE
WHEN state = 'working' AND next_event_time IS NOT NULL
AND EXTRACT(EPOCH FROM (next_event_time - start_time)) > {REPORT_QUERY_SESSION_TIMEOUT_SECONDS}
THEN start_time + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds'
WHEN state = 'working' AND next_event_time IS NULL -- Dangling working event, cap at timeout from start_time or now
THEN LEAST(start_time + INTERVAL '{REPORT_QUERY_SESSION_TIMEOUT_SECONDS} seconds', NOW() AT TIME ZONE 'UTC')
ELSE next_event_time
END AS adjusted_end_time,
next_event_state
FROM EventPairs
),
CalculatedDurations AS (
-- Calculate duration for each working session and find first login time
SELECT
\"user\",
{period_grouping} AS period_start,
SUM(
CASE
WHEN state = 'working' AND adjusted_end_time IS NOT NULL THEN
GREATEST(0, EXTRACT(EPOCH FROM (adjusted_end_time - start_time)) / 3600.0) -- Ensure non-negative
ELSE 0
END
) AS total_hours,
MIN(CASE WHEN state = 'working' THEN start_time END) AS first_login_time
FROM TimeoutAdjusted
WHERE state = 'working'
GROUP BY \"user\", period_start
)
-- Final aggregation and user filtering if applicable
SELECT
cd.\"user\",
cd.period_start,
SUM(cd.total_hours) AS total_hours,
MIN(cd.first_login_time) AS first_login_time
FROM CalculatedDurations cd
{user_group_by_filter_final}
GROUP BY cd.\"user\", cd.period_start
ORDER BY cd.\"user\", cd.period_start DESC;
"""
current_app.logger.debug(f"Generated SQL query for reports: {sql_query}")
current_app.logger.debug(f"SQL query parameters: {params}")
return sql_query, params
# The filter_sql_by_user function is now obsolete and has been removed.