Fixed ° and the overload issue

This commit is contained in:
root 2025-04-09 16:49:59 +00:00
parent d51d597e87
commit 322d662011
118 changed files with 36900 additions and 277 deletions

View File

@ -3,3 +3,4 @@ pdfs/
node_modules/
pycache/
cloned_repo/
extracted_texts/

View File

@ -2,7 +2,7 @@ Control Panel,Unit Number,Alias,Equipment Type,Type of Conveyor,Speed,Drive Hand
BULK INBOUND NORTH,BS1-005,PS10-1,Powered-Belted,Level Belt,150,RH,460/3/60,25,5,Y,N,RPH3200BXB-FR,NA,24,12,N,Y,NA,NA,"36"" SG, Bi-directional"
BULK INBOUND NORTH,BS1-010,PS10-2,Powered-Belted,Incline Belt,200,LH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24,,N,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS1-011,PS10-3,Powered-Belted,Incline Belt,240,RH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24.00'',12'',,,,,
BULK INBOUND NORTH,BS1-015-CH,PS10-4CH,90<EFBFBD> Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-015-CH,PS10-4CH,90° Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-020,PS10-5,Powered-Belted,Level Belt,240,LH,460/3/60,25,15,Y,N,RPH3200BXB-FR,NA,24.00'',12'',N,Y,Y,QTY 4,
BULK INBOUND NORTH,BS1-020-CH1,PS10-5CH1,Induct Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-020-CH2,PS10-5CH2,Induct Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
@ -12,17 +12,17 @@ BULK INBOUND NORTH,BS1-020-DIV1,PS10-5DIV1,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,N
BULK INBOUND NORTH,BS1-020-DIV2,PS10-5DIV2,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-020-DIV3,PS10-5DIV3,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-020-DIV4,PS10-5DIV5,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-025-CH,PS10-6CH,90<EFBFBD> Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS1-025-CH,PS10-6CH,90° Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-005,PS11-1,Powered-Belted,Level Belt,150,RH,460/3/60,25,5,Y,N,RPH3200BXB-FR,NA,24.00'',12'',N,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS2-010,PS11-2,Powered-Belted,Incline Belt,200,RH,460/3/60,25,5,Y,N,APH150MFOXLN,NA,24.00'',12'',Y,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS2-015,PS11-3,Powered-Belted,Level Belt,240,RH,460/3/60,25,10,Y,N,RPH3200BXB-FR,NA,24.00'',12'',Y,Y,NA,NA,
BULK INBOUND NORTH,BS2-020,PS11-4,Powered-Belted,Incline Belt,240,LH,460/3/60,25,10,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,
BULK INBOUND NORTH,BS2-025-CH,PS11-5CH,90<EFBFBD> Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-025-CH,PS11-5CH,90° Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-030,PS11-6,Powered-Belted,Incline Belt,240,RH,460/3/60,25,10,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS2-035,PS11-7,Powered-Belted,Incline Belt,240,RH,460/3/60,25,20,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS2-040,PS11-8,Powered-Belted,Incline Belt,240,RH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS2-045,PS11-9,Powered-Belted,Incline Belt,240,RH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24.00'',12'',N,Y,NA,NA,"36"" SG"
BULK INBOUND NORTH,BS2-050-CH,PS11-10CH,90<EFBFBD> Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-050-CH,PS11-10CH,90° Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-055,PS11-11,Powered-Belted,Level Belt,240,LH,460/3/60,25,15,Y,N,RPH3200BXB-FR,NA,24.00'',12'',N,Y,Y,QTY 6,
BULK INBOUND NORTH,BS2-055-CH1,PS11-11CH1,Induct Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-055-CH2,PS11-11CH2,Induct Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
@ -36,17 +36,17 @@ BULK INBOUND NORTH,BS2-055-DIV3,PS11-11DIV3,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-055-DIV4,PS11-11DIV4,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-055-DIV5,PS11-11DIV5,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-055-DIV6,PS11-11DIV6,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-060-CH,PS1-12CH,90<EFBFBD> Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND NORTH,BS2-060-CH,PS1-12CH,90° Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-005,PS8-1,Powered-Belted,Level Belt,150,LH,460/3/60,25,7.5,Y,N,RPH3200BXB-FR,NA,24.00'',12'',N,Y,NA,NA,
BULK INBOUND SOUTH,BS3-010,PS8-2,Powered-Belted,Incline Belt,200,LH,460/3/60,25,10,Y,N,RPH3200BXB-FR,NA,24.00'',12'',Y,Y,NA,NA,
BULK INBOUND SOUTH,BS3-015-CH,PS8-3CH,90<EFBFBD> Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-015-CH,PS8-3CH,90° Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-020,PS8-4,Powered-Belted,Incline Belt,240,LH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,"36"" SG"
BULK INBOUND SOUTH,BS3-025,PS8-5,Powered-Belted,Incline Belt,240,LH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24.00'',12'',N,Y,NA,NA,"36"" SG"
BULK INBOUND SOUTH,BS3-030,PS8-6,Sorter,Intralox Flowsplitter,240,LH,460/3/60,25,5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-035-CH,PS8-7CH,Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-040,PS8-8,Powered-Belted,Incline Belt,240,RH,460/3/60,25,7.5,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,
BULK INBOUND SOUTH,BS3-045,PS8-9,Powered-Belted,Incline Belt,240,LH,460/3/60,25,15,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,
BULK INBOUND SOUTH,BS3-050-CH,PS8-10CH,90<EFBFBD> Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-050-CH,PS8-10CH,90° Spiral Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-055,PS8-11,Powered-Belted,Level Belt,240,RH,460/3/60,25,10,Y,N,RPH3200BXB-FR,NA,24.00'',12'',N,Y,Y,QTY 4,
BULK INBOUND SOUTH,BS3-055-CH1,PS8-11CH1,Induct Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-055-CH2,PS8-11CH2,Induct Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
@ -56,7 +56,7 @@ BULK INBOUND SOUTH,BS3-055-DIV1,PS8-11DIV1,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,N
BULK INBOUND SOUTH,BS3-055-DIV2,PS8-11DIV2,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-055-DIV3,PS8-11DIV3,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-055-DIV4,PS8-11DIV4,Sorter,Divert Arm,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-060-CH,PS8-12CH,90<EFBFBD> Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS3-060-CH,PS8-12CH,90° Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS4-005-CH,PS9-1CH,Straight Chute,Chute,NA,NA,NA,25,NA,NA,NA,NA,NA,24.00'',24.00'',NA,NA,NA,NA,
BULK INBOUND SOUTH,BS4-010,PS9-2,Powered-Belted,Decline Belt,240,RH,460/3/60,25,7.5,Y,N,APH150MFOXLN,NA,24.00'',-,N,Y,NA,NA,
BULK INBOUND SOUTH,BS4-015,PS9-3,Powered-Belted,Level Belt,240,RH,460/3/60,25,7.5,Y,N,RPH3200BXB-FR,NA,24.00'',12'',N,Y,Y,QTY 4,

1 Control Panel Unit Number Alias Equipment Type Type of Conveyor Speed Drive Hand PSU Live Load Motor HP VFD Brake Belting Type Roll Center Side Guard 1 Side Guard 2 Netting Btm Guard Diverts Diverts 2 Remarks
2 BULK INBOUND NORTH BS1-005 PS10-1 Powered-Belted Level Belt 150 RH 460/3/60 25 5 Y N RPH3200BXB-FR NA 24 12 N Y NA NA 36" SG, Bi-directional
3 BULK INBOUND NORTH BS1-010 PS10-2 Powered-Belted Incline Belt 200 LH 460/3/60 25 15 Y N APH150MFOXLN NA 24 N Y NA NA 36" SG
4 BULK INBOUND NORTH BS1-011 PS10-3 Powered-Belted Incline Belt 240 RH 460/3/60 25 15 Y N APH150MFOXLN NA 24.00'' 12''
5 BULK INBOUND NORTH BS1-015-CH PS10-4CH 90� Spiral Chute 90° Spiral Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
6 BULK INBOUND NORTH BS1-020 PS10-5 Powered-Belted Level Belt 240 LH 460/3/60 25 15 Y N RPH3200BXB-FR NA 24.00'' 12'' N Y Y QTY 4
7 BULK INBOUND NORTH BS1-020-CH1 PS10-5CH1 Induct Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
8 BULK INBOUND NORTH BS1-020-CH2 PS10-5CH2 Induct Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
12 BULK INBOUND NORTH BS1-020-DIV2 PS10-5DIV2 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
13 BULK INBOUND NORTH BS1-020-DIV3 PS10-5DIV3 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
14 BULK INBOUND NORTH BS1-020-DIV4 PS10-5DIV5 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
15 BULK INBOUND NORTH BS1-025-CH PS10-6CH 90� Straight Chute 90° Straight Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
16 BULK INBOUND NORTH BS2-005 PS11-1 Powered-Belted Level Belt 150 RH 460/3/60 25 5 Y N RPH3200BXB-FR NA 24.00'' 12'' N Y NA NA 36" SG
17 BULK INBOUND NORTH BS2-010 PS11-2 Powered-Belted Incline Belt 200 RH 460/3/60 25 5 Y N APH150MFOXLN NA 24.00'' 12'' Y Y NA NA 36" SG
18 BULK INBOUND NORTH BS2-015 PS11-3 Powered-Belted Level Belt 240 RH 460/3/60 25 10 Y N RPH3200BXB-FR NA 24.00'' 12'' Y Y NA NA
19 BULK INBOUND NORTH BS2-020 PS11-4 Powered-Belted Incline Belt 240 LH 460/3/60 25 10 Y N APH150MFOXLN NA 24.00'' - N Y NA NA
20 BULK INBOUND NORTH BS2-025-CH PS11-5CH 90� Spiral Chute 90° Spiral Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
21 BULK INBOUND NORTH BS2-030 PS11-6 Powered-Belted Incline Belt 240 RH 460/3/60 25 10 Y N APH150MFOXLN NA 24.00'' - N Y NA NA 36" SG
22 BULK INBOUND NORTH BS2-035 PS11-7 Powered-Belted Incline Belt 240 RH 460/3/60 25 20 Y N APH150MFOXLN NA 24.00'' - N Y NA NA 36" SG
23 BULK INBOUND NORTH BS2-040 PS11-8 Powered-Belted Incline Belt 240 RH 460/3/60 25 15 Y N APH150MFOXLN NA 24.00'' - N Y NA NA 36" SG
24 BULK INBOUND NORTH BS2-045 PS11-9 Powered-Belted Incline Belt 240 RH 460/3/60 25 15 Y N APH150MFOXLN NA 24.00'' 12'' N Y NA NA 36" SG
25 BULK INBOUND NORTH BS2-050-CH PS11-10CH 90� Spiral Chute 90° Spiral Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
26 BULK INBOUND NORTH BS2-055 PS11-11 Powered-Belted Level Belt 240 LH 460/3/60 25 15 Y N RPH3200BXB-FR NA 24.00'' 12'' N Y Y QTY 6
27 BULK INBOUND NORTH BS2-055-CH1 PS11-11CH1 Induct Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
28 BULK INBOUND NORTH BS2-055-CH2 PS11-11CH2 Induct Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
36 BULK INBOUND NORTH BS2-055-DIV4 PS11-11DIV4 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
37 BULK INBOUND NORTH BS2-055-DIV5 PS11-11DIV5 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
38 BULK INBOUND NORTH BS2-055-DIV6 PS11-11DIV6 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
39 BULK INBOUND NORTH BS2-060-CH PS1-12CH 90� Straight Chute 90° Straight Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
40 BULK INBOUND SOUTH BS3-005 PS8-1 Powered-Belted Level Belt 150 LH 460/3/60 25 7.5 Y N RPH3200BXB-FR NA 24.00'' 12'' N Y NA NA
41 BULK INBOUND SOUTH BS3-010 PS8-2 Powered-Belted Incline Belt 200 LH 460/3/60 25 10 Y N RPH3200BXB-FR NA 24.00'' 12'' Y Y NA NA
42 BULK INBOUND SOUTH BS3-015-CH PS8-3CH 90� Spiral Chute 90° Spiral Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
43 BULK INBOUND SOUTH BS3-020 PS8-4 Powered-Belted Incline Belt 240 LH 460/3/60 25 15 Y N APH150MFOXLN NA 24.00'' - N Y NA NA 36" SG
44 BULK INBOUND SOUTH BS3-025 PS8-5 Powered-Belted Incline Belt 240 LH 460/3/60 25 15 Y N APH150MFOXLN NA 24.00'' 12'' N Y NA NA 36" SG
45 BULK INBOUND SOUTH BS3-030 PS8-6 Sorter Intralox Flowsplitter 240 LH 460/3/60 25 5 NA NA NA NA NA NA NA NA NA NA
46 BULK INBOUND SOUTH BS3-035-CH PS8-7CH Straight Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
47 BULK INBOUND SOUTH BS3-040 PS8-8 Powered-Belted Incline Belt 240 RH 460/3/60 25 7.5 Y N APH150MFOXLN NA 24.00'' - N Y NA NA
48 BULK INBOUND SOUTH BS3-045 PS8-9 Powered-Belted Incline Belt 240 LH 460/3/60 25 15 Y N APH150MFOXLN NA 24.00'' - N Y NA NA
49 BULK INBOUND SOUTH BS3-050-CH PS8-10CH 90� Spiral Chute 90° Spiral Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
50 BULK INBOUND SOUTH BS3-055 PS8-11 Powered-Belted Level Belt 240 RH 460/3/60 25 10 Y N RPH3200BXB-FR NA 24.00'' 12'' N Y Y QTY 4
51 BULK INBOUND SOUTH BS3-055-CH1 PS8-11CH1 Induct Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
52 BULK INBOUND SOUTH BS3-055-CH2 PS8-11CH2 Induct Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
56 BULK INBOUND SOUTH BS3-055-DIV2 PS8-11DIV2 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
57 BULK INBOUND SOUTH BS3-055-DIV3 PS8-11DIV3 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
58 BULK INBOUND SOUTH BS3-055-DIV4 PS8-11DIV4 Sorter Divert Arm NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
59 BULK INBOUND SOUTH BS3-060-CH PS8-12CH 90� Straight Chute 90° Straight Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
60 BULK INBOUND SOUTH BS4-005-CH PS9-1CH Straight Chute Chute NA NA NA 25 NA NA NA NA NA 24.00'' 24.00'' NA NA NA NA
61 BULK INBOUND SOUTH BS4-010 PS9-2 Powered-Belted Decline Belt 240 RH 460/3/60 25 7.5 Y N APH150MFOXLN NA 24.00'' - N Y NA NA
62 BULK INBOUND SOUTH BS4-015 PS9-3 Powered-Belted Level Belt 240 RH 460/3/60 25 7.5 Y N RPH3200BXB-FR NA 24.00'' 12'' N Y Y QTY 4

118
app.py
View File

@ -54,7 +54,8 @@ def get_views_dir_path():
def get_text_output_dir_path():
# Construct absolute path based on the script's directory
script_dir = os.path.dirname(os.path.abspath(__file__))
return os.path.join(script_dir, TEXT_OUTPUT_FOLDER)
# Use os.path.join to handle path separators correctly and avoid './'
return os.path.abspath(os.path.join(script_dir, TEXT_OUTPUT_FOLDER))
def normalize(text):
"""Normalize string for comparison: lowercase, treat '-' and '_' the same, remove all whitespace."""
@ -68,12 +69,14 @@ def normalize(text):
def read_manifest(csv_filepath):
"""Reads the manifest CSV into a list of dictionaries."""
manifest_items = []
# Only require Alias and Panel now for basic grouping
required_cols = {CSV_ALIAS_COL, CSV_PANEL_COL}
optional_cols = {CSV_EQ_TYPE_COL, CSV_CONV_TYPE_COL}
try:
with open(csv_filepath, mode='r', newline='', encoding='utf-8') as infile:
# Revert back to 'utf-8-sig' to handle potential BOM from Excel
with open(csv_filepath, mode='r', newline='', encoding='utf-8-sig') as infile:
reader = csv.DictReader(infile)
headers = set(h.strip() for h in reader.fieldnames) # Handle potential whitespace in headers
headers = set(h.strip() for h in reader.fieldnames)
# Check for required columns
missing_required = required_cols - headers
@ -85,12 +88,16 @@ def read_manifest(csv_filepath):
for row in reader:
alias = row.get(CSV_ALIAS_COL, "").strip()
panel = row.get(CSV_PANEL_COL, "").strip()
if alias and panel: # Only add if Alias and Control Panel are present
# unit_number = row.get('Unit Number', "").strip() # No longer needed for filename
# Add if Alias and Control Panel are present (Panel needed for grouping results later)
if alias and panel:
item = {
"alias": alias,
"normalized_alias": normalize(alias),
"control_panel": panel,
"expected_drawing_filename": f"{panel}.txt", # Assuming .txt file matches panel name
# "unit_number": unit_number, # Removed
# "expected_drawing_filename": f"MTN6_SYSDL-{unit_number}.txt", # Removed
# Add optional data if columns exist
"equipment_type": row.get(CSV_EQ_TYPE_COL, "").strip() if CSV_EQ_TYPE_COL in headers else "N/A",
"conveyor_type": row.get(CSV_CONV_TYPE_COL, "").strip() if CSV_CONV_TYPE_COL in headers else "N/A",
@ -99,8 +106,11 @@ def read_manifest(csv_filepath):
"found_drawing": False
}
manifest_items.append(item)
# elif alias and panel: # If Unit Number is missing but others are present # Condition removed
# print(f"Warning: Alias '{alias}' in Panel '{panel}' is missing 'Unit Number' in CSV. Skipping drawing check for this item.")
elif alias and not panel:
print(f"Warning: Alias '{alias}' found in CSV but is missing its '{CSV_PANEL_COL}'. Skipping.")
# Add other specific warnings if needed
except FileNotFoundError:
print(f"Error: Manifest file not found at {csv_filepath}")
@ -154,44 +164,50 @@ def check_scada(manifest_data, views_dir):
def check_drawings(manifest_data, text_output_dir):
"""Checks for aliases in extracted drawing text files, one file per panel."""
"""Checks if aliases from manifest exist in *any* extracted drawing text file."""
if not manifest_data: return
print(f"Starting Drawings check in directory: {text_output_dir}...")
found_count = 0
file_cache = {} # Cache normalized content of processed text files
print(f"Starting Drawings check: Scanning all .txt files in directory: {text_output_dir}...")
for item in manifest_data:
normalized_alias = item['normalized_alias']
txt_filename = item['expected_drawing_filename']
txt_filepath = os.path.join(text_output_dir, txt_filename)
all_normalized_content = "" # Combine all text content here
processed_files = 0
found_files = []
try:
# Check cache first
if txt_filepath in file_cache:
normalized_content = file_cache[txt_filepath]
# Read and cache if not already processed
elif os.path.exists(txt_filepath):
with open(txt_filepath, 'r', encoding='utf-8') as f:
# Step 1: Read and combine content of all .txt files in the directory
for filename in os.listdir(text_output_dir):
if filename.lower().endswith('.txt'):
filepath = os.path.join(text_output_dir, filename)
processed_files += 1
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
normalized_content = normalize(content)
file_cache[txt_filepath] = normalized_content # Cache it
else:
# File doesn't exist, mark as not found in cache to avoid re-checking
file_cache[txt_filepath] = None
# print(f" Info: Expected drawing text file not found: {txt_filepath}")
continue # Cannot find alias if file doesn't exist
# Add a separator to prevent false matches across file boundaries
all_normalized_content += normalize(content) + "\n--file-separator--\n"
found_files.append(filename)
except Exception as e:
print(f" Warning: Could not read or process text file {filepath}: {e}")
# Perform check if file content exists
if normalized_content is not None and normalized_alias in normalized_content:
if not item['found_drawing']: # Avoid double counting if alias appears multiple times in manifest
if processed_files == 0:
print(" Warning: No .txt files found in the directory. Cannot perform drawing check.")
return
else:
print(f" Successfully read and normalized content from {len(found_files)} out of {processed_files} .txt files found.")
# Step 2: Check each manifest alias against the combined content
found_count = 0
for item in manifest_data:
normalized_alias = item['normalized_alias']
if normalized_alias and normalized_alias in all_normalized_content:
item['found_drawing'] = True
found_count += 1
# else: item['found_drawing'] is already False by default
print(f"Drawings check finished. Found {found_count} manifest aliases within the combined text content.")
except FileNotFoundError:
print(f" Error: Drawings text directory not found: {text_output_dir}")
except Exception as e:
print(f" Warning: Could not read or process text file {txt_filepath}: {e}")
file_cache[txt_filepath] = None # Mark as failed in cache
print(f"Drawings check finished. Processed {len(file_cache)} unique text files. Found {found_count} manifest aliases.")
print(f" Error during drawings check: {e}")
def calculate_combined_progress(manifest_data):
@ -323,8 +339,7 @@ def update_progress_data():
status_message = current_status # Update status regardless of calculation success/failure
if new_data_calculated is not None:
progress_data = new_data_calculated
# Signal that an update attempt finished
# Signal that an update attempt finished WITH new data
data_updated_event.set()
data_updated_event.clear()
@ -342,8 +357,6 @@ def check_and_update_repo():
if not repo_existed:
print(f"Cloning repository {REPO_URL} into {repo_path}...")
status_message = f"Cloning repository {REPO_URL}..."
# Signal status change during long operation
data_updated_event.set(); data_updated_event.clear()
git.Repo.clone_from(REPO_URL, repo_path, branch=BRANCH)
repo = git.Repo(repo_path)
last_commit_hash = repo.head.commit.hexsha
@ -368,8 +381,6 @@ def check_and_update_repo():
if current_local_commit != current_remote_commit:
print("New commit detected! Pulling changes...")
status_message = "Pulling updates..."
# Signal status change during potentially long operation
data_updated_event.set(); data_updated_event.clear()
try:
pull_info = origin.pull()
new_commit_hash = repo.head.commit.hexsha
@ -392,9 +403,10 @@ def check_and_update_repo():
# Status will be updated within update_progress_data
update_progress_data()
# If no git update, signal any status change (e.g., "No changes" or error)
else:
data_updated_event.set() # Signal status change event
data_updated_event.clear()
# else: # REMOVED block that signaled event for no changes
# REMOVED: data_updated_event.set() # Signal status change event
# REMOVED: data_updated_event.clear()
# Status message is still updated globally, just won't trigger event
except git.GitCommandError as e:
status_message = f"Git command error: {e}"
@ -407,14 +419,14 @@ def check_and_update_repo():
if last_commit_hash is None: last_commit_hash = repo.head.commit.hexsha
except Exception:
if last_commit_hash is None: last_commit_hash = "Error reading commit"
data_updated_event.set() # Signal error status change
data_updated_event.clear()
# REMOVED: data_updated_event.set() # Signal error status change
# REMOVED: data_updated_event.clear()
except Exception as e:
status_message = f"Error checking repository: {e}"
print(status_message)
if last_commit_hash is None: last_commit_hash = "Error checking repo"
data_updated_event.set() # Signal error status change
data_updated_event.clear()
# REMOVED: data_updated_event.set() # Signal error status change
# REMOVED: data_updated_event.clear()
# Return true if analysis was run (because repo changed), false otherwise
return did_update
@ -439,6 +451,16 @@ def periodic_repo_check():
def index():
return render_template('index.html')
@app.route('/drawings')
def drawings_page():
# Render the main index template which now contains all content
return render_template('index.html')
@app.route('/conflicts')
def conflicts_page():
# Render the main index template which now contains all content
return render_template('index.html')
@app.route('/stream')
def stream():
def event_stream():
@ -479,11 +501,11 @@ def stream():
})
yield f"data: {data_payload}\n\n"
last_sent_hash_to_client = current_global_hash # Update the hash sent to this client
else:
# else: # No need for the else block logging here anymore, as the event shouldn't trigger if hash is same
# If hash is the same, maybe only the status message changed (e.g., error occurred)
# Option: Send update only if status is different from last sent status?
# For simplicity now, we only send if hash differs. Client UI shows last known status.
print(f"Data updated event triggered, but hash {current_global_hash} unchanged for this client. Status: '{current_global_status}'")
# print(f"Data updated event triggered, but hash {current_global_hash} unchanged for this client. Status: '{current_global_status}'") # Removed log
return Response(event_stream(), mimetype="text/event-stream")

@ -1 +1 @@
Subproject commit c8aa36809970e0557f46ee80b7f7cf3735efb487
Subproject commit 456de12cca56c09bc1881660b163ac3b5dff593a

View File

@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Ignition SCADA & Drawing Progress Monitor</title>
<title>SCADA Progress Monitor</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<style>
@ -37,7 +37,7 @@
margin: 0 auto; /* Center the canvas */
cursor: pointer; /* Indicate clickable */
}
#panels-progress {
#scada-panels-progress, #drawing-panels-progress {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); /* Responsive grid */
gap: 20px;
@ -47,27 +47,68 @@
.modal-body th { background-color: #f8f9fa; text-align: left; }
.status-yes { color: green; font-weight: bold; }
.status-no { color: red; font-weight: bold; }
nav { margin-bottom: 20px; } /* Added for nav spacing */
</style>
</head>
<body>
<div class="container">
<h1 class="mb-4">SCADA & Drawing Device Placement Progress</h1>
<!-- Added Navigation -->
<nav class="nav nav-pills">
<a class="nav-link active" aria-current="page" href="/">SCADA Progress</a>
<a class="nav-link" href="/drawings">Drawing Progress</a>
<a class="nav-link" href="/conflicts">Conflicts</a>
</nav>
<div id="overall-progress" class="chart-container">
<span class="chart-label">Overall Progress</span>
<canvas id="overall-chart-canvas" class="panel-chart-canvas" style="max-width: 200px; max-height: 200px;"></canvas>
<div id="overall-text" style="font-weight: bold; margin-top: 10px;">Found Both: 0/0 (0%)</div>
<!-- SCADA Content Section -->
<div id="scada-content">
<h1 class="mb-4">SCADA Device Placement Progress</h1>
<p>Compares the Equipment Manifest against the SCADA view.json files.</p>
<div id="overall-scada-progress" class="chart-container">
<span class="chart-label">Overall SCADA Progress</span>
<canvas id="overall-scada-chart-canvas" class="panel-chart-canvas" style="max-width: 200px; max-height: 200px;"></canvas>
<div id="overall-scada-text" style="font-weight: bold; margin-top: 10px;">Found in SCADA: 0/0 (0%)</div>
</div>
<hr>
<h2>Progress by Control Panel</h2>
<div id="panels-progress">
<!-- Charts will be loaded here -->
<h2>SCADA Progress by Control Panel</h2>
<div id="scada-panels-progress">
<p>Loading panel data...</p>
</div>
</div>
<!-- Drawing Content Section (Initially Hidden) -->
<div id="drawings-content" style="display: none;">
<h1 class="mb-4">Drawing Device Placement Progress</h1>
<p>Compares the Equipment Manifest against the extracted text from drawing files (.txt).</p>
<div id="overall-drawing-progress" class="chart-container">
<span class="chart-label">Overall Drawing Progress</span>
<canvas id="overall-drawing-chart-canvas" class="panel-chart-canvas" style="max-width: 200px; max-height: 200px;"></canvas>
<div id="overall-drawing-text" style="font-weight: bold; margin-top: 10px;">Found in Drawing: 0/0 (0%)</div>
</div>
<hr>
<h2>Drawing Progress by Control Panel</h2>
<div id="drawing-panels-progress">
<p>Loading panel data...</p>
</div>
</div>
<!-- Conflicts Content Section (Initially Hidden) -->
<div id="conflicts-content" style="display: none;">
<h1 class="mb-4">SCADA/Drawing Conflicts <span id="conflict-count" class="badge bg-warning ms-2">0</span></h1>
<p>Items found in SCADA views but <strong>not</strong> found in the extracted drawing text files.</p>
<div id="panels-conflicts">
<p>Loading conflict data...</p>
</div>
</div>
</div>
<!-- Status Bar -->
<div class="status-bar">
<span id="status-message">Initializing...</span> | Last Commit: <span id="last-commit">N/A</span>
@ -89,7 +130,6 @@
<th>Panel</th>
<th>SCADA Status</th>
<th>Drawing Status</th>
<th>Expected Drawing File</th>
<th>Equipment Type</th>
<th>Type of Conveyor</th>
</tr>
@ -108,180 +148,332 @@
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
<script>
let chartInstances = {};
let progressDetailsData = {};
// --- Global State Variables ---
let chartInstancesScada = {}; // Separate instances for SCADA
let chartInstancesDrawing = {}; // Separate instances for Drawing
let progressDetailsData = {}; // Stores the raw data from SSE (shared)
let previousCommitHash = null; // Single hash for the whole page
let detailsModalInstance = null;
let currentVisibleSection = 'scada'; // Track visible section: 'scada', 'drawing', 'conflicts'
// Define labels and colors consistently
const chartLabels = ['Found Both', 'SCADA Only', 'Drawing Only', 'Missing Both'];
const chartColors = [
'rgb(25, 135, 84)', // Green (Found Both)
'rgb(13, 202, 240)', // Cyan (SCADA Only)
'rgb(255, 193, 7)', // Yellow (Drawing Only)
'rgb(220, 53, 69)' // Red (Missing Both)
];
const listKeys = ['found_both_list', 'found_scada_only_list', 'found_drawing_only_list', 'missing_list'];
// --- Chart Configurations ---
const scadaChartLabels = ['Found in SCADA', 'Not Found in SCADA'];
const scadaChartColors = ['rgb(13, 110, 253)', 'rgb(220, 53, 69)'];
const drawingChartLabels = ['Found in Drawing', 'Not Found in Drawing'];
const drawingChartColors = ['rgb(25, 135, 84)', 'rgb(220, 53, 69)'];
// --- Chart Click Handler (Updated) ---
function handleChartClick(event, elements, chart) {
if (elements.length > 0) {
const clickedElementIndex = elements[0].index;
const isOverallChart = chart.canvas.id === 'overall-chart-canvas';
const identifier = isOverallChart ? '__overall__' : chart.canvas.id.replace('chart-', '');
// Map clicked index to the correct list type/key
if (clickedElementIndex >= 0 && clickedElementIndex < listKeys.length) {
const listType = listKeys[clickedElementIndex];
showDetailsModal(identifier, listType);
} else {
console.warn("Clicked unknown chart segment index:", clickedElementIndex);
}
}
}
// --- UI Update Function (Heavily Updated) ---
function updateUI(data) {
console.log("Updating UI with data:", data);
progressDetailsData = data.progress;
// Update status bar
document.getElementById('status-message').textContent = data.status;
document.getElementById('last-commit').textContent = data.last_commit || 'N/A';
// --- Update Overall Chart & Text ---
const overallData = progressDetailsData.overall;
const overallTotal = overallData.total_csv;
const overallChartCounts = [
overallData.found_both,
overallData.found_scada_only,
overallData.found_drawing_only,
overallData.missing_both
];
// Update text (showing found both %)
document.getElementById('overall-text').textContent = `Found Both: ${overallData.found_both}/${overallTotal} (${overallData.percentage_found_both}%)`;
const overallChartConfig = {
type: 'pie',
data: {
labels: chartLabels,
datasets: [{
label: 'Overall Aliases',
data: overallChartCounts,
backgroundColor: chartColors,
hoverOffset: 4
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
onClick: handleChartClick,
plugins: {
legend: { display: false },
tooltip: {
callbacks: {
label: function(context) {
let label = context.label || '';
if (label) label += ': ';
const value = context.parsed;
if (value !== null) label += value;
if (overallTotal > 0) {
label += ` (${((value / overallTotal) * 100).toFixed(1)}%)`;
}
return label;
}
}
}
}
}
// Map backend list keys for modal clicks (can be combined or kept separate if needed)
const scadaListKeysMap = {
found: ['found_both_list', 'found_scada_only_list'],
notFound: ['found_drawing_only_list', 'missing_list']
};
const drawingListKeysMap = {
found: ['found_both_list', 'found_drawing_only_list'],
notFound: ['found_scada_only_list', 'missing_list']
};
const overallCanvas = document.getElementById('overall-chart-canvas');
if (chartInstances['overall']) {
chartInstances['overall'].data = overallChartConfig.data;
chartInstances['overall'].update();
} else if (overallCanvas) {
const ctxOverall = overallCanvas.getContext('2d');
chartInstances['overall'] = new Chart(ctxOverall, overallChartConfig);
// --- Debounce Utility (Only need one) ---
function debounce(func, wait) {
let timeout;
return function executedFunction(...args) {
const later = () => {
clearTimeout(timeout);
func(...args);
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
};
}
// NOTE(review): this span appears to be merge/diff residue — top-level
// consts from an older updateUI() body are interleaved with the newer
// handleChartClick(), and the braces do not balance (the panel-removal
// loop and the showDetailsModal call are tangled together). Annotated
// only; needs manual reconciliation against the intended version.
// --- Update Panel Charts ---
const panelsContainer = document.getElementById('panels-progress');
const panelsData = progressDetailsData.panels;
const sortedPanels = Object.keys(panelsData).sort();
const currentPanelsOnPage = new Set(Object.keys(chartInstances).filter(k => k !== 'overall'));
const incomingPanels = new Set(sortedPanels);
// --- Chart Click Handler (Needs context: SCADA or Drawing?) ---
function handleChartClick(event, elements, chart, context) { // Added context
if (elements.length > 0) {
const clickedElementIndex = elements[0].index;
const isOverallChart = chart.canvas.id.startsWith('overall-'); // More robust check
const identifier = isOverallChart ? '__overall__' : chart.canvas.id.replace(`chart-${context}-`, ''); // Use context
// Slice 0 of each pie is the "found" bucket; any other slice is "notFound".
const categoryType = clickedElementIndex === 0 ? 'found' : 'notFound';
// Remove charts for panels no longer present
// NOTE(review): the loop below mixes chart-cleanup logic with the modal
// call — showDetailsModal should fire for the clicked slice, not inside
// a panel-removal loop. Looks like two diff hunks fused together.
currentPanelsOnPage.forEach(panelName => {
if (!incomingPanels.has(panelName)) {
if(chartInstances[panelName]) { chartInstances[panelName].destroy(); delete chartInstances[panelName]; }
const chartElement = document.getElementById(`chart-container-${panelName}`);
if (chartElement) chartElement.remove();
showDetailsModal(identifier, categoryType, context); // Pass context to modal
}
}
});
// Update or create charts for current panels
// NOTE(review): the block below belongs to the older updateUI() body.
if (sortedPanels.length === 0) {
panelsContainer.innerHTML = '<p>No panel data available yet.</p>';
// --- Core UI Update Functions (One for each section) ---
// Redraws the SCADA view from a fresh progress payload: updates the summary
// text, then creates/updates the overall pie chart only while the SCADA
// section is visible (destroying it when hidden), and finally delegates the
// per-panel charts to updatePanelCharts().
function updateUIScadaCore(data) {
console.log("Running core SCADA UI redraw logic for commit:", data.last_commit);
progressDetailsData = data.progress; // Update shared raw data
// --- Overall SCADA Chart ---
// "Found in SCADA" = found in both sources + found in SCADA only.
const overallData = progressDetailsData.overall;
const overallTotal = overallData.total_csv;
const overallFoundScada = overallData.found_both + overallData.found_scada_only;
const overallNotFoundScada = overallData.found_drawing_only + overallData.missing_both;
const overallPercentageFound = overallTotal > 0 ? ((overallFoundScada / overallTotal) * 100).toFixed(1) : 0;
const overallChartCounts = [overallFoundScada, overallNotFoundScada];
document.getElementById('overall-scada-text').textContent = `Found in SCADA: ${overallFoundScada}/${overallTotal} (${overallPercentageFound}%)`;
// --- Only update/create chart if section is visible ---
const isSectionVisible = (currentVisibleSection === 'scada');
if (isSectionVisible) {
const overallScadaCanvas = document.getElementById('overall-scada-chart-canvas');
if (chartInstancesScada['overall']) {
// Skip the redraw entirely when the counts are unchanged.
if (JSON.stringify(chartInstancesScada['overall'].data.datasets[0].data) !== JSON.stringify(overallChartCounts)) {
chartInstancesScada['overall'].data.datasets[0].data = overallChartCounts;
chartInstancesScada['overall'].update('none');
}
} else if (overallScadaCanvas) {
console.log("Creating overall SCADA chart (visible).");
const ctxOverall = overallScadaCanvas.getContext('2d');
chartInstancesScada['overall'] = new Chart(ctxOverall, createChartConfig(overallChartCounts, overallTotal, 'scada', 'overall'));
}
} else {
// Remove loading message if it exists
// NOTE(review): the two lines below look like stray diff residue —
// `panelsContainer` is declared with `const` only AFTER this branch
// (temporal dead zone), so executing this path would throw a
// ReferenceError. Confirm against the intended version and remove.
const loadingMsg = panelsContainer.querySelector('p');
if (loadingMsg && loadingMsg.textContent.includes('Loading')) { loadingMsg.remove(); }
// If section is not visible, destroy the chart instance if it exists
if (chartInstancesScada['overall']) {
console.log("Destroying hidden overall SCADA chart.");
chartInstancesScada['overall'].destroy();
delete chartInstancesScada['overall'];
}
}
// --- SCADA Panel Charts ---
const panelsContainer = document.getElementById('scada-panels-progress');
const panelsData = progressDetailsData.panels || {};
updatePanelCharts(panelsContainer, panelsData, chartInstancesScada, 'scada');
console.log("Finished SCADA UI core redraw.");
}
// Redraw the Drawing view from a fresh progress payload: refresh the summary
// text, create/update the overall pie chart only while the 'drawings'
// section is visible (destroy it when hidden), then hand the per-panel
// charts off to updatePanelCharts().
function updateUIDrawingCore(data) {
    console.log("Running core Drawing UI redraw logic for commit:", data.last_commit);
    progressDetailsData = data.progress; // refresh the shared snapshot
    // "Found in Drawing" = found in both sources + found in drawing only.
    const totals = progressDetailsData.overall;
    const csvTotal = totals.total_csv;
    const foundCount = totals.found_both + totals.found_drawing_only;
    const missingCount = totals.found_scada_only + totals.missing_both;
    const pctFound = csvTotal > 0 ? ((foundCount / csvTotal) * 100).toFixed(1) : 0;
    const counts = [foundCount, missingCount];
    document.getElementById('overall-drawing-text').textContent = `Found in Drawing: ${foundCount}/${csvTotal} (${pctFound}%)`;
    const existing = chartInstancesDrawing['overall'];
    if (currentVisibleSection === 'drawings') {
        const canvasEl = document.getElementById('overall-drawing-chart-canvas');
        if (existing) {
            // Touch the chart only when the counts actually changed.
            if (JSON.stringify(existing.data.datasets[0].data) !== JSON.stringify(counts)) {
                existing.data.datasets[0].data = counts;
                existing.update('none');
            }
        } else if (canvasEl) {
            console.log("Creating overall drawing chart (visible).");
            chartInstancesDrawing['overall'] = new Chart(canvasEl.getContext('2d'), createChartConfig(counts, csvTotal, 'drawing', 'overall'));
        }
    } else if (existing) {
        // Section hidden: drop the instance so it is rebuilt on next show.
        console.log("Destroying hidden overall Drawing chart.");
        existing.destroy();
        delete chartInstancesDrawing['overall'];
    }
    // Per-panel drawing charts (updatePanelCharts re-checks visibility).
    const container = document.getElementById('drawing-panels-progress');
    const perPanel = progressDetailsData.panels || {};
    console.log(`[updateUIDrawingCore] Found drawing panels container:`, container ? container.id : 'Not Found'); // Added Log
    updatePanelCharts(container, perPanel, chartInstancesDrawing, 'drawings'); // context matches section id
    console.log("Finished Drawing UI core redraw.");
}
// Rebuild the Conflicts view: one heading + table per panel that has
// aliases found in SCADA but absent from the drawings
// (found_scada_only_list), and refresh the total-count badge.
function updateUIConflictsCore(data) {
    console.log("Running core Conflicts UI redraw logic for commit:", data.last_commit);
    progressDetailsData = data.progress; // refresh the shared snapshot
    const container = document.getElementById('panels-conflicts');
    container.innerHTML = ''; // rebuild the section from scratch
    const perPanel = progressDetailsData.panels;
    let conflictTotal = 0;
    let panelsWithIssues = 0;
    if (!perPanel || Object.keys(perPanel).length === 0) {
        container.innerHTML = '<p class="text-center fst-italic">No panel data available yet.</p>';
    } else {
        for (const panelName of Object.keys(perPanel).sort()) {
            const entry = perPanel[panelName];
            const conflicts = entry.found_scada_only_list || [];
            if (conflicts.length === 0) continue; // panel is clean
            panelsWithIssues += 1;
            conflictTotal += conflicts.length;
            // Panel heading with its conflict count.
            const heading = document.createElement('h4');
            heading.className = 'mt-4 mb-2';
            heading.textContent = `${panelName} (${conflicts.length} conflicts)`;
            container.appendChild(heading);
            // One table per panel, rows sorted by alias.
            const table = document.createElement('table');
            table.className = 'table table-sm table-striped table-hover table-bordered';
            const thead = table.createTHead();
            thead.innerHTML = `<tr><th>Alias</th><th>Panel</th><th>SCADA Status</th><th>Drawing Status</th><th>Equipment Type</th><th>Type of Conveyor</th></tr>`;
            const tbody = table.createTBody();
            for (const item of conflicts.sort((a, b) => a.alias.localeCompare(b.alias))) {
                const row = tbody.insertRow();
                row.classList.add('table-warning');
                row.insertCell().textContent = item.alias;
                row.insertCell().textContent = item.control_panel;
                row.insertCell().innerHTML = '<span class="status-yes">Yes</span>';
                row.insertCell().innerHTML = '<span class="status-no">No</span>';
                row.insertCell().textContent = item.equipment_type || 'N/A';
                row.insertCell().textContent = item.conveyor_type || 'N/A';
            }
            container.appendChild(table);
        }
        if (panelsWithIssues === 0) {
            container.innerHTML = '<p class="text-center fst-italic">No conflicts found across all panels.</p>';
        }
    }
    // Badge shows the grand total and hides itself when there is nothing.
    const badge = document.getElementById('conflict-count');
    if (badge) {
        badge.textContent = conflictTotal;
        badge.style.display = conflictTotal > 0 ? 'inline-block' : 'none';
    }
    console.log("Finished Conflicts UI core redraw.");
}
// --- Generic Panel Chart Update Logic ---
// NOTE(review): this function contains interleaved old-version and
// new-version lines from a mis-rendered diff (duplicate declarations,
// duplicate element creation, an orphaned object literal near the end).
// It is annotated in place below; it will not parse as-is and must be
// reconciled against the intended version.
// Intended contract: given a container element, the per-panel progress
// map, the chart-instance registry for one context ('scada'/'drawings'),
// create/update one pie per panel when that context is visible, and
// destroy charts for hidden contexts or removed panels.
function updatePanelCharts(panelsContainer, panelsData, chartInstances, context) { // context: 'scada' or 'drawing'
const incomingPanelNames = new Set(Object.keys(panelsData).sort());
const existingInstanceNames = new Set(Object.keys(chartInstances).filter(k => k !== 'overall'));
// --- Check if the context matches the currently visible section ---
const isSectionVisible = (context === currentVisibleSection);
if (!isSectionVisible) {
// If section is not visible, destroy existing panel chart instances for this context
console.log(`Destroying hidden panel charts for context: ${context}`);
existingInstanceNames.forEach(panelName => {
if (chartInstances[panelName]) {
chartInstances[panelName].destroy();
delete chartInstances[panelName];
}
});
// Don't proceed further if the section is hidden
return;
}
if (incomingPanelNames.size > 0) {
const loadingMsg = panelsContainer.querySelector('p');
if (loadingMsg) { loadingMsg.remove(); }
incomingPanelNames.forEach(panelName => {
const panel = panelsData[panelName];
const panelTotal = panel.total;
// NOTE(review): old-version 4-bucket counts — superseded by the
// 2-bucket `let panelChartCounts` immediately below; keeping both
// is a SyntaxError (const + let redeclaration in one scope).
const panelChartCounts = [
panel.found_both,
panel.found_scada_only,
panel.found_drawing_only,
panel.missing_both
];
let panelChartCounts;
if (context === 'scada') {
panelChartCounts = [panel.found_both + panel.found_scada_only, panel.found_drawing_only + panel.missing_both];
} else { // drawing
panelChartCounts = [panel.found_both + panel.found_drawing_only, panel.found_scada_only + panel.missing_both];
}
// NOTE(review): the lookups/creation below using IDs WITHOUT the
// context prefix are old-version residue; the new code uses
// `chart-${context}-${panelName}` style IDs.
let chartContainer = document.getElementById(`chart-container-${panelName}`);
let canvas = document.getElementById(`chart-${panelName}`);
// Create container and canvas if they don't exist
if (!chartContainer) {
chartContainer = document.createElement('div');
chartContainer.id = `chart-container-${panelName}`;
// --- Only update/create chart if section is visible ---
if (isSectionVisible) {
if (chartInstances[panelName]) {
if (JSON.stringify(chartInstances[panelName].data.datasets[0].data) !== JSON.stringify(panelChartCounts)) {
chartInstances[panelName].data.datasets[0].data = panelChartCounts;
chartInstances[panelName].update('none');
}
} else {
let canvas = document.getElementById(`chart-${context}-${panelName}`); // Use context in ID
if (canvas) {
console.log(`Recreating ${context} chart instance for panel (visible): ${panelName}`);
const ctx = canvas.getContext('2d');
chartInstances[panelName] = new Chart(ctx, createChartConfig(panelChartCounts, panelTotal, context, panelName));
} else {
console.log(`Creating new ${context} panel elements and chart (visible) for: ${panelName}`);
const chartContainer = document.createElement('div');
chartContainer.id = `chart-container-${context}-${panelName}`; // Use context in ID
chartContainer.className = 'chart-container';
const label = document.createElement('span');
label.className = 'chart-label'; label.textContent = panelName;
// NOTE(review): the next two lines (no-context canvas id) are
// old-version residue, immediately overwritten by the
// context-prefixed creation that follows.
canvas = document.createElement('canvas');
canvas.id = `chart-${panelName}`;
canvas = document.createElement('canvas'); // Reassign canvas variable
canvas.id = `chart-${context}-${panelName}`; // Use context in ID
canvas.className = 'panel-chart-canvas';
chartContainer.appendChild(label);
chartContainer.appendChild(canvas);
// NOTE(review): chartContainer is appended twice (next line and
// again after the log) — one of these appends is diff residue.
panelsContainer.appendChild(chartContainer);
// Added Log before append
console.log(`[updatePanelCharts] Appending chartContainer (${chartContainer.id}) to panelsContainer (${panelsContainer ? panelsContainer.id : 'null'})`);
panelsContainer.appendChild(chartContainer); // Append to the main panels progress div
const ctx = canvas.getContext('2d');
chartInstances[panelName] = new Chart(ctx, createChartConfig(panelChartCounts, panelTotal, context, panelName));
}
}
}
// --- End visibility check ---
});
} else {
if (!panelsContainer.querySelector('p')) {
panelsContainer.innerHTML = '<p class="text-center fst-italic">No panel data available yet.</p>';
}
}
// NOTE(review): orphaned old-version object literal header — the body
// that followed it was replaced by the cleanup loop below.
const panelChartConfig = {
existingInstanceNames.forEach(panelName => {
if (!incomingPanelNames.has(panelName)) {
console.log(`Removing ${context} panel elements and chart for: ${panelName}`);
// Ensure chart is destroyed before removing element
if (chartInstances[panelName]) {
chartInstances[panelName].destroy();
delete chartInstances[panelName];
}
const chartElement = document.getElementById(`chart-container-${context}-${panelName}`); // Use context
if (chartElement) {
chartElement.remove();
}
}
});
}
// --- Generic Helper to create chart config --- Needs context ---
// NOTE(review): this block contains duplicated old/new lines from a
// mis-rendered diff (two `labels:` keys, two `onClick:` keys, two tooltip
// `label:` callbacks, a literal diff hunk marker, and an old-version
// update/create tail after the closing `};`). Annotated in place; must be
// reconciled before it will parse.
// Intended contract: build a Chart.js pie config for either context using
// that context's labels/colors, wiring clicks through handleChartClick.
function createChartConfig(chartCounts, total, context, identifier) { // identifier is 'overall' or panelName
const labels = context === 'scada' ? scadaChartLabels : drawingChartLabels;
const colors = context === 'scada' ? scadaChartColors : drawingChartColors;
const datasetLabel = context === 'scada' ? 'SCADA Match' : 'Drawing Match';
return {
type: 'pie',
data: {
// NOTE(review): duplicate `labels` keys — first is old-version residue.
labels: chartLabels,
labels: labels,
datasets: [{
// NOTE(review): old-version dataset fields (label/data/backgroundColor
// referencing out-of-scope names) followed by their replacements.
label: 'Aliases',
data: panelChartCounts,
backgroundColor: chartColors,
label: datasetLabel,
data: chartCounts,
backgroundColor: colors,
hoverOffset: 4
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
// NOTE(review): duplicate `onClick` keys — first is old-version residue.
onClick: handleChartClick,
onClick: (event, elements, chart) => handleChartClick(event, elements, chart, context), // Pass context
plugins: {
legend: { display: false },
tooltip: {
callbacks: {
// NOTE(review): old tooltip callback (shadowing `context`) interleaved
// with the renamed `ctxTooltip` replacement below.
label: function(context) {
let label = context.label || '';
label: function(ctxTooltip) {
let label = ctxTooltip.label || '';
if (label) label += ': ';
const value = context.parsed;
const value = ctxTooltip.parsed;
if (value !== null) label += value;
if (panelTotal > 0) {
label += ` (${((value / panelTotal) * 100).toFixed(1)}%)`;
// Use overallTotal for overall chart, panelTotal otherwise (How to get panelTotal here? Needs rethinking)
// Workaround: Don't show percentage on panel tooltips for now
const chartTotal = (identifier === 'overall' && progressDetailsData.overall) ? progressDetailsData.overall.total_csv : null;
if (chartTotal && chartTotal > 0) {
label += ` (${((value / chartTotal) * 100).toFixed(1)}%)`;
}
return label;
}
// NOTE(review): literal diff hunk marker leaked into the source below.
@ -290,96 +482,168 @@
}
}
};
// NOTE(review): everything below is old-version residue referencing
// names (panelName, canvas, panelChartConfig) not in this scope.
// Update existing chart or create new one
if (chartInstances[panelName]) {
chartInstances[panelName].data = panelChartConfig.data;
chartInstances[panelName].update();
} else if (canvas) {
const ctx = canvas.getContext('2d');
chartInstances[panelName] = new Chart(ctx, panelChartConfig);
}
});
// --- Wrapper function called by debouncer (Handles all sections) ---
// Entry point for every SSE payload: refreshes the status bar immediately,
// and schedules a full redraw of all three sections only when the commit
// hash differs from the one already rendered.
function processUpdate(data) {
    console.log("Processing update for commit:", data.last_commit);
    // Status text and commit hash are cheap — always refresh them.
    document.getElementById('status-message').textContent = data.status;
    document.getElementById('last-commit').textContent = data.last_commit || 'N/A';
    const commit = data.last_commit;
    // *** Strict check: redraw only on a commit-hash change ***
    if (!commit || commit === previousCommitHash) {
        console.log("Commit hash unchanged (" + previousCommitHash + "), skipping core UI redraw.");
        return;
    }
    console.log("Commit hash changed (" + (previousCommitHash || 'None') + " -> " + commit + ") or initial load. Queueing core redraw.");
    previousCommitHash = commit;
    // Defer the heavy redraw so the event handler returns promptly; each
    // section updater has its own visibility/efficiency checks.
    setTimeout(() => {
        updateUIScadaCore(data);
        updateUIDrawingCore(data);
        updateUIConflictsCore(data);
    }, 0);
}
// NOTE(review): this span interleaves the OLD showDetailsModal(identifier,
// listKey) with the NEW showDetailsModal(identifier, categoryType, context)
// from a mis-rendered diff — two function headers, duplicated declarations,
// and stale `listKey`-based lines remain. Annotated in place; must be
// reconciled before it will parse.
// --- Modal Display Function (Heavily Updated) ---
// NOTE(review): old-version header — superseded by the 3-argument form below.
function showDetailsModal(identifier, listKey) {
// --- Debounced version of the processing function ---
const debouncedProcessUpdate = debounce(processUpdate, 250); // Single debouncer
// --- Modal Display Function (Needs context) ---
// Intended contract: show the details modal listing the aliases behind a
// clicked pie slice — `identifier` is '__overall__' or a panel name,
// `categoryType` is 'found'/'notFound', `context` picks SCADA vs Drawing.
function showDetailsModal(identifier, categoryType, context) { // Added context
let sourceData = null;
let panelNameDisplay = ""; // Name to show in the title
// NOTE(review): old-version line — `listKey`/`listKeys`/`chartLabels` are
// not defined in the new signature's scope.
const listTypeLabel = chartLabels[listKeys.indexOf(listKey)] || "Details"; // Get nice label
let panelNameDisplay = "";
const listKeysMap = context === 'scada' ? scadaListKeysMap : drawingListKeysMap;
const listTypeLabel = categoryType === 'found'
? (context === 'scada' ? 'Found in SCADA' : 'Found in Drawing')
: (context === 'scada' ? 'Not Found in SCADA' : 'Not Found in Drawing');
if (identifier === '__overall__') {
sourceData = progressDetailsData.overall;
panelNameDisplay = "Overall";
} else {
// NOTE(review): the unguarded lookup is the old line; the null-safe
// lookup two lines below is its replacement.
sourceData = progressDetailsData.panels[identifier];
panelNameDisplay = identifier; // Use panel name from identifier
sourceData = progressDetailsData.panels ? progressDetailsData.panels[identifier] : null;
panelNameDisplay = identifier;
}
// NOTE(review): old-version validation block (listKey-based), followed by
// the condensed new-version checks.
if (!sourceData || !sourceData[listKey]) {
console.error("Data list not found for:", identifier, listKey);
alert(`Could not find data for ${listTypeLabel} in ${panelNameDisplay}.`);
return;
}
if (!sourceData) { /* ... error handling ... */ return; }
const dataList = sourceData[listKey];
const backendListKeys = listKeysMap[categoryType];
if (!backendListKeys) { /* ... error handling ... */ return; }
if (!dataList || dataList.length === 0) {
console.log(`No items to show for:`, panelNameDisplay, listKey);
alert(`No ${listTypeLabel} items found for ${panelNameDisplay}.`);
return;
// New version: merge the per-category backend lists into one array.
let combinedDataList = [];
backendListKeys.forEach(key => {
if (sourceData[key]) {
combinedDataList = combinedDataList.concat(sourceData[key]);
}
});
if (combinedDataList.length === 0) { /* ... alert handling ... */ return; }
const modalTitleElement = document.getElementById('detailsModalLabel');
const modalTableBody = document.querySelector('#detailsModal .modal-body tbody');
// Update modal title dynamically
// NOTE(review): duplicate title/clear lines — dataList-based ones are old.
modalTitleElement.innerHTML = `${listTypeLabel} Items for ${panelNameDisplay} <span class="badge bg-secondary ms-2">${dataList.length}</span>`;
modalTitleElement.innerHTML = `${listTypeLabel} Items for ${panelNameDisplay} <span class="badge bg-secondary ms-2">${combinedDataList.length}</span>`;
modalTableBody.innerHTML = '';
modalTableBody.innerHTML = ''; // Clear previous entries
// Populate table rows with detailed info
// NOTE(review): old iteration header (dataList) followed by its sorted
// combinedDataList replacement.
dataList.forEach(item => {
combinedDataList.sort((a, b) => a.alias.localeCompare(b.alias)).forEach(item => {
const row = document.createElement('tr');
row.insertCell().textContent = item.alias;
row.insertCell().textContent = item.control_panel;
// SCADA Status Cell
const scadaCell = row.insertCell();
scadaCell.innerHTML = item.found_scada
? '<span class="status-yes">Yes</span>'
: '<span class="status-no">No</span>';
// Drawing Status Cell
const drawingCell = row.insertCell();
drawingCell.innerHTML = item.found_drawing
? '<span class="status-yes">Yes</span>'
: '<span class="status-no">No</span>';
row.insertCell().textContent = item.expected_drawing_filename || 'N/A';
// New-version condensed cell population follows.
const scadaCell = row.insertCell(); scadaCell.innerHTML = item.found_scada ? '<span class="status-yes">Yes</span>' : '<span class="status-no">No</span>';
const drawingCell = row.insertCell(); drawingCell.innerHTML = item.found_drawing ? '<span class="status-yes">Yes</span>' : '<span class="status-no">No</span>';
row.insertCell().textContent = item.equipment_type || 'N/A';
row.insertCell().textContent = item.conveyor_type || 'N/A';
// Highlight SCADA-only rows (conflicts) in the shared modal.
if (item.found_scada && !item.found_drawing) { row.classList.add('table-warning'); }
modalTableBody.appendChild(row);
});
// Initialize and show modal
// Lazily create the Bootstrap modal instance on first use.
if (!detailsModalInstance) {
detailsModalInstance = new bootstrap.Modal(document.getElementById('detailsModal'));
}
detailsModalInstance.show();
}
// --- Connect to SSE stream (Unchanged) ---
// --- Navigation Handling ---
// Switch the visible dashboard section, lazily redraw its charts from the
// cached progress data, and sync the active nav-link highlight.
function showSection(sectionId) {
    console.log("Showing section:", sectionId);
    // Hide every section first, then reveal the requested one.
    for (const contentId of ['scada-content', 'drawings-content', 'conflicts-content']) {
        document.getElementById(contentId).style.display = 'none';
    }
    const target = document.getElementById(`${sectionId}-content`);
    if (target) {
        target.style.display = 'block';
        currentVisibleSection = sectionId;
        // Redraw the now-visible section from cached data, if any has
        // arrived; each core updater re-checks visibility internally.
        if (progressDetailsData && Object.keys(progressDetailsData).length > 0) {
            const payload = { progress: progressDetailsData };
            console.log(`Calling update function for now-visible section: ${sectionId}`);
            const updaters = {
                scada: updateUIScadaCore,
                drawings: updateUIDrawingCore,
                conflicts: updateUIConflictsCore
            };
            const updater = updaters[sectionId];
            if (updater) updater(payload);
        } else {
            // Data will be drawn by the SSE handler once it arrives.
            console.log(`Section ${sectionId} shown, but no progress data yet.`);
        }
    } else {
        // Unknown id: fall back to the SCADA view.
        console.error("Attempted to show unknown section:", sectionId);
        document.getElementById('scada-content').style.display = 'block';
        currentVisibleSection = 'scada';
    }
    // Highlight the nav link whose data-target-section matches.
    document.querySelectorAll('.nav-link').forEach(navLink => {
        navLink.classList.remove('active');
        if (navLink.getAttribute('data-target-section') === sectionId) {
            navLink.classList.add('active');
        }
    });
}
// On DOM ready: tag each nav link with its target section (derived from its
// href), wire click handlers that swap sections in place without a page
// reload, then show the default SCADA section.
document.addEventListener('DOMContentLoaded', () => {
    console.log("DOM Loaded, setting up navigation...");
    const hrefToSection = { '/drawings': 'drawings', '/conflicts': 'conflicts' };
    document.querySelectorAll('.nav-link').forEach(navLink => {
        // Anything other than the two known hrefs maps to 'scada'.
        const section = hrefToSection[navLink.getAttribute('href')] || 'scada';
        navLink.setAttribute('data-target-section', section);
        navLink.addEventListener('click', (event) => {
            event.preventDefault(); // keep it a single-page switch
            showSection(navLink.getAttribute('data-target-section'));
        });
    });
    // Default landing view.
    showSection('scada');
});
// --- Connect to SSE stream (Single connection) ---
// NOTE(review): this span is partially diff residue — a stale call to the
// removed updateUI() remains alongside the debounced replacement, a literal
// hunk marker leaked in, and the onmessage handler's closing brace is
// missing from the rendered text. Annotated in place.
console.log("Initializing SSE connection...");
const eventSource = new EventSource("/stream");
eventSource.onmessage = function(event) {
console.log("SSE message received:", event.data);
try {
const data = JSON.parse(event.data);
// NOTE(review): updateUI() no longer exists in the new version — the
// debouncedProcessUpdate call on the next line is its replacement.
updateUI(data); // Call the UI update function with the new data
debouncedProcessUpdate(data); // Call the single debounced processor
} catch (error) {
console.error("Error parsing SSE data:", error);
document.getElementById('status-message').textContent = 'Error processing update from server.';
// NOTE(review): literal diff hunk marker leaked into the source below;
// the catch block and onmessage handler are never closed in this view.
@ -389,10 +653,9 @@
eventSource.onerror = function(err) {
console.error("EventSource failed:", err);
document.getElementById('status-message').textContent = 'Connection to server lost. Retrying...';
// Note: browser usually attempts reconnection automatically
};
// No need for initial fetch here, SSE stream sends initial state on connect
console.log("SSE handler set up.");
</script>
</body>

View File

@ -0,0 +1 @@
pip

View File

@ -0,0 +1,173 @@
Metadata-Version: 2.4
Name: pypdf
Version: 5.4.0
Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
Author-email: Mathieu Fenniak <biziqe@mathieu.fenniak.net>
Maintainer: stefan6419846
Maintainer-email: Martin Thoma <info@martin-thoma.de>
Requires-Python: >=3.8
Description-Content-Type: text/markdown
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Operating System :: OS Independent
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
License-File: LICENSE
Requires-Dist: typing_extensions >= 4.0; python_version < '3.11'
Requires-Dist: cryptography ; extra == "crypto"
Requires-Dist: PyCryptodome ; extra == "cryptodome"
Requires-Dist: black ; extra == "dev"
Requires-Dist: flit ; extra == "dev"
Requires-Dist: pip-tools ; extra == "dev"
Requires-Dist: pre-commit<2.18.0 ; extra == "dev"
Requires-Dist: pytest-cov ; extra == "dev"
Requires-Dist: pytest-socket ; extra == "dev"
Requires-Dist: pytest-timeout ; extra == "dev"
Requires-Dist: pytest-xdist ; extra == "dev"
Requires-Dist: wheel ; extra == "dev"
Requires-Dist: myst_parser ; extra == "docs"
Requires-Dist: sphinx ; extra == "docs"
Requires-Dist: sphinx_rtd_theme ; extra == "docs"
Requires-Dist: cryptography ; extra == "full"
Requires-Dist: Pillow>=8.0.0 ; extra == "full"
Requires-Dist: Pillow>=8.0.0 ; extra == "image"
Project-URL: Bug Reports, https://github.com/py-pdf/pypdf/issues
Project-URL: Changelog, https://pypdf.readthedocs.io/en/latest/meta/CHANGELOG.html
Project-URL: Documentation, https://pypdf.readthedocs.io/en/latest/
Project-URL: Source, https://github.com/py-pdf/pypdf
Provides-Extra: crypto
Provides-Extra: cryptodome
Provides-Extra: dev
Provides-Extra: docs
Provides-Extra: full
Provides-Extra: image
[![PyPI version](https://badge.fury.io/py/pypdf.svg)](https://badge.fury.io/py/pypdf)
[![Python Support](https://img.shields.io/pypi/pyversions/pypdf.svg)](https://pypi.org/project/pypdf/)
[![](https://img.shields.io/badge/-documentation-green)](https://pypdf.readthedocs.io/en/stable/)
[![GitHub last commit](https://img.shields.io/github/last-commit/py-pdf/pypdf)](https://github.com/py-pdf/pypdf)
[![codecov](https://codecov.io/gh/py-pdf/pypdf/branch/main/graph/badge.svg?token=id42cGNZ5Z)](https://codecov.io/gh/py-pdf/pypdf)
# pypdf
pypdf is a free and open-source pure-python PDF library capable of splitting,
[merging](https://pypdf.readthedocs.io/en/stable/user/merging-pdfs.html),
[cropping, and transforming](https://pypdf.readthedocs.io/en/stable/user/cropping-and-transforming.html)
the pages of PDF files. It can also add
custom data, viewing options, and
[passwords](https://pypdf.readthedocs.io/en/stable/user/encryption-decryption.html)
to PDF files. pypdf can
[retrieve text](https://pypdf.readthedocs.io/en/stable/user/extract-text.html)
and
[metadata](https://pypdf.readthedocs.io/en/stable/user/metadata.html)
from PDFs as well.
See [pdfly](https://github.com/py-pdf/pdfly) for a CLI application that uses pypdf to interact with PDFs.
## Installation
Install pypdf using pip:
```
pip install pypdf
```
For using pypdf with AES encryption or decryption, install extra dependencies:
```
pip install pypdf[crypto]
```
> **NOTE**: `pypdf` 3.1.0 and above include significant improvements compared to
> previous versions. Please refer to [the migration
> guide](https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html) for
> more information.
## Usage
```python
from pypdf import PdfReader
reader = PdfReader("example.pdf")
number_of_pages = len(reader.pages)
page = reader.pages[0]
text = page.extract_text()
```
pypdf can do a lot more, e.g. splitting, merging, reading and creating
annotations, decrypting and encrypting, and more. Check out [the
documentation](https://pypdf.readthedocs.io/en/stable/) for additional usage
examples!
For questions and answers, visit
[StackOverflow](https://stackoverflow.com/questions/tagged/pypdf)
(tagged with [pypdf](https://stackoverflow.com/questions/tagged/pypdf)).
## Contributions
Maintaining pypdf is a collaborative effort. You can support the project by
writing documentation, helping to narrow down issues, and submitting code.
See the [CONTRIBUTING.md](https://github.com/py-pdf/pypdf/blob/main/CONTRIBUTING.md) file for more information.
### Q&A
The experience pypdf users have covers the whole range from beginners who
want to make their lives easier to experts who developed software before PDF
existed. You can contribute to the pypdf community by answering questions
on [StackOverflow](https://stackoverflow.com/questions/tagged/pypdf),
helping in [discussions](https://github.com/py-pdf/pypdf/discussions),
and asking users who report issues for [MCVE](https://stackoverflow.com/help/minimal-reproducible-example)'s (Code + example PDF!).
### Issues
A good bug ticket includes a MCVE - a minimal complete verifiable example.
For pypdf, this means that you must upload a PDF that causes the bug to occur
as well as the code you're executing with all of the output. Use
`print(pypdf.__version__)` to tell us which version you're using.
### Code
All code contributions are welcome, but smaller ones have a better chance to
get included in a timely manner. Adding unit tests for new features or test
cases for bugs you've fixed helps us to ensure that the Pull Request (PR) is fine.
pypdf includes a test suite which can be executed with `pytest`:
```bash
$ pytest
===================== test session starts =====================
platform linux -- Python 3.6.15, pytest-7.0.1, pluggy-1.0.0
rootdir: /home/moose/GitHub/Martin/pypdf
plugins: cov-3.0.0
collected 233 items
tests/test_basic_features.py .. [ 0%]
tests/test_constants.py . [ 1%]
tests/test_filters.py .................x..... [ 11%]
tests/test_generic.py ................................. [ 25%]
............. [ 30%]
tests/test_javascript.py .. [ 31%]
tests/test_merger.py . [ 32%]
tests/test_page.py ......................... [ 42%]
tests/test_pagerange.py ................ [ 49%]
tests/test_papersizes.py .................. [ 57%]
tests/test_reader.py .................................. [ 72%]
............... [ 78%]
tests/test_utils.py .................... [ 87%]
tests/test_workflows.py .......... [ 91%]
tests/test_writer.py ................. [ 98%]
tests/test_xmp.py ... [100%]
========== 232 passed, 1 xfailed, 1 warning in 4.52s ==========
```

View File

@ -0,0 +1,113 @@
pypdf-5.4.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
pypdf-5.4.0.dist-info/METADATA,sha256=E-D5PSflgLScgSvyNJcLdhpDBX4H0QUafueJFd7PDSA,7262
pypdf-5.4.0.dist-info/RECORD,,
pypdf-5.4.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pypdf-5.4.0.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
pypdf-5.4.0.dist-info/licenses/LICENSE,sha256=qXrCMOXzPvEKU2eoUOsB-R8aCwZONHQsd5TSKUVX9SQ,1605
pypdf/__init__.py,sha256=WYkiisiLw4TrsrobuzUkEFGwAUbPF8V8ei_HJSdEJNY,1302
pypdf/__pycache__/__init__.cpython-312.pyc,,
pypdf/__pycache__/_cmap.cpython-312.pyc,,
pypdf/__pycache__/_doc_common.cpython-312.pyc,,
pypdf/__pycache__/_encryption.cpython-312.pyc,,
pypdf/__pycache__/_merger.cpython-312.pyc,,
pypdf/__pycache__/_page.cpython-312.pyc,,
pypdf/__pycache__/_page_labels.cpython-312.pyc,,
pypdf/__pycache__/_protocols.cpython-312.pyc,,
pypdf/__pycache__/_reader.cpython-312.pyc,,
pypdf/__pycache__/_utils.cpython-312.pyc,,
pypdf/__pycache__/_version.cpython-312.pyc,,
pypdf/__pycache__/_writer.cpython-312.pyc,,
pypdf/__pycache__/_xobj_image_helpers.cpython-312.pyc,,
pypdf/__pycache__/constants.cpython-312.pyc,,
pypdf/__pycache__/errors.cpython-312.pyc,,
pypdf/__pycache__/filters.cpython-312.pyc,,
pypdf/__pycache__/pagerange.cpython-312.pyc,,
pypdf/__pycache__/papersizes.cpython-312.pyc,,
pypdf/__pycache__/types.cpython-312.pyc,,
pypdf/__pycache__/xmp.cpython-312.pyc,,
pypdf/_cmap.py,sha256=Q4_EJC73QZ-0_I4jtLeHD-rkT5GASW9zehhNcums_0A,18642
pypdf/_codecs/__init__.py,sha256=WXMkzlMCDlmG5U6ixQk8MrYxaQeJxEfig5DTaGlklLk,1676
pypdf/_codecs/__pycache__/__init__.cpython-312.pyc,,
pypdf/_codecs/__pycache__/_codecs.cpython-312.pyc,,
pypdf/_codecs/__pycache__/adobe_glyphs.cpython-312.pyc,,
pypdf/_codecs/__pycache__/pdfdoc.cpython-312.pyc,,
pypdf/_codecs/__pycache__/std.cpython-312.pyc,,
pypdf/_codecs/__pycache__/symbol.cpython-312.pyc,,
pypdf/_codecs/__pycache__/zapfding.cpython-312.pyc,,
pypdf/_codecs/_codecs.py,sha256=zduPFkHbt9BjCpAc7Mx_rSOTEoSOZkUayr8EL5l82VM,9966
pypdf/_codecs/adobe_glyphs.py,sha256=t3cDFPDqwIz1w9B0gdVzjdc8eEK9AuRjk5f7laEw_fY,447213
pypdf/_codecs/pdfdoc.py,sha256=xfSvMFYsvxuaSQ0Uu9vZDKaB0Wu85h1uCiB1i9rAcUU,4269
pypdf/_codecs/std.py,sha256=DyQMuEpAGEpS9uy1jWf4cnj-kqShPOAij5sI7Q1YD8E,2630
pypdf/_codecs/symbol.py,sha256=nIaGQIlhWCJiPMHrwUlmGHH-_fOXyEKvguRmuKXcGAk,3734
pypdf/_codecs/zapfding.py,sha256=PQxjxRC616d41xF3exVxP1W8nM4QrZfjO3lmtLxpE_s,3742
pypdf/_crypt_providers/__init__.py,sha256=K3Z6AuXhXVeXgLet-Tukq2gt9H66OgdupsvxIS1CmkI,3054
pypdf/_crypt_providers/__pycache__/__init__.cpython-312.pyc,,
pypdf/_crypt_providers/__pycache__/_base.cpython-312.pyc,,
pypdf/_crypt_providers/__pycache__/_cryptography.cpython-312.pyc,,
pypdf/_crypt_providers/__pycache__/_fallback.cpython-312.pyc,,
pypdf/_crypt_providers/__pycache__/_pycryptodome.cpython-312.pyc,,
pypdf/_crypt_providers/_base.py,sha256=_f53Mj6vivhEZMQ4vNxN5G0IOgFY-n5_leke0c_qiNU,1711
pypdf/_crypt_providers/_cryptography.py,sha256=zT3WmbPzesvgHRkGcKAldqJ24MY3BwZViVbSc55Zxhw,4557
pypdf/_crypt_providers/_fallback.py,sha256=vsYoowR1YCAV_q-HrdIZhkUcrCb6HvRBNMYm03QtCU8,3334
pypdf/_crypt_providers/_pycryptodome.py,sha256=U1aQZ9iYBrZo-hKCjJUhGOPhwEFToiitowQ316TNrrA,3381
pypdf/_doc_common.py,sha256=lyM-6je3IbNfzL6gfYdFU2VvX3pkxj5AWHcEZRCFMQk,51871
pypdf/_encryption.py,sha256=pPg7fIfqdL96Tc6RVoBytEVjMrmZFecr_6l7dbtDFrE,48775
pypdf/_merger.py,sha256=YfSQKDiiQz2WtCmVZjxP_nv2pR2shiBf2tDiAb41c7s,1744
pypdf/_page.py,sha256=6Pts2harKZyD_qhKdbNjWLwy07Gw0QLTWIf_fAMENaA,102235
pypdf/_page_labels.py,sha256=nEU0knE7IRQ6LPhzwgw1RjJgm8WxXIfkmiHuv7ep2ow,8546
pypdf/_protocols.py,sha256=noE1y2fVE-z1wq-FkQzaS5exa8ovOFTUXqdQSvqi57c,2142
pypdf/_reader.py,sha256=tf8l66t8DmoeuZviN2YOdFHAwahnTu92ABAXiK9zCUA,51503
pypdf/_text_extraction/__init__.py,sha256=0zxSe5aXqO15dpOg5Q24FawupoTbvJCiHfBzGsWgpJE,8556
pypdf/_text_extraction/__pycache__/__init__.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/__init__.py,sha256=k1tN46gDX1zhAatD8oTGMuCJUp-pgbHjyQ8H6axXRgU,338
pypdf/_text_extraction/_layout_mode/__pycache__/__init__.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/__pycache__/_fixed_width_page.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/__pycache__/_font.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/__pycache__/_font_widths.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/__pycache__/_text_state_manager.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/__pycache__/_text_state_params.cpython-312.pyc,,
pypdf/_text_extraction/_layout_mode/_fixed_width_page.py,sha256=xXC6BwQvrOXMZmSKQ6UPnPtCnjjZ9jCCWTbEJ35E3ko,15424
pypdf/_text_extraction/_layout_mode/_font.py,sha256=F0uvly32AcFeTE4jBFg7JvuAQZSMUjO6HZgQYYFDQ40,7048
pypdf/_text_extraction/_layout_mode/_font_widths.py,sha256=Hfgsd2ftGw8Ajl7IcwNIlfLYnum-ekaadfwErcUdWtI,4265
pypdf/_text_extraction/_layout_mode/_text_state_manager.py,sha256=ugOJRALDNXW3snNAjKKKT8xmWt7D3GZZbcMVaGuVfFM,7989
pypdf/_text_extraction/_layout_mode/_text_state_params.py,sha256=b8DSoJ2easCZW_JvMl84WFFIANKGhLD1zjMVAlqScyU,5318
pypdf/_utils.py,sha256=h97CvvcQpxq7px__GzaMGzJWqJGZt2FYsZYR6wFiU3w,19300
pypdf/_version.py,sha256=xjYaBGUFGg0kGZj_WhuoFyPD8NILPsr79SaMwmYQGSg,22
pypdf/_writer.py,sha256=Kjrk1_uMUyZBlsze0qQhS-We90GIk3WtclKLzb373-s,128663
pypdf/_xobj_image_helpers.py,sha256=KVC80bgNcHBdqGEOfQbmQO4in6Foayt_lPTgSOgb-BA,14020
pypdf/annotations/__init__.py,sha256=f2k_-jAn39CCB27KxQ_e93GinnzkAHbUnnSeGJl1jyE,990
pypdf/annotations/__pycache__/__init__.cpython-312.pyc,,
pypdf/annotations/__pycache__/_base.cpython-312.pyc,,
pypdf/annotations/__pycache__/_markup_annotations.cpython-312.pyc,,
pypdf/annotations/__pycache__/_non_markup_annotations.cpython-312.pyc,,
pypdf/annotations/_base.py,sha256=7rQJyOMPtKkd_Yp2CXGT6KN17W3WOj8Albx6ehMki3w,916
pypdf/annotations/_markup_annotations.py,sha256=F4qMyS15OqXNLL9OTR5Wj2_4vO7ScG60yqNh-wayIFQ,10116
pypdf/annotations/_non_markup_annotations.py,sha256=qX51TJMTRUyWz1ogIK-cXXGK7k5oKhgYQhemA_sVxGE,3622
pypdf/constants.py,sha256=gwFz97ZB5j0Nn5R7LbWBUqBOcyEjIQRV7O598eLZSKc,20959
pypdf/errors.py,sha256=x0J5mTIbp5YcXA1pdYa5DO83uAhXP5NCO0Ankf4DsUY,1740
pypdf/filters.py,sha256=hT6e4odOa6WTpXYOxRm2r6fYOS2cocLsVdNPpjEPhn8,27869
pypdf/generic/__init__.py,sha256=nnLmD7bnhSJu1qZ774pj0eE7lmeRuYDEUcpa52-Mk5A,7168
pypdf/generic/__pycache__/__init__.cpython-312.pyc,,
pypdf/generic/__pycache__/_base.cpython-312.pyc,,
pypdf/generic/__pycache__/_data_structures.cpython-312.pyc,,
pypdf/generic/__pycache__/_files.cpython-312.pyc,,
pypdf/generic/__pycache__/_fit.cpython-312.pyc,,
pypdf/generic/__pycache__/_image_inline.cpython-312.pyc,,
pypdf/generic/__pycache__/_outline.cpython-312.pyc,,
pypdf/generic/__pycache__/_rectangle.cpython-312.pyc,,
pypdf/generic/__pycache__/_utils.cpython-312.pyc,,
pypdf/generic/__pycache__/_viewerpref.cpython-312.pyc,,
pypdf/generic/_base.py,sha256=u8oX747OyUZ5KPG8IYWUGD6lgeL-_MzWX0J-LsY0DjA,30885
pypdf/generic/_data_structures.py,sha256=kqIGv06r3p0BeUrmTePeFrEoB4v0LiulDvEkTt45TN8,63998
pypdf/generic/_files.py,sha256=UcyL_mCDpVh_dRuxxH8bENWA76rYt5eFw0emFcOE79Y,5655
pypdf/generic/_fit.py,sha256=lLkLgW0AQ36sVG4py-HXV__EPQYkLA1bNLoCwGJ_ijI,5511
pypdf/generic/_image_inline.py,sha256=OyP1GDpg-zgH-UWA--vsLIUriV_07-VqpFZ9mL31vl8,11447
pypdf/generic/_outline.py,sha256=qKbMX42OWfqnopIiE6BUy6EvdTLGe3ZtjaiWN85JpaY,1094
pypdf/generic/_rectangle.py,sha256=5KJRbQESqdzrYvJOFcwfp0_v_bhCDVj9r4yMyGXSGyc,3808
pypdf/generic/_utils.py,sha256=8T_2fGpRt9tZpN-06fa-7Wma9gFAkdtgJW2SuD7Yqfk,7415
pypdf/generic/_viewerpref.py,sha256=40YdivA2MAW6hTZEB-b_8Y84-tlNJNwXEusPmHMgS64,6739
pypdf/pagerange.py,sha256=9QqjrP6VrR2m8BN_sbbjZQ8Fi476xPpRiKqd8CxGoKM,6996
pypdf/papersizes.py,sha256=6Tz5sfNN_3JOUapY83U-lakohnpXYA0hSEQNmOVLFL8,1413
pypdf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
pypdf/types.py,sha256=6B6pMncEhcqFfq-iKs5IBPg6guWXffU6YHpeYzCJH-s,1963
pypdf/xmp.py,sha256=0G9Gmb5lc7jdcGG-MYDSxYPg5P7SU_RswVRipuDY7lU,14246

View File

@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: flit 3.11.0
Root-Is-Purelib: true
Tag: py3-none-any

View File

@ -0,0 +1,29 @@
Copyright (c) 2006-2008, Mathieu Fenniak
Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,49 @@
"""
pypdf is a free and open-source pure-python PDF library capable of splitting,
merging, cropping, and transforming the pages of PDF files. It can also add
custom data, viewing options, and passwords to PDF files. pypdf can retrieve
text and metadata from PDFs as well.
You can read the full docs at https://pypdf.readthedocs.io/.
"""
from ._crypt_providers import crypt_provider
from ._doc_common import DocumentInformation
from ._encryption import PasswordType
from ._merger import PdfMerger
from ._page import PageObject, Transformation, mult
from ._reader import PdfReader
from ._version import __version__
from ._writer import ObjectDeletionFlag, PdfWriter
from .constants import ImageType
from .pagerange import PageRange, parse_filename_page_ranges
from .papersizes import PaperSize
try:
import PIL
pil_version = PIL.__version__
except ImportError:
pil_version = "none"
_debug_versions = (
f"pypdf=={__version__}, {crypt_provider=}, PIL={pil_version}"
)
__all__ = [
"DocumentInformation",
"ImageType",
"ObjectDeletionFlag",
"PageObject",
"PageRange",
"PaperSize",
"PasswordType",
"PdfMerger",
"PdfReader",
"PdfWriter",
"Transformation",
"__version__",
"_debug_versions",
"mult",
"parse_filename_page_ranges",
]

View File

@ -0,0 +1,544 @@
import binascii
from binascii import unhexlify
from math import ceil
from typing import Any, Dict, List, Tuple, Union, cast
from ._codecs import adobe_glyphs, charset_encoding
from ._utils import logger_error, logger_warning
from .generic import (
DecodedStreamObject,
DictionaryObject,
StreamObject,
is_null_or_none,
)
# code freely inspired from @twiggy ; see #711
def build_char_map(
font_name: str, space_width: float, obj: DictionaryObject
) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any], DictionaryObject]:
"""
Determine information about a font.
Args:
font_name: font name as a string
space_width: default space width if no data is found.
obj: XObject or Page where you can find a /Resource dictionary
Returns:
Font sub-type, space_width criteria (50% of width), encoding, map character-map, font-dictionary.
The font-dictionary itself is suitable for the curious.
"""
ft: DictionaryObject = obj["/Resources"]["/Font"][font_name] # type: ignore
font_subtype, font_halfspace, font_encoding, font_map = build_char_map_from_dict(
space_width, ft
)
return font_subtype, font_halfspace, font_encoding, font_map, ft
def build_char_map_from_dict(
space_width: float, ft: DictionaryObject
) -> Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]]:
"""
Determine information about a font.
Args:
space_width: default space with if no data found
(normally half the width of a character).
ft: Font Dictionary
Returns:
Font sub-type, space_width criteria(50% of width), encoding, map character-map.
The font-dictionary itself is suitable for the curious.
"""
font_type = cast(str, ft["/Subtype"].get_object())
encoding, map_dict = get_encoding(ft)
space_key_char = get_actual_str_key(" ", encoding, map_dict)
font_width_map = build_font_width_map(ft, space_width * 2.0)
half_space_width = compute_space_width(font_width_map, space_key_char) / 2.0
return (
font_type,
half_space_width,
encoding,
# https://github.com/python/mypy/issues/4374
map_dict
)
# used when missing data, e.g. font def missing
unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict[Any, Any]] = (
"Unknown",
9999,
dict.fromkeys(range(256), "<EFBFBD>"),
{},
)
_predefined_cmap: Dict[str, str] = {
"/Identity-H": "utf-16-be",
"/Identity-V": "utf-16-be",
"/GB-EUC-H": "gbk",
"/GB-EUC-V": "gbk",
"/GBpc-EUC-H": "gb2312",
"/GBpc-EUC-V": "gb2312",
"/GBK-EUC-H": "gbk",
"/GBK-EUC-V": "gbk",
"/GBK2K-H": "gb18030",
"/GBK2K-V": "gb18030",
"/ETen-B5-H": "cp950",
"/ETen-B5-V": "cp950",
"/ETenms-B5-H": "cp950",
"/ETenms-B5-V": "cp950",
"/UniCNS-UTF16-H": "utf-16-be",
"/UniCNS-UTF16-V": "utf-16-be",
"/UniGB-UTF16-H": "gb18030",
"/UniGB-UTF16-V": "gb18030",
# UCS2 in code
}
# manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
_default_fonts_space_width: Dict[str, int] = {
"/Courier": 600,
"/Courier-Bold": 600,
"/Courier-BoldOblique": 600,
"/Courier-Oblique": 600,
"/Helvetica": 278,
"/Helvetica-Bold": 278,
"/Helvetica-BoldOblique": 278,
"/Helvetica-Oblique": 278,
"/Helvetica-Narrow": 228,
"/Helvetica-NarrowBold": 228,
"/Helvetica-NarrowBoldOblique": 228,
"/Helvetica-NarrowOblique": 228,
"/Times-Roman": 250,
"/Times-Bold": 250,
"/Times-BoldItalic": 250,
"/Times-Italic": 250,
"/Symbol": 250,
"/ZapfDingbats": 278,
}
def get_encoding(
ft: DictionaryObject
) -> Tuple[Union[str, Dict[int, str]], Dict[Any, Any]]:
encoding = _parse_encoding(ft)
map_dict, int_entry = _parse_to_unicode(ft)
# Apply rule from PDF ref 1.7 §5.9.1, 1st bullet:
# if cmap not empty encoding should be discarded
# (here transformed into identity for those characters)
# If encoding is a string it is expected to be an identity translation.
if isinstance(encoding, dict):
for x in int_entry:
if x <= 255:
encoding[x] = chr(x)
return encoding, map_dict
def _parse_encoding(
ft: DictionaryObject
) -> Union[str, Dict[int, str]]:
encoding: Union[str, List[str], Dict[int, str]] = []
if "/Encoding" not in ft:
if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
encoding = dict(
zip(range(256), charset_encoding[cast(str, ft["/BaseFont"])])
)
else:
encoding = "charmap"
return encoding
enc: Union(str, DictionaryObject) = ft["/Encoding"].get_object() # type: ignore
if isinstance(enc, str):
try:
# already done : enc = NameObject.unnumber(enc.encode()).decode()
# for #xx decoding
if enc in charset_encoding:
encoding = charset_encoding[enc].copy()
elif enc in _predefined_cmap:
encoding = _predefined_cmap[enc]
elif "-UCS2-" in enc:
encoding = "utf-16-be"
else:
raise Exception("not found")
except Exception:
logger_error(f"Advanced encoding {enc} not implemented yet", __name__)
encoding = enc
elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
try:
encoding = charset_encoding[cast(str, enc["/BaseEncoding"])].copy()
except Exception:
logger_error(
f"Advanced encoding {encoding} not implemented yet",
__name__,
)
encoding = charset_encoding["/StandardEncoding"].copy()
else:
encoding = charset_encoding["/StandardEncoding"].copy()
if "/Differences" in enc:
x: int = 0
o: Union[int, str]
for o in cast(DictionaryObject, cast(DictionaryObject, enc)["/Differences"]):
if isinstance(o, int):
x = o
else: # isinstance(o,str):
try:
if x < len(encoding):
encoding[x] = adobe_glyphs[o] # type: ignore
except Exception:
encoding[x] = o # type: ignore
x += 1
if isinstance(encoding, list):
encoding = dict(zip(range(256), encoding))
return encoding
def _parse_to_unicode(
ft: DictionaryObject
) -> Tuple[Dict[Any, Any], List[int]]:
# will store all translation code
# and map_dict[-1] we will have the number of bytes to convert
map_dict: Dict[Any, Any] = {}
# will provide the list of cmap keys as int to correct encoding
int_entry: List[int] = []
if "/ToUnicode" not in ft:
if ft.get("/Subtype", "") == "/Type1":
return _type1_alternative(ft, map_dict, int_entry)
else:
return {}, []
process_rg: bool = False
process_char: bool = False
multiline_rg: Union[
None, Tuple[int, int]
] = None # tuple = (current_char, remaining size) ; cf #1285 for example of file
cm = prepare_cm(ft)
for line in cm.split(b"\n"):
process_rg, process_char, multiline_rg = process_cm_line(
line.strip(b" \t"),
process_rg,
process_char,
multiline_rg,
map_dict,
int_entry,
)
return map_dict, int_entry
def get_actual_str_key(
value_char: str, encoding: Union[str, Dict[int, str]], map_dict: Dict[Any, Any]
) -> str:
key_dict = {}
if isinstance(encoding, dict):
key_dict = {value: chr(key) for key, value in encoding.items() if value == value_char}
else:
key_dict = {value: key for key, value in map_dict.items() if value == value_char}
key_char = key_dict.get(value_char, value_char)
return key_char
def prepare_cm(ft: DictionaryObject) -> bytes:
tu = ft["/ToUnicode"]
cm: bytes
if isinstance(tu, StreamObject):
cm = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
else: # if (tu is None) or cast(str, tu).startswith("/Identity"):
# the full range 0000-FFFF will be processed
cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
if isinstance(cm, str):
cm = cm.encode()
# we need to prepare cm before due to missing return line in pdf printed
# to pdf from word
cm = (
cm.strip()
.replace(b"beginbfchar", b"\nbeginbfchar\n")
.replace(b"endbfchar", b"\nendbfchar\n")
.replace(b"beginbfrange", b"\nbeginbfrange\n")
.replace(b"endbfrange", b"\nendbfrange\n")
.replace(b"<<", b"\n{\n") # text between << and >> not used but
.replace(b">>", b"\n}\n") # some solution to find it back
)
ll = cm.split(b"<")
for i in range(len(ll)):
j = ll[i].find(b">")
if j >= 0:
if j == 0:
# string is empty: stash a placeholder here (see below)
# see https://github.com/py-pdf/pypdf/issues/1111
content = b"."
else:
content = ll[i][:j].replace(b" ", b"")
ll[i] = content + b" " + ll[i][j + 1 :]
cm = (
(b" ".join(ll))
.replace(b"[", b" [ ")
.replace(b"]", b" ]\n ")
.replace(b"\r", b"\n")
)
return cm
def process_cm_line(
line: bytes,
process_rg: bool,
process_char: bool,
multiline_rg: Union[None, Tuple[int, int]],
map_dict: Dict[Any, Any],
int_entry: List[int],
) -> Tuple[bool, bool, Union[None, Tuple[int, int]]]:
if line == b"" or line[0] == 37: # 37 = %
return process_rg, process_char, multiline_rg
line = line.replace(b"\t", b" ")
if b"beginbfrange" in line:
process_rg = True
elif b"endbfrange" in line:
process_rg = False
elif b"beginbfchar" in line:
process_char = True
elif b"endbfchar" in line:
process_char = False
elif process_rg:
try:
multiline_rg = parse_bfrange(line, map_dict, int_entry, multiline_rg)
except binascii.Error as error:
logger_warning(f"Skipping broken line {line!r}: {error}", __name__)
elif process_char:
parse_bfchar(line, map_dict, int_entry)
return process_rg, process_char, multiline_rg
def parse_bfrange(
line: bytes,
map_dict: Dict[Any, Any],
int_entry: List[int],
multiline_rg: Union[None, Tuple[int, int]],
) -> Union[None, Tuple[int, int]]:
lst = [x for x in line.split(b" ") if x]
closure_found = False
if multiline_rg is not None:
fmt = b"%%0%dX" % (map_dict[-1] * 2)
a = multiline_rg[0] # a, b not in the current line
b = multiline_rg[1]
for sq in lst[0:]:
if sq == b"]":
closure_found = True
break
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
"surrogatepass",
)
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
int_entry.append(a)
a += 1
else:
a = int(lst[0], 16)
b = int(lst[1], 16)
nbi = max(len(lst[0]), len(lst[1]))
map_dict[-1] = ceil(nbi / 2)
fmt = b"%%0%dX" % (map_dict[-1] * 2)
if lst[2] == b"[":
for sq in lst[3:]:
if sq == b"]":
closure_found = True
break
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
"surrogatepass",
)
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
int_entry.append(a)
a += 1
else: # case without list
c = int(lst[2], 16)
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
closure_found = True
while a <= b:
map_dict[
unhexlify(fmt % a).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be",
"surrogatepass",
)
] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
int_entry.append(a)
a += 1
c += 1
return None if closure_found else (a, b)
def parse_bfchar(line: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
lst = [x for x in line.split(b" ") if x]
map_dict[-1] = len(lst[0]) // 2
while len(lst) > 1:
map_to = ""
# placeholder (see above) means empty string
if lst[1] != b".":
map_to = unhexlify(lst[1]).decode(
"charmap" if len(lst[1]) < 4 else "utf-16-be", "surrogatepass"
) # join is here as some cases where the code was split
map_dict[
unhexlify(lst[0]).decode(
"charmap" if map_dict[-1] == 1 else "utf-16-be", "surrogatepass"
)
] = map_to
int_entry.append(int(lst[0], 16))
lst = lst[2:]
def build_font_width_map(
ft: DictionaryObject, default_font_width: float
) -> Dict[Any, float]:
font_width_map: Dict[Any, float] = {}
st: int = 0
en: int = 0
try:
default_font_width = _default_fonts_space_width[cast(str, ft["/BaseFont"].get_object())] * 2.0
except KeyError:
pass
if "/DescendantFonts" in ft: # ft["/Subtype"].startswith("/CIDFontType"):
# §9.7.4.3 of the 1.7 reference ("Glyph Metrics in CIDFonts")
# Widths for a CIDFont are defined using the DW and W entries.
# DW2 and W2 are for vertical use. Vertical type is not implemented.
ft1 = ft["/DescendantFonts"][0].get_object() # type: ignore
if "/DW" in ft1:
font_width_map["default"] = cast(float, ft1["/DW"].get_object())
else:
font_width_map["default"] = default_font_width
if "/W" in ft1:
w = ft1["/W"].get_object()
else:
w = []
while len(w) > 0:
st = w[0] if isinstance(w[0], int) else w[0].get_object()
second = w[1].get_object()
if isinstance(second, int):
# C_first C_last same_W
en = second
width = w[2].get_object()
if not isinstance(width, (int, float)):
logger_warning(f"Expected numeric value for width, got {width}. Ignoring it.", __name__)
w = w[3:]
continue
for c_code in range(st, en + 1):
font_width_map[chr(c_code)] = width
w = w[3:]
elif isinstance(second, list):
# Starting_C [W1 W2 ... Wn]
c_code = st
for ww in second:
width = ww.get_object()
font_width_map[chr(c_code)] = width
c_code += 1
w = w[2:]
else:
logger_warning(
"unknown widths : \n" + (ft1["/W"]).__repr__(),
__name__,
)
break
elif "/Widths" in ft:
w = ft["/Widths"].get_object()
if "/FontDescriptor" in ft and "/MissingWidth" in cast(
DictionaryObject, ft["/FontDescriptor"]
):
font_width_map["default"] = ft["/FontDescriptor"]["/MissingWidth"].get_object() # type: ignore
else:
# will consider width of char as avg(width)
m = 0
cpt = 0
for xx in w:
xx = xx.get_object()
if xx > 0:
m += xx
cpt += 1
font_width_map["default"] = m / max(1, cpt)
st = cast(int, ft["/FirstChar"])
en = cast(int, ft["/LastChar"])
for c_code in range(st, en + 1):
try:
width = w[c_code - st].get_object()
font_width_map[chr(c_code)] = width
except (IndexError, KeyError):
# The PDF structure is invalid. The array is too small
# for the specified font width.
pass
if is_null_or_none(font_width_map.get("default")):
font_width_map["default"] = default_font_width if default_font_width else 0.0
return font_width_map
def compute_space_width(
font_width_map: Dict[Any, float], space_char: str
) -> float:
try:
sp_width = font_width_map[space_char]
if sp_width == 0:
raise ValueError("Zero width")
except (KeyError, ValueError):
sp_width = (
font_width_map["default"] / 2.0
) # if using default we consider space will be only half size
return sp_width
def compute_font_width(
font_width_map: Dict[Any, float],
char: str
) -> float:
char_width: float = 0.0
try:
char_width = font_width_map[char]
except KeyError:
char_width = (
font_width_map["default"]
)
return char_width
def _type1_alternative(
ft: DictionaryObject,
map_dict: Dict[Any, Any],
int_entry: List[int],
) -> Tuple[Dict[Any, Any], List[int]]:
if "/FontDescriptor" not in ft:
return map_dict, int_entry
ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
if is_null_or_none(ft_desc):
return map_dict, int_entry
assert ft_desc is not None, "mypy"
txt = ft_desc.get_object().get_data()
txt = txt.split(b"eexec\n")[0] # only clear part
txt = txt.split(b"/Encoding")[1] # to get the encoding part
lines = txt.replace(b"\r", b"\n").split(b"\n")
for li in lines:
if li.startswith(b"dup"):
words = [_w for _w in li.split(b" ") if _w != b""]
if len(words) > 3 and words[3] != b"put":
continue
try:
i = int(words[1])
except ValueError: # pragma: no cover
continue
try:
v = adobe_glyphs[words[2].decode()]
except KeyError:
if words[2].startswith(b"/uni"):
try:
v = chr(int(words[2][4:], 16))
except ValueError: # pragma: no cover
continue
else:
continue
map_dict[chr(i)] = v
int_entry.append(i)
return map_dict, int_entry

View File

@ -0,0 +1,61 @@
from typing import Dict, List
from .adobe_glyphs import adobe_glyphs
from .pdfdoc import _pdfdoc_encoding
from .std import _std_encoding
from .symbol import _symbol_encoding
from .zapfding import _zapfding_encoding
def fill_from_encoding(enc: str) -> List[str]:
lst: List[str] = []
for x in range(256):
try:
lst += (bytes((x,)).decode(enc),)
except Exception:
lst += (chr(x),)
return lst
def rev_encoding(enc: List[str]) -> Dict[str, int]:
rev: Dict[str, int] = {}
for i in range(256):
char = enc[i]
if char == "\u0000":
continue
assert char not in rev, f"{char} at {i} already at {rev[char]}"
rev[char] = i
return rev
_win_encoding = fill_from_encoding("cp1252")
_mac_encoding = fill_from_encoding("mac_roman")
_win_encoding_rev: Dict[str, int] = rev_encoding(_win_encoding)
_mac_encoding_rev: Dict[str, int] = rev_encoding(_mac_encoding)
_symbol_encoding_rev: Dict[str, int] = rev_encoding(_symbol_encoding)
_zapfding_encoding_rev: Dict[str, int] = rev_encoding(_zapfding_encoding)
_pdfdoc_encoding_rev: Dict[str, int] = rev_encoding(_pdfdoc_encoding)
charset_encoding: Dict[str, List[str]] = {
"/StandardEncoding": _std_encoding,
"/WinAnsiEncoding": _win_encoding,
"/MacRomanEncoding": _mac_encoding,
"/PDFDocEncoding": _pdfdoc_encoding,
"/Symbol": _symbol_encoding,
"/ZapfDingbats": _zapfding_encoding,
}
__all__ = [
"_mac_encoding",
"_pdfdoc_encoding",
"_pdfdoc_encoding_rev",
"_std_encoding",
"_symbol_encoding",
"_win_encoding",
"_zapfding_encoding",
"adobe_glyphs",
"charset_encoding",
]

View File

@ -0,0 +1,268 @@
"""
This module is for codecs only.
While the codec implementation can contain details of the PDF specification,
the module should not do any PDF parsing.
"""
import io
from abc import ABC, abstractmethod
from typing import Dict, List
from pypdf._utils import logger_warning
class Codec(ABC):
"""Abstract base class for all codecs."""
@abstractmethod
def encode(self, data: bytes) -> bytes:
"""
Encode the input data.
Args:
data: Data to encode.
Returns:
Encoded data.
"""
@abstractmethod
def decode(self, data: bytes) -> bytes:
"""
Decode the input data.
Args:
data: Data to decode.
Returns:
Decoded data.
"""
class LzwCodec(Codec):
"""Lempel-Ziv-Welch (LZW) adaptive compression codec."""
CLEAR_TABLE_MARKER = 256 # Special code to indicate table reset
EOD_MARKER = 257 # End-of-data marker
INITIAL_BITS_PER_CODE = 9 # Initial code bit width
MAX_BITS_PER_CODE = 12 # Maximum code bit width
def _initialize_encoding_table(self) -> None:
"""Initialize the encoding table and state to initial conditions."""
self.encoding_table: Dict[bytes, int] = {bytes([i]): i for i in range(256)}
self.next_code = self.EOD_MARKER + 1
self.bits_per_code = self.INITIAL_BITS_PER_CODE
self.max_code_value = (1 << self.bits_per_code) - 1
def _increase_next_code(self) -> None:
"""Update bits_per_code and max_code_value if necessary."""
self.next_code += 1
if (
self.next_code > self.max_code_value
and self.bits_per_code < self.MAX_BITS_PER_CODE
):
self.bits_per_code += 1
self.max_code_value = (1 << self.bits_per_code) - 1
def encode(self, data: bytes) -> bytes:
"""
Encode data using the LZW compression algorithm.
Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
"""
result_codes: List[int] = []
# The encoder shall begin by issuing a clear-table code
result_codes.append(self.CLEAR_TABLE_MARKER)
self._initialize_encoding_table()
current_sequence = b""
for byte in data:
next_sequence = current_sequence + bytes([byte])
if next_sequence in self.encoding_table:
# Extend current sequence if already in the table
current_sequence = next_sequence
else:
# Output code for the current sequence
result_codes.append(self.encoding_table[current_sequence])
# Add the new sequence to the table if there's room
if self.next_code <= (1 << self.MAX_BITS_PER_CODE) - 1:
self.encoding_table[next_sequence] = self.next_code
self._increase_next_code()
else:
# If the table is full, emit a clear-table command
result_codes.append(self.CLEAR_TABLE_MARKER)
self._initialize_encoding_table()
# Start new sequence
current_sequence = bytes([byte])
# Ensure everything actually is encoded
if current_sequence:
result_codes.append(self.encoding_table[current_sequence])
result_codes.append(self.EOD_MARKER)
return self._pack_codes_into_bytes(result_codes)
def _pack_codes_into_bytes(self, codes: List[int]) -> bytes:
"""
Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
The bit-width starts at 9 bits and expands as needed.
"""
self._initialize_encoding_table()
buffer = 0
bits_in_buffer = 0
output = bytearray()
for code in codes:
buffer = (buffer << self.bits_per_code) | code
bits_in_buffer += self.bits_per_code
# Codes shall be packed into a continuous bit stream, high-order bit
# first. This stream shall then be divided into bytes, high-order bit
# first.
while bits_in_buffer >= 8:
bits_in_buffer -= 8
output.append((buffer >> bits_in_buffer) & 0xFF)
if code == self.CLEAR_TABLE_MARKER:
self._initialize_encoding_table()
elif code == self.EOD_MARKER:
continue
else:
self._increase_next_code()
# Flush any remaining bits in the buffer
if bits_in_buffer > 0:
output.append((buffer << (8 - bits_in_buffer)) & 0xFF)
return bytes(output)
def _initialize_decoding_table(self) -> None:
self.max_code_value = (1 << self.MAX_BITS_PER_CODE) - 1
self.decoding_table = [bytes([i]) for i in range(self.CLEAR_TABLE_MARKER)] + [
b""
] * (self.max_code_value - self.CLEAR_TABLE_MARKER + 1)
self._table_index = self.EOD_MARKER + 1
self._bits_to_get = 9
def _next_code_decode(self, data: bytes) -> int:
self._next_data: int
try:
while self._next_bits < self._bits_to_get:
self._next_data = (self._next_data << 8) | (
data[self._byte_pointer] & 0xFF
)
self._byte_pointer += 1
self._next_bits += 8
code = (
self._next_data >> (self._next_bits - self._bits_to_get)
) & self._and_table[self._bits_to_get - 9]
self._next_bits -= self._bits_to_get
return code
except IndexError:
return self.EOD_MARKER
# The following method has been converted to Python from PDFsharp:
# https://github.com/empira/PDFsharp/blob/5fbf6ed14740bc4e16786816882d32e43af3ff5d/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
#
# Original license:
#
# -------------------------------------------------------------------------
# Copyright (c) 2001-2024 empira Software GmbH, Troisdorf (Cologne Area),
# Germany
#
# http://docs.pdfsharp.net
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# --------------------------------------------------------------------------
    def decode(self, data: bytes) -> bytes:
        """
        Decode an LZW-compressed byte stream.

        The following code was converted to Python from the following code:
        https://github.com/empira/PDFsharp/blob/master/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs

        Args:
            data: LZW-encoded input bytes.

        Returns:
            The decompressed bytes.
        """
        # Masks for extracting 9-, 10-, 11- and 12-bit codes.
        self._and_table = [511, 1023, 2047, 4095]
        self._table_index = 0
        self._bits_to_get = 9
        self._byte_pointer = 0
        self._next_data = 0
        self._next_bits = 0
        output_stream = io.BytesIO()
        self._initialize_decoding_table()
        self._byte_pointer = 0
        self._next_data = 0
        self._next_bits = 0
        old_code = self.CLEAR_TABLE_MARKER
        while True:
            code = self._next_code_decode(data)
            if code == self.EOD_MARKER:
                break
            if code == self.CLEAR_TABLE_MARKER:
                # Reset the table, then emit the next (literal) code directly.
                self._initialize_decoding_table()
                code = self._next_code_decode(data)
                if code == self.EOD_MARKER:
                    break
                output_stream.write(self.decoding_table[code])
                old_code = code
            elif code < self._table_index:
                # Known code: emit its sequence and register
                # old sequence + first byte of the new one.
                string = self.decoding_table[code]
                output_stream.write(string)
                if old_code != self.CLEAR_TABLE_MARKER:
                    self._add_entry_decode(self.decoding_table[old_code], string[0])
                old_code = code
            else:
                # The code is not in the table and not one of the special codes
                string = (
                    self.decoding_table[old_code] + self.decoding_table[old_code][:1]
                )
                output_stream.write(string)
                self._add_entry_decode(self.decoding_table[old_code], string[0])
                old_code = code
        output = output_stream.getvalue()
        return output
    def _add_entry_decode(self, old_string: bytes, new_char: int) -> None:
        """Append ``old_string + bytes([new_char])`` to the decoding table.

        Widens the code size when the table reaches the 9/10/11-bit
        capacity boundaries; silently ignores additions past
        ``self.max_code_value``.
        """
        new_string = old_string + bytes([new_char])
        if self._table_index > self.max_code_value:
            logger_warning("Ignoring too large LZW table index.", __name__)
            return
        self.decoding_table[self._table_index] = new_string
        self._table_index += 1
        # Update the number of bits to get based on the table index
        # (width grows one entry before the table is actually full).
        if self._table_index == 511:
            self._bits_to_get = 10
        elif self._table_index == 1023:
            self._bits_to_get = 11
        elif self._table_index == 2047:
            self._bits_to_get = 12

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,264 @@
# PDFDocEncoding Character Set: Table D.2 of PDF Reference 1.7
# C.1 Predefined encodings sorted by character name of another PDF reference
# Some indices have '\u0000' although they should have something else:
# 22: should be '\u0017'
# The table is written as sixteen 16-character rows concatenated into a
# single 256-character string; list() splits it into the usual
# one-character-per-code table.
_pdfdoc_encoding = list(
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f"  # 0x00-0x0f
    "\x10\x11\x12\x13\x14\x15\x00\x17\u02d8\u02c7\u02c6\u02d9\u02dd\u02db\u02da\u02dc"  # 0x10-0x1f
    " !\"#$%&'()*+,-./"  # 0x20-0x2f
    "0123456789:;<=>?"  # 0x30-0x3f
    "@ABCDEFGHIJKLMNO"  # 0x40-0x4f
    "PQRSTUVWXYZ[\\]^_"  # 0x50-0x5f
    "`abcdefghijklmno"  # 0x60-0x6f
    "pqrstuvwxyz{|}~\x00"  # 0x70-0x7f
    "\u2022\u2020\u2021\u2026\u2014\u2013\u0192\u2044\u2039\u203a\u2212\u2030\u201e\u201c\u201d\u2018"  # 0x80-0x8f
    "\u2019\u201a\u2122\ufb01\ufb02\u0141\u0152\u0160\u0178\u017d\u0131\u0142\u0153\u0161\u017e\x00"  # 0x90-0x9f
    "\u20ac\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"  # 0xa0-0xaf
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"  # 0xb0-0xbf
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"  # 0xc0-0xcf
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"  # 0xd0-0xdf
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"  # 0xe0-0xef
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"  # 0xf0-0xff
)
assert len(_pdfdoc_encoding) == 256

View File

@ -0,0 +1,258 @@
# Adobe StandardEncoding table: _std_encoding[byte] -> unicode character,
# laid out eight entries per line.
# NOTE(review): several entries are empty strings (e.g. indices 39, 96,
# 164, 170-175) — these look like characters lost in an earlier
# conversion; verify against the Adobe StandardEncoding specification.
_std_encoding = [
    "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
    "\x08", "\t", "\n", "\x0b", "\x0c", "\r", "\x0e", "\x0f",
    "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17",
    "\x18", "\x19", "\x1a", "\x1b", "\x1c", "\x1d", "\x1e", "\x1f",
    " ", "!", '"', "#", "$", "%", "&", "",
    "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "<", "=", ">", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    "", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "|", "}", "~", "\x7f",
    "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
    "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
    "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
    "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
    "\xa0", "¡", "¢", "£", "", "¥", "ƒ", "§",
    "¤", "'", "", "«", "", "", "", "",
    "°", "", "", "", "·", "µ", "", "",
    "", "", "", "»", "", "", "¾", "¿",
    "À", "`", "´", "ˆ", "˜", "¯", "˘", "˙",
    "¨", "É", "˚", "¸", "Ì", "˝", "˛", "ˇ",
    "", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "×",
    "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß",
    "à", "Æ", "â", "ª", "ä", "å", "æ", "ç",
    "Ł", "Ø", "Œ", "º", "ì", "í", "î", "ï",
    "ð", "æ", "ò", "ó", "ô", "ı", "ö", "÷",
    "ł", "ø", "œ", "ß", "ü", "ý", "þ", "ÿ",
]

View File

@ -0,0 +1,260 @@
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
# The table is written as sixteen 16-character rows concatenated into a
# single 256-character string; list() splits it into the usual
# one-character-per-code table.
_symbol_encoding = list(
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f"  # 0x00-0x0f
    "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"  # 0x10-0x1f
    " !\u2200#\u2203%&\u220b()\u2217+,\u2212./"  # 0x20-0x2f
    "0123456789:;<=>?"  # 0x30-0x3f
    "\u2245\u0391\u0392\u03a7\u0394\u0395\u03a6\u0393\u0397\u0399\u03d1\u039a\u039b\u039c\u039d\u039f"  # 0x40-0x4f
    "\u03a0\u0398\u03a1\u03a3\u03a4\u03a5\u03c2\u03a9\u039e\u03a8\u0396[\u2234]\u22a5_"  # 0x50-0x5f
    "\uf8e5\u03b1\u03b2\u03c7\u03b4\u03b5\u03c6\u03b3\u03b7\u03b9\u03d5\u03ba\u03bb\u00b5\u03bd\u03bf"  # 0x60-0x6f
    "\u03c0\u03b8\u03c1\u03c3\u03c4\u03c5\u03d6\u03c9\u03be\u03c8\u03b6{|}\u223c\x7f"  # 0x70-0x7f
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"  # 0x80-0x8f
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"  # 0x90-0x9f
    "\u20ac\u03d2\u2032\u2264\u2044\u221e\u0192\u2663\u2666\u2665\u2660\u2194\u2190\u2191\u2192\u2193"  # 0xa0-0xaf
    "\u00b0\u00b1\u2033\u2265\u00d7\u221d\u2202\u2022\u00f7\u2260\u2261\u2248\u2026\uf8e6\uf8e7\u21b5"  # 0xb0-0xbf
    "\u2135\u2111\u211c\u2118\u2297\u2295\u2205\u2229\u222a\u2283\u2287\u2284\u2282\u2286\u2208\u2209"  # 0xc0-0xcf
    "\u2220\u2207\uf6da\uf6d9\uf6db\u220f\u221a\u22c5\u00ac\u2227\u2228\u21d4\u21d0\u21d1\u21d2\u21d3"  # 0xd0-0xdf
    "\u25ca\u2329\uf8e8\uf8e9\uf8ea\u2211\uf8eb\uf8ec\uf8ed\uf8ee\uf8ef\uf8f0\uf8f1\uf8f2\uf8f3\uf8f4"  # 0xe0-0xef
    "\u00f0\u232a\u222b\u2320\uf8f5\u2321\uf8f6\uf8f7\uf8f8\uf8f9\uf8fa\uf8fb\uf8fc\uf8fd\uf8fe\u00ff"  # 0xf0-0xff
)
assert len(_symbol_encoding) == 256

View File

@ -0,0 +1,261 @@
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
# The table is written as sixteen 16-character rows concatenated into a
# single 256-character string; list() splits it into the usual
# one-character-per-code table.
_zapfding_encoding = list(
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f"  # 0x00-0x0f
    "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"  # 0x10-0x1f
    " \u2701\u2702\u2703\u2704\u260e\u2706\u2707\u2708\u2709\u261b\u261e\u270c\u270d\u270e\u270f"  # 0x20-0x2f
    "\u2710\u2711\u2712\u2713\u2714\u2715\u2716\u2717\u2718\u2719\u271a\u271b\u271c\u271d\u271e\u271f"  # 0x30-0x3f
    "\u2720\u2721\u2722\u2723\u2724\u2725\u2726\u2727\u2605\u2729\u272a\u272b\u272c\u272d\u272e\u272f"  # 0x40-0x4f
    "\u2730\u2731\u2732\u2733\u2734\u2735\u2736\u2737\u2738\u2739\u273a\u273b\u273c\u273d\u273e\u273f"  # 0x50-0x5f
    "\u2740\u2741\u2742\u2743\u2744\u2745\u2746\u2747\u2748\u2749\u274a\u274b\u25cf\u274d\u25a0\u274f"  # 0x60-0x6f
    "\u2750\u2751\u2752\u25b2\u25bc\u25c6\u2756\u25d7\u2758\u2759\u275a\u275b\u275c\u275d\u275e\x7f"  # 0x70-0x7f
    "\uf8d7\uf8d8\uf8d9\uf8da\uf8db\uf8dc\uf8dd\uf8de\uf8df\uf8e0\uf8e1\uf8e2\uf8e3\uf8e4\x8e\x8f"  # 0x80-0x8f
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"  # 0x90-0x9f
    "\xa0\u2761\u2762\u2763\u2764\u2765\u2766\u2767\u2663\u2666\u2665\u2660\u2460\u2461\u2462\u2463"  # 0xa0-0xaf
    "\u2464\u2465\u2466\u2467\u2468\u2469\u2776\u2777\u2778\u2779\u277a\u277b\u277c\u277d\u277e\u277f"  # 0xb0-0xbf
    "\u2780\u2781\u2782\u2783\u2784\u2785\u2786\u2787\u2788\u2789\u278a\u278b\u278c\u278d\u278e\u278f"  # 0xc0-0xcf
    "\u2790\u2791\u2792\u2793\u2794\u2192\u2194\u2195\u2798\u2799\u279a\u279b\u279c\u279d\u279e\u279f"  # 0xd0-0xdf
    "\u27a0\u27a1\u27a2\u27a3\u27a4\u27a5\u27a6\u27a7\u27a8\u27a9\u27aa\u27ab\u27ac\u27ad\u27ae\u27af"  # 0xe0-0xef
    "\xf0\u27b1\u27b2\u27b3\u27b4\u27b5\u27b6\u27b7\u27b8\u27b9\u27ba\u27bb\u27bc\u27bd\u27be\xff"  # 0xf0-0xff
)
assert len(_zapfding_encoding) == 256

View File

@ -0,0 +1,86 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from pypdf._crypt_providers._base import CryptBase, CryptIdentity
# Pick the best available crypt backend, in order of preference:
# 1. cryptography (>3.0), 2. PyCryptodome, 3. pure-Python fallback.
try:
    from pypdf._crypt_providers._cryptography import (
        CryptAES,
        CryptRC4,
        aes_cbc_decrypt,
        aes_cbc_encrypt,
        aes_ecb_decrypt,
        aes_ecb_encrypt,
        crypt_provider,
        rc4_decrypt,
        rc4_encrypt,
    )
    from pypdf._utils import Version
    # crypt_provider[1] is the backend's version string.
    if Version(crypt_provider[1]) <= Version("3.0"):
        # This is due to the backend parameter being required back then:
        # https://cryptography.io/en/latest/changelog/#v3-1
        raise ImportError("cryptography<=3.0 is not supported")  # pragma: no cover
except ImportError:
    try:
        # Second choice: PyCryptodome.
        from pypdf._crypt_providers._pycryptodome import (  # type: ignore
            CryptAES,
            CryptRC4,
            aes_cbc_decrypt,
            aes_cbc_encrypt,
            aes_ecb_decrypt,
            aes_ecb_encrypt,
            crypt_provider,
            rc4_decrypt,
            rc4_encrypt,
        )
    except ImportError:
        # Last resort: pure-Python RC4; AES operations raise DependencyError.
        from pypdf._crypt_providers._fallback import (  # type: ignore
            CryptAES,
            CryptRC4,
            aes_cbc_decrypt,
            aes_cbc_encrypt,
            aes_ecb_decrypt,
            aes_ecb_encrypt,
            crypt_provider,
            rc4_decrypt,
            rc4_encrypt,
        )
__all__ = [
    "CryptAES",
    "CryptBase",
    "CryptIdentity",
    "CryptRC4",
    "aes_cbc_decrypt",
    "aes_cbc_encrypt",
    "aes_ecb_decrypt",
    "aes_ecb_encrypt",
    "crypt_provider",
    "rc4_decrypt",
    "rc4_encrypt",
]

View File

@ -0,0 +1,38 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
class CryptBase:
    """Base "cipher": passes data through unchanged; real ciphers override."""
    def encrypt(self, data: bytes) -> bytes:  # pragma: no cover
        """Return *data* unchanged."""
        return data
    def decrypt(self, data: bytes) -> bytes:  # pragma: no cover
        """Return *data* unchanged."""
        return data
class CryptIdentity(CryptBase):
    """Named no-op cipher; inherits the pass-through behavior of CryptBase."""
    pass

View File

@ -0,0 +1,118 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import secrets
from cryptography import __version__
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers.algorithms import AES
try:
# 43.0.0 - https://cryptography.io/en/latest/changelog/#v43-0-0
from cryptography.hazmat.decrepit.ciphers.algorithms import ARC4
except ImportError:
from cryptography.hazmat.primitives.ciphers.algorithms import ARC4
from cryptography.hazmat.primitives.ciphers.base import Cipher
from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB
from pypdf._crypt_providers._base import CryptBase
# (backend name, backend version) advertised by this provider module.
crypt_provider = ("cryptography", __version__)
class CryptRC4(CryptBase):
    """RC4 stream cipher backed by the ``cryptography`` package."""
    def __init__(self, key: bytes) -> None:
        # ARC4 is a stream cipher; it takes no block mode.
        self.cipher = Cipher(ARC4(key), mode=None)
    def encrypt(self, data: bytes) -> bytes:
        enc = self.cipher.encryptor()
        return enc.update(data) + enc.finalize()
    def decrypt(self, data: bytes) -> bytes:
        dec = self.cipher.decryptor()
        return dec.update(data) + dec.finalize()
class CryptAES(CryptBase):
    """AES-CBC cipher (PKCS#7 padding) backed by the cryptography package."""
    def __init__(self, key: bytes) -> None:
        self.alg = AES(key)
    def encrypt(self, data: bytes) -> bytes:
        """Pad, encrypt under a fresh random IV, and return IV + ciphertext."""
        iv = secrets.token_bytes(16)
        pad = padding.PKCS7(128).padder()
        data = pad.update(data) + pad.finalize()
        cipher = Cipher(self.alg, CBC(iv))
        encryptor = cipher.encryptor()
        return iv + encryptor.update(data) + encryptor.finalize()
    def decrypt(self, data: bytes) -> bytes:
        """Decrypt IV-prefixed AES-CBC data and strip the PKCS#7 padding."""
        iv = data[:16]
        data = data[16:]
        # for empty encrypted data
        if not data:
            return data
        # just for robustness, it does not happen under normal circumstances
        if len(data) % 16 != 0:
            pad = padding.PKCS7(128).padder()
            data = pad.update(data) + pad.finalize()
        cipher = Cipher(self.alg, CBC(iv))
        decryptor = cipher.decryptor()
        d = decryptor.update(data) + decryptor.finalize()
        # Last byte is the PKCS#7 pad length; strip that many bytes.
        return d[: -d[-1]]
def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    """One-shot RC4 encryption of *data* with *key*."""
    encryptor = Cipher(ARC4(key), mode=None).encryptor()
    return encryptor.update(data) + encryptor.finalize()
def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    """One-shot RC4 decryption of *data* with *key*."""
    decryptor = Cipher(ARC4(key), mode=None).decryptor()
    return decryptor.update(data) + decryptor.finalize()
def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    """One-shot AES-ECB encryption (no padding added here)."""
    encryptor = Cipher(AES(key), mode=ECB()).encryptor()
    return encryptor.update(data) + encryptor.finalize()
def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    """One-shot AES-ECB decryption (no padding removed here)."""
    decryptor = Cipher(AES(key), mode=ECB()).decryptor()
    return decryptor.update(data) + decryptor.finalize()
def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """One-shot AES-CBC encryption with a caller-supplied IV (no padding)."""
    encryptor = Cipher(AES(key), mode=CBC(iv)).encryptor()
    return encryptor.update(data) + encryptor.finalize()
def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """One-shot AES-CBC decryption with a caller-supplied IV (no padding)."""
    decryptor = Cipher(AES(key), mode=CBC(iv)).decryptor()
    return decryptor.update(data) + decryptor.finalize()

View File

@ -0,0 +1,93 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from pypdf._crypt_providers._base import CryptBase
from pypdf.errors import DependencyError
# Message raised whenever an AES operation is requested without a backend.
_DEPENDENCY_ERROR_STR = "cryptography>=3.1 is required for AES algorithm"
# (backend name, backend version): the pure-Python fallback has no version.
crypt_provider = ("local_crypt_fallback", "0.0.0")
class CryptRC4(CryptBase):
    """Pure-Python RC4: key schedule in ``__init__``, keystream in ``encrypt``."""
    def __init__(self, key: bytes) -> None:
        # Key-scheduling algorithm: permute a 256-byte state with the key.
        state = bytearray(range(256))
        swap = 0
        key_len = len(key)
        for idx in range(256):
            swap = (swap + state[idx] + key[idx % key_len]) % 256
            state[idx], state[swap] = state[swap], state[idx]
        self.s = state
    def encrypt(self, data: bytes) -> bytes:
        # Pseudo-random generation: XOR each input byte with the keystream.
        state = bytearray(self.s)
        result = bytearray(len(data))
        a = 0
        b = 0
        for pos, byte in enumerate(data):
            a = (a + 1) % 256
            b = (b + state[a]) % 256
            state[a], state[b] = state[b], state[a]
            result[pos] = byte ^ state[(state[a] + state[b]) % 256]
        return bytes(result)
    def decrypt(self, data: bytes) -> bytes:
        # RC4 is symmetric: decryption applies the same keystream XOR.
        return self.encrypt(data)
class CryptAES(CryptBase):
    """Placeholder AES: every operation raises because no backend is installed."""
    def __init__(self, key: bytes) -> None:
        pass
    def encrypt(self, data: bytes) -> bytes:
        raise DependencyError(_DEPENDENCY_ERROR_STR)
    def decrypt(self, data: bytes) -> bytes:
        raise DependencyError(_DEPENDENCY_ERROR_STR)
def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    """One-shot RC4 encryption using the pure-Python fallback."""
    return CryptRC4(key).encrypt(data)
def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    """One-shot RC4 decryption using the pure-Python fallback."""
    return CryptRC4(key).decrypt(data)
def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    """Unavailable without a crypto backend; always raises DependencyError."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)
def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    """Unavailable without a crypto backend; always raises DependencyError."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)
def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Unavailable without a crypto backend; always raises DependencyError."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)
def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Unavailable without a crypto backend; always raises DependencyError."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)

View File

@ -0,0 +1,97 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import secrets
from Crypto import __version__
from Crypto.Cipher import AES, ARC4
from Crypto.Util.Padding import pad
from pypdf._crypt_providers._base import CryptBase
# (backend name, backend version) advertised by this provider module.
crypt_provider = ("pycryptodome", __version__)
class CryptRC4(CryptBase):
    """RC4 cipher backed by PyCryptodome; a fresh ARC4 cipher per call."""
    def __init__(self, key: bytes) -> None:
        self.key = key
    def encrypt(self, data: bytes) -> bytes:
        return ARC4.ARC4Cipher(self.key).encrypt(data)
    def decrypt(self, data: bytes) -> bytes:
        return ARC4.ARC4Cipher(self.key).decrypt(data)
class CryptAES(CryptBase):
    """AES-CBC cipher backed by PyCryptodome; the IV prefixes the ciphertext."""
    def __init__(self, key: bytes) -> None:
        self.key = key
    def encrypt(self, data: bytes) -> bytes:
        """Pad with PKCS#7, encrypt under a random IV, return IV + ciphertext."""
        iv = secrets.token_bytes(16)
        data = pad(data, 16)
        aes = AES.new(self.key, AES.MODE_CBC, iv)
        return iv + aes.encrypt(data)
    def decrypt(self, data: bytes) -> bytes:
        """Decrypt IV-prefixed AES-CBC data and strip the PKCS#7 padding."""
        iv = data[:16]
        data = data[16:]
        # for empty encrypted data
        if not data:
            return data
        # just for robustness, it does not happen under normal circumstances
        if len(data) % 16 != 0:
            data = pad(data, 16)
        aes = AES.new(self.key, AES.MODE_CBC, iv)
        d = aes.decrypt(data)
        # Last byte is the pad length; strip that many trailing bytes.
        return d[: -d[-1]]
def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    """One-shot RC4 encryption via PyCryptodome."""
    return ARC4.ARC4Cipher(key).encrypt(data)
def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    """One-shot RC4 decryption via PyCryptodome."""
    return ARC4.ARC4Cipher(key).decrypt(data)
def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    """One-shot AES-ECB encryption (no padding added here)."""
    return AES.new(key, AES.MODE_ECB).encrypt(data)
def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    """One-shot AES-ECB decryption (no padding removed here)."""
    return AES.new(key, AES.MODE_ECB).decrypt(data)
def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """One-shot AES-CBC encryption with a caller-supplied IV (no padding)."""
    return AES.new(key, AES.MODE_CBC, iv).encrypt(data)
def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """One-shot AES-CBC decryption with a caller-supplied IV (no padding)."""
    return AES.new(key, AES.MODE_CBC, iv).decrypt(data)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,42 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from ._utils import (
deprecation_with_replacement,
)
class PdfMerger:
    """
    Use :class:`PdfWriter` instead.
    .. deprecated:: 5.0.0
    """
    def __init__(self) -> None:
        # Stub: delegates to the central deprecation helper
        # (whether it warns or raises is defined in ._utils).
        deprecation_with_replacement("PdfMerger", "PdfWriter", "5.0.0")

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,289 @@
"""
Page labels are shown by PDF viewers as "the page number".
A page has a numeric index, starting at 0. Additionally, the page
has a label. In the most simple case:
label = index + 1
However, the title page and the table of contents might have Roman numerals as
page labels. This makes things more complicated.
Example 1
---------
>>> reader.root_object["/PageLabels"]["/Nums"]
[0, IndirectObject(18, 0, 139929798197504),
8, IndirectObject(19, 0, 139929798197504)]
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
{'/S': '/r'}
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
{'/S': '/D'}
Example 2
---------
The following is a document with pages labeled
i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...
1 0 obj
<< /Type /Catalog
/PageLabels << /Nums [
0 << /S /r >>
4 << /S /D >>
7 << /S /D
/P ( A- )
/St 8
>>
% A number tree containing
% three page label dictionaries
]
>>
...
>>
endobj
§12.4.2 PDF Specification 1.7 and 2.0
=====================================
Entries in a page label dictionary
----------------------------------
The /S key:
D Decimal Arabic numerals
R Uppercase Roman numerals
r Lowercase Roman numerals
A Uppercase letters (A to Z for the first 26 pages,
AA to ZZ for the next 26, and so on)
a Lowercase letters (a to z for the first 26 pages,
aa to zz for the next 26, and so on)
"""
from typing import Iterator, List, Optional, Tuple, cast
from ._protocols import PdfCommonDocProtocol
from ._utils import logger_warning
from .generic import (
ArrayObject,
DictionaryObject,
NullObject,
NumberObject,
is_null_or_none,
)
def number2uppercase_roman_numeral(num: int) -> str:
    """Convert a positive integer to an uppercase Roman numeral string."""
    # Subtractive-notation value/glyph pairs, largest first.
    table = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )
    pieces = []
    remaining = num
    for value, glyph in table:
        count = remaining // value
        pieces.append(glyph * count)
        remaining -= value * count
        if remaining <= 0:
            break
    return "".join(pieces)
def number2lowercase_roman_numeral(number: int) -> str:
    """Convert a positive integer to a lowercase Roman numeral string."""
    return number2uppercase_roman_numeral(number).lower()
def number2uppercase_letter(number: int) -> str:
    """Convert a positive integer to letters: 1-26 -> A-Z, 27 -> AA, ...

    This is bijective base-26 ("spreadsheet column") numbering.

    Raises:
        ValueError: If *number* is not positive.
    """
    if number <= 0:
        raise ValueError("Expecting a positive number")
    letters = []
    remaining = number
    while remaining > 0:
        # Shift by one so that 26 maps to "Z" rather than rolling over.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("A") + offset))
    return "".join(reversed(letters))
def number2lowercase_letter(number: int) -> str:
    """Convert a positive integer to lowercase letters: 1-26 -> a-z, 27 -> aa."""
    return number2uppercase_letter(number).lower()
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Resolve the label of page ``index`` from a number-tree node's /Nums array.

    Args:
        dictionary_object: A number-tree node holding a "/Nums" array.
        index: Zero-based page index to look up.

    Returns:
        The formatted page label; falls back to the 1-based page number when
        the matching entry is missing or malformed.
    """
    # [Nums] shall be an array of the form
    # [ key_1 value_1 key_2 value_2 ... key_n value_n ]
    # where each key_i is an integer and the corresponding
    # value_i shall be the object associated with that key.
    # The keys shall be sorted in numerical order,
    # analogously to the arrangement of keys in a name tree
    # as described in 7.9.6, "Name Trees."
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    i = 0
    value = None
    start_index = 0
    # Walk the (key, value) pairs and keep the last pair whose key is <= index.
    while i < len(nums):
        start_index = nums[i]
        value = nums[i + 1].get_object()
        if i + 2 == len(nums):
            break
        if nums[i + 2] > index:
            break
        i += 2
    # Formatter per /S numbering style; absent /S means "no numeric portion".
    m = {
        None: lambda n: "",
        "/D": lambda n: str(n),
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(value, dict):
        return str(index + 1)  # Fallback
    start = value.get("/St", 1)  # numbering of this range starts at /St (default 1)
    prefix = value.get("/P", "")
    return prefix + m[value.get("/S")](index - start_index + start)
def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    Resolve the displayed page label for a page index.

    See 7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4".
    """
    root = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in root:
        return str(index + 1)  # Fallback
    number_tree = cast(DictionaryObject, root["/PageLabels"].get_object())
    if "/Nums" in number_tree:
        # Leaf node: resolve directly from its /Nums array.
        return get_label_from_nums(number_tree, index)
    if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
        # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
        # Limit maximum depth.
        level = 0
        while level < 100:
            kids = cast(List[DictionaryObject], number_tree["/Kids"])
            for kid in kids:
                # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
                limits = cast(List[int], kid["/Limits"])
                if limits[0] <= index <= limits[1]:
                    if not is_null_or_none(kid.get("/Kids", None)):
                        # Recursive definition.
                        level += 1
                        if level == 100:  # pragma: no cover
                            raise NotImplementedError(
                                "Too deep nesting is not supported."
                            )
                        number_tree = kid
                        # Exit the inner `for` loop and continue at the next level with the
                        # next iteration of the `while` loop.
                        break
                    return get_label_from_nums(kid, index)
            else:
                # When there are no kids, make sure to exit the `while` loop directly
                # and continue with the fallback.
                break
    logger_warning(f"Could not reliably determine page label for {index}.", __name__)
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree
def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Insert or replace a key/value pair in a Nums array, keeping keys sorted.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify in place

    Raises:
        ValueError: if ``nums`` does not hold an even number of elements
    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    # Scan backwards two items at a time to find the insertion point.
    pos = len(nums)
    while pos != 0 and key <= nums[pos - 2]:
        pos -= 2
    if pos < len(nums) and key == nums[pos]:
        # Key already present: overwrite its value.
        nums[pos + 1] = value
    else:
        nums.insert(pos, key)
        nums.insert(pos + 1, value)
def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Drop every key/value pair that follows ``key`` up to ``page_index_to``.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry immediately before the cleared range
        page_index_to: inclusive upper bound (page index) of the range to clear
        nums: Nums array to modify in place

    Raises:
        ValueError: if ``nums`` is malformed or the bounds are inverted
    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")
    # Start just past the (key, value) pair and delete following pairs while
    # their key still falls inside the requested range.
    cursor = nums.index(key) + 2
    while cursor < len(nums) and nums[cursor] <= page_index_to:
        del nums[cursor:cursor + 2]
def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> Tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Return the (key, value) pair of the entry after the given one.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The following (key, value) pair, or (None, None) at the end.
    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    # The pair after `key` starts two slots past its position.
    nxt = nums.index(key) + 2
    if nxt >= len(nums):
        return (None, None)
    return (nums[nxt], nums[nxt + 1])

View File

@ -0,0 +1,86 @@
"""Helpers for working with PDF types."""
from abc import abstractmethod
from pathlib import Path
from typing import IO, Any, Dict, List, Optional, Protocol, Tuple, Union
from ._utils import StrByteType, StreamType
class PdfObjectProtocol(Protocol):
    """Structural (duck-typed) interface shared by all PDF objects."""

    # Back-reference to the indirect object wrapping this object, if any.
    indirect_reference: Any

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> Any:
        """Duplicate this object into the destination document."""
        ...  # pragma: no cover

    def _reference_clone(self, clone: Any, pdf_dest: Any) -> Any:
        ...  # pragma: no cover

    def get_object(self) -> Optional["PdfObjectProtocol"]:
        """Resolve to the underlying object (dereference if indirect)."""
        ...  # pragma: no cover

    def hash_value(self) -> bytes:
        ...  # pragma: no cover

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Serialize this object to ``stream``."""
        ...  # pragma: no cover
class XmpInformationProtocol(PdfObjectProtocol):
    """Marker protocol for XMP metadata objects; adds nothing beyond PdfObjectProtocol."""

    pass
class PdfCommonDocProtocol(Protocol):
    """Interface common to both reader and writer documents."""

    @property
    def pdf_header(self) -> str:
        ...  # pragma: no cover

    @property
    def pages(self) -> List[Any]:
        ...  # pragma: no cover

    @property
    def root_object(self) -> PdfObjectProtocol:
        """The document catalog (root dictionary)."""
        ...  # pragma: no cover

    def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
        ...  # pragma: no cover

    @property
    def strict(self) -> bool:
        ...  # pragma: no cover
class PdfReaderProtocol(PdfCommonDocProtocol, Protocol):
    """Reader-side document interface: adds xref table and trailer access."""

    @property
    @abstractmethod
    def xref(self) -> Dict[int, Dict[int, Any]]:
        ...  # pragma: no cover

    @property
    @abstractmethod
    def trailer(self) -> Dict[str, Any]:
        ...  # pragma: no cover
class PdfWriterProtocol(PdfCommonDocProtocol, Protocol):
    """Writer-side document interface: object store, id mapping, and output."""

    # All objects added to the output document, in insertion order.
    _objects: List[Any]
    # Maps source-document ids to ids in this writer (per source).
    _id_translated: Dict[int, Dict[int, int]]
    incremental: bool
    _reader: Any  # PdfReader

    @abstractmethod
    def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]:
        ...  # pragma: no cover

    @abstractmethod
    def _add_object(self, obj: Any) -> Any:
        ...  # pragma: no cover

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,248 @@
"""
Code related to text extraction.
Some parts are still in _page.py. In doubt, they will stay there.
"""
import math
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from ..generic import DictionaryObject, TextStringObject, encode_pdfdocencoding
# Inclusive custom right-to-left code point range; -1/-1 disables the range.
CUSTOM_RTL_MIN: int = -1
CUSTOM_RTL_MAX: int = -1
# Individual code points that always keep the current insertion order.
CUSTOM_RTL_SPECIAL_CHARS: List[int] = []
# Gap (in space widths) beyond which layout mode starts a new BT group.
LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS: int = 5
class OrientationNotFoundError(Exception):
    """Raised when a text fragment's orientation is not among the requested ones."""

    pass
def set_custom_rtl(
    _min: Union[str, int, None] = None,
    _max: Union[str, int, None] = None,
    specials: Union[str, List[int], None] = None,
) -> Tuple[int, int, List[int]]:
    """
    Change the Right-To-Left and special characters custom parameters.

    Args:
        _min: New lower bound of the custom right-to-left character range.
            ``None`` leaves the current value unchanged; a one-character
            string is converted to its code point. Default -1 (no range).
        _max: New upper bound of the custom right-to-left character range.
            Same conventions as ``_min``.
        specials: New list of special characters kept in the current
            insertion order. ``None`` leaves the current value unchanged;
            a string is converted to a list of code points. Default [].

    Returns:
        The resulting (CUSTOM_RTL_MIN, CUSTOM_RTL_MAX,
        CUSTOM_RTL_SPECIAL_CHARS) values.
    """
    global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
    # Strings are normalised to their code point; None leaves values untouched.
    if isinstance(_min, str):
        CUSTOM_RTL_MIN = ord(_min)
    elif isinstance(_min, int):
        CUSTOM_RTL_MIN = _min
    if isinstance(_max, str):
        CUSTOM_RTL_MAX = ord(_max)
    elif isinstance(_max, int):
        CUSTOM_RTL_MAX = _max
    if isinstance(specials, list):
        CUSTOM_RTL_SPECIAL_CHARS = specials
    elif isinstance(specials, str):
        CUSTOM_RTL_SPECIAL_CHARS = [ord(ch) for ch in specials]
    return CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
def mult(m: List[float], n: List[float]) -> List[float]:
    """Multiply two transformation matrices given in PDF's 6-element [a b c d e f] form."""
    a, b, c, d, e, f = m
    return [
        a * n[0] + b * n[2],
        a * n[1] + b * n[3],
        c * n[0] + d * n[2],
        c * n[1] + d * n[3],
        e * n[0] + f * n[2] + n[4],
        e * n[1] + f * n[3] + n[5],
    ]
def orient(m: List[float]) -> int:
    """
    Return the rotation (0, 90, 180, or 270 degrees) implied by matrix ``m``.

    The sign of m[3] decides between upright (0) and upside-down (180);
    otherwise the sign of m[1] decides between 90 and 270.
    """
    d = m[3]
    if d > 1e-6:
        return 0
    if d < -1e-6:
        return 180
    return 90 if m[1] > 0 else 270
def crlf_space_check(
    text: str,
    cmtm_prev: Tuple[List[float], List[float]],
    cmtm_matrix: Tuple[List[float], List[float]],
    memo_cmtm: Tuple[List[float], List[float]],
    cmap: Tuple[
        Union[str, Dict[int, str]], Dict[str, str], str, Optional[DictionaryObject]
    ],
    orientations: Tuple[int, ...],
    output: str,
    font_size: float,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
    str_widths: float,
    spacewidth: float,
    str_height: float,
) -> Tuple[str, str, List[float], List[float]]:
    """
    Decide whether a newline or a space must be inserted between two text
    fragments, based on how far the rendering position moved.

    Args:
        text: text accumulated for the current fragment so far.
        cmtm_prev: (cm, tm) matrices of the previous fragment.
        cmtm_matrix: (cm, tm) matrices of the current fragment.
        memo_cmtm: (cm, tm) matrices forwarded to ``visitor_text``.
        cmap: (encoding, char map, font resource name, font dictionary) tuple.
        orientations: orientations (degrees) whose text should be kept.
        output: text already flushed for the page.
        font_size: current font size.
        visitor_text: optional callback invoked when a line break is emitted.
        str_widths: width of the previously rendered string.
        spacewidth: width of a space in the current font.
        str_height: height of the previously rendered string.

    Returns:
        (text, output, cm_prev, tm_prev) updated for the next fragment.

    Raises:
        OrientationNotFoundError: when the fragment's orientation is not
            one of ``orientations``.
    """
    cm_prev = cmtm_prev[0]
    tm_prev = cmtm_prev[1]
    cm_matrix = cmtm_matrix[0]
    tm_matrix = cmtm_matrix[1]
    memo_cm = memo_cmtm[0]
    memo_tm = memo_cmtm[1]
    # Compare rendered positions (tm composed with cm) of both fragments.
    m_prev = mult(tm_prev, cm_prev)
    m = mult(tm_matrix, cm_matrix)
    orientation = orient(m)
    delta_x = m[4] - m_prev[4]
    delta_y = m[5] - m_prev[5]
    # Table 108 of the 1.7 reference ("Text positioning operators")
    scale_prev_x = math.sqrt(tm_prev[0]**2 + tm_prev[1]**2)
    scale_prev_y = math.sqrt(tm_prev[2]**2 + tm_prev[3]**2)
    scale_y = math.sqrt(tm_matrix[2]**2 + tm_matrix[3]**2)
    # NOTE(review): this assignment appears to be dead — cm_prev is
    # reassigned below before being read again; confirm before removing.
    cm_prev = m
    if orientation not in orientations:
        raise OrientationNotFoundError
    # Map the position delta onto the text's own axes for this orientation.
    if orientation in (0, 180):
        moved_height: float = delta_y
        moved_width: float = delta_x
    elif orientation in (90, 270):
        moved_height = delta_x
        moved_width = delta_y
    try:
        # A large vertical move means a new line; a large horizontal move
        # means a missing inter-word space.
        if abs(moved_height) > 0.8 * min(str_height * scale_prev_y, font_size * scale_y):
            if (output + text)[-1] != "\n":
                output += text + "\n"
                if visitor_text is not None:
                    visitor_text(
                        text + "\n",
                        memo_cm,
                        memo_tm,
                        cmap[3],
                        font_size,
                    )
            text = ""
        elif (
            (moved_width >= (spacewidth + str_widths) * scale_prev_x)
            and (output + text)[-1] != " "
        ):
            text += " "
    except Exception:
        # (output + text) may be empty, making the [-1] lookups fail.
        pass
    tm_prev = tm_matrix.copy()
    cm_prev = cm_matrix.copy()
    return text, output, cm_prev, tm_prev
def get_text_operands(
    operands: List[Union[str, TextStringObject]],
    cm_matrix: List[float],
    tm_matrix: List[float],
    cmap: Tuple[
        Union[str, Dict[int, str]], Dict[str, str], str, Optional[DictionaryObject]
    ],
    orientations: Tuple[int, ...]
) -> Tuple[str, bool]:
    """
    Decode the operand of a text-show operator into a string.

    Only ``operands[0]`` is consumed. Text whose orientation is not in
    ``orientations`` is skipped (empty string returned).

    Args:
        operands: the text-show operator's operand list.
        cm_matrix: current transformation matrix.
        tm_matrix: current text matrix.
        cmap: (encoding, char map, font resource name, font dictionary) tuple.
        orientations: orientations (degrees) whose text should be kept.

    Returns:
        (decoded text, True when ``operands[0]`` was already a str)
    """
    t: str = ""
    is_str_operands = False
    m = mult(tm_matrix, cm_matrix)
    orientation = orient(m)
    if orientation in orientations and len(operands) > 0:
        if isinstance(operands[0], str):
            t = operands[0]
            is_str_operands = True
        else:
            t = ""
            tt: bytes = (
                encode_pdfdocencoding(operands[0])
                if isinstance(operands[0], str)
                else operands[0]
            )
            if isinstance(cmap[0], str):
                try:
                    t = tt.decode(cmap[0], "surrogatepass")  # apply str encoding
                except Exception:
                    # the data does not match the expectation,
                    # we use the alternative ;
                    # text extraction may not be good
                    t = tt.decode(
                        "utf-16-be" if cmap[0] == "charmap" else "charmap",
                        "surrogatepass",
                    )  # apply str encoding
            else:  # apply dict encoding
                t = "".join(
                    [cmap[0][x] if x in cmap[0] else bytes((x,)).decode() for x in tt]
                )
    return (t, is_str_operands)
def get_display_str(
    text: str,
    cm_matrix: List[float],
    tm_matrix: List[float],
    cmap: Tuple[
        Union[str, Dict[int, str]], Dict[str, str], str, Optional[DictionaryObject]
    ],
    text_operands: str,
    font_size: float,
    rtl_dir: bool,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]]
) -> Tuple[str, bool]:
    """
    Append decoded characters to ``text``, tracking right-to-left runs.

    Whenever the writing direction flips, the accumulated text is flushed
    through ``visitor_text`` (if provided) and a fresh run is started.

    Args:
        text: text accumulated so far.
        cm_matrix: current transformation matrix (forwarded to the visitor).
        tm_matrix: current text matrix (forwarded to the visitor).
        cmap: (encoding, char map, font resource name, font dictionary) tuple.
        text_operands: decoded characters to append.
        font_size: current font size (forwarded to the visitor).
        rtl_dir: True while inside a right-to-left run.
        visitor_text: optional callback invoked on direction changes.

    Returns:
        (updated text, updated rtl_dir)
    """
    # "\u0590 - \u08FF \uFB50 - \uFDFF"
    for x in [cmap[1].get(x, x) for x in text_operands]:
        # x can be a sequence of bytes ; ex: habibi.pdf
        if len(x) == 1:
            xx = ord(x)
        else:
            xx = 1
        # fmt: off
        if (
            # cases where the current inserting order is kept
            (xx <= 0x2F)  # punctuations but...
            or 0x3A <= xx <= 0x40  # numbers (x30-39)
            or 0x2000 <= xx <= 0x206F  # upper punctuations..
            or 0x20A0 <= xx <= 0x21FF  # but (numbers) indices/exponents
            or xx in CUSTOM_RTL_SPECIAL_CHARS  # customized....
        ):
            text = x + text if rtl_dir else text + x
        elif (  # right-to-left characters set
            0x0590 <= xx <= 0x08FF
            or 0xFB1D <= xx <= 0xFDFF
            or 0xFE70 <= xx <= 0xFEFF
            or CUSTOM_RTL_MIN <= xx <= CUSTOM_RTL_MAX
        ):
            if not rtl_dir:
                rtl_dir = True
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, cmap[3], font_size)
                text = ""
            text = x + text
        else:  # left-to-right
            if rtl_dir:
                rtl_dir = False
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, cmap[3], font_size)
                text = ""
            text = text + x
        # fmt: on
    return text, rtl_dir

View File

@ -0,0 +1,16 @@
"""Layout mode text extraction extension for pypdf"""
from ._fixed_width_page import (
fixed_char_width,
fixed_width_page,
text_show_operations,
y_coordinate_groups,
)
from ._font import Font
# Public names re-exported by the layout-mode extraction subpackage.
__all__ = [
    "Font",
    "fixed_char_width",
    "fixed_width_page",
    "text_show_operations",
    "y_coordinate_groups",
]

View File

@ -0,0 +1,394 @@
"""Extract PDF text preserving the layout of the source PDF"""
from itertools import groupby
from math import ceil
from pathlib import Path
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, TypedDict
from ..._utils import logger_warning
from .. import LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
from ._font import Font
from ._text_state_manager import TextStateManager
from ._text_state_params import TextStateParams
class BTGroup(TypedDict):
    """
    Dict describing a line of text rendered within a BT/ET operator pair.
    If multiple text show operations render text on the same line, the text
    will be combined into a single BTGroup dict.

    Keys:
        tx: x coordinate of first character in BTGroup
        ty: y coordinate of first character in BTGroup
        font_size: nominal font size
        font_height: effective font height
        text: rendered text
        displaced_tx: x coordinate of last character in BTGroup
        flip_sort: -1 if page is upside down, else 1
    """

    # NOTE: TypedDict — instances are plain dicts at runtime.
    tx: float
    ty: float
    font_size: float
    font_height: float
    text: str
    displaced_tx: float
    flip_sort: Literal[-1, 1]
def bt_group(tj_op: TextStateParams, rendered_text: str, dispaced_tx: float) -> BTGroup:
    """
    Build a BTGroup dict from a text state snapshot and its rendered text.

    Args:
        tj_op (TextStateParams): text state at the group's first character
        rendered_text (str): rendered text
        dispaced_tx (float): x coordinate of last character in BTGroup
            (parameter name kept as-is for backward compatibility)
    """
    sort_direction: Literal[-1, 1] = -1 if tj_op.flip_vertical else 1
    return BTGroup(
        tx=tj_op.tx,
        ty=tj_op.ty,
        font_size=tj_op.font_size,
        font_height=tj_op.font_height,
        text=rendered_text,
        displaced_tx=dispaced_tx,
        flip_sort=sort_direction,
    )
def recurs_to_target_op(
    ops: Iterator[Tuple[List[Any], bytes]],
    text_state_mgr: TextStateManager,
    end_target: Literal[b"Q", b"ET"],
    fonts: Dict[str, Font],
    strip_rotated: bool = True,
) -> Tuple[List[BTGroup], List[TextStateParams]]:
    """
    Recurse operators between BT/ET and/or q/Q operators managing the transform
    stack and capturing text positioning and rendering data.

    Args:
        ops: iterator of operators in content stream
        text_state_mgr: a TextStateManager instance
        end_target: Either b"Q" (ends b"q" op) or b"ET" (ends b"BT" op)
        fonts: font dictionary as returned by PageObject._layout_mode_fonts()
        strip_rotated: skip text that is rotated w.r.t. the page (default True)

    Returns:
        tuple: list of BTGroup dicts + list of TextStateParams dataclass instances.
    """
    # 1 entry per line of text rendered within each BT/ET operation.
    bt_groups: List[BTGroup] = []
    # 1 entry per text show operator (Tj/TJ/'/")
    tj_ops: List[TextStateParams] = []
    if end_target == b"Q":
        # add new q level. cm's added at this level will be popped at next b'Q'
        text_state_mgr.add_q()
    while True:
        try:
            operands, op = next(ops)
        except StopIteration:
            # Content stream ended before the closing operator.
            return bt_groups, tj_ops
        if op == end_target:
            if op == b"Q":
                text_state_mgr.remove_q()
            if op == b"ET":
                if not tj_ops:
                    return bt_groups, tj_ops
                _text = ""
                bt_idx = 0  # idx of first tj in this bt group
                last_displaced_tx = tj_ops[bt_idx].displaced_tx
                last_ty = tj_ops[bt_idx].ty
                for _idx, _tj in enumerate(
                    tj_ops
                ):  # ... build text from new Tj operators
                    if strip_rotated and _tj.rotated:
                        continue
                    if not _tj.font.interpretable:  # generates warning
                        continue
                    # if the y position of the text is greater than the font height, assume
                    # the text is on a new line and start a new group
                    if abs(_tj.ty - last_ty) > _tj.font_height:
                        if _text.strip():
                            bt_groups.append(
                                bt_group(tj_ops[bt_idx], _text, last_displaced_tx)
                            )
                        bt_idx = _idx
                        _text = ""
                    # if the x position of the text is less than the last x position by
                    # more than 5 spaces widths, assume the text order should be flipped
                    # and start a new group
                    if (
                        last_displaced_tx - _tj.tx
                        > _tj.space_tx * LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
                    ):
                        if _text.strip():
                            bt_groups.append(
                                bt_group(tj_ops[bt_idx], _text, last_displaced_tx)
                            )
                        bt_idx = _idx
                        last_displaced_tx = _tj.displaced_tx
                        _text = ""
                    # calculate excess x translation based on ending tx of previous Tj.
                    # multiply by bool (_idx != bt_idx) to ensure spaces aren't double
                    # applied to the first tj of a BTGroup in fixed_width_page().
                    excess_tx = round(_tj.tx - last_displaced_tx, 3) * (_idx != bt_idx)
                    # space_tx could be 0 if either Tz or font_size was 0 for this _tj.
                    spaces = int(excess_tx // _tj.space_tx) if _tj.space_tx else 0
                    new_text = f'{" " * spaces}{_tj.txt}'
                    last_ty = _tj.ty
                    _text = f"{_text}{new_text}"
                    last_displaced_tx = _tj.displaced_tx
                if _text:
                    bt_groups.append(bt_group(tj_ops[bt_idx], _text, last_displaced_tx))
                text_state_mgr.reset_tm()
            return bt_groups, tj_ops
        if op == b"q":
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"Q", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"cm":
            text_state_mgr.add_cm(*operands)
        elif op == b"BT":
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"ET", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"Tj":
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b"TJ":
            _tj = text_state_mgr.text_state_params()
            for tj_op in operands[0]:
                if isinstance(tj_op, bytes):
                    _tj = text_state_mgr.text_state_params(tj_op)
                    tj_ops.append(_tj)
                else:
                    # Numeric TJ entries shift the text position.
                    text_state_mgr.add_trm(_tj.displacement_matrix(TD_offset=tj_op))
        elif op == b"'":
            text_state_mgr.reset_trm()
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b'"':
            text_state_mgr.reset_trm()
            text_state_mgr.set_state_param(b"Tw", operands[0])
            text_state_mgr.set_state_param(b"Tc", operands[1])
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[2]))
        elif op in (b"Td", b"Tm", b"TD", b"T*"):
            text_state_mgr.reset_trm()
            if op == b"Tm":
                text_state_mgr.reset_tm()
            elif op == b"TD":
                text_state_mgr.set_state_param(b"TL", -operands[1])
            elif op == b"T*":
                operands = [0, -text_state_mgr.TL]
            text_state_mgr.add_tm(operands)
        elif op == b"Tf":
            text_state_mgr.set_font(fonts[operands[0]], operands[1])
        else:  # handle Tc, Tw, Tz, TL, and Ts operators
            text_state_mgr.set_state_param(op, operands)
def y_coordinate_groups(
    bt_groups: List[BTGroup], debug_path: Optional[Path] = None
) -> Dict[int, List[BTGroup]]:
    """
    Group text operations by rendered y coordinate, i.e. the line number.

    Args:
        bt_groups: list of dicts as returned by text_show_operations()
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        Dict[int, List[BTGroup]]: dict of lists of text rendered by each BT operator
        keyed by y coordinate
    """
    # NOTE: groupby only merges adjacent entries, so bt_groups must already be
    # sorted by (ty * flip_sort) — as done by text_show_operations().
    ty_groups = {
        ty: sorted(grp, key=lambda x: x["tx"])
        for ty, grp in groupby(
            bt_groups, key=lambda bt_grp: int(bt_grp["ty"] * bt_grp["flip_sort"])
        )
    }
    # combine groups whose y coordinates differ by less than the effective font height
    # (accounts for mixed fonts and other minor oddities)
    last_ty = next(iter(ty_groups))
    last_txs = {int(_t["tx"]) for _t in ty_groups[last_ty] if _t["text"].strip()}
    for ty in list(ty_groups)[1:]:
        fsz = min(ty_groups[_y][0]["font_height"] for _y in (ty, last_ty))
        txs = {int(_t["tx"]) for _t in ty_groups[ty] if _t["text"].strip()}
        # prevent merge if both groups are rendering in the same x position.
        no_text_overlap = not (txs & last_txs)
        offset_less_than_font_height = abs(ty - last_ty) < fsz
        if no_text_overlap and offset_less_than_font_height:
            # Merge the current group into the previous line.
            ty_groups[last_ty] = sorted(
                ty_groups.pop(ty) + ty_groups[last_ty], key=lambda x: x["tx"]
            )
            last_txs |= txs
        else:
            last_ty = ty
            last_txs = txs
    if debug_path:  # pragma: no cover
        import json

        debug_path.joinpath("bt_groups.json").write_text(
            json.dumps(ty_groups, indent=2, default=str), "utf-8"
        )
    return ty_groups
def text_show_operations(
    ops: Iterator[Tuple[List[Any], bytes]],
    fonts: Dict[str, Font],
    strip_rotated: bool = True,
    debug_path: Optional[Path] = None,
) -> List[BTGroup]:
    """
    Extract text from BT/ET operator pairs.

    Args:
        ops (Iterator[Tuple[List, bytes]]): iterator of operators in content stream
        fonts (Dict[str, Font]): font dictionary
        strip_rotated: Removes text if rotated w.r.t. to the page. Defaults to True.
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        List[BTGroup]: list of dicts of text rendered by each BT operator
    """
    state_mgr = TextStateManager()  # transformation stack manager
    debug = bool(debug_path)
    bt_groups: List[BTGroup] = []  # BT operator dict
    tj_debug: List[TextStateParams] = []  # Tj/TJ operator data (debug only)
    try:
        # Warn at most once per condition for the whole content stream.
        warned_rotation = False
        warned_uninterpretable_font = False
        while True:
            operands, op = next(ops)
            if op in (b"BT", b"q"):
                bts, tjs = recurs_to_target_op(
                    ops, state_mgr, b"ET" if op == b"BT" else b"Q", fonts, strip_rotated
                )
                if not warned_rotation and any(tj.rotated for tj in tjs):
                    warned_rotation = True
                    if strip_rotated:
                        logger_warning(
                            "Rotated text discovered. Output will be incomplete.",
                            __name__,
                        )
                    else:
                        logger_warning(
                            "Rotated text discovered. Layout will be degraded.",
                            __name__,
                        )
                if not warned_uninterpretable_font and any(not tj.font.interpretable for tj in tjs):
                    warned_uninterpretable_font = True
                    logger_warning(
                        "PDF contains an uninterpretable font. Output will be incomplete.",
                        __name__,
                    )
                bt_groups.extend(bts)
                if debug:  # pragma: no cover
                    tj_debug.extend(tjs)
            elif op == b"Tf":
                state_mgr.set_font(fonts[operands[0]], operands[1])
            else:  # set Tc, Tw, Tz, TL, and Ts if required. ignores all other ops
                state_mgr.set_state_param(op, operands)
    except StopIteration:
        pass
    # left align the data, i.e. decrement all tx values by min(tx)
    min_x = min((x["tx"] for x in bt_groups), default=0.0)
    bt_groups = [
        dict(ogrp, tx=ogrp["tx"] - min_x, displaced_tx=ogrp["displaced_tx"] - min_x)  # type: ignore[misc]
        for ogrp in sorted(
            bt_groups, key=lambda x: (x["ty"] * x["flip_sort"], -x["tx"]), reverse=True
        )
    ]
    if debug_path:  # pragma: no cover
        import json

        debug_path.joinpath("bts.json").write_text(
            json.dumps(bt_groups, indent=2, default=str), "utf-8"
        )
        debug_path.joinpath("tjs.json").write_text(
            json.dumps(
                tj_debug, indent=2, default=lambda x: getattr(x, "to_dict", str)(x)
            ),
            "utf-8",
        )
    return bt_groups
def fixed_char_width(bt_groups: List[BTGroup], scale_weight: float = 1.25) -> float:
    """
    Calculate the average character width, weighting each sample by the
    length of its rendered text, for conversion to fixed-width layout.

    Args:
        bt_groups (List[BTGroup]): List of dicts of text rendered by each
            BT operator
        scale_weight: weighting factor applied to each sample's text length.

    Returns:
        float: fixed character width
    """
    # One (width, weight) sample per BT group.
    samples = [
        (
            (grp["displaced_tx"] - grp["tx"]) / (len(grp["text"]) * scale_weight),
            len(grp["text"]) * scale_weight,
        )
        for grp in bt_groups
    ]
    total_weight = sum(weight for _, weight in samples)
    return sum(width * weight for width, weight in samples) / total_weight
def fixed_width_page(
    ty_groups: Dict[int, List[BTGroup]], char_width: float, space_vertically: bool, font_height_weight: float
) -> str:
    """
    Generate page text from text operations grouped by rendered y coordinate.

    Args:
        ty_groups: dict of text show ops as returned by y_coordinate_groups()
        char_width: fixed character width
        space_vertically: include blank lines inferred from y distance + font height.
        font_height_weight: multiplier for font height when calculating blank lines.

    Returns:
        str: page text in a fixed width format that closely adheres to the rendered
        layout in the source pdf.
    """
    rendered: List[str] = []
    prev_y = 0
    for y_coord, line_data in ty_groups.items():
        if space_vertically and rendered:
            # Infer blank lines from the vertical gap to the previous line.
            fh = line_data[0]["font_height"]
            if fh != 0:
                gap = int(abs(y_coord - prev_y) / (fh * font_height_weight)) - 1
                rendered.extend([""] * gap)
        buffer = ""
        prev_disp = 0.0
        for bt_op in line_data:
            column = int(bt_op["tx"] // char_width)
            # Pad only when this fragment starts right of the previous one
            # (negative pad collapses to no padding via string repetition).
            pad = (column - len(buffer)) if ceil(prev_disp) < int(bt_op["tx"]) else 0
            buffer = f"{buffer}{' ' * pad}{bt_op['text']}"
            prev_disp = bt_op["displaced_tx"]
        if buffer.strip() or rendered:
            # Replace stray control characters (0x0E-0x1F) with spaces.
            rendered.append(
                "".join(" " if 14 <= ord(ch) <= 31 else ch for ch in buffer)
            )
        prev_y = y_coord
    return "\n".join(ln.rstrip() for ln in rendered if space_vertically or ln.strip())

View File

@ -0,0 +1,152 @@
"""Font constants and classes for "layout" mode text operations"""
from dataclasses import dataclass, field
from typing import Any, Dict, Sequence, Union, cast
from ..._codecs import adobe_glyphs
from ...errors import ParseError
from ...generic import IndirectObject
from ._font_widths import STANDARD_WIDTHS
@dataclass
class Font:
    """
    A font object formatted for use during "layout" mode text extraction

    Attributes:
        subtype (str): font subtype
        space_width (int | float): width of a space character
        encoding (str | Dict[int, str]): font encoding
        char_map (dict): character map
        font_dictionary (dict): font dictionary
        width_map (Dict[str, int]): mapping of characters to widths
        interpretable (bool): Default True. If False, the font glyphs cannot
            be translated to characters, e.g. Type3 fonts that do not define
            a '/ToUnicode' mapping.
    """

    subtype: str
    space_width: Union[int, float]
    encoding: Union[str, Dict[int, str]]
    char_map: Dict[Any, Any]
    font_dictionary: Dict[Any, Any]
    width_map: Dict[str, int] = field(default_factory=dict, init=False)
    interpretable: bool = True

    def __post_init__(self) -> None:
        """Populate ``width_map`` from the font dictionary; flag uninterpretable fonts."""
        # Type3 fonts that do not specify a "/ToUnicode" mapping cannot be
        # reliably converted into character codes unless all named chars
        # in /CharProcs map to a standard adobe glyph. See § 9.10.2 of the
        # PDF 1.7 standard.
        if self.subtype == "/Type3" and "/ToUnicode" not in self.font_dictionary:
            self.interpretable = all(
                cname in adobe_glyphs
                for cname in self.font_dictionary.get("/CharProcs") or []
            )
        if not self.interpretable:  # save some overhead if font is not interpretable
            return
        # TrueType fonts have a /Widths array mapping character codes to widths
        if isinstance(self.encoding, dict) and "/Widths" in self.font_dictionary:
            first_char = self.font_dictionary.get("/FirstChar", 0)
            self.width_map = {
                self.encoding.get(idx + first_char, chr(idx + first_char)): width
                for idx, width in enumerate(self.font_dictionary["/Widths"])
            }
        # CID fonts have a /W array mapping character codes to widths stashed in /DescendantFonts
        if "/DescendantFonts" in self.font_dictionary:
            d_font: Dict[Any, Any]
            for d_font_idx, d_font in enumerate(
                self.font_dictionary["/DescendantFonts"]
            ):
                # Resolve indirect references in place so later lookups are direct.
                while isinstance(d_font, IndirectObject):
                    d_font = d_font.get_object()
                self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font
                # Map character code -> surrogate char from the char map.
                ord_map = {
                    ord(_target): _surrogate
                    for _target, _surrogate in self.char_map.items()
                    if isinstance(_target, str)
                }
                # /W width definitions have two valid formats which can be mixed and matched:
                # (1) A character start index followed by a list of widths, e.g.
                # `45 [500 600 700]` applies widths 500, 600, 700 to characters 45-47.
                # (2) A character start index, a character stop index, and a width, e.g.
                # `45 65 500` applies width 500 to characters 45-65.
                skip_count = 0
                _w = d_font.get("/W", [])
                for idx, w_entry in enumerate(_w):
                    w_entry = w_entry.get_object()
                    if skip_count:
                        # Entry already consumed by a previous format match.
                        skip_count -= 1
                        continue
                    if not isinstance(w_entry, (int, float)):  # pragma: no cover
                        # We should never get here due to skip_count above. Add a
                        # warning and or use reader's "strict" to force an ex???
                        continue
                    # check for format (1): `int [int int int int ...]`
                    w_next_entry = _w[idx + 1].get_object()
                    if isinstance(w_next_entry, Sequence):
                        start_idx, width_list = w_entry, w_next_entry
                        self.width_map.update(
                            {
                                ord_map[_cidx]: _width
                                for _cidx, _width in zip(
                                    range(
                                        cast(int, start_idx),
                                        cast(int, start_idx) + len(width_list),
                                        1,
                                    ),
                                    width_list,
                                )
                                if _cidx in ord_map
                            }
                        )
                        skip_count = 1
                    # check for format (2): `int int int`
                    elif isinstance(w_next_entry, (int, float)) and isinstance(
                        _w[idx + 2].get_object(), (int, float)
                    ):
                        start_idx, stop_idx, const_width = (
                            w_entry,
                            w_next_entry,
                            _w[idx + 2].get_object(),
                        )
                        self.width_map.update(
                            {
                                ord_map[_cidx]: const_width
                                for _cidx in range(
                                    cast(int, start_idx), cast(int, stop_idx + 1), 1
                                )
                                if _cidx in ord_map
                            }
                        )
                        skip_count = 2
                    else:
                        # Note: this doesn't handle the case of out of bounds (reaching the end of the width definitions
                        # while expecting more elements). This raises an IndexError which is sufficient.
                        raise ParseError(
                            f"Invalid font width definition. Next elements: {w_entry}, {w_next_entry}, {_w[idx + 2]}"
                        )  # pragma: no cover
        if not self.width_map and "/BaseFont" in self.font_dictionary:
            # Fall back to the standard-14 width tables keyed by base font name.
            for key in STANDARD_WIDTHS:
                if self.font_dictionary["/BaseFont"].startswith(f"/{key}"):
                    self.width_map = STANDARD_WIDTHS[key]
                    break

    def word_width(self, word: str) -> float:
        """Sum of character widths specified in PDF font for the supplied word"""
        # Unknown characters fall back to twice the space width — heuristic
        # default, not from the PDF spec; TODO confirm.
        return sum(
            [self.width_map.get(char, self.space_width * 2) for char in word], 0.0
        )

    @staticmethod
    def to_dict(font_instance: "Font") -> Dict[str, Any]:
        """Dataclass to dict for json.dumps serialization."""
        return {
            k: getattr(font_instance, k) for k in font_instance.__dataclass_fields__
        }

View File

@ -0,0 +1,208 @@
# Widths for the standard 14 fonts as described on page 416 of the PDF 1.7 standard
STANDARD_WIDTHS = {
"Helvetica": { # 4 fonts, includes bold, oblique and boldoblique variants
" ": 278,
"!": 278,
'"': 355,
"#": 556,
"$": 556,
"%": 889,
"&": 667,
"'": 191,
"(": 333,
")": 333,
"*": 389,
"+": 584,
",": 278,
"-": 333,
".": 278,
"/": 278,
"0": 556,
"1": 556,
"2": 556,
"3": 556,
"4": 556,
"5": 556,
"6": 556,
"7": 556,
"8": 556,
"9": 556,
":": 278,
";": 278,
"<": 584,
"=": 584,
">": 584,
"?": 611,
"@": 975,
"A": 667,
"B": 667,
"C": 722,
"D": 722,
"E": 667,
"F": 611,
"G": 778,
"H": 722,
"I": 278,
"J": 500,
"K": 667,
"L": 556,
"M": 833,
"N": 722,
"O": 778,
"P": 667,
"Q": 944,
"R": 667,
"S": 667,
"T": 611,
"U": 278,
"V": 278,
"W": 584,
"X": 556,
"Y": 556,
"Z": 500,
"[": 556,
"\\": 556,
"]": 556,
"^": 278,
"_": 278,
"`": 278,
"a": 278,
"b": 278,
"c": 333,
"d": 556,
"e": 556,
"f": 556,
"g": 556,
"h": 556,
"i": 556,
"j": 556,
"k": 556,
"l": 556,
"m": 556,
"n": 278,
"o": 278,
"p": 556,
"q": 556,
"r": 500,
"s": 556,
"t": 556,
"u": 278,
"v": 500,
"w": 500,
"x": 222,
"y": 222,
"z": 556,
"{": 222,
"|": 833,
"}": 556,
"~": 556,
},
"Times": { # 4 fonts, includes bold, oblique and boldoblique variants
" ": 250,
"!": 333,
'"': 408,
"#": 500,
"$": 500,
"%": 833,
"&": 778,
"'": 180,
"(": 333,
")": 333,
"*": 500,
"+": 564,
",": 250,
"-": 333,
".": 250,
"/": 564,
"0": 500,
"1": 500,
"2": 500,
"3": 500,
"4": 500,
"5": 500,
"6": 500,
"7": 500,
"8": 500,
"9": 500,
":": 278,
";": 278,
"<": 564,
"=": 564,
">": 564,
"?": 444,
"@": 921,
"A": 722,
"B": 667,
"C": 667,
"D": 722,
"E": 611,
"F": 556,
"G": 722,
"H": 722,
"I": 333,
"J": 389,
"K": 722,
"L": 611,
"M": 889,
"N": 722,
"O": 722,
"P": 556,
"Q": 722,
"R": 667,
"S": 556,
"T": 611,
"U": 722,
"V": 722,
"W": 944,
"X": 722,
"Y": 722,
"Z": 611,
"[": 333,
"\\": 278,
"]": 333,
"^": 469,
"_": 500,
"`": 333,
"a": 444,
"b": 500,
"c": 444,
"d": 500,
"e": 444,
"f": 333,
"g": 500,
"h": 500,
"i": 278,
"j": 278,
"k": 500,
"l": 278,
"m": 722,
"n": 500,
"o": 500,
"p": 500,
"q": 500,
"r": 333,
"s": 389,
"t": 278,
"u": 500,
"v": 444,
"w": 722,
"x": 500,
"y": 444,
"z": 389,
"{": 348,
"|": 220,
"}": 348,
"~": 469,
},
}
# 4 fonts, includes bold, oblique and bold oblique variants
STANDARD_WIDTHS[
    "Courier"
] = dict.fromkeys(STANDARD_WIDTHS["Times"], 600)  # fixed width
STANDARD_WIDTHS["ZapfDingbats"] = dict.fromkeys(STANDARD_WIDTHS["Times"], 1000)  # 1 font
STANDARD_WIDTHS["Symbol"] = dict.fromkeys(STANDARD_WIDTHS["Times"], 500)  # 1 font
# add aliases per table H.3 on page 1110 of the PDF 1.7 standard
# NOTE: aliases share the same dict objects as the originals (no copies).
STANDARD_WIDTHS["CourierNew"] = STANDARD_WIDTHS["Courier"]
STANDARD_WIDTHS["Arial"] = STANDARD_WIDTHS["Helvetica"]
STANDARD_WIDTHS["TimesNewRoman"] = STANDARD_WIDTHS["Times"]

View File

@ -0,0 +1,217 @@
"""manage the PDF transform stack during "layout" mode text extraction"""
from collections import ChainMap, Counter
from typing import Any, Dict, List, MutableMapping, Union
from typing import ChainMap as ChainMapType
from typing import Counter as CounterType
from ...errors import PdfReadError
from .. import mult
from ._font import Font
from ._text_state_params import TextStateParams
TextStateManagerChainMapType = ChainMapType[Union[int, str], Union[float, bool]]
TextStateManagerDictType = MutableMapping[Union[int, str], Union[float, bool]]
class TextStateManager:
    """
    Tracks the current text state including cm/tm/trm transformation matrices.
    Attributes:
        transform_stack (ChainMap): ChainMap of cm/tm transformation matrices
        q_queue (Counter[int]): Counter of q operators
        q_depth (List[int]): list of q operator nesting levels
        Tc (float): character spacing
        Tw (float): word spacing
        Tz (int): horizontal scaling
        TL (float): leading
        Ts (float): text rise
        font (Font): font object
        font_size (int | float): font size
    """
    def __init__(self) -> None:
        # The bottom of the stack is the identity matrix; cm/tm/trm transforms
        # are layered on top of it as ChainMap children.
        self.transform_stack: TextStateManagerChainMapType = ChainMap(
            self.new_transform()
        )
        self.q_queue: CounterType[int] = Counter()
        self.q_depth = [0]
        self.Tc: float = 0.0
        self.Tw: float = 0.0
        self.Tz: float = 100.0
        self.TL: float = 0.0
        self.Ts: float = 0.0
        # No font until a Tf operator is seen; text_state_params() enforces this.
        self.font: Union[Font, None] = None
        self.font_size: Union[int, float] = 0
    def set_state_param(self, op: bytes, value: Union[float, List[Any]]) -> None:
        """
        Set a text state parameter. Supports Tc, Tz, Tw, TL, and Ts operators.
        Args:
            op: operator read from PDF stream as bytes. No action is taken
                for unsupported operators (see supported operators above).
            value (float | List[Any]): new parameter value. If a list,
                value[0] is used.
        """
        if op not in [b"Tc", b"Tz", b"Tw", b"TL", b"Ts"]:
            return
        # Operator mnemonics double as attribute names (self.Tc, self.Tz, ...).
        self.__setattr__(op.decode(), value[0] if isinstance(value, list) else value)
    def set_font(self, font: Font, size: float) -> None:
        """
        Set the current font and font_size.
        Args:
            font (Font): a layout mode Font
            size (float): font size
        """
        self.font = font
        self.font_size = size
    def text_state_params(self, value: Union[bytes, str] = "") -> TextStateParams:
        """
        Create a TextStateParams instance to display a text string. Type[bytes] values
        will be decoded implicitly.
        Args:
            value (str | bytes): text to associate with the captured state.
        Raises:
            PdfReadError: if font not set (no Tf operator in incoming pdf content stream)
        Returns:
            TextStateParams: current text state parameters
        """
        if not isinstance(self.font, Font):
            raise PdfReadError(
                "font not set: is PDF missing a Tf operator?"
            ) # pragma: no cover
        if isinstance(value, bytes):
            try:
                if isinstance(self.font.encoding, str):
                    # Named encoding: decode directly, keeping unpaired
                    # surrogates so no byte is silently dropped.
                    txt = value.decode(self.font.encoding, "surrogatepass")
                else:
                    # Dict encoding: map each byte via the font's table,
                    # falling back to that byte's own one-byte decoding.
                    txt = "".join(
                        self.font.encoding[x]
                        if x in self.font.encoding
                        else bytes((x,)).decode()
                        for x in value
                    )
            except (UnicodeEncodeError, UnicodeDecodeError):
                txt = value.decode("utf-8", "replace")
            # Apply the font's character-map substitutions after byte decoding.
            txt = "".join(
                self.font.char_map.get(x, x) for x in txt
            )
        else:
            txt = value
        return TextStateParams(
            txt,
            self.font,
            self.font_size,
            self.Tc,
            self.Tw,
            self.Tz,
            self.TL,
            self.Ts,
            self.effective_transform,
        )
    @staticmethod
    def raw_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
    ) -> Dict[int, float]:
        """Only a/b/c/d/e/f matrix params"""
        # Keys 0-5 so the dict can stand in for a 6-element matrix list.
        return dict(zip(range(6), map(float, (_a, _b, _c, _d, _e, _f))))
    @staticmethod
    def new_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
        is_text: bool = False,
        is_render: bool = False,
    ) -> TextStateManagerDictType:
        """Standard a/b/c/d/e/f matrix params + 'is_text' and 'is_render' keys"""
        result: Any = TextStateManager.raw_transform(_a, _b, _c, _d, _e, _f)
        result.update({"is_text": is_text, "is_render": is_render})
        return result
    def reset_tm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_text==True or is_render==True"""
        while (
            self.transform_stack.maps[0]["is_text"]
            or self.transform_stack.maps[0]["is_render"]
        ):
            self.transform_stack = self.transform_stack.parents
        return self.transform_stack
    def reset_trm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_render==True"""
        while self.transform_stack.maps[0]["is_render"]:
            self.transform_stack = self.transform_stack.parents
        return self.transform_stack
    def remove_q(self) -> TextStateManagerChainMapType:
        """Rewind to stack prior state after closing a 'q' with internal 'cm' ops"""
        self.transform_stack = self.reset_tm()
        # Drop as many cm layers as were recorded at this q nesting level.
        self.transform_stack.maps = self.transform_stack.maps[
            self.q_queue.pop(self.q_depth.pop(), 0) :
        ]
        return self.transform_stack
    def add_q(self) -> None:
        """Add another level to q_queue"""
        self.q_depth.append(len(self.q_depth))
    def add_cm(self, *args: Any) -> TextStateManagerChainMapType:
        """Concatenate an additional transform matrix"""
        self.transform_stack = self.reset_tm()
        # Count this cm against the current q level so remove_q() can pop
        # the matching number of layers later.
        self.q_queue.update(self.q_depth[-1:])
        self.transform_stack = self.transform_stack.new_child(self.new_transform(*args))
        return self.transform_stack
    def _complete_matrix(self, operands: List[float]) -> List[float]:
        """Adds a, b, c, and d to an "e/f only" operand set (e.g Td)"""
        if len(operands) == 2:  # this is a Td operator or equivalent
            operands = [1.0, 0.0, 0.0, 1.0, *operands]
        return operands
    def add_tm(self, operands: List[float]) -> TextStateManagerChainMapType:
        """Append a text transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform( # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True # type: ignore[arg-type]
            )
        )
        return self.transform_stack
    def add_trm(self, operands: List[float]) -> TextStateManagerChainMapType:
        """Append a text rendering transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform( # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True, is_render=True # type: ignore[arg-type]
            )
        )
        return self.transform_stack
    @property
    def effective_transform(self) -> List[float]:
        """Current effective transform accounting for cm, tm, and trm transforms"""
        # Multiply from the newest layer down so outer transforms apply last.
        eff_transform = [*self.transform_stack.maps[0].values()]
        for transform in self.transform_stack.maps[1:]:
            eff_transform = mult(eff_transform, transform) # type: ignore[arg-type] # dict has int keys 0-5
        return eff_transform

View File

@ -0,0 +1,129 @@
"""A dataclass that captures the CTM and Text State for a tj operation"""
import math
from dataclasses import dataclass, field
from typing import Any, Dict, List, Union
from .. import mult, orient
from ._font import Font
@dataclass
class TextStateParams:
    """
    Text state parameters and operator values for a single text value in a
    TJ or Tj PDF operation.
    Attributes:
        txt (str): the text to be rendered.
        font (Font): font object
        font_size (int | float): font size
        Tc (float): character spacing. Defaults to 0.0.
        Tw (float): word spacing. Defaults to 0.0.
        Tz (float): horizontal scaling. Defaults to 100.0.
        TL (float): leading, vertical displacement between text lines. Defaults to 0.0.
        Ts (float): text rise. Used for super/subscripts. Defaults to 0.0.
        transform (List[float]): effective transformation matrix.
        tx (float): x coord of rendered text, i.e. self.transform[4]
        ty (float): y coord of rendered text. May differ from self.transform[5] per self.Ts.
        displaced_tx (float): x coord immediately following rendered text
        space_tx (float): tx for a space character
        font_height (float): effective font height accounting for CTM
        flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.)
        rotated (bool): True if the text orientation is rotated with respect to the page.
    """
    txt: str
    font: Font
    font_size: Union[int, float]
    Tc: float = 0.0
    Tw: float = 0.0
    Tz: float = 100.0
    TL: float = 0.0
    Ts: float = 0.0
    transform: List[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    # Derived values below are computed in __post_init__ (init=False).
    tx: float = field(default=0.0, init=False)
    ty: float = field(default=0.0, init=False)
    displaced_tx: float = field(default=0.0, init=False)
    space_tx: float = field(default=0.0, init=False)
    font_height: float = field(default=0.0, init=False)
    flip_vertical: bool = field(default=False, init=False)
    rotated: bool = field(default=False, init=False)
    def __post_init__(self) -> None:
        # Undo a 90/270 degree orientation so downstream layout math can
        # assume horizontal text; record that we did so via self.rotated.
        if orient(self.transform) in (90, 270):
            self.transform = mult(
                [1.0, -self.transform[1], -self.transform[2], 1.0, 0.0, 0.0],
                self.transform,
            )
            self.rotated = True
        # self.transform[0] AND self.transform[3] < 0 indicates true rotation.
        # If only self.transform[3] < 0, the y coords are simply inverted.
        if orient(self.transform) == 180 and self.transform[0] < -1e-6:
            self.transform = mult([-1.0, 0.0, 0.0, -1.0, 0.0, 0.0], self.transform)
            self.rotated = True
        self.displaced_tx = self.displaced_transform()[4]
        self.tx = self.transform[4]
        self.ty = self.render_transform()[5]
        self.space_tx = round(self.word_tx(" "), 3)
        if self.space_tx < 1e-6:
            # if the " " char is assigned 0 width (e.g. for fine tuned spacing
            # with TJ int operators a la crazyones.pdf), calculate space_tx as
            # a TD_offset of -2 * font.space_width where font.space_width is
            # the space_width calculated in _cmap.py.
            self.space_tx = round(self.word_tx("", self.font.space_width * -2), 3)
        # Scale nominal font size by the vertical component of the transform.
        self.font_height = self.font_size * math.sqrt(
            self.transform[1] ** 2 + self.transform[3] ** 2
        )
        # flip_vertical handles PDFs generated by Microsoft Word's "publish" command.
        self.flip_vertical = self.transform[3] < -1e-6  # inverts y axis
    def font_size_matrix(self) -> List[float]:
        """Font size matrix"""
        return [
            self.font_size * (self.Tz / 100.0),
            0.0,
            0.0,
            self.font_size,
            0.0,
            self.Ts,
        ]
    def displaced_transform(self) -> List[float]:
        """Effective transform matrix after text has been rendered."""
        return mult(self.displacement_matrix(), self.transform)
    def render_transform(self) -> List[float]:
        """Effective transform matrix accounting for font size, Tz, and Ts."""
        return mult(self.font_size_matrix(), self.transform)
    def displacement_matrix(
        self, word: Union[str, None] = None, TD_offset: float = 0.0
    ) -> List[float]:
        """
        Text displacement matrix
        Args:
            word (str, optional): Defaults to None in which case self.txt displacement is
                returned.
            TD_offset (float, optional): translation applied by TD operator. Defaults to 0.0.
        """
        word = word if word is not None else self.txt
        return [1.0, 0.0, 0.0, 1.0, self.word_tx(word, TD_offset), 0.0]
    def word_tx(self, word: str, TD_offset: float = 0.0) -> float:
        """Horizontal text displacement for any word according to this text state"""
        return (
            (self.font_size * ((self.font.word_width(word) - TD_offset) / 1000.0))
            + self.Tc
            + word.count(" ") * self.Tw
        ) * (self.Tz / 100.0)
    @staticmethod
    def to_dict(inst: "TextStateParams") -> Dict[str, Any]:
        """Dataclass to dict for json.dumps serialization"""
        # The font object is not JSON serializable, so it is excluded.
        return {k: getattr(inst, k) for k in inst.__dataclass_fields__ if k != "font"}

View File

@ -0,0 +1,605 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Utility functions for PDF library."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import functools
import logging
import re
import sys
import warnings
from dataclasses import dataclass
from datetime import datetime, timezone
from io import DEFAULT_BUFFER_SIZE
from os import SEEK_CUR
from typing import (
IO,
Any,
Dict,
List,
Optional,
Pattern,
Tuple,
Union,
overload,
)
if sys.version_info[:2] >= (3, 10):
# Python 3.10+: https://www.python.org/dev/peps/pep-0484/
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self
from .errors import (
STREAM_TRUNCATED_PREMATURELY,
DeprecationError,
PdfStreamError,
)
TransformationMatrixType: TypeAlias = Tuple[
Tuple[float, float, float], Tuple[float, float, float], Tuple[float, float, float]
]
CompressedTransformationMatrix: TypeAlias = Tuple[
float, float, float, float, float, float
]
StreamType = IO[Any]
StrByteType = Union[str, StreamType]
def parse_iso8824_date(text: Optional[str]) -> Optional[datetime]:
    """
    Parse a PDF date string (ISO/IEC 8824 style, e.g. ``D:20230501120000Z``)
    into a :class:`datetime.datetime`.

    Args:
        text: the raw date string, or None.

    Returns:
        The parsed datetime (timezone-aware for ``+0000`` offsets), or None
        if ``text`` is None.

    Raises:
        ValueError: if the string matches none of the supported layouts.
    """
    if text is None:
        return None
    original = text
    # Tolerate strings missing the "D:" prefix.
    if text[0].isdigit():
        text = "D:" + text
    # Normalize a trailing Z/z (UTC) into an explicit +0000 offset.
    if text.endswith(("Z", "z")):
        text += "0000"
    text = text.replace("z", "+").replace("Z", "+").replace("'", "")
    sign_pos = max(text.find("+"), text.find("-"))
    # Pad a short offset like "+02" out to "+0200" for %z.
    if sign_pos > 0 and sign_pos != len(text) - 5:
        text += "00"
    for fmt in (
        "D:%Y",
        "D:%Y%m",
        "D:%Y%m%d",
        "D:%Y%m%d%H",
        "D:%Y%m%d%H%M",
        "D:%Y%m%d%H%M%S",
        "D:%Y%m%d%H%M%S%z",
    ):
        try:
            parsed = datetime.strptime(text, fmt)  # noqa: DTZ007
        except ValueError:
            continue
        if text.endswith("+0000"):
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed
    raise ValueError(f"Can not convert date: {original}")
def _get_max_pdf_version_header(header1: str, header2: str) -> str:
versions = (
"%PDF-1.3",
"%PDF-1.4",
"%PDF-1.5",
"%PDF-1.6",
"%PDF-1.7",
"%PDF-2.0",
)
pdf_header_indices = []
if header1 in versions:
pdf_header_indices.append(versions.index(header1))
if header2 in versions:
pdf_header_indices.append(versions.index(header2))
if len(pdf_header_indices) == 0:
raise ValueError(f"Neither {header1!r} nor {header2!r} are proper headers")
return versions[max(pdf_header_indices)]
# PDF whitespace characters (NUL, TAB, LF, FF, CR, space).
WHITESPACES = (b"\x00", b"\t", b"\n", b"\f", b"\r", b" ")
# Same characters as one bytes object, and as a regex character class.
WHITESPACES_AS_BYTES = b"".join(WHITESPACES)
WHITESPACES_AS_REGEXP = b"[" + WHITESPACES_AS_BYTES + b"]"
def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
    """
    Read non-whitespace characters and return them.
    Stops upon encountering whitespace, EOF, or when maxchars is reached.
    Args:
        stream: The data stream from which was read.
        maxchars: The maximum number of bytes returned; by default unlimited.
    Returns:
        The data which was read.
    """
    collected = bytearray()
    while True:
        char = stream.read(1)
        # EOF returns b"", which is not "space" but must also terminate.
        if char.isspace() or not char:
            break
        collected += char
        if len(collected) == maxchars:
            break
    return bytes(collected)
def read_non_whitespace(stream: StreamType) -> bytes:
    """
    Find and read the next non-whitespace character (ignores whitespace).
    Args:
        stream: The data stream from which was read.
    Returns:
        The data which was read (b"" at EOF).
    """
    while (char := stream.read(1)) in WHITESPACES:
        pass
    return char
def skip_over_whitespace(stream: StreamType) -> bool:
    """
    Similar to read_non_whitespace, but return a boolean if at least one
    whitespace character was read.  Note that the first non-whitespace
    character encountered is consumed as well.
    Args:
        stream: The data stream from which was read.
    Returns:
        True if one or more whitespace was skipped, otherwise return False.
    """
    skipped = 0
    while (tok := stream.read(1)) in WHITESPACES:
        skipped += 1
    return skipped > 0
def check_if_whitespace_only(value: bytes) -> bool:
    """
    Check if the given value consists of whitespace characters only.
    An empty value counts as whitespace-only.
    Args:
        value: The bytes to check.
    Returns:
        True if the value only has whitespace characters, otherwise return False.
    """
    return not any(byte not in WHITESPACES_AS_BYTES for byte in value)
def skip_over_comment(stream: StreamType) -> None:
    """
    Advance the stream past a %-comment (through its EOL marker) if one
    starts at the current position; otherwise leave the position unchanged.
    Raises:
        PdfStreamError: if EOF is hit before the comment's line ends.
    """
    first = stream.read(1)
    stream.seek(-1, 1)
    if first != b"%":
        return
    tok = first
    while tok not in (b"\n", b"\r"):
        tok = stream.read(1)
        if tok == b"":
            raise PdfStreamError("File ended unexpectedly.")
def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes:
    """
    Read until the regular expression pattern matched (ignore the match).
    Treats EOF on the underlying stream as the end of the token to be matched.
    The stream is left positioned at the start of the match.
    Args:
        regex: re.Pattern
    Returns:
        The read bytes.
    """
    buffer = b""
    # Read in 16-byte chunks, re-searching the accumulated buffer so a
    # match spanning a chunk boundary is still found.
    while chunk := stream.read(16):
        buffer += chunk
        match = regex.search(buffer)
        if match is not None:
            # Rewind to just before the match and drop it from the token.
            stream.seek(match.start() - len(buffer), 1)
            return buffer[: match.start()]
    return buffer
def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
    """
    Given a stream at position X, read a block of size to_read ending at position X.
    This changes the stream's position to the beginning of where the block was
    read.
    Args:
        stream:
        to_read:
    Returns:
        The data which was read.
    Raises:
        PdfStreamError: if fewer than ``to_read`` bytes precede the position.
    """
    if to_read > stream.tell():
        raise PdfStreamError("Could not read malformed PDF file")
    # Jump back, read forward, then jump back again so the caller is left
    # at the start of the block.
    stream.seek(-to_read, SEEK_CUR)
    block = stream.read(to_read)
    stream.seek(-to_read, SEEK_CUR)
    return block
def read_previous_line(stream: StreamType) -> bytes:
    """
    Given a byte stream with current position X, return the previous line.
    All characters between the first CR/LF byte found before X
    (or, the start of the file, if no such byte is found) and position X
    are returned.
    After this call, the stream will be positioned one byte after the
    first non-CRLF character found beyond the first CR/LF byte before X,
    or, if no such byte is found, at the beginning of the stream.
    Args:
        stream: StreamType:
    Returns:
        The data which was read.
    Raises:
        PdfStreamError: if the stream is already at position 0.
    """
    line_content = []
    found_crlf = False
    if stream.tell() == 0:
        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
    # Scan backwards one buffer-sized block at a time.
    while True:
        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
        if to_read == 0:
            break
        # Read the block. After this, our stream will be one
        # beyond the initial position.
        block = read_block_backwards(stream, to_read)
        idx = len(block) - 1
        if not found_crlf:
            # We haven't found our first CR/LF yet.
            # Read off characters until we hit one.
            while idx >= 0 and block[idx] not in b"\r\n":
                idx -= 1
            if idx >= 0:
                found_crlf = True
        if found_crlf:
            # We found our first CR/LF already (on this block or
            # a previous one).
            # Our combined line is the remainder of the block
            # plus any previously read blocks.
            line_content.append(block[idx + 1 :])
            # Continue to read off any more CRLF characters.
            while idx >= 0 and block[idx] in b"\r\n":
                idx -= 1
        else:
            # Didn't find CR/LF yet - add this block to our
            # previously read blocks and continue.
            line_content.append(block)
        if idx >= 0:
            # We found the next non-CRLF character.
            # Set the stream position correctly, then break
            stream.seek(idx + 1, SEEK_CUR)
            break
    # Join all the blocks in the line (which are in reverse order)
    return b"".join(line_content[::-1])
def matrix_multiply(
    a: TransformationMatrixType, b: TransformationMatrixType
) -> TransformationMatrixType:
    """Multiply two 3x3 transformation matrices (``a @ b``), coercing every
    entry to float."""
    b_columns = list(zip(*b))
    return tuple(  # type: ignore[return-value]
        tuple(
            sum(float(x) * float(y) for x, y in zip(row, column))
            for column in b_columns
        )
        for row in a
    )
def mark_location(stream: StreamType) -> None:
    """Create text file showing current location in context."""
    # Mainly for debugging: dumps 5000 bytes before and after the current
    # position to pypdf_pdfLocation.txt with a "HERE" marker in between,
    # restoring the stream position afterwards.
    radius = 5000
    stream.seek(-radius, 1)
    before = stream.read(radius)
    after = stream.read(radius)
    with open("pypdf_pdfLocation.txt", "wb") as output_fh:
        output_fh.write(before)
        output_fh.write(b"HERE")
        output_fh.write(after)
    stream.seek(-radius, 1)
@overload
def ord_(b: str) -> int:
    ...
@overload
def ord_(b: bytes) -> bytes:
    ...
@overload
def ord_(b: int) -> int:
    ...
def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
    """Normalize a one-character string to its code point; pass ints and
    bytes through unchanged."""
    return ord(b) if isinstance(b, str) else b
def deprecate(msg: str, stacklevel: int = 3) -> None:
    """Emit a DeprecationWarning for a feature scheduled for removal."""
    warnings.warn(msg, DeprecationWarning, stacklevel=stacklevel)
def deprecation(msg: str) -> None:
    """Raise for a feature that has already been removed."""
    raise DeprecationError(msg)
def deprecate_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """Issue a warning that a feature will be removed, but has a replacement."""
    # stacklevel=4 attributes the warning past this helper and deprecate(),
    # i.e. closer to the user's call site.
    deprecate(
        f"{old_name} is deprecated and will be removed in pypdf {removed_in}. Use {new_name} instead.",
        4,
    )
def deprecation_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """Raise an exception that a feature was already removed, but has a replacement."""
    deprecation(
        f"{old_name} is deprecated and was removed in pypdf {removed_in}. Use {new_name} instead."
    )
def deprecate_no_replacement(name: str, removed_in: str) -> None:
    """Issue a warning that a feature will be removed without replacement."""
    # stacklevel=4 attributes the warning past this helper and deprecate().
    deprecate(f"{name} is deprecated and will be removed in pypdf {removed_in}.", 4)
def deprecation_no_replacement(name: str, removed_in: str) -> None:
    """Raise an exception that a feature was already removed without replacement."""
    deprecation(f"{name} is deprecated and was removed in pypdf {removed_in}.")
def logger_error(msg: str, src: str) -> None:
    """
    Use this instead of logger.error directly.
    That allows people to overwrite it more easily.
    See the docs on when to use which:
    https://pypdf.readthedocs.io/en/latest/user/suppress-warnings.html
    Args:
        msg: message to log
        src: logger name; callers typically pass __name__
    """
    logging.getLogger(src).error(msg)
def logger_warning(msg: str, src: str) -> None:
    """
    Use this instead of logger.warning directly.
    That allows people to overwrite it more easily.
    ## Exception, warnings.warn, logger_warning
    - Exceptions should be used if the user should write code that deals with
      an error case, e.g. the PDF being completely broken.
    - warnings.warn should be used if the user needs to fix their code, e.g.
      DeprecationWarnings
    - logger_warning should be used if the user needs to know that an issue was
      handled by pypdf, e.g. a non-compliant PDF being read in a way that
      pypdf could apply a robustness fix to still read it. This applies mainly
      to strict=False mode.
    Args:
        msg: message to log
        src: logger name; callers typically pass __name__
    """
    logging.getLogger(src).warning(msg)
def rename_kwargs(
    func_name: str, kwargs: Dict[str, Any], aliases: Dict[str, str], fail: bool = False
) -> None:
    """
    Helper function to deprecate arguments.

    Translates deprecated keyword names to their replacements in ``kwargs``
    (mutated in place), warning — or raising, when ``fail`` is True — for
    each deprecated name encountered.

    Args:
        func_name: Name of the function to be deprecated
        kwargs:
        aliases:
        fail:
    """
    for old_term, new_term in aliases.items():
        if old_term not in kwargs:
            continue
        if fail:
            raise DeprecationError(
                f"{old_term} is deprecated as an argument. Use {new_term} instead"
            )
        if new_term in kwargs:
            # Both spellings supplied: ambiguous, so refuse.
            raise TypeError(
                f"{func_name} received both {old_term} and {new_term} as "
                f"an argument. {old_term} is deprecated. "
                f"Use {new_term} instead."
            )
        kwargs[new_term] = kwargs.pop(old_term)
        warnings.warn(
            message=(
                f"{old_term} is deprecated as an argument. Use {new_term} instead"
            ),
            category=DeprecationWarning,
        )
def _human_readable_bytes(bytes: int) -> str:
if bytes < 10**3:
return f"{bytes} Byte"
elif bytes < 10**6:
return f"{bytes / 10**3:.1f} kB"
elif bytes < 10**9:
return f"{bytes / 10**6:.1f} MB"
else:
return f"{bytes / 10**9:.1f} GB"
# The following class has been copied from Django:
# https://github.com/django/django/blob/adae619426b6f50046b3daaa744db52989c9d6db/django/utils/functional.py#L51-L65
# It received some modifications to comply with our own coding standards.
#
# Original license:
#
# ---------------------------------------------------------------------------------
# Copyright (c) Django Software Foundation and individual contributors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of Django nor the names of its contributors may be used
# to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------------
class classproperty:  # noqa: N801
    """
    Decorator that converts a method with a single cls argument into a property
    that can be accessed directly from the class.
    """
    def __init__(self, method=None) -> None:  # type: ignore # noqa: ANN001
        # The wrapped getter; may also be attached later via .getter().
        self.fget = method
    def __get__(self, instance, cls=None) -> Any:  # type: ignore # noqa: ANN001
        # Always invoke with the owning class, regardless of instance access.
        return self.fget(cls)
    def getter(self, method) -> Self:  # type: ignore # noqa: ANN001
        # Mirrors property.getter so the decorator is drop-in compatible.
        self.fget = method
        return self
@dataclass
class File:
    """Lightweight container describing a file stored within a PDF."""
    # NOTE(review): imported in the class body rather than at module level —
    # presumably to avoid a circular import with .generic; confirm.
    from .generic import IndirectObject
    name: str = ""
    """
    Filename as identified within the PDF file.
    """
    data: bytes = b""
    """
    Data as bytes.
    """
    indirect_reference: Optional[IndirectObject] = None
    """
    Reference to the object storing the stream.
    """
    def __str__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, data: {_human_readable_bytes(len(self.data))})"
    def __repr__(self) -> str:
        # Same as __str__ with the data hash appended before the closing paren.
        return self.__str__()[:-1] + f", hash: {hash(self.data)})"
@functools.total_ordering
class Version:
    """
    Loosely ordered version identifier, e.g. ``"1.2.3"`` or ``"2.0b1"``.

    Each dot-separated component is parsed into an ``(integer_prefix, suffix)``
    pair; equality and ordering compare those pairs component-wise (remaining
    comparison operators are supplied by ``functools.total_ordering``).
    """
    COMPONENT_PATTERN = re.compile(r"^(\d+)(.*)$")
    def __init__(self, version_str: str) -> None:
        self.version_str = version_str
        self.components = self._parse_version(version_str)
    def _parse_version(self, version_str: str) -> List[Tuple[int, str]]:
        """Split ``version_str`` into ``(integer_prefix, suffix)`` pairs."""
        components = version_str.split(".")
        parsed_components = []
        for component in components:
            match = Version.COMPONENT_PATTERN.match(component)
            if not match:
                # No leading digits (e.g. "beta"): treat the numeric part as 0.
                parsed_components.append((0, component))
                continue
            # group(1) is always non-empty when the pattern matches (\d+).
            parsed_components.append((int(match.group(1)), match.group(2)))
        return parsed_components
    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Version):
            return False
        return self.components == other.components
    def __hash__(self) -> int:
        # Defining __eq__ alone would implicitly set __hash__ = None and make
        # Version unusable in sets/dicts; hash the same data __eq__ compares
        # so equal versions hash equally.
        return hash(tuple(self.components))
    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Version):
            raise ValueError(f"Version cannot be compared against {type(other)}")
        for self_component, other_component in zip(self.components, other.components):
            self_value, self_suffix = self_component
            other_value, other_suffix = other_component
            if self_value < other_value:
                return True
            elif self_value > other_value:
                return False
            # Numeric parts equal: fall back to lexicographic suffix order.
            if self_suffix < other_suffix:
                return True
            elif self_suffix > other_suffix:
                return False
        # All shared components equal: the shorter version is the smaller one.
        return len(self.components) < len(other.components)

View File

@ -0,0 +1 @@
# Single source of truth for the installed package version.
__version__ = "5.4.0"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,379 @@
"""Code in here is only used by pypdf.filters._xobj_to_image"""
import sys
from io import BytesIO
from typing import Any, Dict, List, Literal, Tuple, Union, cast
from ._utils import check_if_whitespace_only, logger_warning
from .constants import ColorSpaces
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .errors import EmptyImageDataError, PdfReadError
from .generic import (
ArrayObject,
DecodedStreamObject,
EncodedStreamObject,
IndirectObject,
NullObject,
TextStringObject,
)
if sys.version_info[:2] >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
try:
from PIL import Image, UnidentifiedImageError # noqa: F401
except ImportError:
raise ImportError(
"pillow is required to do image extraction. "
"It can be installed via 'pip install pypdf[image]'"
)
# Image-mode strings this module produces; "" means "undetermined", while
# "2bits"/"4bits" are interim markers later expanded to palette ("P") data.
mode_str_type: TypeAlias = Literal[
    "", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"
]
# Recursion limit for nested color spaces in _get_imagemode.
MAX_IMAGE_MODE_NESTING_DEPTH: int = 10
def _get_imagemode(
    color_space: Union[str, List[Any], Any],
    color_components: int,
    prev_mode: mode_str_type,
    depth: int = 0,
) -> Tuple[mode_str_type, bool]:
    """
    Resolve a PDF color space to a Pillow image mode, recursing through
    nested spaces (/Indexed, /Separation, /DeviceN, /ICCBased).
    Returns:
        Image mode, not taking into account mask (transparency).
        ColorInversion is required (like for some DeviceCMYK).
    """
    # Recursion guard: nested color spaces re-enter with depth + 1.
    if depth > MAX_IMAGE_MODE_NESTING_DEPTH:
        raise PdfReadError(
            "Color spaces nested too deeply. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH."
        )
    if isinstance(color_space, NullObject):
        return "", False
    if isinstance(color_space, str):
        pass
    elif not isinstance(color_space, list):
        raise PdfReadError(
            "Cannot interpret color space", color_space
        )  # pragma: no cover
    elif color_space[0].startswith("/Cal"):  # /CalRGB and /CalGray
        # Calibrated spaces are handled like their device counterparts.
        color_space = "/Device" + color_space[0][4:]
    elif color_space[0] == "/ICCBased":
        # Use the profile's component count and its /Alternate space, if any.
        icc_profile = color_space[1].get_object()
        color_components = cast(int, icc_profile["/N"])
        color_space = icc_profile.get("/Alternate", "")
    elif color_space[0] == "/Indexed":
        color_space = color_space[1].get_object()
        mode, invert_color = _get_imagemode(
            color_space, color_components, prev_mode, depth + 1
        )
        if mode in ("RGB", "CMYK"):
            # Indexed data is loaded as a palette ("P") image.
            mode = "P"
        return mode, invert_color
    elif color_space[0] == "/Separation":
        color_space = color_space[2]
        if isinstance(color_space, IndirectObject):
            color_space = color_space.get_object()
        mode, invert_color = _get_imagemode(
            color_space, color_components, prev_mode, depth + 1
        )
        # Separation spaces always request color inversion.
        return mode, True
    elif color_space[0] == "/DeviceN":
        original_color_space = color_space
        color_components = len(color_space[1])
        color_space = color_space[2]
        if isinstance(color_space, IndirectObject):  # pragma: no cover
            color_space = color_space.get_object()
        if color_space == "/DeviceCMYK" and color_components == 1:
            # Single-channel DeviceN over CMYK is rendered as inverted gray.
            if original_color_space[1][0] != "/Black":
                logger_warning(
                    f"Color {original_color_space[1][0]} converted to Gray. Please share PDF with pypdf dev team",
                    __name__,
                )
            return "L", True
        mode, invert_color = _get_imagemode(
            color_space, color_components, prev_mode, depth + 1
        )
        return mode, invert_color
    mode_map: Dict[str, mode_str_type] = {
        "1bit": "1", # must be zeroth position: color_components may index the values
        "/DeviceGray": "L", # must be first position: color_components may index the values
        "palette": "P", # must be second position: color_components may index the values
        "/DeviceRGB": "RGB", # must be third position: color_components may index the values
        "/DeviceCMYK": "CMYK", # must be fourth position: color_components may index the values
        "2bit": "2bits",
        "4bit": "4bits",
    }
    # Fall back: lookup by name, then by component count, then previous mode.
    mode = (
        mode_map.get(color_space)
        or list(mode_map.values())[color_components]
        or prev_mode
    )
    return mode, mode == "CMYK"
def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
    """
    Unpack sub-byte pixels (e.g. 1/2/4 bits each) into one byte per pixel.

    Rows are byte-aligned: a partially consumed byte is skipped at the start
    of each new row.

    Args:
        data: packed pixel data, most significant bits first.
        size: (width, height) of the image.
        bits: bits per pixel.

    Returns:
        ``width * height`` bytes, one pixel value per byte, row-major.
    """
    width, height = size
    mask = (1 << bits) - 1
    out = bytearray(width * height)
    top_shift = 8 - bits  # shift exposing the first pixel of a source byte
    src = 0
    shift = top_shift
    write_pos = 0
    for _row in range(height):
        # Realign to a byte boundary at the start of every row.
        if shift != top_shift:
            src += 1
            shift = top_shift
        for _col in range(width):
            out[write_pos] = (data[src] >> shift) & mask
            write_pos += 1
            shift -= bits
            if shift < 0:
                src += 1
                shift = top_shift
    return bytes(out)
def _extended_image_frombytes(
    mode: str, size: Tuple[int, int], data: bytes
) -> Image.Image:
    """
    Build a PIL image from raw bytes, repairing streams that carry fewer
    samples per pixel than ``mode`` expects by replicating each byte.

    Raises:
        EmptyImageDataError: if ``data`` is empty.
        ValueError: if the data length cannot be reconciled with the size.
    """
    try:
        return Image.frombytes(mode, size, data)
    except ValueError as exc:
        pixel_count = size[0] * size[1]
        byte_count = len(data)
        if byte_count == 0:
            raise EmptyImageDataError(
                "Data is 0 bytes, cannot process an image from empty data."
            ) from exc
        if byte_count % pixel_count != 0:
            # Not a whole number of bytes per pixel: nothing we can repair.
            raise exc
        # Replicate each byte so every pixel supplies len(mode) samples.
        repeat = int(pixel_count * len(mode) / byte_count)
        data = b"".join(bytes((value,) * repeat) for value in data)
        return Image.frombytes(mode, size, data)
def _handle_flate(
    size: Tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
    obj_as_text: str,
) -> Tuple[Image.Image, str, str, bool]:
    """
    Process an image encoded with FlateDecode.

    Args:
        size: (width, height) of the image.
        data: Decompressed image bytes.
        mode: Pillow mode guessed from the color space; "2bits"/"4bits" are
            placeholders for sub-byte palette images.
        color_space: Color space of the image; may be an ``ArrayObject`` such
            as ``[/Indexed base hival lookup]``.
        colors: Number of color components.
        obj_as_text: Textual representation of the image object (warnings only).

    Returns:
        Tuple of (img, image_format, extension, color inversion).
    """
    extension = ".png"  # mime_type = "image/png"
    image_format = "PNG"
    lookup: Any
    base: Any
    hival: Any
    if isinstance(color_space, ArrayObject) and color_space[0] == "/Indexed":
        # Unpack [/Indexed base hival lookup].
        color_space, base, hival, lookup = (value.get_object() for value in color_space)
    if mode == "2bits":
        # Sub-byte depths are unpacked to one byte per pixel first.
        mode = "P"
        data = bits2byte(data, size, 2)
    elif mode == "4bits":
        mode = "P"
        data = bits2byte(data, size, 4)
    img = _extended_image_frombytes(mode, size, data)
    if color_space == "/Indexed":
        # Normalize the lookup table to raw bytes.
        if isinstance(lookup, (EncodedStreamObject, DecodedStreamObject)):
            lookup = lookup.get_data()
        if isinstance(lookup, TextStringObject):
            lookup = lookup.original_bytes
        if isinstance(lookup, str):
            lookup = lookup.encode()
        try:
            # nb: bytes per palette entry; conv/mode: PIL conversion targets.
            nb, conv, mode = {  # type: ignore
                "1": (0, "", ""),
                "L": (1, "P", "L"),
                "P": (0, "", ""),
                "RGB": (3, "P", "RGB"),
                "CMYK": (4, "P", "CMYK"),
            }[_get_imagemode(base, 0, "")[0]]
        except KeyError:  # pragma: no cover
            logger_warning(
                f"Base {base} not coded please share the pdf file with pypdf dev team",
                __name__,
            )
            lookup = None
        else:
            if img.mode == "1":
                # Two values ("high" and "low").
                expected_count = 2 * nb
                actual_count = len(lookup)
                if actual_count != expected_count:
                    if actual_count < expected_count:
                        logger_warning(
                            f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                        lookup += bytes([0] * (expected_count - actual_count))
                    elif not check_if_whitespace_only(lookup[expected_count:]):
                        logger_warning(
                            f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                    # Truncate in all mismatch cases (no-op after padding above).
                    lookup = lookup[:expected_count]
                colors_arr = [lookup[:nb], lookup[nb:]]
                # Expand each 1-bit pixel to its palette entry.
                arr = b"".join(
                    b"".join(
                        colors_arr[1 if img.getpixel((x, y)) > 127 else 0]
                        for x in range(img.size[0])
                    )
                    for y in range(img.size[1])
                )
                img = Image.frombytes(mode, img.size, arr)
            else:
                img = img.convert(conv)
                if len(lookup) != (hival + 1) * nb:
                    logger_warning(f"Invalid Lookup Table in {obj_as_text}", __name__)
                    lookup = None
                elif mode == "L":
                    # gray lookup does not work : it is converted to a similar RGB lookup
                    lookup = b"".join([bytes([b, b, b]) for b in lookup])
                    mode = "RGB"
                # TODO : cf https://github.com/py-pdf/pypdf/pull/2039
                # this is a work around until PIL is able to process CMYK images
                elif mode == "CMYK":
                    _rgb = []
                    for _c, _m, _y, _k in (
                        lookup[n : n + 4] for n in range(0, 4 * (len(lookup) // 4), 4)
                    ):
                        _r = int(255 * (1 - _c / 255) * (1 - _k / 255))
                        _g = int(255 * (1 - _m / 255) * (1 - _k / 255))
                        _b = int(255 * (1 - _y / 255) * (1 - _k / 255))
                        _rgb.append(bytes((_r, _g, _b)))
                    lookup = b"".join(_rgb)
                    mode = "RGB"
                if lookup is not None:
                    img.putpalette(lookup, rawmode=mode)
                img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB")
    elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
        # see Table 66 - Additional Entries Specific to an ICC Profile
        # Stream Dictionary
        mode2 = _get_imagemode(color_space, colors, mode)[0]
        if mode != mode2:
            img = Image.frombytes(mode2, size, data)  # reloaded as mode may have change
    if mode == "CMYK":
        # NOTE(review): PNG cannot carry CMYK, hence the TIFF fallback.
        extension = ".tif"
        image_format = "TIFF"
    return img, image_format, extension, False
def _handle_jpx(
    size: Tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
) -> Tuple[Image.Image, str, str, bool]:
    """
    Process an image encoded with JPXDecode (JPEG 2000).

    Returns:
        Tuple of (img, image_format, extension, inversion).
    """
    extension = ".jp2"  # mime_type = "image/x-jp2"
    img1 = Image.open(BytesIO(data), formats=("JPEG2000",))
    mode, invert_color = _get_imagemode(color_space, colors, mode)
    if mode == "":
        # Color space gave no answer: trust the decoded image's own mode.
        mode = cast(mode_str_type, img1.mode)
        invert_color = mode in ("CMYK",)
    if img1.mode == "RGBA" and mode == "RGB":
        # Keep the alpha channel the JPEG 2000 stream carries.
        mode = "RGBA"
    # we need to convert to the good mode
    if img1.mode == mode or {img1.mode, mode} == {"L", "P"}:  # compare (unordered) sets
        # L and P are indexed modes which should not be changed.
        img = img1
    elif {img1.mode, mode} == {"RGBA", "CMYK"}:
        # RGBA / CMYK are 4bytes encoding where
        # the encoding should be corrected
        img = Image.frombytes(mode, img1.size, img1.tobytes())
    else:  # pragma: no cover
        img = img1.convert(mode)
    # for CMYK conversion :
    # https://stackoverflow.com/questions/38855022/conversion-from-cmyk-to-rgb-with-pillow-is-different-from-that-of-photoshop
    # not implemented for the moment as I need to get properly the ICC
    if img.mode == "CMYK":
        img = img.convert("RGB")
    image_format = "JPEG2000"
    return img, image_format, extension, invert_color
def _apply_decode(
    img: Image.Image,
    x_object_obj: Dict[str, Any],
    lfilters: FT,
    color_space: Union[str, List[Any], Any],
    invert_color: bool,
) -> Image.Image:
    """
    Apply the image's /Decode array, remapping component values.

    CMYK images and other color spaces without a /Decode entry still
    require reverting the scale (cf p243,2§ last sentence), so a
    reversing default is synthesized for those cases.
    """
    band_count = len(img.getbands())
    reverse_scale_needed = (
        img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
    ) or (invert_color and img.mode == "L")
    decode = x_object_obj.get(
        IA.DECODE, [1.0, 0.0] * band_count if reverse_scale_needed else None
    )
    if isinstance(color_space, ArrayObject):
        family = color_space[0].get_object()
        if family == "/Indexed":
            decode = None  # decode is meaningless if Indexed
        if family == "/Separation":
            decode = [1.0, 0.0] * band_count
    is_identity = decode is None or all(
        value == index % 2 for index, value in enumerate(decode)
    )
    if not is_identity:
        # Build a 256-entry lookup table per band mapping j -> [dmin, dmax].
        lut: List[int] = []
        for start in range(0, len(decode), 2):
            dmin, dmax = decode[start], decode[start + 1]
            lut.extend(
                round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)) for j in range(256)
            )
        img = img.point(lut)
    return img
def _get_mode_and_invert_color(
    x_object_obj: Dict[str, Any], colors: int, color_space: Union[str, List[Any], Any]
) -> Tuple[mode_str_type, bool]:
    """
    Derive the Pillow image mode and whether the colors must be inverted.

    Returns:
        Tuple of (mode, invert_color).
    """
    if (
        IA.COLOR_SPACE in x_object_obj
        and x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB
    ):
        # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
        mode: mode_str_type = "RGB"
    if x_object_obj.get("/BitsPerComponent", 8) < 8:
        # Sub-byte depth: resolve via the "<n>bit" pseudo color space name.
        mode, invert_color = _get_imagemode(
            f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, ""
        )
    else:
        # NOTE(review): colors == 1 with a non-gray color space is mapped to
        # 2 components — presumably to select the palette entry of the mode
        # table in _get_imagemode; confirm against that function.
        mode, invert_color = _get_imagemode(
            color_space,
            2
            if (
                colors == 1
                and (
                    not isinstance(color_space, NullObject)
                    and "Gray" not in color_space
                )
            )
            else colors,
            "",
        )
    return mode, invert_color

View File

@ -0,0 +1,42 @@
"""
PDF specifies several annotation types which pypdf makes available here.
The names of the annotations and their attributes do not reflect the names in
the specification in all cases. For example, the PDF standard defines a
'Square' annotation that does not actually need to be square. For this reason,
pypdf calls it 'Rectangle'.
At their core, all annotation types are DictionaryObjects. That means if pypdf
does not implement a feature, users can easily extend the given functionality.
"""
from ._base import NO_FLAGS, AnnotationDictionary
from ._markup_annotations import (
Ellipse,
FreeText,
Highlight,
Line,
MarkupAnnotation,
Polygon,
PolyLine,
Rectangle,
Text,
)
from ._non_markup_annotations import Link, Popup
__all__ = [
"NO_FLAGS",
"AnnotationDictionary",
"Ellipse",
"FreeText",
"Highlight",
"Line",
"Link",
"MarkupAnnotation",
"PolyLine",
"Polygon",
"Popup",
"Rectangle",
"Text",
]

View File

@ -0,0 +1,27 @@
from abc import ABC
from ..constants import AnnotationFlag
from ..generic import NameObject, NumberObject
from ..generic._data_structures import DictionaryObject
class AnnotationDictionary(DictionaryObject, ABC):
    """
    Base dictionary for all annotation types.

    Sets ``/Type`` to ``/Annot``; subclasses add their ``/Subtype`` and
    type-specific entries.
    """

    def __init__(self) -> None:
        # NameObject is imported at module level; the previous function-local
        # re-import was redundant and has been removed.
        # /Rect should not be added here as Polygon and PolyLine can automatically set it
        self[NameObject("/Type")] = NameObject("/Annot")

    # The flags were NOT added to the constructor on purpose:
    # We expect that most users don't want to change the default.
    # If they do, they can use the property. The default is 0.
    @property
    def flags(self) -> AnnotationFlag:
        """Annotation flags (/F); defaults to 0 when the entry is absent."""
        return self.get(NameObject("/F"), AnnotationFlag(0))

    @flags.setter
    def flags(self, value: AnnotationFlag) -> None:
        self[NameObject("/F")] = NumberObject(value)


NO_FLAGS = AnnotationFlag(0)

View File

@ -0,0 +1,315 @@
import sys
from abc import ABC
from typing import Any, List, Optional, Tuple, Union
from .._utils import deprecation_with_replacement
from ..constants import AnnotationFlag
from ..generic import ArrayObject, DictionaryObject
from ..generic._base import (
BooleanObject,
FloatObject,
NameObject,
NumberObject,
TextStringObject,
)
from ..generic._rectangle import RectangleObject
from ..generic._utils import hex_to_rgb
from ._base import NO_FLAGS, AnnotationDictionary
if sys.version_info[:2] >= (3, 10):
from typing import TypeAlias
else:
# PEP 613 introduced typing.TypeAlias with Python 3.10
# For older Python versions, the backport typing_extensions is necessary:
from typing_extensions import TypeAlias
Vertex: TypeAlias = Tuple[float, float]
def _get_bounding_rectangle(vertices: List[Vertex]) -> RectangleObject:
    """Return the smallest axis-aligned rectangle enclosing all vertices."""
    # Seed with the first vertex (IndexError on empty input, as before).
    x_min, y_min = vertices[0]
    x_max, y_max = x_min, y_min
    for x, y in vertices:
        x_min = x if x < x_min else x_min
        x_max = x if x > x_max else x_max
        y_min = y if y < y_min else y_min
        y_max = y if y > y_max else y_max
    return RectangleObject((x_min, y_min, x_max, y_max))
class MarkupAnnotation(AnnotationDictionary, ABC):
    """
    Base class for all markup annotations.

    Args:
        title_bar: Text to be displayed in the title bar of the annotation;
            by convention this is the name of the author
    """
    def __init__(self, *, title_bar: Optional[str] = None) -> None:
        # /T is only written when a title was actually supplied.
        if title_bar is not None:
            self[NameObject("/T")] = TextStringObject(title_bar)
class Text(MarkupAnnotation):
    """
    A text annotation.

    Args:
        rect: array of four integers ``[xLL, yLL, xUR, yUR]``
            specifying the clickable rectangular area
        text: The text that is added to the document
        open: Whether the annotation is initially displayed open
        flags: Annotation flags
    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        text: str,
        open: bool = False,
        flags: int = NO_FLAGS,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        entries = (
            ("/Subtype", NameObject("/Text")),
            ("/Rect", RectangleObject(rect)),
            ("/Contents", TextStringObject(text)),
            ("/Open", BooleanObject(open)),
            ("/Flags", NumberObject(flags)),
        )
        for key, value in entries:
            self[NameObject(key)] = value
class FreeText(MarkupAnnotation):
    """
    A FreeText annotation.

    Args:
        text: Text displayed in the annotation.
        rect: array of four integers ``[xLL, yLL, xUR, yUR]``
            specifying the annotation rectangle.
        font: Font family name, e.g. "Helvetica".
        bold: Render the text bold.
        italic: Render the text italic.
        font_size: Font size including the unit, e.g. "14pt".
        font_color: Text color as an RRGGBB hex string.
        border_color: Border color as a hex string, or None for no border.
        background_color: Fill color as a hex string, or None for none.
    """

    def __init__(
        self,
        *,
        text: str,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        font: str = "Helvetica",
        bold: bool = False,
        italic: bool = False,
        font_size: str = "14pt",
        font_color: str = "000000",
        border_color: Optional[str] = "000000",
        background_color: Optional[str] = "ffffff",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        # Table 225 of the 1.7 reference ("CSS2 style attributes used in rich text strings")
        font_str = "font: "
        font_str += "italic " if italic else "normal "
        font_str += "bold " if bold else "normal "
        font_str = f"{font_str}{font_size} {font}"
        font_str = f"{font_str};text-align:left;color:#{font_color}"
        default_appearance_string = ""
        if border_color:
            for st in hex_to_rgb(border_color):
                default_appearance_string = f"{default_appearance_string}{st} "
            default_appearance_string = f"{default_appearance_string}rg"
        # /Subtype and /Rect were previously assigned twice (directly and in
        # this update); writing them once yields the identical dictionary.
        self.update(
            {
                NameObject("/Subtype"): NameObject("/FreeText"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Contents"): TextStringObject(text),
                # font size color
                NameObject("/DS"): TextStringObject(font_str),
                NameObject("/DA"): TextStringObject(default_appearance_string),
            }
        )
        if border_color is None:
            # Border Style
            self[NameObject("/BS")] = DictionaryObject(
                {
                    # width of 0 means no border
                    NameObject("/W"): NumberObject(0)
                }
            )
        if background_color is not None:
            self[NameObject("/C")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(background_color)]
            )
class Line(MarkupAnnotation):
    """A straight-line annotation from ``p1`` to ``p2``."""

    def __init__(
        self,
        p1: Vertex,
        p2: Vertex,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        text: str = "",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        # /L holds the two endpoints, /LE the line-ending styles,
        # /IC a mid-gray interior color.
        endpoints = ArrayObject(
            [FloatObject(value) for value in (p1[0], p1[1], p2[0], p2[1])]
        )
        line_endings = ArrayObject([NameObject("/None") for _ in range(2)])
        interior = ArrayObject([FloatObject(0.5) for _ in range(3)])
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Line"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/L"): endpoints,
                NameObject("/LE"): line_endings,
                NameObject("/IC"): interior,
                NameObject("/Contents"): TextStringObject(text),
            }
        )
class PolyLine(MarkupAnnotation):
    """
    An open polyline annotation through the given vertices.

    Args:
        vertices: Non-empty list of (x, y) points.

    Raises:
        ValueError: If ``vertices`` is empty.
    """

    def __init__(
        self,
        vertices: List[Vertex],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if len(vertices) == 0:
            # The previous message incorrectly said "polygon".
            raise ValueError("A polyline needs at least 1 vertex with two coordinates")
        coord_list = []
        for x, y in vertices:
            coord_list.append(NumberObject(x))
            coord_list.append(NumberObject(y))
        self.update(
            {
                NameObject("/Subtype"): NameObject("/PolyLine"),
                NameObject("/Vertices"): ArrayObject(coord_list),
                NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
            }
        )
class Rectangle(MarkupAnnotation):
    """A rectangle ("Square") annotation covering ``rect``."""

    def __init__(
        self,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        if "interiour_color" in kwargs:
            # Historical misspelling, kept for backward compatibility.
            deprecation_with_replacement("interiour_color", "interior_color", "5.0.0")
            interior_color = kwargs.pop("interiour_color")
        super().__init__(**kwargs)
        self[NameObject("/Type")] = NameObject("/Annot")
        self[NameObject("/Subtype")] = NameObject("/Square")
        self[NameObject("/Rect")] = RectangleObject(rect)
        if interior_color:
            self[NameObject("/IC")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(interior_color)]
            )
class Highlight(MarkupAnnotation):
    """A text-highlight annotation over the given quad points."""

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        quad_points: ArrayObject,
        highlight_color: str = "ff0000",
        printing: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        color_components = ArrayObject(
            [FloatObject(component) for component in hex_to_rgb(highlight_color)]
        )
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Highlight"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/QuadPoints"): quad_points,
                NameObject("/C"): color_components,
            }
        )
        if printing:
            self.flags = AnnotationFlag.PRINT
class Ellipse(MarkupAnnotation):
    """An ellipse ("Circle") annotation inscribed in ``rect``."""

    def __init__(
        self,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        if "interiour_color" in kwargs:
            # Historical misspelling, kept for backward compatibility.
            deprecation_with_replacement("interiour_color", "interior_color", "5.0.0")
            interior_color = kwargs.pop("interiour_color")
        super().__init__(**kwargs)
        self[NameObject("/Type")] = NameObject("/Annot")
        self[NameObject("/Subtype")] = NameObject("/Circle")
        self[NameObject("/Rect")] = RectangleObject(rect)
        if interior_color:
            self[NameObject("/IC")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(interior_color)]
            )
class Polygon(MarkupAnnotation):
    """A closed polygon annotation through the given vertices."""

    def __init__(
        self,
        vertices: List[Tuple[float, float]],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if not vertices:
            raise ValueError("A polygon needs at least 1 vertex with two coordinates")
        coords: List[NumberObject] = []
        for x, y in vertices:
            coords.extend((NumberObject(x), NumberObject(y)))
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Polygon"),
                NameObject("/Vertices"): ArrayObject(coords),
                NameObject("/IT"): NameObject("/PolygonCloud"),
                NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
            }
        )

View File

@ -0,0 +1,106 @@
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
from ..generic._base import (
BooleanObject,
NameObject,
NumberObject,
TextStringObject,
)
from ..generic._data_structures import ArrayObject, DictionaryObject
from ..generic._fit import DEFAULT_FIT, Fit
from ..generic._rectangle import RectangleObject
from ._base import AnnotationDictionary
class Link(AnnotationDictionary):
    """
    A link annotation: either an external URL or an internal page jump.

    Args:
        rect: Clickable rectangle ``[xLL, yLL, xUR, yUR]``.
        border: Optional /Border array (horizontal radius, vertical radius,
            width, plus an optional dash-pattern array as fourth element).
        url: External URL to open; mutually exclusive with ``target_page_index``.
        target_page_index: Zero-based index of the destination page;
            mutually exclusive with ``url``.
        fit: How the destination page is fitted in the viewer.

    Raises:
        ValueError: If neither or both of ``url`` and ``target_page_index``
            are provided.
    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        border: Optional[ArrayObject] = None,
        url: Optional[str] = None,
        target_page_index: Optional[int] = None,
        fit: Fit = DEFAULT_FIT,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if TYPE_CHECKING:
            from ..types import BorderArrayType
        is_external = url is not None
        is_internal = target_page_index is not None
        if not is_external and not is_internal:
            raise ValueError(
                "Either 'url' or 'target_page_index' have to be provided. Both were None."
            )
        if is_external and is_internal:
            # Previously this reused the "have to be provided" wording, which
            # was misleading when both arguments were given.
            raise ValueError(
                "Exactly one of 'url' or 'target_page_index' must be provided. "
                f"{url=}, {target_page_index=}"
            )
        border_arr: BorderArrayType
        if border is not None:
            border_arr = [NumberObject(n) for n in border[:3]]
            if len(border) == 4:
                # Fourth element is the dash pattern array.
                dash_pattern = ArrayObject([NumberObject(n) for n in border[3]])
                border_arr.append(dash_pattern)
        else:
            border_arr = [NumberObject(0)] * 3
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Link"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Border"): ArrayObject(border_arr),
            }
        )
        if is_external:
            self[NameObject("/A")] = DictionaryObject(
                {
                    NameObject("/S"): NameObject("/URI"),
                    NameObject("/Type"): NameObject("/Action"),
                    NameObject("/URI"): TextStringObject(url),
                }
            )
        if is_internal:
            # The destination cannot be resolved yet (the target page may not
            # exist in the writer); this placeholder needs to be updated later!
            dest_deferred = DictionaryObject(
                {
                    "target_page_index": NumberObject(target_page_index),
                    "fit": NameObject(fit.fit_type),
                    "fit_args": fit.fit_args,
                }
            )
            self[NameObject("/Dest")] = dest_deferred
class Popup(AnnotationDictionary):
    """A popup annotation, usually attached to a parent markup annotation."""

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        parent: Optional[DictionaryObject] = None,
        open: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        entries = {
            NameObject("/Subtype"): NameObject("/Popup"),
            NameObject("/Rect"): RectangleObject(rect),
            NameObject("/Open"): BooleanObject(open),
        }
        self.update(entries)
        if parent:
            # /Parent must be an indirect reference; when the parent has not
            # been registered with a writer, the field is skipped with a warning.
            try:
                self[NameObject("/Parent")] = parent.indirect_reference
            except AttributeError:
                from .._utils import logger_warning

                logger_warning(
                    "Unregistered Parent object : No Parent field set",
                    __name__,
                )

View File

@ -0,0 +1,722 @@
"""Various constants, enums, and flags to aid readability."""
from enum import Enum, IntFlag, auto, unique
from typing import Dict, Tuple
class StrEnum(str, Enum):  # Once we are on Python 3.11+: enum.StrEnum
    """Enum whose members are strings and stringify to their raw value."""

    def __str__(self) -> str:
        return f"{self.value}"
class Core:
    """Keywords that don't quite belong anywhere else."""
    OUTLINES = "/Outlines"
    THREADS = "/Threads"
    PAGE = "/Page"
    PAGES = "/Pages"
    CATALOG = "/Catalog"
class TrailerKeys:
    """Keys of the file trailer dictionary."""
    ROOT = "/Root"
    ENCRYPT = "/Encrypt"
    ID = "/ID"
    INFO = "/Info"
    SIZE = "/Size"
    PREV = "/Prev"
class CatalogAttributes:
    """Selected entries of the document catalog dictionary."""
    NAMES = "/Names"
    DESTS = "/Dests"
class EncryptionDictAttributes:
    """
    Additional encryption dictionary entries for the standard security handler.
    Table 3.19, Page 122.
    Table 21 of the 2.0 manual.
    """
    R = "/R"  # number, required; revision of the standard security handler
    O = "/O"  # 32-byte string, required # noqa: E741
    U = "/U"  # 32-byte string, required
    P = "/P"  # integer flag, required; permitted operations
    ENCRYPT_METADATA = "/EncryptMetadata"  # boolean flag, optional
class UserAccessPermissions(IntFlag):
    """
    Table 3.20 User access permissions.
    Table 22 of the 2.0 manual.
    """
    R1 = 1
    R2 = 2
    PRINT = 4
    MODIFY = 8
    EXTRACT = 16
    ADD_OR_MODIFY = 32
    R7 = 64
    R8 = 128
    FILL_FORM_FIELDS = 256
    EXTRACT_TEXT_AND_GRAPHICS = 512
    ASSEMBLE_DOC = 1024
    PRINT_TO_REPRESENTATION = 2048
    R13 = 2**12
    R14 = 2**13
    R15 = 2**14
    R16 = 2**15
    R17 = 2**16
    R18 = 2**17
    R19 = 2**18
    R20 = 2**19
    R21 = 2**20
    R22 = 2**21
    R23 = 2**22
    R24 = 2**23
    R25 = 2**24
    R26 = 2**25
    R27 = 2**26
    R28 = 2**27
    R29 = 2**28
    R30 = 2**29
    R31 = 2**30
    R32 = 2**31
    @classmethod
    def _is_reserved(cls, name: str) -> bool:
        """Check if the given name corresponds to a reserved flag entry."""
        return name.startswith("R") and name[1:].isdigit()
    @classmethod
    def _is_active(cls, name: str) -> bool:
        """Check if the given reserved name defaults to 1 = active."""
        return name not in {"R1", "R2"}
    def to_dict(self) -> Dict[str, bool]:
        """Convert the given flag value to a corresponding verbose name mapping."""
        return {
            name.lower(): (self & member) == member
            for name, member in UserAccessPermissions.__members__.items()
            if not UserAccessPermissions._is_reserved(name)
        }
    @classmethod
    def from_dict(cls, value: Dict[str, bool]) -> "UserAccessPermissions":
        """Convert the verbose name mapping to the corresponding flag value."""
        remaining = value.copy()
        result = cls(0)
        for name, member in cls.__members__.items():
            if cls._is_reserved(name):
                # Reserved names have a required value. Use it.
                if cls._is_active(name):
                    result |= member
                continue
            if remaining.pop(name.lower(), False):
                result |= member
        if remaining:
            raise ValueError(f"Unknown dictionary keys: {remaining!r}")
        return result
    @classmethod
    def all(cls) -> "UserAccessPermissions":
        """All permissions granted (every bit except the always-zero R1/R2)."""
        return cls((2**32 - 1) - cls.R1 - cls.R2)
class Resources:
    """
    Table 3.30 Entries in a resource dictionary.
    Table 34 in the 2.0 reference.
    """
    EXT_G_STATE = "/ExtGState"  # dictionary, optional
    COLOR_SPACE = "/ColorSpace"  # dictionary, optional
    PATTERN = "/Pattern"  # dictionary, optional
    SHADING = "/Shading"  # dictionary, optional
    XOBJECT = "/XObject"  # dictionary, optional
    FONT = "/Font"  # dictionary, optional
    PROC_SET = "/ProcSet"  # array, optional
    PROPERTIES = "/Properties"  # dictionary, optional
class Ressources:  # deprecated
    """
    Use :class: `Resources` instead.
    .. deprecated:: 5.0.0
    """
class PagesAttributes:
    """§7.7.3.2 of the 1.7 and 2.0 reference."""
    TYPE = "/Type"  # name, required; must be /Pages
    PARENT = "/Parent"  # dictionary, required; indirect reference to pages object
    KIDS = "/Kids"  # array, required; List of indirect references
    COUNT = "/Count"
    # integer, required; the number of leaf nodes (page objects)
    # that are descendants of this node within the page tree
class PageAttributes:
    """§7.7.3.3 of the 1.7 and 2.0 reference."""
    TYPE = "/Type"  # name, required; must be /Page
    PARENT = "/Parent"  # dictionary, required; a pages object
    LAST_MODIFIED = (
        "/LastModified"  # date, optional; date and time of last modification
    )
    RESOURCES = "/Resources"  # dictionary, required if there are any
    MEDIABOX = "/MediaBox"  # rectangle, required; rectangle specifying page size
    CROPBOX = "/CropBox"  # rectangle, optional
    BLEEDBOX = "/BleedBox"  # rectangle, optional
    TRIMBOX = "/TrimBox"  # rectangle, optional
    ARTBOX = "/ArtBox"  # rectangle, optional
    BOX_COLOR_INFO = "/BoxColorInfo"  # dictionary, optional
    CONTENTS = "/Contents"  # stream or array, optional
    ROTATE = "/Rotate"  # integer, optional; page rotation in degrees
    GROUP = "/Group"  # dictionary, optional; page group
    THUMB = "/Thumb"  # stream, optional; indirect reference to image of the page
    B = "/B"  # array, optional
    DUR = "/Dur"  # number, optional
    TRANS = "/Trans"  # dictionary, optional
    ANNOTS = "/Annots"  # array, optional; an array of annotations
    AA = "/AA"  # dictionary, optional
    METADATA = "/Metadata"  # stream, optional
    PIECE_INFO = "/PieceInfo"  # dictionary, optional
    STRUCT_PARENTS = "/StructParents"  # integer, optional
    ID = "/ID"  # byte string, optional
    PZ = "/PZ"  # number, optional
    SEPARATION_INFO = "/SeparationInfo"  # dictionary, optional
    TABS = "/Tabs"  # name, optional
    TEMPLATE_INSTANTIATED = "/TemplateInstantiated"  # name, optional
    PRES_STEPS = "/PresSteps"  # dictionary, optional
    USER_UNIT = "/UserUnit"  # number, optional
    VP = "/VP"  # dictionary, optional
    AF = "/AF"  # array of dictionaries, optional
    OUTPUT_INTENTS = "/OutputIntents"  # array, optional
    D_PART = "/DPart"  # dictionary, required, if this page is within the range of a DPart, not permitted otherwise
class FileSpecificationDictionaryEntries:
    """Table 3.41 Entries in a file specification dictionary."""
    Type = "/Type"
    FS = "/FS"  # The name of the file system to be used to interpret this file specification
    F = "/F"  # A file specification string of the form described in §3.10.1
    UF = "/UF"  # A Unicode string of the file as described in §3.10.1
    DOS = "/DOS"  # platform-specific file name (legacy)
    Mac = "/Mac"  # platform-specific file name (legacy)
    Unix = "/Unix"  # platform-specific file name (legacy)
    ID = "/ID"
    V = "/V"
    EF = "/EF"  # dictionary, containing a subset of the keys F, UF, DOS, Mac, and Unix
    RF = "/RF"  # dictionary, containing arrays of /EmbeddedFile
    DESC = "/Desc"  # description of the file
    Cl = "/Cl"
class StreamAttributes:
    """
    Table 4.2.
    Table 5 in the 2.0 reference.
    """
    LENGTH = "/Length"  # integer, required
    FILTER = "/Filter"  # name or array of names, optional
    DECODE_PARMS = "/DecodeParms"  # variable, optional -- 'decodeParams is wrong
@unique
class FilterTypes(StrEnum):
    """§7.4 of the 1.7 and 2.0 references."""
    ASCII_HEX_DECODE = "/ASCIIHexDecode"  # abbreviation: AHx
    ASCII_85_DECODE = "/ASCII85Decode"  # abbreviation: A85
    LZW_DECODE = "/LZWDecode"  # abbreviation: LZW
    FLATE_DECODE = "/FlateDecode"  # abbreviation: Fl, PDF 1.2
    RUN_LENGTH_DECODE = "/RunLengthDecode"  # abbreviation: RL
    CCITT_FAX_DECODE = "/CCITTFaxDecode"  # abbreviation: CCF
    DCT_DECODE = "/DCTDecode"  # abbreviation: DCT
    JPX_DECODE = "/JPXDecode"
class FilterTypeAbbreviations:
    """§8.9.7 of the 1.7 and 2.0 references."""
    AHx = "/AHx"
    A85 = "/A85"
    LZW = "/LZW"
    FL = "/Fl"  # FlateDecode
    RL = "/RL"
    CCF = "/CCF"
    DCT = "/DCT"
class LzwFilterParameters:
    """
    Table 4.4.
    Table 8 in the 2.0 reference.
    """
    PREDICTOR = "/Predictor"  # integer
    COLORS = "/Colors"  # integer
    BITS_PER_COMPONENT = "/BitsPerComponent"  # integer
    COLUMNS = "/Columns"  # integer
    EARLY_CHANGE = "/EarlyChange"  # integer
class CcittFaxDecodeParameters:
    """
    Table 4.5.
    Table 11 in the 2.0 reference.
    """
    K = "/K"  # integer
    END_OF_LINE = "/EndOfLine"  # boolean
    ENCODED_BYTE_ALIGN = "/EncodedByteAlign"  # boolean
    COLUMNS = "/Columns"  # integer
    ROWS = "/Rows"  # integer
    END_OF_BLOCK = "/EndOfBlock"  # boolean
    BLACK_IS_1 = "/BlackIs1"  # boolean
    DAMAGED_ROWS_BEFORE_ERROR = "/DamagedRowsBeforeError"  # integer
class ImageAttributes:
    """§11.6.5 of the 1.7 and 2.0 references."""
    TYPE = "/Type"  # name, required; must be /XObject
    SUBTYPE = "/Subtype"  # name, required; must be /Image
    NAME = "/Name"  # name, required
    WIDTH = "/Width"  # integer, required
    HEIGHT = "/Height"  # integer, required
    BITS_PER_COMPONENT = "/BitsPerComponent"  # integer, required
    COLOR_SPACE = "/ColorSpace"  # name, required
    DECODE = "/Decode"  # array, optional
    INTENT = "/Intent"  # string, optional
    INTERPOLATE = "/Interpolate"  # boolean, optional
    IMAGE_MASK = "/ImageMask"  # boolean, optional
    MASK = "/Mask"  # 1-bit image mask stream
    S_MASK = "/SMask"  # dictionary or name, optional
class ColorSpaces:
    """Names of the device color spaces."""
    DEVICE_RGB = "/DeviceRGB"
    DEVICE_CMYK = "/DeviceCMYK"
    DEVICE_GRAY = "/DeviceGray"
class TypArguments:
    """Table 8.2 of the PDF 1.7 reference."""
    LEFT = "/Left"
    RIGHT = "/Right"
    BOTTOM = "/Bottom"
    TOP = "/Top"
class TypFitArguments:
    """Table 8.2 of the PDF 1.7 reference."""
    FIT = "/Fit"
    FIT_V = "/FitV"
    FIT_BV = "/FitBV"
    FIT_B = "/FitB"
    FIT_H = "/FitH"
    FIT_BH = "/FitBH"
    FIT_R = "/FitR"
    XYZ = "/XYZ"
class GoToActionArguments:
    """Entries of a go-to action dictionary."""
    S = "/S"  # name, required: type of action
    D = "/D"  # name / byte string /array, required: Destination to jump to
class AnnotationDictionaryAttributes:
    """Table 8.15 Entries common to all annotation dictionaries."""
    Type = "/Type"
    Subtype = "/Subtype"
    Rect = "/Rect"
    Contents = "/Contents"
    P = "/P"
    NM = "/NM"
    M = "/M"
    F = "/F"
    AP = "/AP"
    AS = "/AS"
    DA = "/DA"
    Border = "/Border"
    C = "/C"
    StructParent = "/StructParent"
    OC = "/OC"
class InteractiveFormDictEntries:
    """Entries of the interactive form (AcroForm) dictionary."""
    Fields = "/Fields"
    NeedAppearances = "/NeedAppearances"
    SigFlags = "/SigFlags"
    CO = "/CO"
    DR = "/DR"
    DA = "/DA"
    Q = "/Q"
    XFA = "/XFA"
class FieldDictionaryAttributes:
    """
    Entries common to all field dictionaries (Table 8.69 PDF 1.7 reference)
    (*very partially documented here*).
    FFBits provides the constants used for `/Ff` from Table 8.70/8.75/8.77/8.79
    """
    FT = "/FT"  # name, required for terminal fields
    Parent = "/Parent"  # dictionary, required for children
    Kids = "/Kids"  # array, sometimes required
    T = "/T"  # text string, optional
    TU = "/TU"  # text string, optional
    TM = "/TM"  # text string, optional
    Ff = "/Ff"  # integer, optional
    V = "/V"  # text string or array, optional
    DV = "/DV"  # text string, optional
    AA = "/AA"  # dictionary, optional
    Opt = "/Opt"  # array, optional
    class FfBits(IntFlag):
        """
        Ease building /Ff flags
        Some entries may be specific to:
        * Text (Tx) (Table 8.75 PDF 1.7 reference)
        * Buttons (Btn) (Table 8.77 PDF 1.7 reference)
        * Choice (Ch) (Table 8.79 PDF 1.7 reference)
        """
        ReadOnly = 1 << 0
        """common to Tx/Btn/Ch in Table 8.70"""
        Required = 1 << 1
        """common to Tx/Btn/Ch in Table 8.70"""
        NoExport = 1 << 2
        """common to Tx/Btn/Ch in Table 8.70"""
        Multiline = 1 << 12
        """Tx"""
        Password = 1 << 13
        """Tx"""
        NoToggleToOff = 1 << 14
        """Btn"""
        Radio = 1 << 15
        """Btn"""
        Pushbutton = 1 << 16
        """Btn"""
        Combo = 1 << 17
        """Ch"""
        Edit = 1 << 18
        """Ch"""
        Sort = 1 << 19
        """Ch"""
        FileSelect = 1 << 20
        """Tx"""
        MultiSelect = 1 << 21
        """Tx"""
        DoNotSpellCheck = 1 << 22
        """Tx/Ch"""
        DoNotScroll = 1 << 23
        """Tx"""
        Comb = 1 << 24
        """Tx"""
        # NOTE(review): RadiosInUnison and RichText share bit 25, so RichText
        # becomes an IntFlag alias of RadiosInUnison — verify intent.
        RadiosInUnison = 1 << 25
        """Btn"""
        RichText = 1 << 25
        """Tx"""
        CommitOnSelChange = 1 << 26
        """Ch"""
    @classmethod
    def attributes(cls) -> Tuple[str, ...]:
        """
        Get a tuple of all the attributes present in a Field Dictionary.
        This method returns a tuple of all the attribute constants defined in
        the FieldDictionaryAttributes class. These attributes correspond to the
        entries that are common to all field dictionaries as specified in the
        PDF 1.7 reference.
        Returns:
            A tuple containing all the attribute constants.
        """
        return (
            cls.TM,
            cls.T,
            cls.FT,
            cls.Parent,
            cls.TU,
            cls.Ff,
            cls.V,
            cls.DV,
            cls.Kids,
            cls.AA,
        )
    @classmethod
    def attributes_dict(cls) -> Dict[str, str]:
        """
        Get a dictionary of attribute keys and their human-readable names.
        This method returns a dictionary where the keys are the attribute
        constants defined in the FieldDictionaryAttributes class and the values
        are their corresponding human-readable names. These attributes
        correspond to the entries that are common to all field dictionaries as
        specified in the PDF 1.7 reference.
        Returns:
            A dictionary containing attribute keys and their names.
        """
        return {
            cls.FT: "Field Type",
            cls.Parent: "Parent",
            cls.T: "Field Name",
            cls.TU: "Alternate Field Name",
            cls.TM: "Mapping Name",
            cls.Ff: "Field Flags",
            cls.V: "Value",
            cls.DV: "Default Value",
        }
class CheckboxRadioButtonAttributes:
    """Table 8.76 Field flags common to all field types."""
    Opt = "/Opt"  # Options, Optional
    @classmethod
    def attributes(cls) -> Tuple[str, ...]:
        """
        Get a tuple of all the attributes present in a Field Dictionary.
        This method returns a tuple of all the attribute constants defined in
        the CheckboxRadioButtonAttributes class. These attributes correspond to
        the entries that are common to all field dictionaries as specified in
        the PDF 1.7 reference.
        Returns:
            A tuple containing all the attribute constants.
        """
        return (cls.Opt,)
    @classmethod
    def attributes_dict(cls) -> Dict[str, str]:
        """
        Get a dictionary of attribute keys and their human-readable names.
        This method returns a dictionary where the keys are the attribute
        constants defined in the CheckboxRadioButtonAttributes class and the
        values are their corresponding human-readable names. These attributes
        correspond to the entries that are common to all field dictionaries as
        specified in the PDF 1.7 reference.
        Returns:
            A dictionary containing attribute keys and their names.
        """
        return {
            cls.Opt: "Options",
        }
class FieldFlag(IntFlag):
    """Table 8.70 Field flags common to all field types."""
    READ_ONLY = 1
    REQUIRED = 2
    NO_EXPORT = 4
class DocumentInformationAttributes:
"""Table 10.2 Entries in the document information dictionary."""
TITLE = "/Title" # text string, optional
AUTHOR = "/Author" # text string, optional
SUBJECT = "/Subject" # text string, optional
KEYWORDS = "/Keywords" # text string, optional
CREATOR = "/Creator" # text string, optional
PRODUCER = "/Producer" # text string, optional
CREATION_DATE = "/CreationDate" # date, optional
MOD_DATE = "/ModDate" # date, optional
TRAPPED = "/Trapped" # name, optional
class PageLayouts:
    """
    Valid values for the catalog's /PageLayout entry.

    Page 84, PDF 1.4 reference.
    Page 115, PDF 2.0 reference.
    """

    SINGLE_PAGE = "/SinglePage"  # display one page at a time
    ONE_COLUMN = "/OneColumn"  # one continuous column of pages
    TWO_COLUMN_LEFT = "/TwoColumnLeft"  # two columns, odd pages on the left
    TWO_COLUMN_RIGHT = "/TwoColumnRight"  # two columns, odd pages on the right
    TWO_PAGE_LEFT = "/TwoPageLeft"  # (PDF 1.5) two pages, odd pages on the left
    TWO_PAGE_RIGHT = "/TwoPageRight"  # (PDF 1.5) two pages, odd pages on the right
class GraphicsStateParameters:
    """Table 58 Entries in a Graphics State Parameter Dictionary"""

    TYPE = "/Type"  # name, optional
    LW = "/LW"  # number, optional: line width
    LC = "/LC"  # integer, optional: line cap style
    LJ = "/LJ"  # integer, optional: line join style
    ML = "/ML"  # number, optional: miter limit
    D = "/D"  # array, optional: dash pattern
    RI = "/RI"  # name, optional: rendering intent
    OP = "/OP"  # overprint flag (stroking)
    op = "/op"  # overprint flag (non-stroking)
    OPM = "/OPM"  # overprint mode
    FONT = "/Font"  # array, optional
    BG = "/BG"  # black-generation function
    BG2 = "/BG2"  # black-generation function or /Default
    UCR = "/UCR"  # undercolor-removal function
    UCR2 = "/UCR2"  # undercolor-removal function or /Default
    TR = "/TR"  # transfer function
    TR2 = "/TR2"  # transfer function or /Default
    HT = "/HT"  # halftone
    FL = "/FL"  # flatness tolerance
    SM = "/SM"  # smoothness tolerance
    SA = "/SA"  # automatic stroke adjustment
    BM = "/BM"  # blend mode
    S_MASK = "/SMask"  # dictionary or name, optional: soft mask
    CA = "/CA"  # alpha constant (stroking)
    ca = "/ca"  # alpha constant (non-stroking)
    AIS = "/AIS"  # alpha source flag
    TK = "/TK"  # text knockout flag
class CatalogDictionary:
    """§7.7.2 of the 1.7 and 2.0 references."""

    # Keys of the document catalog — the root dictionary of a PDF file.
    TYPE = "/Type"  # name, required; must be /Catalog
    VERSION = "/Version"  # name
    EXTENSIONS = "/Extensions"  # dictionary, optional; ISO 32000-1
    PAGES = "/Pages"  # dictionary, required
    PAGE_LABELS = "/PageLabels"  # number tree, optional
    NAMES = "/Names"  # dictionary, optional
    DESTS = "/Dests"  # dictionary, optional
    VIEWER_PREFERENCES = "/ViewerPreferences"  # dictionary, optional
    PAGE_LAYOUT = "/PageLayout"  # name, optional
    PAGE_MODE = "/PageMode"  # name, optional
    OUTLINES = "/Outlines"  # dictionary, optional
    THREADS = "/Threads"  # array, optional
    OPEN_ACTION = "/OpenAction"  # array or dictionary or name, optional
    AA = "/AA"  # dictionary, optional
    URI = "/URI"  # dictionary, optional
    ACRO_FORM = "/AcroForm"  # dictionary, optional
    METADATA = "/Metadata"  # stream, optional
    STRUCT_TREE_ROOT = "/StructTreeRoot"  # dictionary, optional
    MARK_INFO = "/MarkInfo"  # dictionary, optional
    LANG = "/Lang"  # text string, optional
    SPIDER_INFO = "/SpiderInfo"  # dictionary, optional
    OUTPUT_INTENTS = "/OutputIntents"  # array, optional
    PIECE_INFO = "/PieceInfo"  # dictionary, optional
    OC_PROPERTIES = "/OCProperties"  # dictionary, optional
    PERMS = "/Perms"  # dictionary, optional
    LEGAL = "/Legal"  # dictionary, optional
    REQUIREMENTS = "/Requirements"  # array, optional
    COLLECTION = "/Collection"  # dictionary, optional
    NEEDS_RENDERING = "/NeedsRendering"  # boolean, optional
    DSS = "/DSS"  # dictionary, optional
    AF = "/AF"  # array of dictionaries, optional
    D_PART_ROOT = "/DPartRoot"  # dictionary, optional
class OutlineFontFlag(IntFlag):
    """A class used as an enumerable flag for formatting an outline font."""

    italic = 1  # bit 1
    bold = 2  # bit 2
class PageLabelStyle:
    """
    Numbering styles for page labels.

    Table 8.10 in the 1.7 reference.
    Table 161 in the 2.0 reference.
    """

    DECIMAL = "/D"  # Decimal Arabic numerals
    UPPERCASE_ROMAN = "/R"  # Uppercase Roman numerals
    LOWERCASE_ROMAN = "/r"  # Lowercase Roman numerals
    UPPERCASE_LETTER = "/A"  # Uppercase letters
    LOWERCASE_LETTER = "/a"  # Lowercase letters
class AnnotationFlag(IntFlag):
    """See §12.5.3 "Annotation Flags"."""

    # Bit positions 1-10 of the annotation dictionary's /F entry.
    INVISIBLE = 1
    HIDDEN = 2
    PRINT = 4
    NO_ZOOM = 8
    NO_ROTATE = 16
    NO_VIEW = 32
    READ_ONLY = 64
    LOCKED = 128
    TOGGLE_NO_VIEW = 256
    LOCKED_CONTENTS = 512
# All constant-holding classes in this module whose attributes enumerate
# PDF dictionary keys.
PDF_KEYS = (
    AnnotationDictionaryAttributes,
    CatalogAttributes,
    CatalogDictionary,
    CcittFaxDecodeParameters,
    CheckboxRadioButtonAttributes,
    ColorSpaces,
    Core,
    DocumentInformationAttributes,
    EncryptionDictAttributes,
    FieldDictionaryAttributes,
    FileSpecificationDictionaryEntries,
    FilterTypeAbbreviations,
    FilterTypes,
    GoToActionArguments,
    GraphicsStateParameters,
    ImageAttributes,
    InteractiveFormDictEntries,
    LzwFilterParameters,
    PageAttributes,
    PageLayouts,
    PagesAttributes,
    Resources,
    StreamAttributes,
    TrailerKeys,
    TypArguments,
    TypFitArguments,
)
class ImageType(IntFlag):
    """Bit flags selecting which kinds of images an operation considers."""

    NONE = 0
    XOBJECT_IMAGES = auto()  # images stored as XObjects
    INLINE_IMAGES = auto()  # images embedded inline in content streams
    DRAWING_IMAGES = auto()  # images produced by drawing operations
    ALL = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES
    IMAGES = ALL  # for consistency with ObjectDeletionFlag

View File

@ -0,0 +1,66 @@
"""
All errors/exceptions pypdf raises and all of the warnings it uses.
Please note that broken PDF files might cause other Exceptions.
"""
class DeprecationError(Exception):
    """Raised when a deprecated pypdf feature is used."""
class DependencyError(Exception):
    """
    Raised when a required dependency is not available.

    A dependency is a library or module that pypdf depends on but which
    cannot be imported in the current environment.
    """
# Root of the library-specific exception hierarchy.
class PyPdfError(Exception):
    """Base class for all exceptions raised by pypdf."""
# Base class for the more specific read-time errors defined below.
class PdfReadError(PyPdfError):
    """Raised when there is an issue reading a PDF file."""
# Direct subclass of PyPdfError (not of PdfReadError).
class PageSizeNotDefinedError(PyPdfError):
    """Raised when the page size of a PDF document is not defined."""
# NOTE: a UserWarning, not part of the PyPdfError hierarchy.
class PdfReadWarning(UserWarning):
    """Issued when there is a potential issue reading a PDF file, but it can still be read."""
# Specialization of PdfReadError for malformed stream content.
class PdfStreamError(PdfReadError):
    """Raised when there is an issue reading the stream of data in a PDF file."""
class ParseError(PyPdfError):
    """
    Raised when there is an issue parsing a PDF file.

    Parsing means analyzing and understanding the structure and meaning of
    the file's contents.
    """
class FileNotDecryptedError(PdfReadError):
    """
    Raised when an encrypted PDF file has not been successfully decrypted.

    An encrypted file requires a password before its contents can be
    accessed.
    """
# Subclass of FileNotDecryptedError, so callers may catch either.
class WrongPasswordError(FileNotDecryptedError):
    """Raised when the wrong password is used to try to decrypt an encrypted PDF file."""
class EmptyFileError(PdfReadError):
    """Raised when a PDF file is empty or contains no content."""
# Direct subclass of PyPdfError (image handling, not file reading).
class EmptyImageDataError(PyPdfError):
    """Raised when trying to process an image that has no data."""
STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"

View File

@ -0,0 +1,836 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Implementation of stream filters for PDF.
See TABLE H.1 Abbreviations for standard filter names
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
import math
import struct
import zlib
from base64 import a85decode
from dataclasses import dataclass
from io import BytesIO
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from ._codecs._codecs import LzwCodec as _LzwCodec
from ._utils import (
WHITESPACES_AS_BYTES,
deprecate,
deprecate_with_replacement,
deprecation_no_replacement,
logger_warning,
)
from .constants import CcittFaxDecodeParameters as CCITT
from .constants import FilterTypeAbbreviations as FTA
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .constants import LzwFilterParameters as LZW
from .constants import StreamAttributes as SA
from .errors import DeprecationError, PdfReadError, PdfStreamError
from .generic import (
ArrayObject,
DictionaryObject,
IndirectObject,
NullObject,
)
def decompress(data: bytes) -> bytes:
    """
    Decompress the given data using zlib.

    Attempts to decompress the input data using zlib.
    If the decompression fails due to a zlib error, it falls back
    to using a decompression object with a larger window size, and as a
    last resort feeds the stream one byte at a time, ignoring errors, to
    salvage as much of a corrupted stream as possible.

    Args:
        data: The input data to be decompressed.

    Returns:
        The decompressed data.
    """
    try:
        return zlib.decompress(data)
    except zlib.error:
        try:
            # For larger files, use decompression object to enable buffered reading
            return zlib.decompressobj().decompress(data)
        except zlib.error:
            # If still failing, then try with increased window size and feed
            # the stream byte by byte, skipping over bytes that raise.
            # Index directly instead of materializing a throwaway list of
            # single-byte slices, and accumulate into a bytearray to avoid
            # quadratic bytes concatenation.
            d = zlib.decompressobj(zlib.MAX_WBITS | 32)
            result = bytearray()
            for i in range(len(data)):
                try:
                    result += d.decompress(data[i : i + 1])
                except zlib.error:
                    pass
            return bytes(result)
class FlateDecode:
    """FlateDecode filter (§7.4.4 of ISO 32000): zlib/deflate with optional
    TIFF or PNG predictors applied after decompression."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode data which is flate-encoded.

        Args:
            data: flate-encoded data.
            decode_parms: a dictionary of values, understanding the
                "/Predictor":<int> key only

        Returns:
            The flate-decoded data.

        Raises:
            PdfReadError: if an unsupported predictor value is encountered.
        """
        if isinstance(decode_parms, ArrayObject):
            raise DeprecationError("decode_parms as ArrayObject is deprecated")
        str_data = decompress(data)
        predictor = 1
        if decode_parms:
            try:
                predictor = decode_parms.get("/Predictor", 1)
            except (AttributeError, TypeError):  # Type Error is NullObject
                pass  # Usually an array with a null object was read
        # predictor 1 == no predictor
        if predictor != 1:
            # /Columns, the number of samples in each row, has a default value of 1;
            # §7.4.4.3, ISO 32000.
            DEFAULT_BITS_PER_COMPONENT = 8
            # Each of the three parameters falls back to its spec default when
            # missing (KeyError) or when decode_parms is not subscriptable (TypeError).
            try:
                columns = cast(int, decode_parms[LZW.COLUMNS].get_object())  # type: ignore
            except (TypeError, KeyError):
                columns = 1
            try:
                colors = cast(int, decode_parms[LZW.COLORS].get_object())  # type: ignore
            except (TypeError, KeyError):
                colors = 1
            try:
                bits_per_component = cast(
                    int,
                    decode_parms[LZW.BITS_PER_COMPONENT].get_object(),  # type: ignore
                )
            except (TypeError, KeyError):
                bits_per_component = DEFAULT_BITS_PER_COMPONENT
            # PNG predictor can vary by row and so is the lead byte on each row
            rowlength = (
                math.ceil(columns * colors * bits_per_component / 8) + 1
            )  # number of bytes
            # TIFF prediction:
            if predictor == 2:
                rowlength -= 1  # remove the predictor byte
                bpp = rowlength // columns  # bytes per pixel
                str_data = bytearray(str_data)
                for i in range(len(str_data)):
                    if i % rowlength >= bpp:
                        # Horizontal differencing: add the sample bpp bytes back.
                        str_data[i] = (str_data[i] + str_data[i - bpp]) % 256
                str_data = bytes(str_data)
            # PNG prediction:
            elif 10 <= predictor <= 15:
                str_data = FlateDecode._decode_png_prediction(
                    str_data, columns, rowlength
                )
            else:
                raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}")
        return str_data

    @staticmethod
    def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
        """Reverse the per-row PNG prediction filters (None/Sub/Up/Average/Paeth)."""
        # PNG prediction can vary from row to row
        if len(data) % rowlength != 0:
            raise PdfReadError("Image data is not rectangular")
        output = []
        # The (virtual) row above the first row is all zeros.
        prev_rowdata = (0,) * rowlength
        bpp = (rowlength - 1) // columns  # recomputed locally to not change params
        for row in range(0, len(data), rowlength):
            # rowdata[0] is the per-row filter byte; samples start at index 1.
            rowdata: List[int] = list(data[row : row + rowlength])
            filter_byte = rowdata[0]
            if filter_byte == 0:
                # PNG None Predictor
                pass
            elif filter_byte == 1:
                # PNG Sub Predictor
                for i in range(bpp + 1, rowlength):
                    rowdata[i] = (rowdata[i] + rowdata[i - bpp]) % 256
            elif filter_byte == 2:
                # PNG Up Predictor
                for i in range(1, rowlength):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
            elif filter_byte == 3:
                # PNG Average Predictor
                for i in range(1, bpp + 1):
                    # Left neighbor is implicitly 0 for the first pixel.
                    floor = prev_rowdata[i] // 2
                    rowdata[i] = (rowdata[i] + floor) % 256
                for i in range(bpp + 1, rowlength):
                    left = rowdata[i - bpp]
                    floor = (left + prev_rowdata[i]) // 2
                    rowdata[i] = (rowdata[i] + floor) % 256
            elif filter_byte == 4:
                # PNG Paeth Predictor
                for i in range(1, bpp + 1):
                    # Left and upper-left neighbors are implicitly 0 here.
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                for i in range(bpp + 1, rowlength):
                    left = rowdata[i - bpp]
                    up = prev_rowdata[i]
                    up_left = prev_rowdata[i - bpp]
                    p = left + up - up_left
                    dist_left = abs(p - left)
                    dist_up = abs(p - up)
                    dist_up_left = abs(p - up_left)
                    # Pick the neighbor closest to the initial estimate p;
                    # ties resolve left, then up, then upper-left.
                    if dist_left <= dist_up and dist_left <= dist_up_left:
                        paeth = left
                    elif dist_up <= dist_up_left:
                        paeth = up
                    else:
                        paeth = up_left
                    rowdata[i] = (rowdata[i] + paeth) % 256
            else:
                raise PdfReadError(
                    f"Unsupported PNG filter {filter_byte!r}"
                )  # pragma: no cover
            prev_rowdata = tuple(rowdata)
            # Emit the reconstructed samples, dropping the filter byte.
            output.extend(rowdata[1:])
        return bytes(output)

    @staticmethod
    def encode(data: bytes, level: int = -1) -> bytes:
        """
        Compress the input data using zlib.

        Args:
            data: The data to be compressed.
            level: See https://docs.python.org/3/library/zlib.html#zlib.compress

        Returns:
            The compressed data.
        """
        return zlib.compress(data, level)
class ASCIIHexDecode:
    """
    The ASCIIHexDecode filter turns ASCII-hexadecimal encoded data back
    into the raw bytes it represents.
    """

    @staticmethod
    def decode(
        data: Union[str, bytes],
        decode_parms: Optional["DictionaryObject"] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an ASCII-Hex encoded data stream.

        Each pair of hexadecimal digits yields one output byte; whitespace
        is ignored and ``>`` marks end of data.

        Args:
            data: hexadecimal-encoded input as ``str`` or ``bytes``.
            decode_parms: ignored for this filter.

        Returns:
            The decoded bytes.

        Raises:
            PdfStreamError:
        """
        # decode_parms is unused here
        if isinstance(data, str):
            data = data.encode()
        decoded = bytearray()
        pending = b""
        position = 0
        total = len(data)
        while position < total:
            char = data[position : position + 1]
            if char == b">":
                break
            if not char.isspace():
                pending += char
                if len(pending) == 2:
                    decoded.append(int(pending, base=16))
                    pending = b""
            position += 1
        else:
            # Input ran out before the ">" end-of-data marker was seen.
            logger_warning(
                "missing EOD in ASCIIHexDecode, check if output is OK", __name__
            )
        # An odd number of hex digits would leave an unconsumed nibble.
        assert pending == b""
        return bytes(decoded)
class RunLengthDecode:
    """
    Decode the simple byte-oriented run-length encoding.

    The encoded stream is a sequence of runs, each a length byte ``L``
    followed by data. ``L`` in 0..127 means the next ``L + 1`` bytes are
    copied literally; ``L`` in 129..255 means the next single byte is
    repeated ``257 - L`` (2 to 128) times; ``L == 128`` denotes EOD.
    """

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional["DictionaryObject"] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode a run length encoded data stream.

        Args:
            data: a bytes sequence of length/data runs.
            decode_parms: ignored.

        Returns:
            The decompressed bytes.

        Raises:
            PdfStreamError: if the EOD marker appears before the data ends.
        """
        # decode_parms is unused here
        chunks = []
        pos = 0
        end = len(data)
        while pos < end:
            run = data[pos]
            pos += 1
            if run == 128:
                # EOD marker must be the very last byte of the stream.
                if pos < end:
                    raise PdfStreamError("Early EOD in RunLengthDecode")
                return b"".join(chunks)
            if run < 128:
                count = run + 1
                chunks.append(data[pos : pos + count])
                pos += count
            else:  # >128
                chunks.append(bytes((data[pos],)) * (257 - run))
                pos += 1
        # Fell off the end of the input without seeing the EOD marker.
        logger_warning(
            "missing EOD in RunLengthDecode, check if output is OK", __name__
        )
        return b"".join(chunks)
class LZWDecode:
    """LZW filter wrapper; the actual decoding is delegated to pypdf's
    internal LzwCodec."""

    class Decoder:
        # Legacy code-point constants; not referenced in this class — the
        # clear/stop codes are handled inside the codec itself.
        STOP = 257
        CLEARDICT = 256

        def __init__(self, data: bytes) -> None:
            self.data = data

        def decode(self) -> bytes:
            # Delegate to the native LZW implementation.
            return _LzwCodec().decode(self.data)

    @staticmethod
    def _decodeb(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an LZW encoded data stream.

        Args:
            data: ``bytes`` or ``str`` text to decode.
            decode_parms: a dictionary of parameter values.

        Returns:
            decoded data.
        """
        # decode_parms is unused here
        return LZWDecode.Decoder(data).decode()

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> str:  # deprecated
        """
        Decode an LZW encoded data stream.

        Deprecated: will return ``bytes`` instead of ``str`` in pypdf 6.0.0;
        prefer ``_decodeb``.

        Args:
            data: ``bytes`` or ``str`` text to decode.
            decode_parms: a dictionary of parameter values.

        Returns:
            decoded data.
        """
        # decode_parms is unused here
        deprecate("LZWDecode.decode will return bytes instead of str in pypdf 6.0.0")
        return LZWDecode.Decoder(data).decode().decode("latin-1")
class ASCII85Decode:
    """Decodes string ASCII85-encoded data into a byte format."""

    @staticmethod
    def decode(
        data: Union[str, bytes],
        decode_parms: Optional["DictionaryObject"] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an Ascii85 encoded data stream.

        Args:
            data: ``bytes`` or ``str`` text to decode.
            decode_parms: a dictionary of parameter values (unused here).

        Returns:
            The decoded bytes.
        """
        if isinstance(data, str):
            data = data.encode()
        data = data.strip(WHITESPACES_AS_BYTES)
        try:
            return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
        except ValueError as exc:
            # Anything other than a missing "~>" terminator is a real error.
            if exc.args[0] != "Ascii85 encoded byte sequences must end with b'~>'":
                raise
        # Tolerate streams without the Adobe end marker.
        logger_warning("Ignoring missing Ascii85 end marker.", __name__)
        return a85decode(data, adobe=False, ignorechars=WHITESPACES_AS_BYTES)
class DCTDecode:
    """Pass-through for DCT (JPEG) encoded data; no transformation is applied."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional["DictionaryObject"] = None,
        **kwargs: Any,
    ) -> bytes:
        """Return *data* unchanged; ``decode_parms`` is unused here."""
        return data
class JPXDecode:
    """Pass-through for JPX (JPEG 2000) encoded data; no transformation is applied."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional["DictionaryObject"] = None,
        **kwargs: Any,
    ) -> bytes:
        """Return *data* unchanged; ``decode_parms`` is unused here."""
        return data
@dataclass
class CCITTParameters:
    """§7.4.6, optional parameters for the CCITTFaxDecode filter."""

    K: int = 0
    columns: int = 0
    rows: int = 0
    EndOfBlock: Union[int, None] = None
    EndOfLine: Union[int, None] = None
    EncodedByteAlign: Union[int, None] = None
    DamagedRowsBeforeError: Union[int, None] = None

    @property
    def group(self) -> int:
        """CCITT group implied by ``K``: 4 for pure 2-D coding, 3 otherwise."""
        # K < 0  -> pure two-dimensional encoding (Group 4)
        # K == 0 -> pure one-dimensional encoding (Group 3, 1-D)
        # K > 0  -> mixed one- and two-dimensional encoding (Group 3, 2-D)
        return 4 if self.K < 0 else 3
def __create_old_class_instance(
    K: int = 0,
    columns: int = 0,
    rows: int = 0
) -> CCITTParameters:
    """Deprecated factory emulating the misspelled ``CCITParameters`` name;
    emits a deprecation notice and builds a ``CCITTParameters``."""
    deprecate_with_replacement("CCITParameters", "CCITTParameters", "6.0.0")
    return CCITTParameters(K, columns, rows)


# Create an alias for the old class name
CCITParameters = __create_old_class_instance
class CCITTFaxDecode:
    """
    §7.4.6, CCITTFaxDecode filter (ISO 32000).

    Either Group 3 or Group 4 CCITT facsimile (fax) encoding.
    CCITT encoding is bit-oriented, not byte-oriented.

    §7.4.6, optional parameters for the CCITTFaxDecode filter.
    """

    @staticmethod
    def _get_parameters(
        parameters: Union[None, ArrayObject, DictionaryObject, IndirectObject],
        rows: Union[int, IndirectObject],
    ) -> CCITTParameters:
        """Extract /K and /Columns from the decode parameters (array or
        dictionary form), falling back to the spec defaults (0 and 1728)."""
        # §7.4.6, optional parameters for the CCITTFaxDecode filter
        k = 0
        columns = 1728
        if parameters:
            parameters_unwrapped = cast(
                Union[ArrayObject, DictionaryObject], parameters.get_object()
            )
            if isinstance(parameters_unwrapped, ArrayObject):
                # An array of parameter dictionaries; later entries win.
                for decode_parm in parameters_unwrapped:
                    if CCITT.COLUMNS in decode_parm:
                        columns = decode_parm[CCITT.COLUMNS].get_object()
                    if CCITT.K in decode_parm:
                        k = decode_parm[CCITT.K].get_object()
            else:
                if CCITT.COLUMNS in parameters_unwrapped:
                    columns = parameters_unwrapped[CCITT.COLUMNS].get_object()  # type: ignore
                if CCITT.K in parameters_unwrapped:
                    k = parameters_unwrapped[CCITT.K].get_object()  # type: ignore
        return CCITTParameters(K=k, columns=columns, rows=int(rows))

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        height: int = 0,
        **kwargs: Any,
    ) -> bytes:
        """Wrap the raw CCITT payload in a minimal little-endian TIFF header
        (8 IFD tags) so image libraries can open it; the payload itself is
        not transformed."""
        # decode_parms is unused here
        if isinstance(decode_parms, ArrayObject):  # deprecated
            deprecation_no_replacement(
                "decode_parms being an ArrayObject", removed_in="3.15.5"
            )
        params = CCITTFaxDecode._get_parameters(decode_parms, height)

        img_size = len(data)
        # Header layout: byte order + magic + IFD offset, then 8 tags of
        # (id, type, count, value), then the next-IFD terminator.
        tiff_header_struct = "<2shlh" + "hhll" * 8 + "h"
        tiff_header = struct.pack(
            tiff_header_struct,
            b"II",  # Byte order indication: Little endian
            42,  # Version number (always 42)
            8,  # Offset to first IFD
            8,  # Number of tags in IFD
            256,
            4,
            1,
            params.columns,  # ImageWidth, LONG, 1, width
            257,
            4,
            1,
            params.rows,  # ImageLength, LONG, 1, length
            258,
            3,
            1,
            1,  # BitsPerSample, SHORT, 1, 1
            259,
            3,
            1,
            params.group,  # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
            262,
            3,
            1,
            0,  # Thresholding, SHORT, 1, 0 = WhiteIsZero
            273,
            4,
            1,
            struct.calcsize(
                tiff_header_struct
            ),  # StripOffsets, LONG, 1, length of header
            278,
            4,
            1,
            params.rows,  # RowsPerStrip, LONG, 1, length
            279,
            4,
            1,
            img_size,  # StripByteCounts, LONG, 1, size of image
            0,  # last IFD
        )
        return tiff_header + data
def decode_stream_data(stream: Any) -> bytes:
    """
    Decode the stream data based on the specified filters.

    This function decodes the stream data using the filters provided in the
    stream.

    Args:
        stream: The input stream object containing the data and filters.

    Returns:
        The decoded stream data.

    Raises:
        NotImplementedError: If an unsupported filter type is encountered.
    """
    filters = stream.get(SA.FILTER, ())
    if isinstance(filters, IndirectObject):
        filters = cast(ArrayObject, filters.get_object())
    if not isinstance(filters, ArrayObject):
        # We have a single filter instance
        filters = (filters,)
    # One parameter entry per filter; default to empty dictionaries.
    decode_parms = stream.get(SA.DECODE_PARMS, ({},) * len(filters))
    if not isinstance(decode_parms, (list, tuple)):
        decode_parms = (decode_parms,)
    data: bytes = stream._data
    # If there is not data to decode we should not try to decode the data.
    if not data:
        return data
    # Filters are applied in listed order; each output feeds the next input.
    for filter_name, params in zip(filters, decode_parms):
        if isinstance(params, NullObject):
            params = {}
        if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
            data = ASCIIHexDecode.decode(data)
        elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
            data = ASCII85Decode.decode(data)
        elif filter_name in (FT.LZW_DECODE, FTA.LZW):
            data = LZWDecode._decodeb(data, params)
        elif filter_name in (FT.FLATE_DECODE, FTA.FL):
            data = FlateDecode.decode(data, params)
        elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
            data = RunLengthDecode.decode(data)
        elif filter_name == FT.CCITT_FAX_DECODE:
            height = stream.get(IA.HEIGHT, ())
            data = CCITTFaxDecode.decode(data, params, height)
        elif filter_name == FT.DCT_DECODE:
            data = DCTDecode.decode(data)
        elif filter_name == FT.JPX_DECODE:
            data = JPXDecode.decode(data)
        elif filter_name == "/Crypt":
            if "/Name" in params or "/Type" in params:
                raise NotImplementedError(
                    "/Crypt filter with /Name or /Type not supported yet"
                )
            # Otherwise: the default Identity crypt filter — data unchanged.
        else:
            raise NotImplementedError(f"Unsupported filter {filter_name}")
    return data
def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes, Any]:
    """
    Users need to have the pillow package installed.

    It's unclear if pypdf will keep this function here, hence it's private.
    It might get removed at any point.

    Args:
        x_object_obj: the image XObject dictionary to convert.

    Returns:
        Tuple[file extension, bytes, PIL.Image.Image]
    """
    # Imported lazily so pillow stays an optional dependency.
    from ._xobj_image_helpers import (
        Image,
        UnidentifiedImageError,
        _apply_decode,
        _extended_image_frombytes,
        _get_mode_and_invert_color,
        _handle_flate,
        _handle_jpx,
    )

    def _apply_alpha(
        img: Image.Image,
        x_object_obj: Dict[str, Any],
        obj_as_text: str,
        image_format: str,
        extension: str,
    ) -> Tuple[Image.Image, str, str]:
        # Merge the /SMask (soft mask) of the XObject, if any, into the
        # image as an alpha channel, switching to an alpha-capable format.
        alpha = None
        if IA.S_MASK in x_object_obj:  # add alpha channel
            # The mask is itself an image XObject; recurse to decode it.
            alpha = _xobj_to_image(x_object_obj[IA.S_MASK])[2]
            if img.size != alpha.size:
                logger_warning(
                    f"image and mask size not matching: {obj_as_text}", __name__
                )
            else:
                # TODO : implement mask
                if alpha.mode != "L":
                    alpha = alpha.convert("L")
                if img.mode == "P":
                    img = img.convert("RGB")
                elif img.mode == "1":
                    img = img.convert("L")
                img.putalpha(alpha)
            if "JPEG" in image_format:
                extension = ".jp2"
                image_format = "JPEG2000"
            else:
                extension = ".png"
                image_format = "PNG"
        return img, extension, image_format

    # for error reporting
    # NOTE(review): the condition looks inverted — a None object has no
    # .indirect_reference, so the first branch would fail; marked no cover.
    obj_as_text = (
        x_object_obj.indirect_reference.__repr__()
        if x_object_obj is None  # pragma: no cover
        else x_object_obj.__repr__()
    )

    # Get size and data
    size = (cast(int, x_object_obj[IA.WIDTH]), cast(int, x_object_obj[IA.HEIGHT]))
    data = x_object_obj.get_data()  # type: ignore
    if isinstance(data, str):  # pragma: no cover
        data = data.encode()
    # Drop a single trailing newline that would break the expected size.
    if len(data) % (size[0] * size[1]) == 1 and data[-1] == 0x0A:  # ie. '\n'
        data = data[:-1]

    # Get color properties
    colors = x_object_obj.get("/Colors", 1)
    color_space: Any = x_object_obj.get("/ColorSpace", NullObject()).get_object()
    if isinstance(color_space, list) and len(color_space) == 1:
        color_space = color_space[0].get_object()
    mode, invert_color = _get_mode_and_invert_color(x_object_obj, colors, color_space)

    # Get filters — only the last filter decides the output format.
    filters = x_object_obj.get(SA.FILTER, NullObject()).get_object()
    lfilters = filters[-1] if isinstance(filters, list) else filters

    extension = None
    if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
        img, image_format, extension, _ = _handle_flate(
            size,
            data,
            mode,
            color_space,
            colors,
            obj_as_text,
        )
    elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE, FT.CCITT_FAX_DECODE):
        # I'm not sure if the following logic is correct.
        # There might not be any relationship between the filters and the
        # extension
        if lfilters in (FT.LZW_DECODE, FT.CCITT_FAX_DECODE):
            extension = ".tiff"  # mime_type = "image/tiff"
            image_format = "TIFF"
        else:
            extension = ".png"  # mime_type = "image/png"
            image_format = "PNG"
        try:
            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
        except UnidentifiedImageError:
            # Fall back to interpreting the raw bytes directly.
            img = _extended_image_frombytes(mode, size, data)
    elif lfilters == FT.DCT_DECODE:
        img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
        # invert_color kept unchanged
    elif lfilters == FT.JPX_DECODE:
        img, image_format, extension, invert_color = _handle_jpx(
            size, data, mode, color_space, colors
        )
    elif lfilters == FT.CCITT_FAX_DECODE:
        # NOTE(review): this branch appears unreachable — CCITT_FAX_DECODE
        # is already matched by the tuple branch above.
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("TIFF",)),
            "TIFF",
            ".tiff",
            False,
        )
    elif mode == "CMYK":
        img, image_format, extension, invert_color = (
            _extended_image_frombytes(mode, size, data),
            "TIFF",
            ".tif",
            False,
        )
    elif mode == "":
        raise PdfReadError(f"ColorSpace field not found in {x_object_obj}")
    else:
        img, image_format, extension, invert_color = (
            _extended_image_frombytes(mode, size, data),
            "PNG",
            ".png",
            False,
        )
    img = _apply_decode(img, x_object_obj, lfilters, color_space, invert_color)
    img, extension, image_format = _apply_alpha(
        img, x_object_obj, obj_as_text, image_format, extension
    )

    # Save image to bytes
    img_byte_arr = BytesIO()
    try:
        img.save(img_byte_arr, format=image_format)
    except OSError:  # pragma: no cover # covered with pillow 10.3
        # in case of we convert to RGBA and then to PNG
        img1 = img.convert("RGBA")
        image_format = "PNG"
        extension = ".png"
        img_byte_arr = BytesIO()
        img1.save(img_byte_arr, format=image_format)
    data = img_byte_arr.getvalue()

    try:  # temporary try/except until other fixes of images
        img = Image.open(BytesIO(data))
    except Exception:
        img = None  # type: ignore
    return extension, data, img

View File

@ -0,0 +1,238 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
from typing import List, Optional, Tuple, Union
from .._utils import (
deprecation_with_replacement,
)
from ..constants import OutlineFontFlag
from ._base import (
BooleanObject,
ByteStringObject,
FloatObject,
IndirectObject,
NameObject,
NullObject,
NumberObject,
PdfObject,
TextStringObject,
encode_pdfdocencoding,
is_null_or_none,
)
from ._data_structures import (
ArrayObject,
ContentStream,
DecodedStreamObject,
Destination,
DictionaryObject,
EncodedStreamObject,
Field,
StreamObject,
TreeObject,
read_object,
)
from ._files import EmbeddedFile
from ._fit import Fit
from ._outline import OutlineItem
from ._rectangle import RectangleObject
from ._utils import (
create_string_object,
decode_pdfdocencoding,
hex_to_rgb,
read_hex_string_from_stream,
read_string_from_stream,
)
from ._viewerpref import ViewerPreferences
PAGE_FIT = Fit.fit()
class AnnotationBuilder:  # deprecated
    """
    The AnnotationBuilder is deprecated.

    Instead, use the annotation classes in pypdf.annotations.

    See `adding PDF annotations <../user/adding-pdf-annotations.html>`_ for
    its usage combined with PdfWriter.
    """

    # Every method below is a stub that only triggers the deprecation
    # helper, pointing callers at the replacement class.
    from ..generic._rectangle import RectangleObject

    @staticmethod
    def text(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        text: str,
        open: bool = False,
        flags: int = 0,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.text", "pypdf.annotations.Text", "5.0.0"
        )

    @staticmethod
    def free_text(
        text: str,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        font: str = "Helvetica",
        bold: bool = False,
        italic: bool = False,
        font_size: str = "14pt",
        font_color: str = "000000",
        border_color: Optional[str] = "000000",
        background_color: Optional[str] = "ffffff",
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.free_text", "pypdf.annotations.FreeText", "5.0.0"
        )

    @staticmethod
    def popup(
        *,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        flags: int = 0,
        parent: Optional[DictionaryObject] = None,
        open: bool = False,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.popup", "pypdf.annotations.Popup", "5.0.0"
        )

    @staticmethod
    def line(
        p1: Tuple[float, float],
        p2: Tuple[float, float],
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        text: str = "",
        title_bar: Optional[str] = None,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.line", "pypdf.annotations.Line", "5.0.0"
        )

    @staticmethod
    def polyline(
        vertices: List[Tuple[float, float]],
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.polyline", "pypdf.annotations.PolyLine", "5.0.0"
        )

    @staticmethod
    def rectangle(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        interiour_color: Optional[str] = None,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.rectangle", "pypdf.annotations.Rectangle", "5.0.0"
        )

    @staticmethod
    def highlight(
        *,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        quad_points: ArrayObject,
        highlight_color: str = "ff0000",
        printing: bool = False,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.highlight", "pypdf.annotations.Highlight", "5.0.0"
        )

    @staticmethod
    def ellipse(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        interiour_color: Optional[str] = None,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.ellipse", "pypdf.annotations.Ellipse", "5.0.0"
        )

    @staticmethod
    def polygon(vertices: List[Tuple[float, float]]) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.polygon", "pypdf.annotations.Polygon", "5.0.0"
        )

    from ._fit import DEFAULT_FIT

    @staticmethod
    def link(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        border: Optional[ArrayObject] = None,
        url: Optional[str] = None,
        target_page_index: Optional[int] = None,
        fit: Fit = DEFAULT_FIT,
    ) -> None:
        deprecation_with_replacement(
            "AnnotationBuilder.link", "pypdf.annotations.Link", "5.0.0"
        )
# Explicit public API of pypdf.generic.
__all__ = [
    "PAGE_FIT",
    "AnnotationBuilder",
    "ArrayObject",
    "BooleanObject",
    "ByteStringObject",
    "ContentStream",
    "DecodedStreamObject",
    "Destination",
    "DictionaryObject",
    "EmbeddedFile",
    "EncodedStreamObject",
    "Field",
    "Fit",
    "FloatObject",
    "IndirectObject",
    "NameObject",
    "NullObject",
    "NumberObject",
    "OutlineFontFlag",
    "OutlineItem",
    "PdfObject",
    "RectangleObject",
    "StreamObject",
    "TextStringObject",
    "TreeObject",
    "ViewerPreferences",
    # Utility functions
    "create_string_object",
    "decode_pdfdocencoding",
    "encode_pdfdocencoding",
    "hex_to_rgb",
    "is_null_or_none",
    "read_hex_string_from_stream",
    # Data structures core functions
    "read_object",
    "read_string_from_stream",
]

Some files were not shown because too many files have changed in this diff Show More