Initial Commit of the PDM project (ready for DWS migration)
This commit is contained in:
828
helpers/batch_copy_tree.py
Normal file
828
helpers/batch_copy_tree.py
Normal file
@@ -0,0 +1,828 @@
|
||||
"""
|
||||
Batch Copy Tree Export for PDM Vault
|
||||
=====================================
|
||||
This module provides a framework for:
|
||||
1. Logging into a PDM vault via API
|
||||
2. Reading part numbers from a CSV file (no extensions)
|
||||
3. Running the PDM Copy Tree function for each part
|
||||
4. Exporting each part's file tree to its own subfolder on a local path
|
||||
|
||||
Usage:
|
||||
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output" --vault "IDSVault"
|
||||
"""
|
||||
|
||||
import logging
|
||||
import argparse
|
||||
import getpass
|
||||
import os
|
||||
import ctypes
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
import win32com.client
|
||||
|
||||
# PDM API Type Library Constants
|
||||
# Object-type code passed to vault.GetObject() to fetch a file by its ID.
EdmObject_File = 1
# Object-type code passed to vault.GetObject() to fetch a folder by its ID.
EdmObject_Folder = 2
# NOTE(review): not referenced anywhere in the visible part of this file.
EdmSearch_FileName = 1 # Search by filename
# Passed as the lEdmGetFlags argument of GetFileCopy() when exporting files.
EdmGet_Simple = 1 # Simple get (latest version)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONFIGURATION - Can be overridden via command line
|
||||
# =============================================================================
|
||||
|
||||
# Used as the default value (and in the help text) of the -v/--vault CLI option.
VAULT_NAME = "IDSVault" # Default vault name
|
||||
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LOGGING SETUP
|
||||
# =============================================================================
|
||||
|
||||
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
    """Configure and return the shared ``batch_copy_tree`` logger.

    A DEBUG-level file handler and an INFO-level console handler are
    attached, both using the same ``time - level - message`` format.
    Handlers left over from an earlier call are closed and removed first, so
    calling this function more than once does not duplicate every log line.

    Args:
        log_file: Path of the log file. When ``None`` a name of the form
            ``batch_copy_tree_YYYYmmdd_HHMMSS.log`` is generated (relative to
            the current working directory).

    Returns:
        The configured logger named ``"batch_copy_tree"``.
    """
    if log_file is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = f"batch_copy_tree_{timestamp}.log"

    logger = logging.getLogger("batch_copy_tree")
    logger.setLevel(logging.DEBUG)

    # getLogger() hands back the same object on every call; without this
    # cleanup a second setup_logging() would stack a second pair of handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # File handler: full detail. Explicit UTF-8 so vault paths containing
    # non-ASCII characters do not depend on the locale's default code page.
    fh = logging.FileHandler(log_file, encoding="utf-8")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)

    # Console handler: progress messages only.
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)

    logger.addHandler(fh)
    logger.addHandler(ch)

    return logger
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PDM VAULT CONNECTION
|
||||
# =============================================================================
|
||||
|
||||
class PDMVaultConnection:
    """Handles connection and authentication to the PDM vault.

    Typical use is ``conn = PDMVaultConnection(name)`` followed by
    ``conn.connect(user, password)``; the object can also be used as a
    context manager, in which case ``disconnect()`` runs on exit (``connect``
    must still be called explicitly).
    """

    def __init__(self, vault_name: str):
        self.vault_name = vault_name
        # COM vault object; set by connect(), reset to None by disconnect().
        self.vault = None
        self.is_connected = False
        self.logger = logging.getLogger("batch_copy_tree")
        # Credentials of the last successful login; cleared by disconnect().
        self._username = None
        self._password = None

    def connect(self, username: str, password: str) -> bool:
        """
        Connect and log into the PDM vault with username/password.

        Args:
            username: PDM username
            password: PDM password

        Returns:
            True if connection successful, False otherwise. On failure the
            error is logged and no vault object is kept.
        """
        try:
            # Create the vault interface and log in with the credentials
            vault = win32com.client.Dispatch("ConisioLib.EdmVault")
            vault.Login(username, password, self.vault_name)
        except Exception as e:
            self.logger.error(f"Failed to connect to vault '{self.vault_name}': {e}")
            # Do not keep a COM object that never logged in; otherwise a
            # later disconnect() would report a disconnect that never was.
            self.vault = None
            self.is_connected = False
            return False

        self.vault = vault
        self.is_connected = True
        self._username = username
        self._password = password
        self.logger.info(f"Successfully connected to vault: {self.vault_name}")
        self.logger.info(f"Logged in as: {username}")
        return True

    def disconnect(self) -> None:
        """Drop the vault reference and forget the stored credentials.

        Safe to call repeatedly; the "Disconnected" message is only logged
        when a vault object was actually held.
        """
        had_vault = self.vault is not None

        self.vault = None
        self.is_connected = False
        # Do not keep the plaintext password alive once the session is over.
        self._username = None
        self._password = None

        if had_vault:
            self.logger.info("Disconnected from vault")

    def search_file_by_name(self, filename: str) -> List[Dict[str, Any]]:
        """
        Search for a file in the vault by filename (supports wildcards).

        Args:
            filename: The filename to search for (e.g., "part001.*" or "part001.sldprt")

        Returns:
            List of dicts, one per match, with the keys 'file_obj',
            'file_id', 'folder_id', 'path', 'folder_path', 'folder_obj' and
            'filename'. Empty when not connected or when the search fails
            (the error is logged, not raised).
        """
        if not self.is_connected:
            self.logger.error("Not connected to vault")
            return []

        results = []

        try:
            # Create search object and set the filename pattern
            search = self.vault.CreateSearch()
            search.FileName = filename
            self.logger.debug(f"Search pattern: '{filename}'")

            # Execute search and walk the result cursor
            search_result = search.GetFirstResult()

            while search_result is not None:
                try:
                    file_id = search_result.ID
                    folder_id = search_result.ParentFolderID

                    # Get the file and folder objects
                    file_obj = self.vault.GetObject(EdmObject_File, file_id)
                    folder_obj = self.vault.GetObject(EdmObject_Folder, folder_id)

                    if file_obj is not None and folder_obj is not None:
                        folder_path = folder_obj.LocalPath
                        # Use the name reported by the search result
                        actual_name = search_result.Name
                        full_path = str(Path(folder_path) / actual_name)

                        results.append({
                            "file_obj": file_obj,
                            "file_id": file_id,
                            "folder_id": folder_id,
                            "path": full_path,
                            "folder_path": folder_path,
                            "folder_obj": folder_obj,
                            "filename": actual_name
                        })
                except Exception as e:
                    # One bad hit must not abort the whole search.
                    self.logger.warning(f"Error processing search result: {e}")

                search_result = search.GetNextResult()

        except Exception as e:
            self.logger.error(f"Error searching for '{filename}': {e}")

        self.logger.debug(f"Search for '{filename}' returned {len(results)} result(s)")
        return results

    def __enter__(self):
        """Context manager entry - note: call connect() separately with credentials."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.disconnect()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FILE LIST HANDLING
|
||||
# =============================================================================
|
||||
|
||||
def load_part_numbers_from_csv(csv_path: str) -> List[str]:
    """
    Read part numbers from a one-column, header-less CSV file.

    Each line is stripped of surrounding whitespace and blank lines are
    skipped. The file is decoded as UTF-8 with an optional BOM.

    Args:
        csv_path: Path to the CSV file with one part number per line (no extensions)

    Returns:
        List of part numbers in file order. Errors are logged rather than
        raised; whatever was read before the error is returned (an empty
        list when the file cannot be opened).
    """
    log = logging.getLogger("batch_copy_tree")
    loaded: List[str] = []

    try:
        with open(csv_path, 'r', encoding='utf-8-sig') as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry:
                    continue  # blank line
                loaded.append(entry)

        log.info(f"Loaded {len(loaded)} part numbers from {csv_path}")

    except FileNotFoundError:
        log.error(f"CSV file not found: {csv_path}")
    except Exception as e:
        log.error(f"Error reading CSV file: {e}")

    return loaded
|
||||
|
||||
|
||||
def search_and_resolve_parts(
    vault: PDMVaultConnection,
    part_numbers: List[str],
    extension: str
) -> Dict[str, List]:
    """
    Search for part numbers in the vault and resolve each to a root file.

    Part numbers are searched with the user-specified extension
    (e.g., "PART001.SLDASM") since the CSV does not include file extensions.
    When a search returns several files, the first one is used and the part
    is additionally recorded under 'ambiguous' so callers can review it.

    Args:
        vault: Active vault connection
        part_numbers: List of part numbers (no extensions)
        extension: File extension including the dot (e.g., ".SLDASM")

    Returns:
        Dict with:
        - 'valid': list of file info dicts (ready for copy tree)
        - 'not_found': list of part numbers not found in vault
        - 'ambiguous': list of dicts {'part_number': str, 'paths': [str, ...]}
          for every part with more than one match (such parts also appear
          in 'valid', resolved to their first match)
    """
    logger = logging.getLogger("batch_copy_tree")
    results = {"valid": [], "not_found": [], "ambiguous": []}

    total = len(part_numbers)
    for i, part_number in enumerate(part_numbers, 1):
        logger.info(f"[{i}/{total}] Searching for: {part_number}{extension}")

        search_results = vault.search_file_by_name(f"{part_number}{extension}")

        if not search_results:
            results["not_found"].append(part_number)
            logger.warning(f" NOT FOUND: {part_number}")
            continue

        match = search_results[0]
        logger.info(f" FOUND: {match['path']}")

        if len(search_results) > 1:
            logger.warning(" Multiple matches found, using first result:")
            for r in search_results:
                logger.warning(f" - {r['path']}")
            # Keep a machine-readable record, as the return contract promises.
            results["ambiguous"].append({
                "part_number": part_number,
                "paths": [r["path"] for r in search_results],
            })

        results["valid"].append({
            "part_number": part_number,
            "filename": match["filename"],
            "path": match["path"],
            "file_obj": match["file_obj"],
            "file_id": match["file_id"],
            "folder_id": match["folder_id"],
            "folder_obj": match["folder_obj"],
        })

    return results
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# COPY TREE OPERATIONS
|
||||
# =============================================================================
|
||||
|
||||
def get_window_handle() -> int:
    """Return the console window handle to pass to PDM API calls.

    Falls back to 0 when there is no console window or when the lookup
    fails for any reason (for example, ``ctypes.windll`` is unavailable).
    """
    try:
        console_hwnd = ctypes.windll.kernel32.GetConsoleWindow()
        return console_hwnd or 0
    except Exception:
        return 0
|
||||
|
||||
|
||||
def _collect_references(file_obj, folder_id: int, vault, logger) -> List[Dict[str, Any]]:
    """
    Collect all referenced files from an assembly using IEdmReference5.

    Uses IEdmFile5.GetReferenceTree() to get the reference tree root, then
    recursively traverses the tree using GetFirstChildPosition/GetNextChild
    on each node. Each file is descended into only the first time it is
    seen, so sub-assemblies shared by several parents are walked once and a
    cyclic reference cannot cause unbounded recursion.

    Traversal errors are logged and end the walk of the affected node; the
    files gathered up to that point are still returned.

    Args:
        file_obj: IEdmFile5 COM object (the root assembly)
        folder_id: Folder ID of the root file
        vault: The IEdmVault COM object (currently unused; kept so existing
            callers keep working)
        logger: Logger instance

    Returns:
        List of dicts with 'file_id', 'folder_id', 'name' for each
        unique referenced file (including the root file itself), in
        first-seen order
    """
    collected = {}  # keyed by file ID to deduplicate

    # Add the root file itself
    root_name = file_obj.Name
    root_id = file_obj.ID
    collected[root_id] = {
        "file_id": root_id,
        "folder_id": folder_id,
        "name": root_name,
    }
    logger.debug(f" Root: {root_name}")

    def _traverse_children(ref_node, depth=0):
        """Recursively record the not-yet-seen children of a reference node."""
        indent = " " * (depth + 1)
        try:
            result = ref_node.GetFirstChildPosition("", True, True, 0)

            # The COM call may come back as a tuple (position first) or as
            # the bare position object.
            child_pos = result[0] if isinstance(result, tuple) else result

            while child_pos is not None:
                try:
                    if hasattr(child_pos, 'IsNull') and child_pos.IsNull:
                        break
                except Exception:
                    # Probing IsNull is best effort; GetNextChild below is
                    # the authoritative end-of-list signal.
                    pass

                try:
                    child_ref = ref_node.GetNextChild(child_pos)
                    if child_ref is None:
                        break

                    if isinstance(child_ref, tuple):
                        child_ref = child_ref[0]

                    child_file_id = child_ref.FileID
                    child_folder_id = child_ref.FolderID
                    child_name = child_ref.Name

                    if child_file_id in collected:
                        # Already recorded: its subtree has been (or is
                        # being) walked, so descending again adds nothing.
                        continue

                    collected[child_file_id] = {
                        "file_id": child_file_id,
                        "folder_id": child_folder_id,
                        "name": child_name,
                    }
                    logger.debug(f"{indent}Child: {child_name}")

                    # Recurse into this child to get its children
                    _traverse_children(child_ref, depth + 1)

                except StopIteration:
                    break
                except Exception as child_err:
                    logger.debug(f"{indent}Error reading child reference: {child_err}")
                    break

        except Exception as e:
            logger.debug(f"{indent}Error traversing children at depth {depth}: {e}")

    try:
        # Get the reference tree (IEdmReference5)
        ref_tree = file_obj.GetReferenceTree(folder_id, 0)

        if ref_tree is None:
            logger.warning(" GetReferenceTree returned None")
            return list(collected.values())

        _traverse_children(ref_tree)

    except Exception as e:
        logger.warning(f" Error traversing reference tree: {e}")
        logger.debug(" Full error:", exc_info=True)

    return list(collected.values())
|
||||
|
||||
|
||||
def execute_copy_tree(
    vault: PDMVaultConnection,
    file_info: Dict[str, Any],
    output_dir: str,
    part_name: str,
    dry_run: bool = False
) -> Dict[str, Any]:
    """
    Export an assembly and all its referenced files to a local subfolder.

    Uses IEdmFile5.GetReferenceTree() to traverse references, then
    IEdmFile5.GetFileCopy() to download each file to the output folder.

    A dry run only lists the files and does not touch the file system (the
    output subfolder is not created). In a real run the export counts as
    successful only when every file in the reference tree was copied; if any
    file could not be fetched or copied the result is 'failed' and 'error'
    names the affected files. Files already copied are left in place.

    Args:
        vault: Active vault connection
        file_info: Dict with file_obj, file_id, folder_id, path, etc.
        output_dir: Base output directory
        part_name: Part number used as subfolder name
        dry_run: If True, build tree but don't copy files

    Returns:
        Dict with 'status' ('success'/'dry_run'/'failed'), 'file_count'
        (files found for a dry run, files actually copied otherwise),
        'dest_path', 'source_path', and 'error' (message if failed, else None)
    """
    logger = logging.getLogger("batch_copy_tree")
    dest_path = os.path.join(output_dir, part_name)
    # GetFileCopy requires destination path to end with backslash
    dest_path_trailing = dest_path if dest_path.endswith("\\") else dest_path + "\\"
    result = {
        "status": "failed",
        "file_count": 0,
        "dest_path": dest_path,
        "source_path": file_info["path"],
        "error": None
    }

    try:
        hwnd = get_window_handle()
        file_obj = file_info["file_obj"]
        folder_id = file_info["folder_id"]

        # Step 1: Traverse the reference tree to find all files
        logger.info(" Building reference tree...")
        ref_files = _collect_references(file_obj, folder_id, vault.vault, logger)
        file_count = len(ref_files)
        result["file_count"] = file_count
        logger.info(f" Reference tree: {file_count} file(s) found")

        if dry_run:
            for idx, ref in enumerate(ref_files, 1):
                logger.info(f" [{idx}/{file_count}] {ref['name']}")
            logger.info(f" DRY RUN: Would copy {file_count} file(s) to {dest_path}")
            result["status"] = "dry_run"
            return result

        # Create the output subfolder only once we are really going to copy,
        # so a dry run leaves no empty folders behind.
        os.makedirs(dest_path, exist_ok=True)
        logger.debug(f"Output folder: {dest_path}")

        # Step 2: Copy each file to the output directory using GetFileCopy
        # Confirmed signature: GetFileCopy(lParentWnd, poVersionNoOrRevisionName, poPathOrFolderID, lEdmGetFlags, bsNewName)
        #   lParentWnd = window handle (0 for headless)
        #   poVersionNoOrRevisionName = version number (0 = latest)
        #   poPathOrFolderID = destination folder path (must end with \)
        #   lEdmGetFlags = EdmGet flags (EdmGet_Simple = 1)
        #   bsNewName = new filename or empty string to keep original name
        copied = 0
        not_copied = []  # names of files that could not be exported
        for idx, ref in enumerate(ref_files, 1):
            ref_name = ref["name"]
            logger.debug(f" [{idx}/{file_count}] Copying {ref_name}...")

            try:
                # Get the file object from the vault by ID
                ref_file_obj = vault.vault.GetObject(EdmObject_File, ref["file_id"])
                if ref_file_obj is None:
                    logger.warning(f" Could not get file object for {ref_name} (ID: {ref['file_id']})")
                    not_copied.append(ref_name)
                    continue

                ref_file_obj.GetFileCopy(hwnd, 0, dest_path_trailing, EdmGet_Simple, "")
                copied += 1
            except Exception as copy_err:
                # Keep going so one bad file does not hide the state of the rest.
                logger.warning(f" Failed to copy {ref_name}: {copy_err}")
                not_copied.append(ref_name)

        result["file_count"] = copied
        if not_copied:
            # An incomplete tree is not a successful export.
            result["error"] = (
                f"{len(not_copied)} of {file_count} file(s) could not be copied: "
                + ", ".join(str(name) for name in not_copied)
            )
            logger.error(f" FAILED: {result['error']}")
        else:
            result["status"] = "success"
            logger.info(f" SUCCESS: {copied}/{file_count} file(s) exported to {dest_path}")

    except Exception as e:
        result["error"] = str(e)
        logger.error(f" FAILED: {e}")
        logger.debug(" Full error details:", exc_info=True)

    return result
|
||||
|
||||
|
||||
def batch_copy_tree(
    vault: PDMVaultConnection,
    file_list: List[Dict[str, Any]],
    output_dir: str,
    dry_run: bool = False
) -> Dict[str, List]:
    """
    Run execute_copy_tree() for every resolved part and sort the outcomes.

    Args:
        vault: Active vault connection
        file_list: List of file info dicts (from search_and_resolve_parts)
        output_dir: Base output directory
        dry_run: If True, build trees but don't execute copies

    Returns:
        Dict with 'success' and 'failed' lists of result dicts. Results
        whose status is 'success' or 'dry_run' go to 'success'; everything
        else goes to 'failed'.
    """
    log = logging.getLogger("batch_copy_tree")
    succeeded: List[Dict[str, Any]] = []
    failed: List[Dict[str, Any]] = []

    part_total = len(file_list)
    log.info(f"Starting batch copy tree for {part_total} parts")
    log.info("=" * 60)

    for position, entry in enumerate(file_list, start=1):
        pn = entry["part_number"]
        log.info(f"[{position}/{part_total}] Processing copy tree for: {pn} ({entry['filename']})")

        outcome = execute_copy_tree(vault, entry, output_dir, pn, dry_run)

        # The part number doubles as the name of the output subfolder.
        bucket = succeeded if outcome["status"] in ("success", "dry_run") else failed
        bucket.append(outcome)

    return {"success": succeeded, "failed": failed}
|
||||
|
||||
|
||||
def test_copy_tree_api(vault: PDMVaultConnection, file_info: Dict[str, Any]) -> bool:
    """
    Exercise the reference-tree API on one file without copying anything.

    Logs four steps: fetching the reference tree, listing the methods of the
    tree object, listing every file found by _collect_references(), and
    listing the file object's methods whose name contains "copy" or "get".
    Introspection failures are logged as warnings and do not fail the test.

    Args:
        vault: Active vault connection
        file_info: Dict with file_obj, file_id, folder_id, path

    Returns:
        True if reference traversal succeeds, False otherwise
    """
    log = logging.getLogger("batch_copy_tree")
    banner = "=" * 60

    def _iter_member_names(com_obj):
        """Lazily yield the name tuple of each function in the object's type info."""
        info = com_obj._oleobj_.GetTypeInfo(0, 0)
        attr = info.GetTypeAttr()
        for slot in range(attr.cFuncs):
            desc = info.GetFuncDesc(slot)
            yield info.GetNames(desc.memid)

    def _signature(names):
        """Format names as 'Method(arg, ...)', or just 'Method' when there are no args."""
        if len(names) > 1:
            return f"{names[0]}({', '.join(names[1:])})"
        return names[0]

    log.info(banner)
    log.info("TESTING REFERENCE TREE TRAVERSAL")
    log.info(banner)
    log.info(f"Test file: {file_info['path']}")

    try:
        root_file = file_info["file_obj"]
        root_folder_id = file_info["folder_id"]

        # Step 1: Test GetReferenceTree
        log.info("Step 1: Calling GetReferenceTree...")
        tree = root_file.GetReferenceTree(root_folder_id, 0)
        if tree is None:
            log.error(" GetReferenceTree returned None")
            return False
        log.info(f" OK - Got reference tree object: {type(tree)}")

        # Step 2: Introspect reference tree object
        log.info("Step 2: Inspecting IEdmReference5 COM object...")
        try:
            # Collect everything first so nothing is listed if introspection fails midway.
            signatures = [_signature(names) for names in _iter_member_names(tree)]
            log.info(f" IEdmReference methods ({len(signatures)}):")
            for entry in signatures:
                log.info(f" - {entry}")
        except Exception as intro_err:
            log.warning(f" Could not introspect: {intro_err}")

        # Step 3: Traverse children
        log.info("Step 3: Traversing reference tree children...")
        found = _collect_references(root_file, root_folder_id, vault.vault, log)
        log.info(f" Found {len(found)} file(s) in reference tree:")
        for number, item in enumerate(found, 1):
            log.info(f" [{number}] {item['name']}")

        # Step 4: Test GetFileCopy on root file (introspect only, don't copy)
        log.info("Step 4: Inspecting IEdmFile5 GetFileCopy method...")
        try:
            for names in _iter_member_names(root_file):
                if "copy" in names[0].lower() or "get" in names[0].lower():
                    log.info(f" - {_signature(names)}")
        except Exception as intro_err:
            log.warning(f" Could not introspect file object: {intro_err}")

        log.info(banner)
        log.info("API TEST PASSED")
        log.info(banner)
        return True

    except Exception as e:
        log.error(f"API TEST FAILED: {e}")
        log.debug("Full error details:", exc_info=True)
        log.info(banner)
        return False
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# COMMAND LINE INTERFACE
|
||||
# =============================================================================
|
||||
|
||||
def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Argument list to parse instead of the process command line.
            ``None`` (the default) keeps the usual behaviour of reading
            ``sys.argv[1:]``; passing a list lets other code drive the parser.

    Returns:
        Namespace with ``vault``, ``csv``, ``output_dir``, ``username``,
        ``log_file``, ``dry_run`` and ``test``.
    """
    parser = argparse.ArgumentParser(
        description="Batch Copy Tree export for SolidWorks PDM Professional",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output"
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output" -v "IDSVault" --dry-run
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output" --test

CSV format (one part number per line, no extensions, no header):
UDS.00056
WIDGET.00123
BRACKET.00789

Each part number's Copy Tree output goes to its own subfolder:
C:\\Temp\\Output\\UDS.00056\\{files...}
C:\\Temp\\Output\\WIDGET.00123\\{files...}
"""
    )

    parser.add_argument(
        "-v", "--vault",
        default=VAULT_NAME,
        help=f"PDM vault name (default: {VAULT_NAME})"
    )

    parser.add_argument(
        "-c", "--csv",
        required=True,
        help="Path to CSV file containing part numbers (one per line, no extensions)"
    )

    parser.add_argument(
        "-o", "--output-dir",
        required=True,
        help="Base output directory for exported files (e.g., C:\\Temp\\Output)"
    )

    parser.add_argument(
        "-u", "--username",
        help="PDM username (will prompt if not provided)"
    )

    parser.add_argument(
        "--log-file",
        help="Custom log file path (default: auto-generated with timestamp)"
    )

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Build copy trees and show what would be copied, but don't execute"
    )

    parser.add_argument(
        "--test",
        action="store_true",
        help="Test Copy Tree API calls on the first part only, then exit"
    )

    return parser.parse_args(argv)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MAIN EXECUTION
|
||||
# =============================================================================
|
||||
|
||||
def main() -> int:
    """Main entry point for batch copy tree processing.

    Flow: parse arguments, set up logging, load the CSV, prompt for
    credentials and the file extension, connect to the vault, resolve the
    part numbers, then either run the API test (``--test``) or, after an
    explicit "yes" confirmation, export every resolved part.

    Returns:
        Process exit code: 0 on success (or user cancellation), 1 when the
        input is unusable, the vault connection fails, no part is found, the
        API test fails, or at least one part fails to export.
    """
    # Parse arguments
    args = parse_arguments()

    # Setup logging
    logger = setup_logging(args.log_file)
    logger.info("=" * 60)
    logger.info("PDM BATCH COPY TREE EXPORT")
    logger.info("=" * 60)
    logger.info(f"Vault: {args.vault}")
    logger.info(f"CSV File: {args.csv}")
    logger.info(f"Output Directory: {args.output_dir}")
    if args.dry_run:
        logger.info("Mode: DRY RUN (no files will be copied)")
    if args.test:
        logger.info("Mode: API TEST (testing on first part only)")

    # Load part numbers from CSV first: there is no point in asking for a
    # password when the input file is missing or empty.
    part_numbers = load_part_numbers_from_csv(args.csv)

    if not part_numbers:
        logger.error("No part numbers loaded from CSV. Exiting.")
        return 1

    logger.info(f"Loaded {len(part_numbers)} part numbers from CSV")

    # Get credentials
    username = args.username
    if not username:
        username = input("PDM Username: ")

    password = getpass.getpass("PDM Password: ")

    # Get file extension from user
    ext_input = input("Enter the file extension to search for (e.g., SLDASM, SLDDRW, SLDPRT): ").strip()
    ext_input = ext_input.lstrip(".")  # Remove leading dot if user included one
    if not ext_input:
        logger.error("No extension provided. Exiting.")
        return 1
    extension = f".{ext_input}"
    logger.info(f"File extension: {extension}")

    # Connect to vault
    vault = PDMVaultConnection(args.vault)

    if not vault.connect(username, password):
        logger.error("Failed to connect to vault. Exiting.")
        return 1

    try:
        # Search for parts in vault
        logger.info("Searching for parts in vault...")
        logger.info("=" * 60)
        validation = search_and_resolve_parts(vault, part_numbers, extension)

        valid_count = len(validation["valid"])
        not_found_count = len(validation["not_found"])

        # Summary
        logger.info("=" * 60)
        logger.info("SEARCH RESULTS SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Parts ready to process: {valid_count}")
        logger.info(f"Parts not found: {not_found_count}")

        # Report not found
        if not_found_count > 0:
            logger.warning("\nParts not found in vault:")
            for pn in validation["not_found"]:
                logger.warning(f" - {pn}")

            # Save not found list. Part numbers were read as UTF-8, so write
            # them back as UTF-8 instead of the locale's default code page,
            # which may be unable to encode them.
            not_found_file = f"not_found_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            with open(not_found_file, 'w', encoding='utf-8') as f:
                for pn in validation["not_found"]:
                    f.write(pn + "\n")
            logger.info(f"Not found list saved to: {not_found_file}")

        if valid_count == 0:
            logger.error("No valid parts to process. Exiting.")
            return 1

        # API test mode - test on first part then exit
        if args.test:
            test_passed = test_copy_tree_api(vault, validation["valid"][0])
            return 0 if test_passed else 1

        # Verify output directory is accessible
        try:
            os.makedirs(args.output_dir, exist_ok=True)
        except OSError as e:
            logger.error(f"Cannot create output directory '{args.output_dir}': {e}")
            return 1

        # Confirm before proceeding
        mode_label = "DRY RUN copy tree" if args.dry_run else "copy tree"
        print(f"\nReady to {mode_label} for {valid_count} parts to '{args.output_dir}'")
        confirm = input("Proceed? (yes/no): ").strip().lower()

        if confirm != "yes":
            logger.info("Operation cancelled by user")
            return 0

        # Execute batch copy tree
        results = batch_copy_tree(
            vault,
            validation["valid"],
            args.output_dir,
            dry_run=args.dry_run
        )

        # Final report
        logger.info("=" * 60)
        logger.info("BATCH COPY TREE COMPLETE")
        logger.info("=" * 60)
        logger.info(f"Total parts processed: {valid_count}")
        logger.info(f"Successful: {len(results['success'])}")
        logger.info(f"Failed: {len(results['failed'])}")

        if results["success"]:
            total_files = sum(r["file_count"] for r in results["success"])
            logger.info(f"Total files exported: {total_files}")

        if results["failed"]:
            logger.warning("\nFailed parts:")
            for r in results["failed"]:
                logger.warning(f" - {r['source_path']}: {r['error']}")

            # Write failed parts to a separate file for retry (UTF-8 for the
            # same reason as the not-found list; vault paths and COM error
            # text may contain non-ASCII characters).
            failed_file = f"failed_copies_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            with open(failed_file, 'w', encoding='utf-8') as f:
                for r in results["failed"]:
                    f.write(f"{r['source_path']}\t{r['error']}\n")
            logger.info(f"Failed parts list saved to: {failed_file}")

        return 0 if not results["failed"] else 1

    finally:
        # Always release the vault, whichever return path was taken.
        vault.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen executables).
    raise SystemExit(main())
|
||||
864
helpers/batch_workflows_paths.py
Normal file
864
helpers/batch_workflows_paths.py
Normal file
@@ -0,0 +1,864 @@
|
||||
"""
|
||||
Batch Workflow Processing for PDM Vault (Path-based)
|
||||
=====================================================
|
||||
This module provides a framework for:
|
||||
1. Logging into a PDM vault via API
|
||||
2. Processing a list of files (by full vault path) through a specified workflow transition
|
||||
|
||||
Usage:
|
||||
python batch_workflows_paths.py --vault "MyVault" --csv "files.csv" --transition "citadel_set_production released"
|
||||
"""
|
||||
|
||||
import logging
|
||||
import argparse
|
||||
import getpass
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
import ctypes
|
||||
import pythoncom
|
||||
import win32com.client
|
||||
import comtypes
|
||||
import comtypes.automation
|
||||
from comtypes import COMMETHOD, GUID, HRESULT
|
||||
from comtypes.automation import IDispatch as _CT_IDispatch
|
||||
|
||||
# PDM API Type Library Constants
|
||||
# NOTE(review): the code that consumes these values lies outside the visible
# part of this file; presumably they are object-type codes for
# vault.GetObject() (file, folder, workflow) - confirm against the PDM API.
EdmObject_File = 1
EdmObject_Folder = 2
EdmObject_Workflow = 6
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# comtypes interface for IEdmFile13 — enables vtable call to ChangeState3.
|
||||
#
|
||||
# Confirmed from gen_py stubs (IEdmFile13.py):
|
||||
# IID : {DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}
|
||||
# oVft : 432 → slot 54 (IUnknown[0-2] + IDispatch[3-6] + 47 placeholders[7-53])
|
||||
# ---------------------------------------------------------------------------
|
||||
# 47 dummy, argument-less method declarations named _ph0 .. _ph46. They only
# occupy vtable entries so that ChangeState3, appended after them in the
# class below, ends up in the slot worked out in the comment above.
_IEdmFile13_phs = [COMMETHOD([], HRESULT, f"_ph{i}") for i in range(47)]
# Pointer-to-VARIANT type used for the two VARIANT* parameters of ChangeState3.
_VARIANT_p = ctypes.POINTER(comtypes.automation.VARIANT)


class _IEdmFile13_CT(_CT_IDispatch):
    """Minimal comtypes declaration of IEdmFile13 exposing only ChangeState3.

    The method list is the 47 placeholders followed by ChangeState3; the
    order is significant because it determines the vtable slot that a call
    to ChangeState3 is dispatched through. Do not insert, remove or reorder
    entries without re-checking the slot arithmetic.
    """
    _iid_ = GUID("{DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}")
    _idlflags_ = ["dual", "oleautomation"]
    _methods_ = _IEdmFile13_phs + [
        COMMETHOD(
            [], HRESULT, "ChangeState3",
            # Both VARIANT parameters are passed by pointer.
            (["in"], _VARIANT_p, "poStateIdOrName"),
            (["in"], _VARIANT_p, "poTransitionIdOrName"),
            (["in"], ctypes.c_long, "lFolderID"),
            (["in"], ctypes.c_wchar_p, "bsComment"),
            (["in"], ctypes.c_long, "lParentWnd"),
            (["in"], ctypes.c_long, "lEdmStateFlags"),
            (["in"], ctypes.c_wchar_p, "bsPasswd"),
        ),
    ]
|
||||
|
||||
|
||||
def _make_i4_variant(val: int) -> comtypes.automation.VARIANT:
    """Build a VARIANT tagged VT_I4 whose payload is ``int(val)``.

    The integer is written straight into the structure's memory at byte
    offset 8 rather than through the VARIANT's value accessor.
    """
    variant = comtypes.automation.VARIANT()
    variant.vt = 3  # VT_I4
    # View the bytes starting 8 bytes into the VARIANT as a C int and store
    # the value there.
    # NOTE(review): offset 8 is presumably where the value union begins - confirm.
    payload = ctypes.cast(ctypes.byref(variant, 8), ctypes.POINTER(ctypes.c_int))
    payload[0] = int(val)
    return variant
|
||||
|
||||
|
||||
class _Phase2AVError(OSError):
    """ChangeState3 hit an access violation on every attempt, retries included.

    It subclasses OSError, so a plain ``except OSError`` still catches it, but
    being a separate type lets batch_transition tell these failures apart,
    count how many occur in a row, and force a vault reconnect that resets
    PDM's in-process state once the corruption keeps repeating.
    """
|
||||
|
||||
|
||||
# Escalating backoff for Phase-2 access violations (seconds between attempts).
# PDM's in-process DLL can take progressively longer to clean up after state
# corruption accumulates; a longer final sleep gives it a real chance to settle.
# The same table is also indexed for Phase-1 retries inside _changestate3, and
# its length determines the total number of attempts.
_PHASE2_BACKOFF = (3, 10, 30)


def _changestate3(vault_obj, file_id: int, to_state_id: int,
                  transition_id: int, folder_id: int, comment: str,
                  password: str) -> None:
    """
    Call IEdmFile13::ChangeState3 via comtypes vtable to transition a file
    using a *specific* transition ID, bypassing the ambiguous ChangeState.

    Uses the primary win32com vault for GetObject so that the returned COM
    proxy is in the primary connection's context. Bridging to comtypes is
    done by reading the IEdmFile13* stored inside the pythoncom PyIBase
    wrapper at CPython object offset 16, then calling QueryInterface to get
    an AddRef'd comtypes pointer.

    Reads from _oleobj_ directly (not from a secondary QI(IID_IUnknown) result)
    because for aggregated COM objects the controlling IUnknown can be at a
    different address with a shorter lifetime than the IEdmFile13* itself.

    Retries up to 3 additional times on Phase-1 access-violation or
    misaligned-pointer failures, and on Phase-2 access violations inside
    ChangeState3 itself (with escalating backoff of 3s, 10s, 30s between
    attempts to give PDM's in-process DLL time to clean up corrupted state).
    If all Phase-2 attempts fail, raises _Phase2AVError so batch_transition
    can trigger a vault reconnect after repeated failures.

    Phase 1 and Phase 2 share one attempt counter, so a retry spent in either
    phase reduces the retries left for the other.

    Args:
        vault_obj: win32com vault object; its GetObject(EdmObject_File, ...)
            is called once per attempt.
        file_id: ID of the file to transition.
        to_state_id: ID of the destination state (sent as a VT_I4 VARIANT).
        transition_id: ID of the transition to run (sent as a VT_I4 VARIANT).
        folder_id: Passed as lFolderID.
        comment: Passed as bsComment.
        password: Passed as bsPasswd.

    Raises:
        _Phase2AVError: ChangeState3 raised an access-violation OSError on the
            final attempt.
        OSError: Any other OSError from ChangeState3, or a Phase-1 OSError that
            is not an access violation or that occurs on the final attempt.
        RuntimeError: The pointer read in Phase 1 was null or not 8-byte
            aligned on the final attempt.
    """
    logger = logging.getLogger("batch_workflows_paths")
    max_attempts = 1 + len(_PHASE2_BACKOFF)  # initial + 3 retries

    for attempt in range(max_attempts):
        if attempt > 0:
            logger.debug(f" [CS3] Retry {attempt} for file ID {file_id}")

        # --- Phase 1: obtain an independent comtypes IEdmFile13 pointer ---
        file_obj = None
        try:
            # Fresh COM wrapper each attempt — primary vault, no competing refs.
            # NOTE(review): after a Phase-2 retry, `file13` from the failed
            # attempt is still bound until it is reassigned below — confirm
            # that this does not count as a competing reference.
            file_obj = win32com.client.CastTo(
                vault_obj.GetObject(EdmObject_File, file_id), 'IEdmFile13'
            )
            # CastTo('IEdmFile13') calls QI(IID_IEdmFile13) so _oleobj_ already
            # holds the IEdmFile13* directly.  Read it at offset 16 in the
            # CPython object struct (ob_refcnt[8] + ob_type[8] + m_pUnknown[8]).
            # The 8-byte fields and the c_uint64 read assume a 64-bit build.
            py_disp = file_obj._oleobj_
            raw_ptr = ctypes.c_uint64.from_address(id(py_disp) + 16).value
            logger.debug(f" [CS3] raw_ptr={raw_ptr:#018x} (& 7 == {raw_ptr & 7})")
            # Sanity check before dereferencing: reject null and any address
            # that is not a multiple of 8.
            if not raw_ptr or (raw_ptr & 0x7) != 0:
                raise RuntimeError(f"Misaligned IEdmFile13* at offset 16: {raw_ptr:#x}")
            ct_unk = ctypes.cast(raw_ptr, ctypes.POINTER(comtypes.IUnknown))
            file13 = ct_unk.QueryInterface(_IEdmFile13_CT)  # AddRefs independently
            logger.debug(f" [CS3] QI OK (attempt {attempt})")
        except (OSError, RuntimeError) as exc:
            # Drop the win32com wrapper (if one was obtained) before sleeping.
            if file_obj is not None:
                del file_obj
            # Every RuntimeError is retried; an OSError only when its message
            # mentions an access violation.
            is_retryable = (
                (isinstance(exc, OSError) and 'access violation' in str(exc).lower())
                or isinstance(exc, RuntimeError)
            )
            if is_retryable and attempt < max_attempts - 1:
                sleep_s = _PHASE2_BACKOFF[attempt]
                logger.debug(
                    f" [CS3] Phase-1 failure ({exc}); sleeping {sleep_s}s then retrying"
                )
                # NOTE(review): relies on a module-level `import time`, which is
                # not among the imports visible next to this function — confirm.
                time.sleep(sleep_s)
                continue
            raise

        # Release win32com wrapper — file13 holds its own AddRef'd reference.
        del file_obj

        # --- Phase 2: call ChangeState3 ---
        # The VARIANTs are kept in locals so they outlive the byref() pointers
        # handed to the call.
        v_state = _make_i4_variant(to_state_id)
        v_trans = _make_i4_variant(transition_id)
        try:
            file13.ChangeState3(
                ctypes.byref(v_state),
                ctypes.byref(v_trans),
                ctypes.c_long(folder_id),
                comment,
                ctypes.c_long(0),  # lParentWnd
                ctypes.c_long(0),  # lEdmStateFlags
                password,
            )
            return  # success
        except OSError as exc:
            # Access violation inside ChangeState3 (PDM in-process DLL crashes while
            # accessing internal state left over from a recent transition).  An
            # escalating sleep (3s, 10s, 30s) lets PDM's post-transition cleanup
            # finish, then we retry with a fresh COM wrapper.  After all retries
            # are exhausted, raise _Phase2AVError so batch_transition can count
            # consecutive failures and reconnect the vault.
            if 'access violation' in str(exc).lower():
                if attempt < max_attempts - 1:
                    sleep_s = _PHASE2_BACKOFF[attempt]
                    logger.debug(
                        f" [CS3] Phase-2 access violation ({exc}); "
                        f"sleeping {sleep_s}s for PDM cleanup then retrying"
                    )
                    time.sleep(sleep_s)
                    continue
                raise _Phase2AVError(
                    f"ChangeState3 access violation after {max_attempts} attempts: {exc}"
                ) from exc
            # Any other OSError is not retried.
            raise
|
||||
|
||||
|
||||
# =============================================================================
# CONFIGURATION - Can be overridden via command line
# =============================================================================

# Both values are only the argparse defaults used by parse_arguments()
# (--vault and --transition respectively).
VAULT_NAME = "IDSVault"  # Default vault name
DEFAULT_TRANSITION = "Citadel_mig_Set Proto Released"  # Default transition name
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LOGGING SETUP
|
||||
# =============================================================================
|
||||
|
||||
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
    """Configure and return the ``batch_workflows_paths`` logger.

    The logger gets exactly two handlers: a DEBUG-level file handler and an
    INFO-level console handler, both using the same timestamped format.
    Calling this function again replaces the handlers installed by the
    previous call, so records are never written twice and the previous log
    file is closed.

    Args:
        log_file: Path of the log file.  When omitted, a name of the form
            ``batch_workflow_paths_<YYYYmmdd_HHMMSS>.log`` is generated in the
            current working directory.

    Returns:
        The configured logger.
    """
    if log_file is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = f"batch_workflow_paths_{timestamp}.log"

    logger = logging.getLogger("batch_workflows_paths")
    logger.setLevel(logging.DEBUG)

    # getLogger() returns the same object on every call, so handlers added by
    # an earlier setup_logging() would otherwise pile up and duplicate output.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # File handler — UTF-8 so vault paths with non-ASCII characters can always
    # be written, whatever the platform's default encoding is.
    fh = logging.FileHandler(log_file, encoding="utf-8")
    fh.setLevel(logging.DEBUG)

    # Console handler
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    # Formatter
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)

    logger.addHandler(fh)
    logger.addHandler(ch)

    return logger
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PDM VAULT CONNECTION
|
||||
# =============================================================================
|
||||
|
||||
class PDMVaultConnection:
    """Handles connection and authentication to the PDM vault.

    Besides the win32com vault object, the instance remembers the credentials
    of the last successful login (needed by reconnect()) and carries the
    ``_last_was_phase2_av`` flag that transition_file() sets and
    batch_transition() reads.  Usable as a context manager; leaving the
    ``with`` block calls disconnect().
    """

    def __init__(self, vault_name: str):
        self.vault_name = vault_name
        self.vault = None
        self.is_connected = False
        self.logger = logging.getLogger("batch_workflows_paths")
        self._username = None
        self._password = None
        # True when the most recent transition attempt ended in a persistent
        # ChangeState3 access violation.  Declared here so the attribute always
        # exists instead of first being created from outside the class.
        self._last_was_phase2_av = False

    def connect(self, username: str, password: str) -> bool:
        """
        Connect and log into the PDM vault with username/password.

        On success the credentials are stored for a later reconnect().

        Args:
            username: PDM username
            password: PDM password

        Returns:
            True if connection successful, False otherwise
        """
        try:
            self.vault = win32com.client.Dispatch("ConisioLib.EdmVault")

            # Login with credentials
            self.vault.Login(username, password, self.vault_name)

            self.is_connected = True
            self._username = username
            self._password = password
            self.logger.info(f"Successfully connected to vault: {self.vault_name}")
            self.logger.info(f"Logged in as: {username}")

            return True

        except Exception as e:
            self.logger.error(f"Failed to connect to vault '{self.vault_name}': {e}")
            # Do not keep a vault object that never completed its login.
            self.vault = None
            self.is_connected = False
            return False

    def disconnect(self) -> None:
        """Drop the vault reference and mark the connection closed.

        Does nothing when no vault object is held.
        """
        if self.vault is None:
            return
        self.vault = None
        self.is_connected = False
        self.logger.info("Disconnected from vault")

    def reconnect(self) -> bool:
        """Force a full disconnect + re-login using the stored credentials.

        Used to reset PDM's in-process DLL state after repeated Phase-2 access
        violations indicate the vault connection's internal data structures have
        been corrupted.

        Returns:
            True if the re-login succeeded; False if it failed or no
            credentials have been stored by an earlier successful connect().
        """
        if self._username is None or self._password is None:
            self.logger.error("Cannot reconnect: no stored credentials")
            return False
        username, password = self._username, self._password
        self.logger.info("Reconnecting vault to reset PDM internal state...")
        self.disconnect()
        # Give the in-process DLL a moment to release any lingering state.
        time.sleep(2)
        return self.connect(username, password)

    def get_file(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Get a file object from the vault by full path.

        Args:
            file_path: Full path to the file in the vault

        Returns:
            Dict with 'file_obj' (cast to IEdmFile13), 'folder_obj' and 'path',
            or None when not connected, when the folder or file is not found,
            or when any vault call raises (the error is logged).
        """
        if not self.is_connected:
            self.logger.error("Not connected to vault")
            return None

        try:
            folder_path = str(Path(file_path).parent)
            folder_obj = self.vault.GetFolderFromPath(folder_path)

            if folder_obj is None:
                self.logger.warning(f"Folder not found: {folder_path}")
                return None

            # GetFileFromPath returns (file_obj, file_id) tuple
            result = self.vault.GetFileFromPath(file_path, folder_obj)

            # Handle tuple return value
            file_obj = result[0] if isinstance(result, tuple) else result

            if file_obj is None:
                return None

            # Re-fetch via GetObject, then cast to IEdmFile13 so CurrentState and
            # transition methods are accessible regardless of gen_py stub state.
            file_obj = self.vault.GetObject(EdmObject_File, file_obj.ID)
            file_obj = win32com.client.CastTo(file_obj, 'IEdmFile13')

            return {
                "file_obj": file_obj,
                "folder_obj": folder_obj,
                "path": file_path
            }

        except Exception as e:
            self.logger.error(f"Error getting file '{file_path}': {e}")
            return None

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.disconnect()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FILE LIST HANDLING
|
||||
# =============================================================================
|
||||
|
||||
def load_file_list_from_csv(csv_path: str) -> List[str]:
    """
    Read vault file paths from a text/CSV file holding one path per line.

    Each line is stripped of surrounding whitespace; lines that are empty
    afterwards are ignored.  The file is decoded as UTF-8 and a leading BOM is
    tolerated.  Problems are logged rather than raised.

    Args:
        csv_path: Location of the file to read (no header row expected)

    Returns:
        The paths in file order.  Empty when the file does not exist; when
        reading fails part-way, whatever was collected before the error.
    """
    log = logging.getLogger("batch_workflows_paths")
    collected = []

    try:
        with open(csv_path, 'r', encoding='utf-8-sig') as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry:
                    continue  # blank line
                collected.append(entry)

        log.info(f"Loaded {len(collected)} file paths from {csv_path}")

    except FileNotFoundError:
        log.error(f"CSV file not found: {csv_path}")
    except Exception as e:
        log.error(f"Error reading CSV file: {e}")

    return collected
|
||||
|
||||
|
||||
def validate_files(
    vault: PDMVaultConnection,
    file_paths: List[str]
) -> Dict[str, List]:
    """
    Check each path against the vault and record its current workflow state.

    Only plain data is returned: the COM wrappers obtained for a file are used
    to read its state name and are not stored in the result, so later
    transitions never see proxies created during validation.

    Args:
        vault: Active vault connection
        file_paths: Full vault paths to look up

    Returns:
        Dict with:
        - 'valid': one ``{"path", "current_state"}`` dict per file found;
          the state is "Unknown" when it is missing or cannot be read
        - 'not_found': paths for which the vault returned nothing
    """
    log = logging.getLogger("batch_workflows_paths")
    found_entries = []
    missing_paths = []
    outcome = {"valid": found_entries, "not_found": missing_paths}

    count = len(file_paths)
    for position, file_path in enumerate(file_paths, 1):
        log.info(f"[{position}/{count}] Validating: {file_path}")

        file_info = vault.get_file(file_path)

        if file_info is None:
            missing_paths.append(file_path)
            log.warning(f" NOT FOUND: {file_path}")
            continue

        # Read the state name right away; nothing COM-related is kept in the
        # result because transition_file() fetches its own fresh wrapper.
        state_name = "Unknown"
        try:
            current_state = file_info["file_obj"].CurrentState
            if current_state:
                state_name = current_state.Name
        except Exception as e:
            log.warning(f" Could not get state: {e}")
            state_name = "Unknown"

        found_entries.append({"path": file_path, "current_state": state_name})
        log.info(f" FOUND (State: {state_name})")

    return outcome
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# WORKFLOW PROCESSING
|
||||
# =============================================================================
|
||||
|
||||
def get_available_transitions(vault: PDMVaultConnection, file_obj) -> List[Dict[str, Any]]:
    """
    List the transitions that leave the file's current workflow state.

    Args:
        vault: Active vault connection (accepted for signature symmetry; the
            lookup itself only uses ``file_obj``)
        file_obj: IEdmFile object

    Returns:
        One dict per transition with 'name', 'id' and 'target_state' (the
        destination state's name, or "Unknown" when the transition has none).
        Empty when the file has no current state.  If a vault call raises, the
        error is logged and the transitions gathered so far are returned.
    """
    log = logging.getLogger("batch_workflows_paths")
    found = []

    try:
        state = file_obj.CurrentState
        if state is None:
            return found

        cursor = state.GetFirstTransitionPosition()
        while not cursor.IsNull:
            step = state.GetNextTransition(cursor)
            step_name = step.Name
            step_id = step.ID
            destination = step.ToState.Name if step.ToState else "Unknown"
            found.append({
                "name": step_name,
                "id": step_id,
                "target_state": destination
            })

    except Exception as e:
        log.error(f"Error getting transitions: {e}")

    return found
|
||||
|
||||
|
||||
def transition_file(
    vault: PDMVaultConnection,
    file_info: Dict[str, Any],
    transition_name: str,
    comment: str = ""
) -> str:
    """
    Transition a single file using a named transition.

    The file is looked up afresh, the transition is matched by name
    (case-insensitively) among those leaving the file's current state, the
    state change is performed through _changestate3(), and the resulting state
    is read back to verify it.  ``vault._last_was_phase2_av`` is set to True
    when _changestate3() gives up with _Phase2AVError and to False when it
    returns normally.

    Args:
        vault: Active vault connection
        file_info: Dict containing 'path' (and optionally 'current_state' for logging)
        transition_name: Name of the transition to execute
        comment: Optional transition comment

    Returns:
        One of "success", "not_available", or "failed".
        - "success": transition completed and state verified
        - "not_available": named transition is not valid from the file's current
          state (typically means the file is already in the target state from a
          prior run — not a real failure, just a no-op)
        - "failed": real failure (access violation, missing file, state
          unchanged after call, etc.) — worth retrying

    Raises:
        KeyError: ``file_info`` has no 'path' entry.  Every other exception is
            logged and reported as "failed".
    """
    logger = logging.getLogger("batch_workflows_paths")
    file_path = file_info["path"]

    try:
        # Fetch a fresh COM wrapper right now — not from validation.
        # By the time this file is processed, earlier transitions may have
        # caused PDM to invalidate COM proxies obtained during the validation
        # pass.  A fresh GetObject/CastTo gives a clean proxy every time.
        fresh = vault.get_file(file_path)
        if fresh is None:
            logger.error(f"File no longer accessible in vault: {file_path}")
            return "failed"
        file_obj = fresh["file_obj"]
        folder_obj = fresh["folder_obj"]

        # Get current state and find the transition
        current_state = file_obj.CurrentState
        if current_state is None:
            logger.error(f"File has no workflow state: {file_path}")
            return "failed"

        # Find the transition by name
        target_transition = None
        trans_pos = current_state.GetFirstTransitionPosition()

        while not trans_pos.IsNull:
            transition = current_state.GetNextTransition(trans_pos)
            if transition.Name.lower() == transition_name.lower():
                target_transition = transition
                break

        if target_transition is None:
            available = get_available_transitions(vault, file_obj)
            available_names = [t["name"] for t in available]
            logger.error(
                f"Transition '{transition_name}' not available for {file_path}. "
                f"Current state: {current_state.Name}. "
                f"Available transitions: {available_names}"
            )
            return "not_available"

        # Collect everything we need from the COM objects, then release them
        # before calling _changestate3.  _changestate3 fetches its own wrapper
        # internally, so having the lookup wrapper alive simultaneously would
        # create competing COM references and corrupt PDM's internal state.
        old_state_name = current_state.Name
        expected_state_name = target_transition.ToState.Name
        to_state_id = target_transition.ToState.ID
        trans_id = target_transition.ID
        file_id = file_obj.ID
        folder_id = folder_obj.ID

        logger.info(
            f" Transition: '{target_transition.Name}' (ID: {trans_id}) | "
            f"ToState: '{expected_state_name}' (ID: {to_state_id}) | "
            f"Folder ID: {folder_id} | File ID: {file_id}"
        )

        # Release all COM wrappers from the lookup phase before the vtable
        # call.  `transition` (the loop variable) is the same object as
        # `target_transition`, so it has to be deleted too or that wrapper
        # would stay alive.  It is always bound here: reaching this point
        # means the loop matched at least one transition.
        del file_obj, folder_obj, fresh, current_state
        del target_transition, transition, trans_pos

        try:
            _changestate3(
                vault.vault,
                file_id,
                to_state_id,
                trans_id,
                folder_id,
                comment,
                vault._password or "",
            )
        except _Phase2AVError as exc:
            # Persistent access violation — flag for batch_transition to count
            # against the consecutive-failure threshold for reconnect.
            vault._last_was_phase2_av = True
            logger.error(f"Failed to transition {file_path}: {exc}")
            return "failed"
        else:
            vault._last_was_phase2_av = False

        # Verify the state actually changed — re-fetch and cast to IEdmFile13
        fresh_file = win32com.client.CastTo(
            vault.vault.GetObject(EdmObject_File, file_id), 'IEdmFile13'
        )
        new_state = fresh_file.CurrentState
        actual_state_name = new_state.Name if new_state else "Unknown"

        if actual_state_name.lower() == expected_state_name.lower():
            logger.info(
                f"SUCCESS: {file_path} | "
                f"{old_state_name} -> {actual_state_name}"
            )
            return "success"
        else:
            logger.error(
                f"FAILED (state unchanged): {file_path} | "
                f"Expected: {expected_state_name}, Actual: {actual_state_name}"
            )
            return "failed"

    except Exception as e:
        logger.error(f"Failed to transition {file_path}: {e}")
        return "failed"
|
||||
|
||||
|
||||
def batch_transition(
    vault: PDMVaultConnection,
    file_list: List[Dict[str, Any]],
    transition_name: str,
    comment: str = ""
) -> Dict[str, List[str]]:
    """
    Transition multiple files using a named transition.

    Files are processed in order.  After three consecutive failures caused by
    a persistent ChangeState3 access violation the vault is reconnected; if
    that reconnect fails the batch stops, and every file that was not reached
    is recorded under 'failed' so that it is not lost from the retry list.

    Args:
        vault: Active vault connection
        file_list: List of file info dicts (from validate_files)
        transition_name: Name of the transition to execute
        comment: Optional transition comment

    Returns:
        Dict with 'success', 'failed', and 'not_available' file lists.
        - 'success': transition completed
        - 'failed': real failure worth retrying (access violation, etc.), plus
          any files left unprocessed when the batch was aborted
        - 'not_available': transition not valid from current state — typically
          means the file is already in the target state from a prior run
    """
    logger = logging.getLogger("batch_workflows_paths")
    results = {"success": [], "failed": [], "not_available": []}

    total = len(file_list)
    logger.info(f"Starting batch transition: '{transition_name}' for {total} files")
    logger.info("=" * 60)

    # Consecutive Phase-2 access-violation counter.  When it hits the threshold,
    # the vault connection's in-process state is likely corrupted past the
    # point where sleeping will help, so force a full disconnect + re-login.
    consecutive_phase2_av = 0
    PHASE2_AV_RECONNECT_THRESHOLD = 3

    for i, file_info in enumerate(file_list, 1):
        file_path = file_info["path"]
        logger.info(f"[{i}/{total}] Processing: {file_path}")

        # Clear the flag so a stale value from the previous file cannot be
        # mistaken for this file's outcome.
        vault._last_was_phase2_av = False
        status = transition_file(vault, file_info, transition_name, comment)

        if status == "success":
            results["success"].append(file_path)
            consecutive_phase2_av = 0
        elif status == "not_available":
            results["not_available"].append(file_path)
            consecutive_phase2_av = 0
        else:  # "failed"
            results["failed"].append(file_path)
            if getattr(vault, "_last_was_phase2_av", False):
                consecutive_phase2_av += 1
                logger.warning(
                    f" Phase-2 AV streak: {consecutive_phase2_av}/"
                    f"{PHASE2_AV_RECONNECT_THRESHOLD}"
                )
                if consecutive_phase2_av >= PHASE2_AV_RECONNECT_THRESHOLD:
                    logger.warning(
                        f"{consecutive_phase2_av} consecutive Phase-2 access "
                        "violations — forcing vault reconnect"
                    )
                    if vault.reconnect():
                        logger.info("Vault reconnected successfully")
                    else:
                        logger.error(
                            "Vault reconnect failed — aborting remaining batch"
                        )
                        # `i` is 1-based, so file_list[i:] is exactly the part
                        # of the batch that has not been attempted yet.
                        unprocessed = [info["path"] for info in file_list[i:]]
                        if unprocessed:
                            results["failed"].extend(unprocessed)
                            logger.error(
                                f"{len(unprocessed)} unprocessed file(s) "
                                "recorded as failed so they can be retried"
                            )
                        break
                    consecutive_phase2_av = 0
            else:
                # A failure of any other kind breaks the streak.
                consecutive_phase2_av = 0

    return results
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# COMMAND LINE INTERFACE
|
||||
# =============================================================================
|
||||
|
||||
def parse_arguments():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Returns:
        The argparse namespace with ``vault``, ``csv``, ``transition``,
        ``comment``, ``username`` and ``log_file``.  Only ``--csv`` is
        mandatory.
    """
    usage_notes = """
Examples:
python batch_workflows_paths.py --vault "MyVault" --csv "files.csv" --transition "citadel_set_production released"
python batch_workflows_paths.py -v "MyVault" -c "wip_files.csv" -t "citadel_set_wip"

CSV format (one full vault path per line):
C:\\IDSVault\\Parts\\widget.sldprt
C:\\IDSVault\\Parts\\bracket.sldprt
C:\\IDSVault\\Assemblies\\main_assy.sldasm
"""

    parser = argparse.ArgumentParser(
        description="Batch workflow transitions for SolidWorks PDM Professional (path-based)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes
    )

    # (flags, keyword settings) in the order they should appear in --help.
    option_table = (
        (("-v", "--vault"), {
            "default": VAULT_NAME,
            "help": f"PDM vault name (default: {VAULT_NAME})",
        }),
        (("-c", "--csv"), {
            "required": True,
            "help": "Path to CSV file containing full vault paths",
        }),
        (("-t", "--transition"), {
            "default": DEFAULT_TRANSITION,
            "help": f"Workflow transition name to execute (default: {DEFAULT_TRANSITION})",
        }),
        (("--comment",), {
            "default": "Batch workflow transition",
            "help": "Comment for the workflow transition",
        }),
        (("-u", "--username"), {
            "help": "PDM username (will prompt if not provided)",
        }),
        (("--log-file",), {
            "help": "Custom log file path (default: auto-generated with timestamp)",
        }),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MAIN EXECUTION
|
||||
# =============================================================================
|
||||
|
||||
def _write_path_list(file_name: str, paths: List[str]) -> None:
    """Write *paths* to *file_name*, one per line, encoded as UTF-8.

    UTF-8 is what load_file_list_from_csv() decodes, so a list saved here can
    be passed back in as the input CSV without corrupting non-ASCII paths.
    """
    with open(file_name, 'w', encoding='utf-8') as out:
        out.writelines(f"{path}\n" for path in paths)


def main():
    """Main entry point for batch workflow processing.

    Parses the command line, asks for credentials, loads the path list,
    validates it against the vault, runs the batch transition and writes the
    follow-up lists (not found / failed / not available) to timestamped text
    files in the current working directory.

    Returns:
        0 when no transition failed; 1 when the CSV yields no paths, the vault
        login fails, no listed file exists in the vault, or at least one
        transition failed.
    """
    # Parse arguments
    args = parse_arguments()

    # Setup logging
    logger = setup_logging(args.log_file)
    logger.info("=" * 60)
    logger.info("PDM BATCH WORKFLOW PROCESSOR (PATH-BASED)")
    logger.info("=" * 60)
    logger.info(f"Vault: {args.vault}")
    logger.info(f"CSV File: {args.csv}")
    logger.info(f"Transition: {args.transition}")
    logger.info(f"Comment: {args.comment}")

    # Get credentials
    username = args.username
    if not username:
        username = input("PDM Username: ")

    password = getpass.getpass("PDM Password: ")

    # Load files from CSV
    file_paths = load_file_list_from_csv(args.csv)

    if not file_paths:
        logger.error("No files loaded from CSV. Exiting.")
        return 1

    logger.info(f"Loaded {len(file_paths)} file paths from CSV")

    # Connect to vault
    vault = PDMVaultConnection(args.vault)

    if not vault.connect(username, password):
        logger.error("Failed to connect to vault. Exiting.")
        return 1

    try:
        # Validate files exist in vault
        logger.info("Validating files in vault...")
        logger.info("=" * 60)
        validation = validate_files(vault, file_paths)

        valid_count = len(validation["valid"])
        not_found_count = len(validation["not_found"])

        # Summary
        logger.info("=" * 60)
        logger.info("VALIDATION SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Files ready to process: {valid_count}")
        logger.info(f"Files not found: {not_found_count}")

        # Report not found
        if not_found_count > 0:
            logger.warning("\nFiles not found in vault:")
            for missing_path in validation["not_found"]:
                logger.warning(f" - {missing_path}")

            # Save not found list
            not_found_file = f"not_found_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            _write_path_list(not_found_file, validation["not_found"])
            logger.info(f"Not found list saved to: {not_found_file}")

        if valid_count == 0:
            logger.error("No valid files to process. Exiting.")
            return 1

        # Execute batch transition
        results = batch_transition(
            vault,
            validation["valid"],
            args.transition,
            comment=args.comment
        )

        # Final report
        logger.info("=" * 60)
        logger.info("BATCH PROCESS COMPLETE")
        logger.info("=" * 60)
        logger.info(f"Total files processed: {valid_count}")
        logger.info(f"Successful transitions: {len(results['success'])}")
        logger.info(f"Failed transitions: {len(results['failed'])}")
        logger.info(
            f"Transition not available (likely already in target state): "
            f"{len(results['not_available'])}"
        )

        # One timestamp shared by both result files of this run.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        if results["failed"]:
            logger.warning("\nFailed files (real failures — retry these):")
            for failed_path in results["failed"]:
                logger.warning(f" - {failed_path}")

            failed_file = f"failed_transitions_{timestamp}.txt"
            _write_path_list(failed_file, results["failed"])
            logger.info(f"Failed file list saved to: {failed_file}")

        if results["not_available"]:
            not_avail_file = f"not_available_{timestamp}.txt"
            _write_path_list(not_avail_file, results["not_available"])
            logger.info(
                f"Not-available file list saved to: {not_avail_file} "
                f"(likely already in target state — not retried)"
            )

        return 0 if not results["failed"] else 1

    finally:
        # Runs on every exit path after a successful connect, early returns included.
        vault.disconnect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s return code to the shell via SystemExit.  The bare exit()
    # helper is installed by the site module for interactive use and is not
    # available when site is disabled (python -S).
    raise SystemExit(main())
|
||||
788
helpers/db_helper.py
Normal file
788
helpers/db_helper.py
Normal file
@@ -0,0 +1,788 @@
|
||||
"""
|
||||
Database Helper for PDM Migration
|
||||
==================================
|
||||
Interactive tool for running SELECT queries, transforming results, and
|
||||
inserting new rows — with mandatory terminal confirmation before any
|
||||
write operation touches the database.
|
||||
|
||||
Usage:
|
||||
python db_helper.py --db target_db --task copy_with_new_id
|
||||
python db_helper.py --db source_db --query "SELECT TOP 10 * FROM Documents"
|
||||
python db_helper.py --db target_db --task copy_with_new_id --dry-run
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import glob
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any, Callable, Tuple, Set
|
||||
|
||||
# db_utils lives one directory up
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
from db_utils import DatabaseConnection
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONFIGURATION
|
||||
# =============================================================================
|
||||
|
||||
# config.json in the parent of this module's directory (the project root).
CONFIG_PATH = Path(__file__).resolve().parent.parent / "config.json"
# "queries" folder located next to this module; holds the .sql files.
QUERIES_DIR = Path(__file__).resolve().parent / "queries"
|
||||
|
||||
|
||||
def load_config() -> dict:
    """Read CONFIG_PATH as UTF-8 and return the parsed JSON document."""
    raw_text = CONFIG_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
||||
|
||||
|
||||
def load_query(name: str) -> str:
    """
    Return the SQL text stored in ``queries/<name>.sql``.

    Args:
        name: Query name, i.e. the file name without its .sql extension
            (e.g. "get_var47" reads queries/get_var47.sql).

    Returns:
        The file's content with leading and trailing whitespace removed.

    Raises:
        FileNotFoundError: No such file exists; the message lists the query
            names that are available.
    """
    sql_path = QUERIES_DIR / f"{name}.sql"
    if sql_path.exists():
        return sql_path.read_text(encoding="utf-8").strip()

    known = sorted(candidate.stem for candidate in QUERIES_DIR.glob("*.sql"))
    message = (
        f"Query '{name}' not found at {sql_path}\n"
        f"Available queries: {known}"
    )
    raise FileNotFoundError(message)
|
||||
|
||||
|
||||
def list_queries() -> List[str]:
    """List the query names (file stems of ``*.sql`` in QUERIES_DIR), sorted."""
    names = [sql_file.stem for sql_file in QUERIES_DIR.glob("*.sql")]
    names.sort()
    return names
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LOGGING
|
||||
# =============================================================================
|
||||
|
||||
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
    """Configure the shared "db_helper" logger with file + console handlers.

    The file handler records everything (DEBUG and above); the console
    handler shows INFO and above. Both use the same timestamped format.

    Calling this more than once is safe: handlers installed by an earlier
    call are removed and closed first, so records are never emitted twice
    and the previous log file handle is not leaked.

    Args:
        log_file: Path of the log file. When None, a name of the form
            ``db_helper_<YYYYmmdd_HHMMSS>.log`` is generated (relative to the
            current working directory).

    Returns:
        The configured "db_helper" logger.
    """
    if log_file is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = f"db_helper_{timestamp}.log"

    logger = logging.getLogger("db_helper")
    logger.setLevel(logging.DEBUG)

    # The logger is a process-wide singleton; without this, every call would
    # stack another pair of handlers and duplicate each log line.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # File handler — everything. UTF-8 is explicit because log messages in
    # this tool contain non-ASCII characters (e.g. "—", "→") that the
    # platform default encoding may not be able to represent.
    fh = logging.FileHandler(log_file, encoding="utf-8")
    fh.setLevel(logging.DEBUG)

    # Console handler — INFO and above
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    for handler in (fh, ch):
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DATABASE CONNECTION
|
||||
# =============================================================================
|
||||
|
||||
def connect_db(config_key: str) -> DatabaseConnection:
    """Open a database connection described by one block of config.json.

    Args:
        config_key: Name of the config block to use, e.g. "source_db" or
            "target_db". The block must provide at least the "database" and
            "server" entries, which are read for the log message.

    Returns:
        A ``DatabaseConnection`` built from the selected config block.

    Raises:
        ValueError: If ``config_key`` is not present in the config. The
            message lists the keys ending in ``_db`` that are available.
    """
    log = logging.getLogger("db_helper")
    all_settings = load_config()

    if config_key in all_settings:
        settings = all_settings[config_key]
        log.info(
            f"Connecting to {settings['database']} on {settings['server']} "
            f"({config_key})"
        )
        return DatabaseConnection(settings)

    db_keys = [k for k in all_settings if k.endswith('_db')]
    raise ValueError(
        f"Config key '{config_key}' not found in {CONFIG_PATH}. "
        f"Available keys: {db_keys}"
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SELECT
|
||||
# =============================================================================
|
||||
|
||||
def run_select(
    db: DatabaseConnection,
    query: str,
    params: Optional[tuple] = None,
    preview_rows: int = 10,
) -> List[Dict[str, Any]]:
    """Run a SELECT, log it, show a console preview, and return all rows.

    Args:
        db: Active DatabaseConnection; its ``execute_query`` is called with
            ``(query, params)``.
        query: SQL SELECT statement.
        params: Optional query parameters (logged at DEBUG when truthy).
        preview_rows: Number of rows to print as a table; 0 (or less)
            suppresses the preview.

    Returns:
        The full result as returned by ``db.execute_query``.
    """
    log = logging.getLogger("db_helper")
    log.info(f"Running SELECT:\n{query}")
    if params:
        log.debug(f" Params: {params}")

    result = db.execute_query(query, params)
    row_count = len(result)
    log.info(f" Returned {row_count} row(s)")

    # Nothing to preview: either no rows came back or previews are disabled.
    if not result or preview_rows <= 0:
        return result

    _print_table(result[:preview_rows])
    hidden = row_count - preview_rows
    if hidden > 0:
        print(f" ... and {hidden} more rows")

    return result
|
||||
|
||||
|
||||
def _print_table(rows: List[Dict[str, Any]]) -> None:
|
||||
"""Pretty-print a list of row dicts as an aligned console table."""
|
||||
if not rows:
|
||||
return
|
||||
columns = list(rows[0].keys())
|
||||
# Compute column widths (header vs data)
|
||||
widths = {col: len(col) for col in columns}
|
||||
str_rows = []
|
||||
for row in rows:
|
||||
str_row = {col: str(row[col]) for col in columns}
|
||||
for col in columns:
|
||||
widths[col] = max(widths[col], len(str_row[col]))
|
||||
str_rows.append(str_row)
|
||||
|
||||
header = " | ".join(col.ljust(widths[col]) for col in columns)
|
||||
sep = "-+-".join("-" * widths[col] for col in columns)
|
||||
print(f" {header}")
|
||||
print(f" {sep}")
|
||||
for sr in str_rows:
|
||||
line = " | ".join(sr[col].ljust(widths[col]) for col in columns)
|
||||
print(f" {line}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONFIRMATION GATE
|
||||
# =============================================================================
|
||||
|
||||
def preview_and_confirm(
    action: str,
    sql: str,
    rows: List[Dict[str, Any]],
    preview_rows: int = 5,
    dry_run: bool = False,
    total_row_count: Optional[int] = None,
) -> bool:
    """Describe a pending write on the console and ask the user to approve it.

    Args:
        action: Short description of the operation ("INSERT into Documents").
        sql: The SQL statement that will be executed.
        rows: The data rows that will be written, or a sample of them.
        preview_rows: Maximum number of rows from ``rows`` to display.
        dry_run: When True, show the preview, log the intent, and return
            False without prompting.
        total_row_count: Real number of rows to be written when ``rows`` is
            only a sample; defaults to ``len(rows)``.

    Returns:
        True only when the user answers "y" or "yes" (case-insensitive).
        False for any other answer and always for a dry run.
    """
    log = logging.getLogger("db_helper")
    full_count = len(rows) if total_row_count is None else total_row_count
    rule = "=" * 60

    print("\n" + rule)
    print(f" ACTION: {action}")
    print(f" ROWS: {full_count}")
    print(f" SQL: {sql}")
    print(rule)

    if rows and preview_rows > 0:
        shown = min(preview_rows, len(rows))
        print(f"\n Sample data ({shown} of {full_count}):")
        _print_table(rows[:preview_rows])

    if dry_run:
        print("\n [DRY RUN] — no changes will be made.")
        log.info(f"[DRY RUN] Would {action} ({full_count} rows)")
        return False

    print()
    answer = input(" Execute this? [y/N]: ").strip().lower()
    if answer not in ("y", "yes"):
        # Anything but an explicit yes is treated as a refusal.
        log.info(f"User declined: {action}")
        print(" Aborted.")
        return False

    log.info(f"User confirmed: {action} ({full_count} rows)")
    return True
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# INSERT
|
||||
# =============================================================================
|
||||
|
||||
def _parse_insert_columns(sql: str) -> Optional[List[str]]:
|
||||
"""
|
||||
Extract the column name list from a standard INSERT statement.
|
||||
|
||||
Matches 'INSERT INTO <table> (col1, col2, ...) VALUES ...'. Returns
|
||||
None if the INSERT has no explicit column list (e.g. 'INSERT INTO t
|
||||
VALUES (...)') so the caller can fall back to positional labels.
|
||||
"""
|
||||
import re
|
||||
# Match the first parenthesised group after INSERT INTO <table>
|
||||
# Table name may be bracketed/dotted: [db].[dbo].[Table]
|
||||
m = re.search(
|
||||
r"INSERT\s+INTO\s+[\[\]\w\.]+\s*\(([^)]+)\)\s*VALUES",
|
||||
sql,
|
||||
re.IGNORECASE | re.DOTALL,
|
||||
)
|
||||
if not m:
|
||||
return None
|
||||
cols = [c.strip().strip("[]") for c in m.group(1).split(",")]
|
||||
return [c for c in cols if c]
|
||||
|
||||
|
||||
def _build_insert_preview_rows(
|
||||
rows: List[Dict[str, Any]],
|
||||
params_builder: Callable[[Dict[str, Any]], tuple],
|
||||
column_names: Optional[List[str]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Apply params_builder to each row and return dicts keyed by the INSERT's
|
||||
column names — so the preview shows exactly what will be written.
|
||||
Falls back to positional labels ('col_0', 'col_1', ...) if the column
|
||||
list couldn't be parsed.
|
||||
"""
|
||||
preview = []
|
||||
for row in rows:
|
||||
params = params_builder(row)
|
||||
if column_names and len(column_names) == len(params):
|
||||
preview.append(dict(zip(column_names, params)))
|
||||
else:
|
||||
preview.append({f"col_{i}": v for i, v in enumerate(params)})
|
||||
return preview
|
||||
|
||||
|
||||
def run_insert(
    db: DatabaseConnection,
    insert_sql: str,
    rows: List[Dict[str, Any]],
    params_builder: Callable[[Dict[str, Any]], tuple],
    action: str = "INSERT rows",
    dry_run: bool = False,
    preview_columns: Optional[List[str]] = None,
) -> Dict[str, int]:
    """Insert rows one by one after a preview and an explicit confirmation.

    Nothing is committed until every row has been attempted. If any row
    failed with a non-duplicate error the user is asked whether to commit
    the rest or roll everything back.

    Args:
        db: Active DatabaseConnection. Uses ``execute_non_query_no_commit``,
            ``commit`` and ``rollback``.
        insert_sql: Parameterised INSERT statement (``?`` placeholders).
        rows: Row dicts to insert (typically from run_select, possibly
            transformed).
        params_builder: Converts a row dict into the parameter tuple that
            matches the statement's placeholders.
        action: Description shown in the confirmation prompt.
        dry_run: When True, preview only — nothing is executed.
        preview_columns: Column names for the preview table. When omitted
            they are parsed from ``insert_sql``.

    Returns:
        Counts keyed "inserted", "skipped" and "errors". All zero when there
        were no rows, on a dry run, or when the user declined. "inserted" is
        reset to 0 after a rollback.
    """
    log = logging.getLogger("db_helper")
    stats = {"inserted": 0, "skipped": 0, "errors": 0}

    if not rows:
        log.info("No rows to insert.")
        return stats

    # The preview is built from the real parameter tuples (for a 5-row
    # sample), so the user sees what will actually be written.
    column_names = preview_columns or _parse_insert_columns(insert_sql)
    sample = _build_insert_preview_rows(rows[:5], params_builder, column_names)
    confirmed = preview_and_confirm(
        action, insert_sql, sample,
        total_row_count=len(rows),
        dry_run=dry_run,
    )
    if not confirmed:
        return stats

    total = len(rows)
    # Refresh the progress line roughly 50 times over the whole batch
    # (every row for small batches).
    progress_step = max(1, total // 50)
    print()  # blank line before the progress indicator

    for position, row in enumerate(rows, 1):
        params = params_builder(row)
        try:
            db.execute_non_query_no_commit(insert_sql, params)
            stats["inserted"] += 1
            log.debug(f" [{position}/{total}] Inserted: {params}")
        except Exception as exc:
            # Errors are classified purely by message text: anything that
            # mentions a duplicate / constraint violation counts as skipped.
            lowered = str(exc).lower()
            if "duplicate" in lowered or "violation of" in lowered:
                stats["skipped"] += 1
                log.warning(f" [{position}/{total}] Skipped (duplicate): {params}")
            else:
                stats["errors"] += 1
                log.error(f" [{position}/{total}] Error: {exc} | params={params}")

        # Live progress: "\r" rewinds so the same console line is reused.
        if position % progress_step == 0 or position == total:
            pct = (position / total) * 100
            print(
                f"\r Progress: {position}/{total} ({pct:5.1f}%) "
                f"inserted={stats['inserted']} skipped={stats['skipped']} "
                f"errors={stats['errors']}",
                end="",
                flush=True,
            )
    print()  # end the progress line

    if stats["errors"] == 0:
        db.commit()
        log.info(
            f"Committed. Inserted: {stats['inserted']}, "
            f"Skipped: {stats['skipped']}"
        )
    else:
        # Some rows failed: let the user decide between a partial commit
        # and discarding the whole batch.
        print(
            f"\n {stats['errors']} error(s) occurred. "
            f"Commit anyway? [y/N]: ", end=""
        )
        keep_partial = input().strip().lower() in ("y", "yes")
        if keep_partial:
            db.commit()
            log.info(f"Committed with errors. {stats}")
        else:
            db.rollback()
            stats["inserted"] = 0
            log.warning(f"Rolled back all inserts. {stats}")
            print(" Rolled back.")

    print(f"\n Results: {stats}")
    return stats
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PREDEFINED TASKS
|
||||
# =============================================================================
|
||||
# Each task is a function that receives (db, args) and orchestrates a
|
||||
# SELECT → transform → INSERT workflow. Register new tasks in TASK_REGISTRY
|
||||
# at the bottom of this section.
|
||||
|
||||
def task_copy_with_new_id(db: DatabaseConnection, args: argparse.Namespace) -> None:
    """Template task: read rows, offset their ID, and insert them as new rows.

    The table, columns, filter and ID transformation are placeholders meant
    to be edited for a real table.

    Args:
        db: Active DatabaseConnection.
        args: Parsed CLI arguments; only ``args.dry_run`` is read.
    """
    log = logging.getLogger("db_helper")

    # ----- 1. SELECT the source rows -----
    select_sql = (
        "\n"
        "SELECT TOP 10\n"
        "ID, Name, Description\n"
        "FROM YourTable\n"
        "WHERE SomeCondition = 1\n"
    )
    source_rows = run_select(db, select_sql)
    if not source_rows:
        log.info("No source rows found — nothing to do.")
        return

    # ----- 2. Transform -----
    # Copy each row and replace its ID (example: offset by 1000). Other
    # columns are carried over unchanged; adjust here as needed.
    new_rows = [{**row, "ID": row["ID"] + 1000} for row in source_rows]

    # ----- 3. INSERT the transformed rows -----
    insert_sql = (
        "\n"
        "INSERT INTO YourTable (ID, Name, Description)\n"
        "VALUES (?, ?, ?)\n"
    )

    def to_params(r: Dict[str, Any]) -> tuple:
        # Order must match the column list of insert_sql.
        return (r["ID"], r["Name"], r["Description"])

    run_insert(
        db,
        insert_sql,
        new_rows,
        params_builder=to_params,
        action="INSERT transformed rows into YourTable",
        dry_run=args.dry_run,
    )
|
||||
|
||||
|
||||
def task_check_vv50(db: DatabaseConnection, args: argparse.Namespace) -> None:
    """Report which DWS documents with VariableID=57 also have VariableID=50.

    Steps:
        1. Run DWS_GET_VV-57.sql to get the list of documents.
        2. For each DocumentID, run Get_All_VV_Per_DocID.sql.
        3. Log whether VariableID=50 is present or missing.
        4. When at least one document already has VariableID=50, write those
           documents to ``has_vv50_<timestamp>.txt`` (CSV with a header row)
           in the current directory. copy_57_to_50 reads this file as its
           exclusion list.

    Args:
        db: Active DatabaseConnection.
        args: Parsed CLI arguments (not used by this task).
    """
    import csv

    logger = logging.getLogger("db_helper")

    # ----- Step 1: Get all documents with VV-57 -----
    step1_sql = load_query("DWS_GET_VV-57")
    docs = run_select(db, step1_sql, preview_rows=5)

    if not docs:
        logger.info("No documents returned — nothing to check.")
        return

    # ----- Step 2 & 3: Check each document for VV-50 -----
    step2_sql = load_query("Get_All_VV_Per_DocID")

    has_vv50 = []
    missing_vv50 = []

    total = len(docs)
    for i, doc in enumerate(docs, 1):
        doc_id = doc["DocumentID"]
        file_name = doc.get("FileName", "")
        # Fall back to the bare file name when the path column is absent.
        full_path = doc.get("FullVaultPath", file_name)

        # One query per document (called directly so the per-document
        # result is not previewed on the console).
        var_rows = db.execute_query(step2_sql, (doc_id,))
        var_ids = {row["VariableID"] for row in var_rows}

        if 50 in var_ids:
            has_vv50.append(doc)
            logger.debug(
                f" [{i}/{total}] VV-50 EXISTS | DocID={doc_id} | {full_path}"
            )
        else:
            missing_vv50.append(doc)
            logger.info(
                f" [{i}/{total}] VV-50 MISSING | DocID={doc_id} | {full_path}"
            )

    # ----- Summary -----
    logger.info("=" * 60)
    logger.info("VV-50 CHECK COMPLETE")
    logger.info("=" * 60)
    logger.info(f"Total documents checked: {total}")
    logger.info(f" Has VV-50: {len(has_vv50)}")
    logger.info(f" Missing VV-50: {len(missing_vv50)}")

    if has_vv50:
        # Save the documents that ALREADY have VV-50; this is the exclusion
        # list consumed by copy_57_to_50.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_file = f"has_vv50_{timestamp}.txt"
        # csv.writer quotes fields containing commas or quotes, so file
        # names / vault paths cannot break the column layout. newline="" is
        # what the csv module requires of the underlying file object.
        with open(out_file, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["DocumentID", "FileName", "FullVaultPath"])
            writer.writerows(
                [
                    doc["DocumentID"],
                    doc.get("FileName", ""),
                    doc.get("FullVaultPath", ""),
                ]
                for doc in has_vv50
            )
        logger.info(f"Has VV-50 list saved to: {out_file}")
|
||||
|
||||
def copy_57_to_50(db: DatabaseConnection, args: argparse.Namespace) -> None:
    """Copy DWS "Number" values (VariableID 57) into VariableID 50.

    DWS stored this information in a variable called Number, but it should
    appear on the data cards in the "Drawing Number" field. For every
    VariableValue row with VariableID = 57 under the DWS folder, a new row is
    inserted that is identical except that VariableID = 50.

    Documents that already have a VariableID = 50 row must not get another
    one; their DocumentIDs are read from a ``has_vv50_<date>.txt`` file and
    excluded.

    Steps:
        1. Run DWS_VV-57_FullList.sql to get the candidate rows.
        2. Load the DocumentIDs to exclude from ``args.exclude_file`` or, if
           that is not given, from the most recent has_vv50_*.txt file.
        3. Drop every candidate whose DocumentID is excluded.
        4. Insert the remaining rows with VariableID changed to 50
           (preview + confirmation handled by run_insert).

    Args:
        db: Active DatabaseConnection.
        args: Parsed CLI arguments; reads ``exclude_file`` and ``dry_run``.
    """
    log = logging.getLogger("db_helper")

    # ----- Step 1: Fetch all VV-57 rows in DWS paths -----
    candidates = run_select(
        db, load_query("DWS_VV-57_FullList"), preview_rows=5
    )
    if not candidates:
        log.info("No VV-57 rows found — nothing to copy.")
        return

    # ----- Step 2: Load DocumentIDs that already have VV-50 -----
    exclusion_path = args.exclude_file or _find_latest_has_vv50_file()
    already_done = _load_excluded_doc_ids(exclusion_path)

    # ----- Step 3: Filter out rows whose DocumentID already has VV-50 -----
    pending = [
        row for row in candidates if row["DocumentID"] not in already_done
    ]
    skipped = len(candidates) - len(pending)
    log.info(
        f"After filter: {len(pending)} rows to insert, "
        f"{skipped} skipped (DocumentID already has VV-50)"
    )
    if not pending:
        log.info("Nothing to insert after filtering.")
        return

    # ----- Step 4: Insert (with preview + confirmation) -----
    # Columns copied verbatim, in the order INSERT_VV50_Copy.sql expects them
    # AFTER the leading VariableID parameter.
    copied_columns = (
        "DocumentID",
        "ProjectID",
        "RevisionNo",
        "ConfigurationID",
        "ValueText",
        "ValueInt",
        "ValueFloat",
        "ValueDate",
        "ValueCache",
        "IsLongText",
    )

    def build_params(row: Dict[str, Any]) -> tuple:
        # First parameter overrides VariableID with 50; the rest mirror the
        # source row.
        return (50, *(row[column] for column in copied_columns))

    run_insert(
        db,
        load_query("INSERT_VV50_Copy"),
        pending,
        params_builder=build_params,
        action="INSERT VariableID=50 copies of DWS VV-57 rows",
        dry_run=args.dry_run,
    )
|
||||
|
||||
|
||||
def _find_latest_has_vv50_file() -> Optional[str]:
|
||||
"""Find the most recent has_vv50_*.txt file in the current directory."""
|
||||
logger = logging.getLogger("db_helper")
|
||||
matches = sorted(glob.glob("has_vv50_*.txt"))
|
||||
if not matches:
|
||||
return None
|
||||
latest = matches[-1]
|
||||
logger.info(f"Auto-detected exclusion file: {latest}")
|
||||
return latest
|
||||
|
||||
|
||||
def _load_excluded_doc_ids(path: Optional[str]) -> Set[int]:
|
||||
"""
|
||||
Load DocumentIDs from a has_vv50_*.txt file (CSV format with header).
|
||||
|
||||
Returns an empty set if no file is provided and prompts the user to
|
||||
confirm they want to proceed without any exclusions.
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
|
||||
if not path:
|
||||
logger.warning(
|
||||
"No exclusion file found — ALL VV-57 DocumentIDs will get a "
|
||||
"VV-50 copy, including ones that may already have VV-50."
|
||||
)
|
||||
resp = input(
|
||||
" Proceed without an exclusion list? [y/N]: "
|
||||
).strip().lower()
|
||||
if resp not in ("y", "yes"):
|
||||
logger.info("User aborted — no exclusion file.")
|
||||
raise SystemExit(1)
|
||||
return set()
|
||||
|
||||
excluded: Set[int] = set()
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
header = f.readline() # discard "DocumentID,FileName,FullVaultPath"
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
first = line.split(",", 1)[0].strip()
|
||||
if first.isdigit():
|
||||
excluded.add(int(first))
|
||||
logger.info(f"Loaded {len(excluded)} DocumentIDs to exclude from {path}")
|
||||
return excluded
|
||||
|
||||
|
||||
# Register tasks here — maps --task name to function
|
||||
# Maps each --task name accepted on the command line to the function that
# implements it. Every function is called as task(db, args).
TASK_REGISTRY: Dict[str, Callable] = dict(
    copy_with_new_id=task_copy_with_new_id,
    check_vv50=task_check_vv50,
    copy_57_to_50=copy_57_to_50,
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI
|
||||
# =============================================================================
|
||||
|
||||
def parse_arguments() -> argparse.Namespace:
    """Define the command-line interface and parse ``sys.argv``.

    Options: ``--db``, ``--task`` (restricted to the names in
    TASK_REGISTRY), ``--query``, ``--dry-run``, ``--list-queries`` and
    ``--exclude-file``.

    Returns:
        The parsed arguments namespace.
    """
    usage_examples = (
        "\n"
        "Examples:\n"
        "python db_helper.py --db target_db --task copy_with_new_id\n"
        "python db_helper.py --db target_db --task copy_with_new_id --dry-run\n"
        "python db_helper.py --db source_db --query get_var47\n"
        'python db_helper.py --db source_db --query "SELECT TOP 10 * FROM Documents"\n'
        "python db_helper.py --list-queries\n"
    )
    cli = argparse.ArgumentParser(
        description="Database helper for PDM migration — interactive SQL tasks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # (flag, add_argument keyword arguments) — registered in this order.
    option_specs = [
        (
            "--db",
            {"help": 'Config key for the database: "source_db" or "target_db"'},
        ),
        (
            "--task",
            {
                "choices": list(TASK_REGISTRY.keys()),
                "help": "Name of a predefined task to run",
            },
        ),
        (
            "--query",
            {
                "help": (
                    "Run a SELECT query. Pass a query name to load from "
                    "helpers/queries/<name>.sql, or pass raw SQL in quotes."
                ),
            },
        ),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "Preview what would happen without executing writes",
            },
        ),
        (
            "--list-queries",
            {
                "action": "store_true",
                "help": "List all available saved queries and exit",
            },
        ),
        (
            "--exclude-file",
            {
                "help": (
                    "Path to a has_vv50_*.txt file whose DocumentIDs should be "
                    "excluded from copy_57_to_50. If omitted, the most recent "
                    "has_vv50_*.txt in the current directory is used."
                ),
            },
        ),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
|
||||
|
||||
|
||||
def _resolve_query(query_arg: str) -> str:
|
||||
"""
|
||||
Resolve a --query argument to SQL text.
|
||||
|
||||
If it looks like a SQL statement (contains a space), use it as-is.
|
||||
Otherwise treat it as a saved query name and load from queries/<name>.sql.
|
||||
"""
|
||||
if " " in query_arg:
|
||||
return query_arg
|
||||
return load_query(query_arg)
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point.

    Modes:
        * ``--list-queries``: print the saved queries (name plus the first
          line of each file) and exit; needs neither a DB nor logging.
        * ``--query``: run an ad-hoc or saved SELECT against ``--db``.
        * ``--task``: run a registered task against ``--db``.
          ``--query`` takes precedence when both are given.

    Arguments are validated *before* a database connection is opened, and
    configuration problems while connecting are reported as a one-line
    error instead of a traceback.

    Returns:
        Process exit code: 0 on success, 1 on a usage/configuration error,
        a missing query file, or an unhandled exception, and 130 when
        interrupted with Ctrl-C.
    """
    args = parse_arguments()

    # --list-queries doesn't need a DB connection or logging
    if args.list_queries:
        queries = list_queries()
        if queries:
            print(f"Available queries in {QUERIES_DIR}:")
            for name in queries:
                # Show the first line of each .sql as a description
                sql_path = QUERIES_DIR / f"{name}.sql"
                first_line = sql_path.read_text(encoding="utf-8").split("\n")[0]
                print(f" {name:30s} {first_line}")
        else:
            print(f"No .sql files found in {QUERIES_DIR}")
        return 0

    if not args.db:
        print("Error: --db is required (unless using --list-queries)")
        return 1

    logger = setup_logging()

    logger.info("=" * 60)
    logger.info("DB HELPER")
    logger.info("=" * 60)
    logger.info(f"Database: {args.db}")
    logger.info(f"Task: {args.task or '(ad-hoc query)'}")
    logger.info(f"Dry run: {args.dry_run}")

    # Reject an empty request before touching the database at all.
    if not (args.query or args.task):
        logger.error("Provide either --task, --query, or --list-queries")
        return 1

    # A missing config.json or an unknown/invalid config block is a user
    # error, not a crash. Other connection failures propagate unchanged.
    try:
        db = connect_db(args.db)
    except (FileNotFoundError, ValueError) as exc:
        logger.error(str(exc))
        return 1

    try:
        if args.query:
            sql = _resolve_query(args.query)
            logger.info(f"Resolved query:\n{sql}")
            run_select(db, sql)
        else:
            task_fn = TASK_REGISTRY[args.task]
            task_fn(db, args)

    except FileNotFoundError as exc:
        # Typically a saved query or exclusion file that does not exist.
        logger.error(str(exc))
        return 1
    except KeyboardInterrupt:
        logger.warning("Interrupted by user")
        db.rollback()
        return 130
    except Exception:
        logger.exception("Unhandled exception")
        db.rollback()
        return 1
    finally:
        # Runs on every path above, including the early returns.
        db.close()

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
14
helpers/queries/DWS_GET_VV-57.sql
Normal file
14
helpers/queries/DWS_GET_VV-57.sql
Normal file
@@ -0,0 +1,14 @@
|
||||
-- List each distinct document (ID, file name, full vault path) that has a
-- VariableValue row with VariableID = 57 and is filed in a project (folder)
-- whose path contains 'DWS'.
-- The full path is the project path concatenated with the file name.
-- The join to VariableValue is only used for filtering; DISTINCT collapses
-- the repeated rows it produces.
-- Results are ordered by full vault path.
SELECT DISTINCT
d.DocumentID,
d.Filename AS [FileName],
--p.Path AS [FolderPath],
p.Path + d.Filename AS [FullVaultPath]
--v.ValueText,
--d.DocumentID
--d.CurrentVersion
FROM [Drilling_Test].[dbo].[Documents] d
INNER JOIN [Drilling_Test].[dbo].[DocumentsInProjects] dp on d.[DocumentID] = dp.DocumentID
INNER JOIN [Drilling_Test].[dbo].[Projects] p on dp.ProjectID = p.ProjectID
INNER JOIN [Drilling_Test].[dbo].[VariableValue] v on d.DocumentID = v.DocumentID
WHERE v.VariableID = 57 and p.Path like '%DWS%'
ORDER BY [FullVaultPath] asc;
|
||||
24
helpers/queries/DWS_VV-57_FullList.sql
Normal file
24
helpers/queries/DWS_VV-57_FullList.sql
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
Return every VariableValue row with VariableID = 57 that belongs to a
document filed under the DWS folder.

This is essentially SELECT * FROM VariableValue WHERE VariableID = 57; the
other tables are only needed to build the folder path so the result can be
restricted to DWS.

The folder test is written as EXISTS rather than as joins so that each
VariableValue row is returned exactly once. With plain joins, a document
linked to more than one matching project/folder would repeat its rows, and
the copy_57_to_50 task would then try to insert the same row several times.
*/

SELECT
    v.VariableID,
    v.DocumentID,
    v.ProjectID,
    v.RevisionNo,
    v.ConfigurationID,
    v.ValueText,
    v.ValueInt,
    v.ValueFloat,
    v.ValueDate,
    v.ValueCache,
    v.IsLongText
FROM [Drilling_Test].[dbo].[VariableValue] v
WHERE v.VariableID = 57
  AND EXISTS (
        SELECT 1
        FROM [Drilling_Test].[dbo].[Documents] d
        INNER JOIN [Drilling_Test].[dbo].[DocumentsInProjects] dp ON d.[DocumentID] = dp.DocumentID
        INNER JOIN [Drilling_Test].[dbo].[Projects] p ON dp.ProjectID = p.ProjectID
        WHERE d.DocumentID = v.DocumentID
          AND p.Path LIKE '%DWS%'
      )
ORDER BY v.DocumentID ASC;
|
||||
14
helpers/queries/Find_Duplicate_Rows.sql
Normal file
14
helpers/queries/Find_Duplicate_Rows.sql
Normal file
@@ -0,0 +1,14 @@
|
||||
-- Two independent statements against [_Citadel_CS].[dbo].[DocumentConfiguration]
-- (note: a different database from the Drilling_Test queries in this folder).
-- NOTE(review): when loaded through db_helper, which result set is returned
-- for a two-statement script depends on DatabaseConnection.execute_query — confirm.

-- 1) Quick look at the table: first 1000 ConfigurationID / ConfigurationName pairs.
SELECT TOP (1000) [ConfigurationID]
,[ConfigurationName]
FROM [_Citadel_CS].[dbo].[DocumentConfiguration]

-- 2) Every row whose ConfigurationName occurs more than once in the table.
SELECT *
FROM [_Citadel_CS].[dbo].[DocumentConfiguration]
WHERE ConfigurationName IN (
SELECT ConfigurationName
FROM [_Citadel_CS].[dbo].[DocumentConfiguration]
GROUP BY ConfigurationName
HAVING COUNT(*) > 1
);
|
||||
|
||||
|
||||
6
helpers/queries/Get_All_VV_Per_DocID.sql
Normal file
6
helpers/queries/Get_All_VV_Per_DocID.sql
Normal file
@@ -0,0 +1,6 @@
|
||||
-- Get all distinct VariableIDs for a given DocumentID
|
||||
-- Pass DocumentID as parameter (?)
|
||||
SELECT DISTINCT VariableID
|
||||
FROM [Drilling_Test].[dbo].[VariableValue]
|
||||
WHERE DocumentID = ?
|
||||
|
||||
9
helpers/queries/INSERT_VV50_Copy.sql
Normal file
9
helpers/queries/INSERT_VV50_Copy.sql
Normal file
@@ -0,0 +1,9 @@
|
||||
-- Insert a VariableValue row that mirrors an existing VV-57 row but with
|
||||
-- VariableID = 50 (used by copy_57_to_50 task).
|
||||
-- Parameter order MUST match build_params() in copy_57_to_50:
|
||||
-- VariableID, DocumentID, ProjectID, RevisionNo, ConfigurationID,
|
||||
-- ValueText, ValueInt, ValueFloat, ValueDate, ValueCache, IsLongText
|
||||
INSERT INTO [Drilling_Test].[dbo].[VariableValue]
|
||||
(VariableID, DocumentID, ProjectID, RevisionNo, ConfigurationID,
|
||||
ValueText, ValueInt, ValueFloat, ValueDate, ValueCache, IsLongText)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
|
||||
42
helpers/samples/README.md
Normal file
42
helpers/samples/README.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Input File Format Reference
|
||||
|
||||
The actual input CSVs under `helpers/` are gitignored (they are environment-specific
|
||||
and often large). These samples document the expected format for each script so you
|
||||
don't have to guess next time.
|
||||
|
||||
All sample files use a minimal set of example rows. **No headers**, **UTF-8** (with or
|
||||
without BOM). Empty lines are skipped.
|
||||
|
||||
## Full-path format
|
||||
|
||||
Used by `batch_workflows_paths.py` and any script whose CLI takes `--csv` / `-c` and
|
||||
operates on files already inside the vault.
|
||||
|
||||
- One full Windows vault path per line.
|
||||
- Path must match what `IEdmVault5.GetFileFromPath` expects — i.e. the real location
|
||||
inside the vault's local view (e.g. `C:\PDM\<VaultName>\...`).
|
||||
- Extension-sensitive: `.SLDPRT`, `.SLDDRW`, `.SLDASM`, `.pdf`, etc. all count as
|
||||
distinct files.
|
||||
|
||||
See `sample_full_paths.csv`.
|
||||
|
||||
## Part-number format
|
||||
|
||||
Used by `batch_copy_tree.py` (and other scripts that resolve files by part/document
|
||||
number rather than path).
|
||||
|
||||
- One part number per line.
|
||||
- No extension, no path.
|
||||
- Case and dashes/underscores should match the vault's stored value.
|
||||
|
||||
See `sample_part_numbers.csv`.
|
||||
|
||||
## Filename-only format
|
||||
|
||||
Used by older helpers (e.g. the `Code15*.csv` family) that match by filename across
|
||||
the vault rather than by full path.
|
||||
|
||||
- One filename per line, with extension.
|
||||
- No directory component.
|
||||
|
||||
See `sample_filenames.csv`.
|
||||
4
helpers/samples/sample_filenames.csv
Normal file
4
helpers/samples/sample_filenames.csv
Normal file
@@ -0,0 +1,4 @@
|
||||
EXAMPLE-PART.SLDPRT
|
||||
EXAMPLE-DRAWING.SLDDRW
|
||||
EXAMPLE-LIBRARY-FEATURE.SLDLFP
|
||||
EXAMPLE-DOCUMENT.PDF
|
||||
|
5
helpers/samples/sample_full_paths.csv
Normal file
5
helpers/samples/sample_full_paths.csv
Normal file
@@ -0,0 +1,5 @@
|
||||
C:\PDM\Drilling_Test\DWS\Data Subs\EXAMPLE-PART.SLDPRT
|
||||
C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDPRT
|
||||
C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDDRW
|
||||
C:\PDM\Drilling_Test\DWS\Assemblies\EXAMPLE-ASSY.SLDASM
|
||||
C:\PDM\Drilling_Test\DWS\Drawings\EXAMPLE-DRAWING.pdf
|
||||
|
6
helpers/samples/sample_part_numbers.csv
Normal file
6
helpers/samples/sample_part_numbers.csv
Normal file
@@ -0,0 +1,6 @@
|
||||
2-80001010
|
||||
2-80003568
|
||||
2-80003962
|
||||
2-TF-80009889
|
||||
4-804687-02
|
||||
6-60181-02
|
||||
|
184
helpers/test_batch_api.py
Normal file
184
helpers/test_batch_api.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
IEdmFile13::ChangeState3 via comtypes vtable direct call.
|
||||
|
||||
Confirmed from gen_py IEdmFile13_vtables_:
|
||||
- IEdmFile13 IID : {DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}
|
||||
- ChangeState3 oVft = 432 → vtable slot 54
|
||||
- Slot layout:
|
||||
0-2 IUnknown (handled by comtypes base)
|
||||
3-6 IDispatch (handled by comtypes base)
|
||||
7-53 47 methods from IEdmObject5 … IEdmFile12 (placeholders)
|
||||
54 ChangeState3
|
||||
55 GetThumbnail
|
||||
"""
|
||||
import ctypes
|
||||
import getpass
|
||||
import pythoncom
|
||||
import win32com.client
|
||||
import win32com.client.gencache as gencache
|
||||
import comtypes
|
||||
import comtypes.automation
|
||||
from comtypes import COMMETHOD, GUID, HRESULT
|
||||
from comtypes.automation import IDispatch as CT_IDispatch
|
||||
|
||||
# Vault and file this experiment runs against.
VAULT_NAME = "Drilling_Test"
TEST_PATH = r"C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDPRT"
# Parent folder of TEST_PATH. Derived rather than retyped so the file and its
# folder can never point at different locations after an edit.
FOLDER_PATH = TEST_PATH.rpartition("\\")[0]

# Transition and target-state identifiers handed to ChangeState3.
TRANSITION_ID = 268
TO_STATE_ID = 9

# Object-type code passed to vault.GetObject() when fetching the file.
EdmObject_File = 1
# Interface ID used to declare the comtypes view of IEdmFile13.
IID_IEdmFile13 = "{DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# comtypes interface — 47 placeholders put ChangeState3 at slot 54 (offset 432)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Placeholder entries for the 47 inherited methods (vtable slots 7-53). They are
# never called from this script; they exist only so that the two real methods
# declared below land on slots 54 and 55.
_phs = [COMMETHOD([], HRESULT, f"_ph{i}") for i in range(47)]

VARIANT_p = ctypes.POINTER(comtypes.automation.VARIANT)


class IEdmFile13_CT(CT_IDispatch):
    """Hand-written comtypes declaration of IEdmFile13, for calling ChangeState3.

    Only ``ChangeState3`` and ``GetThumbnail`` carry real signatures; every
    method that precedes them in the vtable is a no-argument placeholder.

    The string parameters are declared as ``BSTR``: the interface is flagged
    ``oleautomation`` and the parameter names carry the ``bs`` prefix, so the
    callee is entitled to treat them as length-prefixed automation strings,
    which a bare ``wchar_t*`` is not. A Python ``str`` argument is still
    accepted at the call site.
    """

    _iid_ = GUID(IID_IEdmFile13)
    _idlflags_ = ["dual", "oleautomation"]
    _methods_ = _phs + [
        COMMETHOD(
            [], HRESULT, "ChangeState3",
            (["in"], VARIANT_p, "poStateIdOrName"),
            (["in"], VARIANT_p, "poTransitionIdOrName"),
            (["in"], ctypes.c_long, "lFolderID"),
            (["in"], comtypes.BSTR, "bsComment"),
            (["in"], ctypes.c_long, "lParentWnd"),
            (["in"], ctypes.c_long, "lEdmStateFlags"),
            (["in"], comtypes.BSTR, "bsPasswd"),
        ),
        COMMETHOD(
            [], HRESULT, "GetThumbnail",
            (["out", "retval"], ctypes.POINTER(ctypes.c_void_p), "pBitmap"),
        ),
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_i4_variant(val: int) -> comtypes.automation.VARIANT:
    """Build a fresh VARIANT tagged VT_I4 that carries *val*.

    The value is stored as a C ``int`` written straight into the VARIANT's
    memory, bypassing comtypes' own value conversion.
    """
    variant = comtypes.automation.VARIANT()
    variant.vt = comtypes.automation.VT_I4  # numeric tag 3
    # The value union begins 8 bytes into the structure.
    payload = ctypes.c_int.from_address(ctypes.addressof(variant) + 8)
    payload.value = val
    return variant
|
||||
|
||||
|
||||
def raw_ptr_from_pycom(py_com_obj) -> int:
    """
    Read the IUnknown*/IDispatch* stored inside a pythoncom COM wrapper.

    The interface pointer is taken to be the first pointer-sized field that
    follows the CPython object header. On 64-bit CPython the header is
    ob_refcnt(8) | ob_type(8), so the field sits at offset 16; the header size
    and pointer width are taken from the running interpreter instead of being
    hard-coded.

    NOTE(review): the field position on builds other than 64-bit CPython is
    inferred from the 64-bit layout, not checked against pywin32 — confirm
    before relying on it there.

    Returns:
        The stored address as an integer, or 0 when the field is NULL.
    """
    header_size = object.__basicsize__  # sizeof(PyObject) for this build
    slot = ctypes.c_void_p.from_address(id(py_com_obj) + header_size)
    # c_void_p reports NULL as None; callers expect a plain integer.
    return slot.value or 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Fire ChangeState3 on TEST_PATH through a raw comtypes vtable call.

    Steps, each announced on stdout:
      1. Prompt for credentials and log in to VAULT_NAME.
      2. Resolve the test file and its folder to their IDs.
      3. Pull the raw interface pointer out of the win32com wrapper and
         QueryInterface it to IEdmFile13_CT.
      4. Call ChangeState3 with TO_STATE_ID / TRANSITION_ID.
      5. Re-fetch the file and print the state it ended up in.

    Returns None. Failures in steps 3-4 are printed with a traceback and end
    the run early; exceptions from login or path lookup propagate.
    """
    import traceback  # used only on the failure paths

    username = input("PDM Username: ")
    password = getpass.getpass("PDM Password: ")  # reused for bsPasswd below

    print("\n=== Connect ===")
    vault = gencache.EnsureDispatch("ConisioLib.EdmVault")
    vault.Login(username, password, VAULT_NAME)
    print(f"Logged in to {VAULT_NAME}")

    folder_obj = vault.GetFolderFromPath(FOLDER_PATH)
    result = vault.GetFileFromPath(TEST_PATH, folder_obj)
    # The call may hand back a tuple; the file object is its first element.
    raw = result[0] if isinstance(result, tuple) else result
    file_id = raw.ID
    fold_id = folder_obj.ID
    print(f"File ID: {file_id} Folder ID: {fold_id}")

    file_obj = vault.GetObject(EdmObject_File, file_id)
    file13_w32 = win32com.client.CastTo(file_obj, "IEdmFile13")

    # -----------------------------------------------------------------------
    print("\n=== Extract raw IEdmFile13* from pythoncom ===")
    # -----------------------------------------------------------------------
    py_disp = file13_w32._oleobj_  # PyIDispatch wrapping IEdmFile13
    raw_ptr = raw_ptr_from_pycom(py_disp)
    print(f"IEdmFile13* = {raw_ptr:#x}")

    # Check for NULL *before* dereferencing: reading address 0 would crash the
    # interpreter instead of reaching the error message.
    if not raw_ptr:
        print("ERROR: Could not read a valid COM pointer — aborting.")
        return

    # Sanity: read the vtable pointer (first pointer-sized field of the COM object)
    vtbl_ptr = ctypes.c_void_p.from_address(raw_ptr).value or 0
    print(f"Vtable ptr = {vtbl_ptr:#x}")
    if not vtbl_ptr:
        print("ERROR: Could not read a valid COM pointer — aborting.")
        return

    # -----------------------------------------------------------------------
    print("\n=== QI to IEdmFile13_CT via comtypes ===")
    # -----------------------------------------------------------------------
    # Cast to IUnknown so comtypes can call QueryInterface properly
    ct_unk = ctypes.cast(raw_ptr, ctypes.POINTER(comtypes.IUnknown))
    # A comtypes interface pointer calls Release() when it is collected, but
    # the cast above took no reference. Take one now so that Release() is
    # balanced and the object pythoncom still holds is not over-released.
    ct_unk.AddRef()
    try:
        file13_ct = ct_unk.QueryInterface(IEdmFile13_CT)
    except Exception as e:
        print(f"QI failed: {e}")
        traceback.print_exc()
        return
    print(f"QI succeeded: {file13_ct}")

    # -----------------------------------------------------------------------
    print("\n=== Build VARIANTs ===")
    # -----------------------------------------------------------------------
    v_state = make_i4_variant(TO_STATE_ID)
    v_trans = make_i4_variant(TRANSITION_ID)
    print(f"v_state vt={v_state.vt} val={TO_STATE_ID}")
    print(f"v_trans vt={v_trans.vt} val={TRANSITION_ID}")

    # -----------------------------------------------------------------------
    print(f"\n=== ChangeState3(state={TO_STATE_ID}, trans={TRANSITION_ID}, folder={fold_id}) ===")
    # -----------------------------------------------------------------------
    # Only the COM call sits inside the try, so a problem while formatting the
    # result cannot be misreported as a ChangeState3 failure.
    try:
        hr = file13_ct.ChangeState3(
            ctypes.byref(v_state),
            ctypes.byref(v_trans),
            ctypes.c_long(fold_id),
            "Batch transition test",
            ctypes.c_long(0),
            ctypes.c_long(0),
            password,  # PDM password (required by this transition)
        )
    except Exception as e:
        print(f"ChangeState3 raised: {e}")
        traceback.print_exc()
        return

    if isinstance(hr, int):
        # Mask to 32 bits so the value always prints as an unsigned HRESULT.
        print(f"ChangeState3 returned HRESULT {hr & 0xFFFFFFFF:#010x}")
    else:
        print(f"ChangeState3 returned {hr!r}")

    # -----------------------------------------------------------------------
    print("\n=== Verify state ===")
    # -----------------------------------------------------------------------
    fresh = vault.GetObject(EdmObject_File, file_id)
    try:
        state = fresh.CurrentState
        # Accept the state whether it is exposed as a property or a method.
        if callable(state):
            state = state()
        name = state.Name if hasattr(state, "Name") else str(state)
        print(f"New state: {name}")
        if name in ("Approved", "AA"):
            print("\n*** SUCCESS! ***")
        else:
            print("State did not reach Approved.")
    except Exception as e:
        print(f"Could not read new state: {e}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user