Initial Commit of the PDM project (ready for DWS migration)

This commit is contained in:
will
2026-04-20 08:42:38 -05:00
commit dda7b664e7
2721 changed files with 442772 additions and 0 deletions

828
helpers/batch_copy_tree.py Normal file
View File

@@ -0,0 +1,828 @@
"""
Batch Copy Tree Export for PDM Vault
=====================================
This module provides a framework for:
1. Logging into a PDM vault via API
2. Reading part numbers from a CSV file (no extensions)
3. Running the PDM Copy Tree function for each part
4. Exporting each part's file tree to its own subfolder on a local path
Usage:
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output" --vault "IDSVault"
"""
import logging
import argparse
import getpass
import os
import ctypes
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any
import win32com.client
# PDM API Type Library Constants
# Plain integer stand-ins for the PDM type-library enum values, so the script
# can pass them straight to the vault COM object.
EdmObject_File = 1  # object-type code passed to vault.GetObject() to fetch a file
EdmObject_Folder = 2  # object-type code passed to vault.GetObject() to fetch a folder
EdmSearch_FileName = 1 # Search by filename (not referenced by the code visible in this file)
EdmGet_Simple = 1 # Simple get (latest version); passed as the lEdmGetFlags argument of GetFileCopy()
# =============================================================================
# CONFIGURATION - Can be overridden via command line
# =============================================================================
VAULT_NAME = "IDSVault" # Default vault name; used as the default of the -v/--vault option
# =============================================================================
# LOGGING SETUP
# =============================================================================
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
    """Configure logging for the batch process.

    Attaches one file handler (DEBUG and above) and one console handler
    (INFO and above) to the shared "batch_copy_tree" logger.

    Calling this function more than once is safe: handlers installed by a
    previous call are removed and closed first, so every record is emitted
    exactly once per destination instead of once per call.

    Args:
        log_file: Path of the log file. When None, a timestamped file name
            of the form ``batch_copy_tree_YYYYMMDD_HHMMSS.log`` is used
            (relative to the current working directory).

    Returns:
        The configured "batch_copy_tree" logger.
    """
    if log_file is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = f"batch_copy_tree_{timestamp}.log"

    logger = logging.getLogger("batch_copy_tree")
    logger.setLevel(logging.DEBUG)

    # Drop handlers left over from an earlier call; otherwise each call would
    # stack another file/console pair and every message would be duplicated.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # File handler: full detail. UTF-8 so that paths and part numbers with
    # non-ASCII characters never fail to encode under the Windows locale codec.
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)

    # Console handler: progress-level output only.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger
# =============================================================================
# PDM VAULT CONNECTION
# =============================================================================
class PDMVaultConnection:
    """Handles connection and authentication to the PDM vault.

    The object can be used as a context manager; leaving the ``with`` block
    calls :meth:`disconnect`. Logging in is always a separate, explicit
    :meth:`connect` call because it needs credentials.
    """

    def __init__(self, vault_name: str):
        self.vault_name = vault_name
        self.vault = None  # vault COM object; only set after a successful login
        self.is_connected = False
        self.logger = logging.getLogger("batch_copy_tree")
        # Credentials of the last successful login.
        self._username = None
        self._password = None

    def connect(self, username: str, password: str) -> bool:
        """
        Connect and log into the PDM vault with username/password.

        On failure the instance is left fully disconnected (``vault`` is
        None), so a later disconnect() does not report a session that never
        existed.

        Args:
            username: PDM username
            password: PDM password
        Returns:
            True if connection successful, False otherwise
        """
        try:
            # Create the vault interface and log in before publishing it on
            # self, so a failed Login never leaves a half-initialised object.
            vault = win32com.client.Dispatch("ConisioLib.EdmVault")
            vault.Login(username, password, self.vault_name)
        except Exception as e:
            self.logger.error(f"Failed to connect to vault '{self.vault_name}': {e}")
            self.vault = None
            self.is_connected = False
            return False

        self.vault = vault
        self.is_connected = True
        self._username = username
        self._password = password
        self.logger.info(f"Successfully connected to vault: {self.vault_name}")
        self.logger.info(f"Logged in as: {username}")
        return True

    def disconnect(self) -> None:
        """Disconnect from the PDM vault by dropping the vault reference."""
        if self.vault is None:
            return
        # Clear the vault reference; no explicit logout call is made.
        self.vault = None
        self.is_connected = False
        self.logger.info("Disconnected from vault")

    def search_file_by_name(self, filename: str) -> List[Dict[str, Any]]:
        """
        Search for a file in the vault by filename (supports wildcards).

        Errors on an individual search hit are logged and that hit is
        skipped; an error in the search itself is logged and whatever was
        collected so far is returned.

        Args:
            filename: The filename to search for (e.g., "part001.*" or "part001.sldprt")
        Returns:
            List of dicts, one per match, with the keys 'file_obj',
            'file_id', 'folder_id', 'path', 'folder_path', 'folder_obj'
            and 'filename'. Empty when not connected or nothing matched.
        """
        if not self.is_connected:
            self.logger.error("Not connected to vault")
            return []

        results = []
        try:
            # Create search object and set the filename pattern
            search = self.vault.CreateSearch()
            search.FileName = filename
            self.logger.debug(f"Search pattern: '{filename}'")

            # Walk the result cursor until it is exhausted
            search_result = search.GetFirstResult()
            while search_result is not None:
                try:
                    file_id = search_result.ID
                    folder_id = search_result.ParentFolderID
                    # Get the file and folder objects
                    file_obj = self.vault.GetObject(EdmObject_File, file_id)
                    folder_obj = self.vault.GetObject(EdmObject_Folder, folder_id)
                    if file_obj is not None and folder_obj is not None:
                        folder_path = folder_obj.LocalPath
                        # Use the name reported by the search result
                        actual_name = search_result.Name
                        full_path = str(Path(folder_path) / actual_name)
                        results.append({
                            "file_obj": file_obj,
                            "file_id": file_id,
                            "folder_id": folder_id,
                            "path": full_path,
                            "folder_path": folder_path,
                            "folder_obj": folder_obj,
                            "filename": actual_name
                        })
                except Exception as e:
                    self.logger.warning(f"Error processing search result: {e}")
                search_result = search.GetNextResult()
        except Exception as e:
            self.logger.error(f"Error searching for '{filename}': {e}")

        self.logger.debug(f"Search for '{filename}' returned {len(results)} result(s)")
        return results

    def __enter__(self):
        """Context manager entry - note: call connect() separately with credentials."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.disconnect()
# =============================================================================
# FILE LIST HANDLING
# =============================================================================
def load_part_numbers_from_csv(csv_path: str) -> List[str]:
    """
    Load list of part numbers from a CSV file (single column, no header).

    Each line is stripped of surrounding whitespace and blank lines are
    skipped. The file is read as UTF-8 and a leading byte-order mark is
    ignored.

    The result is all-or-nothing: if the file cannot be opened, or reading
    or decoding fails part-way through, the error is logged and an empty
    list is returned rather than a silently truncated one.

    Args:
        csv_path: Path to the CSV file with one part number per line (no extensions)
    Returns:
        List of part numbers in file order; empty on any error.
    """
    logger = logging.getLogger("batch_copy_tree")
    try:
        with open(csv_path, 'r', encoding='utf-8-sig') as f:
            stripped_lines = (line.strip() for line in f)
            part_numbers = [entry for entry in stripped_lines if entry]
    except FileNotFoundError:
        logger.error(f"CSV file not found: {csv_path}")
        return []
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError raised while reading.
        logger.error(f"Error reading CSV file: {e}")
        return []

    logger.info(f"Loaded {len(part_numbers)} part numbers from {csv_path}")
    return part_numbers
def search_and_resolve_parts(
    vault: "PDMVaultConnection",
    part_numbers: List[str],
    extension: str
) -> Dict[str, List]:
    """
    Search for part numbers in the vault and resolve each to a root file.

    Part numbers are searched with the user-specified extension
    (e.g., "PART001.SLDASM") since the CSV does not include file extensions.

    When a part number matches more than one vault file, the first match is
    still used (and placed in 'valid'), and the part is additionally
    recorded in 'ambiguous' together with every path found so the caller can
    review the choice.

    Args:
        vault: Active vault connection
        part_numbers: List of part numbers (no extensions)
        extension: File extension including the dot (e.g., ".SLDASM")
    Returns:
        Dict with:
        - 'valid': list of file info dicts (ready for copy tree)
        - 'not_found': list of part numbers not found in vault
        - 'ambiguous': list of dicts with 'part_number' and 'paths' (all
          found paths) for parts that matched more than one file
    """
    logger = logging.getLogger("batch_copy_tree")
    results = {"valid": [], "not_found": [], "ambiguous": []}
    total = len(part_numbers)

    for i, part_number in enumerate(part_numbers, 1):
        logger.info(f"[{i}/{total}] Searching for: {part_number}{extension}")
        search_results = vault.search_file_by_name(f"{part_number}{extension}")

        if not search_results:
            results["not_found"].append(part_number)
            logger.warning(f" NOT FOUND: {part_number}")
            continue

        match = search_results[0]
        logger.info(f" FOUND: {match['path']}")

        if len(search_results) > 1:
            logger.warning(f" Multiple matches found, using first result:")
            all_paths = [r["path"] for r in search_results]
            for path in all_paths:
                logger.warning(f" - {path}")
            # Record the ambiguity as documented; previously this list was
            # declared but never filled.
            results["ambiguous"].append({
                "part_number": part_number,
                "paths": all_paths,
            })

        results["valid"].append({
            "part_number": part_number,
            "filename": match["filename"],
            "path": match["path"],
            "file_obj": match["file_obj"],
            "file_id": match["file_id"],
            "folder_id": match["folder_id"],
            "folder_obj": match["folder_obj"],
        })

    return results
# =============================================================================
# COPY TREE OPERATIONS
# =============================================================================
def get_window_handle() -> int:
    """Return the console window handle for PDM API calls, or 0 if there is none.

    Any failure while asking kernel32 for the console window (for example
    when ``ctypes.windll`` is unavailable) is treated as "no window".
    """
    try:
        console_hwnd = ctypes.windll.kernel32.GetConsoleWindow()
    except Exception:
        return 0
    # A falsy handle means no console is attached; normalise it to 0.
    return console_hwnd or 0
def _collect_references(file_obj, folder_id: int, vault, logger) -> List[Dict[str, Any]]:
    """
    Collect all referenced files from an assembly using IEdmReference5.

    Uses IEdmFile5.GetReferenceTree() to get the reference tree root, then
    recursively traverses all levels of the reference tree using
    GetFirstChildPosition/GetNextChild on each node.

    The traversal is best-effort: any error while walking the tree is logged
    and whatever has been collected up to that point is returned, so the
    result always contains at least the root file.

    Args:
        file_obj: IEdmFile5 COM object (the root assembly)
        folder_id: Folder ID of the root file
        vault: The IEdmVault COM object (accepted for the callers'
            convenience; not used by this function's body)
        logger: Logger instance
    Returns:
        List of dicts with 'file_id', 'folder_id', 'name' for each
        unique referenced file (including the root file itself)
    """
    collected = {} # keyed by file ID to deduplicate
    # Add the root file itself
    root_name = file_obj.Name
    root_id = file_obj.ID
    collected[root_id] = {
        "file_id": root_id,
        "folder_id": folder_id,
        "name": root_name,
    }
    logger.debug(f" Root: {root_name}")

    def _traverse_children(ref_node, depth=0):
        """Recursively traverse all children of a reference node."""
        # Log prefix that grows with the nesting depth.
        indent = " " * (depth + 1)
        try:
            result = ref_node.GetFirstChildPosition("", True, True, 0)
            # The COM call may come back as a tuple (return value plus
            # out-parameters); the position is then the first element.
            if isinstance(result, tuple):
                child_pos = result[0]
            else:
                child_pos = result
            while child_pos is not None:
                # Stop when the position object reports that it is exhausted.
                # Errors while probing IsNull are ignored on purpose; the
                # GetNextChild call below then decides whether to continue.
                try:
                    if hasattr(child_pos, 'IsNull') and child_pos.IsNull:
                        break
                except Exception:
                    pass
                try:
                    child_ref = ref_node.GetNextChild(child_pos)
                    if child_ref is None:
                        break
                    if isinstance(child_ref, tuple):
                        child_ref = child_ref[0]
                    child_file_id = child_ref.FileID
                    child_folder_id = child_ref.FolderID
                    child_name = child_ref.Name
                    if child_file_id not in collected:
                        collected[child_file_id] = {
                            "file_id": child_file_id,
                            "folder_id": child_folder_id,
                            "name": child_name,
                        }
                        logger.debug(f"{indent}Child: {child_name}")
                        # Recurse into this child to get its children
                        # NOTE(review): the source this was recovered from had
                        # lost its indentation; the recursion is placed inside
                        # the "not yet collected" check so a shared
                        # sub-assembly is walked only once - confirm against
                        # the original file.
                        _traverse_children(child_ref, depth + 1)
                except StopIteration:
                    break
                except Exception as child_err:
                    # A failing child ends the walk of this node's children.
                    logger.debug(f"{indent}Error reading child reference: {child_err}")
                    break
        except Exception as e:
            logger.debug(f"{indent}Error traversing children at depth {depth}: {e}")

    try:
        # Get the reference tree (IEdmReference5)
        ref_tree = file_obj.GetReferenceTree(folder_id, 0)
        if ref_tree is None:
            logger.warning(f" GetReferenceTree returned None")
            return list(collected.values())
        _traverse_children(ref_tree)
    except Exception as e:
        logger.warning(f" Error traversing reference tree: {e}")
        logger.debug(f" Full error:", exc_info=True)
    return list(collected.values())
def execute_copy_tree(
    vault: PDMVaultConnection,
    file_info: Dict[str, Any],
    output_dir: str,
    part_name: str,
    dry_run: bool = False
) -> Dict[str, Any]:
    """
    Export an assembly and all its referenced files to a local subfolder.

    Uses IEdmFile5.GetReferenceTree() to traverse references, then
    IEdmFile5.GetFileCopy() to download each file to the output folder.

    Every file is attempted even if an earlier one fails, but the part is
    only reported as 'success' when all files in the reference tree were
    copied; otherwise the status is 'failed' and 'error' names the files
    that are missing, so an incomplete export can be retried.

    A dry run never touches the file system: the output subfolder is only
    created when files are actually going to be copied.

    Args:
        vault: Active vault connection
        file_info: Dict with file_obj, file_id, folder_id, path, etc.
        output_dir: Base output directory
        part_name: Part number used as subfolder name
        dry_run: If True, build tree but don't copy files
    Returns:
        Dict with 'status' ('success'/'failed'/'dry_run'), 'file_count'
        (files found for a dry run, files actually copied otherwise),
        'dest_path', 'source_path', and 'error' (if failed)
    """
    logger = logging.getLogger("batch_copy_tree")
    dest_path = os.path.join(output_dir, part_name)
    # GetFileCopy requires destination path to end with backslash
    dest_path_trailing = dest_path if dest_path.endswith("\\") else dest_path + "\\"
    result = {
        "status": "failed",
        "file_count": 0,
        "dest_path": dest_path,
        "source_path": file_info["path"],
        "error": None
    }
    try:
        hwnd = get_window_handle()
        file_obj = file_info["file_obj"]
        folder_id = file_info["folder_id"]

        # Step 1: Traverse the reference tree to find all files
        logger.info(f" Building reference tree...")
        ref_files = _collect_references(file_obj, folder_id, vault.vault, logger)
        file_count = len(ref_files)
        result["file_count"] = file_count
        logger.info(f" Reference tree: {file_count} file(s) found")

        if dry_run:
            for idx, ref in enumerate(ref_files, 1):
                logger.info(f" [{idx}/{file_count}] {ref['name']}")
            logger.info(f" DRY RUN: Would copy {file_count} file(s) to {dest_path}")
            result["status"] = "dry_run"
            return result

        # Create the output subfolder (real runs only)
        os.makedirs(dest_path, exist_ok=True)
        logger.debug(f"Output folder: {dest_path}")

        # Step 2: Copy each file to the output directory using GetFileCopy
        # Confirmed signature: GetFileCopy(lParentWnd, poVersionNoOrRevisionName, poPathOrFolderID, lEdmGetFlags, bsNewName)
        # lParentWnd = window handle (0 for headless)
        # poVersionNoOrRevisionName = version number (0 = latest)
        # poPathOrFolderID = destination folder path (must end with \)
        # lEdmGetFlags = EdmGet flags (EdmGet_Simple = 1)
        # bsNewName = new filename or empty string to keep original name
        copied = 0
        not_copied = []  # names of files that could not be exported
        for idx, ref in enumerate(ref_files, 1):
            ref_name = ref["name"]
            logger.debug(f" [{idx}/{file_count}] Copying {ref_name}...")
            try:
                # Get the file object from the vault by ID
                ref_file_obj = vault.vault.GetObject(EdmObject_File, ref["file_id"])
                if ref_file_obj is None:
                    logger.warning(f" Could not get file object for {ref_name} (ID: {ref['file_id']})")
                    not_copied.append(ref_name)
                    continue
                ref_file_obj.GetFileCopy(hwnd, 0, dest_path_trailing, EdmGet_Simple, "")
                copied += 1
            except Exception as copy_err:
                # Keep going: one bad file must not stop the rest of the tree.
                logger.warning(f" Failed to copy {ref_name}: {copy_err}")
                not_copied.append(ref_name)

        result["file_count"] = copied
        if not_copied:
            # An incomplete tree is a failure, not a success with a footnote.
            result["error"] = (
                f"{len(not_copied)} of {file_count} file(s) could not be copied: "
                f"{', '.join(not_copied)}"
            )
            logger.error(f" FAILED: {result['error']}")
        else:
            result["status"] = "success"
            logger.info(f" SUCCESS: {copied}/{file_count} file(s) exported to {dest_path}")
    except Exception as e:
        result["error"] = str(e)
        logger.error(f" FAILED: {e}")
        logger.debug(f" Full error details:", exc_info=True)
    return result
def batch_copy_tree(
    vault: PDMVaultConnection,
    file_list: List[Dict[str, Any]],
    output_dir: str,
    dry_run: bool = False
) -> Dict[str, List]:
    """
    Run the copy-tree export for every resolved part, one after another.

    Each part is exported into a subfolder of ``output_dir`` named after its
    part number. Results whose status is 'success' or 'dry_run' are grouped
    under 'success'; everything else goes under 'failed'.

    Args:
        vault: Active vault connection
        file_list: List of file info dicts (from search_and_resolve_parts)
        output_dir: Base output directory
        dry_run: If True, build trees but don't execute copies
    Returns:
        Dict with 'success' and 'failed' lists of result dicts
    """
    logger = logging.getLogger("batch_copy_tree")
    outcome = {"success": [], "failed": []}
    part_total = len(file_list)
    logger.info(f"Starting batch copy tree for {part_total} parts")
    logger.info("=" * 60)

    for position, entry in enumerate(file_list, 1):
        part_number = entry["part_number"]
        logger.info(f"[{position}/{part_total}] Processing copy tree for: {part_number} ({entry['filename']})")
        part_result = execute_copy_tree(vault, entry, output_dir, part_number, dry_run)
        # A dry run counts as a success for reporting purposes.
        succeeded = part_result["status"] in ("success", "dry_run")
        bucket = "success" if succeeded else "failed"
        outcome[bucket].append(part_result)

    return outcome
def test_copy_tree_api(vault: PDMVaultConnection, file_info: Dict[str, Any]) -> bool:
    """
    Exercise the reference-tree API on one file and log what it exposes.

    Nothing is copied. The function fetches the reference tree, lists the
    methods of the returned COM object, walks the tree, and finally lists
    the "get"/"copy" style methods of the file object. Introspection
    failures are logged as warnings and do not fail the test.

    Args:
        vault: Active vault connection
        file_info: Dict with file_obj, file_id, folder_id, path
    Returns:
        True if reference traversal succeeds, False otherwise
    """
    logger = logging.getLogger("batch_copy_tree")
    banner = "=" * 60

    def _member_names(com_obj):
        # Yield the name tuple (function name first, then parameter names)
        # for every function in the object's type info.
        type_info = com_obj._oleobj_.GetTypeInfo(0, 0)
        type_attr = type_info.GetTypeAttr()
        for func_index in range(type_attr.cFuncs):
            func_desc = type_info.GetFuncDesc(func_index)
            yield type_info.GetNames(func_desc.memid)

    def _signature(names):
        # "Name(arg1, arg2)" when parameters are known, bare name otherwise.
        if len(names) > 1:
            return f"{names[0]}({', '.join(names[1:])})"
        return names[0]

    logger.info(banner)
    logger.info("TESTING REFERENCE TREE TRAVERSAL")
    logger.info(banner)
    logger.info(f"Test file: {file_info['path']}")
    try:
        root_file = file_info["file_obj"]
        root_folder_id = file_info["folder_id"]

        # Step 1: Test GetReferenceTree
        logger.info("Step 1: Calling GetReferenceTree...")
        ref_tree = root_file.GetReferenceTree(root_folder_id, 0)
        if ref_tree is None:
            logger.error(" GetReferenceTree returned None")
            return False
        logger.info(f" OK - Got reference tree object: {type(ref_tree)}")

        # Step 2: Introspect reference tree object
        logger.info("Step 2: Inspecting IEdmReference5 COM object...")
        try:
            methods = [_signature(names) for names in _member_names(ref_tree)]
            logger.info(f" IEdmReference methods ({len(methods)}):")
            for method in methods:
                logger.info(f" - {method}")
        except Exception as intro_err:
            logger.warning(f" Could not introspect: {intro_err}")

        # Step 3: Traverse children
        logger.info("Step 3: Traversing reference tree children...")
        ref_files = _collect_references(root_file, root_folder_id, vault.vault, logger)
        logger.info(f" Found {len(ref_files)} file(s) in reference tree:")
        for idx, ref in enumerate(ref_files, 1):
            logger.info(f" [{idx}] {ref['name']}")

        # Step 4: Test GetFileCopy on root file (introspect only, don't copy)
        logger.info("Step 4: Inspecting IEdmFile5 GetFileCopy method...")
        try:
            for names in _member_names(root_file):
                lowered = names[0].lower()
                if "copy" in lowered or "get" in lowered:
                    logger.info(f" - {_signature(names)}")
        except Exception as intro_err:
            logger.warning(f" Could not introspect file object: {intro_err}")

        logger.info(banner)
        logger.info("API TEST PASSED")
        logger.info(banner)
        return True
    except Exception as e:
        logger.error(f"API TEST FAILED: {e}")
        logger.debug("Full error details:", exc_info=True)
        logger.info(banner)
        return False
# =============================================================================
# COMMAND LINE INTERFACE
# =============================================================================
def parse_arguments():
    """Build the command-line parser and return the parsed options.

    Options: -v/--vault (defaults to VAULT_NAME), -c/--csv (required),
    -o/--output-dir (required), -u/--username, --log-file, --dry-run and
    --test.
    """
    usage_epilog = """
Examples:
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output"
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output" -v "IDSVault" --dry-run
python batch_copy_tree.py -c parts.csv -o "C:\\Temp\\Output" --test
CSV format (one part number per line, no extensions, no header):
UDS.00056
WIDGET.00123
BRACKET.00789
Each part number's Copy Tree output goes to its own subfolder:
C:\\Temp\\Output\\UDS.00056\\{files...}
C:\\Temp\\Output\\WIDGET.00123\\{files...}
"""
    parser = argparse.ArgumentParser(
        description="Batch Copy Tree export for SolidWorks PDM Professional",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_epilog,
    )

    # (flags, keyword arguments) for each option, in help-output order.
    option_table = (
        (("-v", "--vault"), {
            "default": VAULT_NAME,
            "help": f"PDM vault name (default: {VAULT_NAME})",
        }),
        (("-c", "--csv"), {
            "required": True,
            "help": "Path to CSV file containing part numbers (one per line, no extensions)",
        }),
        (("-o", "--output-dir"), {
            "required": True,
            "help": "Base output directory for exported files (e.g., C:\\Temp\\Output)",
        }),
        (("-u", "--username"), {
            "help": "PDM username (will prompt if not provided)",
        }),
        (("--log-file",), {
            "help": "Custom log file path (default: auto-generated with timestamp)",
        }),
        (("--dry-run",), {
            "action": "store_true",
            "help": "Build copy trees and show what would be copied, but don't execute",
        }),
        (("--test",), {
            "action": "store_true",
            "help": "Test Copy Tree API calls on the first part only, then exit",
        }),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
# =============================================================================
# MAIN EXECUTION
# =============================================================================
def main():
    """Main entry point for batch copy tree processing.

    Flow: parse the command line, set up logging, prompt for credentials
    and the file extension, load the part numbers, log into the vault,
    resolve every part number to a vault file, then either run the API
    self-test (--test) or - after an interactive confirmation - export each
    part's reference tree. Lists of parts that were not found or that failed
    are written to timestamped text files in the current directory.

    The report files are written as UTF-8: part numbers are read from a
    UTF-8 CSV, so writing them with the platform's locale encoding could
    fail on characters that encoding cannot represent.

    Returns:
        Process exit code: 0 on success or user cancellation, 1 on any
        failure (bad input, login failure, nothing to process, failed test,
        or at least one failed part).
    """
    # Parse arguments
    args = parse_arguments()

    # Setup logging
    logger = setup_logging(args.log_file)
    logger.info("=" * 60)
    logger.info("PDM BATCH COPY TREE EXPORT")
    logger.info("=" * 60)
    logger.info(f"Vault: {args.vault}")
    logger.info(f"CSV File: {args.csv}")
    logger.info(f"Output Directory: {args.output_dir}")
    if args.dry_run:
        logger.info("Mode: DRY RUN (no files will be copied)")
    if args.test:
        logger.info("Mode: API TEST (testing on first part only)")

    # Get credentials
    username = args.username
    if not username:
        username = input("PDM Username: ")
    password = getpass.getpass("PDM Password: ")

    # Get file extension from user
    ext_input = input("Enter the file extension to search for (e.g., SLDASM, SLDDRW, SLDPRT): ").strip()
    ext_input = ext_input.lstrip(".") # Remove leading dot if user included one
    if not ext_input:
        logger.error("No extension provided. Exiting.")
        return 1
    extension = f".{ext_input}"
    logger.info(f"File extension: {extension}")

    # Load part numbers from CSV
    part_numbers = load_part_numbers_from_csv(args.csv)
    if not part_numbers:
        logger.error("No part numbers loaded from CSV. Exiting.")
        return 1
    logger.info(f"Loaded {len(part_numbers)} part numbers from CSV")

    # Connect to vault
    vault = PDMVaultConnection(args.vault)
    if not vault.connect(username, password):
        logger.error("Failed to connect to vault. Exiting.")
        return 1

    try:
        # Search for parts in vault
        logger.info("Searching for parts in vault...")
        logger.info("=" * 60)
        validation = search_and_resolve_parts(vault, part_numbers, extension)
        valid_count = len(validation["valid"])
        not_found_count = len(validation["not_found"])

        # Summary
        logger.info("=" * 60)
        logger.info("SEARCH RESULTS SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Parts ready to process: {valid_count}")
        logger.info(f"Parts not found: {not_found_count}")

        # Report not found
        if not_found_count > 0:
            logger.warning("\nParts not found in vault:")
            for pn in validation["not_found"]:
                logger.warning(f" - {pn}")
            # Save not found list
            not_found_file = f"not_found_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            with open(not_found_file, 'w', encoding='utf-8') as f:
                for pn in validation["not_found"]:
                    f.write(pn + "\n")
            logger.info(f"Not found list saved to: {not_found_file}")

        if valid_count == 0:
            logger.error("No valid parts to process. Exiting.")
            return 1

        # API test mode - test on first part then exit
        if args.test:
            test_passed = test_copy_tree_api(vault, validation["valid"][0])
            return 0 if test_passed else 1

        # Verify output directory is accessible
        try:
            os.makedirs(args.output_dir, exist_ok=True)
        except OSError as e:
            logger.error(f"Cannot create output directory '{args.output_dir}': {e}")
            return 1

        # Confirm before proceeding (only the literal answer "yes" proceeds)
        mode_label = "DRY RUN copy tree" if args.dry_run else "copy tree"
        print(f"\nReady to {mode_label} for {valid_count} parts to '{args.output_dir}'")
        confirm = input("Proceed? (yes/no): ").strip().lower()
        if confirm != "yes":
            logger.info("Operation cancelled by user")
            return 0

        # Execute batch copy tree
        results = batch_copy_tree(
            vault,
            validation["valid"],
            args.output_dir,
            dry_run=args.dry_run
        )

        # Final report
        logger.info("=" * 60)
        logger.info("BATCH COPY TREE COMPLETE")
        logger.info("=" * 60)
        logger.info(f"Total parts processed: {valid_count}")
        logger.info(f"Successful: {len(results['success'])}")
        logger.info(f"Failed: {len(results['failed'])}")
        if results["success"]:
            total_files = sum(r["file_count"] for r in results["success"])
            logger.info(f"Total files exported: {total_files}")

        if results["failed"]:
            logger.warning("\nFailed parts:")
            for r in results["failed"]:
                logger.warning(f" - {r['source_path']}: {r['error']}")
            # Write failed parts to a separate file for retry
            failed_file = f"failed_copies_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            with open(failed_file, 'w', encoding='utf-8') as f:
                for r in results["failed"]:
                    f.write(f"{r['source_path']}\t{r['error']}\n")
            logger.info(f"Failed parts list saved to: {failed_file}")

        return 0 if not results["failed"] else 1
    finally:
        # Always release the vault, including on early returns above.
        vault.disconnect()
if __name__ == "__main__":
    # Exit with main()'s return code. SystemExit is raised directly because
    # the bare exit() helper is injected by the `site` module and is not
    # guaranteed to exist (e.g. `python -S` or frozen executables).
    raise SystemExit(main())

View File

@@ -0,0 +1,864 @@
"""
Batch Workflow Processing for PDM Vault (Path-based)
=====================================================
This module provides a framework for:
1. Logging into a PDM vault via API
2. Processing a list of files (by full vault path) through a specified workflow transition
Usage:
python batch_workflows_paths.py --vault "MyVault" --csv "files.csv" --transition "citadel_set_production released"
"""
import logging
import argparse
import getpass
import time
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any
import ctypes
import pythoncom
import win32com.client
import comtypes
import comtypes.automation
from comtypes import COMMETHOD, GUID, HRESULT
from comtypes.automation import IDispatch as _CT_IDispatch
# PDM API Type Library Constants
# Plain integer stand-ins for the PDM type-library enum values.
EdmObject_File = 1  # object-type code passed to GetObject() to fetch a file
EdmObject_Folder = 2  # not referenced in the portion of this file reviewed here
EdmObject_Workflow = 6  # not referenced in the portion of this file reviewed here
# ---------------------------------------------------------------------------
# comtypes interface for IEdmFile13 — enables vtable call to ChangeState3.
#
# Confirmed from gen_py stubs (IEdmFile13.py):
# IID : {DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}
# oVft : 432 → slot 54 (IUnknown[0-2] + IDispatch[3-6] + 47 placeholders[7-53])
# ---------------------------------------------------------------------------
# 47 dummy method declarations. They are never called; they only occupy the
# vtable slots that precede ChangeState3 so that the one real method declared
# below lands on slot 54.
_IEdmFile13_phs = [COMMETHOD([], HRESULT, f"_ph{i}") for i in range(47)]
# Pointer-to-VARIANT type used for the two "ID or name" parameters.
_VARIANT_p = ctypes.POINTER(comtypes.automation.VARIANT)
class _IEdmFile13_CT(_CT_IDispatch):
    # Partial comtypes declaration of IEdmFile13: only ChangeState3 is
    # described for real. The order and count of entries in _methods_ define
    # the vtable layout, so nothing may be inserted, removed or reordered here
    # without re-checking the slot number quoted in the header comment.
    _iid_ = GUID("{DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}")
    _idlflags_ = ["dual", "oleautomation"]
    _methods_ = _IEdmFile13_phs + [
        COMMETHOD(
            [], HRESULT, "ChangeState3",
            (["in"], _VARIANT_p, "poStateIdOrName"),
            (["in"], _VARIANT_p, "poTransitionIdOrName"),
            (["in"], ctypes.c_long, "lFolderID"),
            (["in"], ctypes.c_wchar_p, "bsComment"),
            (["in"], ctypes.c_long, "lParentWnd"),
            (["in"], ctypes.c_long, "lEdmStateFlags"),
            (["in"], ctypes.c_wchar_p, "bsPasswd"),
        ),
    ]
def _make_i4_variant(val: int) -> comtypes.automation.VARIANT:
    """Build a VARIANT tagged VT_I4 whose payload is the given integer.

    The type tag is set through the ``vt`` field and the value is written
    straight into the VARIANT's memory, 8 bytes past its start, as a C int.
    """
    vt_i4 = 3  # VT_I4
    payload_offset = 8  # byte offset of the value written below

    variant = comtypes.automation.VARIANT()
    variant.vt = vt_i4
    payload = ctypes.cast(
        ctypes.byref(variant, payload_offset),
        ctypes.POINTER(ctypes.c_int),
    )
    payload[0] = int(val)
    return variant
class _Phase2AVError(OSError):
    """ChangeState3 hit an access violation on every attempt, retries included.

    Kept as its own OSError subclass so that batch_transition can tell these
    failures apart from ordinary OSErrors, count how many occur in a row,
    and reconnect the vault to reset PDM's in-process state once corruption
    keeps repeating.
    """
# Escalating backoff for Phase-2 access violations (seconds between attempts).
# PDM's in-process DLL can take progressively longer to clean up after state
# corruption accumulates; a longer final sleep gives it a real chance to settle.
# The tuple's length also fixes the retry budget: _changestate3 makes
# 1 + len(_PHASE2_BACKOFF) attempts and indexes this tuple by attempt number
# for both its Phase-1 and Phase-2 sleeps.
_PHASE2_BACKOFF = (3, 10, 30)
def _changestate3(vault_obj, file_id: int, to_state_id: int,
                  transition_id: int, folder_id: int, comment: str,
                  password: str) -> None:
    """
    Call IEdmFile13::ChangeState3 via comtypes vtable to transition a file
    using a *specific* transition ID, bypassing the ambiguous ChangeState.

    Uses the primary win32com vault for GetObject so that the returned COM
    proxy is in the primary connection's context. Bridging to comtypes is
    done by reading the IEdmFile13* stored inside the pythoncom PyIBase
    wrapper at CPython object offset 16, then calling QueryInterface to get
    an AddRef'd comtypes pointer.

    Reads from _oleobj_ directly (not from a secondary QI(IID_IUnknown) result)
    because for aggregated COM objects the controlling IUnknown can be at a
    different address with a shorter lifetime than the IEdmFile13* itself.

    Retries up to 3 additional times on Phase-1 access-violation or
    misaligned-pointer failures, and on Phase-2 access violations inside
    ChangeState3 itself (with escalating backoff of 3s, 10s, 30s between
    attempts to give PDM's in-process DLL time to clean up corrupted state).
    If all Phase-2 attempts fail, raises _Phase2AVError so batch_transition
    can trigger a vault reconnect after repeated failures.

    Args:
        vault_obj: Vault COM object; its GetObject() is used to fetch the file.
        file_id: ID of the file to transition.
        to_state_id: Destination state ID (sent as a VT_I4 VARIANT).
        transition_id: Transition ID (sent as a VT_I4 VARIANT).
        folder_id: Passed as ChangeState3's lFolderID.
        comment: Passed as ChangeState3's bsComment.
        password: Passed as ChangeState3's bsPasswd.
    Raises:
        _Phase2AVError: ChangeState3 raised an access violation on every attempt.
        OSError, RuntimeError: A Phase-1 failure that is not retryable or
            that persisted through the last attempt, or a Phase-2 OSError
            that is not an access violation.
    """
    logger = logging.getLogger("batch_workflows_paths")
    max_attempts = 1 + len(_PHASE2_BACKOFF) # initial + 3 retries
    for attempt in range(max_attempts):
        if attempt > 0:
            logger.debug(f" [CS3] Retry {attempt} for file ID {file_id}")
        # --- Phase 1: obtain a comtypes IEdmFile13 pointer for the file ---
        file_obj = None
        try:
            # Fresh COM wrapper each attempt — primary vault, no competing refs.
            file_obj = win32com.client.CastTo(
                vault_obj.GetObject(EdmObject_File, file_id), 'IEdmFile13'
            )
            # CastTo('IEdmFile13') calls QI(IID_IEdmFile13) so _oleobj_ already
            # holds the IEdmFile13* directly. Read it at offset 16 in the
            # CPython object struct (ob_refcnt[8] + ob_type[8] + m_pUnknown[8]).
            py_disp = file_obj._oleobj_
            raw_ptr = ctypes.c_uint64.from_address(id(py_disp) + 16).value
            logger.debug(f" [CS3] raw_ptr={raw_ptr:#018x} (& 7 == {raw_ptr & 7})")
            # Sanity check before dereferencing: a null or non-8-byte-aligned
            # value is rejected and handled as a retryable failure below.
            if not raw_ptr or (raw_ptr & 0x7) != 0:
                raise RuntimeError(f"Misaligned IEdmFile13* at offset 16: {raw_ptr:#x}")
            ct_unk = ctypes.cast(raw_ptr, ctypes.POINTER(comtypes.IUnknown))
            file13 = ct_unk.QueryInterface(_IEdmFile13_CT) # AddRefs independently
            logger.debug(f" [CS3] QI OK (attempt {attempt})")
        except (OSError, RuntimeError) as exc:
            # Drop the win32com wrapper before sleeping or re-raising.
            if file_obj is not None:
                del file_obj
            # Retryable: an OSError whose text mentions an access violation,
            # or any RuntimeError (including the misaligned-pointer check).
            is_retryable = (
                (isinstance(exc, OSError) and 'access violation' in str(exc).lower())
                or isinstance(exc, RuntimeError)
            )
            if is_retryable and attempt < max_attempts - 1:
                # Phase 1 reuses the Phase-2 backoff schedule.
                sleep_s = _PHASE2_BACKOFF[attempt]
                logger.debug(
                    f" [CS3] Phase-1 failure ({exc}); sleeping {sleep_s}s then retrying"
                )
                time.sleep(sleep_s)
                continue
            raise
        # Release win32com wrapper — file13 holds its own AddRef'd reference.
        del file_obj
        # --- Phase 2: call ChangeState3 ---
        v_state = _make_i4_variant(to_state_id)
        v_trans = _make_i4_variant(transition_id)
        try:
            file13.ChangeState3(
                ctypes.byref(v_state),
                ctypes.byref(v_trans),
                ctypes.c_long(folder_id),
                comment,
                ctypes.c_long(0),  # lParentWnd
                ctypes.c_long(0),  # lEdmStateFlags
                password,
            )
            return # success
        except OSError as exc:
            # Access violation inside ChangeState3 (PDM in-process DLL crashes while
            # accessing internal state left over from a recent transition). An
            # escalating sleep (3s, 10s, 30s) lets PDM's post-transition cleanup
            # finish, then we retry with a fresh COM wrapper. After all retries
            # are exhausted, raise _Phase2AVError so batch_transition can count
            # consecutive failures and reconnect the vault.
            if 'access violation' in str(exc).lower():
                if attempt < max_attempts - 1:
                    sleep_s = _PHASE2_BACKOFF[attempt]
                    logger.debug(
                        f" [CS3] Phase-2 access violation ({exc}); "
                        f"sleeping {sleep_s}s for PDM cleanup then retrying"
                    )
                    time.sleep(sleep_s)
                    continue
                raise _Phase2AVError(
                    f"ChangeState3 access violation after {max_attempts} attempts: {exc}"
                ) from exc
            # Any other OSError is not retried.
            raise
# =============================================================================
# CONFIGURATION - Can be overridden via command line
# =============================================================================
# NOTE(review): the command-line parser that consumes these defaults is outside
# the portion of the file reviewed here - confirm how they are wired up.
VAULT_NAME = "IDSVault" # Default vault name
DEFAULT_TRANSITION = "Citadel_mig_Set Proto Released" # Default transition name
# =============================================================================
# LOGGING SETUP
# =============================================================================
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
    """Configure logging for the batch process.

    Attaches one file handler (DEBUG and above) and one console handler
    (INFO and above) to the shared "batch_workflows_paths" logger.

    Calling this function more than once is safe: handlers installed by a
    previous call are removed and closed first, so every record is emitted
    exactly once per destination instead of once per call.

    Args:
        log_file: Path of the log file. When None, a timestamped file name
            of the form ``batch_workflow_paths_YYYYMMDD_HHMMSS.log`` is used
            (relative to the current working directory).

    Returns:
        The configured "batch_workflows_paths" logger.
    """
    if log_file is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = f"batch_workflow_paths_{timestamp}.log"

    logger = logging.getLogger("batch_workflows_paths")
    logger.setLevel(logging.DEBUG)

    # Drop handlers left over from an earlier call; otherwise each call would
    # stack another file/console pair and every message would be duplicated.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # File handler: full detail. UTF-8 so that vault paths with non-ASCII
    # characters never fail to encode under the Windows locale codec.
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)

    # Console handler: progress-level output only.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger
# =============================================================================
# PDM VAULT CONNECTION
# =============================================================================
class PDMVaultConnection:
    """Handles connection and authentication to the PDM vault.

    Wraps a "ConisioLib.EdmVault" COM object. Instances can be used as a
    context manager; leaving the ``with`` block calls disconnect().
    """

    def __init__(self, vault_name: str):
        """Create an unconnected wrapper for the vault named vault_name."""
        self.vault_name = vault_name
        self.vault = None
        self.is_connected = False
        self.logger = logging.getLogger("batch_workflows_paths")
        # Credentials of the last successful login, kept so reconnect()
        # can log in again without prompting.
        self._username = None
        self._password = None
        # Flag written by the transition code and read by the batch loop to
        # count consecutive Phase-2 access violations. Initialised here so
        # the attribute always exists on the instance.
        self._last_was_phase2_av = False

    def connect(self, username: str, password: str) -> bool:
        """
        Connect and log into the PDM vault with username/password.

        Args:
            username: PDM username
            password: PDM password

        Returns:
            True if connection successful, False otherwise
        """
        try:
            self.vault = win32com.client.Dispatch("ConisioLib.EdmVault")
            # Login with credentials
            self.vault.Login(username, password, self.vault_name)
            self.is_connected = True
            # Remember the credentials only after a successful login.
            self._username = username
            self._password = password
            self.logger.info(f"Successfully connected to vault: {self.vault_name}")
            self.logger.info(f"Logged in as: {username}")
            return True
        except Exception as e:
            self.logger.error(f"Failed to connect to vault '{self.vault_name}': {e}")
            self.is_connected = False
            return False

    def disconnect(self) -> None:
        """Drop the vault COM reference and mark the connection closed.

        Does nothing when no vault object is held. Stored credentials are
        kept so that reconnect() can still be used afterwards.
        """
        if self.vault is not None:
            try:
                self.vault = None
                self.is_connected = False
                self.logger.info("Disconnected from vault")
            except Exception as e:
                self.logger.warning(f"Error during disconnect: {e}")

    def reconnect(self) -> bool:
        """Force a full disconnect + re-login using the stored credentials.

        Used to reset PDM's in-process DLL state after repeated Phase-2 access
        violations indicate the vault connection's internal data structures have
        been corrupted.

        Returns:
            True if the re-login succeeded; False if it failed or if no
            credentials from a previous successful connect() are stored.
        """
        if self._username is None or self._password is None:
            self.logger.error("Cannot reconnect: no stored credentials")
            return False
        username, password = self._username, self._password
        self.logger.info("Reconnecting vault to reset PDM internal state...")
        self.disconnect()
        # Give the in-process DLL a moment to release any lingering state.
        time.sleep(2)
        return self.connect(username, password)

    def get_file(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Get a file object from the vault by full path.

        Args:
            file_path: Full path to the file in the vault

        Returns:
            Dict with 'file_obj', 'folder_obj', 'path', or None when the
            connection is not open, the folder or file is not found, or any
            error occurs during the lookup (the error is logged).
        """
        if not self.is_connected:
            self.logger.error("Not connected to vault")
            return None
        try:
            folder_path = str(Path(file_path).parent)
            folder_obj = self.vault.GetFolderFromPath(folder_path)
            if folder_obj is None:
                self.logger.warning(f"Folder not found: {folder_path}")
                return None
            # GetFileFromPath returns (file_obj, file_id) tuple
            result = self.vault.GetFileFromPath(file_path, folder_obj)
            # Handle tuple return value
            if isinstance(result, tuple):
                file_obj = result[0]
            else:
                file_obj = result
            if file_obj is None:
                return None
            # Re-fetch via GetObject, then cast to IEdmFile13 so CurrentState and
            # transition methods are accessible regardless of gen_py stub state.
            file_obj = self.vault.GetObject(EdmObject_File, file_obj.ID)
            file_obj = win32com.client.CastTo(file_obj, 'IEdmFile13')
            return {
                "file_obj": file_obj,
                "folder_obj": folder_obj,
                "path": file_path
            }
        except Exception as e:
            self.logger.error(f"Error getting file '{file_path}': {e}")
            return None

    def __enter__(self):
        """Context manager entry: return this connection object."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: always disconnect; exceptions propagate."""
        self.disconnect()
# =============================================================================
# FILE LIST HANDLING
# =============================================================================
def load_file_list_from_csv(csv_path: str) -> List[str]:
    """
    Read vault file paths from a single-column text/CSV file.

    Every line is stripped of surrounding whitespace; blank lines are
    dropped. The file is decoded as UTF-8 and a leading BOM is ignored.

    Args:
        csv_path: Path to the CSV file with one file path per line

    Returns:
        The paths in file order. If the file is missing or cannot be read,
        the error is logged and whatever was collected so far (possibly an
        empty list) is returned.
    """
    log = logging.getLogger("batch_workflows_paths")
    collected: List[str] = []
    try:
        with open(csv_path, 'r', encoding='utf-8-sig') as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry:
                    # Skip empty lines
                    continue
                collected.append(entry)
        log.info(f"Loaded {len(collected)} file paths from {csv_path}")
    except FileNotFoundError:
        log.error(f"CSV file not found: {csv_path}")
    except Exception as e:
        log.error(f"Error reading CSV file: {e}")
    return collected
def validate_files(
    vault: PDMVaultConnection,
    file_paths: List[str]
) -> Dict[str, List]:
    """
    Look up each path in the vault and sort it into found / not found.

    Args:
        vault: Active vault connection
        file_paths: List of full vault paths to validate

    Returns:
        Dict with:
        - 'valid': list of {"path", "current_state"} dicts, one per file
          that get_file() located; "current_state" is the workflow state
          name, or "Unknown" when it is absent or could not be read
        - 'not_found': list of paths get_file() returned None for
    """
    log = logging.getLogger("batch_workflows_paths")
    found: List[Dict[str, str]] = []
    missing: List[str] = []
    count = len(file_paths)
    for index, path in enumerate(file_paths, 1):
        log.info(f"[{index}/{count}] Validating: {path}")
        info = vault.get_file(path)
        if info is None:
            missing.append(path)
            log.warning(f" NOT FOUND: {path}")
            continue
        # Only plain strings are stored in the result. The COM wrappers in
        # `info` are deliberately not kept: holding them across the whole
        # validation pass leaves stale proxies around that can be
        # invalidated once other files start transitioning.
        # transition_file() fetches its own fresh wrapper when needed.
        try:
            state = info["file_obj"].CurrentState
            label = state.Name if state else "Unknown"
        except Exception as e:
            log.warning(f" Could not get state: {e}")
            label = "Unknown"
        found.append({"path": path, "current_state": label})
        log.info(f" FOUND (State: {label})")
    return {"valid": found, "not_found": missing}
# =============================================================================
# WORKFLOW PROCESSING
# =============================================================================
def get_available_transitions(vault: PDMVaultConnection, file_obj) -> List[Dict[str, Any]]:
    """
    List the transitions leaving the file's current workflow state.

    Args:
        vault: Active vault connection (not used by the lookup itself)
        file_obj: IEdmFile object

    Returns:
        One dict per transition with 'name', 'id' and 'target_state'
        ("Unknown" when the transition has no target state). Empty when the
        file has no current state. If an error occurs part-way through, it
        is logged and the transitions gathered so far are returned.
    """
    log = logging.getLogger("batch_workflows_paths")
    found: List[Dict[str, Any]] = []
    try:
        state = file_obj.CurrentState
        if state is not None:
            cursor = state.GetFirstTransitionPosition()
            # GetNextTransition advances the cursor; IsNull marks the end.
            while not cursor.IsNull:
                item = state.GetNextTransition(cursor)
                entry = {
                    "name": item.Name,
                    "id": item.ID,
                    "target_state": item.ToState.Name if item.ToState else "Unknown",
                }
                found.append(entry)
    except Exception as e:
        log.error(f"Error getting transitions: {e}")
    return found
def transition_file(
    vault: PDMVaultConnection,
    file_info: Dict[str, Any],
    transition_name: str,
    comment: str = ""
) -> str:
    """
    Transition a single file using a named transition.

    The file is looked up afresh, the transition is located by
    case-insensitive name among those leaving the current state, the plain
    IDs/names needed are copied out, every lookup COM wrapper is released,
    and only then is _changestate3 called. Afterwards the file is
    re-fetched to check that it really is in the expected state.

    Args:
        vault: Active vault connection
        file_info: Dict containing 'path' (and optionally 'current_state' for logging)
        transition_name: Name of the transition to execute
        comment: Optional transition comment

    Returns:
        One of "success", "not_available", or "failed".
        - "success": transition completed and state verified
        - "not_available": named transition is not valid from the file's current
          state (typically means the file is already in the target state from a
          prior run — not a real failure, just a no-op)
        - "failed": real failure (access violation, missing file, state
          unchanged after call, etc.) — worth retrying

    Side effects:
        Sets vault._last_was_phase2_av to True when _changestate3 raises
        _Phase2AVError and to False when the call returns normally.
    """
    logger = logging.getLogger("batch_workflows_paths")
    file_path = file_info["path"]
    try:
        # Fetch a fresh COM wrapper right now — not from validation.
        # By the time this file is processed, earlier transitions may have
        # caused PDM to invalidate COM proxies obtained during the validation
        # pass. A fresh GetObject/CastTo gives a clean proxy every time.
        fresh = vault.get_file(file_path)
        if fresh is None:
            logger.error(f"File no longer accessible in vault: {file_path}")
            return "failed"
        file_obj = fresh["file_obj"]
        folder_obj = fresh["folder_obj"]
        # Get current state and find the transition
        current_state = file_obj.CurrentState
        if current_state is None:
            logger.error(f"File has no workflow state: {file_path}")
            return "failed"
        # Find the transition by name
        target_transition = None
        trans_pos = current_state.GetFirstTransitionPosition()
        while not trans_pos.IsNull:
            transition = current_state.GetNextTransition(trans_pos)
            if transition.Name.lower() == transition_name.lower():
                target_transition = transition
                break
        if target_transition is None:
            available = get_available_transitions(vault, file_obj)
            available_names = [t["name"] for t in available]
            logger.error(
                f"Transition '{transition_name}' not available for {file_path}. "
                f"Current state: {current_state.Name}. "
                f"Available transitions: {available_names}"
            )
            return "not_available"
        # Collect everything we need from the COM objects, then release them
        # before calling _changestate3. _changestate3 fetches its own wrapper
        # internally, so having the lookup wrapper alive simultaneously would
        # create competing COM references and corrupt PDM's internal state.
        old_state_name = current_state.Name
        expected_state_name = target_transition.ToState.Name
        to_state_id = target_transition.ToState.ID
        trans_id = target_transition.ID
        file_id = file_obj.ID
        folder_id = folder_obj.ID
        logger.info(
            f" Transition: '{target_transition.Name}' (ID: {trans_id}) | "
            f"ToState: '{expected_state_name}' (ID: {to_state_id}) | "
            f"Folder ID: {folder_id} | File ID: {file_id}"
        )
        # ↓ Release all COM wrappers from the lookup phase before the vtable call.
        # `transition` (the loop variable) is the same object as
        # `target_transition`, so it must be deleted too — otherwise that
        # wrapper stays referenced and is not actually released.
        del (
            file_obj, folder_obj, fresh, current_state,
            target_transition, transition, trans_pos,
        )
        try:
            _changestate3(
                vault.vault,
                file_id,
                to_state_id,
                trans_id,
                folder_id,
                comment,
                vault._password or "",
            )
        except _Phase2AVError as exc:
            # Persistent access violation — flag for batch_transition to count
            # against the consecutive-failure threshold for reconnect.
            vault._last_was_phase2_av = True
            logger.error(f"Failed to transition {file_path}: {exc}")
            return "failed"
        else:
            vault._last_was_phase2_av = False
        # Verify the state actually changed — re-fetch and cast to IEdmFile13
        fresh_file = win32com.client.CastTo(
            vault.vault.GetObject(EdmObject_File, file_id), 'IEdmFile13'
        )
        new_state = fresh_file.CurrentState
        actual_state_name = new_state.Name if new_state else "Unknown"
        if actual_state_name.lower() == expected_state_name.lower():
            logger.info(
                f"SUCCESS: {file_path} | "
                f"{old_state_name} -> {actual_state_name}"
            )
            return "success"
        else:
            logger.error(
                f"FAILED (state unchanged): {file_path} | "
                f"Expected: {expected_state_name}, Actual: {actual_state_name}"
            )
            return "failed"
    except Exception as e:
        # Anything unexpected (COM errors, missing attributes, ...) is
        # reported as a retryable failure rather than aborting the batch.
        logger.error(f"Failed to transition {file_path}: {e}")
        return "failed"
def batch_transition(
    vault: PDMVaultConnection,
    file_list: List[Dict[str, Any]],
    transition_name: str,
    comment: str = ""
) -> Dict[str, List[str]]:
    """
    Transition multiple files using a named transition.

    Files are processed in order through transition_file(). After three
    consecutive Phase-2 access violations the vault is reconnected. If that
    reconnect fails the batch is aborted, and every file that was not yet
    attempted is recorded under 'failed' so it appears in the retry list
    instead of silently disappearing from the results.

    Args:
        vault: Active vault connection
        file_list: List of file info dicts (from validate_files)
        transition_name: Name of the transition to execute
        comment: Optional transition comment

    Returns:
        Dict with 'success', 'failed', and 'not_available' file lists.
        - 'success': transition completed
        - 'failed': real failure worth retrying (access violation, etc.),
          plus any files left unprocessed by an aborted batch
        - 'not_available': transition not valid from current state — typically
          means the file is already in the target state from a prior run
    """
    logger = logging.getLogger("batch_workflows_paths")
    results = {"success": [], "failed": [], "not_available": []}
    total = len(file_list)
    logger.info(f"Starting batch transition: '{transition_name}' for {total} files")
    logger.info("=" * 60)
    # Consecutive Phase-2 access-violation counter. When it hits the threshold,
    # the vault connection's in-process state is likely corrupted past the
    # point where sleeping will help, so force a full disconnect + re-login.
    consecutive_phase2_av = 0
    PHASE2_AV_RECONNECT_THRESHOLD = 3
    for i, file_info in enumerate(file_list, 1):
        file_path = file_info["path"]
        logger.info(f"[{i}/{total}] Processing: {file_path}")
        # Clear the flag so a stale value from the previous file cannot be
        # mistaken for a Phase-2 failure of this one.
        vault._last_was_phase2_av = False
        status = transition_file(vault, file_info, transition_name, comment)
        if status == "success":
            results["success"].append(file_path)
            consecutive_phase2_av = 0
        elif status == "not_available":
            results["not_available"].append(file_path)
            consecutive_phase2_av = 0
        else:  # "failed"
            results["failed"].append(file_path)
            if getattr(vault, "_last_was_phase2_av", False):
                consecutive_phase2_av += 1
                logger.warning(
                    f" Phase-2 AV streak: {consecutive_phase2_av}/"
                    f"{PHASE2_AV_RECONNECT_THRESHOLD}"
                )
                if consecutive_phase2_av >= PHASE2_AV_RECONNECT_THRESHOLD:
                    logger.warning(
                        f"{consecutive_phase2_av} consecutive Phase-2 access "
                        "violations — forcing vault reconnect"
                    )
                    if vault.reconnect():
                        logger.info("Vault reconnected successfully")
                    else:
                        logger.error(
                            "Vault reconnect failed — aborting remaining batch"
                        )
                        # `i` is 1-based, so file_list[i:] is exactly the
                        # files after the current one: those never tried.
                        unprocessed = [
                            entry["path"] for entry in file_list[i:]
                        ]
                        if unprocessed:
                            results["failed"].extend(unprocessed)
                            logger.error(
                                f"{len(unprocessed)} unprocessed file(s) "
                                "recorded as failed for retry"
                            )
                        break
                    consecutive_phase2_av = 0
            else:
                consecutive_phase2_av = 0
    return results
# =============================================================================
# COMMAND LINE INTERFACE
# =============================================================================
def parse_arguments():
    """Build the command-line parser and return the parsed arguments.

    Options: -v/--vault, -c/--csv (required), -t/--transition, --comment,
    -u/--username and --log-file.
    """
    cli = argparse.ArgumentParser(
        description="Batch workflow transitions for SolidWorks PDM Professional (path-based)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python batch_workflows_paths.py --vault "MyVault" --csv "files.csv" --transition "citadel_set_production released"
python batch_workflows_paths.py -v "MyVault" -c "wip_files.csv" -t "citadel_set_wip"
CSV format (one full vault path per line):
C:\\IDSVault\\Parts\\widget.sldprt
C:\\IDSVault\\Parts\\bracket.sldprt
C:\\IDSVault\\Assemblies\\main_assy.sldasm
"""
    )
    # (flags, keyword options) for each argument, in the order they should
    # appear in --help.
    option_table = (
        (("-v", "--vault"), {
            "default": VAULT_NAME,
            "help": f"PDM vault name (default: {VAULT_NAME})",
        }),
        (("-c", "--csv"), {
            "required": True,
            "help": "Path to CSV file containing full vault paths",
        }),
        (("-t", "--transition"), {
            "default": DEFAULT_TRANSITION,
            "help": f"Workflow transition name to execute (default: {DEFAULT_TRANSITION})",
        }),
        (("--comment",), {
            "default": "Batch workflow transition",
            "help": "Comment for the workflow transition",
        }),
        (("-u", "--username"), {
            "help": "PDM username (will prompt if not provided)",
        }),
        (("--log-file",), {
            "help": "Custom log file path (default: auto-generated with timestamp)",
        }),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
# =============================================================================
# MAIN EXECUTION
# =============================================================================
def _write_path_list(out_path: str, paths: List[str]) -> None:
    """Write one path per line to out_path, encoded as UTF-8.

    UTF-8 matches how the input CSV is decoded, so a saved list can be fed
    back in as the next run's CSV and paths with characters outside the
    locale code page cannot abort the report.
    """
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.writelines(f"{path}\n" for path in paths)


def main():
    """Main entry point for batch workflow processing.

    Parses the command line, prompts for credentials, loads the path list,
    connects to the vault, validates the paths, runs the batch transition
    and writes the not-found / failed / not-available lists to timestamped
    text files in the current directory.

    Returns:
        Process exit code: 0 when no transition failed; 1 when the CSV
        yielded no paths, the vault login failed, no path was found in the
        vault, or at least one transition failed.
    """
    # Parse arguments
    args = parse_arguments()
    # Setup logging
    logger = setup_logging(args.log_file)
    logger.info("=" * 60)
    logger.info("PDM BATCH WORKFLOW PROCESSOR (PATH-BASED)")
    logger.info("=" * 60)
    logger.info(f"Vault: {args.vault}")
    logger.info(f"CSV File: {args.csv}")
    logger.info(f"Transition: {args.transition}")
    logger.info(f"Comment: {args.comment}")
    # Get credentials
    username = args.username
    if not username:
        username = input("PDM Username: ")
    password = getpass.getpass("PDM Password: ")
    # Load files from CSV
    file_paths = load_file_list_from_csv(args.csv)
    if not file_paths:
        logger.error("No files loaded from CSV. Exiting.")
        return 1
    logger.info(f"Loaded {len(file_paths)} file paths from CSV")
    # Connect to vault
    vault = PDMVaultConnection(args.vault)
    if not vault.connect(username, password):
        logger.error("Failed to connect to vault. Exiting.")
        return 1
    try:
        # Validate files exist in vault
        logger.info("Validating files in vault...")
        logger.info("=" * 60)
        validation = validate_files(vault, file_paths)
        valid_count = len(validation["valid"])
        not_found_count = len(validation["not_found"])
        # Summary
        logger.info("=" * 60)
        logger.info("VALIDATION SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Files ready to process: {valid_count}")
        logger.info(f"Files not found: {not_found_count}")
        # Report not found
        if not_found_count > 0:
            logger.warning("\nFiles not found in vault:")
            for missing_path in validation["not_found"]:
                logger.warning(f" - {missing_path}")
            # Save not found list
            not_found_file = f"not_found_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
            _write_path_list(not_found_file, validation["not_found"])
            logger.info(f"Not found list saved to: {not_found_file}")
        if valid_count == 0:
            logger.error("No valid files to process. Exiting.")
            return 1
        # Execute batch transition
        results = batch_transition(
            vault,
            validation["valid"],
            args.transition,
            comment=args.comment
        )
        # Final report
        logger.info("=" * 60)
        logger.info("BATCH PROCESS COMPLETE")
        logger.info("=" * 60)
        logger.info(f"Total files processed: {valid_count}")
        logger.info(f"Successful transitions: {len(results['success'])}")
        logger.info(f"Failed transitions: {len(results['failed'])}")
        logger.info(
            f"Transition not available (likely already in target state): "
            f"{len(results['not_available'])}"
        )
        # One timestamp shared by both result files of this run.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        if results["failed"]:
            logger.warning("\nFailed files (real failures — retry these):")
            for failed_path in results["failed"]:
                logger.warning(f" - {failed_path}")
            failed_file = f"failed_transitions_{timestamp}.txt"
            _write_path_list(failed_file, results["failed"])
            logger.info(f"Failed file list saved to: {failed_file}")
        if results["not_available"]:
            not_avail_file = f"not_available_{timestamp}.txt"
            _write_path_list(not_avail_file, results["not_available"])
            logger.info(
                f"Not-available file list saved to: {not_avail_file} "
                f"(likely already in target state — not retried)"
            )
        return 0 if not results["failed"] else 1
    finally:
        # Runs on every exit path after a successful connect, including the
        # early `return 1` above and any unexpected exception.
        vault.disconnect()
if __name__ == "__main__":
    # exit() is a convenience name injected by the `site` module for
    # interactive sessions and is not guaranteed to exist (for example
    # under `python -S`). Raising SystemExit has the same effect — main()'s
    # return value becomes the process exit code — with no such dependency.
    raise SystemExit(main())

788
helpers/db_helper.py Normal file
View File

@@ -0,0 +1,788 @@
"""
Database Helper for PDM Migration
==================================
Interactive tool for running SELECT queries, transforming results, and
inserting new rows — with mandatory terminal confirmation before any
write operation touches the database.
Usage:
python db_helper.py --db target_db --task copy_with_new_id
python db_helper.py --db source_db --query "SELECT TOP 10 * FROM Documents"
python db_helper.py --db target_db --task copy_with_new_id --dry-run
"""
import json
import logging
import argparse
import sys
import os
import glob
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any, Callable, Tuple, Set
# db_utils lives one directory up
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from db_utils import DatabaseConnection
# =============================================================================
# CONFIGURATION
# =============================================================================
# config.json in the directory one level above this script's directory.
CONFIG_PATH = Path(__file__).resolve().parent.parent / "config.json"
# "queries" folder located next to this script; searched for *.sql files.
QUERIES_DIR = Path(__file__).resolve().parent / "queries"
def load_config() -> dict:
    """Read CONFIG_PATH as UTF-8 text and return the parsed JSON content."""
    raw_text = CONFIG_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
def load_query(name: str) -> str:
    """
    Return the text of a named SQL file from the queries/ folder.

    Args:
        name: Query name (filename without .sql extension).
            e.g. "get_var47" loads queries/get_var47.sql

    Returns:
        The file's content, decoded as UTF-8, with leading and trailing
        whitespace stripped.

    Raises:
        FileNotFoundError: No such .sql file exists; the message lists the
            query names that are available.
    """
    sql_path = QUERIES_DIR / f"{name}.sql"
    if sql_path.exists():
        return sql_path.read_text(encoding="utf-8").strip()
    # Not found: help the caller by listing what can be loaded.
    known = sorted(candidate.stem for candidate in QUERIES_DIR.glob("*.sql"))
    raise FileNotFoundError(
        f"Query '{name}' not found at {sql_path}\n"
        f"Available queries: {known}"
    )
def list_queries() -> List[str]:
    """Return the sorted names (without extension) of the .sql files in queries/."""
    names = [sql_file.stem for sql_file in QUERIES_DIR.glob("*.sql")]
    names.sort()
    return names
# =============================================================================
# LOGGING
# =============================================================================
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
    """Configure and return the "db_helper" logger (file + console).

    The file handler records everything from DEBUG up; the console handler
    shows INFO and above. Both use the same "time - level - message" format.

    Handlers left over from an earlier call are closed and removed first.
    logging.getLogger() hands back the same logger object every time, so
    without this step a second call would stack another pair of handlers
    and every message would be emitted twice.

    Args:
        log_file: Path of the log file. When None, a timestamped name of
            the form "db_helper_<YYYYmmdd_HHMMSS>.log" is used.

    Returns:
        The configured logger.
    """
    if log_file is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_file = f"db_helper_{timestamp}.log"
    logger = logging.getLogger("db_helper")
    logger.setLevel(logging.DEBUG)

    # Drop handlers from any previous configuration of this logger.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # File handler — everything. UTF-8 so logged row values and paths with
    # characters outside the locale code page can still be written.
    fh = logging.FileHandler(log_file, encoding="utf-8")
    fh.setLevel(logging.DEBUG)
    # Console handler — INFO and above
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    return logger
# =============================================================================
# DATABASE CONNECTION
# =============================================================================
def connect_db(config_key: str) -> DatabaseConnection:
    """
    Build a DatabaseConnection from a named block of config.json.

    Args:
        config_key: "source_db" or "target_db"

    Returns:
        A DatabaseConnection constructed from that config block.

    Raises:
        ValueError: config_key is not a top-level key of config.json; the
            message lists the keys that end in "_db".
    """
    log = logging.getLogger("db_helper")
    settings = load_config()
    if config_key not in settings:
        db_keys = [k for k in settings if k.endswith('_db')]
        raise ValueError(
            f"Config key '{config_key}' not found in {CONFIG_PATH}. "
            f"Available keys: {db_keys}"
        )
    chosen = settings[config_key]
    database, server = chosen['database'], chosen['server']
    log.info(
        f"Connecting to {database} on {server} "
        f"({config_key})"
    )
    return DatabaseConnection(chosen)
# =============================================================================
# SELECT
# =============================================================================
def run_select(
    db: DatabaseConnection,
    query: str,
    params: Optional[tuple] = None,
    preview_rows: int = 10,
) -> List[Dict[str, Any]]:
    """
    Run a SELECT through db.execute_query, log it, and preview the result.

    Args:
        db: Active DatabaseConnection
        query: SQL SELECT statement
        params: Optional query parameters
        preview_rows: How many rows to preview on the console (0 = skip)

    Returns:
        Whatever db.execute_query returned (a list of row dicts).
    """
    log = logging.getLogger("db_helper")
    log.info(f"Running SELECT:\n{query}")
    if params:
        log.debug(f" Params: {params}")
    result = db.execute_query(query, params)
    row_count = len(result)
    log.info(f" Returned {row_count} row(s)")
    if result and preview_rows > 0:
        _print_table(result[:preview_rows])
        # Tell the user how many rows the preview left out, if any.
        hidden = row_count - preview_rows
        if hidden > 0:
            print(f" ... and {hidden} more rows")
    return result
def _print_table(rows: List[Dict[str, Any]]) -> None:
    """Print row dicts as an aligned text table (header, rule, data rows).

    Column names and order come from the first row. Every value is shown
    via str() and each column is padded to its widest entry, header
    included. Prints nothing for an empty list.
    """
    if not rows:
        return
    headers = list(rows[0])
    # Stringify once, row by row, so widths and output use the same text.
    rendered = [[str(record[name]) for name in headers] for record in rows]
    col_widths = [
        max(len(name), *(len(cells[idx]) for cells in rendered))
        for idx, name in enumerate(headers)
    ]

    def _join(cells):
        return " | ".join(cell.ljust(width) for cell, width in zip(cells, col_widths))

    rule = "-+-".join("-" * width for width in col_widths)
    print(f" {_join(headers)}")
    print(f" {rule}")
    for cells in rendered:
        print(f" {_join(cells)}")
# =============================================================================
# CONFIRMATION GATE
# =============================================================================
def preview_and_confirm(
    action: str,
    sql: str,
    rows: List[Dict[str, Any]],
    preview_rows: int = 5,
    dry_run: bool = False,
    total_row_count: Optional[int] = None,
) -> bool:
    """
    Print a summary of a pending write and ask the user to approve it.

    Args:
        action: Short description ("INSERT into Documents")
        sql: The SQL statement that will be executed
        rows: The data rows that will be written (or a sample of them)
        preview_rows: How many sample rows to display
        dry_run: If True, show the preview but return False without prompting
        total_row_count: If `rows` is only a sample, pass the full count
            here so the prompt shows the real number of rows
            that will be written.

    Returns:
        True only if the user answers "y" or "yes" (case-insensitive);
        False on any other answer and always False in dry-run mode.
    """
    log = logging.getLogger("db_helper")
    row_total = len(rows) if total_row_count is None else total_row_count
    banner = "=" * 60
    print("\n" + banner)
    print(f" ACTION: {action}")
    print(f" ROWS: {row_total}")
    print(f" SQL: {sql}")
    print(banner)
    if rows and preview_rows > 0:
        sample_size = min(preview_rows, len(rows))
        print(f"\n Sample data ({sample_size} of {row_total}):")
        _print_table(rows[:preview_rows])
    if dry_run:
        print("\n [DRY RUN] — no changes will be made.")
        log.info(f"[DRY RUN] Would {action} ({row_total} rows)")
        return False
    print()
    answer = input(" Execute this? [y/N]: ").strip().lower()
    if answer not in ("y", "yes"):
        # Anything but an explicit yes (including just Enter) declines.
        log.info(f"User declined: {action}")
        print(" Aborted.")
        return False
    log.info(f"User confirmed: {action} ({row_total} rows)")
    return True
# =============================================================================
# INSERT
# =============================================================================
def _parse_insert_columns(sql: str) -> Optional[List[str]]:
    """
    Pull the column names out of 'INSERT INTO <table> (col1, ...) VALUES'.

    The table name may be bracketed and/or dotted ([db].[dbo].[Table]).
    Square brackets around column names are removed and empty entries are
    dropped.

    Returns:
        The column names in order, or None when the statement has no
        explicit column list (e.g. 'INSERT INTO t VALUES (...)'), so the
        caller can fall back to positional labels.
    """
    import re
    pattern = re.compile(
        r"INSERT\s+INTO\s+[\[\]\w\.]+\s*\(([^)]+)\)\s*VALUES",
        re.IGNORECASE | re.DOTALL,
    )
    found = pattern.search(sql)
    if found is None:
        return None
    names = []
    for piece in found.group(1).split(","):
        cleaned = piece.strip().strip("[]")
        if cleaned:
            names.append(cleaned)
    return names
def _build_insert_preview_rows(
    rows: List[Dict[str, Any]],
    params_builder: Callable[[Dict[str, Any]], tuple],
    column_names: Optional[List[str]],
) -> List[Dict[str, Any]]:
    """
    Turn each row into the dict of values that the INSERT would receive.

    params_builder is applied to every row. When column_names is given and
    its length matches the parameter tuple, the values are keyed by those
    names; otherwise positional labels 'col_0', 'col_1', ... are used.
    """
    def _label(values: tuple) -> Dict[str, Any]:
        if column_names and len(column_names) == len(values):
            return dict(zip(column_names, values))
        return {f"col_{pos}": value for pos, value in enumerate(values)}

    return [_label(params_builder(source_row)) for source_row in rows]
def run_insert(
    db: DatabaseConnection,
    insert_sql: str,
    rows: List[Dict[str, Any]],
    params_builder: Callable[[Dict[str, Any]], tuple],
    action: str = "INSERT rows",
    dry_run: bool = False,
    preview_columns: Optional[List[str]] = None,
) -> Dict[str, int]:
    """
    Insert rows one by one after an on-screen preview and confirmation.

    Nothing is committed until every row has been attempted. With zero
    errors the batch is committed; otherwise the user chooses between
    committing anyway and rolling back.

    Args:
        db: Active DatabaseConnection
        insert_sql: Parameterised INSERT statement (use ? placeholders)
        rows: Row dicts (typically from run_select, possibly transformed)
        params_builder: Callable that converts a row dict into the param
            tuple matching the INSERT's ? placeholders
        action: Description shown in the confirmation prompt
        dry_run: If True, preview only — don't execute
        preview_columns: Optional list of column names for the preview
            display. If None, parsed from the INSERT SQL.

    Returns:
        Dict with counts: inserted, skipped, errors. All zero when there
        were no rows, in dry-run mode, or when the user declined;
        "inserted" is reset to 0 after a rollback.
    """
    log = logging.getLogger("db_helper")
    stats = {"inserted": 0, "skipped": 0, "errors": 0}
    if not rows:
        log.info("No rows to insert.")
        return stats

    # Preview the ACTUAL parameter tuples (first five rows), not the raw
    # SELECT rows, so the user sees what will really be written. The full
    # row count is passed separately because only a sample is transformed.
    column_names = preview_columns or _parse_insert_columns(insert_sql)
    sample = _build_insert_preview_rows(rows[:5], params_builder, column_names)
    confirmed = preview_and_confirm(
        action, insert_sql, sample,
        total_row_count=len(rows),
        dry_run=dry_run,
    )
    if not confirmed:
        return stats

    total = len(rows)
    # Refresh the progress line about 50 times per batch (every row for
    # small batches).
    progress_step = max(1, total // 50)
    print()  # blank line before the progress indicator
    for position, row in enumerate(rows, 1):
        params = params_builder(row)
        try:
            db.execute_non_query_no_commit(insert_sql, params)
            stats["inserted"] += 1
            log.debug(f" [{position}/{total}] Inserted: {params}")
        except Exception as exc:
            lowered = str(exc).lower()
            # Errors whose text mentions "duplicate" or "violation of" are
            # counted as skipped rows rather than as errors.
            if "duplicate" in lowered or "violation of" in lowered:
                stats["skipped"] += 1
                log.warning(f" [{position}/{total}] Skipped (duplicate): {params}")
            else:
                stats["errors"] += 1
                log.error(f" [{position}/{total}] Error: {exc} | params={params}")
        # Live progress (overwrites the same line)
        if position % progress_step == 0 or position == total:
            pct = (position / total) * 100
            print(
                f"\r Progress: {position}/{total} ({pct:5.1f}%) "
                f"inserted={stats['inserted']} skipped={stats['skipped']} "
                f"errors={stats['errors']}",
                end="",
                flush=True,
            )
    print()  # end the progress line

    # Commit or rollback
    if stats["errors"] == 0:
        db.commit()
        log.info(
            f"Committed. Inserted: {stats['inserted']}, "
            f"Skipped: {stats['skipped']}"
        )
    else:
        print(
            f"\n {stats['errors']} error(s) occurred. "
            f"Commit anyway? [y/N]: ", end=""
        )
        answer = input().strip().lower()
        if answer in ("y", "yes"):
            db.commit()
            log.info(f"Committed with errors. {stats}")
        else:
            db.rollback()
            # Nothing persisted, so report zero inserted rows.
            stats["inserted"] = 0
            log.warning(f"Rolled back all inserts. {stats}")
            print(" Rolled back.")
    # Summary
    print(f"\n Results: {stats}")
    return stats
# =============================================================================
# PREDEFINED TASKS
# =============================================================================
# Each task is a function that receives (db, args) and orchestrates a
# SELECT → transform → INSERT workflow. Register new tasks in TASK_REGISTRY
# at the bottom of this section.
def task_copy_with_new_id(db: DatabaseConnection, args: argparse.Namespace) -> None:
    """
    Template task: select rows, offset each ID by 1000, insert the copies.

    The table name, columns and transform are placeholders; adapt the
    SELECT, the transform and the INSERT to the real table before use.
    args.dry_run is passed through to run_insert.
    """
    log = logging.getLogger("db_helper")

    # ----- 1. SELECT the source rows -----
    source_rows = run_select(db, """
SELECT TOP 10
ID, Name, Description
FROM YourTable
WHERE SomeCondition = 1
""")
    if not source_rows:
        log.info("No source rows found — nothing to do.")
        return

    # ----- 2. Transform: copy each row, replacing ID with ID + 1000 -----
    # (example only — swap in whatever mapping the real task needs)
    shifted_rows = [{**row, "ID": row["ID"] + 1000} for row in source_rows]

    # ----- 3. INSERT the transformed rows -----
    def _to_params(r):
        return (r["ID"], r["Name"], r["Description"])

    run_insert(
        db,
        """
INSERT INTO YourTable (ID, Name, Description)
VALUES (?, ?, ?)
""",
        shifted_rows,
        params_builder=_to_params,
        action="INSERT transformed rows into YourTable",
        dry_run=args.dry_run,
    )
def task_check_vv50(db: DatabaseConnection, args: argparse.Namespace) -> None:
    """
    Report which documents returned by the VV-57 query also have a
    VariableID=50 row.

    Steps:
        1. Run DWS_GET_VV-57.sql → list of documents
        2. For each DocumentID, run Get_All_VV_Per_DocID.sql
        3. Log whether VariableID=50 is present or missing
        4. Write the documents that HAVE VariableID=50 to
           has_vv50_<timestamp>.txt (skipped when there are none)
    """
    log = logging.getLogger("db_helper")

    # ----- Step 1: Get all documents with VV-57 -----
    docs = run_select(db, load_query("DWS_GET_VV-57"), preview_rows=5)
    if not docs:
        log.info("No documents returned — nothing to check.")
        return

    # ----- Step 2 & 3: Check each document for VV-50 -----
    per_doc_sql = load_query("Get_All_VV_Per_DocID")
    with_vv50 = []
    without_vv50 = []
    total = len(docs)
    for n, doc in enumerate(docs, 1):
        doc_id = doc["DocumentID"]
        # Prefer the full vault path for log lines; fall back to the name.
        shown_path = doc.get("FullVaultPath", doc.get("FileName", ""))
        variable_ids = {
            vv["VariableID"] for vv in db.execute_query(per_doc_sql, (doc_id,))
        }
        if 50 not in variable_ids:
            without_vv50.append(doc)
            log.info(
                f" [{n}/{total}] VV-50 MISSING | DocID={doc_id} | {shown_path}"
            )
            continue
        with_vv50.append(doc)
        log.debug(
            f" [{n}/{total}] VV-50 EXISTS | DocID={doc_id} | {shown_path}"
        )

    # ----- Summary -----
    log.info("=" * 60)
    log.info("VV-50 CHECK COMPLETE")
    log.info("=" * 60)
    log.info(f"Total documents checked: {total}")
    log.info(f" Has VV-50: {len(with_vv50)}")
    log.info(f" Missing VV-50: {len(without_vv50)}")

    if not with_vv50:
        return
    # Save the documents that already HAVE VV-50 (comma-separated, with a
    # header line) for follow-up.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_file = f"has_vv50_{timestamp}.txt"
    with open(out_file, "w", encoding="utf-8") as f:
        f.write("DocumentID,FileName,FullVaultPath\n")
        f.writelines(
            f"{doc['DocumentID']},"
            f"{doc.get('FileName', '')},"
            f"{doc.get('FullVaultPath', '')}\n"
            for doc in with_vv50
        )
    log.info(f"Has VV-50 list saved to: {out_file}")
def copy_57_to_50(db: DatabaseConnection, args: argparse.Namespace) -> None:
    """
    Copy every DWS VariableID=57 value into a new VariableID=50 row.

    DWS stored this information in a variable called "Number", but the data
    cards show it in the "Drawing Number" field. Every VariableValue row in
    the DWS folder with VariableID = 57 is therefore re-inserted unchanged
    except for VariableID, which becomes 50.

    Documents that already have a VariableID = 50 row must not get another
    one. Those DocumentIDs are taken from a has_vv50_{date}.txt file and
    excluded.

    Steps:
        1. Run DWS_VV-57_FullList.sql to get the source rows.
        2. Load the DocumentIDs listed in the has_vv50_{date}.txt file
           (``--exclude-file``, or the auto-detected latest one).
        3. Drop every source row whose DocumentID is in that list.
        4. Insert the remaining rows with VariableID forced to 50.
    """
    log = logging.getLogger("db_helper")

    # ----- Step 1: Fetch all VV-57 rows in DWS paths -----
    source_rows = run_select(
        db, load_query("DWS_VV-57_FullList"), preview_rows=5
    )
    if not source_rows:
        log.info("No VV-57 rows found — nothing to copy.")
        return

    # ----- Step 2: Load DocumentIDs that already have VV-50 -----
    exclusion_path = args.exclude_file or _find_latest_has_vv50_file()
    already_has_50 = _load_excluded_doc_ids(exclusion_path)

    # ----- Step 3: Keep only rows whose DocumentID lacks VV-50 -----
    pending = []
    for candidate in source_rows:
        if candidate["DocumentID"] in already_has_50:
            continue
        pending.append(candidate)

    skipped = len(source_rows) - len(pending)
    log.info(
        f"After filter: {len(pending)} rows to insert, "
        f"{skipped} skipped (DocumentID already has VV-50)"
    )
    if not pending:
        log.info("Nothing to insert after filtering.")
        return

    # ----- Step 4: Insert (with preview + confirmation) -----
    # Columns copied verbatim from the source row. Together with the leading
    # VariableID this order MUST match INSERT_VV50_Copy.sql.
    copied_columns = (
        "DocumentID",
        "ProjectID",
        "RevisionNo",
        "ConfigurationID",
        "ValueText",
        "ValueInt",
        "ValueFloat",
        "ValueDate",
        "ValueCache",
        "IsLongText",
    )

    def build_params(row: Dict[str, Any]) -> tuple:
        # 50 overrides the source VariableID; everything else is carried over.
        return (50, *(row[column] for column in copied_columns))

    run_insert(
        db,
        load_query("INSERT_VV50_Copy"),
        pending,
        params_builder=build_params,
        action="INSERT VariableID=50 copies of DWS VV-57 rows",
        dry_run=args.dry_run,
    )
def _find_latest_has_vv50_file() -> Optional[str]:
    """
    Pick the newest has_vv50_*.txt file in the current directory.

    "Newest" means last in plain string order, which is chronological for
    the has_vv50_<YYYYmmdd_HHMMSS>.txt names this tool writes.

    Returns:
        The matching file name, or None when no such file exists.
    """
    log = logging.getLogger("db_helper")
    newest = max(glob.glob("has_vv50_*.txt"), default=None)
    if newest is None:
        return None
    log.info(f"Auto-detected exclusion file: {newest}")
    return newest
def _load_excluded_doc_ids(path: Optional[str]) -> Set[int]:
    """
    Load DocumentIDs from a has_vv50_*.txt file (CSV, header optional).

    Only the first comma-separated field of each line is used. A line counts
    as data when that field is a plain decimal number, so the
    "DocumentID,FileName,FullVaultPath" header is skipped because it is not
    numeric, and a hand-made file WITHOUT a header keeps its first ID
    instead of losing it. A leading UTF-8 BOM is tolerated.

    Args:
        path: File to read. When falsy, the user is asked to confirm that the
            run should proceed with no exclusions at all.

    Returns:
        The set of DocumentIDs to exclude; empty when the user confirmed
        running without an exclusion file.

    Raises:
        SystemExit: The user declined to proceed without an exclusion file.
        FileNotFoundError: ``path`` was given but does not exist.
    """
    logger = logging.getLogger("db_helper")

    if not path:
        logger.warning(
            "No exclusion file found — ALL VV-57 DocumentIDs will get a "
            "VV-50 copy, including ones that may already have VV-50."
        )
        resp = input(
            " Proceed without an exclusion list? [y/N]: "
        ).strip().lower()
        if resp not in ("y", "yes"):
            logger.info("User aborted — no exclusion file.")
            raise SystemExit(1)
        return set()

    excluded: Set[int] = set()
    ignored = 0
    # utf-8-sig strips a BOM if present; otherwise it would glue itself to
    # the first field and make a headerless first ID look non-numeric.
    with open(path, "r", encoding="utf-8-sig") as f:
        for line_no, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            first = line.split(",", 1)[0].strip()
            # isdecimal() accepts only characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which int() rejects.
            if first.isdecimal():
                excluded.add(int(first))
            elif line_no > 1:
                # A non-numeric first line is the expected header; anything
                # later is a malformed row worth reporting.
                ignored += 1

    if ignored:
        logger.warning(
            f"Ignored {ignored} line(s) without a numeric DocumentID in {path}"
        )
    logger.info(f"Loaded {len(excluded)} DocumentIDs to exclude from {path}")
    return excluded
# Task registry: maps each --task name to the function that implements it.
# parse_arguments() builds the allowed --task choices from these keys, and
# main() looks the selected name up here and calls it as task_fn(db, args).
# To add a task, define a function taking (db, args) and register it below.
TASK_REGISTRY: Dict[str, Callable] = {
    "copy_with_new_id": task_copy_with_new_id,
    "check_vv50": task_check_vv50,
    "copy_57_to_50": copy_57_to_50
}
# =============================================================================
# CLI
# =============================================================================
def parse_arguments() -> argparse.Namespace:
    """
    Define the db_helper command line and parse ``sys.argv``.

    Options: --db, --task (restricted to the names in TASK_REGISTRY),
    --query, --dry-run, --list-queries and --exclude-file.

    Returns:
        The parsed arguments.
    """
    usage_examples = """
Examples:
python db_helper.py --db target_db --task copy_with_new_id
python db_helper.py --db target_db --task copy_with_new_id --dry-run
python db_helper.py --db source_db --query get_var47
python db_helper.py --db source_db --query "SELECT TOP 10 * FROM Documents"
python db_helper.py --list-queries
"""
    cli = argparse.ArgumentParser(
        description="Database helper for PDM migration — interactive SQL tasks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    add = cli.add_argument

    # --- what to connect to and what to run ---
    add(
        "--db",
        help='Config key for the database: "source_db" or "target_db"',
    )
    add(
        "--task",
        choices=list(TASK_REGISTRY),
        help="Name of a predefined task to run",
    )
    add(
        "--query",
        help=(
            "Run a SELECT query. Pass a query name to load from "
            "helpers/queries/<name>.sql, or pass raw SQL in quotes."
        ),
    )

    # --- switches ---
    add(
        "--dry-run",
        action="store_true",
        help="Preview what would happen without executing writes",
    )
    add(
        "--list-queries",
        action="store_true",
        help="List all available saved queries and exit",
    )

    # --- task-specific input ---
    add(
        "--exclude-file",
        help=(
            "Path to a has_vv50_*.txt file whose DocumentIDs should be "
            "excluded from copy_57_to_50. If omitted, the most recent "
            "has_vv50_*.txt in the current directory is used."
        ),
    )

    return cli.parse_args()
def _resolve_query(query_arg: str) -> str:
    """
    Turn a --query argument into SQL text.

    An argument containing a space is taken to be a SQL statement and is
    returned unchanged. Anything else is treated as the name of a saved
    query and loaded from queries/<name>.sql.
    """
    looks_like_sql = " " in query_arg
    return query_arg if looks_like_sql else load_query(query_arg)
def main() -> int:
    """
    Entry point for the db_helper command line.

    Handles --list-queries without touching the database; otherwise connects
    to the database named by --db and runs either the ad-hoc --query or the
    selected --task. When both are given, --query wins.

    Returns:
        Process exit code: 0 on success, 1 on a usage error, a missing file
        or an unhandled exception, 130 when interrupted with Ctrl+C.
    """
    args = parse_arguments()

    # --list-queries doesn't need a DB connection or logging
    if args.list_queries:
        names = list_queries()
        if not names:
            print(f"No .sql files found in {QUERIES_DIR}")
            return 0
        print(f"Available queries in {QUERIES_DIR}:")
        for name in names:
            # The first line of each .sql file doubles as its description.
            sql_text = (QUERIES_DIR / f"{name}.sql").read_text(encoding="utf-8")
            first_line = sql_text.split("\n")[0]
            print(f" {name:30s} {first_line}")
        return 0

    if not args.db:
        print("Error: --db is required (unless using --list-queries)")
        return 1

    logger = setup_logging()
    rule = "=" * 60
    for banner_line in (
        rule,
        "DB HELPER",
        rule,
        f"Database: {args.db}",
        f"Task: {args.task or '(ad-hoc query)'}",
        f"Dry run: {args.dry_run}",
    ):
        logger.info(banner_line)

    db = connect_db(args.db)
    exit_code = 0
    try:
        if args.query:
            sql = _resolve_query(args.query)
            logger.info(f"Resolved query:\n{sql}")
            run_select(db, sql)
        elif args.task:
            TASK_REGISTRY[args.task](db, args)
        else:
            logger.error("Provide either --task, --query, or --list-queries")
            exit_code = 1
    except FileNotFoundError as exc:
        logger.error(str(exc))
        exit_code = 1
    except KeyboardInterrupt:
        logger.warning("Interrupted by user")
        db.rollback()
        exit_code = 130
    except Exception:
        logger.exception("Unhandled exception")
        db.rollback()
        exit_code = 1
    finally:
        # Runs on every path out of the try block, including SystemExit.
        db.close()
    return exit_code


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,14 @@
-- Documents in a DWS vault path that have a VariableValue row for VariableID = 57.
-- Returns one row per distinct (DocumentID, FileName, FullVaultPath); DISTINCT
-- collapses the repeats produced when a document matches more than one
-- VariableValue row.
SELECT DISTINCT
    doc.DocumentID,
    doc.Filename             AS [FileName],
    proj.Path + doc.Filename AS [FullVaultPath]
FROM [Drilling_Test].[dbo].[Documents] AS doc
INNER JOIN [Drilling_Test].[dbo].[DocumentsInProjects] AS dip
    ON doc.[DocumentID] = dip.DocumentID
INNER JOIN [Drilling_Test].[dbo].[Projects] AS proj
    ON dip.ProjectID = proj.ProjectID
INNER JOIN [Drilling_Test].[dbo].[VariableValue] AS vv
    ON doc.DocumentID = vv.DocumentID
WHERE vv.VariableID = 57
  AND proj.Path LIKE '%DWS%'
ORDER BY [FullVaultPath] ASC;

View File

@@ -0,0 +1,24 @@
-- All VariableValue rows with VariableID = 57 for documents in a DWS vault path.
--
-- This is essentially SELECT * FROM VariableValue WHERE VariableID = 57,
-- restricted to files in the DWS folder. Documents, DocumentsInProjects and
-- Projects are only needed to reach the folder path.
--
-- The copy_57_to_50 task inserts one new row for every row returned here, so
-- each VariableValue row must come back exactly once. The path test therefore
-- lives in an EXISTS subquery: joining DocumentsInProjects directly would
-- repeat a VariableValue row once per DWS folder the document belongs to and
-- lead to duplicate inserts.
SELECT
    v.VariableID,
    v.DocumentID,
    v.ProjectID,
    v.RevisionNo,
    v.ConfigurationID,
    v.ValueText,
    v.ValueInt,
    v.ValueFloat,
    v.ValueDate,
    v.ValueCache,
    v.IsLongText
FROM [Drilling_Test].[dbo].[VariableValue] v
WHERE v.VariableID = 57
  AND EXISTS (
        SELECT 1
        FROM [Drilling_Test].[dbo].[Documents] d
        INNER JOIN [Drilling_Test].[dbo].[DocumentsInProjects] dp on d.[DocumentID] = dp.DocumentID
        INNER JOIN [Drilling_Test].[dbo].[Projects] p on dp.ProjectID = p.ProjectID
        WHERE d.DocumentID = v.DocumentID
          AND p.Path like '%DWS%'
      )
ORDER BY v.DocumentID asc;

View File

@@ -0,0 +1,14 @@
-- Ad-hoc inspection of DocumentConfiguration in the _Citadel_CS database.

-- 1) Preview: the first 1000 configuration IDs and names.
SELECT TOP (1000)
    [ConfigurationID],
    [ConfigurationName]
FROM [_Citadel_CS].[dbo].[DocumentConfiguration];

-- 2) Every row whose ConfigurationName occurs more than once.
SELECT *
FROM [_Citadel_CS].[dbo].[DocumentConfiguration]
WHERE ConfigurationName IN (
    SELECT ConfigurationName
    FROM [_Citadel_CS].[dbo].[DocumentConfiguration]
    GROUP BY ConfigurationName
    HAVING COUNT(*) > 1
);

View File

@@ -0,0 +1,6 @@
-- List each VariableID stored for one document, once.
-- The DocumentID is bound to the single positional parameter marker below.
SELECT VariableID
FROM [Drilling_Test].[dbo].[VariableValue]
WHERE DocumentID = ?
GROUP BY VariableID;

View File

@@ -0,0 +1,9 @@
-- Insert the VariableID = 50 copy of an existing VV-57 VariableValue row
-- (used by the copy_57_to_50 task).
-- The eleven parameter markers are bound positionally, in the column order
-- listed below. build_params() in copy_57_to_50 MUST build its tuple in
-- exactly this order.
INSERT INTO [Drilling_Test].[dbo].[VariableValue] (
    VariableID,
    DocumentID,
    ProjectID,
    RevisionNo,
    ConfigurationID,
    ValueText,
    ValueInt,
    ValueFloat,
    ValueDate,
    ValueCache,
    IsLongText
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);

42
helpers/samples/README.md Normal file
View File

@@ -0,0 +1,42 @@
# Input File Format Reference
The actual input CSVs under `helpers/` are gitignored (they are environment-specific
and often large). These samples document the expected format for each script so you
don't have to guess next time.
All sample files contain a minimal set of example rows. Input files have **no header row**
and are **UTF-8** encoded (with or without BOM). Empty lines are skipped.
## Full-path format
Used by `batch_workflows_paths.py` and any script whose CLI takes `--csv` / `-c` and
operates on files already inside the vault.
- One full Windows vault path per line.
- Path must match what `IEdmVault5.GetFileFromPath` expects — i.e. the real location
inside the vault's local view (e.g. `C:\PDM\<VaultName>\...`).
- Extension-sensitive: `.SLDPRT`, `.SLDDRW`, `.SLDASM`, `.pdf`, etc. all count as
distinct files.
See `sample_full_paths.csv`.
## Part-number format
Used by `batch_copy_tree.py` (and other scripts that resolve files by part/document
number rather than path).
- One part number per line.
- No extension, no path.
- Case and dashes/underscores should match the vault's stored value.
See `sample_part_numbers.csv`.
## Filename-only format
Used by older helpers (e.g. the `Code15*.csv` family) that match by filename across
the vault rather than by full path.
- One filename per line, with extension.
- No directory component.
See `sample_filenames.csv`.

View File

@@ -0,0 +1,4 @@
EXAMPLE-PART.SLDPRT
EXAMPLE-DRAWING.SLDDRW
EXAMPLE-LIBRARY-FEATURE.SLDLFP
EXAMPLE-DOCUMENT.PDF
1 EXAMPLE-PART.SLDPRT
2 EXAMPLE-DRAWING.SLDDRW
3 EXAMPLE-LIBRARY-FEATURE.SLDLFP
4 EXAMPLE-DOCUMENT.PDF

View File

@@ -0,0 +1,5 @@
C:\PDM\Drilling_Test\DWS\Data Subs\EXAMPLE-PART.SLDPRT
C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDPRT
C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDDRW
C:\PDM\Drilling_Test\DWS\Assemblies\EXAMPLE-ASSY.SLDASM
C:\PDM\Drilling_Test\DWS\Drawings\EXAMPLE-DRAWING.pdf
1 C:\PDM\Drilling_Test\DWS\Data Subs\EXAMPLE-PART.SLDPRT
2 C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDPRT
3 C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDDRW
4 C:\PDM\Drilling_Test\DWS\Assemblies\EXAMPLE-ASSY.SLDASM
5 C:\PDM\Drilling_Test\DWS\Drawings\EXAMPLE-DRAWING.pdf

View File

@@ -0,0 +1,6 @@
2-80001010
2-80003568
2-80003962
2-TF-80009889
4-804687-02
6-60181-02
1 2-80001010
2 2-80003568
3 2-80003962
4 2-TF-80009889
5 4-804687-02
6 6-60181-02

184
helpers/test_batch_api.py Normal file
View File

@@ -0,0 +1,184 @@
"""
IEdmFile13::ChangeState3 via comtypes vtable direct call.
Confirmed from gen_py IEdmFile13_vtables_:
- IEdmFile13 IID : {DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}
- ChangeState3 oVft = 432 → vtable slot 54
- Slot layout:
0-2 IUnknown (handled by comtypes base)
3-6 IDispatch (handled by comtypes base)
7-53 47 methods from IEdmObject5 … IEdmFile12 (placeholders)
54 ChangeState3
55 GetThumbnail
"""
import ctypes
import getpass
import pythoncom
import win32com.client
import win32com.client.gencache as gencache
import comtypes
import comtypes.automation
from comtypes import COMMETHOD, GUID, HRESULT
from comtypes.automation import IDispatch as CT_IDispatch
# Test fixture: the vault to log in to, and the single file (plus its
# containing folder) whose workflow state main() tries to change.
VAULT_NAME = "Drilling_Test"
TEST_PATH = r"C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG\800-TT-001.SLDPRT"
FOLDER_PATH = r"C:\PDM\Drilling_Test\DWS\PileDRIVER\825 PileDRIVER\MFG"
# Transition and destination-state IDs handed to ChangeState3 as VT_I4
# VARIANTs. NOTE(review): presumably specific to this vault's workflow —
# confirm before pointing the script at another vault.
TRANSITION_ID = 268
TO_STATE_ID = 9
# Object-type code passed to vault.GetObject() to fetch a file object.
EdmObject_File = 1
# Interface ID requested via QueryInterface to obtain IEdmFile13.
IID_IEdmFile13 = "{DB0646C9-9E3F-4EA2-93AA-EB6584D268E2}"
# ---------------------------------------------------------------------------
# comtypes interface — 47 placeholders put ChangeState3 at slot 54 (offset 432)
# ---------------------------------------------------------------------------
# Slots 0-6 (IUnknown + IDispatch) come from the comtypes base class, so the
# 47 dummy entries fill slots 7-53. They exist only to take up vtable space
# and must never be called: their declared signatures are not the real ones.
_phs = [COMMETHOD([], HRESULT, f"_ph{i}") for i in range(47)]
# Pointer-to-VARIANT type used for ChangeState3's first two parameters.
VARIANT_p = ctypes.POINTER(comtypes.automation.VARIANT)
class IEdmFile13_CT(CT_IDispatch):
    """
    Hand-written comtypes declaration of IEdmFile13, complete only as far as
    needed to call ChangeState3 directly through the vtable.

    comtypes assigns vtable slots from the position of each entry in
    ``_methods_``, after the slots of the base interface. The number and
    order of entries below therefore decide which native function is called:
    adding, removing or reordering one shifts ChangeState3 off slot 54.
    """
    # Interface ID that QueryInterface is asked for.
    _iid_ = GUID(IID_IEdmFile13)
    _idlflags_ = ["dual", "oleautomation"]
    # 47 placeholders (slots 7-53), then the two real declarations.
    _methods_ = _phs + [
        # Slot 54. main() passes the state and transition as pointers to
        # VT_I4 VARIANTs, the containing folder's ID as lFolderID, zero for
        # lParentWnd and lEdmStateFlags, and the login password as bsPasswd.
        COMMETHOD(
            [], HRESULT, "ChangeState3",
            (["in"], VARIANT_p, "poStateIdOrName"),
            (["in"], VARIANT_p, "poTransitionIdOrName"),
            (["in"], ctypes.c_long, "lFolderID"),
            (["in"], ctypes.c_wchar_p, "bsComment"),
            (["in"], ctypes.c_long, "lParentWnd"),
            (["in"], ctypes.c_long, "lEdmStateFlags"),
            (["in"], ctypes.c_wchar_p, "bsPasswd"),
        ),
        # Slot 55. Declared to mirror the documented layout; this script
        # never calls it.
        COMMETHOD(
            [], HRESULT, "GetThumbnail",
            (["out", "retval"], ctypes.POINTER(ctypes.c_void_p), "pBitmap"),
        ),
    ]
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def make_i4_variant(val: int) -> comtypes.automation.VARIANT:
    """
    Build a VARIANT tagged VT_I4 that carries ``val``.

    The type tag is set through the ``vt`` field; the integer itself is
    written straight into the VARIANT's memory at the start of its value
    union.
    """
    vt_i4 = 3
    value_offset = 8  # the union value starts at byte offset 8 inside VARIANT

    variant = comtypes.automation.VARIANT()
    variant.vt = vt_i4
    payload = ctypes.c_int.from_address(ctypes.addressof(variant) + value_offset)
    payload.value = val
    return variant
def raw_ptr_from_pycom(py_com_obj) -> int:
    """
    Return the IUnknown*/IDispatch* held inside a pythoncom COM wrapper.

    The value is read directly from the wrapper object's memory. Assumed
    CPython 64-bit layout: ob_refcnt(8) | ob_type(8) | punk(8), so the
    pointer sits at byte offset 16 from the object's address.
    """
    punk_field_address = id(py_com_obj) + 16
    punk_field = ctypes.cast(
        punk_field_address, ctypes.POINTER(ctypes.c_uint64)
    )
    return punk_field.contents.value
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """
    Manual smoke test: call IEdmFile13::ChangeState3 on one file via comtypes.

    Flow:
        1. Prompt for PDM credentials and log in to VAULT_NAME.
        2. Look up TEST_PATH and obtain its IEdmFile13 interface via win32com.
        3. Read the raw COM pointer out of the pythoncom wrapper and
           QueryInterface it to the hand-declared IEdmFile13_CT.
        4. Call ChangeState3 with TO_STATE_ID / TRANSITION_ID.
        5. Re-read the file and print its current state.

    Every failure is printed and ends the run early; nothing is returned.
    """
    import traceback

    username = input("PDM Username: ")
    password = getpass.getpass("PDM Password: ")  # reused for bsPasswd below

    print("\n=== Connect ===")
    vault = gencache.EnsureDispatch("ConisioLib.EdmVault")
    vault.Login(username, password, VAULT_NAME)
    print(f"Logged in to {VAULT_NAME}")

    folder_obj = vault.GetFolderFromPath(FOLDER_PATH)
    result = vault.GetFileFromPath(TEST_PATH, folder_obj)
    # The call may come back as a tuple; the file object is the first item.
    raw = result[0] if isinstance(result, tuple) else result
    file_id = raw.ID
    fold_id = folder_obj.ID
    print(f"File ID: {file_id} Folder ID: {fold_id}")

    file_obj = vault.GetObject(EdmObject_File, file_id)
    file13_w32 = win32com.client.CastTo(file_obj, "IEdmFile13")

    # -----------------------------------------------------------------------
    print("\n=== Extract raw IEdmFile13* from pythoncom ===")
    # -----------------------------------------------------------------------
    py_disp = file13_w32._oleobj_  # PyIDispatch wrapping IEdmFile13
    raw_ptr = raw_ptr_from_pycom(py_disp)
    print(f"IEdmFile13* = {raw_ptr:#x}")
    # Check the pointer BEFORE dereferencing it: reading the vtable through a
    # null pointer would crash the interpreter instead of reporting an error.
    if not raw_ptr:
        print("ERROR: Could not read a valid COM pointer — aborting.")
        return

    # Sanity: read the vtable pointer (first 8 bytes of the COM object)
    vtbl_ptr = ctypes.c_uint64.from_address(raw_ptr).value
    print(f"Vtable ptr = {vtbl_ptr:#x}")
    if not vtbl_ptr:
        print("ERROR: Could not read a valid COM pointer — aborting.")
        return

    # -----------------------------------------------------------------------
    print("\n=== QI to IEdmFile13_CT via comtypes ===")
    # -----------------------------------------------------------------------
    # Cast to IUnknown so comtypes can call QueryInterface properly.
    # ct_unk is a borrowed reference: py_disp must stay alive while it is used.
    ct_unk = ctypes.cast(raw_ptr, ctypes.POINTER(comtypes.IUnknown))
    try:
        file13_ct = ct_unk.QueryInterface(IEdmFile13_CT)
        print(f"QI succeeded: {file13_ct}")
    except Exception as e:
        print(f"QI failed: {e}")
        traceback.print_exc()
        return

    # -----------------------------------------------------------------------
    print("\n=== Build VARIANTs ===")
    # -----------------------------------------------------------------------
    v_state = make_i4_variant(TO_STATE_ID)
    v_trans = make_i4_variant(TRANSITION_ID)
    print(f"v_state vt={v_state.vt} val={TO_STATE_ID}")
    print(f"v_trans vt={v_trans.vt} val={TRANSITION_ID}")

    # -----------------------------------------------------------------------
    print(f"\n=== ChangeState3(state={TO_STATE_ID}, trans={TRANSITION_ID}, folder={fold_id}) ===")
    # -----------------------------------------------------------------------
    try:
        hr = file13_ct.ChangeState3(
            ctypes.byref(v_state),
            ctypes.byref(v_trans),
            ctypes.c_long(fold_id),
            "Batch transition test",
            ctypes.c_long(0),
            ctypes.c_long(0),
            password,  # PDM password (required by this transition)
        )
        print(f"ChangeState3 returned HRESULT {hr:#010x}")
    except Exception as e:
        print(f"ChangeState3 raised: {e}")
        traceback.print_exc()
        return
    finally:
        # The call is over (successfully or not), so the wrapper that was
        # held for the borrowed pointer can be released now.
        del py_disp

    # -----------------------------------------------------------------------
    print("\n=== Verify state ===")
    # -----------------------------------------------------------------------
    fresh = vault.GetObject(EdmObject_File, file_id)
    try:
        state = fresh.CurrentState
        # CurrentState may surface as a method rather than a property.
        if callable(state):
            state = state()
        name = state.Name if hasattr(state, "Name") else str(state)
        print(f"New state: {name}")
        if name in ("Approved", "AA"):
            print("\n*** SUCCESS! ***")
        else:
            print("State did not reach Approved.")
    except Exception as e:
        print(f"Could not read new state: {e}")


if __name__ == "__main__":
    main()