Initial Commit of the PDM project (ready for DWS migration)
This commit is contained in:
788
helpers/db_helper.py
Normal file
788
helpers/db_helper.py
Normal file
@@ -0,0 +1,788 @@
|
||||
"""
|
||||
Database Helper for PDM Migration
|
||||
==================================
|
||||
Interactive tool for running SELECT queries, transforming results, and
|
||||
inserting new rows — with mandatory terminal confirmation before any
|
||||
write operation touches the database.
|
||||
|
||||
Usage:
|
||||
python db_helper.py --db target_db --task copy_with_new_id
|
||||
python db_helper.py --db source_db --query "SELECT TOP 10 * FROM Documents"
|
||||
python db_helper.py --db target_db --task copy_with_new_id --dry-run
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import glob
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any, Callable, Tuple, Set
|
||||
|
||||
# db_utils lives one directory up
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
from db_utils import DatabaseConnection
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONFIGURATION
|
||||
# =============================================================================
|
||||
|
||||
CONFIG_PATH = Path(__file__).resolve().parent.parent / "config.json"
|
||||
QUERIES_DIR = Path(__file__).resolve().parent / "queries"
|
||||
|
||||
|
||||
def load_config() -> dict:
|
||||
"""Load config.json from the project root."""
|
||||
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
def load_query(name: str) -> str:
|
||||
"""
|
||||
Load a SQL query from the queries/ folder by name.
|
||||
|
||||
Args:
|
||||
name: Query name (filename without .sql extension).
|
||||
e.g. "get_var47" loads queries/get_var47.sql
|
||||
|
||||
Returns:
|
||||
The SQL text from the file.
|
||||
"""
|
||||
sql_path = QUERIES_DIR / f"{name}.sql"
|
||||
if not sql_path.exists():
|
||||
available = sorted(p.stem for p in QUERIES_DIR.glob("*.sql"))
|
||||
raise FileNotFoundError(
|
||||
f"Query '{name}' not found at {sql_path}\n"
|
||||
f"Available queries: {available}"
|
||||
)
|
||||
return sql_path.read_text(encoding="utf-8").strip()
|
||||
|
||||
|
||||
def list_queries() -> List[str]:
|
||||
"""Return names of all available .sql files in the queries/ folder."""
|
||||
return sorted(p.stem for p in QUERIES_DIR.glob("*.sql"))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LOGGING
|
||||
# =============================================================================
|
||||
|
||||
def setup_logging(log_file: Optional[str] = None) -> logging.Logger:
|
||||
"""Configure logging with file + console handlers."""
|
||||
if log_file is None:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
log_file = f"db_helper_{timestamp}.log"
|
||||
|
||||
logger = logging.getLogger("db_helper")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
# File handler — everything
|
||||
fh = logging.FileHandler(log_file)
|
||||
fh.setLevel(logging.DEBUG)
|
||||
|
||||
# Console handler — INFO and above
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(logging.INFO)
|
||||
|
||||
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
|
||||
fh.setFormatter(formatter)
|
||||
ch.setFormatter(formatter)
|
||||
|
||||
logger.addHandler(fh)
|
||||
logger.addHandler(ch)
|
||||
|
||||
return logger
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DATABASE CONNECTION
|
||||
# =============================================================================
|
||||
|
||||
def connect_db(config_key: str) -> DatabaseConnection:
|
||||
"""
|
||||
Connect to a database using a named block from config.json.
|
||||
|
||||
Args:
|
||||
config_key: "source_db" or "target_db"
|
||||
|
||||
Returns:
|
||||
Connected DatabaseConnection instance.
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
config = load_config()
|
||||
|
||||
if config_key not in config:
|
||||
raise ValueError(
|
||||
f"Config key '{config_key}' not found in {CONFIG_PATH}. "
|
||||
f"Available keys: {[k for k in config if k.endswith('_db')]}"
|
||||
)
|
||||
|
||||
db_config = config[config_key]
|
||||
logger.info(
|
||||
f"Connecting to {db_config['database']} on {db_config['server']} "
|
||||
f"({config_key})"
|
||||
)
|
||||
return DatabaseConnection(db_config)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SELECT
|
||||
# =============================================================================
|
||||
|
||||
def run_select(
|
||||
db: DatabaseConnection,
|
||||
query: str,
|
||||
params: Optional[tuple] = None,
|
||||
preview_rows: int = 10,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Execute a SELECT query, log it, print a preview, and return results.
|
||||
|
||||
Args:
|
||||
db: Active DatabaseConnection
|
||||
query: SQL SELECT statement
|
||||
params: Optional query parameters
|
||||
preview_rows: How many rows to preview on the console (0 = skip)
|
||||
|
||||
Returns:
|
||||
List of row dicts.
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
logger.info(f"Running SELECT:\n{query}")
|
||||
if params:
|
||||
logger.debug(f" Params: {params}")
|
||||
|
||||
rows = db.execute_query(query, params)
|
||||
logger.info(f" Returned {len(rows)} row(s)")
|
||||
|
||||
if rows and preview_rows > 0:
|
||||
_print_table(rows[:preview_rows])
|
||||
if len(rows) > preview_rows:
|
||||
print(f" ... and {len(rows) - preview_rows} more rows")
|
||||
|
||||
return rows
|
||||
|
||||
|
||||
def _print_table(rows: List[Dict[str, Any]]) -> None:
|
||||
"""Pretty-print a list of row dicts as an aligned console table."""
|
||||
if not rows:
|
||||
return
|
||||
columns = list(rows[0].keys())
|
||||
# Compute column widths (header vs data)
|
||||
widths = {col: len(col) for col in columns}
|
||||
str_rows = []
|
||||
for row in rows:
|
||||
str_row = {col: str(row[col]) for col in columns}
|
||||
for col in columns:
|
||||
widths[col] = max(widths[col], len(str_row[col]))
|
||||
str_rows.append(str_row)
|
||||
|
||||
header = " | ".join(col.ljust(widths[col]) for col in columns)
|
||||
sep = "-+-".join("-" * widths[col] for col in columns)
|
||||
print(f" {header}")
|
||||
print(f" {sep}")
|
||||
for sr in str_rows:
|
||||
line = " | ".join(sr[col].ljust(widths[col]) for col in columns)
|
||||
print(f" {line}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CONFIRMATION GATE
|
||||
# =============================================================================
|
||||
|
||||
def preview_and_confirm(
|
||||
action: str,
|
||||
sql: str,
|
||||
rows: List[Dict[str, Any]],
|
||||
preview_rows: int = 5,
|
||||
dry_run: bool = False,
|
||||
total_row_count: Optional[int] = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Show the user what's about to happen and ask for confirmation.
|
||||
|
||||
Args:
|
||||
action: Short description ("INSERT into Documents")
|
||||
sql: The SQL statement that will be executed
|
||||
rows: The data rows that will be written (or a sample of them)
|
||||
preview_rows: How many sample rows to display
|
||||
dry_run: If True, show the preview but return False without prompting
|
||||
total_row_count: If `rows` is only a sample, pass the full count
|
||||
here so the prompt shows the real number of rows
|
||||
that will be written.
|
||||
|
||||
Returns:
|
||||
True if user confirms, False otherwise.
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
full_count = total_row_count if total_row_count is not None else len(rows)
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print(f" ACTION: {action}")
|
||||
print(f" ROWS: {full_count}")
|
||||
print(f" SQL: {sql}")
|
||||
print("=" * 60)
|
||||
|
||||
if rows and preview_rows > 0:
|
||||
shown = min(preview_rows, len(rows))
|
||||
print(f"\n Sample data ({shown} of {full_count}):")
|
||||
_print_table(rows[:preview_rows])
|
||||
|
||||
if dry_run:
|
||||
print("\n [DRY RUN] — no changes will be made.")
|
||||
logger.info(f"[DRY RUN] Would {action} ({full_count} rows)")
|
||||
return False
|
||||
|
||||
print()
|
||||
response = input(" Execute this? [y/N]: ").strip().lower()
|
||||
if response in ("y", "yes"):
|
||||
logger.info(f"User confirmed: {action} ({full_count} rows)")
|
||||
return True
|
||||
else:
|
||||
logger.info(f"User declined: {action}")
|
||||
print(" Aborted.")
|
||||
return False
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# INSERT
|
||||
# =============================================================================
|
||||
|
||||
def _parse_insert_columns(sql: str) -> Optional[List[str]]:
|
||||
"""
|
||||
Extract the column name list from a standard INSERT statement.
|
||||
|
||||
Matches 'INSERT INTO <table> (col1, col2, ...) VALUES ...'. Returns
|
||||
None if the INSERT has no explicit column list (e.g. 'INSERT INTO t
|
||||
VALUES (...)') so the caller can fall back to positional labels.
|
||||
"""
|
||||
import re
|
||||
# Match the first parenthesised group after INSERT INTO <table>
|
||||
# Table name may be bracketed/dotted: [db].[dbo].[Table]
|
||||
m = re.search(
|
||||
r"INSERT\s+INTO\s+[\[\]\w\.]+\s*\(([^)]+)\)\s*VALUES",
|
||||
sql,
|
||||
re.IGNORECASE | re.DOTALL,
|
||||
)
|
||||
if not m:
|
||||
return None
|
||||
cols = [c.strip().strip("[]") for c in m.group(1).split(",")]
|
||||
return [c for c in cols if c]
|
||||
|
||||
|
||||
def _build_insert_preview_rows(
|
||||
rows: List[Dict[str, Any]],
|
||||
params_builder: Callable[[Dict[str, Any]], tuple],
|
||||
column_names: Optional[List[str]],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Apply params_builder to each row and return dicts keyed by the INSERT's
|
||||
column names — so the preview shows exactly what will be written.
|
||||
Falls back to positional labels ('col_0', 'col_1', ...) if the column
|
||||
list couldn't be parsed.
|
||||
"""
|
||||
preview = []
|
||||
for row in rows:
|
||||
params = params_builder(row)
|
||||
if column_names and len(column_names) == len(params):
|
||||
preview.append(dict(zip(column_names, params)))
|
||||
else:
|
||||
preview.append({f"col_{i}": v for i, v in enumerate(params)})
|
||||
return preview
|
||||
|
||||
|
||||
def run_insert(
|
||||
db: DatabaseConnection,
|
||||
insert_sql: str,
|
||||
rows: List[Dict[str, Any]],
|
||||
params_builder: Callable[[Dict[str, Any]], tuple],
|
||||
action: str = "INSERT rows",
|
||||
dry_run: bool = False,
|
||||
preview_columns: Optional[List[str]] = None,
|
||||
) -> Dict[str, int]:
|
||||
"""
|
||||
Insert rows with confirmation, logging, and transaction safety.
|
||||
|
||||
Args:
|
||||
db: Active DatabaseConnection
|
||||
insert_sql: Parameterised INSERT statement (use ? placeholders)
|
||||
rows: Row dicts (typically from run_select, possibly transformed)
|
||||
params_builder: Callable that converts a row dict into the param
|
||||
tuple matching the INSERT's ? placeholders
|
||||
action: Description shown in the confirmation prompt
|
||||
dry_run: If True, preview only — don't execute
|
||||
preview_columns: Optional list of column names for the preview
|
||||
display. If None, parsed from the INSERT SQL.
|
||||
|
||||
Returns:
|
||||
Dict with counts: inserted, skipped, errors
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
stats = {"inserted": 0, "skipped": 0, "errors": 0}
|
||||
|
||||
if not rows:
|
||||
logger.info("No rows to insert.")
|
||||
return stats
|
||||
|
||||
# Build the preview from the ACTUAL params that will be sent to the DB
|
||||
# (not the raw SELECT rows) so users see what will really be inserted.
|
||||
column_names = preview_columns or _parse_insert_columns(insert_sql)
|
||||
preview_rows = _build_insert_preview_rows(
|
||||
rows[:5], params_builder, column_names
|
||||
)
|
||||
# Attach the full row count so preview_and_confirm can report it
|
||||
# accurately even though we only transformed the sample.
|
||||
if not preview_and_confirm(
|
||||
action, insert_sql, preview_rows,
|
||||
total_row_count=len(rows),
|
||||
dry_run=dry_run,
|
||||
):
|
||||
return stats
|
||||
|
||||
# Execute row-by-row inside a single transaction so we can log per-row
|
||||
# and rollback cleanly on failure.
|
||||
total = len(rows)
|
||||
# Update progress ~50 times across the batch (minimum every row for
|
||||
# tiny batches). Keeps the terminal feeling alive without spamming.
|
||||
progress_step = max(1, total // 50)
|
||||
print() # blank line before the progress indicator
|
||||
|
||||
for i, row in enumerate(rows, 1):
|
||||
params = params_builder(row)
|
||||
try:
|
||||
db.execute_non_query_no_commit(insert_sql, params)
|
||||
stats["inserted"] += 1
|
||||
logger.debug(f" [{i}/{total}] Inserted: {params}")
|
||||
except Exception as exc:
|
||||
err_msg = str(exc)
|
||||
if "duplicate" in err_msg.lower() or "violation of" in err_msg.lower():
|
||||
stats["skipped"] += 1
|
||||
logger.warning(f" [{i}/{total}] Skipped (duplicate): {params}")
|
||||
else:
|
||||
stats["errors"] += 1
|
||||
logger.error(f" [{i}/{total}] Error: {exc} | params={params}")
|
||||
|
||||
# Live progress (overwrites the same line)
|
||||
if i % progress_step == 0 or i == total:
|
||||
pct = (i / total) * 100
|
||||
print(
|
||||
f"\r Progress: {i}/{total} ({pct:5.1f}%) "
|
||||
f"inserted={stats['inserted']} skipped={stats['skipped']} "
|
||||
f"errors={stats['errors']}",
|
||||
end="",
|
||||
flush=True,
|
||||
)
|
||||
print() # end the progress line
|
||||
|
||||
# Commit or rollback
|
||||
if stats["errors"] == 0:
|
||||
db.commit()
|
||||
logger.info(
|
||||
f"Committed. Inserted: {stats['inserted']}, "
|
||||
f"Skipped: {stats['skipped']}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"\n {stats['errors']} error(s) occurred. "
|
||||
f"Commit anyway? [y/N]: ", end=""
|
||||
)
|
||||
resp = input().strip().lower()
|
||||
if resp in ("y", "yes"):
|
||||
db.commit()
|
||||
logger.info(f"Committed with errors. {stats}")
|
||||
else:
|
||||
db.rollback()
|
||||
stats["inserted"] = 0
|
||||
logger.warning(f"Rolled back all inserts. {stats}")
|
||||
print(" Rolled back.")
|
||||
|
||||
# Summary
|
||||
print(f"\n Results: {stats}")
|
||||
return stats
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PREDEFINED TASKS
|
||||
# =============================================================================
|
||||
# Each task is a function that receives (db, args) and orchestrates a
|
||||
# SELECT → transform → INSERT workflow. Register new tasks in TASK_REGISTRY
|
||||
# at the bottom of this section.
|
||||
|
||||
def task_copy_with_new_id(db: DatabaseConnection, args: argparse.Namespace) -> None:
|
||||
"""
|
||||
Example task: query rows, swap the ID, and insert as new rows.
|
||||
|
||||
Customise the SELECT, INSERT, and transform logic below to match your
|
||||
actual table and columns.
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
|
||||
# ----- 1. SELECT the source rows -----
|
||||
select_sql = """
|
||||
SELECT TOP 10
|
||||
ID, Name, Description
|
||||
FROM YourTable
|
||||
WHERE SomeCondition = 1
|
||||
"""
|
||||
rows = run_select(db, select_sql)
|
||||
|
||||
if not rows:
|
||||
logger.info("No source rows found — nothing to do.")
|
||||
return
|
||||
|
||||
# ----- 2. Transform: build new rows with modified values -----
|
||||
# Adjust this logic to match your actual needs (new IDs, tweaked
|
||||
# strings, mapped values, etc.)
|
||||
new_rows = []
|
||||
for row in rows:
|
||||
new_row = dict(row) # shallow copy
|
||||
new_row["ID"] = row["ID"] + 1000 # example: offset the ID
|
||||
# new_row["Name"] = row["Name"] # keep as-is, or modify
|
||||
new_rows.append(new_row)
|
||||
|
||||
# ----- 3. INSERT the transformed rows -----
|
||||
insert_sql = """
|
||||
INSERT INTO YourTable (ID, Name, Description)
|
||||
VALUES (?, ?, ?)
|
||||
"""
|
||||
|
||||
run_insert(
|
||||
db,
|
||||
insert_sql,
|
||||
new_rows,
|
||||
params_builder=lambda r: (r["ID"], r["Name"], r["Description"]),
|
||||
action="INSERT transformed rows into YourTable",
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
|
||||
|
||||
def task_check_vv50(db: DatabaseConnection, args: argparse.Namespace) -> None:
|
||||
"""
|
||||
For every document that has VariableID=57 (in DWS paths), check whether
|
||||
it also has a VariableValue row for VariableID=50.
|
||||
|
||||
Steps:
|
||||
1. Run DWS_GET_VV-57.sql → list of documents
|
||||
2. For each DocumentID, run Get_All_VV_Per_DocID.sql
|
||||
3. Log whether VariableID=50 is present or missing
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
|
||||
# ----- Step 1: Get all documents with VV-57 -----
|
||||
step1_sql = load_query("DWS_GET_VV-57")
|
||||
docs = run_select(db, step1_sql, preview_rows=5)
|
||||
|
||||
if not docs:
|
||||
logger.info("No documents returned — nothing to check.")
|
||||
return
|
||||
|
||||
# ----- Step 2 & 3: Check each document for VV-50 -----
|
||||
step2_sql = load_query("Get_All_VV_Per_DocID")
|
||||
|
||||
has_vv50 = []
|
||||
missing_vv50 = []
|
||||
|
||||
total = len(docs)
|
||||
for i, doc in enumerate(docs, 1):
|
||||
doc_id = doc["DocumentID"]
|
||||
file_name = doc.get("FileName", "")
|
||||
full_path = doc.get("FullVaultPath", file_name)
|
||||
|
||||
var_rows = db.execute_query(step2_sql, (doc_id,))
|
||||
var_ids = {row["VariableID"] for row in var_rows}
|
||||
|
||||
if 50 in var_ids:
|
||||
has_vv50.append(doc)
|
||||
logger.debug(
|
||||
f" [{i}/{total}] VV-50 EXISTS | DocID={doc_id} | {full_path}"
|
||||
)
|
||||
else:
|
||||
missing_vv50.append(doc)
|
||||
logger.info(
|
||||
f" [{i}/{total}] VV-50 MISSING | DocID={doc_id} | {full_path}"
|
||||
)
|
||||
|
||||
# ----- Summary -----
|
||||
logger.info("=" * 60)
|
||||
logger.info("VV-50 CHECK COMPLETE")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Total documents checked: {total}")
|
||||
logger.info(f" Has VV-50: {len(has_vv50)}")
|
||||
logger.info(f" Missing VV-50: {len(missing_vv50)}")
|
||||
|
||||
if has_vv50:
|
||||
# Write missing list to file for follow-up
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
out_file = f"has_vv50_{timestamp}.txt"
|
||||
with open(out_file, "w", encoding="utf-8") as f:
|
||||
f.write("DocumentID,FileName,FullVaultPath\n")
|
||||
for doc in has_vv50:
|
||||
f.write(
|
||||
f"{doc['DocumentID']},"
|
||||
f"{doc.get('FileName', '')},"
|
||||
f"{doc.get('FullVaultPath', '')}\n"
|
||||
)
|
||||
logger.info(f"Has VV-50 list saved to: {out_file}")
|
||||
|
||||
def copy_57_to_50(db: DatabaseConnection, args: argparse.Namespace) -> None:
|
||||
"""
|
||||
DWS had a variable called Number, but we want that info to show up on the data cards
|
||||
in the field for "Drawing Number"
|
||||
|
||||
That means that anything in the DWS folder that has a VariableID = 57, we are going to take all of that
|
||||
information and insert a new row in the VariableValues table, where everything is the same except the VariableID = 50
|
||||
|
||||
The one caveat is that we don't want to insert a row for VariableID = 50 if one already exists. For that we are going to reference
|
||||
the has_vv50_{date}.txt file and exlude those document ID's
|
||||
|
||||
Steps:
|
||||
1. Run DWS_VV-57_FullList.sql → list of documents
|
||||
2. For each row returned in Step 1. check and see if DocumentID exists in the has_vv50_{date}.txt file
|
||||
3. If it doesnt already exist insert a new row into VariableValue with all of the same info only change the VariableID to 50
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
|
||||
# ----- Step 1: Fetch all VV-57 rows in DWS paths -----
|
||||
rows_57 = run_select(
|
||||
db, load_query("DWS_VV-57_FullList"), preview_rows=5
|
||||
)
|
||||
if not rows_57:
|
||||
logger.info("No VV-57 rows found — nothing to copy.")
|
||||
return
|
||||
|
||||
# ----- Step 2: Load DocumentIDs that already have VV-50 -----
|
||||
exclude_file = args.exclude_file or _find_latest_has_vv50_file()
|
||||
excluded_doc_ids = _load_excluded_doc_ids(exclude_file)
|
||||
|
||||
# ----- Step 3: Filter out rows whose DocumentID already has VV-50 -----
|
||||
rows_to_insert = [
|
||||
r for r in rows_57 if r["DocumentID"] not in excluded_doc_ids
|
||||
]
|
||||
skipped = len(rows_57) - len(rows_to_insert)
|
||||
logger.info(
|
||||
f"After filter: {len(rows_to_insert)} rows to insert, "
|
||||
f"{skipped} skipped (DocumentID already has VV-50)"
|
||||
)
|
||||
|
||||
if not rows_to_insert:
|
||||
logger.info("Nothing to insert after filtering.")
|
||||
return
|
||||
|
||||
# ----- Step 4: Insert (with preview + confirmation) -----
|
||||
def build_params(row: Dict[str, Any]) -> tuple:
|
||||
# Parameter order MUST match INSERT_VV50_Copy.sql:
|
||||
# VariableID, DocumentID, ProjectID, RevisionNo, ConfigurationID,
|
||||
# ValueText, ValueInt, ValueFloat, ValueDate, ValueCache, IsLongText
|
||||
return (
|
||||
50, # override VariableID
|
||||
row["DocumentID"],
|
||||
row["ProjectID"],
|
||||
row["RevisionNo"],
|
||||
row["ConfigurationID"],
|
||||
row["ValueText"],
|
||||
row["ValueInt"],
|
||||
row["ValueFloat"],
|
||||
row["ValueDate"],
|
||||
row["ValueCache"],
|
||||
row["IsLongText"],
|
||||
)
|
||||
|
||||
run_insert(
|
||||
db,
|
||||
load_query("INSERT_VV50_Copy"),
|
||||
rows_to_insert,
|
||||
params_builder=build_params,
|
||||
action="INSERT VariableID=50 copies of DWS VV-57 rows",
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
|
||||
|
||||
def _find_latest_has_vv50_file() -> Optional[str]:
|
||||
"""Find the most recent has_vv50_*.txt file in the current directory."""
|
||||
logger = logging.getLogger("db_helper")
|
||||
matches = sorted(glob.glob("has_vv50_*.txt"))
|
||||
if not matches:
|
||||
return None
|
||||
latest = matches[-1]
|
||||
logger.info(f"Auto-detected exclusion file: {latest}")
|
||||
return latest
|
||||
|
||||
|
||||
def _load_excluded_doc_ids(path: Optional[str]) -> Set[int]:
|
||||
"""
|
||||
Load DocumentIDs from a has_vv50_*.txt file (CSV format with header).
|
||||
|
||||
Returns an empty set if no file is provided and prompts the user to
|
||||
confirm they want to proceed without any exclusions.
|
||||
"""
|
||||
logger = logging.getLogger("db_helper")
|
||||
|
||||
if not path:
|
||||
logger.warning(
|
||||
"No exclusion file found — ALL VV-57 DocumentIDs will get a "
|
||||
"VV-50 copy, including ones that may already have VV-50."
|
||||
)
|
||||
resp = input(
|
||||
" Proceed without an exclusion list? [y/N]: "
|
||||
).strip().lower()
|
||||
if resp not in ("y", "yes"):
|
||||
logger.info("User aborted — no exclusion file.")
|
||||
raise SystemExit(1)
|
||||
return set()
|
||||
|
||||
excluded: Set[int] = set()
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
header = f.readline() # discard "DocumentID,FileName,FullVaultPath"
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
first = line.split(",", 1)[0].strip()
|
||||
if first.isdigit():
|
||||
excluded.add(int(first))
|
||||
logger.info(f"Loaded {len(excluded)} DocumentIDs to exclude from {path}")
|
||||
return excluded
|
||||
|
||||
|
||||
# Register tasks here — maps --task name to function
|
||||
TASK_REGISTRY: Dict[str, Callable] = {
|
||||
"copy_with_new_id": task_copy_with_new_id,
|
||||
"check_vv50": task_check_vv50,
|
||||
"copy_57_to_50": copy_57_to_50
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI
|
||||
# =============================================================================
|
||||
|
||||
def parse_arguments() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Database helper for PDM migration — interactive SQL tasks",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python db_helper.py --db target_db --task copy_with_new_id
|
||||
python db_helper.py --db target_db --task copy_with_new_id --dry-run
|
||||
python db_helper.py --db source_db --query get_var47
|
||||
python db_helper.py --db source_db --query "SELECT TOP 10 * FROM Documents"
|
||||
python db_helper.py --list-queries
|
||||
""",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--db",
|
||||
help='Config key for the database: "source_db" or "target_db"',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--task",
|
||||
choices=list(TASK_REGISTRY.keys()),
|
||||
help="Name of a predefined task to run",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--query",
|
||||
help=(
|
||||
"Run a SELECT query. Pass a query name to load from "
|
||||
"helpers/queries/<name>.sql, or pass raw SQL in quotes."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Preview what would happen without executing writes",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--list-queries",
|
||||
action="store_true",
|
||||
help="List all available saved queries and exit",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--exclude-file",
|
||||
help=(
|
||||
"Path to a has_vv50_*.txt file whose DocumentIDs should be "
|
||||
"excluded from copy_57_to_50. If omitted, the most recent "
|
||||
"has_vv50_*.txt in the current directory is used."
|
||||
),
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def _resolve_query(query_arg: str) -> str:
|
||||
"""
|
||||
Resolve a --query argument to SQL text.
|
||||
|
||||
If it looks like a SQL statement (contains a space), use it as-is.
|
||||
Otherwise treat it as a saved query name and load from queries/<name>.sql.
|
||||
"""
|
||||
if " " in query_arg:
|
||||
return query_arg
|
||||
return load_query(query_arg)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_arguments()
|
||||
|
||||
# --list-queries doesn't need a DB connection or logging
|
||||
if args.list_queries:
|
||||
queries = list_queries()
|
||||
if queries:
|
||||
print(f"Available queries in {QUERIES_DIR}:")
|
||||
for name in queries:
|
||||
# Show the first line of each .sql as a description
|
||||
sql_path = QUERIES_DIR / f"{name}.sql"
|
||||
first_line = sql_path.read_text(encoding="utf-8").split("\n")[0]
|
||||
print(f" {name:30s} {first_line}")
|
||||
else:
|
||||
print(f"No .sql files found in {QUERIES_DIR}")
|
||||
return 0
|
||||
|
||||
if not args.db:
|
||||
print("Error: --db is required (unless using --list-queries)")
|
||||
return 1
|
||||
|
||||
logger = setup_logging()
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("DB HELPER")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Database: {args.db}")
|
||||
logger.info(f"Task: {args.task or '(ad-hoc query)'}")
|
||||
logger.info(f"Dry run: {args.dry_run}")
|
||||
|
||||
db = connect_db(args.db)
|
||||
|
||||
try:
|
||||
if args.query:
|
||||
sql = _resolve_query(args.query)
|
||||
logger.info(f"Resolved query:\n{sql}")
|
||||
run_select(db, sql)
|
||||
|
||||
elif args.task:
|
||||
task_fn = TASK_REGISTRY[args.task]
|
||||
task_fn(db, args)
|
||||
|
||||
else:
|
||||
logger.error("Provide either --task, --query, or --list-queries")
|
||||
return 1
|
||||
|
||||
except FileNotFoundError as exc:
|
||||
logger.error(str(exc))
|
||||
return 1
|
||||
except KeyboardInterrupt:
|
||||
logger.warning("Interrupted by user")
|
||||
db.rollback()
|
||||
return 130
|
||||
except Exception:
|
||||
logger.exception("Unhandled exception")
|
||||
db.rollback()
|
||||
return 1
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user