Source code for graphnet.data.sqlite.sqlite_utilities

"""SQLite-specific utility functions for use in `graphnet.data`."""

import os.path
from typing import List

import pandas as pd
import sqlalchemy
import sqlite3


[docs] def database_exists(database_path: str) -> bool: """Check whether database exists at `database_path`.""" assert database_path.endswith( ".db" ), "Provided database path does not end in `.db`." return os.path.exists(database_path)
[docs] def database_table_exists(database_path: str, table_name: str) -> bool: """Check whether `table_name` exists in database at `database_path`.""" if not database_exists(database_path): return False query = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';" with sqlite3.connect(database_path) as conn: result = pd.read_sql(query, conn) return len(result) == 1
[docs] def run_sql_code(database_path: str, code: str) -> None: """Execute SQLite code. Args: database_path: Path to databases code: SQLite code """ conn = sqlite3.connect(database_path) c = conn.cursor() c.executescript(code) c.close()
[docs] def save_to_sql(df: pd.DataFrame, table_name: str, database_path: str) -> None: """Save a dataframe `df` to a table `table_name` in SQLite `database`. Table must exist already. Args: df: Dataframe with data to be stored in sqlite table table_name: Name of table. Must exist already database_path: Path to SQLite database """ engine = sqlalchemy.create_engine("sqlite:///" + database_path) df.to_sql(table_name, con=engine, index=False, if_exists="append") engine.dispose()
[docs] def attach_index( database_path: str, table_name: str, index_column: str = "event_no" ) -> None: """Attach the table (i.e., event) index. Important for query times! """ code = ( "PRAGMA foreign_keys=off;\n" "BEGIN TRANSACTION;\n" f"CREATE INDEX {index_column}_{table_name} " f"ON {table_name} ({index_column});\n" "COMMIT TRANSACTION;\n" "PRAGMA foreign_keys=on;" ) run_sql_code(database_path, code)
[docs] def create_table( columns: List[str], table_name: str, database_path: str, *, index_column: str = "event_no", default_type: str = "NOT NULL", integer_primary_key: bool = True, ) -> None: """Create a table. Args: columns: Column names to be created in table. table_name: Name of the table. database_path: Path to the database. index_column: Name of the index column. default_type: The type used for all non-index columns. integer_primary_key: Whether or not to create the `index_column` with the `INTEGER PRIMARY KEY` type. Such a column is required to have unique, integer values for each row. This is appropriate when the table has one row per event, e.g., event-level MC truth. It is not appropriate for pulse map series, particle-level MC truth, and other such data that is expected to have more that one row per event (i.e., with the same index). """ # Prepare column names and types query_columns = [] for column in columns: type_ = default_type if column == index_column: if integer_primary_key: type_ = "INTEGER PRIMARY KEY NOT NULL" else: type_ = "NOT NULL" query_columns.append(f"{column} {type_}") query_columns_string = ", ".join(query_columns) # Run SQL code code = ( "PRAGMA foreign_keys=off;\n" f"CREATE TABLE {table_name} ({query_columns_string});\n" "PRAGMA foreign_keys=on;" ) run_sql_code( database_path, code, ) # Attaching index to all non-truth-like tables (e.g., pulse maps). if not integer_primary_key: attach_index(database_path, table_name, index_column=index_column)
[docs] def create_table_and_save_to_sql( df: pd.DataFrame, table_name: str, database_path: str, *, index_column: str = "event_no", default_type: str = "NOT NULL", integer_primary_key: bool = True, ) -> None: """Create table if it doesn't exist and save dataframe to it.""" if not database_table_exists(database_path, table_name): create_table( df.columns, table_name, database_path, index_column=index_column, default_type=default_type, integer_primary_key=integer_primary_key, ) save_to_sql(df, table_name=table_name, database_path=database_path)