From 3dd6883fd7d909eef82645e77592b90e0d61e9b4 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Thu, 11 Sep 2025 22:53:40 -0400 Subject: [PATCH 1/3] Fixed import issue in db --- pyproject.toml | 3 +- src/madengine/db/database.py | 92 +++++++++++++++++--------- src/madengine/db/database_functions.py | 15 +++-- src/madengine/db/relative_perf.py | 33 ++++++--- src/madengine/db/upload_csv_to_db.py | 23 +++++-- src/madengine/db/utils.py | 10 ++- 6 files changed, 121 insertions(+), 55 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 00e9011d..9e3f2d79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,10 +21,11 @@ dependencies = [ "mysql-connector-python", "pymysql", "tqdm", - "pytest", "typing-extensions", "pymongo", "toml", + "numpy", + "pynvml", ] classifiers = [ "Programming Language :: Python :: 3", diff --git a/src/madengine/db/database.py b/src/madengine/db/database.py index 1e384854..3454f932 100644 --- a/src/madengine/db/database.py +++ b/src/madengine/db/database.py @@ -14,10 +14,15 @@ from sqlalchemy import create_engine from sqlalchemy.orm import mapper, clear_mappers -# MAD Engine modules -from logger import setup_logger -from base_class import BASE, BaseMixin -from utils import get_env_vars +# MAD Engine modules (dual import: package first, then standalone fallback for scp use) +try: # Package import context + from madengine.db.logger import setup_logger # type: ignore + from madengine.db.base_class import BASE, BaseMixin # type: ignore + from madengine.db.utils import get_env_vars # type: ignore +except ImportError: # Standalone (scp) execution context + from logger import setup_logger # type: ignore + from base_class import BASE, BaseMixin # type: ignore + from utils import get_env_vars # type: ignore # Create the logger @@ -25,26 +30,51 @@ # Get the environment variables ENV_VARS = get_env_vars() -# Check if the environment variables are set -if ENV_VARS["user_name"] is None or ENV_VARS["user_password"] is None: - raise ValueError("User name or password not set") +# Global engine variable - will be lazily initialized +ENGINE = None -if ENV_VARS["db_hostname"] is None or ENV_VARS["db_port"] is None: - raise ValueError("DB hostname or port not set") - -if ENV_VARS["db_name"] is None: - raise ValueError("DB name not set") +def get_engine(): + """Get database engine, creating it lazily when first needed. + + Returns: + sqlalchemy.engine.Engine: Database engine + + Raises: + ValueError: If required environment variables are not set + """ + global ENGINE + + if ENGINE is None: + # Check if the environment variables are set + if not ENV_VARS["user_name"] or not ENV_VARS["user_password"]: + raise ValueError("User name or password not set") + + if not ENV_VARS["db_hostname"] or not ENV_VARS["db_port"]: + raise ValueError("DB hostname or port not set") + + if not ENV_VARS["db_name"]: + raise ValueError("DB name not set") + + # Create the engine + ENGINE = create_engine( + "mysql+pymysql://{user_name}:{user_password}@{hostname}:{port}/{db_name}".format( + user_name=ENV_VARS["user_name"], + user_password=ENV_VARS["user_password"], + hostname=ENV_VARS["db_hostname"], + port=ENV_VARS["db_port"], + db_name=ENV_VARS["db_name"], + ) + ) + LOGGER.info("Database engine created for %s@%s:%s/%s", + ENV_VARS["user_name"], ENV_VARS["db_hostname"], + ENV_VARS["db_port"], ENV_VARS["db_name"]) + + return ENGINE -# Create the engine -ENGINE = create_engine( - "mysql+pymysql://{user_name}:{user_password}@{hostname}:{port}/{db_name}".format( - user_name=ENV_VARS["user_name"], - user_password=ENV_VARS["user_password"], - hostname=ENV_VARS["db_hostname"], - port=ENV_VARS["db_port"], - db_name=ENV_VARS["db_name"], - ) -) +# Check for eager initialization +if os.getenv("MADENGINE_DB_EAGER") == "1": + LOGGER.info("MADENGINE_DB_EAGER=1 detected, creating engine immediately") + ENGINE = get_engine() # Define the path to the SQL file SQL_FILE_PATH = os.path.join(os.path.dirname(__file__), 'db_table_def.sql') @@ -99,7 +129,7 @@ def connect_db() -> None: user_name = ENV_VARS["user_name"] try: - ENGINE.execute("Use {}".format(db_name)) + get_engine().execute("Use {}".format(db_name)) return except OperationalError: # as err: LOGGER.warning( @@ -107,12 +137,12 @@ def connect_db() -> None: ) try: - ENGINE.execute("Create database if not exists {}".format(db_name)) + get_engine().execute("Create database if not exists {}".format(db_name)) except OperationalError as err: LOGGER.error("Database creation failed %s for username: %s", err, user_name) - ENGINE.execute("Use {}".format(db_name)) - ENGINE.execute("SET GLOBAL max_allowed_packet=4294967296") + get_engine().execute("Use {}".format(db_name)) + get_engine().execute("SET GLOBAL max_allowed_packet=4294967296") def clear_db() -> None: @@ -126,7 +156,7 @@ def clear_db() -> None: db_name = ENV_VARS["db_name"] try: - ENGINE.execute("DROP DATABASE IF EXISTS {}".format(db_name)) + get_engine().execute("DROP DATABASE IF EXISTS {}".format(db_name)) return except OperationalError: # as err: LOGGER.warning("Database %s could not be dropped", db_name) @@ -143,13 +173,13 @@ def show_db() -> None: db_name = ENV_VARS["db_name"] try: - result = ENGINE.execute( + result = get_engine().execute( "SELECT * FROM {} \ WHERE {}.created_date= \ (SELECT MAX(created_date) FROM {}) ;".format(DB_TABLE.__tablename__) ) for row in result: - print(row) + LOGGER.info("Latest entry: %s", row) return except OperationalError: # as err: LOGGER.warning("Database %s could not be shown", db_name) @@ -195,7 +225,7 @@ def trim_column(col_name: str) -> None: Raises: OperationalError: An error occurred while trimming the column. """ - ENGINE.execute( + get_engine().execute( "UPDATE {} \ SET \ {} = TRIM({});".format( @@ -218,7 +248,7 @@ def get_column_names() -> list: """ db_name = ENV_VARS["db_name"] - result = ENGINE.execute( + result = get_engine().execute( "SELECT `COLUMN_NAME` \ FROM `INFORMATION_SCHEMA`.`COLUMNS` \ WHERE `TABLE_SCHEMA`='{}' \ diff --git a/src/madengine/db/database_functions.py b/src/madengine/db/database_functions.py index 97561fc1..87261d70 100644 --- a/src/madengine/db/database_functions.py +++ b/src/madengine/db/database_functions.py @@ -8,8 +8,11 @@ # built-in modules import typing -# MAD Engine modules -from database import ENGINE +# MAD Engine modules (dual import) +try: + from madengine.db.database import get_engine, LOGGER # type: ignore +except ImportError: + from database import get_engine, LOGGER # type: ignore def get_all_gpu_archs() -> typing.List[str]: @@ -18,7 +21,7 @@ def get_all_gpu_archs() -> typing.List[str]: Returns: typing.List[str]: A list of all GPU architectures in the database. """ - matching_entries = ENGINE.execute( + matching_entries = get_engine().execute( "SELECT DISTINCT(gpu_architecture) FROM dlm_table" ) @@ -43,7 +46,7 @@ def get_matching_db_entries( Returns: typing.List[typing.Dict[str, typing.Any]]: The matching entries. """ - print( + LOGGER.info( "Looking for entries with {}, {} and {}".format( recent_entry["model"], recent_entry["gpu_architecture"], @@ -52,7 +55,7 @@ def get_matching_db_entries( ) # find matching entries to current entry - matching_entries = ENGINE.execute( + matching_entries = get_engine().execute( "SELECT * FROM dlm_table \ WHERE model='{}' \ AND gpu_architecture='{}' \ @@ -74,7 +77,7 @@ def get_matching_db_entries( if should_add: filtered_matching_entries.append(m) - print( + LOGGER.info( "Found {} similar entries in database filtered down to {} entries".format( len(matching_entries), len(filtered_matching_entries) diff --git a/src/madengine/db/relative_perf.py b/src/madengine/db/relative_perf.py index 93d2569f..0f930543 100644 --- a/src/madengine/db/relative_perf.py +++ b/src/madengine/db/relative_perf.py @@ -13,10 +13,21 @@ # third-party modules import pandas as pd -# MAD Engine modules -from database import ENGINE, create_tables, LOGGER -from utils import get_avg_perf, load_perf_csv, dataFrame_to_list -from database_functions import get_all_gpu_archs, get_matching_db_entries +# MAD Engine modules (dual import: prefer package, fallback to local) +try: + from madengine.db.database import get_engine, create_tables, LOGGER # type: ignore + from madengine.db.utils import get_avg_perf, load_perf_csv, dataFrame_to_list # type: ignore + from madengine.db.database_functions import ( # type: ignore + get_all_gpu_archs, + get_matching_db_entries, + ) +except ImportError: + from database import get_engine, create_tables, LOGGER # type: ignore + from utils import get_avg_perf, load_perf_csv, dataFrame_to_list # type: ignore + from database_functions import ( # type: ignore + get_all_gpu_archs, + get_matching_db_entries, + ) def get_baseline_configs( @@ -63,7 +74,7 @@ def relative_perf( pd.DataFrame: The data. """ LOGGER.info("Checking relative performance against {}".format(base_line_params)) - print(data) + LOGGER.debug("Data: %s", data) # get the most recent entries most_recent_entries = dataFrame_to_list(data) @@ -74,15 +85,15 @@ def relative_perf( baseline_configs = get_baseline_configs(recent_entry, base_line_params) baseline_avg, baseline_perfs = get_avg_perf(baseline_configs, 5) if recent_entry["performance"] and baseline_avg: - print( + LOGGER.info( "Current Performance is {} {}".format( recent_entry["performance"], recent_entry["metric"] ) ) relative_perf = (float(recent_entry["performance"]) / baseline_avg) * 100 - print( - "Relative perf {:.2f}% against {}".format( - relative_perf, base_line_params + LOGGER.info( + "Baseline performance {} for entry {}. Relative performance: {:.2f}%".format( + baseline_avg, recent_entry, relative_perf ) ) else: @@ -106,7 +117,7 @@ def relative_perf( } data.loc[i, "relative_change"] = str(relative_change) - print(data) + LOGGER.debug("Data after relative performance calculation: %s", data) return data @@ -122,7 +133,7 @@ def relative_perf_all_configs(data: pd.DataFrame) -> pd.DataFrame: pd.DataFrame: The data. """ archs = get_all_gpu_archs() - print(archs) + LOGGER.info("Processing relative performance for GPU architectures: %s", archs) for a in archs: data = relative_perf(data, {"gpu_architecture": a}) return data diff --git a/src/madengine/db/upload_csv_to_db.py b/src/madengine/db/upload_csv_to_db.py index d70d15b5..d839506d 100644 --- a/src/madengine/db/upload_csv_to_db.py +++ b/src/madengine/db/upload_csv_to_db.py @@ -15,10 +15,23 @@ # third-party modules from tqdm import tqdm from sqlalchemy.orm import sessionmaker -# MAD Engine modules -from database import ENGINE, create_tables, DB_TABLE, LOGGER -from utils import dataFrame_to_list, load_perf_csv, replace_nans_with_None -from relative_perf import relative_perf_all_configs +# MAD Engine modules (dual import: prefer package, fallback to local) +try: + from madengine.db.database import get_engine, create_tables, DB_TABLE, LOGGER # type: ignore + from madengine.db.utils import ( + dataFrame_to_list, + load_perf_csv, + replace_nans_with_None, + ) # type: ignore + from madengine.db.relative_perf import relative_perf_all_configs # type: ignore +except ImportError: + from database import get_engine, create_tables, DB_TABLE, LOGGER # type: ignore + from utils import ( + dataFrame_to_list, + load_perf_csv, + replace_nans_with_None, + ) # type: ignore + from relative_perf import relative_perf_all_configs # type: ignore def add_csv_to_db(data: pd.DataFrame) -> bool: @@ -35,7 +48,7 @@ def add_csv_to_db(data: pd.DataFrame) -> bool: LOGGER.info("adding csv to Database") # Create the session session = sessionmaker() - session.configure(bind=ENGINE) + session.configure(bind=get_engine()) s = session() # change nans to None to upload to database diff --git a/src/madengine/db/utils.py b/src/madengine/db/utils.py index 13c6e879..138b408d 100644 --- a/src/madengine/db/utils.py +++ b/src/madengine/db/utils.py @@ -12,6 +12,14 @@ import pandas as pd import numpy as np +# MAD Engine modules (dual import for LOGGER) +try: + from madengine.db.logger import setup_logger # type: ignore + LOGGER = setup_logger() +except ImportError: + from logger import setup_logger # type: ignore + LOGGER = setup_logger() + def get_env_vars() -> dict: """Utility function to get MAD/DLM specific env_vars @@ -101,7 +109,7 @@ def get_avg_perf( if perfs: avg = mean(perfs) - print("{} avg from the last {} entries".format(avg, len(perfs))) + LOGGER.debug("{} avg from the last {} entries".format(avg, len(perfs))) return avg, perfs else: return None, None From b7c9860728f3d0ff070c83e002e7c3b530b854f6 Mon Sep 17 00:00:00 2001 From: Stephen Shao Date: Fri, 12 Sep 2025 15:37:20 -0400 Subject: [PATCH 2/3] Updated pyproject.toml and created MANIFEST.ini for building and publishing to pypi --- MANIFEST.in | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 34 ++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..48eb1ee4 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,63 @@ +# Include essential project files +include README.md +include LICENSE +include pyproject.toml + +# Include all Python source files +recursive-include src/madengine *.py + +# Include all script files +recursive-include src/madengine/scripts * + +# Include database schema files +include src/madengine/db/*.sql + +# Include documentation files +recursive-include src/madengine *.md + +# Include any configuration or data files +recursive-include src/madengine *.yml +recursive-include src/madengine *.yaml +recursive-include src/madengine *.json +recursive-include src/madengine *.toml +recursive-include src/madengine *.cfg +recursive-include src/madengine *.ini + +# Include shell scripts and executables +recursive-include src/madengine *.sh +recursive-include src/madengine *.bash + +# Include any template or configuration files +recursive-include src/madengine *.template +recursive-include src/madengine *.conf + +# Exclude compiled Python files +global-exclude *.pyc +global-exclude *.pyo +global-exclude __pycache__ + +# Exclude version control +global-exclude .git* +global-exclude .svn* + +# Exclude IDE and editor files +global-exclude .vscode* +global-exclude .idea* +global-exclude *.swp +global-exclude *.swo +global-exclude *~ + +# Exclude build and distribution artifacts +global-exclude build +global-exclude dist +global-exclude *.egg-info + +# Exclude test artifacts +global-exclude .pytest_cache +global-exclude .coverage +global-exclude htmlcov + +# Exclude temporary and log files +global-exclude *.log +global-exclude *.tmp +global-exclude temp* diff --git a/pyproject.toml b/pyproject.toml index 9e3f2d79..68813e42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,9 +8,14 @@ dynamic = ["version"] authors = [ { name="Advanced Micro Devices", email="mad.support@amd.com" }, ] -description = "MAD Engine is a set of interfaces to run various AI models from public MAD." +maintainers = [ + { name="Advanced Micro Devices", email="mad.support@amd.com" }, +] +description = "AI Models automation and dashboarding CLI tool for running LLMs and Deep Learning models" readme = "README.md" +license = {text = "MIT"} requires-python = ">=3.8" +keywords = ["AI", "machine-learning", "deep-learning", "LLM", "automation", "AMD", "ROCm", "GPU", "performance", "benchmarking"] dependencies = [ "pandas", "GitPython", @@ -28,9 +33,21 @@ dependencies = [ "pynvml", ] classifiers = [ - "Programming Language :: Python :: 3", + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Testing", + "Topic :: System :: Benchmark", + "Topic :: System :: Hardware", ] [project.scripts] @@ -50,11 +67,22 @@ dev = [ "pytest-asyncio", ] +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/README.md", + "/LICENSE", + "/pyproject.toml", +] + [tool.hatch.build.targets.wheel] [tool.hatch.build.targets.wheel.force-include] "src/madengine/scripts" = "madengine/scripts" +[tool.hatch.build.targets.sdist.force-include] +"src/madengine/scripts" = "src/madengine/scripts" + [tool.hatch.version] source = "versioningit" From 5c50d065640ffb9c2aa7f352552968ac423984ca Mon Sep 17 00:00:00 2001 From: Satya Nikhil Date: Fri, 3 Oct 2025 15:14:50 +0000 Subject: [PATCH 3/3] added selftest to check if the .whl is being built and installed properly --- pytest.ini | 3 +- tests/test_pip_packaging.py | 77 +++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 tests/test_pip_packaging.py diff --git a/pytest.ini b/pytest.ini index 3a5aa078..1849450b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,4 @@ [pytest] testpaths = tests -pythonpath = src \ No newline at end of file +pythonpath = src +markers = packaging: packaging-related tests exercising the wheel build/install flow \ No newline at end of file diff --git a/tests/test_pip_packaging.py b/tests/test_pip_packaging.py new file mode 100644 index 00000000..1b94843e --- /dev/null +++ b/tests/test_pip_packaging.py @@ -0,0 +1,77 @@ +import subprocess +import sys +from pathlib import Path +import pytest + + +def _run(command, cwd=None): + result = subprocess.run( + command, + cwd=cwd, + text=True, + capture_output=True, + check=False, + ) + assert result.returncode == 0, "Command failed (exit code {}):\n{}\nSTDOUT:\n{}\nSTDERR:\n{}".format( + result.returncode, + " ".join(command), + result.stdout, + result.stderr, + ) + + +class TestPackaging: + + @pytest.mark.packaging + def test_build_install_and_import(self, tmp_path): + """Build a wheel, install it in isolation, then import madengine.""" + + project_root = Path(__file__).resolve().parents[1] + dist_dir = tmp_path / "wheel" + site_dir = tmp_path / "site-packages" + dist_dir.mkdir() + site_dir.mkdir() + + # build a wheel into the temporary dist folder. + _run( + [ + sys.executable, + "-m", + "pip", + "wheel", + "--no-deps", + "--no-cache-dir", + "-w", + str(dist_dir), + str(project_root), + ], + cwd=project_root, + ) + + wheels = sorted(dist_dir.glob("madengine-*.whl")) + assert wheels, "Expected pip wheel to create a madengine wheel" + wheel_path = wheels[0] + + # install that wheel into an isolated folder. + _run( + [ + sys.executable, + "-m", + "pip", + "install", + "--no-deps", + "--no-cache-dir", + "--target", + str(site_dir), + str(wheel_path), + ] + ) + + # import madengine from the isolated folder. + _run( + [ + sys.executable, + "-c", + "import sys; " f"sys.path.insert(0, {repr(str(site_dir))}); " "import madengine; print(madengine.__version__)", + ] + )