diff --git a/lib/python/docs/notebooks/access_requests_demo.ipynb b/lib/python/docs/notebooks/access_requests_demo.ipynb index 334f257fc..8ba5b485f 100644 --- a/lib/python/docs/notebooks/access_requests_demo.ipynb +++ b/lib/python/docs/notebooks/access_requests_demo.ipynb @@ -190,9 +190,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv", "language": "python", - "name": "python" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/lib/python/docs/notebooks/datacards_demo.ipynb b/lib/python/docs/notebooks/datacards_demo.ipynb index 7fa8bf248..d84816d45 100644 --- a/lib/python/docs/notebooks/datacards_demo.ipynb +++ b/lib/python/docs/notebooks/datacards_demo.ipynb @@ -45,15 +45,6 @@ "In order to create helper classes, you will first need to instantiate a `Client()` object from the core. By default, this object will not support any authentication. However, Bailo also supports PKI authentication, which you can use from Python by passing a `PkiAgent()` object into the `Client()` object when you instantiate it." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! pip install bailo -e ../.." - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/lib/python/src/bailo/__init__.py b/lib/python/src/bailo/__init__.py index 39aa641ba..5420aa409 100644 --- a/lib/python/src/bailo/__init__.py +++ b/lib/python/src/bailo/__init__.py @@ -5,10 +5,10 @@ Bailo is a ecosystem for managing the lifecycle of managing machine learning models. This package provides support for interacting with models within Bailo. """ from __future__ import annotations +import logging - -# Package Version 2.3.1 -__version__ = "2.3.1" +# Package Version 2.3.2 +__version__ = "2.3.2" from bailo.core.agent import Agent, PkiAgent, TokenAgent @@ -19,3 +19,6 @@ from bailo.helper.model import Experiment, Model from bailo.helper.release import Release from bailo.helper.schema import Schema + + +logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/lib/python/src/bailo/core/agent.py b/lib/python/src/bailo/core/agent.py index f6e1cccfe..78a397f06 100644 --- a/lib/python/src/bailo/core/agent.py +++ b/lib/python/src/bailo/core/agent.py @@ -5,9 +5,12 @@ import requests import os import getpass +import logging from requests.auth import HTTPBasicAuth from bailo.core.exceptions import BailoException, ResponseException +logger = logging.getLogger(__name__) + class Agent: """Base API Agent for talking with Bailo. @@ -108,16 +111,24 @@ def __init__( super().__init__() if access_key is None: + logger.info("Access key not provided. Trying other sources...") try: access_key = os.environ["BAILO_ACCESS_KEY"] + logger.info("Access key acquired from BAILO_ACCESS_KEY environment variable.") except KeyError: + logger.info("Access key not found in BAILO_ACCESS_KEY environment variable. Requires user input.") access_key = getpass.getpass("BAILO ACCESS KEY:") + logger.info("Access key acquired from user input.") if secret_key is None: + logger.info("Secret key not provided. Trying other sources...") try: secret_key = os.environ["BAILO_SECRET_KEY"] + logger.info("Secret key acquired from BAILO_SECRET_KEY environment variable.") except KeyError: + logger.info("Secret key not found in BAILO_SECRET_KEY environment variable. Requires user input.") secret_key = getpass.getpass("BAILO SECRET KEY:") + logger.info("Secret key acquired from user input.") self.access_key = access_key self.secret_key = secret_key diff --git a/lib/python/src/bailo/helper/access_request.py b/lib/python/src/bailo/helper/access_request.py index e2b8133a8..fa1edbfde 100644 --- a/lib/python/src/bailo/helper/access_request.py +++ b/lib/python/src/bailo/helper/access_request.py @@ -1,9 +1,12 @@ from __future__ import annotations from typing import Any +import logging from bailo.core.client import Client +logger = logging.getLogger(__name__) + class AccessRequest: """Represent a review within Bailo. @@ -53,6 +56,8 @@ def from_id(cls, client: Client, model_id: str, access_request_id: str) -> Acces schema_id = json_access_request["schemaId"] + logger.info(f"Access request %s for model %s successfully retrieved from server.", access_request_id, model_id) + return cls( client, model_id, @@ -82,6 +87,10 @@ def create(cls, client: Client, model_id: str, schema_id: str, metadata: Any) -> metadata = access_request_json["metadata"] created_by = access_request_json["createdBy"] + logger.info( + f"Access request successfully created on server with ID %s for model %s.", access_request_id, model_id + ) + return cls( client, model_id, @@ -98,12 +107,17 @@ def delete(self) -> bool: :return: A message confirming the removal of the access request. """ self.client.delete_access_request(self.model_id, self.access_request_id) + + logger.info(f"Access request %s successfully deleted on server.", self.access_request_id) + return True def update(self): """Update the current state of the access request to Bailo.""" self.client.patch_access_request(self.model_id, self.access_request_id, metadata=self.metadata) + logger.info(f"Access request %s successfully updated on server.", self.access_request_id) + def __str__(self) -> str: return f"Access Request: {self.metadata['overview']['name']} - {self.model_id}" diff --git a/lib/python/src/bailo/helper/datacard.py b/lib/python/src/bailo/helper/datacard.py index ee8a16e37..0914df4b2 100644 --- a/lib/python/src/bailo/helper/datacard.py +++ b/lib/python/src/bailo/helper/datacard.py @@ -1,12 +1,15 @@ from __future__ import annotations from typing import Any +import logging from bailo.core.client import Client from bailo.core.enums import EntryKind, ModelVisibility from bailo.core.exceptions import BailoException from bailo.helper.entry import Entry +logger = logging.getLogger(__name__) + class Datacard(Entry): """Represent a datacard within Bailo. @@ -58,9 +61,12 @@ def create( res = client.post_model( name=name, kind=EntryKind.DATACARD, description=description, team_id=team_id, visibility=visibility ) + datacard_id = res["model"]["id"] + logger.info(f"Datacard successfully created on server with ID %s.", datacard_id) + datacard = cls( client=client, - datacard_id=res["model"]["id"], + datacard_id=datacard_id, name=name, description=description, visibility=visibility, @@ -84,6 +90,8 @@ def from_id(cls, client: Client, datacard_id: str) -> Datacard: f"ID {datacard_id} does not belong to a datacard. Did you mean to use Model.from_id()?" ) + logger.info(f"Datacard %s successfully retrieved from server.", datacard_id) + datacard = cls( client=client, datacard_id=datacard_id, diff --git a/lib/python/src/bailo/helper/entry.py b/lib/python/src/bailo/helper/entry.py index 3f5c4aeb0..6796ba98e 100644 --- a/lib/python/src/bailo/helper/entry.py +++ b/lib/python/src/bailo/helper/entry.py @@ -1,11 +1,14 @@ from __future__ import annotations from typing import Any +import logging from bailo.core.client import Client from bailo.core.enums import EntryKind, ModelVisibility from bailo.core.exceptions import BailoException +logger = logging.getLogger(__name__) + class Entry: def __init__( @@ -40,6 +43,8 @@ def update(self) -> None: ) self._unpack(res["model"]) + logger.info(f"ID %s updated locally and on server.", self.id) + def card_from_schema(self, schema_id: str) -> None: """Create a card using a schema on Bailo. @@ -48,6 +53,8 @@ def card_from_schema(self, schema_id: str) -> None: res = self.client.model_card_from_schema(model_id=self.id, schema_id=schema_id) self.__unpack_card(res["card"]) + logger.info(f"Card for ID %s successfully created using schema ID %s.", self.id, schema_id) + def card_from_template(self): """Create a card using a template (not yet implemented). @@ -60,6 +67,7 @@ def get_card_latest(self) -> None: res = self.client.get_model(model_id=self.id) if "card" in res["model"]: self.__unpack_card(res["model"]["card"]) + logger.info(f"Latest card for ID %s successfully retrieved.", self.id) else: raise BailoException(f"A model card doesn't exist for model {self.id}") @@ -71,6 +79,8 @@ def get_card_revision(self, version: str) -> None: res = self.client.get_model_card(model_id=self.id, version=version) self.__unpack_card(res["modelCard"]) + logger.info(f"Card version %s for ID %s successfully retrieved.", version, self.id) + def get_roles(self): """Get all roles for the entry. @@ -96,6 +106,8 @@ def _update_card(self, card: dict[str, Any] | None = None) -> None: res = self.client.put_model_card(model_id=self.id, metadata=card) self.__unpack_card(res["card"]) + logger.info(f"Card for %s successfully updated on server.", self.id) + def _unpack(self, res): self.id = res["id"] self.name = res["name"] @@ -106,6 +118,8 @@ def _unpack(self, res): else: self.visibility = ModelVisibility.PUBLIC + logger.info(f"Attributes for ID %s successfully unpacked.", self.id) + def __unpack_card(self, res): self._card_version = res["version"] self._card_schema = res["schemaId"] @@ -114,3 +128,5 @@ def __unpack_card(self, res): self._card = res["metadata"] except KeyError: self._card = None + + logger.info(f"Card attributes for ID %s successfully unpacked.", self.id) diff --git a/lib/python/src/bailo/helper/model.py b/lib/python/src/bailo/helper/model.py index f6b4afe17..57636e211 100644 --- a/lib/python/src/bailo/helper/model.py +++ b/lib/python/src/bailo/helper/model.py @@ -4,6 +4,8 @@ import shutil import tempfile from typing import Any +import logging +import warnings from bailo.core.client import Client from bailo.core.enums import EntryKind, ModelVisibility @@ -20,6 +22,8 @@ except ImportError: ml_flow = False +logger = logging.getLogger(__name__) + class Model(Entry): """Represent a model within Bailo. @@ -66,6 +70,9 @@ def create( res = client.post_model( name=name, kind=EntryKind.MODEL, description=description, team_id=team_id, visibility=visibility ) + model_id = res["model"]["id"] + logger.info(f"Model successfully created on server with ID %s.", model_id) + model = cls( client=client, model_id=res["model"]["id"], @@ -90,6 +97,8 @@ def from_id(cls, client: Client, model_id: str) -> Model: if res["kind"] != "model": raise BailoException(f"ID {model_id} does not belong to a model. Did you mean to use Datacard.from_id()?") + logger.info(f"Model %s successfully retrieved from server.", model_id) + model = cls( client=client, model_id=model_id, @@ -164,6 +173,8 @@ def get_releases(self) -> list[Release]: for release in res["releases"]: releases.append(self.get_release(version=release["semver"])) + logger.info(f"Successfully retrieved all releases for model %s.", self.model_id) + return releases def get_release(self, version: Version | str) -> Release: @@ -182,6 +193,10 @@ def get_latest_release(self): releases = self.get_releases() if releases == []: raise BailoException("This model has no releases.") + + latest_release = max(releases) + logger.info(f"latest_release (%s) for %s retrieved successfully.", str(latest_release.version), self.model_id) + return max(releases) def get_images(self): @@ -191,6 +206,8 @@ def get_images(self): """ res = self.client.get_all_images(model_id=self.model_id) + logger.info(f"Images for %s retreived successfully.", self.model_id) + return res["images"] def get_image(self): @@ -279,7 +296,7 @@ def start_run(self, is_mlflow: bool = False): self.raw.append(self.run_data) if not is_mlflow: - print(f"Bailo tracking run {self.run}.") + logger.info(f"Bailo tracking run %d.", self.run) def log_params(self, params: dict[str, Any]): """Logs parameters to the current run. @@ -319,6 +336,16 @@ def from_mlflow(self, tracking_uri: str, experiment_id: str): if ml_flow: client = mlflow.tracking.MlflowClient(tracking_uri=tracking_uri) runs = client.search_runs(experiment_id) + if len(runs): + logger.info( + f"Successfully retrieved MLFlow experiment %s from tracking server. %d were found.", + experiment_id, + len(runs), + ) + else: + warnings.warn( + f"MLFlow experiment {experiment_id} does not have any runs and publishing requires at least one valid run. Are you sure the ID is correct?" + ) for run in runs: data = run.data @@ -337,10 +364,13 @@ def from_mlflow(self, tracking_uri: str, experiment_id: str): if status != "FINISHED": continue - if len(mlflow.artifacts.list_artifacts(artifact_uri=artifact_uri)) > 0: + if len(mlflow.artifacts.list_artifacts(artifact_uri=artifact_uri)): mlflow_dir = os.path.join(self.temp_dir, f"mlflow_{run_id}") mlflow.artifacts.download_artifacts(artifact_uri=artifact_uri, dst_path=mlflow_dir) artifacts.append(mlflow_dir) + logger.info( + f"Successfully downloaded artifacts for MLFlow experiment %s to %s.", experiment_id, mlflow_dir + ) self.start_run(is_mlflow=True) self.log_params(data.params) @@ -348,6 +378,8 @@ def from_mlflow(self, tracking_uri: str, experiment_id: str): self.log_artifacts(artifacts) self.log_dataset("".join(datasets_str)) self.run_data["run"] = info.run_id + + logger.info(f"Successfully imported MLFlow experiment %s.", experiment_id) else: raise ImportError("Optional MLFlow dependencies (needed for this method) are not installed.") @@ -367,7 +399,7 @@ def publish(self, mc_loc: str, run_id: str, semver: str = "0.1.0", notes: str = mc = NestedDict(mc) - if len(self.raw) > 0: + if len(self.raw): for run in self.raw: if run["run"] == run_id: sel_run = run @@ -390,7 +422,7 @@ def publish(self, mc_loc: str, run_id: str, semver: str = "0.1.0", notes: str = # Creating a release and uploading artifacts (if artifacts present) artifacts = sel_run["artifacts"] - if len(artifacts) > 0: + if len(artifacts): # Create new release try: release_latest_version = self.model.get_latest_release().version @@ -402,8 +434,17 @@ def publish(self, mc_loc: str, run_id: str, semver: str = "0.1.0", notes: str = notes = f"{notes} (Run ID: {run_id})" release_new = self.model.create_release(version=release_new_version, minor=True, notes=notes) + logger.info( + f"Uploading %d artifacts to version %s of model %s.", + len(artifacts), + str(release_new_version), + self.model.model_id, + ) + for artifact in artifacts: release_new.upload(path=artifact) if os.path.exists(self.temp_dir) and os.path.isdir(self.temp_dir): shutil.rmtree(self.temp_dir) + + logger.info(f"Successfully published experiment run %s to model %s.", str(run_id), self.model.model_id) diff --git a/lib/python/src/bailo/helper/release.py b/lib/python/src/bailo/helper/release.py index 7eaf89c21..443656083 100644 --- a/lib/python/src/bailo/helper/release.py +++ b/lib/python/src/bailo/helper/release.py @@ -4,7 +4,9 @@ import fnmatch import shutil from io import BytesIO -from typing import Any, Union +from typing import Any +import logging +import warnings from tqdm import tqdm from tqdm.utils import CallbackIOWrapper @@ -14,6 +16,7 @@ from semantic_version import Version BLOCK_SIZE = 1024 +logger = logging.getLogger(__name__) class Release: @@ -99,6 +102,7 @@ def create( minor, draft, ) + logger.info(f"Release %s successfully created on server for model with ID %s.", str(version), model_id) return cls( client, @@ -129,6 +133,8 @@ def from_version(cls, client: Client, model_id: str, version: Version | str) -> minor = res["minor"] draft = res["draft"] + logger.info(f"Release %s of model ID %s successfully retrieved from server.", str(version), model_id) + return cls( client, model_id, @@ -151,6 +157,7 @@ def download(self, filename: str, write: bool = True, path: str | None = None) - :return: A JSON response object """ res = self.client.get_download_by_filename(self.model_id, str(self.version), filename) + logger.info(f"Downloading file %s from version %s of %s...", filename, str(self.version), self.model_id) if write: if path is None: @@ -175,6 +182,12 @@ def download(self, filename: str, write: bool = True, path: str | None = None) - t.update(len(data)) f.write(data) + logger.info(f"File written to %s", path) + + logger.info( + f"Downloading of file %s from version %s of %s completed.", filename, str(self.version), self.model_id + ) + return res def download_all(self, path: str = os.getcwd(), include: list | str = None, exclude: list | str = None): @@ -190,6 +203,7 @@ def download_all(self, path: str = os.getcwd(), include: list | str = None, excl if files_metadata == []: raise BailoException("Release has no associated files.") file_names = [file_metadata["name"] for file_metadata in files_metadata] + orig_file_names = file_names if isinstance(include, str): include = [include] @@ -204,6 +218,13 @@ def download_all(self, path: str = os.getcwd(), include: list | str = None, excl file for file in file_names if not any([fnmatch.fnmatch(file, pattern) for pattern in exclude]) ] + logger.info( + f"Downloading %d of %%d files for version %s of %s...", + len(file_names), + len(orig_file_names), + str(self.version), + {self.model_id}, + ) os.makedirs(path, exist_ok=True) for file in file_names: file_path = os.path.join(path, file) @@ -218,10 +239,12 @@ def upload(self, path: str, data: BytesIO | None = None) -> str: :return: The unique file ID of the file uploaded ..note:: If path provided is a directory, it will be uploaded as a zip """ + logger.info(f"Uploading file(s) to version %s of %s...", str(self.version), self.model_id) name = os.path.split(path)[-1] if data is None: if is_zip := os.path.isdir(path): + logger.info(f"Given path (%s) is a directory. This will be converted to a zip file for upload.", path) shutil.make_archive(name, "zip", path) path = f"{name}.zip" name = path @@ -251,6 +274,8 @@ def upload(self, path: str, data: BytesIO | None = None) -> str: self.update() if not isinstance(data, BytesIO): data.close() + logger.info(f"Upload of file %s to version %s of %s complete.", name, str(self.version), self.model_id) + return res["file"]["id"] def update(self) -> Any: @@ -273,6 +298,8 @@ def delete(self) -> Any: :return: JSON Response object """ self.client.delete_release(self.model_id, str(self.version)) + logger.info(f"Release %s of %s successfully deleted.", str(self.version), self.model_id) + return True def __repr__(self) -> str: diff --git a/lib/python/src/bailo/helper/schema.py b/lib/python/src/bailo/helper/schema.py index de18d9f3e..ab050cc2f 100644 --- a/lib/python/src/bailo/helper/schema.py +++ b/lib/python/src/bailo/helper/schema.py @@ -1,10 +1,13 @@ from __future__ import annotations from typing import Any +import logging from bailo.core.client import Client from bailo.core.enums import SchemaKind +logger = logging.getLogger(__name__) + class Schema: """Represent a schema within Bailo. @@ -64,6 +67,7 @@ def create( res = client.post_schema( schema_id=schema_id, name=name, description=description, kind=kind, json_schema=json_schema ) + logger.info(f"Schema successfully created on server with ID %s.", schema_id) schema.__unpack(res["schema"]) return schema @@ -85,6 +89,7 @@ def from_id(cls, client: Client, schema_id: str) -> Schema: json_schema={"temp": "temp"}, ) res = client.get_schema(schema_id=schema_id) + logger.info(f"Schema %s successfully retrieved from server.", schema_id) schema.__unpack(res["schema"]) return schema @@ -100,3 +105,5 @@ def __unpack(self, res) -> None: self.kind = SchemaKind.MODEL if kind == "accessRequest": self.kind = SchemaKind.ACCESS_REQUEST + + logger.info(f"Attributes for Schema ID %s successfully unpacked.", self.schema_id)