from typing import Union
def log_code_files(filepaths: list) -> None:
    """
    Upload the code of files.

    Need not call this for IPython. IPython code is automatically captured
    when .fit is called on the model object. Code capture for vanilla python
    interpreter is currently not supported.

    :param filepaths: List of paths of the files that need to be uploaded.
        Files with extension .ipynb (notebooks) will be ignored
    :return: None

    >>> scrybe.log_code_files(filepaths=["models/model_architecture.py", "metrics/metric_funcs.py"])
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def log_custom_hyperparameter(model, param_name: str, param_value) -> None:
    """
    Log a hyperparameter of a model.

    CAUTION: This must be called BEFORE model.fit

    :param model: The estimator object on which fit can be called.
    :param param_name: Name of the custom hyperparameter. Must be a string
    :param param_value: Value of the custom hyperparameter. Must be a JSON Serializable object
    :return: None

    >>> knn = KNeighborsClassifier()
    >>> scrybe.log_custom_hyperparameter(knn, param_name='custom_param', param_value=float('nan'))
    >>> grid_search = GridSearchCV(knn, param_grid=knn_params, cv=2, scoring='accuracy', n_jobs=1)
    >>> grid_search.fit(x_train, y_train)
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def log_features(model, feature_names: list) -> None:
    """
    Log feature names of a model.

    Feature names should be ordered in the same way in which they were fed
    to the model.
    CAUTION: This must be called AFTER model.fit

    :param model: The trained model object which can be used to make predictions
    :param feature_names: A list of strings containing all the feature names
    :return: None

    >>> rf = RandomForestRegressor(featuresCol="features", labelCol="item_cnt_month")
    >>> rf_model = rf.fit(transformed)
    >>> scrybe.log_features(model=rf_model, feature_names=['shop_id', 'item_id', 'item_cnt', 'transactions',
    ...                                                    'year', 'item_cnt_mean', 'item_cnt_std'])
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def log_feature_importances(model, feature_importances: dict) -> None:
    """
    Log variable importance of a model.

    CAUTION: This must be called AFTER model.fit

    :param model: The trained model object which can be used to make predictions
    :param feature_importances: A dictionary of features as keys and the corresponding importance as value
    :return: None

    >>> knn = KNeighborsClassifier()
    >>> knn.fit(x_train, y_train)
    >>> scrybe.log_feature_importances(model=knn, feature_importances={'sepal-length': 0.9, 'sepal-width': 0.6,
    ...                                                                'petal-length': 0.4, 'petal-width': 0.1})
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def log_custom_model_evaluation_metric(
    model, x_test, y_test, param_name: str, param_value: Union[int, float, str]
) -> None:
    """
    Log custom metric of a model.

    CAUTION: This must be called AFTER model.fit

    :param model: The trained model object which can be used to make predictions
        or the model id displayed on scrybe dashboard
    :param x_test: The test samples dataset. Must be one of Pandas Dataframe/Series, Numpy array, Spark dataframe/rdd
    :param y_test: The test labels dataset. Must be one of Pandas Dataframe/Series, Numpy array, Spark dataframe/rdd
    :param param_name: Name of the custom metric. Must be a string
    :param param_value: Value of the custom metric. Must be a str, int or float
    :return: None

    >>> model = keras.models.load_model("convnet_mnist.h5")
    >>> probabilities = model.predict(x_test)
    >>> # Capture the uncertain band first so later assignments cannot overwrite it
    >>> uncertain = (probabilities > 0.3) & (probabilities < 0.7)
    >>> probabilities[probabilities < 0.3] = 0
    >>> probabilities[probabilities > 0.7] = 1
    >>> probabilities[uncertain] = 2
    >>> cannot_classify = get_unclassified_images(probabilities, y_test)
    >>> scrybe.log_custom_model_evaluation_metric(model=model, x_test=x_test, y_test=y_test,
    ...                                           param_name="unclassified_images", param_value=cannot_classify)
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def log_custom_data_statistic(data, stats_name: str, stats_value: Union[int, float, str]) -> None:
    """
    Log custom dataset statistics on a dataset.

    CAUTION: This must be called AFTER model.fit

    :param data: Must be one of Pandas Dataframe/Series, Numpy array, Spark dataframe/rdd
    :param stats_name: Name of the custom stats. Must be a string
    :param stats_value: Value of the custom stats. Must be a str, int or float
    :return: None

    >>> dataset = pandas.read_csv(url, names=names)
    >>> num_frauds = get_fraud_count(dataset)
    >>> scrybe.log_custom_data_statistic(data=dataset, stats_name="num_fraud", stats_value=num_frauds)
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def set_label(label: Union[str, list, None]) -> None:
    """
    Set labels/tags on all the models, datasets and plots which helps you in filtering data in the dashboard

    :param label: Label can be a string or a list of strings with which you can filter in the dashboard
    :return: None

    >>> dataset = spark.read.csv("s3a://test-datasets/")
    >>> android_dataset = dataset.filter('os == "android"')
    >>> scrybe.set_label(label="android")
    >>> rf = RandomForestRegressor(featuresCol="features", labelCol="ltv")
    >>> rf_model = rf.fit(android_dataset)  ## The model gets tagged as "android"
    >>> ios_dataset = dataset.filter('os == "ios" and region == "US"')
    >>> scrybe.set_label(label=["ios", "US"])
    >>> rf = RandomForestRegressor(featuresCol="features", labelCol="ltv")
    >>> rf_model = rf.fit(ios_dataset)  ## The model gets tagged as "ios" and "US"
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def bookmark(obj, obj_name: str, msg: str) -> None:
    """
    Bookmark an object using this API and then filter in the dashboard by clicking the "Bookmarked" button

    :param obj: The object to be bookmarked. This can be a Scrybe tracked model,
        numpy.ndarray or pandas dataframe or series, or a matplotlib figure object
    :param obj_name: The name of the object which will be displayed in the Scrybe dashboard
    :param msg: The description of why the bookmarked object is important
    :return: None

    >>> for model_pipe in models_pipe:
    ...     model_pipe.fit(train, y)
    ...     preds = model_pipe.predict(test)
    ...     if round(np.sqrt(mean_squared_error(y_test, preds)), 2) <= 0.2:
    ...         scrybe.bookmark(obj=model_pipe, obj_name="pipeline_model", msg="Shortlisted models with RMSE <= 0.2")
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass
def peek(obj) -> None:
    """
    Use this API to directly visit the web page with details of a given object in the dashboard

    :param obj: Can be a model or a dataset or figure object
    :return: Prints URL to stdout. If executed from notebook then loads the URL

    >>> import pandas
    >>> import matplotlib.pyplot as plt
    >>> import seaborn as sns
    >>> url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
    >>> names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    >>> dataset = pandas.read_csv(url, names=names)
    >>> sns.boxplot('sepal-length', 'sepal-width', data=dataset)
    >>> scrybe.peek(plt.gcf())
    """
    # Stub: this definition carries the API documentation only; its body is a no-op.
    pass