Source code for bluemist.main

import logging
import os
import sys
from logging import config

import sklearn
import cpuinfo
import sklearnex
#import cuml
from sklearnex import patch_sklearn, sklearn_is_patched

#patch_sklearn()

from sklearn import datasets

from bluemist.environment import initialize, check_gpu_brand
# from bluemist.datasource.aws import get_data_from_s3
from bluemist.datasource import get_data_from_filesystem
# from bluemist.pipeline import get_model_pipeline
from bluemist.preprocessing import preprocess_data
from bluemist.regression import train_test_evaluate, deploy_model, get_estimators

# from bluemist.eda import perform_eda
# from bluemist.datasource.database import get_data_from_database

HOME_PATH = os.environ["BLUEMIST_PATH"]
config.fileConfig(HOME_PATH + '/' + 'logging.config')
logger = logging.getLogger("root")

[docs]def main(): # query = 'SELECT * FROM public.auto_mpg' # data = get_data_from_database(db_type='postgres', host='postgres.cxh6nuaszc34.us-east-1.rds.amazonaws.com:5432', # username='postgres', password='adminadmin', database='postgres', query=query, chunk_size=100) # # query = 'SELECT * FROM public.auto_mpg' # data = get_data_from_database(db_type='aurora-postgres', host='aurora-postgres-instance-1.cxh6nuaszc34.us-east-1.rds.amazonaws.com:5432', # username='postgres', password='adminadmin', database='postgres', query=query, chunk_size=100) # # query = 'SELECT * FROM auto_mpg' # data = get_data_from_database(db_type='mysql', host='mysql.cxh6nuaszc34.us-east-1.rds.amazonaws.com:3306', # username='admin', password='adminadmin', database='sys', query=query, chunk_size=100) # # query = 'SELECT * FROM AUTO_MPG' # data = get_data_from_database(db_type='aurora-mysql', host='aurora-mysql-instance-1.cxh6nuaszc34.us-east-1.rds.amazonaws.com:3306', # username='admin', password='adminadmin', database='sys', query=query, chunk_size=100) # # query = 'SELECT * FROM dbo.auto_mpg' # data = get_data_from_database(db_type='mssql', host='mssql.cxh6nuaszc34.us-east-1.rds.amazonaws.com:1433', # username='admin', password='adminadmin', database='bluemist', query=query, chunk_size=100) # # query = 'SELECT * FROM auto_mpg' # data = get_data_from_database(db_type='mariadb', host='mariadb.cxh6nuaszc34.us-east-1.rds.amazonaws.com:3306', # username='admin', password='adminadmin', database='innodb', query=query, chunk_size=100) # # query = 'SELECT * FROM AUTO_MPG' # data = get_data_from_database(db_type='oracle', host='oracle.cxh6nuaszc34.us-east-1.rds.amazonaws.com', # username='admin', password='adminadmin', service='DATABASE', # oracle_instant_client_path='/home/shashank-agrawal/Desktop/instantclient_21_6', # query=query, chunk_size=100) initialize(enable_acceleration_extensions=True) # print('get_estimators', get_estimators()) info = cpuinfo.get_cpu_info() print(info) print(sklearn_is_patched()) # data = datasets.load_diabetes(as_frame=True) # data = get_data_from_filesystem('datasets/auto-mpg/auto-mpg.csv') data = get_data_from_filesystem( 'https://raw.githubusercontent.com/plotly/datasets/3aa08e58607d1f36159efc4cca9d0d073bbf57bb/auto-mpg.csv') # perform_eda(data.frame, target_variable='target', provider='autoviz') X_train, X_test, y_train, y_test = preprocess_data(data, target_variable='mpg', test_size=0.25, data_scaling_strategy='StandardScaler', categorical_features=['model_year'], categorical_encoder='OneHotEncoder', drop_categories_one_hot_encoder='first') train_test_evaluate(X_train, X_test, y_train, y_test) deploy_model(estimator_name='LinearSVR')
# # # pipeline = get_model_pipeline('LarsCV') # # print(pipeline.get_params) # deploy_model(estimator_name='LarsCV', host='localhost', port=8000) # Press the green button in the gutter to run the script. if __name__ == '__main__': main()