# Author: Shashank Agrawal
# License: MIT
# Version: 0.1.3
# Email: dew@bluemist-ai.one
# Created: Jul 17, 2023
# Last modified: Oct 25, 2023
import logging
import os
import pandas as pd
from logging import config
from transformers import pipeline
from bluemist.llm.task_models import TaskModels
BLUEMIST_PATH = os.environ["BLUEMIST_PATH"]
config.fileConfig(BLUEMIST_PATH + '/' + 'logging.config')
logger = logging.getLogger("bluemist")
# Instantiate the TaskModels class
task_models = TaskModels()
def process_models(task_name, models, results_df, input_data, question=None, min_length=30, max_length=130,
do_sample=False):
"""
Process multiple models with given inputs and consolidate results.
Args:
task_name : str, default=None
Supported tasks can be retrieved from the TaskModels class using the get_all_tasks method.
models : list
A list of model names to be processed.
results_df : pd.DataFrame
The initial results DataFrame.
input_data : str, default=None
Text or information used by the model to perform specific NLP tasks.
question : str, default=None
Specific query or question provided as input to the model for question-answering tasks. The model uses this question to find the relevant answer within the provided context.
min_length: number, default=30
The minimum length of the generated summary. Defaults to 30. The summarization model ensures that the summary is at least this length.
max_length : number, default=130
The maximum length of the generated summary. Defaults to 130. The summarization model limits the summary to a maximum of this length.
do_sample : boolean, default=False
Whether to use sampling during summary generation. Defaults to False. When True, the model uses a sampling technique for token selection.
Returns:
pd.DataFrame
The dataFrame containing consolidated results from all models.
"""
for model in models:
print('Model :: {}'.format(model))
logger.info('Model :: {}'.format(model))
try:
nlp = pipeline(task=task_name, model=model)
input_args = {}
if task_models.is_question_supported(task_name):
input_args["question"] = question
if task_name == "question-answering":
input_args["context"] = input_data
result = nlp(input_args)
elif task_name == "document-question-answering":
input_args["image"] = input_data
result = nlp(input_args)
elif task_name == "summarization":
input_args["min_length"] = min_length
input_args["max_length"] = max_length
input_args["do_sample"] = do_sample
result = nlp(input_data, **input_args)
elif task_name == "sentiment-analysis":
result = nlp(input_data)
results_df = consolidate_results(result, results_df, model)
except ValueError as e:
print('Skipping model due to the error.')
logger.error('An error occurred: %s', e)
return results_df
def consolidate_results(result, results_df, model):
"""
Consolidates the given result into the results DataFrame.
This function takes a result, which can be a dictionary or a list of dictionaries,
and appends it to the provided results DataFrame. The 'model' argument is used to
associate the result with a specific model.
Args:
result : (dict or list):
The result to be consolidated into the DataFrame.
results_df : (pd.DataFrame)
The DataFrame to which the result will be added.
model : str
The model name associated with the result.
Returns:
results_df : pd.DataFrame
Results DataFrame with the consolidated result.
"""
# Initialize an empty DataFrame
new_rows_df = pd.DataFrame()
# Some models return the result as dict while other may return list.
if isinstance(result, dict):
new_rows_df = pd.DataFrame([result])
elif isinstance(result, list):
new_rows_df = pd.DataFrame(result)
new_rows_df.insert(0, 'model', model)
# Store the results in the DataFrame
results_df = pd.concat([results_df, new_rows_df], ignore_index=True)
return results_df