Module llms_wrapper.llms_wrapper_test

Module implementing the llms_wrapper_test command, which runs a simple test to check whether one or more LLMs are working.

Functions

def equal_response(response, answer)
def equal_response(response, answer):
    """
    Check if the response is equal to the answer, disregarding any newlines and multiple spaces and any
    leading or trailing spaces.
    """
    return response.replace("\n", " ").replace("  ", " ").strip() == answer

Check if the response is equal to the answer, disregarding newlines, repeated whitespace, and leading or trailing whitespace in the response.
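
A minimal usage sketch (the strings are made-up examples; only the response side is normalized, and the comparison itself stays case-sensitive):

assert equal_response("yes", "yes")
assert equal_response("Hello\nworld ", "Hello world")
assert not equal_response("Hello world", "hello world")  # case differences still count as a mismatch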

def get_args() -> dict
def get_args() -> dict:
    """
    Get the command line arguments
    """
    parser = argparse.ArgumentParser(description='Test llms')
    parser.add_argument('--llms', '-l', nargs="*", type=str, default=[], help='LLMs to use for the queries (or use config)', required=False)
    parser.add_argument('--use', '-u', nargs="*", type=str, default=[], help='Subset of LLMs to use (all)', required=False)
    parser.add_argument("--prompt", "-p", type=str, help="Prompt text to use (or use default prompt)", required=False)
    parser.add_argument("--answer", "-a", type=str, help="Expected answer (or use default answer)", required=False)
    parser.add_argument("--config", "-c", type=str, help="Config file with the LLM and other info for an experiment, json, jsonl, yaml", required=False)
    parser.add_argument('--role', '-r', choices=["user", "system", "assistant"], default="user", help='Role to use for the prompt', required=False)
    parser.add_argument("--dry-run", "-n", action="store_true", help="Dry run, do not actually run the queries", required=False)
    parser.add_argument("--debug", action="store_true", help="Debug mode", required=False)
    parser.add_argument("--show_response", action="store_true", help="Show thefull  response from the LLM", required=False)
    parser.add_argument("--show_cost", action="store_true", help="Show token counts and cost", required=False)
    parser.add_argument("--logfile", "-f", type=str, help="Log file", required=False)
    parser.add_argument("--version", action="store_true", help="Show version and exit")
    args = parser.parse_args()
    if args.version:
        print("llms_wrapper version:", __version__)
        sys.exit()
    loglevel1 = "INFO"
    if args.debug:
        loglevel1 = "DEBUG"
    configure_logging(level=loglevel1, logfile=args.logfile)
    # logger.enable("llms_wrapper")
    # TODO: for testing, remove once logging works properly
    logger.info("Logging configured")
    for llm in args.llms:
        if not re.match(r"^[a-zA-Z0-9_\-./]+/.+$", llm):
            raise Exception(f"Error: 'llms' field must be in the format 'provider/model' in: {llm}")
    # convert the argparse object to a dictionary
    argsconfig = {}
    argsconfig.update(vars(args))
    # if a config file is specified, read the config file using our config reading function and update the arguments.
    # The config data may contain:
    # - input: used only if not specified in the command line arguments
    # - output: used only if not specified in the command line arguments
    # - llm: added to the ones specified in the command line arguments
    # - prompt: used to add config info to the llms specified in the command line arguments
    if args.config:
        config = read_config_file(args.config, update=False)
        # merge the args into the config, giving precedence to the args, except for the LLM list, which is merged
        # by adding the args to the config
        oldllm = config.get("llms", [])
        config.update(dict_except(argsconfig, ["llms"]))
        # add the llm from the args to the llm from the config, but only if the llm is not already in the config
        mentionedllm = [llm if isinstance(llm, str) else llm["llm"] for llm in config["llms"]]
        for llm in args.llms:
            if llm not in mentionedllm:
                logger.debug(f"Adding LLM {llm} to config")
                oldllm.append(llm)
        config["llms"] = oldllm
    else:
        config = argsconfig
    update_llm_config(config)

    config["answer"] = args.answer
    if len(args.use) > 0:
        # check that the llms specified are actually to be found in the config
        aliases = [llm["alias"] for llm in config["llms"]]
        for llm in args.use:
            if llm not in aliases:
                raise Exception(f"Error: LLM {llm} not found in config")
        config["llms_to_use"] = args.use
    else:
        config["llms_to_use"] = None  # use whatever is configured in the config
    # make sure we got at least one llm
    if not config["llms"]:
        raise Exception("Error: No LLMs specified")
    logger.debug(f"Effective config: {pp_config(config)}")
    ppargs = pp_config(config)
    logger.debug(f"Effective arguments: {ppargs}")
    return config

Get the command line arguments
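
The only validation applied to each --llms argument is the provider/model format check shown in the source above. A small sketch of which strings pass it (the model names are examples only, not a list of supported providers):

import re

pattern = r"^[a-zA-Z0-9_\-./]+/.+$"
for candidate in ["openai/gpt-4o", "ollama/llama3", "gpt-4o"]:
    ok = bool(re.match(pattern, candidate))
    print(f"{candidate!r}: {'accepted' if ok else 'rejected, missing provider/ prefix'}")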

def main()
def main():
    args = get_args()
    run(args)
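
main() simply parses the command line and hands the resulting config to run(). A hedged sketch of invoking it programmatically by faking the command line (the model name is an example; --dry-run avoids issuing real queries, though update_llm_config may still expect provider credentials in the environment):

import sys
from llms_wrapper.llms_wrapper_test import main

# equivalent to running: llms_wrapper_test --llms openai/gpt-4o --dry-run
sys.argv = ["llms_wrapper_test", "--llms", "openai/gpt-4o", "--dry-run"]
main()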

def run(config: dict)
def run(config: dict):
    logger.info("Running LLM test")
    prompt = {}
    prompt[config['role']] = config['prompt'] if config['prompt'] else DEFAULT_PROMPT
    answer = config['answer'] if config['answer'] else DEFAULT_ANSWER
    n = 0
    n_ok = 0
    n_nok = 0
    log = []
    llms = LLMS(config)
    messages = llms.make_messages(prompt=prompt)
    if config["llms_to_use"] is None:
        llms_to_use = llms.list_aliases()
    else:
        llms_to_use = config["llms_to_use"]
    if len(llms_to_use) == 0:
        logger.warning("No LLMs to use")
        return
    for alias in llms_to_use:
        llmname = alias
        llm = llms.get(alias)
        n += 1
        if config['dry_run']:
            logger.info(f"Would query LLM {llmname} with prompt {prompt}")
            n_ok += 1
        else:
            if config['debug']:
                apikey = llm.get('api_key') if isinstance(llm, dict) else "NONE"
                logger.debug(f"Querying LLM {llmname} apikey {apikey}  with prompt {prompt}")
            ret = llms.query(
                llmname,
                messages=messages,
                debug=config['debug'],
                return_response=config["show_response"],
                return_cost=config["show_cost"])
            response = ret.get("answer", "")
            error = ret.get("error", "")
            ret_response = ret.get("response", "")
            ret_cost = ret.get("cost", "")
            ret_completion_tokens = ret.get("n_completion_tokens", 0)
            ret_prompt_tokens = ret.get("n_prompt_tokens", 0)
            ret_total_tokens = ret.get("n_total_tokens", 0)
            if config["show_response"]:
                logger.info(f"Response/error from {llmname}: {response}/{error}")
                logger.info(f"Detailed response: {ret_response}")
            if config["show_cost"]:
                logger.info(f"Cost for {llmname}: {ret_cost}")
                logger.info(f"Completion tokens: {ret_completion_tokens}, Prompt tokens: {ret_prompt_tokens}, Total tokens: {ret_total_tokens}")
            if error:
                n_nok += 1
                logger.error(f"Error from {llmname}: {error}")
                log.append(f"{llmname} Error: {error}")
            elif not equal_response(response, answer):
                # an unexpected response also counts as a failure in the summary
                n_nok += 1
                logger.error(f"Error: Unexpected response from {llmname}: {response}, expected: {answer}")
                log.append(f"{llmname} Unexpected response: {response}, expected: {answer}")
            else:
                n_ok += 1
                if config['debug']:
                    logger.info(f"OK Response from {llmname}: {response}")
                log.append(f"{llmname} OK")

    logger.info("Summary:")
    for l in log:
        logger.info(l)
    logger.info(f"OK: {n_ok}, NOK: {n_nok}")