Module ragability.ragability_hjson_info

Module for the CLI to show information about the contents of a json, hjson, or jsonl file.

Functions

def get_args()
Expand source code
def get_args():
    """
    Parse the command line arguments of the info CLI.

    The arguments are read from ``sys.argv`` by ``argparse``; on invalid or
    missing arguments ``argparse`` prints a usage message and exits.

    Returns:
        dict: A plain dictionary with the keys ``"input"`` (str, required) and
        ``"debug"`` (bool, ``False`` unless ``--debug``/``-d`` is given).
    """
    parser = argparse.ArgumentParser(
        description='Show information about the contents of a hjson, json or jsonl file'
    )
    # NOTE(review): the help text says "One or more" but the option accepts a single
    # string value -- confirm whether the reader of this value supports several files.
    parser.add_argument('--input', '-i', type=str, help='One or more json, hjson, jsonl files', required=True)
    parser.add_argument('--debug', '-d', action='store_true', help='Debug mode')
    # Copy the namespace attributes into a fresh dict so the result is an ordinary,
    # independently mutable config mapping.
    return dict(vars(parser.parse_args()))

Get the command line arguments

def main()
Expand source code
def main():
    """
    Entry point of the CLI: parse the arguments, optionally enable debug
    logging, then hand the arguments to ``run``.
    """
    cli_args = get_args()
    debug_requested = cli_args["debug"]
    if debug_requested:
        # Raise the log level first so the debug message below is actually emitted.
        set_logging_level(DEBUG)
        logger.debug(f"Effective arguments: {pp_config(cli_args)}")
    run(cli_args)
def run(config: dict)
Expand source code
def run(config: dict):
    """
    Log summary statistics about the entries of the input file.

    The entries are obtained from ``read_input_file(config["input"])``. The
    function logs the number of entries and every key path found in them,
    together with how often each path occurs.

    Key paths are built as follows:
      * keys of nested dictionaries are joined with dots, e.g. ``a.b.c.d``
        (dictionaries may be nested arbitrarily deep);
      * keys of dictionaries that are items of a list are shown as ``a.b[].c``,
        independent of the item's position in the list, so occurrences are
        aggregated over all list items.

    Args:
        config: The configuration dictionary; only ``config["input"]`` is read here.
    """
    data = read_input_file(config["input"])
    n_total = 0
    keys = Counter()

    def count_keys(entry, prefix=""):
        # Recursively tally every key path found below ``entry``.
        for k, v in entry.items():
            path = prefix + k
            keys[path] += 1
            if isinstance(v, dict):
                count_keys(v, path + ".")
            elif isinstance(v, list):
                for item in v:
                    if isinstance(item, dict):
                        # Use "[]" rather than the item index so the same key in
                        # different list positions is counted under one path.
                        count_keys(item, path + "[].")

    # Count while iterating instead of calling len(), so any iterable of entries works.
    for entry in data:
        count_keys(entry)
        n_total += 1

    logger.info(f"Read {n_total} entries from {config['input']}")
    logger.info("Keys found in the entries:")
    for k, v in keys.items():
        logger.info(f"{k}: {v}")