Skip to content

robustness

get_overlap_analysis(technology, kind, credentials, summary=False, destination=None, robustness_dataset='robustness')

Return the overlap analysis of technology

Parameters:

Name Type Description Default
technology str

name of the technology

required
kind OverlapAnalysisKind

kind of overlap analysis

required
credentials Path

BQ credentials file path

required
destination Path

results destination file path (if None, stdout)

None
summary bool

whether the full analysis or its summary should be saved

False
robustness_dataset str

name of the BQ 'robustness' dataset

'robustness'

Usage:

techlandscape robustness get-overlap-analysis <technology> <kind> <your-credentials> --destination <overlap_analysis.csv>

Source code in techlandscape/robustness.py
@app.command()
def get_overlap_analysis(
    technology: str,
    kind: OverlapAnalysisKind,
    credentials: Path,
    summary: bool = False,
    destination: Path = None,
    robustness_dataset: str = "robustness",
):
    """
    Compute the overlap analysis of `technology` and write it as csv

    Arguments:
        technology: name of the technology
        kind: kind of overlap analysis (pairwise, any other kind is handled as batch)
        credentials: BQ credentials file path
        destination: results destination file path (if None, stdout)
        summary: whether the `describe()` summary should be saved instead of the full analysis
        robustness_dataset: name of the BQ 'robustness' dataset

    **Usage:**
        ```shell
        techlandscape robustness get-overlap-analysis <technology> <kind> <your-credentials> --destination <overlap_analysis.csv>
        ```
    """
    analysis = OverlapAnalysis(technology, credentials, robustness_dataset)
    is_pairwise = kind == OverlapAnalysisKind.pairwise

    # Each branch first triggers the computation, then reads the attribute it populates.
    if is_pairwise and summary:
        analysis.get_pairwise_overlap_ratios()
        table = analysis.pairwise_overlap_ratios
    elif is_pairwise:
        analysis.get_pairwise_overlap_analysis()
        table = analysis.pairwise_overlap_analysis
    else:
        analysis.get_batch_overlap_analysis()
        table = analysis.batch_overlap_ratios

    if summary:
        table = table.describe()

    # A summary is written with its index and no header, a full analysis the other way round.
    keep_index = bool(summary)
    target = destination or sys.stdout
    table.to_csv(target, index=keep_index, header=not keep_index)

get_prediction_analysis(models, data, destination=None)

Return a csv file with predicted scores on data for all models matching the models pattern.

Parameters:

Name Type Description Default
models str

model folder path (wildcard enabled)

required
data str

data file path

required
destination Path

destination directory path (the file name is generated automatically)

None

Usage:

techlandscape robustness get-prediction-analysis "models/additivemanufacturing_*_cnn/model-best" data/expansion_additivemanufacturing_sample.jsonl --destination outs/
# will be saved as classification_additivemanufacturing_robustness_cnn.csv

Source code in techlandscape/robustness.py
@app.command()
def get_prediction_analysis(models: str, data: str, destination: Path = None):
    """
    Return a csv file with predicted scores on `data` for all models matching the `models` pattern.

    The output file is named `classification_<technology>_robustness_<architecture>.csv`, where
    `<technology>` and `<architecture>` are the first and last `_`-separated tokens of the name of
    the folder containing each matched model folder.

    Arguments:
        models: model folder path (wildcard enabled)
        data: data file path
        destination: destination directory path (if None, current working directory)

    **Usage:**
        ```shell
        techlandscape robustness get-prediction-analysis "models/additivemanufacturing_*_cnn/model-best" data/expansion_additivemanufacturing_sample.jsonl --destination outs/
        # will be saved as classification_additivemanufacturing_robustness_cnn.csv
        ```
    """
    # Sorted so that the column order of the output does not depend on the file system.
    model_paths = sorted(glob(models))
    if not model_paths:
        typer.secho(f"{not_ok}no model matching {models}", err=True)
        return

    # `destination` defaults to None and Path(None) raises a TypeError: fall back to the cwd.
    out_dir = Path(destination) if destination else Path(".")
    out_dir.mkdir(parents=True, exist_ok=True)

    out = None
    for model_ in model_paths:
        # Path handling (rather than splitting on "/") copes with trailing slashes
        # and platform-specific separators.
        run_name = Path(model_).parent.name
        technology = run_name.split("_")[0]
        architecture = run_name.split("_")[-1]

        model = tf.keras.models.load_model(model_)
        cfg = get_config(Path(model_) / "config.yaml")

        # Score `data` instead of the test set declared in the model config.
        cfg["data"]["test"] = data

        text_vectorizer = TextVectorizer(cfg)
        text_vectorizer.vectorize()

        pred = pd.DataFrame(model.predict(text_vectorizer.x_test), columns=[model_])
        if out is None:
            out = pred
        else:
            out = out.merge(pred, left_index=True, right_index=True)

        # Written after each model: the file always holds the scores of the models processed so far.
        out_file = out_dir / f"classification_{technology}_robustness_{architecture}.csv"
        out.to_csv(out_file)
        typer.secho(f"{ok}{out_file} saved")

models_performance(path, markdown=True, destination=None, title=None)

Summarize models performance and either save it to csv or print it to stdout

Parameters:

Name Type Description Default
path str

path of the meta.json (wildcard enabled)

required
markdown bool

whether the output should be printed to stdout as md or saved to destination

True
destination str

destination file path (used if --no-markdown)

None
title str

title of the table (used if --markdown)

None

Usage:

techlandscape robustness models-performance "models/additivemanufacturing_*_cnn/model-best/meta.json" --markdown --title "additivemanufacturing - cnn"

Source code in techlandscape/robustness.py
@app.command()
def models_performance(
    path: str, markdown: bool = True, destination: str = None, title: str = None
):
    """
    Summarize models performance and either print it as markdown or save it as csv

    When more than one file matches `path`, the `describe()` statistics across models are
    reported; with a single file, its metrics are reported as is.

    Arguments:
        path: path of the meta.json (wildcard enabled)
        markdown: whether the output should be printed to stdout as md or saved to `destination`
        destination: destination file path (used if `--no-markdown`; if None, stdout)
        title: title of the table (used if `--markdown`; no title line is printed if None)

    **Usage:**
        ```shell
        techlandscape robustness models-performance "models/additivemanufacturing_*_cnn/model-best/meta.json" --markdown --title "additivemanufacturing - cnn"
        ```
    """
    files = glob(path)
    if not files:
        # Without this guard `out` is never bound and the command dies on an UnboundLocalError.
        typer.secho(f"{not_ok}no file matching {path}", err=True)
        raise typer.Exit(code=1)

    # NOTE(review): the model name is taken as the 2nd "/"-separated component of the path,
    # which presumes relative paths shaped like "models/<name>/..." - confirm.
    get_name = lambda x: x.split("/")[1]

    out = None
    for file in files:
        # read_text() closes the file, unlike a bare open().read()
        meta = json.loads(Path(file).read_text())
        tmp = pd.DataFrame.from_dict(meta).rename(
            columns={"performance": get_name(file)}
        )
        if out is None:
            out = tmp.copy()
        else:
            out = out.merge(tmp, left_index=True, right_index=True)

    # One row per model, columns in alphabetical order.
    out = out.T
    out = out[sorted(out.columns)]
    if len(files) > 1:
        out = out.describe()

    if markdown:
        if title:
            typer.echo(f"\n### {title}\n")
        typer.echo(f"{out.round(2).to_markdown()}")
    else:
        # to_csv(None) only returns a string: write to stdout when no destination is given.
        out.to_csv(destination if destination else sys.stdout)

wrap_overlap_analysis(path, axis, destination=None, markdown=False)

Wrap overlap analysis based on csv output of get_overlap_analysis

Parameters:

Name Type Description Default
path str

path of the files with results to be wrapped (wildcard enabled)

required
axis OverlapAnalysisAxis

axis of the main analysis

required
destination str

saving file path (print to stdout if None)

None
markdown bool

whether to return as md or csv table

False

Usage:

techlandscape robustness wrap-overlap-analysis "outs/expansion_*robustness*.csv" <axis> --markdown

Source code in techlandscape/robustness.py
@app.command()
def wrap_overlap_analysis(
    path: str,
    axis: OverlapAnalysisAxis,
    destination: str = None,
    markdown: bool = False,
):
    """
    Wrap overlap analysis based on csv output of  `get_overlap_analysis`

    One table is emitted per element of the main axis. File names are expected to look like
    `<prefix>_<technology>_<config>[...].csv`: the 2nd and 3rd `_`-separated tokens are read
    as the technology and the config respectively.

    Arguments:
        path: path of the files with results to be wrapped (wildcard enabled)
        axis: axis of the main analysis
        destination: saving file path (print to stdout if None); all tables are written to it
        markdown: whether to return as md or csv table

    **Usage:**
        ```shell
        techlandscape robustness wrap-overlap-analysis "outs/expansion_*robustness*.csv" <axis> --markdown
        ```
    """
    files = glob(path)
    if not files:
        typer.secho(f"{not_ok}no file matching {path}", err=True)
        return

    # Tokens are parsed from the file name only, so that underscores in parent folders
    # cannot shift the positions.
    technology_of = lambda f: Path(f).name.split("_")[1]
    config_of = lambda f: Path(f).name.split("_")[2].replace(".csv", "")

    technologies = sorted({technology_of(f) for f in files})
    configs = sorted({config_of(f) for f in files})

    # Explicit selection instead of eval(axis.value).
    # NOTE(review): assumes the enum values are "technologies" and "configs" - confirm.
    by_technology = axis == OverlapAnalysisAxis.technologies
    elements = technologies if by_technology else configs

    # Open the destination once: writing to the path at each iteration would overwrite
    # the previous tables and keep only the last one.
    out = (
        open(destination, "w", encoding="utf-8", newline="")
        if destination
        else sys.stdout
    )
    try:
        for e in elements:
            rows = []
            for file in (f for f in files if e in f):
                name = config_of(file) if by_technology else technology_of(file)
                rows.append(
                    pd.read_csv(file, names=["var", name]).set_index("var").T
                )
            # pd.concat replaces DataFrame.append, removed in pandas 2.0
            tmp = pd.concat(rows)
            tmp.index.name = (
                "technologies" if axis == OverlapAnalysisAxis.configs else "configs"
            )
            tmp = tmp.sort_index().round(2)
            if markdown:
                # The section title goes to the same sink as its table.
                typer.echo(f"\n\n## {e}\n", file=out)
                tmp.to_markdown(out)
            else:
                tmp.to_csv(out)
    finally:
        if destination:
            out.close()

wrap_prediction_analysis(path, markdown=True)

Wrap prediction analysis

Parameters:

Name Type Description Default
path str

prediction analysis file path (wildcard enabled)

required
markdown bool

whether to output wrapped analysis as markdown or csv

True

Attention

csv not supported yet

Usage:

techlandscape robustness wrap-prediction-analysis outs/classification_additivemanufacturing_robustness_cnn.csv

Source code in techlandscape/robustness.py
@app.command()
def wrap_prediction_analysis(path: str, markdown: bool = True):
    """
    Wrap prediction analysis

    For each file, two tables are printed: the descriptive statistics of the row-wise standard
    deviation of the scores across models, and a consensus table indexed by the number of models
    scoring a row above 0.5.

    Arguments:
        path: prediction analysis file path (wildcard enabled)
        markdown: whether to output wrapped analysis as markdown or csv

    !!! attention
        csv not supported yet

    **Usage:**
        ```shell
        techlandscape robustness wrap-prediction-analysis outs/classification_additivemanufacturing_robustness_cnn.csv
        ```
    """
    for file in sorted(glob(path)):
        if not markdown:
            # Nothing can be output: skip the computation altogether.
            typer.secho(f"{not_ok}csv not supported yet", err=True)
            continue

        # Parse the file name itself so that the folder depth of `path` does not matter
        # (expected: classification_<technology>_..._<architecture>.csv).
        file_name = Path(file).name
        technology = file_name.split("_")[1]
        architecture = file_name.split("_")[-1].split(".")[0]

        scores = pd.read_csv(file, index_col=0)
        dispersion = scores.std(axis=1).describe().rename("std_score")

        # A model votes positive when its score is strictly above 0.5.
        votes = (scores > 0.5).astype(int).sum(axis=1)
        rows_per_vote = votes.groupby(votes).size()

        # Keep the number of votes as index: a positional index would mislabel the rows
        # as soon as one vote level is absent from the data.
        # NOTE(review): nb_positives is (number of votes) x (number of rows with that many
        # votes), as in the previous implementation - confirm the weighting is intended.
        consensus = (
            (rows_per_vote * rows_per_vote.index.to_numpy())
            .rename("nb_positives")
            .to_frame()
        )
        consensus["share_positives"] = (
            consensus["nb_positives"] / consensus["nb_positives"].sum()
        )
        # Highest agreement first, so that the cumulative share reads as "at least n models";
        # copy() so that the new column is not assigned to a slice.
        consensus = consensus[::-1].copy()
        consensus["cumshare_positives"] = consensus["share_positives"].cumsum()
        consensus.index.name = "nb_models"

        typer.echo(f"\n## {technology} - {architecture}\n")
        typer.echo("### Score dispersion\n")
        typer.echo(dispersion.round(3).to_markdown() + "\n")
        typer.echo("### Models consensus\n")
        typer.echo(consensus.round(3).to_markdown())