robustness

`get_overlap_analysis(technology, kind, credentials, summary=False, destination=None, robustness_dataset='robustness')` ¶

Return the overlap analysis of technology

Parameters:

Name	Type	Description	Default
`technology`	`str`	name of the technology	required
`kind`	`OverlapAnalysisKind`	kind of overlap analysis	required
`credentials`	`Path`	BQ credentials file path	required
`destination`	`Path`	results destination file path (if None, stdout)	`None`
`summary`	`bool`	whether the full analysis or its summary should be saved	`False`
`robustness_dataset`	`str`	name of the BQ 'robustness' dataset	`'robustness'`

Usage:

techlandscape robustness get-overlap-analysis <technology> <kind> <your-credentials> --destination <overlap_analysis.csv>

Source code in techlandscape/robustness.py

@app.command()
def get_overlap_analysis(
    technology: str,
    kind: OverlapAnalysisKind,
    credentials: Path,
    summary: bool = False,
    destination: Path = None,
    robustness_dataset: str = "robustness",
):
    """
    Write the overlap analysis of `technology` as csv.

    Arguments:
        technology: name of the technology
        kind: kind of overlap analysis (pairwise, or batch for any other value)
        credentials: BQ credentials file path
        destination: results destination file path (if None, stdout)
        summary: whether the summary (`describe()`) should be saved instead of the full analysis
        robustness_dataset: name of the BQ 'robustness' dataset

    **Usage:**
        ```shell
        techlandscape robustness get-overlap-analysis <technology> <kind> <your-credentials> --destination <overlap_analysis.csv>
        ```
    """
    analysis = OverlapAnalysis(technology, credentials, robustness_dataset)
    is_pairwise = kind == OverlapAnalysisKind.pairwise

    # Step 1: run the relevant computation and pick the table it populates.
    if is_pairwise and summary:
        analysis.get_pairwise_overlap_ratios()
        table = analysis.pairwise_overlap_ratios
    elif is_pairwise:
        analysis.get_pairwise_overlap_analysis()
        table = analysis.pairwise_overlap_analysis
    else:
        # Batch mode uses the same computation and table for both outputs.
        analysis.get_batch_overlap_analysis()
        table = analysis.batch_overlap_ratios

    # Step 2: a summary is the descriptive statistics of the table; it is
    # written with its index (the statistic names) and without a header row,
    # whereas the full table is written with a header and without its index.
    if summary:
        table = table.describe()
    write_index = bool(summary)
    write_header = not write_index

    target = destination or sys.stdout
    table.to_csv(target, index=write_index, header=write_header)

`get_prediction_analysis(models, data, destination=None)` ¶

Return a csv file with predicted scores on data for all models matching the models pattern.

Parameters:

Name	Type	Description	Default
`models`	`str`	model folder path (wildcard enabled)	required
`data`	`str`	data file path	required
`destination`	`Path`	destination file path	`None`

Usage:

techlandscape robustness get-prediction-analysis "models/additivemanufacturing_*_cnn/model-best" data/expansion_additivemanufacturing_sample.jsonl --destination outs/
# will be saved as classification_additivemanufacturing_robustness_cnn.csv

Source code in techlandscape/robustness.py

@app.command()
def get_prediction_analysis(models: str, data: str, destination: Path = None):
    """
    Return a csv file with predicted scores on `data` for all models matching the `models` pattern.

    Arguments:
        models: model folder path (wildcard enabled)
        data: data file path
        destination: destination folder path (current directory if None)

    **Usage:**
        ```shell
        techlandscape robustness get-prediction-analysis "models/additivemanufacturing_*_cnn/model-best" data/expansion_additivemanufacturing_sample.jsonl --destination outs/
        # will be saved as classification_additivemanufacturing_robustness_cnn.csv
        ```
    """
    # The parent folder of each model is parsed as "<technology>_..._<architecture>"
    # (see the usage example above).
    get_technology = lambda x: x.split("/")[-2].split("_")[0]
    get_architecture = lambda x: x.split("/")[-2].split("_")[-1]

    # `Path(None)` raises a TypeError, so the documented default used to crash
    # at save time; fall back to the current directory instead.
    destination = Path(destination) if destination is not None else Path(".")

    models = glob(models)
    for i, model_ in enumerate(models):
        technology = get_technology(model_)
        architecture = get_architecture(model_)
        model = tf.keras.models.load_model(model_)
        cfg = get_config(Path(model_) / Path("config.yaml"))

        # Point the vectorizer at the file to score rather than at the test set
        # recorded in the model config.
        cfg["data"]["test"] = data

        text_vectorizer = TextVectorizer(cfg)
        text_vectorizer.vectorize()

        pred = model.predict(text_vectorizer.x_test)
        scores = pd.DataFrame(pred, columns=[model_])
        if i == 0:
            out = scores
        else:
            # One column per model, aligned on the row index.
            out = out.merge(scores, left_index=True, right_index=True)

        # The file is (re)written after every model, so partial results are
        # available if a later model fails to load or predict.
        filename = f"classification_{technology}_robustness_{architecture}.csv"
        target = destination / filename
        out.to_csv(target)
        typer.secho(f"{ok}{target} saved")

`models_performance(path, markdown=True, destination=None, title=None)` ¶

Summarize models performance and save to csv/ print to stdout

Parameters:

Name	Type	Description	Default
`path`	`str`	path of the meta.json (wildcard enabled)	required
`markdown`	`bool`	whether the output should be printed to stdout as md or saved to `destination`	`True`
`destination`	`str`	destination file path (used if `--no-markdown`)	`None`
`title`	`str`	title of the table (used if `--markdown`)	`None`

Usage:

techlandscape robustness models-performance "models/additivemanufacturing_*_cnn/model-best/meta.json" --markdown --title "additivemanufacturing - cnn"

Source code in techlandscape/robustness.py

@app.command()
def models_performance(
    path: str, markdown: bool = True, destination: str = None, title: str = None
):
    """
    Summarize models performance and save to csv/ print to stdout

    Arguments:
        path: path of the meta.json (wildcard enabled)
        markdown: whether the output should be printed to stdout as md or saved to `destination`
        destination: destination file path (used if `--no-markdown`, stdout if None)
        title: title of the table (used if `--markdown`)

    **Usage:**
        ```shell
        techlandscape robustness models-performance "models/additivemanufacturing_*_cnn/model-best/meta.json" --markdown --title "additivemanufacturing - cnn"
        ```
    """
    files = glob(path)
    if not files:
        # Without this guard the function failed later with a NameError on `out`.
        typer.secho(f"No file matching {path}", err=True)
        raise typer.Exit(code=1)

    # The second path component is used as the model name
    # (e.g. "models/<name>/model-best/meta.json").
    get_name = lambda x: x.split("/")[1]

    for i, file in enumerate(files):
        # `read_text` opens and closes the file; the previous
        # `open("r").read()` left the handle open.
        meta = json.loads(Path(file).read_text())
        tmp = pd.DataFrame.from_dict(meta).rename(
            columns={"performance": get_name(file)}
        )
        if i == 0:
            out = tmp.copy()
        else:
            out = out.merge(tmp, left_index=True, right_index=True)

    # One row per model, metrics as alphabetically sorted columns.
    out = out.T
    out = out[sorted(out.columns)]
    if len(files) > 1:
        # With several models, report the distribution of each metric rather
        # than the individual values.
        out = out.describe()

    if markdown:
        typer.echo(f"\n### {title}\n")
        typer.echo(f"{out.round(2).to_markdown()}")
    else:
        # `to_csv(None)` only returns a string, which used to be discarded;
        # fall back to stdout like the other commands of this module.
        out.to_csv(destination if destination else sys.stdout)

`wrap_overlap_analysis(path, axis, destination=None, markdown=False)` ¶

Wrap overlap analysis based on csv output of get_overlap_analysis

Parameters:

Name	Type	Description	Default
`path`	`str`	path of the files with results to be wrapped (wildcard enabled)	required
`axis`	`OverlapAnalysisAxis`	axis of the main analysis	required
`destination`	`str`	saving file path (print to stdout if None)	`None`
`markdown`	`bool`	whether to return as md or csv table	`False`

Usage:

techlandscape robustness wrap-overlap-analysis "outs/expansion_*robustness*.csv" --markdown

Source code in techlandscape/robustness.py

@app.command()
def wrap_overlap_analysis(
    path: str,
    axis: OverlapAnalysisAxis,
    destination: str = None,
    markdown: bool = False,
):
    """
    Wrap overlap analysis based on csv output of  `get_overlap_analysis`

    Arguments:
        path: path of the files with results to be wrapped (wildcard enabled)
        axis: axis of the main analysis
        destination: saving file path (print to stdout if None)
        markdown: whether to return as md or csv table

    **Usage:**
        ```shell
        techlandscape robustness wrap-overlap-analysis "outs/expansion_*robustness*.csv" --markdown
        ```
    """
    files = glob(path)

    # File names are split on "_": the second token is read as the technology
    # and the third one (minus ".csv") as the config.
    get_technology = lambda f: f.split("_")[1]
    get_config = lambda f: f.split("_")[2].replace(".csv", "")

    technologies = sorted({get_technology(f) for f in files})
    configs = sorted({get_config(f) for f in files})

    # Explicit lookup instead of `eval(axis.value)`: same selection for the two
    # supported values, without evaluating an arbitrary expression.
    entries = {"technologies": technologies, "configs": configs}[axis.value]

    out = destination if destination else sys.stdout
    for e in entries:
        files_ = [f for f in files if e in f]
        rows = []
        for file in files_:
            name = (
                get_config(file)
                if axis == OverlapAnalysisAxis.technologies
                else get_technology(file)
            )
            # Each file holds (variable, value) pairs; transposing turns it
            # into a single row labelled `name`.
            rows.append(pd.read_csv(file, names=["var", name]).set_index("var").T)
        # `DataFrame.append` was removed in pandas 2.0; concatenate once instead
        # of growing the frame row by row.
        tmp = pd.concat(rows) if rows else pd.DataFrame()
        tmp.index.name = (
            "technologies" if axis == OverlapAnalysisAxis.configs else "configs"
        )
        tmp = tmp.sort_index().round(2)
        if markdown:
            typer.echo(f"\n\n## {e}\n")
            tmp.to_markdown(out)
        else:
            tmp.to_csv(out)

`wrap_prediction_analysis(path, markdown=True)` ¶

Wrap prediction analysis

Parameters:

Name	Type	Description	Default
`path`	`str`	prediction analysis file path (wildcard enabled)	required
`markdown`	`bool`	whether to output wrapped analysis as markdown or csv	`True`

Attention

csv not supported yet

Usage:

techlandscape robustness wrap-prediction-analysis outs/classification_additivemanufacturing_robustness_cnn.csv

Source code in techlandscape/robustness.py

@app.command()
def wrap_prediction_analysis(path: str, markdown: bool = True):
    """
    Wrap prediction analysis

    For each matching file, prints the summary statistics of the row-wise
    standard deviation of the scores, and a table built from the number of
    columns scoring above 0.5 on each row.

    Arguments:
        path: prediction analysis file path (wildcard enabled)
        markdown: whether to output wrapped analysis as markdown or csv

    !!! attention
        csv not supported yet

    **Usage:**
        ```shell
        techlandscape robustness wrap-prediction-analysis outs/classification_additivemanufacturing_robustness_cnn.csv
        ```
    """
    # Both helpers parse the second path component, expected to look like
    # "classification_<technology>_robustness_<architecture>.csv"
    # (see the usage example above).
    get_technology = lambda x: x.split("/")[1].split("_")[1]
    get_architecture = lambda x: x.split("/")[1].split("_")[-1].split(".")[0]
    files = glob(path)
    files = sorted(files)
    for file in files:
        technology = get_technology(file)
        architecture = get_architecture(file)

        # presumably one column of scores per model, as written by
        # `get_prediction_analysis` - verify against the input files
        tmp = pd.read_csv(file, index_col=0)
        # Summary statistics of the per-row standard deviation across columns.
        dispersion = tmp.std(axis=1).describe().rename("std_score").copy()

        # Binarize every score with a 0.5 threshold.
        for col in tmp.columns:
            tmp[col] = tmp[col].apply(lambda x: 1 if x > 0.5 else 0)
        # Number of columns equal to 1 on each row.
        tmp["vote"] = tmp.sum(1)
        tmp = (
            # count() gives the non-null count per column within each vote
            # level; max(1) keeps the largest of them as the group size.
            tmp.groupby("vote")
            .count()
            .max(1)
            .to_frame()
            # After reset_index the frame holds the vote level and the group
            # size; prod(1) multiplies the two on each row.
            .reset_index()
            .prod(1)
            .rename("nb_positives")
            .to_frame()
        )
        # NOTE(review): the index is now positional (from reset_index), so it
        # matches the vote level only if every level from 0 upward is present
        # in the data - confirm this is intended.
        tmp["share_positives"] = tmp["nb_positives"] / tmp["nb_positives"].sum()
        # Reverse the row order so the cumulative share starts from the last
        # (highest) row.
        tmp = tmp[::-1]
        tmp["cumshare_positives"] = tmp["share_positives"].cumsum()
        tmp.index.name = "nb_models"
        consensus = tmp.copy()

        if markdown:
            typer.echo(f"\n## {technology} - {architecture}\n")
            typer.echo("### Score dispersion\n")
            typer.echo(dispersion.round(3).to_markdown() + "\n")
            typer.echo("### Models consensus\n")
            typer.echo(consensus.round(3).to_markdown())
        else:
            # The tables are still computed above; only an error message is
            # emitted (on stderr) in this mode.
            typer.secho(f"{not_ok}csv not supported yet", err=True)

robustness

get_overlap_analysis(technology, kind, credentials, summary=False, destination=None, robustness_dataset='robustness') ¶

get_prediction_analysis(models, data, destination=None) ¶

models_performance(path, markdown=True, destination=None, title=None) ¶

wrap_overlap_analysis(path, axis, destination=None, markdown=False) ¶

wrap_prediction_analysis(path, markdown=True) ¶

`get_overlap_analysis(technology, kind, credentials, summary=False, destination=None, robustness_dataset='robustness')` ¶

`get_prediction_analysis(models, data, destination=None)` ¶

`models_performance(path, markdown=True, destination=None, title=None)` ¶

`wrap_overlap_analysis(path, axis, destination=None, markdown=False)` ¶

`wrap_prediction_analysis(path, markdown=True)` ¶