robustness
get_overlap_analysis(technology, kind, credentials, summary=False, destination=None, robustness_dataset='robustness')
¶
Return the overlap analysis of technology
Parameters:
Name | Type | Description | Default |
---|---|---|---|
technology |
str |
name of the technology (as |
required |
kind |
OverlapAnalysisKind |
kind of overlap analysis |
required |
credentials |
Path |
BQ credentials file path |
required |
destination |
Path |
results destination file path (if None, stdout) |
None |
summary |
bool |
whether the full analysis or its summary should be saved |
False |
robustness_dataset |
str |
name of the BQ 'robustness' dataset |
'robustness' |
Usage:
techlandscape robustness get-overlap-analysis <technology> <kind> <your-credentials> --destination <overlap_analysis.csv>
Source code in techlandscape/robustness.py
@app.command()
def get_overlap_analysis(
    technology: str,
    kind: OverlapAnalysisKind,
    credentials: Path,
    summary: bool = False,
    destination: Path = None,
    robustness_dataset: str = "robustness",
):
    """
    Return the overlap analysis of `technology`

    Arguments:
        technology: name of the technology (forwarded to `OverlapAnalysis`)
        kind: kind of overlap analysis (pairwise, or batch for any other value)
        credentials: BQ credentials file path
        destination: results destination file path (if None, stdout)
        summary: whether the full analysis or its summary (`describe()`) should be saved
        robustness_dataset: name of the BQ 'robustness' dataset

    **Usage:**
        ```shell
        techlandscape robustness get-overlap-analysis <technology> <kind> <your-credentials> --destination <overlap_analysis.csv>
        ```
    """
    analysis = OverlapAnalysis(technology, credentials, robustness_dataset)
    is_pairwise = kind == OverlapAnalysisKind.pairwise

    if is_pairwise and summary:
        analysis.get_pairwise_overlap_ratios()
        res = analysis.pairwise_overlap_ratios.describe()
    elif is_pairwise:
        analysis.get_pairwise_overlap_analysis()
        res = analysis.pairwise_overlap_analysis
    else:
        # Both batch outputs are derived from the same computation
        analysis.get_batch_overlap_analysis()
        res = analysis.batch_overlap_ratios
        if summary:
            res = res.describe()

    # Summaries are written with their index and no header row; full results
    # are written with a header row and no index.
    index = bool(summary)
    header = not index

    target = sys.stdout if not destination else destination
    res.to_csv(target, index=index, header=header)
get_prediction_analysis(models, data, destination=None)
¶
Return a csv file with predicted scores on `data` for all models matching the `models` pattern.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
models |
str |
model folder path (wildcard enabled) |
required |
data |
str |
data file path |
required |
destination |
Path |
destination file path |
None |
Usage:
techlandscape robustness get-prediction-analysis "models/additivemanufacturing_*_cnn/model-best" data/expansion_additivemanufacturing_sample.jsonl --destination outs/
# will be saved as classification_additivemanufacturing_robustness_cnn.csv
Source code in techlandscape/robustness.py
@app.command()
def get_prediction_analysis(models: str, data: str, destination: Path = None):
    """
    Return a csv file with predicted scores on `data` for all models matching the `models` pattern.

    Arguments:
        models: model folder path (wildcard enabled)
        data: data file path
        destination: destination folder path (current directory if None)

    **Usage:**
        ```shell
        techlandscape robustness get-prediction-analysis "models/additivemanufacturing_*_cnn/model-best" data/expansion_additivemanufacturing_sample.jsonl --destination outs/
        # will be saved as classification_additivemanufacturing_robustness_cnn.csv
        ```
    """
    # Sorted so that column order and the output file name are deterministic
    model_paths = sorted(glob(models))
    if not model_paths:
        # Without this guard the code below fails with a NameError
        typer.secho(f"No model found matching {models}", err=True)
        raise typer.Exit(code=1)

    # `Path(None)` raises a TypeError, so fall back to the current directory
    out_dir = Path(destination) if destination else Path(".")

    predictions = []
    for model_path in model_paths:
        model = tf.keras.models.load_model(model_path)
        cfg = get_config(Path(model_path) / "config.yaml")
        # Score the requested data file rather than the test set of the config
        cfg["data"]["test"] = data
        text_vectorizer = TextVectorizer(cfg)
        text_vectorizer.vectorize()
        pred = model.predict(text_vectorizer.x_test)
        # One column per model, named after the model path
        predictions.append(pd.DataFrame(pred, columns=[model_path]))

    # Inner join on the row index, as the successive index merges did
    out = pd.concat(predictions, axis=1, join="inner")

    # The folder containing the model is expected to be named
    # "<technology>_..._<architecture>"; the last model names the output file.
    model_folder = Path(model_paths[-1]).parent.name
    technology = model_folder.split("_")[0]
    architecture = model_folder.split("_")[-1]

    target = out_dir / f"classification_{technology}_robustness_{architecture}.csv"
    out.to_csv(target)
    typer.secho(f"{ok}{target} saved")
models_performance(path, markdown=True, destination=None, title=None)
¶
Summarize models performance and save to csv/ print to stdout
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
path of the meta.json (wildcard enabled) |
required |
markdown |
bool |
whether the output should be printed to stdout as md or saved to `destination` |
True |
destination |
str |
destination file path (used if `--no-markdown`) |
None |
title |
str |
title of the table (used if `--markdown`) |
None |
Usage:
techlandscape robustness models-performance "models/additivemanufacturing_*_cnn/model-best/meta.json" --markdown --title "additivemanufacturing - cnn"
Source code in techlandscape/robustness.py
@app.command()
def models_performance(
    path: str, markdown: bool = True, destination: str = None, title: str = None
):
    """
    Summarize models performance and save to csv/ print to stdout

    Arguments:
        path: path of the meta.json (wildcard enabled)
        markdown: whether the output should be printed to stdout as md or saved to `destination`
        destination: destination file path (used if `--no-markdown`; csv is printed to stdout if None)
        title: title of the table (used if `--markdown`)

    **Usage:**
        ```shell
        techlandscape robustness models-performance "models/additivemanufacturing_*_cnn/model-best/meta.json" --markdown --title "additivemanufacturing - cnn"
        ```
    """
    files = glob(path)
    if not files:
        # Without this guard `out` is never bound and the code below fails
        typer.secho(f"No file found matching {path}", err=True)
        raise typer.Exit(code=1)

    out = None
    for file in files:
        # Context manager so the file handle is closed deterministically
        with open(file, "r") as fin:
            meta = json.load(fin)
        # The second path component is used as the model name
        name = file.split("/")[1]
        tmp = pd.DataFrame.from_dict(meta).rename(columns={"performance": name})
        out = (
            tmp if out is None else out.merge(tmp, left_index=True, right_index=True)
        )

    # One row per model, columns in alphabetical order
    out = out.T
    out = out[sorted(out.columns)]
    if len(files) > 1:
        # Several models: report summary statistics instead of the raw values
        out = out.describe()

    if markdown:
        if title:
            typer.echo(f"\n### {title}\n")
        typer.echo(f"{out.round(2).to_markdown()}")
    elif destination:
        out.to_csv(destination)
    else:
        # `to_csv()` without a target only returns the csv text: print it
        # rather than silently discarding the result.
        typer.echo(out.to_csv(), nl=False)
wrap_overlap_analysis(path, axis, destination=None, markdown=False)
¶
Wrap overlap analysis based on csv output of get_overlap_analysis
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
path of the files with results to be wrapped (wildcard enabled) |
required |
axis |
OverlapAnalysisAxis |
axis of the main analysis |
required |
destination |
str |
saving file path (print to stdout if None) |
None |
markdown |
bool |
whether to return as md or csv table |
False |
Usage:
techlandscape robustness wrap-overlap-analysis "outs/expansion_*robustness*.csv" <axis> --markdown
Source code in techlandscape/robustness.py
@app.command()
def wrap_overlap_analysis(
    path: str,
    axis: OverlapAnalysisAxis,
    destination: str = None,
    markdown: bool = False,
):
    """
    Wrap overlap analysis based on csv output of `get_overlap_analysis`

    Arguments:
        path: path of the files with results to be wrapped (wildcard enabled)
        axis: axis of the main analysis
        destination: saving file path (print to stdout if None)
        markdown: whether to return as md or csv table

    **Usage:**
        ```shell
        techlandscape robustness wrap-overlap-analysis "outs/expansion_*robustness*.csv" <axis> --markdown
        ```
    """
    files = glob(path)

    # File names are parsed as "<prefix>_<technology>_<config>.csv". The local
    # helpers are named so as not to shadow the module-level `get_config`.
    def parse_technology(f):
        return f.split("_")[1]

    def parse_config(f):
        return f.split("_")[2].replace(".csv", "")

    technologies = sorted({parse_technology(f) for f in files})
    configs = sorted({parse_config(f) for f in files})

    # Explicit lookup instead of `eval(axis.value)`: same result for the two
    # supported axes, without evaluating a string as code.
    groups = {"technologies": technologies, "configs": configs}[axis.value]
    by_technology = axis == OverlapAnalysisAxis.technologies

    # NOTE(review): when `destination` is a file path, every group is written
    # to that same path, so later groups overwrite earlier ones - confirm
    # whether appending was intended.
    out = destination if destination else sys.stdout

    for e in groups:
        frames = []
        for file in files:
            if e not in file:
                continue
            # Rows are labelled with the dimension that is *not* the main axis
            name = parse_config(file) if by_technology else parse_technology(file)
            frames.append(
                pd.read_csv(file, names=["var", name]).set_index("var").T
            )
        # `DataFrame.append` was removed in pandas 2.0; `pd.concat` stacks the
        # same rows.
        tmp = pd.concat(frames) if frames else pd.DataFrame()
        tmp.index.name = (
            "technologies" if axis == OverlapAnalysisAxis.configs else "configs"
        )
        tmp = tmp.sort_index().round(2)
        if markdown:
            typer.echo(f"\n\n## {e}\n")
            tmp.to_markdown(out)
        else:
            tmp.to_csv(out)
wrap_prediction_analysis(path, markdown=True)
¶
Wrap prediction analysis
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
prediction analysis file path (wildcard enabled) |
required |
markdown |
bool |
whether to output wrapped analysis as markdown or csv |
True |
Attention
csv not supported yet
Usage:
techlandscape robustness wrap-prediction-analysis outs/classification_additivemanufacturing_robustness_cnn.csv
Source code in techlandscape/robustness.py
@app.command()
def wrap_prediction_analysis(path: str, markdown: bool = True):
    """
    Wrap prediction analysis

    Arguments:
        path: prediction analysis file path (wildcard enabled)
        markdown: whether to output wrapped analysis as markdown or csv

    !!! attention
        csv not supported yet

    **Usage:**
        ```shell
        techlandscape robustness wrap-prediction-analysis outs/classification_additivemanufacturing_robustness_cnn.csv
        ```
    """
    for file in sorted(glob(path)):
        # Parse the file *name* ("classification_<technology>_..._<architecture>.csv")
        # so that the command works whatever the depth of the containing folder.
        name_parts = Path(file).name.split("_")
        technology = name_parts[1]
        architecture = name_parts[-1].split(".")[0]

        scores = pd.read_csv(file, index_col=0)

        # Summary statistics of the per-row standard deviation across columns
        dispersion = scores.std(axis=1).describe().rename("std_score").copy()

        # Binarize each score at 0.5, then count the positive votes per row
        votes = (scores > 0.5).astype(int)
        votes["vote"] = votes.sum(1)

        # For each distinct vote count: number of rows with that vote count,
        # multiplied by the vote count itself (`prod(1)` over both columns).
        # NOTE(review): after `reset_index` the index is positional; it equals
        # the vote count only if every value from 0 upward occurs - confirm
        # that this is what "nb_models" is meant to show.
        consensus = (
            votes.groupby("vote")
            .count()
            .max(1)
            .to_frame()
            .reset_index()
            .prod(1)
            .rename("nb_positives")
            .to_frame()
        )
        consensus["share_positives"] = (
            consensus["nb_positives"] / consensus["nb_positives"].sum()
        )
        # Reverse the rows so that the cumulative share starts from the last one
        consensus = consensus[::-1].copy()
        consensus["cumshare_positives"] = consensus["share_positives"].cumsum()
        consensus.index.name = "nb_models"

        if markdown:
            typer.echo(f"\n## {technology} - {architecture}\n")
            typer.echo("### Score dispersion\n")
            typer.echo(dispersion.round(3).to_markdown() + "\n")
            typer.echo("### Models consensus\n")
            typer.echo(consensus.round(3).to_markdown())
        else:
            typer.secho(f"{not_ok}csv not supported yet", err=True)