Skip to content

search

Search

relationship_best(path, report='short')

Report perf of each (long)/best (short) config

Parameters:

Name Type Description Default
path str

data file path (wildcard allowed)

required
report str

report type (in ["short", "long"])

'short'

Usage:

patentcity search relationship-best "results/*.json" --report long

Source code in patentcity/search.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@app.command()
def relationship_best(path: str, report: str = "short"):
    """Report perf of each (long)/best (short) config

    Arguments:
        path: data file path (wildcard allowed)
        report: report type (in ["short", "long"])

    **Usage:**
        ```shell
        patentcity search relationship-best "results/*.json" --report long
        ```
    """
    # NOTE(review): the `patentcity search` prefix in the usage example is
    # taken from the sibling command's docstring in this file - confirm it
    # matches how `app` is mounted.

    # Validate explicitly: `assert` is stripped when Python runs with -O.
    if report not in ("short", "long"):
        raise typer.BadParameter(
            f'report must be one of ["short", "long"], got {report!r}'
        )

    # Sorted so that the output does not depend on filesystem listing order.
    files = sorted(glob(path))
    if not files:
        typer.secho(f"No file matching {path}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1)

    # One frame per evaluation file; after the transpose each row is a label
    # and the originating file name is kept in the "config" column.
    frames = []
    for file in files:
        scores = pd.read_json(file).T
        scores["config"] = os.path.basename(file)
        frames.append(scores)
    # `DataFrame.append` was removed in pandas 2.0; concatenate once instead.
    res = pd.concat(frames).reset_index().rename(columns={"index": "label"})

    labels = ["ALL"] + list(RELATIONS.values())
    best_rows = []
    for i, label in enumerate(labels):
        # Boolean mask rather than `query` so that labels containing quotes
        # cannot break the expression.
        res_label = res[res["label"] == label].sort_values("f", ascending=False)
        if report == "long":
            if i == 0:
                typer.secho("# Report", fg=typer.colors.BLUE)
            typer.secho(f"\n## {label}", fg=typer.colors.BLUE)
            typer.echo(res_label.to_markdown(index=False))
        elif not res_label.empty:
            # Rows are sorted by descending "f": the first one is the best config.
            best_rows.append(res_label.iloc[:1])

    if report == "short":
        if best_rows:
            best = pd.concat(best_rows)
        else:
            # No label matched: still print the (empty) table header.
            best = pd.DataFrame(columns=res.columns)
        typer.echo(best.to_markdown(index=False))

relationship_params(config_search)

Generate config files defined by config_search grid

Parameters:

Name Type Description Default
config_search str

config search file path

required

Usage:

patentcity search relationship-params configs/rel_search.yaml

Source code in patentcity/search.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
@app.command()
def relationship_params(config_search: str):
    """Generate config files defined by `config_search` grid

    The file must contain a `search` mapping (parameter -> "start-end" integer
    range, or "-"-separated list of values) and a `base` mapping (label ->
    parameters). One config file is written per grid value, next to
    `config_search`, with "search" in the file name replaced by the value index.

    Arguments:
        config_search: config search file path

    **Usage:**
        ```shell
        patentcity search relationship-params configs/rel_search.yaml
        ```
    """
    filename = os.path.basename(config_search)
    path = os.path.dirname(config_search)

    # Output names are derived by substituting "search" in the input name.
    # Without that token every output would be written over the input file.
    if "search" not in filename:
        raise typer.BadParameter(
            f'config search file name must contain "search", got {filename!r}'
        )

    with open(config_search, "r") as config_file:
        # NOTE(review): FullLoader can still construct some Python objects;
        # prefer yaml.safe_load if this file may come from an untrusted source.
        cfg = yaml.load(config_file, Loader=yaml.FullLoader)
    search = cfg["search"]
    base = cfg["base"]

    # NOTE(review): the index `i` restarts at 0 for every parameter and `base`
    # is updated in place, so with several search parameters later files
    # overwrite earlier ones and keep the last value of the previous
    # parameters - confirm whether a cartesian grid was intended.
    for param, spec in search.items():
        try:
            start, end = list(map(int, spec.split("-")))
            # `end` is excluded, as with any Python range.
            grid = range(start, end)
        except ValueError:
            # Not exactly two integers: treat the pieces as literal values.
            grid = spec.split("-")

        for i, val in enumerate(grid):
            # Apply the candidate value to every label of the base config.
            for label in base.keys():
                base[label].update({param: val})
            out_file = os.path.join(path, filename.replace("search", str(i)))
            with open(out_file, "w") as file:
                yaml.dump(base, file)
    typer.secho(f"config files saved in {path}", fg=typer.colors.BLUE)