assets
get_publications_family(destination_table, credentials, verbose=False)
¶
Emulate patents-public-data.patents.publications
table at the family_id level
Parameters:
Name | Type | Description | Default |
---|---|---|---|
destination_table |
str |
the BQ destination table ( |
required |
credentials |
Path |
BQ credentials file path |
required |
verbose |
bool |
verbosity |
False |
Usage:
techlandscape assets get-publications-family <your-table> <your-credentials>
Note
It takes up to 2 minutes to complete
Source code in techlandscape/assets.py
@app.command()
@monitor
def get_publications_family(
destination_table: str, credentials: Path, verbose: bool = False
):
"""Emulate `patents-public-data.patents.publications` table at the *family_id* level
Arguments:
destination_table: the BQ destination table (`project.dataset.table`)
credentials: BQ credentials file path
verbose: verbosity
**Usage:**
```shell
techlandscape assets get-publications-family <your-table> <your-credentials>
```
!!! note
It takes up to 2 minutes to complete
"""
query = f"""
WITH
fam AS (
SELECT
DISTINCT family_id
FROM
`patents-public-data.patents.publications` ),
crossover AS (
SELECT
publication_number,
family_id
FROM
`patents-public-data.patents.publications` ),
pub AS (
SELECT
family_id,
MIN(publication_date) AS publication_date,
SPLIT(STRING_AGG(DISTINCT(p.publication_number))) AS publication_number,
SPLIT(STRING_AGG(DISTINCT(country_code))) AS country_code
FROM
`patents-public-data.patents.publications` AS p
GROUP BY
family_id ),
tech_class AS (
SELECT
family_id,
[STRUCT(SPLIT(STRING_AGG(DISTINCT(cpc.code))) AS code)] AS cpc,
[STRUCT(SPLIT(STRING_AGG(DISTINCT(ipc.code))) AS code)] AS ipc
FROM
`patents-public-data.patents.publications` AS p,
UNNEST(cpc) AS cpc,
UNNEST(ipc) AS ipc
GROUP BY
family_id ),
cit AS (
SELECT
p.family_id,
[STRUCT(SPLIT(STRING_AGG(DISTINCT(crossover.family_id))) AS family_id)] AS citation
FROM
`patents-public-data.patents.publications` AS p,
UNNEST(citation) AS citation
LEFT JOIN
crossover
ON citation.publication_number = crossover.publication_number
GROUP BY
p.family_id),
tmp_gpr AS (
SELECT
family_id,
SPLIT(STRING_AGG(DISTINCT(cited_by.publication_number))) AS cited_by_publication_number,
CONCAT(ANY_VALUE(title), "\\n", ANY_VALUE(abstract)) AS abstract
#ANY_VALUE(abstract) AS abstract
FROM
`patents-public-data.google_patents_research.publications` AS p,
UNNEST(cited_by) AS cited_by
LEFT JOIN
crossover
ON
p.publication_number = crossover.publication_number
GROUP BY
family_id),
gpr AS (
SELECT
tmp_gpr.family_id,
ANY_VALUE(abstract) AS abstract,
[STRUCT(SPLIT(STRING_AGG(DISTINCT(crossover.family_id))) AS family_id)] AS cited_by
#SPLIT(STRING_AGG(DISTINCT(cited_by_publication_number))) AS publication_number)] AS cited_by
FROM
tmp_gpr,
UNNEST(cited_by_publication_number) AS cited_by_publication_number
LEFT JOIN
crossover
ON
cited_by_publication_number = crossover.publication_number
GROUP BY
tmp_gpr.family_id)
SELECT
fam.family_id,
pub.* EXCEPT(family_id),
tech_class.* EXCEPT(family_id),
cit.* EXCEPT(family_id),
gpr.* EXCEPT(family_id)
FROM
fam
LEFT JOIN
pub
ON
fam.family_id = pub.family_id
LEFT JOIN
tech_class
ON
fam.family_id = tech_class.family_id
LEFT JOIN
cit
ON
fam.family_id = cit.family_id
LEFT JOIN
gpr
ON
fam.family_id = gpr.family_id
"""
get_bq_job_done(query, destination_table, credentials, verbose=verbose)