Skip to content

assets

get_publications_family(destination_table, credentials, verbose=False)

Emulate patents-public-data.patents.publications table at the family_id level

Parameters:

Name Type Description Default
destination_table str

the BQ destination table (project.dataset.table)

required
credentials Path

BQ credentials file path

required
verbose bool

verbosity

False

Usage:

techlandscape assets get-publications-family <your-table> <your-credentials>

Note

It takes up to 2 minutes to complete

Source code in techlandscape/assets.py
@app.command()
@monitor
def get_publications_family(
    destination_table: str, credentials: Path, verbose: bool = False
):
    """Emulate `patents-public-data.patents.publications` table at the *family_id* level

    Arguments:
        destination_table: the BQ destination table (`project.dataset.table`)
        credentials: BQ credentials file path
        verbose: verbosity

    **Usage:**
        ```shell
        techlandscape assets get-publications-family <your-table> <your-credentials>
        ```

    !!! note
        It takes up to 2 minutes to complete

    """
    query = f"""
        WITH
          fam AS (
          SELECT
            DISTINCT family_id
          FROM
            `patents-public-data.patents.publications` ),
          crossover AS (
          SELECT
            publication_number,
            family_id
          FROM
            `patents-public-data.patents.publications` ),
          pub AS (
          SELECT
            family_id,
            MIN(publication_date) AS publication_date,
            SPLIT(STRING_AGG(DISTINCT(p.publication_number))) AS publication_number,
            SPLIT(STRING_AGG(DISTINCT(country_code))) AS country_code
          FROM
            `patents-public-data.patents.publications` AS p
          GROUP BY
            family_id ),
          tech_class AS (
          SELECT
            family_id,
            [STRUCT(SPLIT(STRING_AGG(DISTINCT(cpc.code))) AS code)] AS cpc,
            [STRUCT(SPLIT(STRING_AGG(DISTINCT(ipc.code))) AS code)] AS ipc
          FROM
            `patents-public-data.patents.publications` AS p,
            UNNEST(cpc) AS cpc,
            UNNEST(ipc) AS ipc
          GROUP BY
            family_id ),
          cit AS (
          SELECT
              p.family_id,
              [STRUCT(SPLIT(STRING_AGG(DISTINCT(crossover.family_id))) AS family_id)] AS citation
            FROM
              `patents-public-data.patents.publications` AS p,
              UNNEST(citation) AS citation
              LEFT JOIN 
                crossover 
                ON citation.publication_number = crossover.publication_number
            GROUP BY
              p.family_id),
            tmp_gpr AS (
              SELECT
                family_id,
                SPLIT(STRING_AGG(DISTINCT(cited_by.publication_number))) AS cited_by_publication_number,
                CONCAT(ANY_VALUE(title), "\\n", ANY_VALUE(abstract)) AS abstract
                #ANY_VALUE(abstract) AS abstract
              FROM
                `patents-public-data.google_patents_research.publications` AS p,
                UNNEST(cited_by) AS cited_by
              LEFT JOIN
                crossover
              ON
                p.publication_number = crossover.publication_number
              GROUP BY
                family_id),
              gpr AS (
              SELECT
                tmp_gpr.family_id,
                ANY_VALUE(abstract) AS abstract,
                [STRUCT(SPLIT(STRING_AGG(DISTINCT(crossover.family_id))) AS family_id)] AS cited_by
                #SPLIT(STRING_AGG(DISTINCT(cited_by_publication_number))) AS publication_number)] AS cited_by
            FROM
              tmp_gpr,
              UNNEST(cited_by_publication_number) AS cited_by_publication_number
            LEFT JOIN
              crossover
            ON
              cited_by_publication_number = crossover.publication_number
            GROUP BY
              tmp_gpr.family_id)
        SELECT
          fam.family_id,
          pub.* EXCEPT(family_id),
          tech_class.* EXCEPT(family_id),
          cit.* EXCEPT(family_id),
          gpr.* EXCEPT(family_id)
        FROM
          fam
        LEFT JOIN
          pub
        ON
          fam.family_id = pub.family_id
        LEFT JOIN
          tech_class
        ON
          fam.family_id = tech_class.family_id
        LEFT JOIN
          cit
        ON
          fam.family_id = cit.family_id
        LEFT JOIN
          gpr
        ON
          fam.family_id = gpr.family_id
      """
    get_bq_job_done(query, destination_table, credentials, verbose=verbose)