14. Extracting complex publications records
Level: Medium
This query requires basic knowledge of SQL and the Dimensions data model
Description
The query below combines various techniques in order to extract full publication metadata records that include both single-value metadata and unpacked lists.
Note: we use LEFT JOIN
clauses in order to ensure we obtain all records, not just the ones that have non-null values in the nested objects.
Query
SELECT
p.id,
p.title.preferred AS title,
p.doi,
p.year,
COALESCE(p.journal.title, p.proceedings_title.preferred, p.book_title.preferred, p.book_series_title.preferred) AS venue,
p.type,
p.date AS date_publication,
p.date_inserted,
p.altmetrics.score AS altmetrics_score,
p.metrics.times_cited,
grid.id AS gridid,
grid.name AS gridname,
grid.address.country AS gridcountry,
grid.address.city AS gridcity,
open_access_categories,
cat_for.name AS category_for,
FROM `dimensions-ai.data_analytics.publications` p
LEFT JOIN UNNEST(research_orgs) AS research_orgs_grids
LEFT JOIN `dimensions-ai.data_analytics.grid` grid
ON grid.id=research_orgs_grids
LEFT JOIN UNNEST(p.open_access_categories_v2) AS open_access_categories
LEFT JOIN UNNEST(p.category_for.first_level.full) AS cat_for
WHERE
EXTRACT(YEAR FROM date_inserted) >= 2020
Results
[
{
"id": "pub.1124854415",
"title": "Gabinetto armonico pieno d'istromenti sonori",
"doi": "10.5479/sil.744616.39088011251444",
"year": "1722",
"venue": null,
"type": "monograph",
"date_publication": "1722",
"date_inserted": "2020-02-15 01:10:52 UTC",
"altmetrics_score": null,
"times_cited": "3",
"gridid": null,
"gridname": null,
"gridcountry": null,
"gridcity": null,
"open_access_categories": "oa_all",
"category_for": null
},
{
"id": "pub.1124854415",
"title": "Gabinetto armonico pieno d'istromenti sonori",
"doi": "10.5479/sil.744616.39088011251444",
"year": "1722",
"venue": null,
"type": "monograph",
"date_publication": "1722",
"date_inserted": "2020-02-15 01:10:52 UTC",
"altmetrics_score": null,
"times_cited": "3",
"gridid": null,
"gridname": null,
"gridcountry": null,
"gridcity": null,
"open_access_categories": "bronze",
"category_for": null
},
// many more entries here...
]