21. International collaboration rate of individuals, with context
Level: Advanced
This query requires a good understanding of SQL and the Dimensions data model
Description
This query determines the yearly proportion of publications from a single author that include international collaborators. It also calculates the same rate for the author's current institution (for papers in the same field), and the author's current country. A few highlights:
- We can simplify the query by collecting all the author-specific data up front, in the
researcher_details
andresearcher_field
subqueries, and referring to it later simply as something like(SELECT org FROM researcher_details)
. -
When we calculate the percentage of papers that are international collaborations, it would be much simpler to simply write
(intl*100) / (intl + domestic)
. However, in situations where there are zero papers returned for that particular category, this will return an error because the query would call for dividing by zero. We can avoid this by using theCOALESCE
function. -
The
COUNTIF
function is used multiple times here—it can be helpful in situations where you want to maintain separate counts for different conditionals without usingCOUNT
and lots of separate subqueries.
Query
WITH researcher_details AS (
-- grab the basic metadata about the selected researcher
SELECT r.id, r.current_research_org AS org, grid.address.country
FROM `dimensions-ai.data_analytics.researchers` r
INNER JOIN `dimensions-ai.data_analytics.grid` grid
ON r.current_research_org=grid.id
WHERE r.id="ur.0761121015.96" -- researcher defined here
),
researcher_field AS (
-- determines the field of research code in which
-- the researcher has most frequently authored papers
SELECT for2, COUNT(DISTINCT p.id)
FROM `dimensions-ai.data_analytics.publications` p
CROSS JOIN UNNEST(category_for.second_level.codes) for2
WHERE (SELECT id FROM researcher_details) IN UNNEST(researcher_ids)
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1
),
counts_researcher AS (
-- count how many publications from the selected researcher
-- include authors from multiple countries
SELECT year,
COUNTIF(ARRAY_LENGTH(p.research_org_countries) > 1) AS intl,
COUNTIF(ARRAY_LENGTH(p.research_org_countries) <= 1) AS domestic
FROM `dimensions-ai.data_analytics.publications` p
WHERE
year >= 2015
AND (SELECT id FROM researcher_details) IN UNNEST(researcher_ids)
GROUP BY year
),
counts_org AS (
-- Count how many publications from the selected researcher's
-- CURRENT ORGANIZATION that include authors from multiple countries.
-- We count only publications in the author's primary field of
-- research, and EXCLUDE papers they co-authored.
SELECT year,
COUNTIF(ARRAY_LENGTH(p.research_org_countries) > 1) AS intl,
COUNTIF(ARRAY_LENGTH(p.research_org_countries) <= 1) AS domestic
FROM `dimensions-ai.data_analytics.publications` p
WHERE
year >= 2015
AND (SELECT org FROM researcher_details) IN UNNEST(research_orgs)
AND (SELECT for2 FROM researcher_field) IN UNNEST(category_for.second_level.codes)
AND (SELECT id FROM researcher_details) NOT IN UNNEST(researcher_ids)
GROUP BY year
),
counts_country AS (
-- Count how many publications from the selected researcher's
-- current COUNTRY that include authors from multiple countries.
-- We count only publications in the author's primary field of
-- research, and EXCLUDE papers they co-authored.
SELECT year,
COUNTIF(ARRAY_LENGTH(p.research_org_countries) > 1) AS intl,
COUNTIF(ARRAY_LENGTH(p.research_org_countries) <= 1) AS domestic
FROM `dimensions-ai.data_analytics.publications` p
WHERE
year >= 2015
AND (SELECT country FROM researcher_details) IN UNNEST(research_org_country_names)
AND (SELECT for2 FROM researcher_field) IN UNNEST(category_for.second_level.codes)
AND (SELECT id FROM researcher_details) NOT IN UNNEST(researcher_ids)
GROUP BY year
),
raw_percents AS (
-- Divide international collabs by total collabs in each category
SELECT researcher.year,
(researcher.intl*100) /
COALESCE(researcher.intl + researcher.domestic, 1) AS intl_researcher,
(org.intl*100) /
COALESCE(org.intl + org.domestic, 1) AS intl_org,
(country.intl*100) /
COALESCE(country.intl + country.domestic, 1) AS intl_country,
FROM counts_researcher researcher
LEFT JOIN counts_org org ON researcher.year=org.year
LEFT JOIN counts_country country ON researcher.year=country.year
ORDER BY researcher.year DESC
)
-- Pull the percentages from the raw_percents table and round them
SELECT year,
ROUND(intl_researcher, 1) AS intl_researcher,
ROUND(intl_org, 1) AS intl_org,
ROUND(intl_country, 1) AS intl_country
FROM raw_percents
Results
[
{
"year": "2021",
"intl_researcher": "44.4",
"intl_org": "53.8",
"intl_country": "59.0"
},
{
"year": "2020",
"intl_researcher": "50.0",
"intl_org": "54.7",
"intl_country": "48.7"
},
{
"year": "2019",
"intl_researcher": "53.6",
"intl_org": "48.9",
"intl_country": "45.8"
},
{
"year": "2018",
"intl_researcher": "57.6",
"intl_org": "49.5",
"intl_country": "42.5"
},
{
"year": "2017",
"intl_researcher": "53.3",
"intl_org": "42.9",
"intl_country": "43.4"
},
{
"year": "2016",
"intl_researcher": "40.6",
"intl_org": "37.4",
"intl_country": "39.4"
},
{
"year": "2015",
"intl_researcher": "38.5",
"intl_org": "41.5",
"intl_country": "40.2"
}
]