24. Organizations and sub-organizations
Level: Medium
This query requires basic knowledge of SQL and the Dimensions data model
Description
This query shows how to extract information about a GRID organization together with all of its sub-organizations.
Many organizations in the GRID database include parent-child relationships. For example, grid.495456.f (the United States Department of the Air Force page) has both parent and children institutions, so one can use the hierarchy when querying related data e.g. the total number of publications for each of these organizations.
The query below shows how to leverage the organization_recursive_child_ids
field in the grid
table in order to achieve that. This field is prepopulated with all children institutions GRID IDs (recursively), hence it makes it easier to run this type of analyses.
Query
WITH hierarchy AS (
SELECT
g.id AS parent,
g.name AS parent_name,
children,
g2.name AS children_name,
FROM
`dimensions-ai.data_analytics.grid` g
CROSS JOIN
UNNEST(organization_recursive_child_ids) AS children
INNER JOIN
`dimensions-ai.data_analytics.grid` g2
ON
g2.id = children
WHERE
g.id="grid.495456.f" -- United States Department of the Air Force
)
SELECT
hierarchy.*,
COUNT(DISTINCT p.id) as pubs
FROM
hierarchy
INNER JOIN
`dimensions-ai.data_analytics.publications` p
on hierarchy.children in UNNEST(p.research_orgs)
GROUP by 1, 2, 3, 4
Breaking it down
The key part of the query uses a CROSS JOIN
on the organization_recursive_child_ids
field to retrieve all the descendants of the chosen organization:
SELECT
g.id AS parent,
children,
FROM
`dimensions-ai.data_analytics.grid` g
CROSS JOIN
UNNEST(organization_recursive_child_ids) AS children
WHERE
g.id="grid.495456.f"
Furthermore, in order to get more organization metadata e.g. the name, an inner self-join is introduced:
SELECT
g.id AS parent,
g.name AS parent_name,
children,
g2.name AS children_name,
FROM
`dimensions-ai.data_analytics.grid` g
CROSS JOIN
UNNEST(organization_recursive_child_ids) AS children
INNER JOIN
`dimensions-ai.data_analytics.grid` g2
ON
g2.id = children
WHERE
g.id="grid.495456.f"
The final step is to join also the publications
table, so to get the total publications count for each organization.
WITH hierarchy AS (
-- the grid query above
)
SELECT
hierarchy.*,
COUNT(DISTINCT p.id) as pubs
FROM
hierarchy
INNER JOIN
`dimensions-ai.data_analytics.publications` p
on hierarchy.children in UNNEST(p.research_orgs)
GROUP by 1, 2, 3, 4
Results
[
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.494596.3",
"children_name": "Edwards Air Force Base",
"pubs": "467"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.461685.8",
"children_name": "Joint Base San Antonio",
"pubs": "1031"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.265457.7",
"children_name": "United States Air Force Academy",
"pubs": "3137"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.431316.2",
"children_name": "Grand Forks Air Force Base",
"pubs": "4"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.499282.c",
"children_name": "Maxwell Air Force Base",
"pubs": "201"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.417730.6",
"children_name": "United States Air Force Research Laboratory",
"pubs": "24279"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.453002.0",
"children_name": "United States Air Force",
"pubs": "2545"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.461677.5",
"children_name": "Eglin Air Force Base",
"pubs": "575"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.507554.6",
"children_name": "United States Air Force Office of Scientific Research",
"pubs": "543"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.465246.7",
"children_name": "Air University",
"pubs": "47"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.472535.2",
"children_name": "Kirtland Air Force Base",
"pubs": "801"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.461680.d",
"children_name": "Hanscom Air Force Base",
"pubs": "400"
},
{
"parent": "grid.495456.f",
"parent_name": "United States Department of the Air Force",
"children": "grid.427848.5",
"children_name": "Air Force Institute of Technology",
"pubs": "4028"
}
// more results here...
]