Olympics Data Analysis – SQL
This project utilizes the historical Olympics dataset from Kaggle, analyzed through PostgreSQL to extract valuable insights. The data was imported into the database, ensuring column counts and data types matched the original CSV files.
I addressed 20 distinct scenarios using advanced SQL techniques, including:
Joins
Window functions
Subqueries
Aggregations and filtering
- tablefunc Extension (crosstab)
Find the scenarios and their corresponding SQL queries below.
1. How many olympics games have been held?
select count(distinct games) as total_olympics_games_held
from olympics_history;
2. List down all Olympics games held so far.
select distinct year, season, city
from olympics_history
order by year desc;
3. Mention the total no of nations who participated in each olympics game?
with all_countries as
(select games, nr.region
from olympics_history oh
join olympics_history_noc_regions nr ON nr.noc = oh.noc
group by games, nr.region)
select games, count(1) as total_countries
from all_countries
group by games
order by games;
4. Which year saw the highest and lowest no of countries participating in olympics?
with all_countries as
(select games, nr.region
from olympics_history oh
join olympics_history_noc_regions nr ON nr.noc=oh.noc
group by games, nr.region),
tot_countries as
(select games, count(1) as total_countries
from all_countries
group by games)
select distinct
concat(first_value(games) over(order by total_countries)
, ' - '
, first_value(total_countries) over(order by total_countries)) as Lowest_Countries,
concat(first_value(games) over(order by total_countries desc)
, ' - '
, first_value(total_countries) over(order by total_countries desc)) as Highest_Countries
from tot_countries
order by 1;
5. Which nation has participated in all of the olympic games?
with tot_games as
(select count(distinct games) as total_games
from olympics_history),
countries as
(select games, nr.region as country
from olympics_history oh
join olympics_history_noc_regions nr ON nr.noc=oh.noc
group by games, nr.region),
countries_participated as
(select country, count(1) as total_participated_games
from countries
group by country)
select cp.*
from countries_participated cp
join tot_games tg on tg.total_games = cp.total_participated_games
order by 1;
6. Identify the sport which was played in all summer olympics.
with all_summer_games as
(select *
from olympics_history
where season = 'Summer'),
total_games_played_in_summer as
(select count(distinct games) as total_games
from all_summer_games),
final_table as
(select distinct sport, count(games) over(partition by sport) as no_of_games
from all_summer_games
group by games, sport)
select *
from final_table t1
join total_games_played_in_summer t2
on t1.no_of_games = t2.total_games;
7. Which Sports were just played only once in the olympics?
with all_games as
(select *
from olympics_history),
final_table as
(select distinct sport, count(games) over(partition by sport) as no_of_games, games
from all_games
group by games, sport)
select *
from final_table t1
where no_of_games = 1
order by sport;
8. Fetch the total no of sports played in each olympic games.
with all_games as
(select *
from olympics_history),
final_table as
(select distinct games, count(sport) over(partition by games) as no_of_games
from all_games
group by games, sport)
select *
from final_table
order by no_of_games desc;
9. Fetch details of the oldest athletes to win a gold medal.
with t1 AS
(select name,sex,cast(case when age = 'NA' then '0' else age end as int) as age
,team,games,city,sport, event, medal
from olympics_history
where medal = 'Gold'),
t2 as
(select *
,rank() over(order by age desc) as rnk
from t1)
select name,sex,age,team,games,city,sport,event, medal
from t2
where rnk = 1;
10. Find the Ratio of male and female athletes participated in all olympic games.
with t1 as
(select sex, count(1) as cnt
from olympics_history
group by sex)
select concat('1 : ',round(max(cnt)::decimal/min(cnt),2)) as ratio from t1;
11. Fetch the top 5 athletes who have won the most gold medals.
with all_players as
(select name, team, count(medal) as total_golds
from olympics_history
where medal = 'Gold'
group by name, team
order by total_golds desc),
ranking AS
(select *
,dense_rank() over(order by total_golds desc) as rnk
from all_players)
select name, team, total_golds
from ranking
where rnk <= 5;
12. Fetch the top 5 athletes who have won the most medals (gold/silver/bronze).
with all_players as
(select name, team, count(medal) as total_medals
from olympics_history
where medal <> 'NA'
group by name, team
order by total_medals desc),
ranking AS
(select *
,dense_rank() over(order by total_medals desc) as rnk
from all_players)
select name, team, total_medals
from ranking
where rnk <= 5;
13. Fetch the top 5 most successful countries in olympics. Success is defined by no of medals won.
with medal_count as
(select nr.region, count(oh.medal) as cnt
from olympics_history oh
join olympics_history_noc_regions nr
on oh.noc = nr.noc
where oh.medal <> 'NA'
group by region
order by cnt desc),
ranking as
(select *
,dense_rank() over(order by cnt desc) as rank
from medal_count)
select *
from ranking
where rank <= 5;
14. List down total gold, silver and broze medals won by each country.
CREATE EXTENSION IF NOT EXISTS tablefunc;
SELECT country
, coalesce(gold, 0) as gold
, coalesce(silver, 0) as silver
, coalesce(bronze, 0) as bronze
FROM CROSSTAB('SELECT nr.region as country
, oh.medal
, count(1) as total_medals
FROM olympics_history oh
JOIN olympics_history_noc_regions nr ON nr.noc = oh.noc
where medal <> ''NA''
GROUP BY nr.region,medal
order BY nr.region,medal',
'values (''Bronze''), (''Gold''), (''Silver'')')
AS result(country varchar, bronze bigint, gold bigint, silver bigint)
order by gold desc, silver desc, bronze desc;
15. List down total gold, silver and broze medals won by each country corresponding to each olympic games.
SELECT substring(games,1,position(' - ' in games) - 1) as games
, substring(games,position(' - ' in games) + 3) as country
, coalesce(gold, 0) as gold
, coalesce(silver, 0) as silver
, coalesce(bronze, 0) as bronze
FROM CROSSTAB('SELECT concat(games, '' - '', nr.region) as games
, medal
, count(1) as total_medals
FROM olympics_history oh
JOIN olympics_history_noc_regions nr ON nr.noc = oh.noc
where oh.medal <> ''NA''
GROUP BY games,nr.region,medal
order BY games,medal',
'values (''Bronze''), (''Gold''), (''Silver'')')
AS FINAL_RESULT(games text, bronze bigint, gold bigint, silver bigint)
order by gold desc, silver desc, bronze desc;
16. Identify which country won the most gold, most silver and most bronze medals in each olympic games.
WITH temp as
(SELECT substring(games, 1, position(' - ' in games) - 1) as games
, substring(games, position(' - ' in games) + 3) as country
, coalesce(gold, 0) as gold
, coalesce(silver, 0) as silver
, coalesce(bronze, 0) as bronze
FROM CROSSTAB('SELECT concat(games, '' - '', nr.region) as games
, medal
, count(1) as total_medals
FROM olympics_history oh
JOIN olympics_history_noc_regions nr ON nr.noc = oh.noc
where medal <> ''NA''
GROUP BY games,nr.region,medal
order BY games,medal',
'values (''Bronze''), (''Gold''), (''Silver'')')
AS FINAL_RESULT(games text, bronze bigint, gold bigint, silver bigint))
select distinct games
, concat(first_value(country) over(partition by games order by gold desc)
, ' - '
, first_value(gold) over(partition by games order by gold desc)) as Max_Gold
, concat(first_value(country) over(partition by games order by silver desc)
, ' - '
, first_value(silver) over(partition by games order by silver desc)) as Max_Silver
, concat(first_value(country) over(partition by games order by bronze desc)
, ' - '
, first_value(bronze) over(partition by games order by bronze desc)) as Max_Bronze
from temp
order by games;
17. Identify which country won the most gold, most silver, most bronze medals and the most medals in each olympic games.
with temp as
(SELECT substring(games, 1, position(' - ' in games) - 1) as games
, substring(games, position(' - ' in games) + 3) as country
, coalesce(gold, 0) as gold
, coalesce(silver, 0) as silver
, coalesce(bronze, 0) as bronze
FROM CROSSTAB('SELECT concat(games, '' - '', nr.region) as games
, medal
, count(1) as total_medals
FROM olympics_history oh
JOIN olympics_history_noc_regions nr ON nr.noc = oh.noc
where medal <> ''NA''
GROUP BY games,nr.region,medal
order BY games,medal',
'values (''Bronze''), (''Gold''), (''Silver'')')
AS FINAL_RESULT(games text, bronze bigint, gold bigint, silver bigint)),
tot_medals as
(SELECT games, nr.region as country, count(1) as total_medals
FROM olympics_history oh
JOIN olympics_history_noc_regions nr ON nr.noc = oh.noc
where medal <> 'NA'
GROUP BY games,nr.region order BY 1, 2)
select distinct t.games
, concat(first_value(t.country) over(partition by t.games order by gold desc)
, ' - '
, first_value(t.gold) over(partition by t.games order by gold desc)) as Max_Gold
, concat(first_value(t.country) over(partition by t.games order by silver desc)
, ' - '
, first_value(t.silver) over(partition by t.games order by silver desc)) as Max_Silver
, concat(first_value(t.country) over(partition by t.games order by bronze desc)
, ' - '
, first_value(t.bronze) over(partition by t.games order by bronze desc)) as Max_Bronze
, concat(first_value(tm.country) over (partition by tm.games order by total_medals desc nulls last)
, ' - '
, first_value(tm.total_medals) over(partition by tm.games order by total_medals desc nulls last)) as Max_Medals
from temp t
join tot_medals tm on tm.games = t.games and tm.country = t.country
order by games;
18. Which countries have never won gold medal but have won silver/bronze medals?
select *
from (SELECT country
, coalesce(gold, 0) as gold
, coalesce(silver, 0) as silver
, coalesce(bronze, 0) as bronze
FROM CROSSTAB('SELECT nr.region as country
, oh.medal
, count(1) as total_medals
FROM olympics_history oh
JOIN olympics_history_noc_regions nr ON nr.noc = oh.noc
where medal <> ''NA''
GROUP BY nr.region,medal
order BY nr.region,medal',
'values (''Bronze''), (''Gold''), (''Silver'')')
AS result(country varchar, bronze bigint, gold bigint, silver bigint)) as x
where gold = 0 and (silver > 0 or bronze > 0)
order by gold desc, silver desc, bronze desc;
19. In which Sport/event, India has won highest medals.
with all_sports as
(select sport
, count(medal) as total_medals
, dense_rank() over(order by count(medal) desc) as rnk
from olympics_history
where medal <> 'NA'
and noc = 'IND'
group by sport
order by total_medals desc)
select sport, total_medals
from all_sports
where rnk = 1;
20. Break down all olympic games where india won medal for Hockey and how many medals in each olympic games.
select team, sport, games, count(1) as total_medals
from olympics_history
where medal <> 'NA'
and team = 'India' and sport = 'Hockey'
group by team, sport, games
order by total_medals desc;
Description
Self
2025