Vishist16 · Vishist16 · Oct 31, 2022 · Oct 31, 2022
diff --git a/scripts/Script to fetch top IMBD listed movies/imbd.py b/scripts/Script to fetch top IMBD listed movies/imbd.py
@@ -0,0 +1,87 @@
+"""Fetch the IMDb top chart, print it, and save it as a CSV file."""
+
+from bs4 import BeautifulSoup
+import requests
+import re
+import pandas as pd
+
+URL = 'http://www.imdb.com/chart/top'
+OUTPUT_CSV = 'imdb_top_250_movies.csv'
+# requests applies no timeout unless one is passed explicitly.
+REQUEST_TIMEOUT = 30
+
+# Cell text with whitespace collapsed: "<rank>. <title> (<year>)".
+ENTRY_RE = re.compile(r'^(\d+)\.\s*(.*?)\s*\(([^()]*)\)$')
+
+
+def parse_title_cell(text):
+    """Split the text of one title cell into (place, movie_title, year).
+
+    After runs of whitespace are collapsed the text must read
+    "<rank>. <title> (<year>)". Only the dot after the rank is dropped and
+    only the last parenthesised group is taken as the year, so dots and
+    parentheses inside the title are kept.
+
+    Raises:
+        ValueError: if the text does not have that shape.
+    """
+    match = ENTRY_RE.match(' '.join(text.split()))
+    if match is None:
+        raise ValueError('unrecognised title cell: {!r}'.format(text))
+    return match.groups()
+
+
+def fetch_top_movies(url=URL):
+    """Download the chart page at *url* and return one dict per movie.
+
+    Each dict has the keys "place", "movie_title", "rating", "year" and
+    "star_cast", in that order.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status.
+        RuntimeError: if no title cells are found, or the numbers of
+            titles, cast entries and ratings disagree.
+        ValueError: if a title cell cannot be parsed.
+    """
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    cells = soup.select('td.titleColumn')
+    crew = [a.attrs.get('title') for a in soup.select('td.titleColumn a')]
+    ratings = [b.attrs.get('data-value')
+               for b in soup.select('td.posterColumn span[name=ir]')]
+
+    # Fail loudly instead of writing an empty or misaligned table.
+    if not cells:
+        raise RuntimeError('no td.titleColumn cells found at ' + url)
+    if not len(cells) == len(crew) == len(ratings):
+        raise RuntimeError(
+            'mismatched counts: {} titles, {} cast entries, {} ratings'
+            .format(len(cells), len(crew), len(ratings)))
+
+    movies = []
+    for cell, rating, star_cast in zip(cells, ratings, crew):
+        place, movie_title, year = parse_title_cell(cell.get_text())
+        movies.append({
+            'place': place,
+            'movie_title': movie_title,
+            'rating': rating,
+            'year': year,
+            'star_cast': star_cast,
+        })
+    return movies
+
+
+def main():
+    """Print every chart entry and write them all to OUTPUT_CSV."""
+    movies = fetch_top_movies()
+    for movie in movies:
+        print(movie['place'], '-', movie['movie_title'],
+              '(' + movie['year'] + ') -',
+              'Starring:', movie['star_cast'], movie['rating'])
+    pd.DataFrame(movies).to_csv(OUTPUT_CSV, index=False)
+
+
+if __name__ == '__main__':
+    main()