In [666]:
import requests
import json
import time
import pandas
import re
import nltk
from nltk.corpus import words
from collections import Counter
Getting our reviews ->¶
In [41]:
def get_reviews(app_id,app_name):
"""
This function gets 1500 most recent steam reviews of the game with the specified steam app id
"""
total_reviews = 1500 #we collect 1500 reviews
url = f"https://store.steampowered.com/appreviews/{app_id}"
cursor = '*' #each API call returns reviews in batches. To access the next batch we need this cursor parameter
all_reviews = []
while len(all_reviews) < total_reviews: #till review count becomes 1500
params = {
"json": 1,
"filter": "recent", #we filter the reviews by recent
"num_per_page": 100,
"language": "english", #we only select reviews where detected language is english
"cursor": cursor
}
try:
response = requests.get(url, params=params) #make the API call with the specified parameters
if response.status_code != 200: #if status code 200 then break
print(f"Failed to fetch reviews. Status Code: {response.status_code}")
break
data = response.json()
reviews = data.get("reviews", []) #the reviews tag contains review data
all_reviews.extend(reviews) #add these to our all_reviews list
if not reviews: #reviews tag isn't present then that response doesn't have reviews. it means we have no more reviews to fetch
print("No more reviews to fetch.")
break
cursor = data.get("cursor") #get the cursor and this will be passed for the next call
except Exception as e: #if error then wait for 2 sec and make the call again
print(f"An error occurred: {e}. Retrying...")
time.sleep(2)
time.sleep(1) #we do 1 sec wait between calls to avoid hiting rate limits for the API call
print(f"Fetched {len(all_reviews)} reviews for {app_name}")
return all_reviews #return these reviews
In [29]:
def save_reviews_to_json(reviews, filename):
"""
This function saves the reviews for the game into a json file
"""
with open(filename, 'w', encoding='utf-8') as f:
json.dump(reviews, f, ensure_ascii=False, indent=4)
In [31]:
def get_and_save_reviews(app_id,app_name):
"""
This function calls the get_reviews() function, gets the reviews, then passes these to the save_reviews_to_json() function and calls it
"""
game_reviews=get_reviews(app_id,app_name)
save_reviews_to_json(game_reviews,f"{app_name}_reviews.json")
In [33]:
# we store our list of games and their respective app ids
games={
"elden_ring" : 1245620,
"sekiro" : 814380,
"dark_souls_remastered" : 570940,
"armored_core_6" : 1888160,
"hollow_knight" : 367520,
"hades" : 1145360,
"dead_cells" : 588650,
"slay_the_spire" : 646570,
"returnal" : 1649240,
"risk_of_rain_2" : 632360,
"witcher_3" : 292030,
"mass_effect" : 1328670,
"divinity_original_sin_2" : 435150,
"baldurs_gate_3" : 1086940,
"pillars_of_eternity": 291650,
"portal_2" : 620,
"the_witness" : 210970,
"celeste" : 504230,
"ori_and_the_blind_forest" : 387290,
"inside" : 304430,
"stardew_valley" : 413150,
"factorio" : 427520,
"frostpunk" : 323190,
"the_forest" : 242760,
"subnautica" : 264710,
"cod_modern_warfare" : 2000950,
"rocket_league" : 252950,
"counter_strike_2" : 730,
"team_fortress_2" : 440,
"dota_2" : 570
}
In [35]:
for game in games: #for every game we call the get_and_save_reviews() function with their name and id
get_and_save_reviews(games[game],game)
Fetched 1500 reviews for elden_ring Fetched 1500 reviews for sekiro Fetched 1500 reviews for dark_souls_remastered Fetched 1500 reviews for armored_core_6 Fetched 1500 reviews for hollow_knight Fetched 1500 reviews for hades Fetched 1500 reviews for dead_cells Fetched 1500 reviews for slay_the_spire Fetched 1500 reviews for returnal Fetched 1500 reviews for risk_of_rain_2 Fetched 1500 reviews for witcher_3 Fetched 1500 reviews for mass_effect Fetched 1500 reviews for divinity_original_sin_2 Fetched 1500 reviews for baldurs_gate_3 Fetched 1500 reviews for pillars_of_eternity Fetched 1500 reviews for portal_2 Fetched 1500 reviews for the_witness Fetched 1500 reviews for celeste Fetched 1500 reviews for ori_and_the_blind_forest Fetched 1500 reviews for inside Fetched 1500 reviews for stardew_valley Fetched 1500 reviews for factorio Fetched 1500 reviews for frostpunk Fetched 1500 reviews for the_forest Fetched 1500 reviews for subnautica Fetched 1500 reviews for cod_modern_warfare Fetched 1500 reviews for rocket_league Fetched 1500 reviews for counter_strike_2 Fetched 1500 reviews for team_fortress_2 Fetched 1599 reviews for dota_2
Data preprocessing ->¶
In [65]:
def load_reviews(filename):
"""
This function is used to load up JSON file with reviews of a game into a pandas dataframe
"""
with open(filename, 'r', encoding="utf-8") as file:
reviews=json.load(file)
return pd.DataFrame(reviews)
In [77]:
all_reviews_list=[] #to store reviews of all games together
game_reviews_dict={} #to store reviews of each game separately
In [80]:
for game in games: #for every game
df=load_reviews(f"{game}_reviews.json") #get the reviews into a dataframe
df["game_name"]=game #add the column called game_name
game_reviews_dict[game]=df #add this df to the game_reviews_dict in game : df pairs
all_reviews_list.append(df) #append this df to the list of dataframes of all games
In [506]:
all_reviews_df=pd.concat(all_reviews_list,ignore_index=True) #then we make a df that is concatenation of all the dfs
In [508]:
all_reviews_df["game_name"].value_counts()
Out[508]:
game_name dota_2 1599 sekiro 1500 team_fortress_2 1500 counter_strike_2 1500 rocket_league 1500 cod_modern_warfare 1500 subnautica 1500 the_forest 1500 frostpunk 1500 factorio 1500 stardew_valley 1500 inside 1500 ori_and_the_blind_forest 1500 celeste 1500 the_witness 1500 elden_ring 1500 pillars_of_eternity 1500 baldurs_gate_3 1500 divinity_original_sin_2 1500 mass_effect 1500 witcher_3 1500 risk_of_rain_2 1500 returnal 1500 slay_the_spire 1500 dead_cells 1500 hades 1500 hollow_knight 1500 armored_core_6 1500 dark_souls_remastered 1500 portal_2 1500 Name: count, dtype: int64
In [510]:
all_reviews_df.columns
Out[510]:
Index(['recommendationid', 'author', 'language', 'review', 'timestamp_created', 'timestamp_updated', 'voted_up', 'votes_up', 'votes_funny', 'weighted_vote_score', 'comment_count', 'steam_purchase', 'received_for_free', 'written_during_early_access', 'hidden_in_steam_china', 'steam_china_location', 'primarily_steam_deck', 'game_name', 'timestamp_dev_responded', 'developer_response'], dtype='object')
In [512]:
all_reviews_df.drop_duplicates(subset="recommendationid",inplace=True) #if there are any duplicated reviews then we remove them
In [514]:
all_reviews_df["game_name"].value_counts()
Out[514]:
game_name dota_2 1599 sekiro 1500 team_fortress_2 1500 counter_strike_2 1500 rocket_league 1500 cod_modern_warfare 1500 subnautica 1500 the_forest 1500 frostpunk 1500 factorio 1500 stardew_valley 1500 inside 1500 ori_and_the_blind_forest 1500 celeste 1500 the_witness 1500 elden_ring 1500 pillars_of_eternity 1500 baldurs_gate_3 1500 divinity_original_sin_2 1500 mass_effect 1500 witcher_3 1500 risk_of_rain_2 1500 returnal 1500 slay_the_spire 1500 dead_cells 1500 hades 1500 hollow_knight 1500 armored_core_6 1500 dark_souls_remastered 1500 portal_2 1500 Name: count, dtype: int64
In [516]:
print(all_reviews_df.isnull().sum()) #we check if there are any missing values
recommendationid 0 author 0 language 0 review 0 timestamp_created 0 timestamp_updated 0 voted_up 0 votes_up 0 votes_funny 0 weighted_vote_score 0 comment_count 0 steam_purchase 0 received_for_free 0 written_during_early_access 0 hidden_in_steam_china 0 steam_china_location 0 primarily_steam_deck 0 game_name 0 timestamp_dev_responded 45098 developer_response 45098 dtype: int64
In [518]:
#we check if there are any non-english reviews
all_reviews_df["language"].value_counts()
Out[518]:
language english 45099 Name: count, dtype: int64
In [520]:
#The author col is a dictionary. We will flatten it by adding a col for each key of the author column
all_reviews_df["author"].loc[72] #if we look at any random author record
Out[520]:
{'steamid': '76561199248999433', 'num_games_owned': 0, 'num_reviews': 4, 'playtime_forever': 4056, 'playtime_last_two_weeks': 2353, 'playtime_at_review': 3887, 'last_played': 1729271154}
In [522]:
#it has these fields
for i in all_reviews_df["author"].loc[72]:
print(i,end=" ")
steamid num_games_owned num_reviews playtime_forever playtime_last_two_weeks playtime_at_review last_played
In [524]:
cols="steamid num_games_owned num_reviews playtime_forever playtime_last_two_weeks playtime_at_review last_played".split()
cols
Out[524]:
['steamid', 'num_games_owned', 'num_reviews', 'playtime_forever', 'playtime_last_two_weeks', 'playtime_at_review', 'last_played']
In [526]:
# we will make new fields in our df based on these columns
for col in cols:
all_reviews_df[f"author_{col}"]=all_reviews_df["author"].apply(lambda x: x[f"{col}"])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) Cell In[526], line 3 1 # we will make new fields in our df based on these columns 2 for col in cols: ----> 3 all_reviews_df[f"author_{col}"]=all_reviews_df["author"].apply(lambda x: x[f"{col}"]) File ~\anaconda3\Lib\site-packages\pandas\core\series.py:4924, in Series.apply(self, func, convert_dtype, args, by_row, **kwargs) 4789 def apply( 4790 self, 4791 func: AggFuncType, (...) 4796 **kwargs, 4797 ) -> DataFrame | Series: 4798 """ 4799 Invoke function on values of Series. 4800 (...) 4915 dtype: float64 4916 """ 4917 return SeriesApply( 4918 self, 4919 func, 4920 convert_dtype=convert_dtype, 4921 by_row=by_row, 4922 args=args, 4923 kwargs=kwargs, -> 4924 ).apply() File ~\anaconda3\Lib\site-packages\pandas\core\apply.py:1427, in SeriesApply.apply(self) 1424 return self.apply_compat() 1426 # self.func is Callable -> 1427 return self.apply_standard() File ~\anaconda3\Lib\site-packages\pandas\core\apply.py:1507, in SeriesApply.apply_standard(self) 1501 # row-wise access 1502 # apply doesn't have a `na_action` keyword and for backward compat reasons 1503 # we need to give `na_action="ignore"` for categorical data. 1504 # TODO: remove the `na_action="ignore"` when that default has been changed in 1505 # Categorical (GH51645). 1506 action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None -> 1507 mapped = obj._map_values( 1508 mapper=curried, na_action=action, convert=self.convert_dtype 1509 ) 1511 if len(mapped) and isinstance(mapped[0], ABCSeries): 1512 # GH#43986 Need to do list(mapped) in order to get treated as nested 1513 # See also GH#25959 regarding EA support 1514 return obj._constructor_expanddim(list(mapped), index=obj.index) File ~\anaconda3\Lib\site-packages\pandas\core\base.py:921, in IndexOpsMixin._map_values(self, mapper, na_action, convert) 918 if isinstance(arr, ExtensionArray): 919 return arr.map(mapper, na_action=na_action) --> 921 return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert) File ~\anaconda3\Lib\site-packages\pandas\core\algorithms.py:1743, in map_array(arr, mapper, na_action, convert) 1741 values = arr.astype(object, copy=False) 1742 if na_action is None: -> 1743 return lib.map_infer(values, mapper, convert=convert) 1744 else: 1745 return lib.map_infer_mask( 1746 values, mapper, mask=isna(values).view(np.uint8), convert=convert 1747 ) File lib.pyx:2972, in pandas._libs.lib.map_infer() Cell In[526], line 3, in <lambda>(x) 1 # we will make new fields in our df based on these columns 2 for col in cols: ----> 3 all_reviews_df[f"author_{col}"]=all_reviews_df["author"].apply(lambda x: x[f"{col}"]) KeyError: 'playtime_at_review'
In [528]:
# from the above error we can see that some records don't have the playtime_at_review field
# we wont keep these records in our df
for i in all_reviews_df["author"]: #we get these particular records
if "playtime_at_review" not in i:
print(i)
{'steamid': '76561198256506010', 'num_games_owned': 0, 'num_reviews': 3, 'playtime_forever': 37, 'playtime_last_two_weeks': 0, 'last_played': 1513029923} {'steamid': '76561198053705825', 'num_games_owned': 105, 'num_reviews': 3, 'playtime_forever': 41, 'playtime_last_two_weeks': 0, 'last_played': 1487532238}
In [530]:
unwanted=[{'steamid': '76561198256506010', 'num_games_owned': 0, 'num_reviews': 3, 'playtime_forever': 37, 'playtime_last_two_weeks': 0, 'last_played': 1513029923}, {'steamid': '76561198053705825', 'num_games_owned': 105, 'num_reviews': 3, 'playtime_forever': 41, 'playtime_last_two_weeks': 0, 'last_played': 1487532238}]
In [532]:
all_reviews_df[(all_reviews_df["author"]==unwanted[0]) | (all_reviews_df["author"]==unwanted[1])]
#we can see the records where playtime_at_review field is missing
Out[532]:
recommendationid | author | language | review | timestamp_created | timestamp_updated | voted_up | votes_up | votes_funny | weighted_vote_score | ... | steam_china_location | primarily_steam_deck | game_name | timestamp_dev_responded | developer_response | author_steamid | author_num_games_owned | author_num_reviews | author_playtime_forever | author_playtime_last_two_weeks | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
43690 | 30249428 | {'steamid': '76561198256506010', 'num_games_ow... | english | really addictive and good | 1488320291 | 1488320291 | True | 0 | 0 | 0 | ... | False | dota_2 | NaN | NaN | 76561198256506010 | 0 | 3 | 37 | 0 | |
45002 | 30042721 | {'steamid': '76561198053705825', 'num_games_ow... | english | I didn't find this game very fun. Not very bal... | 1487532588 | 1487532588 | False | 0 | 0 | 0 | ... | False | dota_2 | NaN | NaN | 76561198053705825 | 105 | 3 | 41 | 0 |
2 rows × 25 columns
In [534]:
# we drop these two records from our df
all_reviews_df = all_reviews_df[~((all_reviews_df["author"] == unwanted[0]) | (all_reviews_df["author"] == unwanted[1]))]
In [536]:
# we check again if there are any records where playtime_at_review field is missing
for i in all_reviews_df["author"]:
if "playtime_at_review" not in i:
print(i)
In [538]:
# so now we add the fields of author column as other columns in our df again
for col in cols:
all_reviews_df[f"author_{col}"]=all_reviews_df["author"].apply(lambda x: x[f"{col}"])
In [540]:
all_reviews_df.head() #we can see that the new columns have been added to our df
Out[540]:
recommendationid | author | language | review | timestamp_created | timestamp_updated | voted_up | votes_up | votes_funny | weighted_vote_score | ... | game_name | timestamp_dev_responded | developer_response | author_steamid | author_num_games_owned | author_num_reviews | author_playtime_forever | author_playtime_last_two_weeks | author_playtime_at_review | author_last_played | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 177283496 | {'steamid': '76561198266230202', 'num_games_ow... | english | I love the game, but… there is too much to do.... | 1729275153 | 1729275153 | True | 0 | 0 | 0 | ... | elden_ring | NaN | NaN | 76561198266230202 | 326 | 16 | 8988 | 3957 | 8977 | 1729275953 |
1 | 177283364 | {'steamid': '76561198829689696', 'num_games_ow... | english | roll | 1729275009 | 1729275009 | True | 0 | 0 | 0 | ... | elden_ring | NaN | NaN | 76561198829689696 | 11 | 2 | 2513 | 1085 | 2422 | 1729280436 |
2 | 177279370 | {'steamid': '76561199387385936', 'num_games_ow... | english | Its Laterally Perfect In Every Way, The Only B... | 1729270437 | 1729270437 | True | 0 | 0 | 0 | ... | elden_ring | NaN | NaN | 76561199387385936 | 0 | 1 | 492 | 492 | 350 | 1729278945 |
3 | 177278879 | {'steamid': '76561199572934905', 'num_games_ow... | english | Try finger, but hole | 1729269896 | 1729269896 | True | 0 | 0 | 0 | ... | elden_ring | NaN | NaN | 76561199572934905 | 0 | 1 | 6536 | 80 | 6456 | 1729280044 |
4 | 177278154 | {'steamid': '76561198436642208', 'num_games_ow... | english | Damn, just damn. how can i explain this piece ... | 1729268993 | 1729268993 | True | 0 | 0 | 0 | ... | elden_ring | NaN | NaN | 76561198436642208 | 51 | 3 | 5563 | 1424 | 5563 | 1729268292 |
5 rows × 27 columns
In [542]:
all_reviews_df.columns
Out[542]:
Index(['recommendationid', 'author', 'language', 'review', 'timestamp_created', 'timestamp_updated', 'voted_up', 'votes_up', 'votes_funny', 'weighted_vote_score', 'comment_count', 'steam_purchase', 'received_for_free', 'written_during_early_access', 'hidden_in_steam_china', 'steam_china_location', 'primarily_steam_deck', 'game_name', 'timestamp_dev_responded', 'developer_response', 'author_steamid', 'author_num_games_owned', 'author_num_reviews', 'author_playtime_forever', 'author_playtime_last_two_weeks', 'author_playtime_at_review', 'author_last_played'], dtype='object')
In [544]:
# we don't need all of these columns. We only keep our relevant columns
relevant_columns = ['game_name', 'review', 'voted_up', 'timestamp_created', 'author_num_games_owned', 'author_num_reviews', 'author_playtime_at_review', 'author_playtime_last_two_weeks', 'author_playtime_forever']
In [546]:
all_reviews_df=all_reviews_df[relevant_columns]
all_reviews_df
Out[546]:
game_name | review | voted_up | timestamp_created | author_num_games_owned | author_num_reviews | author_playtime_at_review | author_playtime_last_two_weeks | author_playtime_forever | |
---|---|---|---|---|---|---|---|---|---|
0 | elden_ring | I love the game, but… there is too much to do.... | True | 1729275153 | 326 | 16 | 8977 | 3957 | 8988 |
1 | elden_ring | roll | True | 1729275009 | 11 | 2 | 2422 | 1085 | 2513 |
2 | elden_ring | Its Laterally Perfect In Every Way, The Only B... | True | 1729270437 | 0 | 1 | 350 | 492 | 492 |
3 | elden_ring | Try finger, but hole | True | 1729269896 | 0 | 1 | 6456 | 80 | 6536 |
4 | elden_ring | Damn, just damn. how can i explain this piece ... | True | 1729268993 | 51 | 3 | 5563 | 1424 | 5563 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
45094 | dota_2 | Good | True | 1487494231 | 0 | 1 | 31575 | 2916 | 205094 |
45095 | dota_2 | Great Game but some of sea players are toxics | True | 1487494164 | 5 | 2 | 99538 | 0 | 170287 |
45096 | dota_2 | Keep up the good work | True | 1487493619 | 1 | 1 | 56345 | 0 | 110978 |
45097 | dota_2 | halawoken | True | 1487493562 | 0 | 1 | 32205 | 0 | 48218 |
45098 | dota_2 | weowe | True | 1487493205 | 0 | 1 | 44382 | 0 | 124043 |
45097 rows × 9 columns
In [554]:
#now its time for us to clean the reviews a little
def clean_review(text):
"""
This function will be used to clean the reviews. It converts all texts to lower case, removes characters which are not alpha numeric or white spaces, and finally removes all extra white space characters
"""
text=text.lower()
text = re.sub(r'[^\w\s]', '', text) #removes characters which aren't alpha numeric or white space (like punctuation, special symbols, etc)
text = re.sub(r'\s+', ' ', text).strip() #remove extra white spaces
return text
In [556]:
all_reviews_df["review"]=all_reviews_df["review"].apply(clean_review)
In [558]:
all_reviews_df["review"]
Out[558]:
0 i love the game but there is too much to do as... 1 roll 2 its laterally perfect in every way the only ba... 3 try finger but hole 4 damn just damn how can i explain this piece of... ... 45094 good 45095 great game but some of sea players are toxics 45096 keep up the good work 45097 halawoken 45098 weowe Name: review, Length: 45097, dtype: object
In [560]:
all_reviews_df[all_reviews_df['review'].apply(lambda x: len(x.split())==0)]
Out[560]:
game_name | review | voted_up | timestamp_created | author_num_games_owned | author_num_reviews | author_playtime_at_review | author_playtime_last_two_weeks | author_playtime_forever | |
---|---|---|---|---|---|---|---|---|---|
89 | elden_ring | True | 1729196068 | 0 | 14 | 192 | 358 | 358 | |
127 | elden_ring | True | 1729165883 | 0 | 7 | 8102 | 332 | 8123 | |
165 | elden_ring | True | 1729125671 | 0 | 10 | 9048 | 986 | 9098 | |
396 | elden_ring | True | 1728928295 | 0 | 2 | 5747 | 2049 | 6025 | |
430 | elden_ring | True | 1728886706 | 0 | 2 | 3343 | 3311 | 3729 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
44906 | dota_2 | True | 1487597602 | 0 | 1 | 36328 | 0 | 40480 | |
44907 | dota_2 | True | 1487596377 | 1 | 1 | 102099 | 0 | 150906 | |
44968 | dota_2 | True | 1487562098 | 0 | 1 | 52456 | 0 | 329005 | |
45052 | dota_2 | True | 1487509195 | 0 | 1 | 47569 | 0 | 137826 | |
45078 | dota_2 | True | 1487499792 | 0 | 2 | 160 | 0 | 196 |
755 rows × 9 columns
In [562]:
# we can see that some reviews are empty strings. So we remove these
all_reviews_df=all_reviews_df[~all_reviews_df['review'].apply(lambda x: len(x.split())==0)]
In [564]:
all_reviews_df[all_reviews_df['review'].apply(lambda x: len(x.split())==0)]
Out[564]:
game_name | review | voted_up | timestamp_created | author_num_games_owned | author_num_reviews | author_playtime_at_review | author_playtime_last_two_weeks | author_playtime_forever |
---|
In [620]:
# we don't want some random gibberish words in our reviews. we only want them to have valid english words.
nltk.download('words')
english_words = set(nltk.corpus.words.words()) #so we get a dictionary of english words
[nltk_data] Downloading package words to [nltk_data] C:\Users\aniru\AppData\Roaming\nltk_data... [nltk_data] Package words is already up-to-date!
In [656]:
# but some common gaming terms like "goty", "rpg", "fps", etc are not found in this set
# so we extend our set to contain some frequently used terms like these
english_words.update("goty elden rpg fps soulslike roguelike roguelite arpg jrpg coop pvp mmo mmorpg moba respawn afk godlike dlc vr camping meta smurf lfg gank pve buff nerf sandbox crafting skilltree xp hitbox minmaxing despawn noob op lag gg crossplay endgame speedrun".split())
In [680]:
# But we surely could not have covered all commonly used words. So we implement a mechanism to check if the word that appears has been
# used frequently enough across other reviews too or not
all_words = ' '.join(all_reviews_df['review']).split()
word_freq = Counter(all_words)
In [700]:
def validate_review(text):
"""
This function will be used to check if the review is valid or not. It will be marked valid if atleast 50% of the words are found to be valid.
"""
threshold=0.5
freq_threshold=3 #if the word is a frequently used word across reviews then it is valid
tokens = text.split()
valid_words = [word for word in tokens if word in english_words or word_freq[word] > freq_threshold]
return len(valid_words) / len(tokens) >= threshold
In [724]:
validated_df = all_reviews_df[all_reviews_df['review'].apply(validate_review)]
validated_df
Out[724]:
game_name | review | voted_up | timestamp_created | author_num_games_owned | author_num_reviews | author_playtime_at_review | author_playtime_last_two_weeks | author_playtime_forever | |
---|---|---|---|---|---|---|---|---|---|
0 | elden_ring | i love the game but there is too much to do as... | True | 1729275153 | 326 | 16 | 8977 | 3957 | 8988 |
1 | elden_ring | roll | True | 1729275009 | 11 | 2 | 2422 | 1085 | 2513 |
2 | elden_ring | its laterally perfect in every way the only ba... | True | 1729270437 | 0 | 1 | 350 | 492 | 492 |
3 | elden_ring | try finger but hole | True | 1729269896 | 0 | 1 | 6456 | 80 | 6536 |
4 | elden_ring | damn just damn how can i explain this piece of... | True | 1729268993 | 51 | 3 | 5563 | 1424 | 5563 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
45092 | dota_2 | nice | True | 1487494906 | 0 | 2 | 1937 | 6997 | 121315 |
45093 | dota_2 | good game | True | 1487494747 | 0 | 1 | 124437 | 0 | 251708 |
45094 | dota_2 | good | True | 1487494231 | 0 | 1 | 31575 | 2916 | 205094 |
45095 | dota_2 | great game but some of sea players are toxics | True | 1487494164 | 5 | 2 | 99538 | 0 | 170287 |
45096 | dota_2 | keep up the good work | True | 1487493619 | 1 | 1 | 56345 | 0 | 110978 |
42924 rows × 9 columns
In [732]:
#we save this cleaned dataset
validated_df.to_csv("cleaned_reviews_dataset.csv",index=False)
In [ ]: