Here you can find the entire code.
I wanted to download some flashcards from Twitter, so I wrote this crawler to fetch them.
# Import libs
import os
import re

# Here you need to add your API credentials.
# You can get those here: https://developer.twitter.com/en/docs/twitter-api/getting-started/about-twitter-api
api_key = "XPTO"
api_secret_key = "XPTO"
access_token = "XPTO"
access_token_secret = "XPTO"

# tweepy calls the API key pair the "consumer" key/secret.
consumer_key = api_key
consumer_secret = api_secret_key

# Account to crawl, the link that marks a flashcard tweet, and where images go.
SCREEN_NAME = 'chrisalbon'
CARDS_URL = 'machinelearningflashcards.com'
OUTPUT_DIR = 'ml-cards'
PAGE_SIZE = 200

# Path separators, characters Windows rejects in file names, and control
# characters (tweet text may contain newlines).
_UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')


def fetch_all_tweets(api, screen_name=SCREEN_NAME, page_size=PAGE_SIZE):
    """Return every tweet ``api.user_timeline`` yields for *screen_name*.

    One request returns at most *page_size* tweets, so pages are requested
    repeatedly, each time asking only for tweets older than the last one
    received, until a page comes back empty.

    Args:
        api: Object exposing ``user_timeline(**kwargs)`` (a ``tweepy.API``).
        screen_name: Account whose timeline is read.
        page_size: Value passed as ``count`` on every request.

    Returns:
        A list of the status objects, in the order they were received.
    """
    tweets = []
    max_id = None
    while True:
        params = {
            'screen_name': screen_name,
            'count': page_size,
            'include_rts': False,
            'exclude_replies': True,
        }
        if max_id is not None:
            params['max_id'] = max_id
        page = api.user_timeline(**params)
        # No more tweets (this also covers an empty first page).
        if not page:
            break
        tweets.extend(page)
        # max_id is inclusive, so step exactly one below the oldest tweet
        # seen; subtracting more than once would skip a tweet.
        max_id = page[-1].id - 1
    return tweets


# Chris stopped using a hashtag and started linking to a URL
def has_ML_url(s):
    """Return True if any URL entity of tweet *s* displays as CARDS_URL.

    Args:
        s: Status object whose ``entities`` mapping may hold a ``'urls'``
            list of dicts carrying a ``'display_url'`` key.

    Returns:
        bool: False when the tweet has no URL entities or none matches.
    """
    urls = s.entities.get('urls') or []
    return any(url.get('display_url') == CARDS_URL for url in urls)


def collect_media_files(tweets):
    """Map flashcard title -> photo URL for every flashcard tweet.

    A tweet qualifies when it links to CARDS_URL and its first media entity
    is a photo. The title is the tweet text before the first `` http``.
    Tweets sharing a title overwrite each other; the last one wins.

    Args:
        tweets: Iterable of status objects with ``text`` and ``entities``.

    Returns:
        dict: ``{title: media_url}``.
    """
    media_files = {}
    for status in tweets:
        if not has_ML_url(status):
            continue
        title = status.text.split(' http')[0]
        media = status.entities.get('media', [])
        # Only keep tweets that have media and whose media is a photo.
        if media and media[0]['type'] == 'photo':
            media_files[title] = media[0]['media_url']
    return media_files


def safe_filename(title):
    """Turn free-form tweet text into a name that is safe to use as a file.

    Runs of unsafe characters become a single ``_``; leading and trailing
    spaces and dots are stripped so the name cannot be ``..`` or hidden.

    Args:
        title: Arbitrary text.

    Returns:
        str: The cleaned name, or ``'untitled'`` if nothing usable is left.
    """
    cleaned = _UNSAFE_CHARS.sub('_', title).strip(' .')
    return cleaned or 'untitled'


def main():
    """Authenticate, crawl the timeline and download every flashcard image."""
    # Third-party imports live here so the helpers above can be imported
    # (and tested) on a machine without tweepy/wget installed.
    import tweepy
    import wget

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    tweets = fetch_all_tweets(api)
    media_files = collect_media_files(tweets)

    # create a directory to store your photos
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for title, url in media_files.items():
        # Get the photos!
        target = os.path.join(OUTPUT_DIR, "{}.png".format(safe_filename(title)))
        wget.download(url, out=target)


if __name__ == "__main__":
    main()