# -*- coding: UTF-8 -*- from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException from time import sleep import json import datetime
# edit these three variables user = 'shiraishi_haruk' start = datetime.datetime(2017, 1, 14) # year, month, day end = datetime.datetime(2017, 2, 3) # year, month, day
# only edit these if you're having problems delay = 3# time to wait on each page load before reading the page driver = webdriver.Chrome() # options are Chrome() Firefox() Safari()
# don't mess with this stuff twitter_ids_filename = 'all_ids.json' days = (end - start).days + 1 id_selector = '.time a.tweet-timestamp' tweet_selector = 'li.js-stream-item' user = user.lower() ids = []
for tweet in found_tweets: try: id = tweet.find_element_by_css_selector(id_selector).get_attribute('href').split('/')[-1] ids.append(id) except StaleElementReferenceException as e: print('lost element reference', tweet)
except NoSuchElementException: print('no tweets on this day')
start = increment_day(start, 1)
try: withopen(twitter_ids_filename) as f: all_ids = ids + json.load(f) data_to_write = list(set(all_ids)) print('tweets found on this scrape: ', len(ids)) print('total tweet count: ', len(data_to_write)) except FileNotFoundError: withopen(twitter_ids_filename, 'w') as f: all_ids = ids data_to_write = list(set(all_ids)) print('tweets found on this scrape: ', len(ids)) print('total tweet count: ', len(data_to_write))
withopen(twitter_ids_filename, 'w') as outfile: json.dump(data_to_write, outfile)
print('all done here') driver.close()
1 2 3 4 5 6 7 8 9 10 11 12 13
tw = twarc.Twarc(CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET) f=open("shiraishi_haruk_1701.json","w") for id_no in ids: # store the ids in this list try: tweet = tw.get('https://api.twitter.com/1.1/statuses/show/%s.json' % int(id_no)) json.dump(tweet.json(),f) except Exception as e: print(e) time.sleep(60 * 15) continue except StopIteration: break f.close()