#
# Stalker -- a Perl module to perform algorithmic follower stalking.
# (C) 2011-2012 Jose Miguel Parrella Romero (@bureado, j@bureado.com)
# This is free software, released under the same terms of Perl.
#
# USAGE:
# You need to write a Perl file that `use Stalker;'
# The file should call new() and populateStarFollowers(), i.e.:
#
#   my $bot = Stalker->new;
#   $bot->populateStarFollowers('bureado');
#
# The program will add each Twitter follower object to a large schema named
# 'people', and the follower's user_id to a small schema with the name of the
# tweetstar.
#
# IMPORTANT: you need to request a consumer key/secret and access token+secret
# from Twitter to access their API. Fill out the data in connect() below, or
# pass the same keys to new(). The module won't work otherwise. You also need
# MongoDB running.
#
# Performance notes: slurping data from Twitter is resource-consuming, both in
# bandwidth, CPU in some cycles, RAM and I/O when writing to Mongo. I run one
# Twitter analysis for Ecuador each Q, and consume about 150 MB of res MEM with
# Mongo, and varying levels of CPU and MEM with this Perl script. My learnings:
#
# 1. Adjust $Stalker::FLUSH_EVERY if you want to write earlier or later.
#    Writing later exhausts MongoDB RAM more slowly, but then Perl will consume
#    a bit more.
# 2. Consider trying to connect to MongoDB on the write loops. I haven't tried
#    this but it seems that keeping the connection open will cause overhead in
#    large Twitter accounts.
# 3. Adjust $Stalker::RATE_RATIO (the value handed to until_rate) if you want
#    a smoother execution or just exhaust your rate ASAP.
# 4. Avoid querying Mongo while the process is running, especially when your
#    tables don't have indices. It hogs RAM like a boss.

package Stalker;

use strict;
use warnings;

use MongoDB;
use Net::Twitter;
use POSIX qw/strftime/;

# Tunables. They are package variables so callers can adjust them without
# editing this file, e.g. `$Stalker::FLUSH_EVERY = 1000;`.
our $FLUSH_EVERY = 500;    # buffered followers that trigger a write to MongoDB
our $RATE_RATIO  = 0.1;    # argument passed to Net::Twitter's until_rate()
our $MAX_RETRIES = 10;     # consecutive failed page reads before giving up on a star
our $RETRY_DELAY = 30;     # seconds to wait between failed page reads

# Follower fields that are copied into the stored document.
my @USEFUL_FIELDS = qw/
    screen_name user_id created_at statuses_count time_zone
    followers_count friends_count location lang description utc_offset
/;

# Month abbreviation => zero-based month number, as expected by strftime().
my %MONTH_INDEX = (
    Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4,  Jun => 5,
    Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11,
);

# new(%credentials)
#
# Builds the Twitter client through connect() and returns a Stalker object
# holding it under the 'conn' key. Any key/value pairs given are forwarded to
# connect(); calling new() with no arguments behaves as it always did.
# Dies if the client cannot be created.
sub new {
    my ( $package, %credentials ) = @_;
    $package = ref($package) || $package;

    my $key = '';
    my $nt  = $package->connect(%credentials)
        or die "[ERR] Could not create the Net::Twitter client\n";
    print "[INF] Connected and ready to go!\n";

    # Two-argument bless: the class name must be outside the hash constructor.
    return bless( { conn => $nt, key => $key }, $package );
}

# connect(%overrides)
#
# Class method returning a Net::Twitter object configured with the
# API::REST, OAuth and RateLimit traits. Values in %overrides replace the
# defaults written below.
sub connect {
    my ( $class, %overrides ) = @_;

    my $nt = Net::Twitter->new(
        # YOU NEED TO CONFIGURE THE VALUES BELOW (or pass them to new()).
        traits              => [qw/API::REST OAuth RateLimit/],
        consumer_key        => '',
        consumer_secret     => '',
        access_token        => '',
        access_token_secret => '',
        source              => '',
        %overrides,
    );
    return $nt;
}

# _normalize_created_at($raw)
#
# Turns a timestamp shaped like "Mon Sep 24 03:35:21 +0000 2012" into
# "2012-09-24 03:35:21" so date arithmetic can be done later. The UTC offset
# is dropped and the wall-clock time is kept as-is. Values that are undefined
# or do not match that shape are returned untouched.
sub _normalize_created_at {
    my ($raw) = @_;
    return $raw unless defined $raw;

    my @parts = $raw =~ m{
        \A \s* \w+ \s+                  # weekday name (ignored)
        (\w{3}) \s+ (\d{1,2}) \s+       # month abbreviation, day of month
        (\d{2}) : (\d{2}) : (\d{2}) \s+ # hour, minute, second
        (?: [+-] \d{4} \s+ )?           # UTC offset (ignored)
        (\d{4}) \s* \z                  # year
    }x;
    return $raw unless @parts;

    my ( $mon_name, $mday, $hour, $min, $sec, $year ) = @parts;
    my $mon = $MONTH_INDEX{$mon_name};
    return $raw unless defined $mon;

    return strftime( "%Y-%m-%d %H:%M:%S",
        $sec, $min, $hour, $mday, $mon, $year - 1900 );
}

# _follower_document($user)
#
# Copies only the useful fields of a follower hashref into a new hashref and
# normalizes its created_at value.
sub _follower_document {
    my ($user) = @_;

    my %doc;
    @doc{@USEFUL_FIELDS} = @{$user}{@USEFUL_FIELDS};

    # NOTE(review): Twitter user payloads appear to carry the numeric id under
    # 'id' rather than 'user_id' -- confirm against the API. The fallback only
    # applies when 'user_id' is absent.
    $doc{user_id} = $user->{id} unless defined $doc{user_id};

    $doc{created_at} = _normalize_created_at( $doc{created_at} );
    return \%doc;
}

# _flush_followers($people, $star_collection, $buffer)
#
# Writes every buffered follower to the 'people' collection and its user_id to
# the star's own collection, then empties the buffer in place.
sub _flush_followers {
    my ( $people, $star_collection, $buffer ) = @_;

    foreach my $act (@$buffer) {
        my $scr = defined $act->{'screen_name'} ? $act->{'screen_name'} : '';
        if ( $people->insert($act) ) {
            print "[DBG] Created $scr in MongoDB\n";
        }
        else {
            print "[DBG] Skipped $scr\n";
        }
        $star_collection->insert( { 'user_id' => $act->{'user_id'} } );
    }
    @$buffer = ();
    return;
}

# populateStarFollowers($twitstars, $cursor)
#
# Walks the follower list of one or more Twitter accounts and stores it in
# the 'twitter' MongoDB database on localhost.
#
#   $twitstars - a Twitter handle, or an arrayref of handles.
#   $cursor    - optional cursor to resume from (e.g., broken executions).
#                It applies to the first handle processed; every other handle
#                starts from the beginning (-1).
#
# A failed page read is retried up to $MAX_RETRIES times, pausing
# $RETRY_DELAY seconds between attempts. After that the handle is abandoned,
# the cursor to resume from is logged, and what was buffered is still written.
sub populateStarFollowers {
    my ( $self, $twitstars, $cursor ) = @_;

    # Accept both a bare handle and an arrayref of handles.
    my @stars = ref($twitstars) eq 'ARRAY' ? @$twitstars : ($twitstars);
    my $resume_cursor = $cursor || -1;
    my $flush_every   = $FLUSH_EVERY > 0 ? $FLUSH_EVERY : 1;

    my $nt = $self->{conn};

    # This will connect to Mongo in localhost.
    # See CPAN MongoDB docs for other scenarios.
    my $conn = MongoDB::Connection->new;
    my $dbh  = $conn->get_database('twitter');    # DB name in MongoDB
    my $ppl  = $dbh->get_collection('people');    # "Large" schema name

    STAR:
    foreach my $twitstar (@stars) {
        next STAR unless $twitstar;

        # "Small" schema name. Looked up by name so that a handle can never be
        # dispatched as a method call on the database object.
        my $str = $dbh->get_collection($twitstar);
        print "[INF] Entering $twitstar at " . localtime() . "\n";

        my $page_cursor = $resume_cursor;
        $resume_cursor = -1;    # later stars always start from the first page

        my @objs;
        my $failures = 0;

        PAGE:
        while ($page_cursor) {
            my $buffered = scalar @objs;
            print "[DBG] Entering READ loop in cursor $page_cursor ($buffered)\n";

            my $r;
            my $ok = eval {
                $r = $nt->followers(
                    { screen_name => $twitstar, cursor => $page_cursor } );
                1;
            };
            if ( !$ok || ref($r) ne 'HASH' ) {
                my $err = $ok ? 'unexpected response' : $@;
                print "[ERR] Fail whale: $err\n";    # This happens more often than I'd like.
                if ( ++$failures > $MAX_RETRIES ) {
                    print "[ERR] Giving up on $twitstar; resume from cursor $page_cursor\n";
                    last PAGE;
                }
                sleep $RETRY_DELAY if $RETRY_DELAY > 0;
                next PAGE;    # same cursor, try again
            }
            $failures = 0;

            foreach my $user ( @{ $r->{users} || [] } ) {
                my $name = defined $user->{screen_name} ? $user->{screen_name} : '';
                print "[DBG] Entering follower " . $name . "\n";

                # Only the useful fields are kept (the object is not copied whole).
                push @objs, _follower_document($user);

                if ( @objs >= $flush_every ) {    # Write to DB.
                    my $count = scalar @objs;
                    print "[DBG] Entering WRITE loop at $count in cursor $page_cursor\n";
                    _flush_followers( $ppl, $str, \@objs );
                }
            }

            # Rate limiting, Twitter-enforced.
            my $slp = $nt->until_rate($RATE_RATIO);
            $slp = 0 unless defined $slp;
            print "[INF] Sleeping $slp seconds\n";
            sleep $slp if $slp > 0;

            $page_cursor = $r->{next_cursor};
        }

        if (@objs) {    # Last flush.
            my $count = scalar @objs;
            my $at    = defined $page_cursor ? $page_cursor : 0;
            print "[DBG] Entering last WRITE loop at $count in cursor $at\n";
            _flush_followers( $ppl, $str, \@objs );
        }
    }

    return;
}

1;    # kthxbye