#!/usr/bin/env perl # # unique-domain-builder.pl # # (C) 2010 José Miguel Parrella Romero # This is free software, distributed under the same terms of Perl. # use strict; use Yahoo::Search; use Domain::PublicSuffix; my $n; # Total number of results my $i = 0; # Current search result my $c = 100; # Maximum search results size my $cctld = $ARGV[0] || 've'; my $query = "site:gob.$cctld OR site:gov.$cctld"; my %options = ( AppId => 'UDB 0.1', Type => 'any', AllowSimilar => 0, Language => undef ); # Optional list of prefixes for the base domain results my @prefixes = qw/ipv6 6 www6 w6 www.v6 www.ipv6/; # Get the search results count my $search = Yahoo::Search->new(); my $req = $search->Request( Doc => $query, %options, Start => 0, Count => 1 ); my $res = $req->Fetch(); $n = $res->CountAvail; # Initialize D::PS my $d = new Domain::PublicSuffix({'data_file'=>'./suffices'}); my %domains; # List of unique domains while ( $i < $n ) { # Build an iterative query my @s = Yahoo::Search->Results( Doc => $query, Start => $i, Count => $c, %options ); exit $@ if $@; # we exit if we have an error due to Yahoo! returning 400 when limit is reached my %f; # list of unique domains per-iteration foreach (@s) { # a search result ++$i; my $u = $_->Url; $u =~ s#^http://##; # stripping stuff... $u =~ s#/.+##; # then some more... $u =~ s#/##; # and... done! my $b = $d->get_root_domain($u); # D::PS does its work unless ($domains{$b}) { # domain isn't registered $domains{$b} = $_->Title; # store it in the global list $f{$b} = $_->Title; # store it in the session list } } foreach my $e ( keys %f ) { # session list of unique domains print "$e\n"; foreach my $p ( @prefixes ) { # prefixes print "$p.$e\n"; } } }