#!/usr/bin/perl -w
# Author   : Benoit PAPILLAULT
# Creation : 16/01/2001
#
# Queries three classified-ad web sites (www.petites-annonces.fr,
# www.nicematin.fr and web3.bonjour.fr) over plain HTTP/1.0, turns every
# advert found into a one-line string, and prints the adverts that were not
# present in the previous run.  The previous run is stored in logement.txt
# (one advert per line); the raw answer of each site is also dumped into a
# logement-<site> file for debugging.

use strict;
use warnings;

use Socket;
use IO::Handle;

# Adverts collected during this run / loaded from the previous run.  Kept as
# package variables (as in the original script) because every sub below
# shares them.
our @annonces_courantes;
our @annonces_precedentes;

# Resolve $host, open a TCP connection to port 80, send $request and return
# the connected socket ready for reading.  On any failure a warning is
# emitted and an empty list / undef is returned, so that one unreachable
# site does not prevent the other sites from being polled.
sub _http_request {
    my ($host, $request) = @_;

    my $ipaddr = gethostbyname($host);
    unless (defined $ipaddr) {
        warn "$host: resolution du nom impossible\n";
        return;
    }

    my $sock;
    unless (socket($sock, PF_INET, SOCK_STREAM, 0)) {
        warn "$host: creation du socket impossible: $!\n";
        return;
    }
    unless (connect($sock, sockaddr_in(80, $ipaddr))) {
        warn "$host: connexion impossible: $!\n";
        close $sock;
        return;
    }

    # Unbuffered output: the request must leave before we start reading.
    $sock->autoflush(1);
    unless (print {$sock} $request) {
        warn "$host: envoi de la requete impossible: $!\n";
        close $sock;
        return;
    }
    return $sock;
}

# Open $path for writing (truncating it) and return the handle, or warn and
# return nothing.  The dump files are debugging aids only, so callers carry
# on without them when they cannot be created.
sub _open_dump {
    my ($path) = @_;

    open my $fh, '>', $path or do {
        warn "$path: ouverture impossible: $!\n";
        return;
    };
    return $fh;
}

# Poll www.petites-annonces.fr.  Each advert is spread over several form
# fields (NAME="..." VALUE="..."); an advert is emitted as soon as all seven
# fields have been seen, then the record is reset for the next one.
sub petites_annonces_fr {
    print localtime() . ": interrogation de www.petites-annonces.fr\n";

    # No "Accept-Encoding: gzip": the answer is parsed as plain text and
    # nothing in this script can inflate a compressed body.
    my $request =
          "GET /index.cfm?RequestTimeout=120&typepage=recherche&ChoixRub=504&acces_rech=1&show_result=1&sousrub1=5040103&prix_min=2000&prix_max=4000&cp=06&commune=&origine=&tri=c_date HTTP/1.0\r\n"
        . "Referer: http://www.petites-annonces.fr/index.cfm?ChoixRub=504&acces_rech=1\r\n"
        . "User-Agent: Mozilla/4.72 [en] (X11; I; Linux 2.4.0 i686)\r\n"
        . "Host: www.petites-annonces.fr\r\n"
        . "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg , image/png, */*\r\n"
        . "Accept-Language: fr, en\r\n"
        . "Accept-Charset: iso-8859-1,*,utf-8\r\n"
        . "Cookie: VALIDUSER=\r\n"
        . "\r\n";

    my $sock = _http_request('www.petites-annonces.fr', $request)
        or return;
    my $dump = _open_dump('logement-petites-annonces.fr');

    # Form fields that make up one advert (cX carries the rent).
    my @names = qw(an_date accroche texte cX cP cC cY);
    my %value_of;

    while (my $line = <$sock>) {
        print {$dump} $line if $dump;

        for my $name (@names) {
            if ($line =~ m{NAME="\Q$name\E" VALUE="(.*)"}) {
                $value_of{$name} = $1;
            }
        }

        # Wait until the record is complete.
        next if grep { !defined $value_of{$_} } @names;

        push @annonces_courantes,
              "date: $value_of{an_date}, $value_of{accroche}, $value_of{texte}, "
            . "adresse : $value_of{cP} $value_of{cC} $value_of{cY}, "
            . "loyer: $value_of{cX}, source: www.petites-annonces.fr";
        %value_of = ();
    }

    close $sock;
    close $dump if $dump;
    return;
}

# Poll www.nicematin.fr.  A date line applies to every advert text that
# follows it, so only the text is reset after an advert has been emitted.
sub nicematin_fr {
    print localtime() . ": interrogation de www.nicematin.fr\n";

    # No "Accept-Encoding: gzip": see petites_annonces_fr.
    my $request =
          "GET http://www.nicematin.fr/journal/journalnm/webPA/PA.0000121.0000129.0000294.html HTTP/1.0\r\n"
        . "User-Agent: Mozilla/4.72 [en] (X11; I; Linux 2.4.0 i686)\r\n"
        . "Pragma: no-cache\r\n"
        . "Host: www.nicematin.fr\r\n"
        . "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"
        . "Accept-Language: fr, en\r\n"
        . "Accept-Charset: iso-8859-1,*,utf-8\r\n"
        . "\r\n";

    my $sock = _http_request('www.nicematin.fr', $request)
        or return;
    my $dump = _open_dump('logement-nicematin.fr');

    my ($date, $texte);
    while (my $line = <$sock>) {
        print {$dump} $line if $dump;

        # NOTE(review): the HTML tags of these two patterns were lost in the
        # copy of the script this was restored from (only a line break was
        # left in their place); <BR> is the most plausible reconstruction.
        # Confirm against a saved logement-nicematin.fr dump.
        if ($line =~ /puce.gif.*>(.*)<BR>/) {
            $date = $1;
        }
        if ($line =~ /<BR>(.*)<BR>/) {
            $texte = $1;
        }

        if (defined $date && defined $texte) {
            push @annonces_courantes,
                "date: $date, $texte, source: www.nicematin.fr";
            undef $texte;
        }
    }

    close $sock;
    close $dump if $dump;
    return;
}

# Collapse runs of blanks/tabs to one space and drop trailing blanks.
sub _squeeze {
    my ($text) = @_;

    $text =~ s/[ \t]+/ /g;
    $text =~ s/[ \t]*$//;
    return $text;
}

# Extract the adverts from the flattened bonjour.fr answer and return them
# as a list of one-line strings.  Locations and texts are collected
# separately and paired by position.
sub _parse_bonjour {
    my ($resultat) = @_;

    my (@texte, @lieu);

    # NOTE(review): the tags delimiting the advert text were lost in the copy
    # of the script this was restored from; <BR>...<BR> is a reconstruction
    # to be confirmed.  The opening <B> below is implied by the surviving
    # closing </B>.
    while ($resultat =~ /<BR>[ \t]*([^<]*)<BR>/g) {
        push @texte, _squeeze($1);
    }
    while ($resultat =~ /<B>[ \t]*([^<]*)<\/B>/g) {
        push @lieu, _squeeze($1);
    }

    my @annonces;
    for my $i (0 .. $#lieu) {    # every location, including the last one
        my $texte = defined $texte[$i] ? $texte[$i] : '';
        push @annonces, "$lieu[$i] $texte source: www.bonjour.fr";
    }
    return @annonces;
}

# Poll web3.bonjour.fr through its search form (POST).
sub bonjour_fr {
    print localtime() . ": interrogation de www.bonjour.fr\n";

    my $body =
        "setCritereL=TypeLoc&Dept=06&Ville=&Arrondissement=&rechsur=ville&OL5CMin=2000&OL5CMax=4000&ONBPMin=2&ONBPMax=3&OSURMin=&OSURMax=&Rechercher.x=55&Rechercher.y=10";

    # No "Accept-Encoding: gzip": see petites_annonces_fr.  Content-length is
    # computed so that it cannot drift away from the body.
    my $request =
          "POST http://web3.bonjour.fr/bonjour_fichiers/AffichePa2.cfm?noeud=ILHAP000 HTTP/1.0\r\n"
        . "Referer: http://web3.bonjour.fr/bonjour_fichiers/SelectDep.cfm?pa=5&Idnoeud=ILHAP000\r\n"
        . "User-Agent: Mozilla/4.72 [en] (X11; I; Linux 2.4.0 i686)\r\n"
        . "Host: web3.bonjour.fr\r\n"
        . "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"
        . "Accept-Language: fr, en\r\n"
        . "Accept-Charset: iso-8859-1,*,utf-8\r\n"
        . "Cookie: CFGLOBALS=HITCOUNT%3D114%23LASTVISIT%3D%7Bts+%272001%2D02%2D01+14%3A54%3A39%27%7D%23TIMECREATED%3D%7Bts+%272001%2D01%2D17+16%3A02%3A06%27%7D%23; CFID=7543430; CFMAGIC=7543430%3A80630942; CFTOKEN=80630942; RMID=c365d2d13a65b590; REFERER=bonjour; CFID=7543430; CFTOKEN=80630942\r\n"
        . "Content-type: application/x-www-form-urlencoded\r\n"
        . "Content-length: " . length($body) . "\r\n"
        . "\r\n"
        . $body;

    my $sock = _http_request('web3.bonjour.fr', $request)
        or return;
    my $dump = _open_dump('logement-bonjour.fr');

    # Flatten the answer into a single string: from every line only the
    # first run of non-space characters is kept, exactly as the original
    # pattern did.
    # NOTE(review): keeping a single token per line looks suspicious;
    # confirm against a saved dump before changing it.
    my $resultat = "";
    while (my $line = <$sock>) {
        chomp $line;
        if ($line =~ /([^ ]+)[ ]*/) {
            $resultat .= $1;
        }
    }
    print {$dump} $resultat if $dump;

    push @annonces_courantes, _parse_bonjour($resultat);

    close $sock;
    close $dump if $dump;
    return;
}

# Load the adverts of the previous run from logement.txt, one per line.
# A missing file (first run) is not an error: nothing is loaded.
sub load_base {
    open my $fh, '<', 'logement.txt' or return;

    print localtime() . ": chargement des annonces precedentes\n";
    while (my $line = <$fh>) {
        chomp $line;
        push @annonces_precedentes, $line;
    }
    close $fh;
    return;
}

# Keep the previous database as logement.old and write the adverts of this
# run to logement.txt, one per line.
sub save_base {
    # Fails harmlessly on the first run, when there is nothing to rename.
    rename "logement.txt", "logement.old";

    open my $fh, '>', 'logement.txt' or do {
        warn "logement.txt: ouverture impossible: $!\n";
        return;
    };
    for my $annonce (@annonces_courantes) {
        print {$fh} "$annonce\n";
    }
    # Buffered write errors only show up at close time.
    close $fh or warn "logement.txt: ecriture impossible: $!\n";
    return;
}

# Print both advert counts, then every current advert that was not in the
# previous run, prefixed with "1 " and the current local time.
sub check_new {
    my $nb_courantes   = @annonces_courantes;
    my $nb_precedentes = @annonces_precedentes;

    print "annonces trouvees : $nb_courantes\n";
    print "annonces precedentes : $nb_precedentes\n";

    my %deja_vue = map { $_ => 1 } @annonces_precedentes;
    for my $annonce (@annonces_courantes) {
        next if $deja_vue{$annonce};
        print "1 " . localtime() . ": $annonce\n";
    }
    return;
}

# Entry point: load the previous adverts, poll the three sites, save the new
# database and report what is new.
sub main {
    load_base();
    petites_annonces_fr();
    nicematin_fr();
    bonjour_fr();
    save_base();
    check_new();
    return;
}

# Run only when executed as a script, so that the file can also be loaded
# (e.g. by a test) without touching the network.
main() unless caller;