作者:James Zhu (fatindeed@hotmail.com)
创建日期:2018-03-28
// Schema for the Link table: one row per crawled page id, holding up to three
// link/protocol pairs. IF NOT EXISTS makes the statement safe to run on every start.
const SQL_CREATE_TABLE = <<<'SQL'
CREATE TABLE IF NOT EXISTS Link (
id INTEGER PRIMARY KEY,
link1 VARCHAR (255),
protocol1 VARCHAR (30),
link2 VARCHAR (255),
protocol2 VARCHAR (30),
link3 VARCHAR (255),
protocol3 VARCHAR (30)
)
SQL;

// Parameterised insert for one Link row. Note that the protocolN columns are
// bound through placeholders named :schemeN.
const SQL_INSERT_DATA = 'INSERT INTO Link (id, link1, protocol1, link2, protocol2, link3, protocol3) VALUES (:id, :link1, :scheme1, :link2, :scheme2, :link3, :scheme3)';
// SIGINT (Ctrl-C) handler: print the id the crawl loop had reached, then stop.
// The closure has no access to the loop's local scope, so it reads the global
// loop counter $i through $GLOBALS.
pcntl_signal(SIGINT, function () {
    $currentId = $GLOBALS['i'];
    echo PHP_EOL, 'finish at #', $currentId, PHP_EOL;
    exit;
});
// Main crawl: open (or create) the SQLite database, work out which page id to
// resume from, then fetch up to 1000 consecutive pages and store the links
// found on each one.
//
// NOTE(review): only pages that contain at least one link are inserted, so the
// resume point is "highest stored id + 1"; ids after it that yielded no links
// are fetched again on the next run.
try {
    $dbh = new PDO('sqlite:dianping.db');
    // Make every PDO failure throw so the catch below reports it.
    $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    $dbh->exec(SQL_CREATE_TABLE);

    // Highest id stored so far; fetchColumn() yields false when the table is empty.
    $sth = $dbh->query('SELECT id from Link ORDER BY id DESC LIMIT 1');
    $lastId = $sth->fetchColumn();
    if ($lastId === false) {
        // Empty table: begin at the first id to crawl.
        $lastId = 3000;
    } else {
        // The driver may hand the id back as a string; continue with the next integer id.
        $lastId = (int) $lastId + 1;
    }

    $stmt = $dbh->prepare(SQL_INSERT_DATA);
    echo 'start from #'.$lastId.PHP_EOL;

    // $i stays a global on purpose: the SIGINT handler reports it via $GLOBALS['i'].
    for ($i = $lastId; $i < $lastId + 1000; $i++) {
        // Signals are only delivered here, i.e. between two page fetches.
        pcntl_signal_dispatch();

        $content = file_get_contents('http://evt.dianping.com/synthesislink/'.$i.'.html');
        if ($content && preg_match_all('/openLink_\d = "(.*)",/iU', $content, $matches)) {
            $links = $matches[1];
            // The "scheme" of a link is everything before its first '?', or the
            // whole link when it has no query string.
            $schemes = array_map(function ($link) {
                $offset = strpos($link, '?');
                return ($offset !== false ? substr($link, 0, $offset) : $link);
            }, $links);

            // A page may define fewer than three links. Pad both lists to three
            // entries so the missing columns are stored as NULL instead of
            // reading undefined offsets (which raises notices/warnings).
            $links = array_pad($links, 3, null);
            $schemes = array_pad($schemes, 3, null);

            $stmt->execute(array(
                ':id' => $i,
                ':link1' => $links[0],
                ':scheme1' => $schemes[0],
                ':link2' => $links[1],
                ':scheme2' => $schemes[1],
                ':link3' => $links[2],
                ':scheme3' => $schemes[2]
            ));
        }
    }
    echo 'finish at #'.$i.PHP_EOL;
} catch (PDOException $e) {
    // Database errors end the crawl; only the message is printed.
    echo $e->getMessage();
}