$url) {
  // Announce progress. $i is a page counter owned by the surrounding script
  // (declared outside this excerpt) and is advanced once per page here.
  echo "\n\nProcessing issue page ". ++$i .".\n";

  $issue_page = file_get_contents($url);
  if ($issue_page === false) {
    // file_get_contents() returns FALSE on failure; report it rather than
    // silently scanning an empty page.
    echo "Failed to download issue page $url.\n";
  }
  else {
    // Quick scan for all issues (which are simply links to nodes).
    // "+" (not "*") so that a bare "/node/" link never yields an empty nid.
    preg_match_all('&a href="/node/([0-9]+)&', $issue_page, $matches);

    // A listing may link to the same issue more than once; fetch each only once.
    foreach (array_unique($matches[1]) as $nid) {
      // Download and save issue text.
      $issue_html = file_get_contents("http://drupal.org/node/$nid");
      if ($issue_html === false) {
        echo "Failed to download issue #$nid.\n";
        continue;
      }

      // One directory per issue under $OUTPUT_PATH (configured outside this
      // excerpt). Tolerate re-runs where the directory already exists.
      $output_path = "$OUTPUT_PATH/$nid";
      if (!is_dir($output_path) && !mkdir($output_path, 0777, true)) {
        echo "Failed to create directory $output_path.\n";
        continue;
      }

      $output_file = "$output_path/{$nid}.html";
      if (file_put_contents($output_file, $issue_html) === false) {
        echo "Failed to write issue #$nid to $output_file.\n";
        continue;
      }
      echo "Wrote issue #$nid to $output_file.\n";

      // Save all attachments.
      // The hyphen sits last in the character class so it is a literal: the
      // earlier "\.-_" form was a range from "." to "_", which rejected "-"
      // and accepted "/" (allowing "../" in the captured file name).
      preg_match_all('&http://drupal\.org/files/issues/([a-zA-Z0-9._-]+)"&', $issue_html, $attachments);
      foreach (array_unique($attachments[1]) as $name) {
        // Never treat a directory reference as a file name.
        if ($name === '.' || $name === '..') {
          continue;
        }

        $file = file_get_contents("http://drupal.org/files/issues/$name");
        if ($file === false) {
          echo "Failed to download attachment $name.\n";
          continue;
        }

        $output_file = "$output_path/$name";
        if (file_put_contents($output_file, $file) === false) {
          echo "Failed to write attachment to $output_file.\n";
          continue;
        }
        echo "Wrote attachment to $output_file.\n";
      }
    }
  }
}
?>