| 
              <?php
 // The opening tag must be followed by whitespace; "<?phpmb_internal_encoding"
 // is not recognised as a PHP opening tag.
 mb_internal_encoding("UTF-8"); // encoding for mbstring
 // phpQuery supplies the phpQuery class and pq() used further down this page.
 // The page cannot do anything without it, so fail immediately if it is missing.
 require_once 'phpQuery-onefile.php';
 ?>
 <!doctype html>
 <html>
 <head>
 <meta charset="utf-8">
 <title>DOM</title>
 </head>
 
 <body>
 <?php
 // Builds a short plain-text teaser from the saved page "wiki.html":
 //   1. remove elements that are not running prose,
 //   2. flatten what is left to plain text,
 //   3. cap it at MAXLEN characters,
 //   4. print it with line breaks rendered as <br>.
 $doc = phpQuery::newDocumentFileHTML("wiki.html");

 $doc["table"]->remove(); // remove tables as well as their contents
 $doc[":header"]->remove(); // remove <h1>-<h6>
 $doc["#coordinates"]->remove(); // remove the float layer 'coordinates'
 $doc["span.editsection"]->remove(); // remove Links 'edit'
 $doc["small"]->remove();

 // remove <span> but keep its content
 foreach ($doc["span"] as $span) {
     $span = pq($span);
     // after() interprets a string argument as markup, so the text is escaped
     // first; otherwise a literal "<" or "&" inside the span would be re-parsed.
     $span->after(htmlspecialchars($span->text(), ENT_NOQUOTES, "UTF-8"))->remove();
 }

 // NOTE(review): this removes every <div> together with its content; confirm
 // that the prose in wiki.html is not nested inside a <div>.
 $doc["div"]->remove();
 $doc["script"]->remove();

 // remove [number]
 foreach ($doc["a"] as $a) {
     $a = pq($a);
     if (preg_match("/^\\[\\d+\\]$/", trim($a->text()))) {
         $a->remove();
     }
 }

 // Remove all other html labels by working on the decoded text directly.
 // Measuring and cutting serialized markup would count an entity such as
 // "&amp;" as several characters and could cut it in half.
 $text = $doc->text();
 $text = str_replace(array("\r\n", "\r"), "\n", $text); // normalise line endings
 $text = preg_replace("/\\n{2,}/", "\n", $text); // \n\n\n\n\n => \n

 // Trim before measuring so leading blank lines do not use up the budget.
 $text = preg_replace("/^\\s+|\\s+\\z/", "", $text);

 define("MAXLEN", 400);
 $shortened = mb_strlen($text) > MAXLEN;
 if ($shortened) {
     // The cut may land right after whitespace; strip it before appending "...".
     $text = preg_replace("/\\s+\\z/", "", mb_substr($text, 0, MAXLEN));
 }

 // Escape once, at the point where the text becomes markup again.
 $html = htmlspecialchars($text, ENT_NOQUOTES, "UTF-8");
 $html = str_replace("\n", "<br>", $html); // \n => <br>
 if ($shortened) {
     $html .= "...";
 }
 $doc->html($html);

 echo $doc->html();
 ?>
 </body>
 </html>
 
              
                |