import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.jsoup.Connection;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Small command-line demo: downloads an XML document over HTTP(S) with jsoup,
 * parses it into a DOM tree, and prints the {@code title} attribute of the
 * first element matched by the XPath {@code /api/parse}.
 */
public class Test3 {

    /** URL fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL = "https://zh.purasbar.com/images/scripts/?debug=https://zh.wikipedia.org/w/api.php?action=parse&format=xml&page=%E8%B0%83%E8%AF%95&variant=no&redirects";

    /** XPath selecting the element whose {@code title} attribute is printed. */
    private static final String PARSE_NODE_XPATH = "/api/parse";

    /**
     * Fetches the XML document and prints the {@code title} attribute of the
     * first {@code /api/parse} element to standard output. Problems (download
     * or parse failure, missing element, missing attribute) are reported on
     * standard error instead of crashing with a {@link NullPointerException}.
     *
     * @param args optional; when a first argument is present it is used as the
     *             URL to fetch instead of the built-in default
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        Document document = loadXMLDocument(url);
        if (document == null) {
            // loadXMLDocument has already reported the cause on stderr.
            return;
        }
        XPath xpath = XPathFactory.newInstance().newXPath();
        try {
            NodeList nodes = (NodeList) xpath.evaluate(PARSE_NODE_XPATH, document, XPathConstants.NODESET);
            if (nodes == null || nodes.getLength() == 0) {
                // The document parsed but has no matching element, so there is nothing to print.
                System.err.println("No element matches " + PARSE_NODE_XPATH + " in the response from " + url);
                return;
            }
            String title = getNodeAttribute(nodes.item(0), "title");
            if (title == null) {
                System.err.println("Element " + PARSE_NODE_XPATH + " has no \"title\" attribute");
                return;
            }
            System.out.println(title);
        } catch (XPathExpressionException e) {
            System.err.println("Failed to evaluate XPath " + PARSE_NODE_XPATH + ": " + e);
            e.printStackTrace();
        }
    }

    /**
     * Returns the value of the named attribute of a DOM node.
     *
     * @param node the node to inspect; may be {@code null}
     * @param name the attribute name to look up
     * @return the attribute value, or {@code null} when {@code node} is
     *         {@code null}, carries no attribute map (only element nodes have
     *         one), or has no attribute called {@code name}
     */
    public static String getNodeAttribute(Node node, String name) {
        if (node == null) {
            return null;
        }
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        Node attribute = attributes.getNamedItem(name);
        return attribute == null ? null : attribute.getNodeValue();
    }

    /**
     * Downloads the resource at {@code url} with jsoup and returns the
     * response body as a string.
     *
     * @param url the address to request
     * @return the response body
     * @throws IOException if the request fails
     */
    public static String loadXMLString(String url) throws IOException {
        Connection conn = Jsoup.connect(url);
        // NOTE(review): this switches off TLS certificate validation, which
        // leaves the download open to man-in-the-middle tampering. The method
        // is also deprecated in later jsoup versions. It is kept to preserve
        // the existing behaviour; confirm whether the target host still needs
        // it and remove it if not.
        conn.validateTLSCertificates(false);
        // The response is XML rather than HTML; tell jsoup not to reject it
        // because of its content type.
        conn.ignoreContentType(true);
        Response resp = conn.execute();
        return resp.body();
    }

    /**
     * Downloads the XML at {@code url} and parses it into a DOM document.
     *
     * @param url the address of the XML document
     * @return the parsed document, or {@code null} if the download, the parser
     *         configuration, or the parse itself failed (the failure is
     *         reported on standard error)
     */
    public static Document loadXMLDocument(String url) {
        try {
            DocumentBuilder builder = newHardenedDocumentBuilder();
            String xmlstr = loadXMLString(url);
            byte[] xmlbytes = xmlstr.getBytes(StandardCharsets.UTF_8);
            return builder.parse(new ByteArrayInputStream(xmlbytes));
        } catch (IOException | ParserConfigurationException | SAXException e) {
            System.err.println("Failed to load XML document from " + url + ": " + e);
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Creates a DOM parser that refuses to resolve external entities and
     * external DTDs, because the XML it will parse comes from the network.
     * DOCTYPE declarations themselves are still accepted.
     */
    private static DocumentBuilder newHardenedDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
        return factory.newDocumentBuilder();
    }
}
// Output: