import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
/**
 * Demonstrates extracting the text of an HTML element with jsoup while
 * keeping the line break that appears inside the element's markup.
 *
 * <p>Both helper methods turn off jsoup's pretty-printing so that serialized
 * output is not re-indented or re-wrapped.
 */
public class Test2 {

    /**
     * Parses a small sample fragment, prints the document body, prints a
     * separator line, then prints the text content of the {@code div} whose
     * id is {@code test}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Document doc = parse("<div id=\"test\"> A\nB </div>");
        System.out.println(doc.body());
        System.out.println("-------------------------");

        Element target = doc.getElementById("test");
        System.out.println(getTextContent(target));
    }

    /**
     * Parses {@code html} into a jsoup {@link Document} whose output settings
     * have pretty-printing disabled.
     *
     * @param html the HTML source to parse
     * @return the parsed document, configured with non-pretty output settings
     */
    public static Document parse(String html) {
        // A fresh settings object replaces whatever settings the parsed document carried.
        OutputSettings rawOutput = new OutputSettings().prettyPrint(false);
        return Jsoup.parse(html).outputSettings(rawOutput);
    }

    /**
     * Returns the content of {@code element} after passing its inner HTML
     * through {@code Jsoup.clean} with {@code Whitelist.none()} and
     * pretty-printing disabled.
     *
     * <p>NOTE(review): the result comes from an HTML cleaner, so it is
     * presumably still HTML-escaped text (e.g. entities) rather than raw
     * characters; confirm against the jsoup version in use.
     *
     * @param element the element whose content is extracted
     * @return the cleaned content of the element
     */
    public static String getTextContent(Element element) {
        OutputSettings rawOutput = new OutputSettings().prettyPrint(false);
        String innerHtml = element.html();
        // Empty base URI: no base is supplied for the cleaned fragment.
        return Jsoup.clean(innerHtml, "", Whitelist.none(), rawOutput);
    }
}
程序运行结果:
<body><div id="test"> A
B </div></body>
-------------------------
A
B