import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;

/**
 * Command-line tool that parses an HTML file with jsoup, unwraps a fixed set
 * of structural/presentational tags (keeping their children), and prints the
 * resulting markup to standard output with each {@code &nbsp;} entity turned
 * into a line break.
 *
 * <p>Usage: {@code java htmlextract <input.html>}
 */
public class htmlextract {
    /** CSS selector listing the tags whose wrappers are removed while their content is kept. */
    private static final String UNWRAPPED_TAGS =
            "body,head,html,tbody,title,tr,td,b,table,font,img,br,hr,u";

    /**
     * Reads the HTML file named by the first argument and writes the
     * simplified markup to {@code System.out}.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the
     *             HTML file to process (the file is decoded as UTF-8); any
     *             further arguments are ignored
     * @throws IOException if the input file cannot be found or read
     */
    public static void main(String[] args) throws IOException {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
        // when the tool is started without an input path.
        if (args == null || args.length < 1) {
            System.err.println("Usage: java htmlextract <input.html>");
            System.exit(1);
            return;
        }

        File input = new File(args[0]);
        Document doc = Jsoup.parse(input, "UTF-8");

        // Base escape mode makes non-breaking spaces serialize as "&nbsp;",
        // which is what the replacement below keys on.
        doc.outputSettings().escapeMode(Entities.EscapeMode.base);

        // unwrap() drops each matched element but moves its children up into
        // the parent, so text and any non-listed tags are preserved.
        doc.select(UNWRAPPED_TAGS).unwrap();

        // Outline mode affects only how the document is serialized by toString().
        doc.outputSettings().outline(true);

        // Literal replacement: no regex is needed for a fixed entity string.
        String clean = doc.toString().replace("&nbsp;", "\n");

        // NOTE(review): System.out encodes using the JVM's default stdout
        // charset, which may differ from the UTF-8 used for parsing — confirm
        // whether explicit UTF-8 output is wanted.
        System.out.println(clean);
    }
}