Capturing a Web Page's Source Code

Fetch a page's source code over HTTP, then run regular-expression matching on it to extract links.

Code:

package GetData;

import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class GetWebsiteData {
    public static void main(String[] args) {
        URL url = null;
        URLConnection urlconn = null;
        BufferedReader br = null;
        PrintWriter pw = null;
        // Crude pattern for http:// links appearing in the page source
        String regex = "http://[\\w+\\.?/?]+\\.[A-Za-z]+";
        Pattern p = Pattern.compile(regex);
        try {
            url = new URL("http://www.baidu.com/");
            urlconn = url.openConnection();
            // The matched links are written to a file named url(baidu).txt on drive F:
            pw = new PrintWriter(new FileWriter("f:/url(baidu).txt"), true);
            br = new BufferedReader(new InputStreamReader(urlconn.getInputStream()));
            String buf = null;
            // Read the page line by line and print every regex match to the file
            while ((buf = br.readLine()) != null) {
                Matcher buf_m = p.matcher(buf);
                while (buf_m.find()) {
                    pw.println(buf_m.group());
                }
            }
            System.out.println("Fetch succeeded!");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Guard against a NullPointerException if the streams were never opened
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (pw != null) {
                pw.close();
            }
        }
    }
}
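To see what the pattern above actually captures, here is a minimal sketch that runs the same regex against a hard-coded HTML snippet instead of a live page. The snippet and the class name RegexCheck are illustrative assumptions, not part of the original post.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Minimal sketch (assumed example): applies the same crude URL pattern to an
// in-memory HTML string so its matches can be inspected without network access
// or file output.
public class RegexCheck {
    public static void main(String[] args) {
        String regex = "http://[\\w+\\.?/?]+\\.[A-Za-z]+";
        Pattern p = Pattern.compile(regex);
        // Hypothetical sample input containing two http:// links
        String html = "<a href=\"http://www.baidu.com/index.html\">link</a> "
                + "<img src=\"http://img.example.com/logo.png\">";
        Matcher m = p.matcher(html);
        while (m.find()) {
            System.out.println(m.group()); // prints each matched link
        }
    }
}

Because the original program matches line by line, a link whose markup happens to be split across a line break would be missed, and the pattern only finds http:// (not https://) URLs; tightening the regex or parsing the HTML with a proper parser would be the next step if that matters for your use case.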