htmlparser实现从网页上抓取数据

package parser;

 

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.net.MalformedURLException;

import java.net.URL;

 

/**
 * Minimal web-page fetcher: downloads the page at a hard-coded URL and saves
 * the whole HTML content to a fixed output file.
 *
 * <p>The URL and the output path are literals in {@link #main(String[])}; to
 * fetch a different page they must be edited by hand.
 *
 * @author chenguoyong
 */
public class ScrubSelectedWeb {

    /** Platform line separator appended after every line read from the page. */
    private static final String CRLF = System.getProperty("line.separator");

    /**
     * Fetches the page, echoes its text to standard output and writes the same
     * text to {@code D:/outPut.txt}.
     *
     * <p>Failures (malformed URL, network or file I/O errors) are reported via
     * {@code printStackTrace()} and do not propagate to the caller.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            URL ur = new URL("http://10.249.187.199:8083/injs100/");

            // try-with-resources guarantees that the network stream, the reader and
            // the file writer are all closed (and the writer flushed) even when
            // reading or writing fails part-way through.
            // NOTE(review): neither the reader nor the writer names a charset, so both
            // use the platform default; confirm this matches the page's encoding.
            try (InputStream instr = ur.openStream();
                 BufferedReader in = new BufferedReader(new InputStreamReader(instr));
                 BufferedWriter out = new BufferedWriter(new FileWriter(
                         "D:/outPut.txt"))) {

                StringBuilder sb = new StringBuilder();
                String s;
                // readLine() strips the line terminator, so re-append one per line.
                while ((s = in.readLine()) != null) {
                    sb.append(s).append(CRLF);
                }

                System.out.println(sb);
                out.write(sb.toString());
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

基本能实现网页抓取，不过URL需要手动写在代码里（硬编码）；此外代码尚未重构，只是一个简单的思路。

http://c.tieba.baidu.com/p/3357123567
http://c.tieba.baidu.com/p/3357135017
http://c.tieba.baidu.com/p/3357188487
http://c.tieba.baidu.com/p/3356975908
http://c.tieba.baidu.com/p/3357115966
http://c.tieba.baidu.com/p/3357308004
http://c.tieba.baidu.com/p/3357853803
http://c.tieba.baidu.com/p/3357119461
http://c.tieba.baidu.com/p/3360398522
http://c.tieba.baidu.com/p/3360397387
http://c.tieba.baidu.com/p/3360396194
http://c.tieba.baidu.com/p/3360534865
http://c.tieba.baidu.com/p/3360536000
http://c.tieba.baidu.com/p/3360536000
http://c.tieba.baidu.com/p/3360537168
http://c.tieba.baidu.com/p/3360538179
http://c.tieba.baidu.com/p/3360539318
http://c.tieba.baidu.com/p/3360540512
http://c.tieba.baidu.com/p/3360545141
http://c.tieba.baidu.com/p/3360568934
http://c.tieba.baidu.com/p/3360571757
http://c.tieba.baidu.com/p/3360570598
http://c.tieba.baidu.com/p/3360578878
http://c.tieba.baidu.com/p/3360583365
http://c.tieba.baidu.com/p/3360597635
http://c.tieba.baidu.com/p/3357730668
http://c.tieba.baidu.com/p/3357740205
http://c.tieba.baidu.com/p/3357738861
http://c.tieba.baidu.com/p/3357732435
http://c.tieba.baidu.com/p/3357731702
http://c.tieba.baidu.com/p/3357744489
http://c.tieba.baidu.com/p/3357749552
http://c.tieba.baidu.com/p/3357748244
http://c.tieba.baidu.com/p/3357745240
http://c.tieba.baidu.com/p/3357746820
http://c.tieba.baidu.com/p/3357747462
http://c.tieba.baidu.com/p/3357844591
http://c.tieba.baidu.com/p/3357843183
http://c.tieba.baidu.com/p/3357856179
http://c.tieba.baidu.com/p/3357855061
http://c.tieba.baidu.com/p/3357701054
http://c.tieba.baidu.com/p/3357702373
http://c.tieba.baidu.com/p/3357711758
http://c.tieba.baidu.com/p/3357708654
http://c.tieba.baidu.com/p/3357720495
http://c.tieba.baidu.com/p/3357717009
http://c.tieba.baidu.com/p/3357715962
http://c.tieba.baidu.com/p/3357713402
http://c.tieba.baidu.com/p/3357722434
http://c.tieba.baidu.com/p/3357724762
http://c.tieba.baidu.com/p/3357728150
http://c.tieba.baidu.com/p/3357727059
http://c.tieba.baidu.com/p/3357719062
http://c.tieba.baidu.com/p/3357741757
http://c.tieba.baidu.com/p/3357730030
http://c.tieba.baidu.com/p/3357270782
http://c.tieba.baidu.com/p/3357318531
http://c.tieba.baidu.com/p/3357694273
http://c.tieba.baidu.com/p/3357659897
http://c.tieba.baidu.com/p/3357317697
http://c.tieba.baidu.com/p/3357692426
http://c.tieba.baidu.com/p/3357657994
http://c.tieba.baidu.com/p/3357275312
http://c.tieba.baidu.com/p/3357689388
http://c.tieba.baidu.com/p/3357274265
http://c.tieba.baidu.com/p/3357656525
http://c.tieba.baidu.com/p/3357685342
http://c.tieba.baidu.com/p/3357273179
http://c.tieba.baidu.com/p/3357316739
http://c.tieba.baidu.com/p/3357675967
http://c.tieba.baidu.com/p/3357664551
http://c.tieba.baidu.com/p/3361685940
http://c.tieba.baidu.com/p/3369262457
http://c.tieba.baidu.com/p/3361226381
http://c.tieba.baidu.com/p/3361701748
http://c.tieba.baidu.com/p/3369277477
http://c.tieba.baidu.com/p/3369313857
http://c.tieba.baidu.com/p/3369963501
http://c.tieba.baidu.com/p/3369970938
http://c.tieba.baidu.com/p/3369978239
http://c.tieba.baidu.com/p/3369982545
http://c.tieba.baidu.com/p/3369992787
http://c.tieba.baidu.com/p/3369998386
http://c.tieba.baidu.com/p/3370003534
http://c.tieba.baidu.com/p/3370009443
http://c.tieba.baidu.com/p/3370023015
http://c.tieba.baidu.com/p/3370094552
http://c.tieba.baidu.com/p/3370105356
http://c.tieba.baidu.com/p/3370150360
http://c.tieba.baidu.com/p/3370158940
http://c.tieba.baidu.com/p/3370159295
http://c.tieba.baidu.com/p/3370165911
http://c.tieba.baidu.com/p/3370168751
http://c.tieba.baidu.com/p/3370174645
http://c.tieba.baidu.com/p/3370186461
http://c.tieba.baidu.com/p/3370197915
http://c.tieba.baidu.com/p/3370205863
http://c.tieba.baidu.com/p/3370218402
http://c.tieba.baidu.com/p/3370230272
http://c.tieba.baidu.com/p/3370292674
http://c.tieba.baidu.com/p/3370305221
http://c.tieba.baidu.com/p/3370323987
http://c.tieba.baidu.com/p/3370334781
http://c.tieba.baidu.com/p/3370335764
http://c.tieba.baidu.com/p/3370337895
http://c.tieba.baidu.com/p/3370339341
http://c.tieba.baidu.com/p/3370339541
http://c.tieba.baidu.com/p/3370348387
http://c.tieba.baidu.com/p/3370351032
http://c.tieba.baidu.com/p/3370352833
http://c.tieba.baidu.com/p/3370353950
http://c.tieba.baidu.com/p/3370355095
http://c.tieba.baidu.com/p/3370357853
http://c.tieba.baidu.com/p/3370374120
http://c.tieba.baidu.com/p/3370374814
http://c.tieba.baidu.com/p/3370375487
http://c.tieba.baidu.com/p/3370375928
http://c.tieba.baidu.com/p/3370376930
http://c.tieba.baidu.com/p/3370377380
http://c.tieba.baidu.com/p/3370377463
http://c.tieba.baidu.com/p/3370378072
http://c.tieba.baidu.com/p/3370378125
http://c.tieba.baidu.com/p/3370378575
http://c.tieba.baidu.com/p/3370378614
http://c.tieba.baidu.com/p/3370379179
http://c.tieba.baidu.com/p/3370379233
http://c.tieba.baidu.com/p/3370379724
http://c.tieba.baidu.com/p/3370379179
http://c.tieba.baidu.com/p/3370379919
http://c.tieba.baidu.com/p/3370380646
http://c.tieba.baidu.com/p/3370380702
http://c.tieba.baidu.com/p/3370381528
http://c.tieba.baidu.com/p/3370381739
http://c.tieba.baidu.com/p/3370382101
http://c.tieba.baidu.com/p/3370382216
http://c.tieba.baidu.com/p/3370382759
http://c.tieba.baidu.com/p/3370383521
http://c.tieba.baidu.com/p/3370383575
http://c.tieba.baidu.com/p/3370385074
http://c.tieba.baidu.com/p/3370383575
http://c.tieba.baidu.com/p/3370385446
http://c.tieba.baidu.com/p/3370386163
http://c.tieba.baidu.com/p/3370386374
http://c.tieba.baidu.com/p/3370387498
http://c.tieba.baidu.com/p/3370389359
http://c.tieba.baidu.com/p/3370390933
http://c.tieba.baidu.com/p/3370391036
http://c.tieba.baidu.com/p/3370391036
http://c.tieba.baidu.com/p/3370391453
http://c.tieba.baidu.com/p/3370391468
http://c.tieba.baidu.com/p/3370393162
http://c.tieba.baidu.com/p/3370399408
http://c.tieba.baidu.com/p/3370403804
http://c.tieba.baidu.com/p/3370408675
http://c.tieba.baidu.com/p/3370409225
http://c.tieba.baidu.com/p/3370409602
http://c.tieba.baidu.com/p/3370411429
http://c.tieba.baidu.com/p/3370411571
http://c.tieba.baidu.com/p/3370415337
http://c.tieba.baidu.com/p/3370415667
http://c.tieba.baidu.com/p/3370416326
http://c.tieba.baidu.com/p/3370417849
http://c.tieba.baidu.com/p/3370417926
http://c.tieba.baidu.com/p/3370418876
http://c.tieba.baidu.com/p/3370419068
http://c.tieba.baidu.com/p/3370420330
http://c.tieba.baidu.com/p/3370420571
http://c.tieba.baidu.com/p/3370421490
http://c.tieba.baidu.com/p/3370422394
http://c.tieba.baidu.com/p/3370423071
http://c.tieba.baidu.com/p/3370424478
http://c.tieba.baidu.com/p/3370424908
http://c.tieba.baidu.com/p/3370426273
http://c.tieba.baidu.com/p/3370426319
http://c.tieba.baidu.com/p/3370462138
http://c.tieba.baidu.com/p/3371658420
http://c.tieba.baidu.com/p/3371663222
http://c.tieba.baidu.com/p/3371664077
http://c.tieba.baidu.com/p/3371674626
http://c.tieba.baidu.com/p/3371671097
http://c.tieba.baidu.com/p/3371676658
http://c.tieba.baidu.com/p/3371679958
http://c.tieba.baidu.com/p/3371682907
http://c.tieba.baidu.com/p/3371685449
http://c.tieba.baidu.com/p/3371689289
http://c.tieba.baidu.com/p/3371697541
http://c.tieba.baidu.com/p/3371698752
http://c.tieba.baidu.com/p/3371701811
http://c.tieba.baidu.com/p/3371704043
http://c.tieba.baidu.com/p/3371710108
http://c.tieba.baidu.com/p/3371714425
http://c.tieba.baidu.com/p/3371719038
http://c.tieba.baidu.com/p/3371726190
http://c.tieba.baidu.com/p/3371732092
http://c.tieba.baidu.com/p/3371732412
http://c.tieba.baidu.com/p/3371737828
http://c.tieba.baidu.com/p/3371738097
http://c.tieba.baidu.com/p/3371742564
http://c.tieba.baidu.com/p/3371742591
http://c.tieba.baidu.com/p/3371755208
http://c.tieba.baidu.com/p/3371810189
http://c.tieba.baidu.com/p/3371837480
http://c.tieba.baidu.com/p/3371864092
http://c.tieba.baidu.com/p/3371908427
http://c.tieba.baidu.com/p/3371945665
http://c.tieba.baidu.com/p/3372010213
http://c.tieba.baidu.com/p/3372015825
http://c.tieba.baidu.com/p/3372077866
http://c.tieba.baidu.com/p/3372089452
http://c.tieba.baidu.com/p/3357720495
http://c.tieba.baidu.com/p/3372148588

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。