HanDs
管理员

[Delphi文章] delphi捕捉技术 



unit Unit1;

interface

uses
 Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
 Dialogs, StdCtrls, OleCtrls, SHDocVw, IdBaseComponent, IdComponent,
  IdTCPConnection, IdTCPClient, IdHTTP, DB, ADODB,StrUtils, ComCtrls,ugethtml,
  ExtCtrls;

type
 // Main form of the scraper. The component fields and event handlers in the
 // first (default-visibility) section are the ones wired up by the form
 // designer; the public section holds the helper routines implemented in
 // this unit.
 TForm1 = class(TForm)
    // Database access: ADOConnection1 is what ESQL, do_break and Button3Click
    // execute their SQL through.
    ADOConnection1: TADOConnection;
    ADOQuery1: TADOQuery;
    // Panel 0 of the status bar shows the URL currently being processed.
    StatusBar1: TStatusBar;
    // Drives the page-by-page fetch loop (see Timer1Timer).
    Timer1: TTimer;
    Panel1: TPanel;
    Button1: TButton;
    Button2: TButton;
    Splitter1: TSplitter;
    Button3: TButton;
    PageControl1: TPageControl;
    TabSheet1: TTabSheet;
    TabSheet2: TTabSheet;
    // Memo1 is used as the on-screen log by most routines in this unit.
    Memo1: TMemo;
    Memo2: TMemo;
    // Edit1: free text used by the Button3/Button5 test inserts.
    Edit1: TEdit;
    Label1: TLabel;
    // Edit2: thread limit read by Button4Click.
    Edit2: TEdit;
    Label2: TLabel;
    // Label3 shows the region code currently being scraped.
    Label3: TLabel;
    // Edit3: region index read by Button4Click.
    Edit3: TEdit;
    Button4: TButton;
    Button5: TButton;

 // Starts scraping the region selected by diqu_counter.
 procedure Button1Click(Sender: TObject);
 // Fetches the next result page, or advances to the next region.
 procedure Timer1Timer(Sender: TObject);
    // Toggles Timer1 on/off (pause / resume).
    procedure Button2Click(Sender: TObject);
    // Test insert into GooJob_URL_Info.
    procedure Button3Click(Sender: TObject);
    // Initialises the unit globals and the region-code table.
    procedure FormCreate(Sender: TObject);
    // Applies the thread limit and region index typed into Edit2/Edit3.
    procedure Button4Click(Sender: TObject);
    // Test insert into goojob_urls using the text of Edit1.
    procedure Button5Click(Sender: TObject);
    // Empty handler.
    procedure Panel1Click(Sender: TObject);
    // Empty handler.
    procedure Splitter1CanResize(Sender: TObject; var NewSize: Integer;
      var Accept: Boolean);

 private
   { Private declarations }
 public
   { Public declarations }
   // Executes S on ADOConnection1 without returning a recordset.
   PROCEDURE ESQL(S:STRING);
   // Extracts every 'Javascript:view(...)' link from the HTML in s1 and
   // queues a download for each URL that could be inserted into goojob_urls.
   procedure getpageurls(s1:string);
   // Parses the total hit count out of the HTML in s1 into the global pages.
   procedure getpages(s1:string);
  // Returns the text found between prefix and posfix inside htmlfile.
  function GetNumberFromHTML(prefix, posfix, htmlfile: string): string;
  // Appends logstr to the file htmlfilefolder + i + '.htm'.
  procedure Appendloginfo(htmlfilefolder,i, logstr: string);
  // Returns the next value of the global sequence counter seqID.
  function GetSeqID():integer;
  // Builds a name from the current timestamp, 'chinahr' and GetSeqID.
  function GetFileName():string;
  // Executes sql and returns the affected-row count, or 1001 on failure.
  function do_break(sql:string):integer;
 end;

var
 // Unit-level state shared by all routines below (not per-form-instance).
 Form1: TForm1;
 // page:  number of the result page fetched last (incremented in Timer1Timer).
 // pages: set by getpages to the total hit count, then converted to a page
 //        count in Button1Click.
 // seqID: running counter handed out by GetSeqID.
 page,pages,seqID:integer;
 // Upper limit compared against Thread_Counter before more work is started.
 Thread_Max:integer;
 // Index into diqu of the region currently being scraped.
 diqu_counter:integer;
 // Compared against Thread_Max; it is only initialised in this unit, so it
 // is presumably updated by the worker code in another unit - TODO confirm.
 Thread_Counter:integer;
 // Initialised in FormCreate; not otherwise referenced in this unit.
 get_Counter:integer;
 // Region codes substituted into the search URL (filled in FormCreate).
 diqu:array[1..50] of string;
 // HTTP client created in Button1Click and released in Timer1Timer.
 IdHTTP1:tidhttp;
 // Counts inserts for which do_break returned 1001; when it exceeds 10,
 // getpageurls forces the current region to end.
 dobreak:integer;
implementation
uses chinahr_gf;

{$R *.dfm}
// Blocks the caller until Thread_Counter drops below Thread_Max, pumping the
// message queue so the UI stays responsive while waiting.
//
// Compared with a bare ProcessMessages loop this version
//   * yields the CPU between polls instead of spinning a core at 100 %, and
//   * stops waiting once the application is terminating, so closing the
//     main form cannot leave the process stuck in this loop.
// Thread_Counter is only read here; it is presumably updated by worker code
// elsewhere - TODO confirm.
procedure Delay();
begin
  repeat
    Application.ProcessMessages;
    if (Thread_Counter < Thread_Max) or Application.Terminated then
      Break;
    // Short sleep: keeps polling latency low without busy-waiting.
    Sleep(1);
  until False;
end;

// On NT-family Windows, calls SetProcessWorkingSetSize for the current
// process with $FFFFFFFF as both the minimum and the maximum, then pumps
// pending messages. On other platforms it does nothing.
// NOTE(review): $FFFFFFFF equals "-1" only when the size parameters are
// 32 bits wide - confirm before building for a 64-bit target.
procedure hehe();
begin
  if Win32Platform <> VER_PLATFORM_WIN32_NT then
    Exit;
  SetProcessWorkingSetSize(GetCurrentProcess, $FFFFFFFF, $FFFFFFFF);
  Application.ProcessMessages;
end;

// Advances the global seqID and returns its new value.
// The counter is incremented while it is <= 10000000 and wraps back to 1
// once it has gone past that, so the largest value returned is 10000001.
function TForm1.GetSeqID():integer;
begin
  if seqID > 10000000 then
    seqID := 1
  else
    seqID := seqID + 1;
  Result := seqID;
end;

// Builds a file base name of the form <timestamp>chinahr<sequence number>.
// In the format string the second 'MM' directly follows 'HH', which
// FormatDateTime interprets as minutes rather than month.
// Side effect: advances the global sequence counter through GetSeqID.
function TForm1.GetFileName():string;
var
  stamp: string;
begin
  stamp := FormatDateTime('YYYYMMDDHHMMSS', Now);
  Result := stamp + 'chinahr' + IntToStr(GetSeqID());
end;

// Appends logstr as one line to the file htmlfilefolder + i + '.htm',
// creating the folder and the file when they do not exist yet.
//
// Parameters:
//   htmlfilefolder - target folder; it is concatenated directly with i, so
//                    it has to end with a path delimiter.
//   i              - file base name (without extension).
//   logstr         - text written as a single line.
//
// Errors are not propagated: any exception is reported in Memo1 together
// with the folder and file name that were being written.
procedure Tform1.Appendloginfo(htmlfilefolder,i, logstr: string);
var
  f: TextFile;
  temp_filename: string;
begin
  try
    ForceDirectories(htmlfilefolder);
    temp_filename := htmlfilefolder + i + '.htm';
    AssignFile(f, temp_filename);
    if FileExists(temp_filename) then
      Append(f)
    else
      Rewrite(f);
    // From here on the file is open: make sure the handle is released even
    // when writing fails (e.g. disk full), otherwise the file stays locked
    // for the rest of the process lifetime.
    try
      Writeln(f, logstr);
      Flush(f);
    finally
      CloseFile(f);
    end;
  except
    on e: Exception do
    begin
      Memo1.Lines.Add('出错信息' + e.Message);
      Memo1.Lines.Add('出错信息' + htmlfilefolder);
      Memo1.Lines.Add('出错信息' + i);
    end;
  end;
end;

 

// Starts scraping the region selected by diqu_counter.
//
// Steps: build the search URL for the region code diqu[diqu_counter],
// create the shared HTTP client, download the first result page, derive the
// number of result pages from the reported hit count, process the links of
// the first page and finally enable Timer1, which walks the remaining pages.
//
// The run is considered complete when diqu_counter points past the array or
// at an entry that was never filled in. An index below the first slot is
// rejected instead of being used to read outside the array.
// Any exception is logged to Memo1 and Button1 is re-enabled.
procedure TForm1.Button1Click(Sender: TObject);
var
  URL, pagehmtl: string;
begin
  button2.Enabled := true;
  // NOTE(review): the button is left enabled while a region is being
  // processed, so it can be clicked again mid-run - confirm this is intended.
  button1.Enabled := true;
  page := 1;
  try
    if diqu_counter < Low(diqu) then
    begin
      memo1.Lines.Add('地区编号无效: ' + IntToStr(diqu_counter));
      Exit;
    end;
    // Unused slots of diqu are empty strings, so the first empty slot marks
    // the end of the region list.
    if (diqu_counter > High(diqu)) or (diqu[diqu_counter] = '') then
    begin
      ShowMessage('全部完成了!');
      Exit;
    end;

    label3.Caption := '地区编号:' + diqu[diqu_counter];
    URL := 'http://searchjob.chinahr.com/jobsearch.asp?ind_id=100&occ_id=255&mylocation='+diqu[diqu_counter]+'&keytxt=0&key=0&xl=255&req_wyear=255&salary=255&company_name=&job_title=&update_date=255&search=%B2%E9+%D1%AF&Search=%B2%E9+%D1%AF';

    // NOTE(review): an existing client is not released here. Timer1Timer
    // frees it before it re-enters this handler, but a manual click while a
    // client exists leaks the previous instance.
    IdHTTP1 := TIdHTTP.Create(nil);
    IdHTTP1.Request.UserAgent := 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 2003 SP1;  .NET CLR 1.1.4322)';
    try
      IdHTTP1.Request.CustomHeaders.Clear;
    except
      on E: Exception do
      begin
        memo1.Lines.Add('清除缓存出错了!');
        memo1.Lines.Add(E.Message);
      end;
    end;
    IdHTTP1.AllowCookies := true;
    IdHTTP1.HandleRedirects := true;

    pagehmtl := IdHTTP1.Get(URL);
    Sleep(10);

    // getpages stores the total hit count in pages; convert it to a page
    // count (the divisor assumes 30 hits per result page - TODO confirm).
    getpages(pagehmtl);
    pages := (pages div 30) + 1;

    getpageurls(pagehmtl);
    StatusBar1.Panels[0].Text := '处理中:' + URL;
    // The remaining pages are fetched one per timer tick.
    timer1.Enabled := true;
  except
    on E: Exception do
    begin
      Button1.Enabled := true;
      memo1.Lines.Add(E.Message);
    end;
  end;
end;

// Walks through every 'Javascript:view(...)' call in the HTML text s1,
// rebuilds the job-detail URL from its two quoted arguments and records it.
//
// For each link:
//   1. the argument list between 'Javascript:view(' and ')' is extracted;
//   2. jid is the text inside the first pair of quotes, d the text inside
//      the second pair;
//   3. the URL is inserted into goojob_urls through do_break;
//   4. when do_break returns anything but 1001 a gethtml worker is created
//      for the URL, otherwise the global dobreak counter is incremented.
// Once dobreak exceeds 10 the global page is pushed past pages, which makes
// the timer loop leave the current region.
//
// The URL is assembled from text taken out of a remote page, so it is
// passed through QuotedStr before it is embedded in the SQL statement; for
// values without quote characters the statement is unchanged.
procedure TForm1.getpageurls(s1:string);
var
  args, jid, d, geturl, sqlstr: string;
  commaPos: Integer;
begin
  while Pos('Javascript:view(', s1) > 0 do
  begin
    args := GetNumberFromHTML('Javascript:view(', ')', s1);
    // Cut the input just inside the marker that was matched so the next
    // iteration finds the following occurrence.
    Delete(s1, 1, Pos('Javascript:view(', s1) + 10);

    commaPos := Pos(',', args);
    // Start at character 2 to skip the opening quote; the length drops the
    // opening quote, the closing quote and the comma itself.
    jid := Copy(args, 2, commaPos - 3);
    // Skip the comma and the opening quote, stop at the closing quote.
    d := Copy(args, commaPos + 2,
      PosEx('''', args, commaPos + 2) - commaPos - 2);

    geturl := 'http://searchjob.chinahr.com/click.asp?jid=' + jid + '&d=' + d;
    StatusBar1.Panels[0].Text := '处理中:' + geturl;
    hehe();

    // Throttle: wait while too many workers are active.
    if Thread_Counter > Thread_Max then
      Delay();

    sqlstr := 'insert into goojob_urls(URL)values(' + QuotedStr(geturl) + ')';
    if do_break(sqlstr) <> 1001 then
      gethtml.Create(geturl)
    else
      dobreak := dobreak + 1;

    // Too many rejected inserts: force the current region to finish.
    if dobreak > 10 then
      page := pages + 1;
  end;
end;

// Returns the text that lies between the first occurrence of prefix and the
// next occurrence of posfix inside htmlfile.
//
// Parameters:
//   prefix   - marker that precedes the wanted text.
//   posfix   - marker that follows the wanted text; when it is empty or not
//              found, everything up to the end of htmlfile is returned.
//   htmlfile - text to search.
//
// Returns '' when prefix does not occur in htmlfile.
//
// Details:
//   * Result is always initialised (a string Result is not guaranteed to
//     start empty when no assignment is executed).
//   * posfix is searched for after the end of prefix, so a posfix that also
//     occurs inside prefix cannot produce an empty result.
//   * When posfix is missing, the last character of htmlfile is included.
function TForm1.GetNumberFromHTML(prefix, posfix, htmlfile: string): string;
var
  startPos, endPos: Integer;
begin
  Result := '';
  startPos := Pos(prefix, htmlfile);
  if startPos = 0 then
    Exit;

  // First character after the prefix.
  Inc(startPos, Length(prefix));

  endPos := 0;
  if posfix <> '' then
    endPos := PosEx(posfix, htmlfile, startPos);
  if endPos = 0 then
    endPos := Length(htmlfile) + 1;   // one past the end: take the rest

  Result := Copy(htmlfile, startPos, endPos - startPos);
end;

// Reads the total number of matching jobs out of the result page s1 and
// stores it in the global pages.
// The text in front of the hit-count caption is discarded first, then the
// number is taken from between the yellow FONT tag and its closing tag.
// When no number can be parsed, pages falls back to 500.
procedure TForm1.getpages(s1:string);
var
  tail, countText: string;
begin
  tail := s1;
  Delete(tail, 1, Pos('符合条件的职位共', tail));
  countText := GetNumberFromHTML('<FONT COLOR="#FFFF00">', '</FONT>个', tail);
  pages := StrToIntDef(countText, 500);   // total number of hits
end;


// Runs the SQL statement S on ADOConnection1 without asking for a
// recordset. The affected-row count reported by the provider is discarded;
// exceptions raised by Execute propagate to the caller.
procedure TForm1.ESQL(S:STRING);
var
  affected: Integer;
begin
  ADOConnection1.Execute(S, affected, [eoExecuteNoRecords]);
end;


// Timer tick: fetches the next result page of the current region, or moves
// on to the next region once all pages have been visited.
//
// The timer disables itself on entry and is re-enabled only after a page
// was fetched and processed successfully. After an exception (logged to
// Memo1) it therefore stays stopped until it is enabled again.
//
// When the region is finished the shared HTTP client is released with
// FreeAndNil, so the global never keeps pointing at a destroyed object. A
// later tick that reaches this branch again frees nil, which is harmless,
// and a tick that would need the client is rejected by the nil check below.
procedure TForm1.Timer1Timer(Sender: TObject);
var
  url, pagehmtl: string;
begin
  try
    timer1.Enabled := false;

    // Throttle: wait while too many workers are active.
    if Thread_Counter > Thread_Max then
      Delay();

    if page < pages then
    begin
      if not Assigned(IdHTTP1) then
      begin
        // No client exists (nothing was started, or the run has ended).
        memo1.Lines.Add('出错信息4575767' + 'IdHTTP1 = nil');
        Exit;
      end;

      // Keep the on-screen log from growing without bound.
      if memo1.Lines.Count > 500 then
        memo1.Lines.Clear;

      page := page + 1;
      // NOTE(review): the URL ends with a space character - confirm that
      // the server requires or tolerates it.
      url := 'http://searchjob.chinahr.com/jobsearch.asp?page='+inttostr(page)+'&mid=&order=0 ';
      memo1.Lines.Add(url);
      StatusBar1.Panels[0].Text := '处理中:' + url;
      pagehmtl := IdHTTP1.Get(url);
      getpageurls(pagehmtl);
      timer1.Enabled := true;
    end
    else
    begin
      dobreak := 1;   // start the next region with a fresh counter
      FreeAndNil(IdHTTP1);
      // Delay pumps messages, so the timer may have been switched on again
      // in the meantime; make sure it is off before the next region starts.
      timer1.Enabled := false;
      diqu_counter := diqu_counter + 1;
      Button1.Enabled := true;
      Button1.Click;
    end;
  except
    on e: Exception do
    begin
      memo1.Lines.Add('出错信息4575767' + e.Message);
    end;
  end;
end;

// Pause/resume toggle for the fetch timer.
// When the caption is anything other than the "stop" text, the caption is
// switched to "stop" and Timer1 is enabled; otherwise the caption becomes
// "continue" and Timer1 is disabled.
procedure TForm1.Button2Click(Sender: TObject);
var
  shouldRun: Boolean;
begin
  shouldRun := button2.Caption <> '停止';
  if shouldRun then
    button2.Caption := '停止'
  else
    button2.Caption := '继续';
  timer1.Enabled := shouldRun;
end;


{
 procedure TForm1.Button3Click(Sender: TObject);
var
htmlstr:string;
 job_str, j_company, j_location,
 j_office, j_office_describe, j_trade, j_salary, j_publishdate, j_email,
 j_telephone, j_postcode, j_contactman, j_contactaddr, j_sex, j_age,
 j_educational, j_experience, j_contactway, j_foreignlanguage,
 j_remarks: string ;
begin
 job_str:=memo1.Text;
 j_company :='公司';
 j_office :='公司';
 j_location :='公司';
 j_publishdate :='公司';
 gf_format(job_str, j_company, j_location,
 j_office, j_office_describe, j_trade, j_salary, j_publishdate, j_email,
 j_telephone, j_postcode, j_contactman, j_contactaddr, j_sex, j_age,
 j_educational, j_experience, j_contactway, j_foreignlanguage,
 j_remarks);
 memo2.Lines.Add(j_educational);
 memo2.Lines.Add(j_company);
 memo2.Lines.Add(j_office);
 memo2.Lines.Add(j_location);
 memo2.Lines.Add(j_publishdate);
 memo2.Lines.Add(j_office_describe);
end;

}

// Initialises the unit globals and fills the region-code table diqu.
//
// Every region gets its own slot. Assigning slots 17 and 18 twice would
// overwrite the Hubei and Hunan codes with those of Jilin and Jiangxi, so
// the table uses 37 distinct slots (1..37).
// NOTE(review): any fixed upper bound used when iterating diqu_counter has
// to be at least 37 to cover the whole table.
// The slot order decides the processing order; slots 2 and 6 are filled
// out of textual order, exactly as listed.
procedure TForm1.FormCreate(Sender: TObject);
begin
  page := 1;
  pages := 1;
  seqID := 1;
  Thread_Max := 20;
  Thread_Counter := 1;
  get_Counter := 1;
  diqu_counter := 1;

  diqu[1] := '5';        // Beijing
  diqu[6] := '115';      // Shanghai
  diqu[3] := '140';      // Tianjin
  diqu[4] := '25';       // Chongqing
  diqu[5] := '16000';    // Guangdong
  diqu[2] := '7000';     // Jiangsu
  diqu[7] := '8000';     // Zhejiang
  // NOTE(review): same code as Jiangsu; the other province codes suggest
  // 9000 may have been intended - verify against the site before changing.
  diqu[8] := '7000';     // Anhui
  diqu[9] := '10000';    // Fujian
  diqu[10] := '24000';   // Gansu
  diqu[11] := '17000';   // Guangxi
  diqu[12] := '20000';   // Guizhou
  diqu[13] := '18000';   // Hainan
  diqu[14] := '1000';    // Hebei
  diqu[15] := '13000';   // Henan
  diqu[16] := '6000';    // Heilongjiang
  diqu[17] := '14000';   // Hubei
  diqu[18] := '15000';   // Hunan
  diqu[19] := '5000';    // Jilin
  diqu[20] := '11000';   // Jiangxi
  diqu[21] := '4000';    // Liaoning
  diqu[22] := '3000';    // Inner Mongolia
  diqu[23] := '26000';   // Ningxia
  diqu[24] := '12000';   // Shandong
  diqu[25] := '2000';    // Shanxi
  diqu[26] := '23000';   // Shaanxi
  diqu[27] := '22000';   // Tibet
  diqu[28] := '21000';   // Yunnan
  diqu[29] := '185';     // Hong Kong
  diqu[30] := '190';     // Macau
  diqu[31] := '195';     // Taiwan
  diqu[32] := '200';     // Asia (other)
  diqu[33] := '205';     // North America
  diqu[34] := '230';     // South America
  diqu[35] := '210';     // Oceania
  diqu[36] := '215';     // Europe
  diqu[37] := '235';     // Africa

  dobreak := 1;
end;

// Applies the settings typed into Edit2 (thread limit) and Edit3 (region
// index) to the globals Thread_Max and diqu_counter and echoes them in
// Memo1.
//
// Both values are validated before either global is touched, so invalid
// input can no longer leave the settings half-updated:
//   * the thread limit must be an integer >= 1 (Delay waits until
//     Thread_Counter < Thread_Max, which can never become true for a
//     limit of 0 or less);
//   * the region index must lie inside the bounds of the diqu array.
// Invalid input is reported in Memo1 and otherwise ignored.
procedure TForm1.Button4Click(Sender: TObject);
var
  newMax, newRegion: Integer;
begin
  if (not TryStrToInt(Trim(edit2.Text), newMax)) or (newMax < 1) then
  begin
    memo1.Lines.Add('线程最多无效: ' + edit2.Text);
    Exit;
  end;
  if (not TryStrToInt(Trim(edit3.Text), newRegion))
    or (newRegion < Low(diqu)) or (newRegion > High(diqu)) then
  begin
    memo1.Lines.Add('地区编号无效: ' + edit3.Text);
    Exit;
  end;

  Thread_Max := newMax;
  diqu_counter := newRegion;
  memo1.Lines.Clear;
  memo1.Lines.Add('地区编号' + edit3.Text);
  memo1.Lines.Add('线程最多' + edit2.Text);
end;


 procedure TForm1.Button3Click(Sender: TObject);
var
sqlstr:string;
fedit1text,fURL,temp_str,FormatFileName:string;
i:integer;
begin
 try
  i:=1001;
  sqlstr:='';
  fedit1text :='艾思特信息技术';
  fURL:=edit1.Text;
  temp_str:='1.html';
  FormatFileName:='2.html';
  sqlstr:='insert into GooJob_URL_Info(SiteName,URL,LocalFileName,FormatFileName)values('''+fedit1text+''','''+fURL+''','''+temp_str+''','''+FormatFileName+''')';
  ADOConnection1.Execute(sqlstr,i,[eoExecuteNoRecords]);
  memo1.Lines.Add(inttostr(i));
 except
 on E:exception do
  begin
   memo1.Lines.Add(e.Message);
   memo1.Lines.Add(inttostr(i));
  end;
 end;
end;

 

 

// Executes sql on ADOConnection1 (no recordset requested) and returns the
// affected-row count reported by the provider.
// Returns 1001 when Execute raises an exception; the message is logged to
// Memo1. Callers in this unit treat 1001 as "statement was not applied".
function TForm1.do_break(sql: string): integer;
var
  affected: Integer;
begin
  Result := 1001;
  affected := 1001;
  try
    ADOConnection1.Execute(sql, affected, [eoExecuteNoRecords]);
    Result := affected;
  except
    on e: Exception do
    begin
      memo1.Lines.Add('出错了!  '+e.Message);
      Result := 1001;
    end;
  end;
end;

// Test helper: inserts the text of Edit1 into goojob_urls and logs both the
// statement and the value returned by do_break.
//
// The text is embedded with QuotedStr so that a quote character in the
// edit box can neither break the statement nor inject SQL. For text without
// quote characters the generated statement is unchanged.
procedure TForm1.Button5Click(Sender: TObject);
var
  sql: string;
begin
  sql := 'insert into goojob_urls(URL)values(' + QuotedStr(edit1.Text) + ')';
  memo1.Lines.Add(sql);
  memo1.Lines.Add(IntToStr(do_break(sql)));
end;

// Click handler declared for Panel1; the body is empty, so a click on the
// panel has no effect.
procedure TForm1.Panel1Click(Sender: TObject);
begin

end;

// CanResize handler declared for Splitter1; the body is empty, so NewSize
// and Accept are passed back to the caller unchanged.
procedure TForm1.Splitter1CanResize(Sender: TObject; var NewSize: Integer;
  var Accept: Boolean);
begin

end;

end.

 


学习中请遵守法律法规。本网站内容均来自互联网,本网站不承担法律责任。
delphi 捕捉技术
#1楼
发帖时间:2016-7-9   |   查看数:0   |   回复数:0
游客组
快速回复