ggjj:ggjj

// download by http://www.codefans.net/soft/5667.shtml#if !defined AFX_TESTDLG_H__#define AFX_TESTDLG_H__#pragma once#pragma warning (disable : 4786)#include <string>#include <map>#include <list>#include <queue>using namespace std;struct Bus_t{ bool bSaved; // 是否获取过了 string busName; // 线路名称 string info; // 简介 string url; // 对应的 url 后缀 list<string> stationsGo; // 站台 去程 list<string> stationsBack; // 站台 回程};/*========================================================================*/class CTestDlg : public CDialog{public:CFont m_font;CImageList m_imagelist;CTestDlg(CWnd* pParent = NULL);//{​{AFX_DATA(CTestDlg)enum { IDD = IDD_TEST_DIALOG };CProgressCtrlm_pp;CListCtrlm_list;//}}AFX_DATA//{​{AFX_VIRTUAL(CTestDlg)protected:virtual void DoDataExchange(CDataExchange* pDX);// DDX/DDV support//}}AFX_VIRTUALprotected: void GetBusInfo(); void SaveToFile(); void SaveToFile(FILE * fp, Bus_t & t); char * GetFileName(); map<string, Bus_t> m_mapBus;//{​{AFX_MSG(CTestDlg)virtual BOOL OnInitDialog();afx_msg void OnButton1();afx_msg void OnKillfocusEdit1();//}}AFX_MSGDECLARE_MESSAGE_MAP()};/*========================================================================*/CString getHTML(CString strURL);void StripTags(LPTSTR pszBuffer);void SplitStations(char * pstation, list<string>& lstStation);void WriteFile(const char * pszContent, const char * pszFilename);char * StrToInt(const char * str, int & n);string FormatBusLineToOrder(const char sLine[]);char * GetFileName(const char * url);#endif ------------------#include "stdafx.h"#include "Test.h"#include "TestDlg.h"#include <afxinet.h>#ifdef _DEBUG#define new DEBUG_NEW#undef THIS_FILEstatic char THIS_FILE[] = __FILE__;#endif/*========================================================================*/// 深圳公交 "http://shenzhen.8684.cn/"// "981路" "x_7ca8d133" 测试 网页内容没有抓全的问题// "209路区间快车" "x_32245c22" 测试 抓取回程宕机的问题,这是单程线路// "1路" "x_24f5dad9" 抓取数据的起始线路,其实随便一个线路都可以// "220路" "x_d397d994" 测试 遇到带"站)"的站台后,回程线路获取不全的问题 // "高峰专线41" "x_28a86909" 单向行驶 未抓取到数据// 上海公交 "http://shanghai.8684.cn/"// "01路" "x_ccc80acf"// 北京公交 "http://beijing.8684.cn"// "1路" "x_24f5dad9"// 济南 "http://jinan.8684.cn/"// "1路" "x_24f5dad9"#define STR_HTTP_URL "http://jinan.8684.cn/"#define STR_BUS "1路"#define STR_URL "x_24f5dad9"#define STR_GO "去程"#define STR_BACK "回程"#define STR_SINGLE "单向行驶"#define STR_LINE "相关线路"#define STR_HREF_END "</a>"CTestDlg::CTestDlg(CWnd* pParent /*=NULL*/): CDialog(CTestDlg::IDD, pParent){//{​{AFX_DATA_INIT(CTestDlg)//}}AFX_DATA_INIT}void CTestDlg::DoDataExchange(CDataExchange* pDX){CDialog::DoDataExchange(pDX);//{​{AFX_DATA_MAP(CTestDlg)DDX_Control(pDX, IDC_PROGRESS1, m_pp);DDX_Control(pDX, IDC_LIST1, m_list);//}}AFX_DATA_MAP}BEGIN_MESSAGE_MAP(CTestDlg, CDialog)//{​{AFX_MSG_MAP(CTestDlg)ON_BN_CLICKED(IDC_BUTTON1, OnButton1)ON_EN_KILLFOCUS(IDC_EDIT1, OnKillfocusEdit1)//}}AFX_MSG_MAPEND_MESSAGE_MAP()/*========================================================================*/#define MAX 10BOOL CTestDlg::OnInitDialog(){CDialog::OnInitDialog();m_imagelist.Create(16,16,TRUE,2,2);m_imagelist.Add(AfxGetApp()->LoadIcon(IDI_ICON1));m_list.SetImageList(&m_imagelist,LVSIL_SMALL);m_font.CreateFont(16, 0,0,0,FW_NORMAL, 0,0,0,DEFAULT_CHARSET, OUT_CHARACTER_PRECIS, CLIP_CHARACTER_PRECIS,DEFAULT_QUALITY, DEFAULT_PITCH | FF_DONTCARE, "Arial");m_list.SetFont(&m_font);/*-----------------------------------------------------------*/m_list.SetExtendedStyle(LVS_EX_FULLROWSELECT | LVS_EX_GRIDLINES);m_list.SetBkColor(RGB(247,247,255));m_list.SetTextColor(RGB(0,0,255));m_list.SetTextBkColor(RGB(247,247,255));m_list.InsertColumn(0, "序号", LVCFMT_LEFT, 50);m_list.InsertColumn(1, "公交路线", LVCFMT_LEFT, 350);m_list.InsertColumn(2, "下载状态", LVCFMT_LEFT, 150);m_pp.SetRange(1,MAX+1);m_pp.SetPos(0);m_pp.SetStep(1);return TRUE; }void CTestDlg::OnButton1(){ CString strURL; strURL="http://www.baidu.com/img/baidu_logo.gif"; int nIndex=m_list.InsertItem(0xffff,"0",0); m_list.SetItemText(nIndex,1,strURL); if(::URLDownloadToFile(NULL,strURL,"baidu_logo.gif",0,NULL) == S_OK) { m_list.SetItemText(0,2,"文件下载完成!"); } else { m_list.SetItemText(0,2,"文件下载失败..."); } GetBusInfo(); SaveToFile(); MessageBox("下载完成!", "公交信息", MB_ICONASTERISK | MB_OK);}void CTestDlg::OnKillfocusEdit1(){}CString getHTML(CString strURL){ CInternetSession mySession(NULL,0); CHttpFile* myHttpFile=NULL; CString strHtml=""; CString myData; myHttpFile=(CHttpFile*)mySession.OpenURL(strURL); while(myHttpFile->ReadString(myData)) { strHtml += myData + "/n"; } strHtml += myData; // 有时候明明读取到内容了,但 ReadString 返回了 FASLE/* // 通过 Read 方法读取文本 const int size = 1024; byte pByte[size]; int count = 0; vector<byte> vecByte; CString strUpdateInfo; while( (count = myHttpFile->Read(pByte, size)) > 0 ) { for(int i = 0; i < count; ++i) { vecByte.push_back(pByte[i]); } if( count < size ) break; } if( vecByte.size() > 0 ) { byte * pB = new byte[vecByte.size()]; copy(vecByte.begin(),vecByte.end(),pB); TCHAR * pChr = (TCHAR*)pB; strHtml = pChr; delete [] pB; } */ myHttpFile->Close(); mySession.Close(); WriteFile(strHtml, "3.txt"); return strHtml;}// 写文件void WriteFile(const char * pszContent, const char * pszFilename){ FILE * fp; if( (fp = fopen(pszFilename, "w+t")) != NULL) { fwrite(pszContent, sizeof(char), strlen(pszContent), fp); fclose(fp); }}void CTestDlg::GetBusInfo(){ CString url = STR_HTTP_URL; CString tmp; /*static*/ char szTmp[1024*16]; // 使用静态变态,不占用函数栈中的内存 queue<string> queBus; queBus.push(STR_BUS);//#define SAVE_FILE#ifdef SAVE_FILE FILE * fp; if( (fp = fopen(GetFileName(), "w+t")) == NULL) return;#endif Bus_t t; t.bSaved = false; t.url = STR_URL; t.busName = STR_BUS; m_mapBus.insert(pair<string, Bus_t>(FormatBusLineToOrder(STR_BUS), t)); int nL; int i = 1; int j = 1; // 如果不下载 baidu_logo.gif, 这里改为 0 CString strI; strI.Format("%d",i++); int nIndex=m_list.InsertItem(0xffff,strI,0); m_list.SetItemText(nIndex,1,STR_BUS); UpdateWindow(); CString strBusLine; // 线路名称,为取回程信息而用 while(!queBus.empty()) { string b = queBus.front(); queBus.pop(); map<string, Bus_t>::iterator ibus = m_mapBus.find(FormatBusLineToOrder(b.c_str())); if (ibus != m_mapBus.end()) { Bus_t & bsecond = ibus->second; CString html = getHTML(url + bsecond.url.c_str()); // 解析html int pos = html.Find(bsecond.url.c_str()); if (pos != -1) { html.Delete(0, pos); while(html.GetAt(0) != '>') html.Delete(0); if (html.GetAt(0) == '>') html.Delete(0); // 线路名称 int p1 = html.Find(STR_HREF_END); if (p1 != -1) { strBusLine = html.Left(p1); } } // 取 线路简介 信息 pos = html.Find(STR_GO); if (pos != -1) { tmp = html.Left(pos); nL = tmp.GetLength(); strcpy(szTmp, tmp); // 需要注意不要拷贝越界 StripTags(szTmp); bsecond.info = szTmp; // 简介 html.Delete(0, pos); while(html.GetAt(0) != '>') html.Delete(0); if (html.GetAt(0) == '>') html.Delete(0); } else { // 对单向行驶线路的处理 pos = html.Find(STR_SINGLE); if(pos != -1) { tmp = html.Left(pos); nL = tmp.GetLength(); strcpy(szTmp, tmp); // 需要注意不要拷贝越界 StripTags(szTmp); bsecond.info = szTmp; // 简介 html.Delete(0, pos); while(html.GetAt(0) != '>') html.Delete(0); if (html.GetAt(0) == '>') html.Delete(0); } } // 取 去程 信息 pos = html.Find(STR_BACK); if (pos != -1) { tmp = html.Left(pos); nL = tmp.GetLength(); strcpy(szTmp, tmp); StripTags(szTmp); // 去程 SplitStations(szTmp, bsecond.stationsGo); html.Delete(0, pos); while(html.GetAt(0) != '>') html.Delete(0); if (html.GetAt(0) == '>') html.Delete(0); } // 取 回程 信息 pos = html.Find(strBusLine); if (pos != -1) { tmp = html.Left(pos+strlen(strBusLine)); nL = tmp.GetLength(); strcpy(szTmp, tmp); StripTags(szTmp); // 回程 SplitStations(szTmp, bsecond.stationsBack); html.Delete(0, pos); while(html.GetAt(0) != '>') html.Delete(0); if (html.GetAt(0) == '>') html.Delete(0); } bsecond.bSaved = true; // 该线路已经取到了#ifdef SAVE_FILE SaveToFile(fp, bsecond);#endif m_list.SetItemText(j++,2,"下载完成!"); m_pp.SetPos(j-1); UpdateWindow(); // 获取其他线路的链接 pos = html.Find(STR_LINE); if (pos != -1) { // 相关线路 html.Delete(0, pos); pos = html.Find("</div>"); if (pos != -1) { html.Delete(pos, html.GetLength()-pos); } map<string, Bus_t>::iterator ibusT; pos = html.Find(STR_HREF_END); while(pos != -1) { tmp = html.Left(pos); html.Delete(0, pos+strlen(STR_HREF_END)); int p1 = tmp.Find("/""); tmp.Delete(0, p1+1); p1 = tmp.Find("/""); CString href = tmp.Left(p1); strcpy(szTmp, tmp); szTmp[0] = '<'; StripTags(szTmp); ibusT = m_mapBus.find(FormatBusLineToOrder(szTmp)); // 之前没有该信息时,才记录,防重复 if (ibusT == m_mapBus.end()) { t.url = href; t.busName = szTmp; m_mapBus.insert(pair<string, Bus_t>(FormatBusLineToOrder(szTmp), t)); queBus.push(szTmp); strI.Format("%d",i++); int nIndex=m_list.InsertItem(0xffff,strI,0); m_list.SetItemText(nIndex,1,szTmp); m_pp.SetRange(1,i); m_pp.SetPos(j-1); UpdateWindow(); } pos = html.Find(STR_HREF_END); } } } }#ifdef SAVE_FILE fclose(fp);#endif}void CTestDlg::SaveToFile(){ FILE * fp; if( (fp = fopen(GetFileName(), "w+t")) != NULL) { map<string, Bus_t>::iterator ibus = m_mapBus.begin(); for (; ibus != m_mapBus.end(); ++ibus) { Bus_t & t = ibus->second; SaveToFile(fp, t); } fclose(fp); }}// 2011-2-9 beginvoid CTestDlg::SaveToFile(FILE * fp, Bus_t & t){ char szCont[128]; // 需要谨慎,防止数组越界 // 线路 fwrite(t.busName.c_str(), sizeof(char), t.busName.length(), fp); fwrite("/n", 1, 1, fp); // 简介 fwrite(" ", 1, 1, fp); fwrite(t.info.c_str(), sizeof(char), t.info.length(), fp); list<string>::iterator iSt; // 判断是单向还是 双向 线路 if (!t.stationsGo.empty()) { sprintf(szCont, "/n 去程/n "); fwrite(szCont, 1, strlen(szCont), fp); for (iSt=t.stationsGo.begin(); iSt!=t.stationsGo.end(); ++iSt) { fwrite(iSt->c_str(), 1, iSt->length(), fp); fwrite(" ", 1, 1, fp); } sprintf(szCont, "%d站/n 回程/n ", t.stationsGo.size()); fwrite(szCont, 1, strlen(szCont), fp); } else { sprintf(szCont, "/n 单向行驶/n "); fwrite(szCont, 1, strlen(szCont), fp); } for (iSt=t.stationsBack.begin(); iSt!=t.stationsBack.end(); ++iSt) { fwrite(iSt->c_str(), 1, iSt->length(), fp); fwrite(" ", 1, 1, fp); } sprintf(szCont, "%d站/n", t.stationsBack.size()); fwrite(szCont, 1, strlen(szCont), fp);}char * StrToInt(const char * str, int & n){ n = 0; char * s = (char *)str; while (*s && (*s<'0'|| *s>'9')) ++s; while (*s && *s>='0' && *s<='9') { n = n*10 + *s - '0'; ++s; } return s;}string FormatBusLineToOrder(const char sLine[]){ char sNum[10]; char sFormat[128]; const char *p = sLine; char *q = sFormat; while(*p!='/0') { while(*p!='/0' && (*p<'0' || *p>'9') ) *q++ = *p++; int n; while(*p!='/0' && *p>='0' && *p<='9') { p = StrToInt(p, n); *q = '/0'; q += sprintf(sNum, "d", n); strcat(sFormat, sNum); } } *q = '/0'; return sFormat;}char * CTestDlg::GetFileName(){ return ::GetFileName(STR_HTTP_URL);}// url 是以'http://' 开始的网址char * GetFileName(const char * url){ const int SIZE = 128; static char szFile[SIZE+1]; if (strlen(url) < SIZE) { strcpy(szFile, url); } else { strncpy(szFile, url, SIZE); szFile[SIZE] = '/0'; } char *p = szFile; char *q = szFile+7; // 7 is the length of 'http://' while(*p!='/0' && *p!='.') ++p; if (*p == '.') { ++p; *p++ = 't'; *p++ = 'x'; *p++ = 't'; *p = '/0'; } return q; }// 2011-2-9 end// StripTags() rips through a buffer and removes HTML tags from it.// The function uses a static variable to remember its state in case// a HTML tag spans a buffer boundary.void StripTags(LPTSTR pszBuffer){static BOOL bInTag = FALSE;LPTSTR pszSource = pszBuffer;LPTSTR pszDest = pszBuffer;while (*pszSource != '/0'){if (bInTag){if (*pszSource == '>')bInTag = FALSE;pszSource++;}else{if (*pszSource == '<')bInTag = TRUE;else{*pszDest = *pszSource;pszDest++;}pszSource++;}}*pszDest = '/0';}void SplitStations(char * pstation, list<string>& lstStation){ const int LENTH = 256; char st[LENTH]; // 站台名称不应该太长 int i; while(*pstation != '/0') { i = 0; while(*pstation && *pstation == ' ') pstation++; // 过滤前导空格 while(*pstation && *pstation != '-' && i<LENTH) st[i++] = *pstation++; // 异常处理 if (i==LENTH) break; while (i>1 && st[i-1]==' ') i--; // 过滤末尾空格 st[i] = '/0'; if (*pstation == '-') pstation++; lstStation.push_back(st); } // 删除 最后一个站台中的 "(xx站)" 信息 if (!lstStation.empty()) { CString strTotal; strTotal.Format("(%d站)", lstStation.size()); list<string>::iterator iSt = --lstStation.end(); size_t pos = iSt->find(strTotal); if(pos != string::npos) { iSt->resize(pos); while(iSt->at(iSt->length()-1)==' ') iSt->erase(iSt->length()-1); } }}

相关推荐

相关文章