天天躁日日躁狠狠躁AV麻豆-天天躁人人躁人人躁狂躁-天天澡夜夜澡人人澡-天天影视香色欲综合网-国产成人女人在线视频观看-国产成人女人视频在线观看

asp.net(c#)做一個網頁數據采集工具

通過這個軟件一兩天就完成了幾千產品數據的錄入,可見很多工作不是一味用人工去做,作為一個程序員,就是要讓那些經常做重復性的、繁瑣的工作中的人解放出來。下面只是寫了一些核心代碼,而且采集必須要和對應網站相掛鉤,作者:鄭少群

復制代碼 代碼如下:
// Extracts the links found on a product list page and writes them, one per line, into textBox5.
// textBox1 is treated as the list-page URL and textBox2 as the site domain that is prepended to
// root-relative links (both are required by the validation below).
private void button1_Click(object sender, EventArgs e)
{
    string listUrl = textBox1.Text.Trim();
    string domain = textBox2.Text.Trim();

    if (listUrl == "" || domain == "")
    {
        MessageBox.Show("網址和域名不能為空!", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
        return;
    }

    try
    {
        // Fetch the page the user asked for (the validated URL was previously ignored in favour
        // of a hard-coded address).
        string Html = inc.GetHtml(listUrl);

        // Match every href attribute together with its delimited value.
        ArrayList al = inc.GetMatchesStr(Html, @"href\s*=\s*(?:[\'\""\s](?<1>[^\""\']*)[\'\""])");

        StringBuilder sb = new StringBuilder();
        foreach (object var in al)
        {
            // Reduce the raw match (e.g. href="/p/1.html") to the bare link text.
            string a = var.ToString().Replace("\"", "").Replace("'", "");
            // The pattern above allows whitespace around '=', so strip it here as well.
            a = Regex.Replace(a, @"href\s*=\s*", "", RegexOptions.IgnoreCase).Trim();

            // Root-relative links are resolved against the configured domain.
            if (a.StartsWith("/"))
            {
                a = domain + a;
            }

            // Add a scheme only when the link does not already carry one.
            if (!a.StartsWith("http://", StringComparison.OrdinalIgnoreCase) &&
                !a.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                a = "http://" + a;
            }

            sb.Append(a).Append("\r\n");
        }

        // Show the extracted URLs in the text box, one link per line.
        textBox5.Text = sb.ToString();

        MessageBox.Show("共提取" + al.Count.ToString() + "個鏈接", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception err)
    {
        MessageBox.Show("提取出錯!原因:" + err.Message, "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}




// Processes every collected product page: extracts the needed fields from the page HTML with
// string operations, saves them to the local Access database (table Tb_Product), and downloads
// each product image into the local Images folder.
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    // The sender is the BackgroundWorker; it drives the progress bar and cancellation.
    BackgroundWorker worker = (BackgroundWorker)sender;

    // NOTE(review): this handler reads textBox2/textBox5 and writes toolStripStatusLabel1 and
    // calls DataBind/ShowError. If DoWork runs on a worker thread (the usual BackgroundWorker
    // setup), those UI accesses belong in ProgressChanged/RunWorkerCompleted - confirm and move.

    // Start from an empty product table.
    Database.ExecuteNonQuery("delete from Tb_Product");

    // One URL per line; commas are still accepted as separators. Blank entries are dropped and
    // the original casing is kept because URL paths can be case-sensitive.
    string[] Urls = textBox5.Text.Split(new string[] { "\r\n", "\n", "," }, StringSplitOptions.RemoveEmptyEntries);
    string domain = textBox2.Text.Trim();
    string ImageDir = AppDomain.CurrentDomain.BaseDirectory + "Images\\";
    StringBuilder ErrorStr = new StringBuilder();
    DataTable dt2 = new DataTable();

    using (OleDbConnection conn = new OleDbConnection(Database.ConnectionStrings))
    using (OleDbDataAdapter da = new OleDbDataAdapter("select * from Tb_Product", conn))
    // The command builder is not referenced again, but it must exist so that da.Update can
    // generate the INSERT command from the SELECT statement.
    using (OleDbCommandBuilder cb = new OleDbCommandBuilder(da))
    {
        // Fill only to obtain the table schema; rows are added below.
        da.Fill(dt2);
        dt2.Rows.Clear();

        // Visit each collected URL in turn.
        for (int i = 0; i < Urls.Length; i++)
        {
            // Stop visiting pages once cancellation is requested; rows gathered so far are still saved.
            if (worker.CancellationPending)
            {
                break;
            }

            string url = Urls[i].Trim();
            try
            {
                // Skip blank entries instead of aborting the whole run (which used to lose all rows).
                if (url.Length == 0)
                {
                    continue;
                }

                // Download the HTML of this product page.
                string html = inc.GetHtml(url);
                DataRow NewRow = dt2.NewRow();

                // Product name: the text between <title> and </title>.
                string ProductName = html.Substring(html.IndexOf("<title>") + 7);
                ProductName = ProductName.Remove(ProductName.IndexOf("</title>")).Trim();
                NewRow["ProductName"] = ProductName;

                // Product number: whatever follows "Model:" inside the product name.
                NewRow["ModelId"] = ProductName.Substring(ProductName.IndexOf("Model:") + 6).Trim();

                // Product description. These markers and offsets are specific to the target
                // site's HTML and must be adapted for other sites.
                string Introduce = html.Substring(html.IndexOf("Product Details") + 26);
                Introduce = Introduce.Remove(Introduce.IndexOf("</table>") + 8).Trim();
                NewRow["Introduce"] = Introduce;

                // Image URL: the src value of the first <img> after "align=center>", prefixed with the domain.
                string ProductImage = html.Substring(html.IndexOf("align=center><img") + 17);
                ProductImage = domain + ProductImage.Substring(ProductImage.IndexOf("src=\"") + 5);
                ProductImage = ProductImage.Remove(ProductImage.IndexOf("\""));
                string localImage = ImageDir + ProductImage.Substring(ProductImage.LastIndexOf("/") + 1);
                try
                {
                    inc.DownFile(ProductImage, localImage);
                }
                catch (Exception)
                {
                    // A failed image download is recorded but does not discard the product row.
                    ErrorStr.Append("下載圖片失敗,圖片地址:" + localImage + "\r\n");
                }

                dt2.Rows.Add(NewRow);

                //Thread.Sleep(100);
                worker.ReportProgress((i + 1) * 100 / Urls.Length, i);
                // Progress text shown in the status bar.
                toolStripStatusLabel1.Text = "處理進度:" + (i + 1).ToString() + "/" + Urls.Length.ToString();
            }
            catch (Exception err)
            {
                // Any parsing or download failure is logged per URL and the loop continues.
                ErrorStr.Append("采集錯誤:" + err.Message + ";網址:" + url + "\r\n");
            }
        }

        // Persist the collected rows.
        da.Update(dt2);
    }

    DataBind(dt2);
    ShowError(ErrorStr.ToString());
}

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns the response body as text.
/// The body is decoded with the character set declared by the response; when none is declared
/// or it is not a known encoding name, UTF-8 is used instead.
/// </summary>
/// <param name="url">URL of the page to fetch.</param>
/// <returns>The complete response body.</returns>
public static string GetHtml(string url)
{
    WebRequest request = WebRequest.Create(url);

    // Dispose the response and reader even when reading fails, so connections are not leaked.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        Encoding encoding = Encoding.UTF8;
        string charset = response.CharacterSet;
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                // Some servers quote the charset value; strip quotes before the lookup.
                encoding = Encoding.GetEncoding(charset.Trim(' ', '"', '\''));
            }
            catch (ArgumentException)
            {
                // Unknown charset name: keep the UTF-8 default rather than failing the download.
            }
        }

        using (Stream body = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(body, encoding))
        {
            return sr.ReadToEnd();
        }
    }
}


/// <summary>
/// Returns every distinct substring of <paramref name="htmlCode"/> that matches
/// <paramref name="strRegex"/>, sorted in ascending order.
/// </summary>
/// <param name="htmlCode">Text to search.</param>
/// <param name="strRegex">Regular expression; applied with IgnoreCase and Multiline.</param>
/// <returns>An ArrayList of the unique matched strings (duplicates compared case-sensitively).</returns>
public static ArrayList GetMatchesStr(string htmlCode, string strRegex)
{
    ArrayList al = new ArrayList();
    // Tracks matches already collected so duplicate filtering is a lookup, not a rescan of the list.
    Hashtable seen = new Hashtable();

    Regex r = new Regex(strRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
    foreach (Match match in r.Matches(htmlCode))
    {
        string strNew = match.Value;
        if (!seen.ContainsKey(strNew))
        {
            seen.Add(strNew, null);
            al.Add(strNew);
        }
    }

    al.Sort();

    return al;
}

/// <summary>
/// Downloads the resource at <paramref name="Url"/> and writes it to the file <paramref name="Path"/>.
/// An existing file at that path is replaced.
/// </summary>
/// <param name="Url">Address of the resource to download.</param>
/// <param name="Path">Destination file path.</param>
public static void DownFile(string Url, string Path)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

    // Dispose the response and its stream as well as the file, so nothing leaks on failure.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    // FileMode.Create truncates an existing file; OpenOrCreate would leave stale trailing bytes
    // when the new download is shorter than the old file.
    using (FileStream fs = new FileStream(Path, FileMode.Create, FileAccess.Write))
    {
        byte[] buffer = new byte[4096];
        int n;
        while ((n = stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            fs.Write(buffer, 0, n);
        }
    }
}

AspNet技術 asp.net(c#)做一個網頁數據采集工具,轉載需保留來源!

鄭重聲明:本文版權歸原作者所有,轉載文章僅為傳播更多信息之目的,如作者信息標記有誤,請第一時間聯系我們修改或刪除,多謝。

主站蜘蛛池模板: 在线看片韩国免费人成视频 | 热综合一本伊人久久精品 | 伊人久久五月丁婷婷 | 国产亚洲精品视频亚洲香蕉视 | 美女图片131亚洲午夜 | 插我一区二区在线观看 | 免费视频国产在线观看网站 | 亚洲在线视频自拍精品 | 亚洲 制服 欧美 中文字幕 | 交换娇妻呻吟声不停中文字幕 | 天天色天天综合网 | 亚洲无线码一区在线观看 | 富婆大保健嗷嗷叫普通话对白 | 佐山爱巨大肥臀在线 | 真人做受120分钟免费看 | 欧美丝袜女同 | 视频三区 国产盗摄 | 国产亚洲精品久久久久久入口 | 久久综合网久久综合 | 久久免费黄色 | 偷拍精品视频一区二区三区 | 日日操日日射 | 麻豆精品2021最新 | 全彩黄漫火影忍者纲手无遮挡 | 一本色道久久综合亚洲精品 | 国产手机在线视频 | 国产精品涩涩涩视频网站 | 小莹的性荡生活45章 | 国产自拍视频在线一区 | 青青草原亚洲 | 午夜dj影院视频观看 | 亚洲三级黄色片 | 69亞洲亂人倫AV精品發布 | 花蝴蝶在线直播观看 | 黄小飞二人转 | 欧美成人精品高清在线观看 | 9位美女厕所撒尿11分 | 把她带到密室调教性奴 | 嫩草在线播放 | 欧美日韩亚洲一区二区三区在线观看 | 无套内谢大学生A片 |