A sample HTML web scraper with C#
====
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace webRead
{
/// <summary>
/// Main form of the sample scraper. Downloads the page whose address is typed
/// into <c>txtAddress</c>, evaluates the XPath expressions from
/// <c>txtnode1</c> / <c>txtnode2</c> against it and writes the extracted text
/// into <c>richTextBox1</c>. Failures are reported through <c>lblerror</c>.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>
    /// Creates the form and builds its designer-generated controls.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Empty click handler. Kept because code outside this file (for example the
    /// designer part of this partial class) may still reference it.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Empty placeholder; performs no work.
    /// </summary>
    private void start()
    {
    }

    /// <summary>
    /// Loads the page at the address in <c>txtAddress</c> and fills
    /// <c>richTextBox1</c> with, in order:
    /// the text of every cell of the first table (when the page has one),
    /// the text of every node matched by the XPath in <c>txtnode1</c>,
    /// and finally the title of each of those nodes, where the title is the
    /// text of the node selected by the XPath in <c>txtnode2</c> relative to it.
    /// On any failure the text box stays empty and <c>lblerror</c> shows a
    /// message starting with "error".
    /// </summary>
    /// <param name="sender">Control that raised the event (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button1_Click_1(object sender, EventArgs e)
    {
        lblerror.Text = "";
        richTextBox1.Clear();

        string itemXPath = txtnode1.Text;
        string titleXPath = txtnode2.Text;
        string address = txtAddress.Text;

        // Reject blank input up front instead of starting a download that is
        // bound to fail inside the URL or XPath parser.
        if (string.IsNullOrWhiteSpace(address)
            || string.IsNullOrWhiteSpace(itemXPath)
            || string.IsNullOrWhiteSpace(titleXPath))
        {
            lblerror.Text = "error: address and both node expressions are required";
            return;
        }

        try
        {
            // NOTE(review): this is a synchronous load inside a click handler,
            // so the form does not process input until it returns.
            HtmlWeb web = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(address);

            // SelectNodes returns null (not an empty collection) when nothing
            // matches, so every XPath result is checked before it is used.
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(itemXPath);
            if (nodes == null)
            {
                lblerror.Text = "error: no nodes matched the first node expression";
                return;
            }

            StringBuilder output = new StringBuilder();

            // Dump the cells of the first table, if the page has one.
            HtmlNode table = doc.DocumentNode.SelectSingleNode("//table[1]");
            if (table != null)
            {
                // The leading '.' keeps the search relative to this table
                // rather than restarting from the document root.
                HtmlNodeCollection cells = table.SelectNodes(".//tr/td");
                if (cells != null)
                {
                    foreach (HtmlNode cell in cells)
                    {
                        output.AppendLine(cell.InnerText);
                    }
                }
            }

            List<News> newsList = new List<News>();
            foreach (HtmlNode item in nodes)
            {
                // NOTE(review): the original emitted each item's text twice;
                // that is preserved here so the displayed output is unchanged.
                // Confirm whether the duplication is intended.
                output.AppendLine(item.InnerText);
                output.AppendLine(item.InnerText);

                HtmlNode titleNode = item.SelectSingleNode(titleXPath);

                News news = new News();
                // An item without a matching title node gets an empty title
                // instead of aborting the whole run.
                news.Title = titleNode != null ? titleNode.InnerText : string.Empty;
                newsList.Add(news);
            }

            foreach (News news in newsList)
            {
                output.AppendLine(news.Title);
            }

            richTextBox1.Text = output.ToString();
        }
        catch (Exception ex)
        {
            // Top-level UI handler: report the failure instead of crashing,
            // but surface the reason rather than hiding it.
            lblerror.Text = "error: " + ex.Message;
            return;
        }
    }
}
}