今天刚好周日,重新看了下关于线程同步的知识点,记录下,当作笔记
1. 什么是线程同步?
简单地说,当多个线程访问同一个共享资源时,让一个线程执行操作(例如递增和递减)期间其他线程处于等待,这种协调方式被称为线程同步。以下代码先演示了缺少同步时出现的问题,再演示使用 lock 之后的正确结果
/// <summary>
/// Demo entry point: hammers an unsynchronized counter and a lock-protected
/// counter from three threads each and prints the final totals.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Unsynchronized counter: the printed total is usually not zero.
        var unsafeCounter = new Counter();
        RunOnThreeThreads(unsafeCounter);
        Console.WriteLine($"Total count :{unsafeCounter.Count}");
        Console.WriteLine("----------华丽的分割线-------------");

        // Lock-protected counter: every increment is matched by a decrement.
        Console.WriteLine($"Correct counter");
        var lockedCounter = new CounterWithLock();
        RunOnThreeThreads(lockedCounter);
        Console.WriteLine($"Total count :{lockedCounter.Count}");

        Console.ReadKey();
    }

    /// <summary>
    /// Creates three threads that all run <see cref="TestCounter"/> against the
    /// same counter, starts them, and blocks until all three have finished.
    /// </summary>
    static void RunOnThreeThreads(CounterBase counter)
    {
        var workers = new Thread[3];
        for (var slot = 0; slot < workers.Length; slot++)
        {
            workers[slot] = new Thread(() => TestCounter(counter));
        }

        foreach (var worker in workers)
        {
            worker.Start();
        }

        foreach (var worker in workers)
        {
            worker.Join();
        }
    }

    /// <summary>
    /// Performs 100000 increment/decrement pairs on the given counter.
    /// </summary>
    static void TestCounter(CounterBase c)
    {
        for (var remaining = 100000; remaining > 0; remaining--)
        {
            c.Increment();
            c.Decrement();
        }
    }
}

/// <summary>
/// Common contract for the counter variants used by the demo.
/// </summary>
abstract class CounterBase
{
    public abstract void Increment();

    public abstract void Decrement();
}

/// <summary>
/// Counter without any synchronization; its read-modify-write updates can
/// interleave when several threads call it concurrently.
/// </summary>
class Counter : CounterBase
{
    public int Count { get; set; }

    public override void Increment() => Count++;

    public override void Decrement() => Count--;
}

/// <summary>
/// Counter whose updates are serialized through a private lock object.
/// </summary>
class CounterWithLock : CounterBase
{
    private readonly object _syncRoot = new object();

    public int Count { get; private set; }

    public override void Increment()
    {
        lock (_syncRoot)
        {
            Count++;
        }
    }

    public override void Decrement()
    {
        lock (_syncRoot)
        {
            Count--;
        }
    }
}
当必须使用共享状态时,我们如何避免线程因等待锁而阻塞呢?从书中看到另外一种解决方案:原子操作。所谓原子操作,就是说一个操作只占用一个量子的时间,一次就可以完成,中途不会被其他线程打断;只有该操作完成后,其他线程才能执行其他操作。因此,其他线程无须通过锁来等待当前操作完成,避免了使用锁,也排除了死锁的可能。
/// <summary>
/// Counter that uses atomic <see cref="Interlocked"/> operations instead of a lock.
/// </summary>
class CounterNoLock : CounterBase
{
    private int _count;

    /// <summary>Current value of the counter.</summary>
    public int Count => _count;

    // Interlocked produces the correct result without locking any object.
    // (This comment must stay on its own line: as a trailing "//" comment on a
    // collapsed single line it would swallow the two overrides below.)
    public override void Increment()
    {
        Interlocked.Increment(ref _count);
    }

    public override void Decrement()
    {
        Interlocked.Decrement(ref _count);
    }
}
2. 使用SemaphoreSlim类
该类限制了同时访问同一个资源的线程数量
/// <summary>
/// Demo of <see cref="SemaphoreSlim"/>: six tasks compete for a resource that
/// admits at most three callers at the same time.
/// </summary>
class Program
{
    // At most three callers may be inside ReadDataBase concurrently.
    static readonly SemaphoreSlim _semaphore = new SemaphoreSlim(3);

    /// <summary>
    /// Simulates a database read that holds one semaphore permit for
    /// <paramref name="seconds"/> seconds.
    /// </summary>
    /// <param name="name">Label written to the trace output.</param>
    /// <param name="seconds">How long the simulated read takes.</param>
    static void ReadDataBase(string name, int seconds)
    {
        _semaphore.Wait();
        try
        {
            Trace.WriteLine($"{name}进入==>{DateTime.Now}");
            Thread.Sleep(TimeSpan.FromSeconds(seconds));
            Trace.WriteLine($"{name}释放==>{DateTime.Now}");
        }
        finally
        {
            // Always hand the permit back, even if the guarded section throws;
            // otherwise the semaphore would permanently lose one slot.
            _semaphore.Release();
        }
    }

    static void Main(string[] args)
    {
        var task = new Task[6];
        for (var i = 1; i <= 6; i++)
        {
            // Copy loop-dependent values into per-iteration locals for the closure.
            string threadName = $"Thread:{i}";
            int secondsWait = i * 2;
            task[i - 1] = Task.Run(() => ReadDataBase(threadName, secondsWait));
        }

        // Wait for every reader so the demo cannot be ended mid-run and task
        // failures are observed instead of being silently dropped.
        Task.WaitAll(task);
        Console.ReadKey();
    }
}
输出结果如下
Thread:2进入==>2019/11/17 21:51:15
Thread:4进入==>2019/11/17 21:51:15
Thread:3进入==>2019/11/17 21:51:15
Thread:2释放==>2019/11/17 21:51:20
Thread:1进入==>2019/11/17 21:51:20
目前我设置了同一时间访问资源的线程数量为3个,那么我们可以通过观察输出结果,当有3个线程进入后,其他线程处于等待状态,直到线程中的某一个完成工作并且调用了_semaphore.Release方法来发出信号,其他线程才能进入
3. 下面写一个小Demo,爬取知乎的图片,当作学习用哈
下面我们来爬爬该链接吧:https://www.zhihu.com/question/34243513
3.1 先分析请求
简单粗暴地按F12进入控制台,点击Network一栏,找对应的请求。按道理来说,返回回答内容的请求size应该最大,所以我们直接按Size进行一波降序。
我们可以快速地找到对应的Request URL,如下图所示,并且还能拿到对应的json,这样就好办了!!!

3.2 分析请求的参数
由下图我们可知
limit:显示的条数
offset:页面的偏移量

3.3 分析返回的json
由json我们可知,用户回答的内容是通过content字段返回的,并且其中包含图片,那这样就简单了:反序列化后提取content中的内容,通过正则把对应的图片地址匹配出来,再进行下载

3.4 开始写代码了,先上图,再讲一下代码的逻辑吧


3.4.1 采用的是生产者/消费者的模式,生产者负责爬取,消费者负责保存
生产者:
/// <summary>
/// Producer: requests one page of answers for the Zhihu question, extracts the
/// image URLs embedded in each answer's HTML content, enqueues them on
/// <c>urlQueue</c> and signals the download worker.
/// </summary>
/// <param name="offset">Paging offset sent to the API (the page size is fixed at 20).</param>
/// <remarks>
/// A non-OK status, an unusable payload, or any exception records
/// <paramref name="offset"/> in <c>errorQueue</c>; nothing is thrown to the caller.
/// </remarks>
public static void ReptilePicture(int offset)
{
    try
    {
        // C# interpolation is "{offset}". The previous "${offset}" (JavaScript
        // template syntax) emitted a literal '$', producing "offset=$20".
        string url = $"https://www.zhihu.com/api/v4/questions/34243513/answers?include=data[*].is_normal,admin_closed_comment,reward_info,is_collapsed,annotation_action,annotation_detail,collapse_reason,is_sticky,collapsed_by,suggest_edit,comment_count,can_comment,content,editable_content,voteup_count,reshipment_settings,comment_permission,created_time,updated_time,review_info,relevant_info,question,excerpt,relationship.is_authorized,is_author,voting,is_thanked,is_nothelp,is_labeled,is_recognized,paid_info,paid_info_content;data[*].mark_infos[*].url;data[*].author.follower_count,badge[*].topics&offset={offset}&limit=20&sort_by=default&platform=desktop";
        var client = new RestClient(url);
        client.AddDefaultHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36");
        var request = new RestRequest(string.Empty, Method.GET);
        var res = client.Execute(request);
        if (res.StatusCode != HttpStatusCode.OK)
        {
            errorQueue.Enqueue(offset);
            return;
        }

        var entity = JsonConvert.DeserializeObject<ZhJsonEntity>(res.Content);
        if (entity == null || entity.data == null)
        {
            // A 200 response without a usable payload counts as a failed page.
            errorQueue.Enqueue(offset);
            return;
        }

        var contentArray = entity.data;
        if (contentArray.Length == 0)
        {
            return;
        }

        // Verbatim string so the quote and backslashes reach the regex engine
        // unchanged. Group 1 captures the URL itself, which avoids assuming a
        // fixed-width 'img src="' prefix even though the pattern allows \s*.
        const string rule = @"img\s*src=""(https://pic[0-9]{1,}\.zhimg\.com/[0-9]{1,}/.+?jpg)";
        foreach (var answer in contentArray)
        {
            string content = answer.content;
            if (string.IsNullOrWhiteSpace(content))
            {
                continue;
            }

            foreach (Match item in Regex.Matches(content, rule))
            {
                string urlDownload = item.Groups[1].Value;
                if (!string.IsNullOrWhiteSpace(urlDownload))
                {
                    urlQueue.Enqueue(urlDownload);
                }
            }
        }

        Console.WriteLine($"已采集完:{offset}===>{DateTime.Now}");
        _wh.Set(); // Signal the worker thread that new URLs may be available.
    }
    catch (Exception)
    {
        // Best effort: remember the failed page so the caller can report it.
        errorQueue.Enqueue(offset);
    }
}
消费者:
/// <summary>
/// Consumer: loops forever taking image URLs from <c>urlQueue</c>, downloading
/// each one and saving it as a .jpg file under <c>Program.downLoadPath</c>.
/// When the queue is empty it blocks on <c>_wh</c> until the producer signals.
/// </summary>
public static void DownLoadPic()
{
    while (true)
    {
        string url = string.Empty;
        lock (_locker)
        {
            if (urlQueue.Count > 0)
            {
                urlQueue.TryDequeue(out url);
                // A blank entry taken from the queue ends the worker.
                if (string.IsNullOrWhiteSpace(url)) return;
            }
        }

        if (string.IsNullOrWhiteSpace(url))
        {
            // Nothing queued: sleep until the producer calls _wh.Set().
            _wh.WaitOne();
            continue;
        }

        var client = new RestClient(url);
        var request = new RestRequest(string.Empty, Method.GET);
        byte[] bytes = client.DownloadData(request);
        if (bytes == null || bytes.Length == 0)
        {
            // NOTE(review): DownloadData appears to return no data on a failed
            // request; skip it rather than let File.WriteAllBytes throw on the
            // worker thread or write an empty file.
            Console.WriteLine($"下载失败==>{url}");
            continue;
        }

        // Path.Combine supplies the directory separator; the previous "\"
        // literal was an unescaped backslash.
        string filePath = Path.Combine(Program.downLoadPath, DateTime.Now.Ticks + ".jpg");
        File.WriteAllBytes(filePath, bytes);
        Thread.Sleep(TimeSpan.FromSeconds(2));
        Console.WriteLine($"下载成功==>{DateTime.Now}");
    }
}
Main()方法:
/// <summary>
/// Entry point of the crawler demo: prepares the download directory, starts the
/// download worker thread, crawls ten pages of answers on the main thread, and
/// reports every page offset that failed.
/// </summary>
static void Main(string[] args)
{
    // CreateDirectory does nothing when the directory already exists, so no
    // separate Exists check is needed.
    Directory.CreateDirectory(downLoadPath);

    var offsetIds = new List<int> { 0, 20, 40, 60, 80, 100, 120, 140, 160, 180 };

    // Create the worker thread. Its loop never returns on its own (it blocks
    // waiting for a signal when the queue is empty), so it runs as a background
    // thread; otherwise the process could not exit after the final key press.
    _worker = new Thread(DownLoadPic) { IsBackground = true };
    _worker.Start();

    foreach (var offsetId in offsetIds)
    {
        ReptilePicture(offsetId);
    }

    // Report every failed page, not just the first one in the queue.
    int errorOffset;
    while (errorQueue.TryDequeue(out errorOffset))
    {
        Console.WriteLine($"异常offest:{errorOffset}");
    }

    Console.WriteLine("ok");
    Console.ReadKey();
}
源码地址:https://github.com/SmallHan/ThreadDemo/tree/master/ReptileZhiHu
结束语:学下线程同步,并写了小Demo用于学习,不对的地方请各位大佬多多指教哈!!