用 Scrapy 写好的爬虫通过 Scrapyd 来管理发布、启动等工作,但每次都手动执行很繁琐;因此考虑把 Hangfire 集成到 web 工程里,定时触发爬虫。
Scrapyd 中启动爬虫的请求如下:
curl http://172.0.0.1:8081/schedule.json -d project=spider -d spider=jrj_spider -u name:pwd
返回结果:
{"node_name": "iZbp1gf15gbzzqwvxbj18jZ", "status": "ok", "project": "spider", "spiders": 1, "version": "1492884063"}
改为在 C# 中用 Flurl.Http 发送同样的请求:
/// <summary>
/// Asks Scrapyd to schedule a run of the <c>jrj_spider</c> spider in the <c>spider</c> project.
/// </summary>
/// <remarks>
/// Sends a form-encoded POST with HTTP basic authentication to the
/// <c>schedule.json</c> endpoint and reads the response body as a string.
/// Exceptions are intentionally NOT caught here: swallowing them would make
/// every run look successful to the caller. Letting them propagate allows a
/// job scheduler such as Hangfire to record the failure and apply its retry policy.
/// </remarks>
/// <returns>A task that completes once the response body has been read.</returns>
public async Task SchedulePollingBackgroundJob()
{
    // The response body is not needed; awaiting ReceiveString() still surfaces
    // transport errors and non-success status codes as exceptions.
    await @"http://172.0.0.1:8081/schedule.json"
        .WithBasicAuth("name", "pwd")
        .PostUrlEncodedAsync(new { project = "spider", spider = "jrj_spider" })
        .ReceiveString();
}
整个过程还是比较简单的,完整的 Startup 代码如下:
/// <summary>
/// Application configuration root. Read-only; assigned once by the constructor.
/// </summary>
public IConfigurationRoot Configuration { get; }
/// <summary>
/// Redis connection, held in a public static field so it is shared across the application.
/// </summary>
public static ConnectionMultiplexer Redis;
/// <summary>
/// Builds the application configuration and opens the shared Redis connection.
/// </summary>
/// <param name="env">Hosting environment; supplies the content root path and the environment name.</param>
public Startup(IHostingEnvironment env)
{
    // Configuration sources are layered in this order: the required base JSON file,
    // the optional per-environment JSON file, then environment variables.
    var rootedBuilder = new ConfigurationBuilder().SetBasePath(env.ContentRootPath);
    var withBaseSettings = rootedBuilder.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true);
    var withEnvironmentSettings = withBaseSettings.AddJsonFile($"appsettings.{env.EnvironmentName}.json", optional: true);
    var configurationBuilder = withEnvironmentSettings.AddEnvironmentVariables();

    Configuration = configurationBuilder.Build();

    // The Redis connection string is read from the "Redis" entry of the connection strings.
    var redisConnectionString = Configuration.GetConnectionString("Redis");
    Redis = ConnectionMultiplexer.Connect(redisConnectionString);
}
/// <summary>
/// Called by the runtime to register services with the container:
/// custom settings, Hangfire, cookie authentication and MVC.
/// </summary>
/// <param name="services">The service collection to add registrations to.</param>
public void ConfigureServices(IServiceCollection services)
{
    // Bind the custom DbSetting options to the "ConnectionStrings" configuration section.
    var connectionStringsSection = Configuration.GetSection("ConnectionStrings");
    services.Configure<DbSetting>(connectionStringsSection);

    // Register Hangfire, using the shared Redis connection as job storage.
    // (Alternative storage: config.UseSqlServerStorage("<connection string>").)
    services.AddHangfire(hangfire =>
    {
        hangfire.UseRedisStorage(Redis);
    });

    // Cookie authentication is the default scheme.
    var authentication = services.AddAuthentication(sharedOptions =>
    {
        sharedOptions.DefaultScheme = CookieAuthenticationDefaults.AuthenticationScheme;
    });
    authentication.AddCookie(opts =>
    {
        opts.LoginPath = new PathString("/account/login");
        opts.AccessDeniedPath = new PathString("/account/denied");
    });

    // Use the default contract resolver so JSON property names keep their declared casing.
    var mvc = services.AddMvc();
    mvc.AddJsonOptions(option =>
    {
        option.SerializerSettings.ContractResolver = new Newtonsoft.Json.Serialization.DefaultContractResolver();
    });
}
/// <summary>
/// Called by the runtime to configure the HTTP request pipeline: error handling,
/// static files, authentication, the Hangfire dashboard and server, the recurring
/// spider-scheduling job, and MVC routing.
/// </summary>
/// <param name="app">The application pipeline builder.</param>
/// <param name="env">Hosting environment; selects development or production error handling.</param>
public void Configure(IApplicationBuilder app, IHostingEnvironment env)
{
    if (env.IsDevelopment())
    {
        app.UseDeveloperExceptionPage();
        app.UseBrowserLink();
    }
    else
    {
        app.UseExceptionHandler("/Home/Error");
    }

    app.UseStaticFiles();

    // Authentication middleware must be registered before the dashboard.
    app.UseAuthentication();

    // Hangfire dashboard, protected by the custom authorization filter.
    // http://docs.hangfire.io/en/latest/configuration/using-dashboard.html#configuring-authorization
    // The dashboard is registered exactly once: a second, parameterless
    // UseHangfireDashboard() would map the same "/hangfire" path again without
    // this filter.
    app.UseHangfireDashboard("/hangfire", new DashboardOptions
    {
        Authorization = new[] { new HangfireDashboardAuthorizationFilter() }
    });
    app.UseHangfireServer();

    // Cron expression notes (online helper: http://cron.qqe2.com,
    // parser: https://github.com/HangfireIO/Cronos):
    //   0 0 * * MON-FRI          -> 00:00, Monday through Friday
    //   Cron.Daily(16, 30)       -> 30 16 * * *
    //   30 16,17,18 * * *        -> 16:30, 17:30 and 18:30 every day
    //   30 16,17,18 * * MON-FRI  -> same times, Monday through Friday
    // The job below runs at 16:30, 17:30 and 18:30 every day, in the server's local time zone.
    RecurringJob.AddOrUpdate(() => SchedulePollingBackgroundJob(), @"30 16,17,18 * * *", TimeZoneInfo.Local);

    app.UseMvc(routes =>
    {
        routes.MapRoute(
            name: "default",
            template: "{controller=Hq}/{action=Index}/{id?}");
    });
}
Attention: If you are using Microsoft.Extensions.Caching.Redis package, you will need to use Hangfire.Redis.StackExchange.StrongName instead, because the former package requires StackExchange.Redis.StrongName instead of StackExchange.Redis!
REFER:
https://github.com/marcoCasamento/Hangfire.Redis.StackExchange