1 /** This represent a connection : we have a fixed number of them 2 * fetchOpen links them with servers 3 * fetchPipe reads those which are linked 4 */ 5 //表达连接相关信息 6 struct Connexion { 7 char state; // socket状态 : EMPTY, CONNECTING, WRITE, OPEN 8 int pos; //请求发送位置标记 9 FetchError err; // 连接如何终止,enum 10 int socket; // socket descriptor number 11 int timeout; // 连接超时时间设置 12 LarbinString request; // http请求报头 13 file *parser; // 解析连接 (a robots.txt or an html file) 14 char buffer[maxPageSize];//下载的网页数据 15 Connexion();//初始化state=emptyC,parser=NULL 16 ~Connexion();//不执行,一旦执行就出错,assert(false) 17 void recycle();//释放*parser,再次初始化request 18 };
该结构体有两个主要的类类型成员:LarbinString 和 file。
LarbinString类(在string.h中声明,在string.cc中实现),主要封装了对HTTP报头字符串的相关操作。
class LarbinString { private: char *chaine; //http报头字符串 uint pos; //http报头当前位置标记 uint size; //http报头大小 public: LarbinString(uint size = STRING_SIZE); //初始化*chaine,pos=0,size ~LarbinString(); //释放*chaine void recycle(uint size = STRING_SIZE); //重新分配*chaine char *getString(); //返回*chaine char *giveString(); //返回*chaine的拷贝 void addChar(char c); //chaine[pos]=c void addString(char *s); //添加*s到*chine后(pos起) void addBuffer(char *s, uint len); //添加*s到*chine后(pos起) inline uint getLength() { return pos; }; //return pos inline char operator [] (uint i) //数组[]运算符重载 void setChar(uint i, char c); //chaine[i]=c };
file类是html类和robots类的基类(二者均继承自file类),用于解析连接所下载的内容(robots.txt 或 .html 文件)。
class file { protected: char *buffer; //connexion中的buffer[maxPageSize(100000)],下载的网页数据 char *posParse; //解析位置 public: file(Connexion *conn); //初始化*buffer=*posParse=conn->buffer;pos=0 virtual ~file(); bool isRobots; // Is it a robots.txt uint pos; //*buffer的当前位置 // a string arrives from the server virtual int inputHeaders(int size) = 0; // just parse headers virtual int endInput() = 0; }; class html : public file { private: url *here; //url地址 char *area; //当前感兴趣区的起始位置 char *contentStart; //真正内容的起始位置,报头之后的内容 url *base; //url基地址 /* manage a new url : verify and send it */ void manageUrl(url *nouv, bool isRedir);