三、字符串对象
1、PyStringObject与PyString_Type
2、创建PyStringObject对象
3、Intern 机制
4、字符缓冲池
5、PyStringObject 效率相关问题
6、Hack PyStringObject
1、PyStringObject与PyString_Type
PyStringObject 对象的声明:
[stringobject.h]
/*
 * In-memory layout of a string object: a variable-size object header,
 * a cached hash, an intern-state flag, and the character data stored
 * inline at the end of the struct.
 */
typedef struct {
    PyObject_VAR_HEAD   /* variable-size header; supplies ob_size (the string length) */
    long ob_shash;      /* cached hash value; -1 means "not computed yet" */
    int ob_sstate;      /* intern state; new strings start as SSTATE_NOT_INTERNED */
    char ob_sval[1];    /* first byte of the character data; objects are allocated
                           with sizeof(PyStringObject) + size bytes, so this array
                           really holds ob_size characters plus a terminating NUL */
} PyStringObject;
PyObject_VAR_HEAD 中含有字符串长度ob_size,ob_sval是字符串存储内存的起始位置,该内存大小为ob_size+1,且ob_sval[ob_size]='\0';
ob_shash保存字符串哈希值,没计算过的默认为-1,计算方法为:
[stringobject.c]
/*
 * Return the hash of string object `a`.
 *
 * The result is cached in a->ob_shash, so the bytes are only scanned the
 * first time the hash is requested. The value -1 is reserved to mean
 * "no hash cached" and is therefore never returned as a hash.
 */
static long
string_hash(PyStringObject *a)
{
    register int len;
    register unsigned char *p;
    register long x;

    /* Fast path: a hash was already computed and stored. */
    if (a->ob_shash != -1)
        return a->ob_shash;
    len = a->ob_size;
    p = (unsigned char *) a->ob_sval;
    /* Seed with the first byte shifted left by 7 bits (for an empty string
       this reads the terminating NUL, giving a seed of 0). */
    x = *p << 7;
    /* Mix in every byte: multiply the running value by 1000003, then XOR
       with the next byte.
       NOTE(review): this relies on signed `long` multiplication wrapping
       around on overflow, which ISO C does not guarantee -- confirm the
       target compilers behave as expected. */
    while (--len >= 0)
        x = (1000003*x) ^ *p++;
    /* Fold the length into the result. */
    x ^= a->ob_size;
    /* -1 is the "not cached" marker, so a computed -1 is remapped to -2. */
    if (x == -1)
        x = -2;
    a->ob_shash = x;
    return x;
}
ob_sstate表示该对象是否被Intern。
PyStringObject对应的类型对象:
[stringobject.c]
/*
 * Type object for "str". The excerpt is abridged ("……" marks omitted
 * slots); it shows the size fields and the hooks that connect the type to
 * string_repr, the number/sequence/mapping method tables, string_hash and
 * string_new. Slot names on the first four entries follow the documented
 * PyTypeObject field order.
 */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                  /* ob_size */
    "str",                              /* tp_name */
    sizeof(PyStringObject),             /* tp_basicsize */
    sizeof(char),                       /* tp_itemsize */
    ……
    (reprfunc)string_repr,              /* tp_repr */
    &string_as_number,                  /* tp_as_number */
    &string_as_sequence,                /* tp_as_sequence */
    &string_as_mapping,                 /* tp_as_mapping */
    (hashfunc)string_hash,              /* tp_hash */
    0,                                  /* tp_call */
    ……
    string_new,                         /* tp_new */
    PyObject_Del,                       /* tp_free */
};
2、创建PyStringObject对象
最一般的方法PyString_FromString:
[stringobject.c]
/*
 * Create a string object from the NUL-terminated C string `str`.
 *
 * str must not be NULL (checked with assert only).
 *
 * Returns a PyObject pointer to the string object. For the empty string and
 * for one-character strings, an already cached object is returned (with its
 * reference count incremented) when one exists.
 * Returns NULL with OverflowError set when strlen(str) exceeds INT_MAX, and
 * the result of PyErr_NoMemory() when the allocation fails.
 */
PyObject *
PyString_FromString(const char *str)
{
    register size_t size;
    register PyStringObject *op;

    assert(str != NULL);
    /* Measure the input and reject lengths above INT_MAX. */
    size = strlen(str);
    if (size > INT_MAX) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too long for a Python string");
        return NULL;
    }
    /* Empty string: reuse the cached nullstring object if it already exists. */
    if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
        /* Allocation statistics, only compiled in with COUNT_ALLOCS. */
        null_strings++;
#endif
        Py_INCREF(op);
        return (PyObject *)op;
    }
    /* One-character string: reuse the object cached in characters[], indexed
       by the byte value, if it already exists. */
    if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
#ifdef COUNT_ALLOCS
        one_strings++;
#endif
        Py_INCREF(op);
        return (PyObject *)op;
    }

    /* Create and initialize a new PyStringObject. */
    /* Inline PyObject_NewVar */
    /* sizeof(PyStringObject) already includes ob_sval[1], which leaves room
       for the terminating NUL on top of the `size` characters. */
    op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
    if (op == NULL)
        return PyErr_NoMemory();
    PyObject_INIT_VAR(op, &PyString_Type, size);
    op->ob_shash = -1;                      /* hash not computed yet */
    op->ob_sstate = SSTATE_NOT_INTERNED;
    memcpy(op->ob_sval, str, size+1);       /* size+1 copies the NUL as well */
    /* Intern (share) short strings: a newly created empty or one-character
       string is interned and then stored in its cache slot so later calls
       take the fast paths above. op is re-read from t after
       PyString_InternInPlace because that call receives &t and may update it.
       The extra Py_INCREF presumably accounts for the reference held by the
       cache slot -- TODO confirm. */
    if (size == 0) {
        PyObject *t = (PyObject *)op;
        PyString_InternInPlace(&t);
        op = (PyStringObject *)t;
        nullstring = op;
        Py_INCREF(op);
    } else if (size == 1) {
        PyObject *t = (PyObject *)op;
        PyString_InternInPlace(&t);
        op = (PyStringObject *)t;
        characters[*str & UCHAR_MAX] = op;
        Py_INCREF(op);
    }
    return (PyObject *) op;
}
首先判断有没有超过长度限制(INT_MAX,即2147483647),然后判断是不是空串,如果是并且之前创建过空串对象(nullstring)就直接返回;接着判断其是否为单个字符,如果是并且字符缓冲池(characters)中已有对应的对象就直接返回;否则,就开始申请内存(sizeof(PyStringObject) + size),创建并初始化对象,对长度为0或1的新对象进行Intern并放入相应缓存,最后返回。
另一个创建PyStringObject对象的途径:
[stringobject.c] PyObject* PyString_FromStringAndSize(const char *str, int size) { register PyStringObject *op; /*处理null string*/ if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCS null_strings++; #endif Py_INCREF(op); return (PyObject *)op; } if (size == 1 && str != NULL && (op = characters[*str & UCHAR_MAX]) != NULL) { #ifdef COUNT_ALLOCS one_strings++; #endif Py_INCREF(op); return (PyObject *)op; } /* Inline PyObject_NewVar */ op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; if (str != NULL) memcpy(op->ob_sval, str, size); op->ob_sval[size] = '