比如下面这种格式化的文本文件(UCI Statlog German Credit 数据集的 german.data),文件说明及下载地址见 UCI 机器学习数据库中 /pub/machine-learning-databases/statlog/german/ 目录的索引页。
% Load the Statlog German Credit data set (german.data), encode every
% attribute as a number and build min-max normalised train/test splits.
%
% Each row of german.data is expected to hold 20 whitespace-separated
% attributes followed by the class label (1 or 2). Categorical attributes
% are written as 'A<column><code>', e.g. 'A12' in column 1 (code 2) or
% 'A103' in column 10 (code 3).
%
% Workspace variables produced:
%   X, y            - attributes (one column per sample) and class labels
%   posX, negX      - samples of class 1 / class 2
%   trainX, trainY  - first 350 class-1 and first 150 class-2 samples
%   testX, testY    - all remaining samples and their labels
%   s1              - mapminmax settings fitted on the training set
dataFile = 'german.data';
fid = fopen(dataFile, 'r');
if fid == -1
    error('german:fileOpen', 'Cannot open %s for reading.', dataFile);
end
try
    % Fields are separated by whitespace, so the format must not contain
    % literal commas: textscan would stop at the first unmatched literal.
    C = textscan(fid, '%s %d %s %s %d %s %s %d %s %s %d %s %d %s %s %d %s %d %s %s %d');
catch readErr
    fclose(fid); % do not leak the handle when parsing fails
    rethrow(readErr);
end
fclose(fid);

n1 = numel(C);      % number of columns (20 attributes + class label)
n2 = numel(C{end}); % number of samples (complete rows)
if n2 == 0 || any(cellfun(@numel, C) ~= n2)
    error('german:badFormat', ...
        '%s is empty or contains an incomplete row.', dataFile);
end

X = zeros(n1, n2);
for col = 1:n1
    if iscell(C{col})
        % Categorical column: strip the 'A<col>' prefix and keep the code,
        % e.g. 'A12' => 2 (column 1), 'A103' => 3 (column 10).
        codes = str2double(regexprep(C{col}, sprintf('^A%d', col), ''));
        if any(isnan(codes))
            error('german:badCode', ...
                'Column %d of %s contains an unexpected category code.', ...
                col, dataFile);
        end
        X(col, :) = codes;
    else
        % Numeric column: textscan returned int32, stored here as double.
        X(col, :) = C{col};
    end
end

% The last row is the class label; everything above it is an attribute.
y = X(end, :);
X(end, :) = [];
posX = X(:, y == 1); % 700 samples in the original data set
negX = X(:, y == 2); % 300 samples in the original data set

% Fixed-size training split; the remainder of each class is the test set.
nPosTrain = 350;
nNegTrain = 150;
if size(posX, 2) < nPosTrain || size(negX, 2) < nNegTrain
    error('german:tooFewSamples', ...
        'Need at least %d class-1 and %d class-2 samples, found %d and %d.', ...
        nPosTrain, nNegTrain, size(posX, 2), size(negX, 2));
end
trainX = [posX(:, 1:nPosTrain), negX(:, 1:nNegTrain)];
trainY = [ones(1, nPosTrain), 2*ones(1, nNegTrain)];
testX = [posX(:, nPosTrain+1:end), negX(:, nNegTrain+1:end)];
% Derive the test labels from the actual split sizes so that they always
% line up with the columns of testX.
testY = [ones(1, size(posX, 2) - nPosTrain), 2*ones(1, size(negX, 2) - nNegTrain)];

% Fit the normalisation on the training set only, then reuse it for the
% test set.
[trainX, s1] = mapminmax(trainX);
testX = mapminmax('apply', testX, s1);