2014-05-02 07:49
原题:
Given a set of n points (coordinates in the 2D plane) within a rectangular space, find a line (ax+by=c) for which the sum of the perpendicular distances from all the points is minimum. This has a general use case: in a village there are a few houses, and you have to lay a road such that the sum of the lengths of all the approach roads from each house is minimum.
题目:给定二维平面上的n个点,请找到一条直线,使得这些点到这条直线的距离之和最小。
解法:点(x, y)到直线 a * x + b * y + c = 0 的距离是|a * x + b * y + c| / sqrt(a * a + b * b)。那么这个距离之和就是一堆绝对值的和。你可能很快会想到线性回归的那幅图片:一堆点均匀分散在一条直线的两侧。很遗憾,这个不是最优解,因为最小二乘满足的条件是方差最小,也就是平方和最小(学过线性代数应该知道p-范数的概念,平方和再开方就是2-范数。而范数,就是线性空间里对长度的度量算子。范数最小的时候,也就是我们用最小二乘期待的结果)。平方和最小和绝对值和最小并非同一概念。那么绝对值的和什么时候最小呢?我找到了一篇不太知名的论文,源头链接在此(里面有人提到了那篇论文的链接,我不直接贴链接是因为那人直接把论文内容贴了出来,有侵权嫌疑)。证明实在太绕了,所以我没工夫仔细推理完,只是偷懒引用了其中的一个结论:满足距离之和最短的直线,一定会穿过n个点中的两点。有这个结论,就可以用两层循环遍历所有的直线组合,然后再用一层循环计算所有点的距离。选取距离之和最小的那条直线作为结果。这样写出来的算法,时间复杂度是O(n^3)的。看来不靠谱,但好歹是个解法吧。如果你用最小二乘法来做,时间复杂度应该是O(n)的(只需要对所有点求几个累加和),我相信得到的就算不是最优解,也应该很接近。所以有时你连算法都拿不准的时候,还不如用次优解来代替最优解,因为其他方面的优势可以作为权衡因素。
代码:
// http://www.careercup.com/question?id=4907555595747328
//
// Least-absolute-distance line through a set of 2D points.
//
// Input (whitespace separated, repeated until EOF or a non-positive count):
//     n  x1 y1  x2 y2 ... xn yn
// Output per case: the chosen line as "<a>x<+b>y<+c>=0" (zero terms omitted)
// followed by the sum of perpendicular distances on the next line.
//
// Method: brute force over every pair of input points, relying on the result
// quoted in the notes accompanying this code (not proved here) that a line
// minimising the sum of distances passes through two of the points. O(n^3).
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <vector>

// A line in implicit form a*x + b*y + c = 0.
struct Line {
    double a;
    double b;
    double c;
    Line(double _a = 0, double _b = 0, double _c = 0): a(_a), b(_b), c(_c) {}
};

// A point in the plane.
struct Point {
    double x;
    double y;
    Point(double _x = 0, double _y = 0): x(_x), y(_y) {}
};

// Perpendicular distance from p to line.
// The line must not be degenerate (a == 0 and b == 0); otherwise the division
// by zero produces NaN or infinity.
double calcDist(const Point &p, const Line &line)
{
    return std::abs(line.a * p.x + line.b * p.y + line.c) /
           std::sqrt(line.a * line.a + line.b * line.b);
}

// Fills `line` with the line through p1 and p2.
// (a, b) is the normal (p2.y - p1.y, p1.x - p2.x); c is taken as the average
// of the values obtained from p1 and from p2 so that rounding error is shared
// symmetrically between the two points.
// If p1 and p2 coincide the result is the degenerate line a == b == 0.
void calcLine(const Point &p1, const Point &p2, Line &line)
{
    line.a = p2.y - p1.y;
    line.b = p1.x - p2.x;
    line.c = -((line.a * p1.x + line.b * p1.y) + (line.a * p2.x + line.b * p2.y)) / 2.0;
}

// True when the line has a zero normal vector and therefore no direction.
static bool isDegenerate(const Line &line)
{
    return line.a == 0.0 && line.b == 0.0;
}

// Searches all lines through two distinct input points and stores the one with
// the smallest sum of perpendicular distances in `best`.
// Returns that sum. When no pair of distinct points exists (a single point, or
// all points coincide) `best` is the horizontal line through the first point
// and the returned sum is 0. For an empty input `best` is y = 0 and the sum 0.
static double findBestLine(const std::vector<Point> &pts, Line &best)
{
    if (pts.empty()) {
        best = Line(0, 1, 0);
        return 0.0;
    }

    // Fallback used when every pair of points is coincident.
    best = Line(0, 1, -pts[0].y);
    double best_dist = -1;  // negative means "no candidate accepted yet"

    const std::size_t n = pts.size();
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = i + 1; j < n; ++j) {
            Line candidate;
            calcLine(pts[i], pts[j], candidate);
            if (isDegenerate(candidate)) {
                // Duplicate points define no line; evaluating it would feed
                // NaN into the minimum and poison every later comparison.
                continue;
            }

            double dist = 0;
            for (std::size_t k = 0; k < n; ++k) {
                if (k == i || k == j) {
                    continue;  // the defining points lie on the line
                }
                dist += calcDist(pts[k], candidate);
            }

            if (best_dist < 0 || dist < best_dist) {
                best_dist = dist;
                best = candidate;
            }
        }
    }

    return best_dist < 0 ? 0.0 : best_dist;
}

// Prints one term "<coef><suffix>" unless coef is zero. Every term after the
// first printed one carries an explicit sign so the terms cannot run together.
static void printTerm(std::ostream &out, double coef, const char *suffix, bool &printed_any)
{
    if (coef == 0.0) {
        return;
    }
    if (printed_any) {
        out << std::showpos;
    }
    out << coef << suffix << std::noshowpos;
    printed_any = true;
}

// Prints the line as "<a>x<+b>y<+c>=0" followed by a newline.
static void printLine(std::ostream &out, const Line &line)
{
    bool printed_any = false;
    printTerm(out, line.a, "x", printed_any);
    printTerm(out, line.b, "y", printed_any);
    printTerm(out, line.c, "", printed_any);
    out << "=0" << '\n';
}

int main()
{
    std::vector<Point> p;
    int n;

    while (std::cin >> n && n > 0) {
        // assign() rather than resize() so no coordinates survive from the
        // previous test case.
        p.assign(static_cast<std::size_t>(n), Point());
        for (std::size_t i = 0; i < p.size(); ++i) {
            if (!(std::cin >> p[i].x >> p[i].y)) {
                std::cerr << "error: expected " << n
                          << " points, but input ended or was malformed at point "
                          << i << '\n';
                return 1;
            }
        }

        Line min_line;
        const double min_dist = findBestLine(p, min_line);

        printLine(std::cout, min_line);
        std::cout << min_dist << '\n';
    }

    return 0;
}