1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | # mlpatch.py: Run with no arguments for usage |
---|
4 | |
---|
5 | import sys, os |
---|
6 | import sgmllib |
---|
7 | try: |
---|
8 | # Python >=3.0 |
---|
9 | from html.entities import entitydefs |
---|
10 | from urllib.request import urlopen as urllib_request_urlopen |
---|
11 | except ImportError: |
---|
12 | # Python <3.0 |
---|
13 | from htmlentitydefs import entitydefs |
---|
14 | from urllib2 import urlopen as urllib_request_urlopen |
---|
15 | import fileinput |
---|
16 | |
---|
17 | CHUNKSIZE = 8 * 1024 |
---|
18 | |
---|
class MyParser(sgmllib.SGMLParser):
    """SGML parser that writes the demangled text of a message body to stdout.

    Markup is ignored until a ``<!-- body="start" -->`` comment is seen and
    again after ``<!-- body="end" -->``.  Inside the body, ``<br>`` and
    ``<p>`` tags only record that a line break is pending; the text that
    follows decides whether the break is emitted as a newline (the text
    looks like the start of a diff line) or as a single space (the line is
    presumed to have been wrapped).
    """

    # Text starting with any of these is treated as a real new line.
    _DIFF_LINE_PREFIXES = ('+', '-', ' ', '#', "Index:", "@@ ", "======")

    def __init__(self):
        """Initialise the base parser and this class's state flags."""
        self.baseclass = sgmllib.SGMLParser
        self.baseclass.__init__(self)
        # Use a private copy of the entity table.  Binding the imported
        # mapping directly and then adding "nbsp" to it would modify the
        # module-level table shared by everything else in the process.
        self.entitydefs = dict(entitydefs)
        self.entitydefs["nbsp"] = " "
        self.inbody = False          # True between the body start/end comments
        self.complete_line = False   # True when a line break is pending
        self.discard_gathered()

    def discard_gathered(self):
        """Stop gathering text and drop whatever has been gathered."""
        self.gather_data = False
        self.gathered_data = ""

    def noop(self):
        """Do nothing."""
        pass

    def out(self, data):
        """Write *data* to standard output without adding a newline."""
        sys.stdout.write(data)

    def handle_starttag(self, tag, method, attrs):
        """Pass a start tag to the base class, but only inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_starttag(self, tag, method, attrs)

    def handle_endtag(self, tag, method):
        """Pass an end tag to the base class, but only inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_endtag(self, tag, method)

    def handle_data(self, data):
        """Emit (or gather) a run of text found inside the body.

        Newlines in *data* are removed first; line structure comes only from
        the pending-break flag set by ``do_br`` / ``do_p``.
        """
        if not self.inbody:
            return
        data = data.replace('\n', '')
        if not data:
            return
        if self.gather_data:
            # Inside <a>...</a>: hold the text until end_a() writes it.
            self.gathered_data += data
            return
        if self.complete_line:
            if data.startswith(self._DIFF_LINE_PREFIXES):
                # Real new line
                self.out('\n')
            else:
                # Presume that we are wrapped
                self.out(' ')
        self.complete_line = False
        self.out(data)

    def handle_charref(self, ref):
        """Pass a character reference to the base class inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_charref(self, ref)

    def handle_entityref(self, ref):
        """Pass an entity reference to the base class inside the body."""
        if not self.inbody:
            return
        self.baseclass.handle_entityref(self, ref)

    def handle_comment(self, comment):
        """Switch body processing on or off at the marker comments."""
        if comment == ' body="start" ':
            self.inbody = True
        elif comment == ' body="end" ':
            self.inbody = False

    def handle_decl(self, data):
        """Report a declaration found inside the body."""
        if not self.inbody:
            return
        print("DECL: " + data)

    def unknown_starttag(self, tag, attrs):
        """Report a start tag inside the body that has no handler."""
        if not self.inbody:
            return
        print("UNKTAG: %s %s" % (tag, attrs))

    def unknown_endtag(self, tag):
        """Report an end tag inside the body that has no handler."""
        if not self.inbody:
            return
        print("UNKTAG: /%s" % (tag))

    def do_br(self, attrs):
        """Record that a line break is pending."""
        self.complete_line = True

    def do_p(self, attrs):
        """Flush any pending break, write one space, and leave a break pending.

        NOTE(review): the lone space presumably stands for an empty context
        line of the patch -- confirm against real archive pages.
        """
        if self.complete_line:
            self.out('\n')
        self.out(' ')
        self.complete_line = True

    def start_a(self, attrs):
        """Begin gathering the text of an anchor instead of writing it."""
        self.gather_data = True

    def end_a(self):
        """Write the gathered anchor text with ``_at_`` replaced by ``@``."""
        self.out(self.gathered_data.replace('_at_', '@'))
        self.discard_gathered()

    def close(self):
        """Emit a final newline if a break is pending, then close the parser."""
        if self.complete_line:
            self.out('\n')
        self.baseclass.close(self)
---|
113 | |
---|
114 | |
---|
def main():
    """Command-line entry point.

    With no arguments, writes the usage text to stderr and exits with
    status 0.  With anything other than exactly four arguments, writes an
    error to stderr and exits with status 1.  Otherwise the four arguments
    (list, year, month, msgno) are used to build the archive URL, which is
    printed, fetched in CHUNKSIZE pieces and fed through MyParser.
    """
    argc = len(sys.argv)
    if argc == 1:
        sys.stderr.write(
            "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" +
            "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
            """
Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
mangle inline patches, and provide no raw message download facility
(other than for an entire month's email as an mbox).

So, I wrote this script, to demangle them. It's not perfect, as it has to
guess about whitespace, but it does an acceptable job.\n""")
        sys.exit(0)
    if argc != 5:
        sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
                         + "parameters for usage\n")
        sys.exit(1)

    # "list_name" rather than "list" so the builtin is not shadowed.
    list_name, year, month, msgno = sys.argv[1:]
    url = "http://svn.haxx.se/%s/archive-%s-%s/%s.shtml" % (
        list_name, year, month, msgno)
    print("MsgUrl: " + url)

    msgfile = urllib_request_urlopen(url)
    try:
        parser = MyParser()
        chunk = msgfile.read(CHUNKSIZE)
        while chunk:
            parser.feed(chunk)
            chunk = msgfile.read(CHUNKSIZE)
        parser.close()
    finally:
        # Release the connection even if reading or parsing fails.
        msgfile.close()
---|
145 | |
---|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
---|