-
Notifications
You must be signed in to change notification settings - Fork 5
/
cleanpaste.py
executable file
·146 lines (111 loc) · 4.05 KB
/
cleanpaste.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
import sys
from Foundation import *
from AppKit import *
import re, os, subprocess
from lxml.html.clean import clean_html, Cleaner
import lxml
## cleanpaste.py
## --------------
## Takes the styled text on the clipboard and removes all formatting such as
## fonts *except* for some specified tags. (Primarily bold and italics)
##
## I typically map this via QuicKeys to ctrl-v so that it does a partially
## styled paste: something in between a regular paste and paste-and-match-style.
## I wish Apple had a way to paste maintaining some style information but
## matching size and font with where you are pasting.
##
## Requires lxml and pyobjc. To install these execute the following at the
## command line:
## sudo easy_install pyobjc
## sudo easy_install lxml
##
## For more information on this script see the posts at Clark's Tech Blog
## http://www.libertypages.com/clarktech/?p=3310
## Cleans up html using lxml to only allow specified tags with their attributes
## removed and remove any style sheet info.
def clean(html):
    """Strip *html* down to a small whitelist of tags with no attributes.

    Only basic inline/heading/paragraph tags are kept, ``<style>`` elements
    are dropped together with their content, and every attribute is removed.

    Returns the cleaned markup as produced by ``Cleaner.clean_html``, or
    ``None`` when *html* is ``None`` or empty.
    """
    if html is None or len(html) < 1:
        return None

    # tags to allow
    allowed_tags = ['b', 'i', 'u', 'h1', 'h2', 'h3', 'p', 'strong', 'em', 'sub', 'sup']
    # tags to remove AND remove the tag's content
    kill_tags = ['style']

    options = dict(remove_unknown_tags=False, style=False, kill_tags=kill_tags,
                   safe_attrs_only=True, allow_tags=allowed_tags)
    # Where Cleaner exposes its own ``safe_attrs`` option, hand it an empty
    # set directly; patching ``defs.safe_attrs`` alone may not reach a value
    # the class captured at import time.  Cleaner rejects unknown keyword
    # names, hence the hasattr check for releases without the option.
    if hasattr(lxml.html.clean.Cleaner, 'safe_attrs'):
        options['safe_attrs'] = frozenset()

    # Also empty the module-level default for releases that read it at call
    # time.  try/finally guarantees the global is put back even if cleaning
    # raises, so other users of lxml in this process are unaffected.
    defs = lxml.html.clean.defs
    old_safe = defs.safe_attrs
    defs.safe_attrs = []
    try:
        cleaner = lxml.html.clean.Cleaner(**options)
        return cleaner.clean_html(html)
    finally:
        defs.safe_attrs = old_safe
# function to call textutil on temporary files to convert rtf to html
# path is the path to the temporary file. Returns the data deleting
# the temporary file created in this function but not the one pointed
# to by path
def convertrtf(path):
    """Convert the RTF file at *path* to HTML with ``textutil``; return the HTML.

    The converted output is expected at the same location as *path* with the
    extension replaced by ``.html``.  That generated file is read and then
    deleted; the file at *path* itself is left for the caller to remove.

    Raises IOError/OSError if the converted file cannot be opened (for
    example because the conversion did not produce it).
    """
    # Pass an argument list instead of a shell string: the path reaches
    # textutil verbatim, so spaces or shell metacharacters in it can neither
    # break the command nor be interpreted by a shell.
    subprocess.call(["textutil", "-convert", "html", path])
    newpath = os.path.splitext(path)[0] + ".html"
    try:
        with open(newpath, "r") as f:
            return f.read()
    finally:
        # Clean up the generated file even when reading failed, but do not
        # mask the original error if the file was never created.
        if os.path.exists(newpath):
            os.remove(newpath)
# given rtf data returns the data converted to html
def convertohtml(data):
    """Convert RTF text *data* to HTML and return the HTML text.

    The data is written to a private temporary ``.rtf`` file, converted via
    ``convertrtf`` and the temporary file is removed afterwards.

    Returns ``None`` (after printing an error marker) when *data* is ``None``.
    """
    if data is None:
        print("****ERROR")
        return

    # Local import keeps this block self-contained.
    import tempfile

    # mkstemp gives an unpredictable name and owner-only permissions, unlike
    # a fixed /tmp/pb.<pid>.rtf path that other users could pre-create or read.
    fd, path = tempfile.mkstemp(prefix="pb.", suffix=".rtf")
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(data)
        return convertrtf(path)
    finally:
        # Always remove the temporary RTF file, even if conversion raised.
        os.remove(path)
# gets styled text from the clipboard either rtf converted to html, html proper
# or plain text
def getrtfclipboard():
    """Return the clipboard contents as text suitable for cleaning.

    The general pasteboard is asked for the first available type among RTF,
    HTML and plain string.  RTF content is run through ``convertohtml``;
    HTML and plain text are returned as read.  An empty string is returned
    when none of the three types is available.
    """
    pasteboard = NSPasteboard.generalPasteboard()
    # types of data we can accept: rtf, html, text
    accepted = [NSPasteboardTypeRTF, NSHTMLPboardType, NSPasteboardTypeString]
    found = pasteboard.availableTypeFromArray_(accepted)
    if found is None:
        return ""

    contents = pasteboard.stringForType_(found)
    # Only RTF needs conversion; HTML and plain text pass straight through.
    if found != NSHTMLPboardType and found == NSPasteboardTypeRTF:
        return convertohtml(contents)
    return contents
# given html this puts it on the clipboard
def sethtmlclipboard(html):
    """Place *html* on the general pasteboard under the HTML pasteboard type."""
    board = NSPasteboard.generalPasteboard()
    # Declare HTML as the only type (no owner), then store the string for it.
    board.declareTypes_owner_(NSArray.arrayWithObject_(NSHTMLPboardType), None)
    board.setString_forType_(html, NSHTMLPboardType)
def test():
    """Print the result of running ``clean`` on ``testtext``.

    ``testtext`` is not defined in the visible source; it has to be
    provided at module level before this helper is called.
    """
    cleaned = clean(testtext)
    print(cleaned)
def main():
    """Replace the clipboard contents with a cleaned-up HTML version.

    Reads styled text from the clipboard, strips it with ``clean``, prints
    the result and writes it back to the clipboard as HTML.  When there is
    nothing to clean (``clean`` returned ``None``), the clipboard is left
    untouched.
    """
    data = getrtfclipboard()
    newdata = clean(data)
    if newdata is None:
        # Nothing usable on the clipboard: do not re-declare the pasteboard
        # types or store None over the existing contents.
        return
    print(newdata)
    sethtmlclipboard(newdata)
# Script entry point: run the clipboard clean-up only when executed directly.
if __name__ == '__main__':
    main()
    #test()
    # change to 0 for success, 1 for (partial) failure
    # NOTE(review): assumed to belong inside the guard so that importing the
    # module does not exit the interpreter - confirm against the original.
    sys.exit(0)