-
Notifications
You must be signed in to change notification settings - Fork 0
/
billParser.py
57 lines (42 loc) · 1.47 KB
/
billParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from xml.dom import minidom
import os
# Location of the bill XML that this script parses at import time.
billdir = 'Bills/'
name = 'BILLS-117hr127ih.xml'
xmldoc = minidom.parse(f"{billdir}{name}")


def _first_text(tag):
    """Return the nodeValue of the first child of the first <tag> element.

    Raises IndexError when the document has no such element and
    AttributeError when that element has no children.
    """
    return xmldoc.getElementsByTagName(tag)[0].firstChild.nodeValue


# First space-separated token of <congress>, minus its last two characters
# (presumably an ordinal suffix such as "th" -- confirm against the input).
congressNum = _first_text('congress').split(' ')[0][:-2]

# <legis-num> text with every '.' and ' ' character removed.
billID = _first_text('legis-num').translate(str.maketrans('', '', '. '))
print(f"{congressNum}-{billID}")

officialTitle = _first_text('official-title')
shortTitle = _first_text('short-title')
print(shortTitle)

# Root element whose children are traversed by the driver loop further down.
legisbody = xmldoc.getElementsByTagName('legis-body')[0]

# Recursion depth counter; iterate() increments it on entry and decrements
# it on exit.
currentLevel = 0

# Module-level placeholders; no code visible in this file reads them.
ref = ''
gointoquote = False
def iterate(div, ret):
    """Recursively walk a DOM subtree, printing each node with its depth.

    For every visited node this prints ``depth nodeName nodeValue``.  An
    ``external-xref`` element is replaced, in its parent, by a plain text
    node carrying the value of the element's first child; the detached
    element is still printed and its children are still visited.  Childless
    nodes whose value contains "is amended" additionally print ``amamama``.
    Children named ``quoted-block`` are not descended into; ``QB`` is
    printed for each instead.

    Args:
        div: The ``xml.dom`` node to visit.
        ret: Unused; kept so existing callers keep working.

    Side effects:
        Mutates the DOM (external-xref replacement), writes to stdout, and
        uses the module-level ``currentLevel`` as the depth counter.  The
        counter is restored even if an exception propagates.
    """
    global currentLevel
    currentLevel += 1
    try:
        if div.nodeName == 'external-xref':
            first_child = div.firstChild
            xref_text = first_child.nodeValue if first_child is not None else None
            parent = div.parentNode
            # Only rewrite when there is text to keep and somewhere to put
            # it; an empty <external-xref/> or a detached node is left alone
            # instead of raising.
            if xref_text is not None and parent is not None:
                # Create the text node from the node's own document rather
                # than relying on a module-level document object.
                replacement = div.ownerDocument.createTextNode(xref_text)
                parent.replaceChild(replacement, div)

        print(currentLevel, div.nodeName, div.nodeValue)

        # Snapshot the children: visiting an external-xref child rewrites
        # this node's live child list while we are looping over it.
        children = list(div.childNodes)
        if not children:
            value = div.nodeValue
            # Childless elements have nodeValue None; guard before the
            # substring test.
            if value is not None and "is amended" in value:
                print('amamama')
            return

        for child in children:
            if child.nodeName != "quoted-block":
                iterate(child, False)
            else:
                print('QB')
    finally:
        currentLevel -= 1
# Walk every direct child of <legis-body> with iterate(), two full passes in
# a row.
# NOTE(review): the original script contains this traversal twice back to
# back, so the tree is printed twice (the second pass sees the tree after the
# first pass has replaced external-xref elements). Confirm the repeat is
# intentional before removing it.
for _pass in range(2):
    for div in legisbody.childNodes:
        iterate(div, False)