Update politicians contact list
bot committed Jul 15, 2024
1 parent 35f682d commit b43eebc
Showing 6 changed files with 488 additions and 476 deletions.
892 changes: 447 additions & 445 deletions congress.csv

Large diffs are not rendered by default.
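The bulk of the commit is the regenerated congress.csv (447 additions, 445 deletions), which the viewer does not render. As a quick way to eyeball the refreshed data locally, here is a minimal sketch; it assumes only that congress.csv sits at the repository root, as the file list suggests, and makes no assumptions about its column layout:

import csv

# Print the first few rows of the regenerated contact list.
with open("congress.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        print(row)
        if i >= 4:  # the header plus a handful of records is enough for a spot check
            break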

Binary file modified scripts/__pycache__/__main__.cpython-38.pyc
Binary file modified scripts/__pycache__/congress_crawler.cpython-38.pyc
Binary file modified scripts/__pycache__/senate_crawler.cpython-38.pyc
54 changes: 32 additions & 22 deletions scripts/congress_crawler.py
@@ -13,8 +13,7 @@
 import asyncio
 from urllib.parse import urljoin
 
-logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s",
-                    level=logging.INFO)
+logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
 
 
 def get_html(url):
@@ -30,8 +29,8 @@ def __init__(self):
         self.base_url = "https://www.camara.leg.br/"
         self.congress = []
         self.search_url = (
-            self.base_url +
-            "deputados/quem-sao/resultado?search=&partido=&uf=&sexo=")
+            self.base_url + "deputados/quem-sao/resultado?search=&partido=&uf=&sexo="
+        )
 
     async def run(self):
         """Start the Congress crawler."""
@@ -41,30 +40,37 @@ async def run(self):
             if int(total) < 1:
                 logging.error(
                     "The latest legislature"
-                    "'s quorum for the Congress have not been informed yet")
+                    "'s quorum for the Congress have not been informed yet"
+                )
 
             pages = round(int(total) / 25) + 1
             tasks = []
             for i in range(1, pages):
                 tasks.append(
                     asyncio.create_task(
-                        self.get_congress_by_page(self.search_url +
-                                                  "&legislatura=" +
-                                                  legislature + "&pagina=" +
-                                                  str(i))))
+                        self.get_congress_by_page(
+                            self.search_url
+                            + "&legislatura="
+                            + legislature
+                            + "&pagina="
+                            + str(i)
+                        )
+                    )
+                )
             await asyncio.gather(*tasks)
 
         except Exception:
             logging.exception("global failure")
             requests.post(
                 os.environ.get("SLACK_WEBHOOK_URL"),
-                json.dumps({
-                    "channel": "#notifications",
-                    "icon_emoji": ":fire:",
-                    "text":
-                    ":warning: Brazilian congress crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
-                    "username": "politicians-contacts",
-                }),
+                json.dumps(
+                    {
+                        "channel": "#notifications",
+                        "icon_emoji": ":fire:",
+                        "text": ":warning: Brazilian congress crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
+                        "username": "politicians-contacts",
+                    }
+                ),
                 headers={"Content-Type": "application/json"},
             )
 
@@ -105,7 +111,10 @@ async def get_congress_by_page(self, url):
             tasks.append(
                 asyncio.create_task(
                     self.get_congress_person_data(
-                        urljoin(self.base_url, link.get("href")))))
+                        urljoin(self.base_url, link.get("href"))
+                    )
+                )
+            )
 
         await asyncio.gather(*tasks)
 
@@ -119,16 +128,17 @@ async def get_congress_person_data(self, url):
         congressperson = self.get_congress_person_data_from_page(url)
         if congressperson:
             self.congress.append(congressperson)
-            logging.info(f"congressperson: {url} - "
-                         f' email: {congressperson["email"]} -'
-                         f' party: {congressperson["party"]}')
+            logging.info(
+                f"congressperson: {url} - "
+                f' email: {congressperson["email"]} -'
+                f' party: {congressperson["party"]}'
+            )
 
     def get_congress_person_data_from_page(self, url):
         try:
             soup = BeautifulSoup(get_html(url), "html.parser")
             name = soup.find(id="nomedeputado").contents[0]
-            party_state = soup.find(
-                class_="informacoes-deputado__inline").contents[1]
+            party_state = soup.find(class_="informacoes-deputado__inline").contents[1]
             party = re.findall(r".+?(?=\s-)", party_state)[0]
             email = soup.find(class_="email").contents[0]
 
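For context, the two pieces of crawler logic touched by the reformatting above are the pagination URL construction and the party/state regex. A standalone sketch with illustrative values (total, legislature, and party_state are made up here; the real crawler reads them from the crawled pages):

import re

base_url = "https://www.camara.leg.br/"
search_url = base_url + "deputados/quem-sao/resultado?search=&partido=&uf=&sexo="

# Pagination: the site lists 25 deputies per page, hence total / 25 pages.
total = 513         # illustrative quorum; the crawler scrapes this value
legislature = "57"  # illustrative legislature id
pages = round(total / 25) + 1
page_urls = [
    search_url + "&legislatura=" + legislature + "&pagina=" + str(i)
    for i in range(1, pages)
]

# Party extraction: non-greedy match up to the first " -" separator.
party_state = "PT - SP"  # illustrative; parsed from the informacoes-deputado__inline element
party = re.findall(r".+?(?=\s-)", party_state)[0]

print(len(page_urls), page_urls[0], party)

Running it prints 21 page URLs and the party "PT", mirroring the inputs that get_congress_by_page and get_congress_person_data_from_page operate on.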
18 changes: 9 additions & 9 deletions scripts/senate_crawler.py
@@ -5,8 +5,7 @@
 import requests
 import os
 
-logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s",
-                    level=logging.INFO)
+logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
 
 
 def get_html(url):
@@ -46,13 +45,14 @@ async def run(self):
             logging.exception("global failure")
             requests.post(
                 os.environ.get("SLACK_WEBHOOK_URL"),
-                json.dumps({
-                    "channel": "#notifications",
-                    "icon_emoji": ":fire:",
-                    "text":
-                    ":warning: Brazilian senate crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
-                    "username": "politicians-contacts",
-                }),
+                json.dumps(
+                    {
+                        "channel": "#notifications",
+                        "icon_emoji": ":fire:",
+                        "text": ":warning: Brazilian senate crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
+                        "username": "politicians-contacts",
+                    }
+                ),
                 headers={"Content-Type": "application/json"},
             )
         finally:
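Both crawlers share the failure path reformatted above: any exception is logged and a Slack-style JSON payload is POSTed to a webhook whose URL comes from the SLACK_WEBHOOK_URL environment variable. A minimal self-contained sketch of that pattern, with an illustrative message text (it requires SLACK_WEBHOOK_URL to be set, exactly as the crawlers do):

import json
import logging
import os

import requests

logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)


def notify_failure(text):
    # POST a Slack-compatible payload to the incoming webhook.
    requests.post(
        os.environ.get("SLACK_WEBHOOK_URL"),  # must be set, or requests raises
        json.dumps(
            {
                "channel": "#notifications",
                "icon_emoji": ":fire:",
                "text": text,
                "username": "politicians-contacts",
            }
        ),
        headers={"Content-Type": "application/json"},
    )


try:
    raise RuntimeError("simulated crawl failure")  # illustrative trigger only
except Exception:
    logging.exception("global failure")
    notify_failure(":warning: crawler failed! :fire:")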
