main
"""Scrape software-engineering job postings from a LinkedIn search page."""

from bs4 import BeautifulSoup
import requests
from job import Job
import db as db


# URL containing pre-filtered job search results
# for "software engineer intern/entry level" in the last 7 days
LINKEDIN_URL = "https://www.linkedin.com/jobs/search/?currentJobId=3511050778&f_E=1%2C2&f_JT=F%2CI&f_TPR=r604800&geoId=103644278&keywords=software%20engineer%20intern&location=United%20States&refresh=true"


def scrape_postings() -> list[Job]:
    """Scrape job postings from LinkedIn and return them as Job objects.

    Fetches ``LINKEDIN_URL``, parses the response body with BeautifulSoup's
    ``html.parser`` and builds one ``Job`` for every ``div.job-search-card``
    element that contains all of the expected sub-elements.

    Cards missing a title, company, location, time or link element are
    skipped rather than aborting the whole scrape with ``AttributeError``.

    Returns:
        A list of ``Job`` objects in page order; empty when the page holds
        no complete job cards.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
    """
    # requests applies no timeout by default; without one a stalled
    # connection would block this call forever.
    response = requests.get(LINKEDIN_URL, timeout=10)
    # `.content` is the raw response body (bytes); BeautifulSoup accepts it.
    soup: BeautifulSoup = BeautifulSoup(response.content, "html.parser")

    jobs: list[Job] = []

    # Each job posting is rendered as a <div class="job-search-card">.
    for post in soup.find_all(name="div", class_="job-search-card"):
        title_tag = post.find(name="h3", class_="base-search-card__title")
        company_tag = post.find(name="a", class_="hidden-nested-link")
        location_tag = post.find(
            name="span", class_="job-search-card__location"
        )
        date_tag = post.find(name="time")
        link_tag = post.find(name="a", class_="base-card__full-link")
        link_href = None if link_tag is None else link_tag.get("href")

        # find()/get() return None for anything missing; skip such cards
        # instead of dereferencing None or storing the string "None".
        required = (title_tag, company_tag, location_tag, date_tag, link_href)
        if any(part is None for part in required):
            continue

        jobs.append(
            Job(
                str(title_tag.text).strip(),
                str(company_tag.text).strip(),
                str(location_tag.text).strip(),
                str(link_href).strip(),
                str(date_tag.text).strip(),
            )
        )

    return jobs


def main():
    """Scrape the current postings and fail if none were found.

    Raises:
        AssertionError: If scraping returned no postings.
    """
    new_postings: list[Job] = scrape_postings()

    # Explicit raise instead of an `assert` statement: asserts are removed
    # when Python runs with -O, which would silently drop this check.
    if not new_postings:
        raise AssertionError("No new postings found")


if __name__ == "__main__":
    # Set up the database before running the scrape.
    db.initialize_database()
    main()
def
scrape_postings() -> list[job.Job]:
def scrape_postings() -> list[Job]:
    """Scrape job postings from LinkedIn and return them as Job objects.

    Fetches ``LINKEDIN_URL``, parses the response body with BeautifulSoup's
    ``html.parser`` and builds one ``Job`` for every ``div.job-search-card``
    element that contains all of the expected sub-elements.

    Cards missing a title, company, location, time or link element are
    skipped rather than aborting the whole scrape with ``AttributeError``.

    Returns:
        A list of ``Job`` objects in page order; empty when the page holds
        no complete job cards.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
    """
    # requests applies no timeout by default; without one a stalled
    # connection would block this call forever.
    response = requests.get(LINKEDIN_URL, timeout=10)
    # `.content` is the raw response body (bytes); BeautifulSoup accepts it.
    soup: BeautifulSoup = BeautifulSoup(response.content, "html.parser")

    jobs: list[Job] = []

    # Each job posting is rendered as a <div class="job-search-card">.
    for post in soup.find_all(name="div", class_="job-search-card"):
        title_tag = post.find(name="h3", class_="base-search-card__title")
        company_tag = post.find(name="a", class_="hidden-nested-link")
        location_tag = post.find(
            name="span", class_="job-search-card__location"
        )
        date_tag = post.find(name="time")
        link_tag = post.find(name="a", class_="base-card__full-link")
        link_href = None if link_tag is None else link_tag.get("href")

        # find()/get() return None for anything missing; skip such cards
        # instead of dereferencing None or storing the string "None".
        required = (title_tag, company_tag, location_tag, date_tag, link_href)
        if any(part is None for part in required):
            continue

        jobs.append(
            Job(
                str(title_tag.text).strip(),
                str(company_tag.text).strip(),
                str(location_tag.text).strip(),
                str(link_href).strip(),
                str(date_tag.text).strip(),
            )
        )

    return jobs
Scrapes job postings from LinkedIn and returns a list of Job objects
def
main():
def main():
    """Scrape the current postings and fail if none were found.

    Raises:
        AssertionError: If scraping returned no postings.
    """
    new_postings: list[Job] = scrape_postings()

    # Explicit raise instead of an `assert` statement: asserts are removed
    # when Python runs with -O, which would silently drop this check.
    if not new_postings:
        raise AssertionError("No new postings found")
Main function