# Importing necessary libraries to read and analyze the PDF file
import fitz # PyMuPDF
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Open the PDF report and preview the text of its first pages so the layout
# of the data can be inspected before any real parsing is written.
pdf_path = 'synthese_satisfaction_reclamations_formations_en_cours.pdf'

# Upper bound on the number of leading pages sampled for the preview.
MAX_PREVIEW_PAGES = 3
# Number of leading characters of the extracted text that are printed.
PREVIEW_CHARS = 2000

# NOTE(review): `doc` is never closed in the visible code. It is left open
# here in case later steps still read from it; call `doc.close()` (or wrap the
# usage in `with fitz.open(pdf_path) as doc:`) once nothing else needs it.
doc = fitz.open(pdf_path)

# `min` guards against documents shorter than MAX_PREVIEW_PAGES; the page
# texts are joined in one pass instead of growing a string with `+=`.
text = "".join(
    doc[page_num].get_text()
    for page_num in range(min(MAX_PREVIEW_PAGES, len(doc)))
)

# Show only the beginning of the extracted text for manual inspection.
print(text[:PREVIEW_CHARS])