import pandas as pd
from azure.storage.blob import BlobServiceClient
import pyodbc
# ------------------ Azure Blob Storage Configuration ------------------
blob_connection_string = 'COnncetionstring'
blob_container_name = 'Blobcontainer Name'
# Connect to Blob Storage
blob_service_client = BlobServiceClient.from_connection_string(blob_connection_string)
container_client = blob_service_client.get_container_client(blob_container_name)
# Fetch file names from Blob Storage
blob_file_names = [blob.name for blob in container_client.list_blobs()]
df_blob_files = pd.DataFrame(blob_file_names, columns=['FileName'])
# ------------------ Azure SQL Database Configuration ------------------
sql_server = 'Server Name'
sql_database = 'Databse'
sql_username = 'user name'
sql_password = 'password'
sql_driver = 'ODBC Driver 17 for SQL Server'
sql_table = 'FileName table'
# Connect to SQL Server and fetch file names
sql_conn_str = f'DRIVER={sql_driver};SERVER={sql_server};DATABASE={sql_database};UID={sql_username};PWD={sql_password}'
sql_query = f'SELECT B2BFileName FROM {sql_table}'
with pyodbc.connect(sql_conn_str) as conn:
df_sql_files = pd.read_sql(sql_query, conn)
# ------------------ Comparison Logic ------------------
# Find files in Blob Storage that are not in SQL Server
missing_files = df_blob_files[~df_blob_files['FileName'].isin(df_sql_files['B2BFileName'])]
# ------------------ Export to Excel ------------------
output_file = r'C:\Users\Desktop\MissingBlobFiles.xlsx'
missing_files.to_excel(output_file, index=False)
print(f"Missing file names exported to: {output_file}")
3.
.isin(...)
This method checks whether each value in
df_blob_files['FileName']
exists in the list of values fromdf_sql_files['B2BFileName']
.It returns a boolean Series —
True
if the file name exists in SQL Server,False
if it doesn't.4.
~
(Tilde Operator)This is a logical NOT operator. It inverts the boolean Series returned by
.isin(...)
.So now, it marks
True
for file names not found in SQL Server.
No comments:
Post a Comment