Extend logging #386
chrwm committed Dec 1, 2022
1 parent a63e962 commit dc076c5
Showing 5 changed files with 24 additions and 15 deletions.
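
Note for orientation (not part of the diff): the pattern applied throughout this commit is to replace bare print() calls with a module-level logger obtained from open_mastr.utils.config.setup_logger, as the hunks below show. A minimal sketch of the resulting idiom, reusing one of the messages from the diff:

    from open_mastr.utils.config import setup_logger

    # Module-level logger, created once when the module is imported
    log = setup_logger()

    # previously: print("Data is cleansed.")
    log.info("Data is cleansed.")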
6 changes: 3 additions & 3 deletions open_mastr/mastr.py
@@ -71,7 +71,7 @@ def __init__(self, engine="sqlite") -> None:

self.engine = create_database_engine(engine, self.home_directory)

- print(
+ log.info(
f"Data will be written to the following database: {self.engine.url}\n"
"If you run into problems, try to "
"delete the database and update the package by running 'pip install --upgrade open-mastr'\n"
@@ -236,7 +236,7 @@ def download(
# Set api_processes to None in order to avoid the malfunctioning usage
if api_processes:
api_processes = None
- print(
+ log.info(
"Warning: The implementation of parallel processes is currently under construction. Please let "
"the argument api_processes at the default value None."
)
@@ -353,7 +353,7 @@ def to_csv(
try:
df = pd.read_sql(additional_table, con=self.engine)
except ValueError as e:
- print(
+ log.info(
f"While reading table '{additional_table}', the following error occured: {e}"
)
continue
1 change: 1 addition & 0 deletions open_mastr/soap_api/mirror.py
@@ -1258,6 +1258,7 @@ def to_csv(
)
metadata = datapackage_meta_json(newest_date, technology, json_serialize=False)

log.info("Save metadata")
with open(metadata_file, "w", encoding="utf-8") as f:
json.dump(metadata, f, ensure_ascii=False, indent=4)

6 changes: 5 additions & 1 deletion open_mastr/xml_download/utils_cleansing_bulk.py
@@ -5,10 +5,14 @@
columns_replace_list,
)
from zipfile import ZipFile
+ from open_mastr.utils.config import setup_logger

+ # setup logger
+ log = setup_logger()


def cleanse_bulk_data(df: pd.DataFrame, zipped_xml_file_path: str) -> pd.DataFrame:
print("Data is cleansed.")
log.info("Data is cleansed.")
df = replace_ids_with_names(df, system_catalog)
# Katalogeintraege: int -> string value
df = replace_mastr_katalogeintraege(
11 changes: 7 additions & 4 deletions open_mastr/xml_download/utils_download_bulk.py
@@ -5,7 +5,10 @@
import numpy as np
import os
import shutil
+ from open_mastr.utils.config import setup_logger

+ # setup logger
+ log = setup_logger()

def get_url_from_Mastr_website() -> str:
"""Get the url of the latest MaStR file from markstammdatenregister.de.
@@ -34,7 +37,7 @@ def download_xml_Mastr(save_path: str, bulk_date_string: str, xml_folder_path: s
"""

if os.path.exists(save_path):
print("MaStR already downloaded.")
log.info("MaStR already downloaded.")
return None

if bulk_date_string != "today":
@@ -53,7 +56,7 @@ def download_xml_Mastr(save_path: str, bulk_date_string: str, xml_folder_path: s
"Warning: The servers from MaStR restrict the download speed."
" You may want to download it another time."
)
- print(print_message)
+ log.info(print_message)
url = get_url_from_Mastr_website()
time_a = time.perf_counter()
r = requests.get(url, stream=True)
@@ -75,5 +78,5 @@ def download_xml_Mastr(save_path: str, bulk_date_string: str, xml_folder_path: s
# remove warning
bar.set_postfix_str(s="")
time_b = time.perf_counter()
print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.")
print(f"MaStR was successfully downloaded to {xml_folder_path}.")
log.info(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.")
log.info(f"MaStR was successfully downloaded to {xml_folder_path}.")
15 changes: 8 additions & 7 deletions open_mastr/xml_download/utils_write_to_database.py
@@ -13,6 +13,7 @@
from open_mastr.xml_download.utils_cleansing_bulk import cleanse_bulk_data
from open_mastr.utils.config import setup_logger

+ log = setup_logger()

def write_mastr_xml_to_database(
engine: sqlalchemy.engine.Engine,
@@ -38,11 +39,11 @@ def write_mastr_xml_to_database(

if is_first_file(file_name):
create_database_table(engine=engine, xml_tablename=xml_tablename)
- print(
+ log.info(
f"Table '{sql_tablename}' is filled with data '{xml_tablename}' "
"from the bulk download."
)
print(f"File '{file_name}' is parsed.")
log.info(f"File '{file_name}' is parsed.")

df = preprocess_table_for_writing_to_database(
f=f,
@@ -64,7 +65,7 @@ def write_mastr_xml_to_database(
if_exists="append",
engine=engine,
)
print("Bulk download and data cleansing were successful.")
log.info("Bulk download and data cleansing were successful.")


def is_table_relevant(xml_tablename: str, include_tables: list) -> bool:
@@ -285,7 +286,7 @@ def write_single_entries_until_not_unique_comes_up(
len_df_before = len(df)
df = df.drop(labels=key_list, errors="ignore")
df = df.reset_index()
print(f"{len_df_before-len(df)} entries already existed in the database.")
log.info(f"{len_df_before-len(df)} entries already existed in the database.")

return df

@@ -307,7 +308,7 @@ def add_missing_column_to_table(
-------
"""
- log = setup_logger()


if engine.name == "postgresql":
missing_column = err.args[0].split("»")[1].split("«")[0]
@@ -331,7 +332,7 @@

def delete_wrong_xml_entry(err: Error, df: pd.DataFrame) -> None:
delete_entry = str(err).split("«")[0].split("»")[1]
print(f"The entry {delete_entry} was deleted due to its false data type.")
log.info(f"The entry {delete_entry} was deleted due to its false data type.")
df = df.replace(delete_entry, np.nan)


@@ -370,7 +371,7 @@ def handle_xml_syntax_error(data: bytes, err: Error) -> pd.DataFrame:
else:
decoded_data = decoded_data[:start_char] + decoded_data[start_char + 1 :]
df = pd.read_xml(decoded_data)
print("One invalid xml expression was deleted.")
log.info("One invalid xml expression was deleted.")
return df
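
Design note on utils_write_to_database.py: with log = setup_logger() now defined once at module level, functions such as add_missing_column_to_table no longer create their own logger on each call (the function-local setup_logger() is removed in the hunk at line 307); every log.info call in this module goes through the single logger created at import time, mirroring the pattern in the other changed files.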

