diff --git a/.github/workflows/linting_and_testing.yml b/.github/workflows/linting_and_testing.yml
index 0259ccb5..1af945f6 100644
--- a/.github/workflows/linting_and_testing.yml
+++ b/.github/workflows/linting_and_testing.yml
@@ -46,9 +46,8 @@ jobs:
       - uses: actions/checkout@v4
       - uses: conda-incubator/setup-miniconda@v3
        with:
+          auto-update-conda: true
           python-version: ${{ matrix.python-version }}
-          conda-remove-defaults: "true"
-
      - name: Install dependencies for windows python 3.9 and 3.10
        if: ${{ matrix.os == 'windows-latest' && (matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
        run: |
@@ -77,4 +76,4 @@ jobs:
      - name: Run tests
        run: |
-          conda run -n test pytest -v
\ No newline at end of file
+          conda run -n test pytest -v
diff --git a/map2loop/logging.py b/map2loop/logging.py
index 816ac45e..c0fe217c 100644
--- a/map2loop/logging.py
+++ b/map2loop/logging.py
@@ -66,3 +66,53 @@ def set_level(level: str):
         logger = logging.getLogger(name)
         logger.setLevel(level)
     logger.info(f"Logging level set to {level}")
+
+
+logger = getLogger(__name__)
+logger.info("Imported map2loop.logging module")
+
+
+def setLogging(level="info", handler=None):
+    """Set the logging parameters for a log file or a custom handler.
+
+    Parameters
+    ----------
+    level : str, optional
+        Logging level to set, by default "info".
+        Valid options: 'info', 'warning', 'error', 'debug'
+    handler : logging.Handler, optional
+        A logging handler to use instead of the default StreamHandler, by default None
+
+    Examples
+    --------
+    >>> from map2loop.logging import setLogging
+    >>> setLogging('debug')
+    >>> setLogging('info', logging.FileHandler('loop.log'))
+    """
+    levels = get_levels()
+    level_value = levels.get(level, logging.WARNING)
+
+    # Create a default handler if none is provided
+    if handler is None:
+        handler = logging.StreamHandler()
+
+    formatter = logging.Formatter(
+        "%(levelname)s: %(asctime)s: %(filename)s:%(lineno)d -- %(message)s"
+    )
+    handler.setFormatter(formatter)
+    handler.setLevel(level_value)
+
+    # Replace the handlers in all known loggers so records are not emitted twice
+    for name in loggers:
+        logger = logging.getLogger(name)
+        logger.handlers = []
+        logger.addHandler(handler)
+        logger.setLevel(level_value)
+
+    # Also apply to the main module logger
+    main_logger = logging.getLogger(__name__)
+    main_logger.handlers = []
+    main_logger.addHandler(handler)
+    main_logger.setLevel(level_value)
+
+    main_logger.info(f"Set logging to {level}")
diff --git a/map2loop/project.py b/map2loop/project.py
index ec7f10f5..3cf355fd 100644
--- a/map2loop/project.py
+++ b/map2loop/project.py
@@ -614,6 +614,11 @@ def calculate_stratigraphic_order(self, take_best=False):
             self.sorter.structure_data = self.map_data.get_map_data(Datatype.STRUCTURE)
         if hasattr(self.sorter, 'dtm_data') and self.sorter.dtm_data is None:
             self.sorter.dtm_data = self.map_data.get_map_data(Datatype.DTM)
+        if hasattr(self.sorter, 'min_age_column') and self.sorter.min_age_column is None:
+            self.sorter.min_age_column = self.stratigraphic_column.get_min_age_column()
+        if hasattr(self.sorter, 'max_age_column') and self.sorter.max_age_column is None:
+            self.sorter.max_age_column = self.stratigraphic_column.get_max_age_column()
+
         self.stratigraphic_column.column = self.sorter.sort(
             self.stratigraphic_column.stratigraphicUnits,
         )
diff --git a/map2loop/sampler.py b/map2loop/sampler.py
index b4c7835c..6aad3550 100644
--- a/map2loop/sampler.py
+++ b/map2loop/sampler.py
@@ -52,7 +52,8 @@ def sample(
             pandas.DataFrame: data frame containing samples
         """
         pass
-
+    def __call__(self, **kwargs):
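+        """Allow a sampler instance to be used as a callable.
+
+        A convenience sketch: calling ``sampler(**kwargs)`` simply forwards
+        the keyword arguments to ``sample()``; the accepted keywords depend
+        on the concrete subclass.
+        """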
+        return self.sample(**kwargs)
 
 class SamplerDecimator(Sampler):
     """
diff --git a/map2loop/sorter.py b/map2loop/sorter.py
index 7d1a7672..123d357c 100644
--- a/map2loop/sorter.py
+++ b/map2loop/sorter.py
@@ -9,6 +9,7 @@
 from osgeo import gdal
 from map2loop.utils import value_from_raster
 from .logging import getLogger
+import networkx as nx
 
 logger = getLogger(__name__)
 
@@ -21,11 +22,10 @@ class Sorter(ABC):
         ABC (ABC): Derived from Abstract Base Class
     """
 
-    def __init__(
-        self):
+    def __init__(self):
         """
         Initialiser for Sorter
-
+
         Args:
             unit_relationships (pandas.DataFrame): the relationships between units (columns must contain ["Index1", "Unitname1", "Index2", "Unitname2"])
             contacts (pandas.DataFrame): unit contacts with length of the contacts in metres
@@ -34,7 +34,6 @@ def __init__(
             structure_data (pandas.DataFrame): the structure data
             dtm_data (gdal.Dataset): the dtm data
         """
         self.sorter_label = "SorterBaseClass"
-
 
     def type(self):
         """
@@ -59,35 +58,40 @@ def sort(self, units: pandas.DataFrame) -> list:
         """
         pass
 
+    def __call__(self, **kwargs):
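+        """Allow a sorter instance to be used as a callable.
+
+        Mirrors Sampler.__call__: a call like ``sorter(units=units)`` forwards
+        its keyword arguments unchanged to ``sort()``.
+        """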
+        return self.sort(**kwargs)
+
 
 class SorterUseNetworkX(Sorter):
     """
     Sorter class which returns a sorted list of units based on the unit relationships using a topological graph sorting algorithm
     """
-    required_arguments: List[str] = [
-        'geology_data'
-    ]
+
+    required_arguments: List[str] = ['geology_data', 'unit_name_column']
 
     def __init__(
         self,
         *,
+        unit_name_column: Optional[str] = 'name',
         unit_relationships: Optional[pandas.DataFrame] = None,
         geology_data: Optional[geopandas.GeoDataFrame] = None,
     ):
         """
         Initialiser for networkx graph sorter
-
+
         Args:
             unit_relationships (pandas.DataFrame): the relationships between units
         """
         super().__init__()
         self.sorter_label = "SorterUseNetworkX"
+        self.unit_name_column = unit_name_column
         if geology_data is not None:
             self.set_geology_data(geology_data)
         elif unit_relationships is not None:
             self.unit_relationships = unit_relationships
         else:
             self.unit_relationships = None
+
     def set_geology_data(self, geology_data: geopandas.GeoDataFrame):
         """
         Set geology data and calculate topology and unit relationships
@@ -96,19 +100,20 @@ def set_geology_data(self, geology_data: geopandas.GeoDataFrame):
             geology_data (geopandas.GeoDataFrame): the geology data
         """
         self._calculate_topology(geology_data)
+
     def _calculate_topology(self, geology_data: geopandas.GeoDataFrame):
-        if not geology_data:
+        if geology_data is None:
             raise ValueError("geology_data is required")
         if isinstance(geology_data, geopandas.GeoDataFrame) is False:
             raise TypeError("geology_data must be a geopandas.GeoDataFrame")
-
+
         if 'UNITNAME' not in geology_data.columns:
             raise ValueError("geology_data must contain 'UNITNAME' column")
-
+
         self.topology = Topology(geology_data=geology_data)
         self.unit_relationships = self.topology.get_unit_unit_relationships()
-
+
     @beartype.beartype
     def sort(self, units: pandas.DataFrame) -> list:
         """
@@ -121,6 +126,7 @@ def sort(self, units: pandas.DataFrame) -> list:
             list: the sorted unit names
         """
         import networkx as nx
+
         if self.unit_relationships is None:
             raise ValueError("SorterUseNetworkX requires 'unit_relationships' argument")
         graph = nx.DiGraph()
@@ -150,28 +156,31 @@ def sort(self, units: pandas.DataFrame) -> list:
 
 class SorterUseHint(SorterUseNetworkX):
     required_arguments: List[str] = ['unit_relationships']
 
-    def __init__(
-        self,
-        *,
-        geology_data: Optional[geopandas.GeoDataFrame] = None,
-    ):
-        logger.warning(
-            "SorterUseHint is deprecated in v3.2. Using SorterUseNetworkX instead"
-        )
+
+    def __init__(self, *, geology_data: Optional[geopandas.GeoDataFrame] = None):
+        logger.warning("SorterUseHint is deprecated in v3.2. Using SorterUseNetworkX instead")
         super().__init__(geology_data=geology_data)
-
 
 class SorterAgeBased(Sorter):
     """
     Sorter class which returns a sorted list of units based on the min and max ages of the units
     """
-    required_arguments = ['min_age_column','max_age_column']
-    def __init__(self,*, min_age_column:Optional[str]='minAge', max_age_column:Optional[str]='maxAge'):
+
+    required_arguments = ['min_age_column', 'max_age_column', 'unit_name_column']
+
+    def __init__(
+        self,
+        *,
+        unit_name_column: Optional[str] = 'name',
+        min_age_column: Optional[str] = 'minAge',
+        max_age_column: Optional[str] = 'maxAge',
+    ):
         """
         Initialiser for age based sorter
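+
+        Example (a sketch, assuming the default 'name', 'minAge' and
+        'maxAge' columns are present in the units table):
+
+            sorter = SorterAgeBased()
+            order = sorter.sort(units)  # equivalently: sorter(units=units)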
f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns" + ) def sort(self, units: pandas.DataFrame) -> list: """ @@ -243,21 +272,27 @@ def sort(self, units: pandas.DataFrame) -> list: list: the sorted unit names """ if self.contacts is None: - raise ValueError("contacts must be set (not None) before calling sort() in SorterAlpha.") - import networkx as nx - if self.contacts is None: - raise ValueError("SorterAlpha requires 'contacts' argument") + raise ValueError( + "contacts must be set (not None) before calling sort() in SorterAlpha." + ) + if len(self.contacts) == 0: + raise ValueError("contacts GeoDataFrame is empty in SorterAlpha.") + if 'length' not in self.contacts.columns: + self.contacts['length'] = self.contacts.geometry.length + self.contacts['length'] = self.contacts['length'].astype(float) sorted_contacts = self.contacts.sort_values(by="length", ascending=False)[ - ["UNITNAME_1", "UNITNAME_2", "length"] + [self.unitname1_column, self.unitname2_column, "length"] ] - unit_names = list(units["name"].unique()) + unit_names = list(units[self.unit_name_column].unique()) graph = nx.Graph() for unit in unit_names: graph.add_node(unit, name=unit) max_weight = max(list(sorted_contacts["length"])) + 1 for _, row in sorted_contacts.iterrows(): graph.add_edge( - row["UNITNAME_1"], row["UNITNAME_2"], weight=int(max_weight - row["length"]) + row[self.unitname1_column], + row[self.unitname2_column], + weight=int(max_weight - row["length"]), ) cnode = None @@ -301,15 +336,20 @@ class SorterMaximiseContacts(Sorter): Sorter class which returns a sorted list of units based on the adjacency of units prioritising the maximum length of each contact """ - required_arguments = ['contacts'] + + required_arguments = ['contacts', 'unit_name_column', 'unitname1_column', 'unitname2_column'] + def __init__( self, *, contacts: Optional[geopandas.GeoDataFrame] = None, + unit_name_column: str = 'name', + unitname1_column: str = 'UNITNAME_1', + unitname2_column: str = 'UNITNAME_2', ): """ Initialiser for adjacency based sorter - + Args: contacts (pandas.DataFrame): unit contacts with length of the contacts in metres """ @@ -320,8 +360,17 @@ def __init__( self.route = None self.directed_graph = None self.contacts = contacts - if 'UNITNAME_1' not in contacts.columns or 'UNITNAME_2' not in contacts.columns or 'length' not in contacts.columns: - raise ValueError("contacts GeoDataFrame must contain 'UNITNAME_1', 'UNITNAME_2' and 'length' columns") + self.unit_name_column = unit_name_column + self.unitname1_column = unitname1_column + self.unitname2_column = unitname2_column + if ( + self.unitname1_column not in contacts.columns + or self.unitname2_column not in contacts.columns + or 'length' not in contacts.columns + ): + raise ValueError( + f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns" + ) def sort(self, units: pandas.DataFrame) -> list: """ @@ -335,17 +384,23 @@ def sort(self, units: pandas.DataFrame) -> list: """ import networkx as nx import networkx.algorithms.approximation as nx_app + if self.contacts is None: raise ValueError("SorterMaximiseContacts requires 'contacts' argument") + if len(self.contacts) == 0: + raise ValueError("contacts GeoDataFrame is empty in SorterMaximiseContacts.") + if "length" not in self.contacts.columns: + self.contacts['length'] = self.contacts.geometry.length + self.contacts['length'] = self.contacts['length'].astype(float) sorted_contacts = 
             graph.add_edge(
-                row["UNITNAME_1"], row["UNITNAME_2"], weight=int(max_weight - row["length"])
+                row[self.unitname1_column],
+                row[self.unitname2_column],
+                weight=int(max_weight - row["length"]),
             )
 
         cnode = None
 
@@ -301,15 +336,20 @@ class SorterMaximiseContacts(Sorter):
     """
     Sorter class which returns a sorted list of units based on the adjacency of units
     prioritising the maximum length of each contact
     """
-    required_arguments = ['contacts']
+
+    required_arguments = ['contacts', 'unit_name_column', 'unitname1_column', 'unitname2_column']
+
     def __init__(
         self,
         *,
         contacts: Optional[geopandas.GeoDataFrame] = None,
+        unit_name_column: str = 'name',
+        unitname1_column: str = 'UNITNAME_1',
+        unitname2_column: str = 'UNITNAME_2',
     ):
         """
         Initialiser for adjacency based sorter
-
+
         Args:
             contacts (pandas.DataFrame): unit contacts with length of the contacts in metres
         """
@@ -320,8 +360,17 @@ def __init__(
         self.route = None
         self.directed_graph = None
         self.contacts = contacts
-        if 'UNITNAME_1' not in contacts.columns or 'UNITNAME_2' not in contacts.columns or 'length' not in contacts.columns:
-            raise ValueError("contacts GeoDataFrame must contain 'UNITNAME_1', 'UNITNAME_2' and 'length' columns")
+        self.unit_name_column = unit_name_column
+        self.unitname1_column = unitname1_column
+        self.unitname2_column = unitname2_column
+        # as in SorterAlpha, only validate the columns when contacts is supplied
+        if self.contacts is not None and (
+            self.unitname1_column not in contacts.columns
+            or self.unitname2_column not in contacts.columns
+            or 'length' not in contacts.columns
+        ):
+            raise ValueError(
+                f"contacts GeoDataFrame must contain '{self.unitname1_column}', '{self.unitname2_column}' and 'length' columns"
+            )
 
     def sort(self, units: pandas.DataFrame) -> list:
         """
@@ -335,17 +384,23 @@ def sort(self, units: pandas.DataFrame) -> list:
         """
         import networkx as nx
         import networkx.algorithms.approximation as nx_app
+
         if self.contacts is None:
             raise ValueError("SorterMaximiseContacts requires 'contacts' argument")
+        if len(self.contacts) == 0:
+            raise ValueError("contacts GeoDataFrame is empty in SorterMaximiseContacts.")
+        if "length" not in self.contacts.columns:
+            self.contacts['length'] = self.contacts.geometry.length
+        self.contacts['length'] = self.contacts['length'].astype(float)
         sorted_contacts = self.contacts.sort_values(by="length", ascending=False)
 
         self.graph = nx.Graph()
-        unit_names = list(units["name"].unique())
+        unit_names = list(units[self.unit_name_column].unique())
         for unit in unit_names:
             ## some units may not have any contacts e.g. if they are intrusives or sills. If we leave this then the
             ## sorter crashes
             if (
-                unit not in sorted_contacts['UNITNAME_1'].values
-                or unit not in sorted_contacts['UNITNAME_2'].values
+                unit not in sorted_contacts[self.unitname1_column].values
+                or unit not in sorted_contacts[self.unitname2_column].values
             ):
                 continue
             self.graph.add_node(unit, name=unit)
 
@@ -353,7 +408,9 @@ def sort(self, units: pandas.DataFrame) -> list:
         max_weight = max(list(sorted_contacts["length"])) + 1
         sorted_contacts['length'] /= max_weight
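+        # Scale the lengths by max_weight so each edge weight (1 - scaled length)
+        # stays positive; the travelling-salesman route below then preferentially
+        # follows the longest shared contacts.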
         for _, row in sorted_contacts.iterrows():
-            self.graph.add_edge(row["UNITNAME_1"], row["UNITNAME_2"], weight=(1 - row["length"]))
+            self.graph.add_edge(
+                row[self.unitname1_column], row[self.unitname2_column], weight=(1 - row["length"])
+            )
 
         self.route = nx_app.traveling_salesman_problem(self.graph)
         edge_list = list(nx.utils.pairwise(self.route))
 
@@ -384,15 +441,28 @@ class SorterObservationProjections(Sorter):
     """
     Sorter class which returns a sorted list of units based on the adjacency of units
     using the direction of observations to predict which unit is adjacent to the current one
     """
-    required_arguments = ['contacts', 'geology_data', 'structure_data', 'dtm_data']
+
+    required_arguments = [
+        'contacts',
+        'geology_data',
+        'structure_data',
+        'dtm_data',
+        'unit_name_column',
+        'unitname1_column',
+        'unitname2_column',
+    ]
+
     def __init__(
         self,
         *,
+        unitname1_column: Optional[str] = 'UNITNAME_1',
+        unitname2_column: Optional[str] = 'UNITNAME_2',
+        unit_name_column: Optional[str] = 'name',
         contacts: Optional[geopandas.GeoDataFrame] = None,
         geology_data: Optional[geopandas.GeoDataFrame] = None,
         structure_data: Optional[geopandas.GeoDataFrame] = None,
         dtm_data: Optional[gdal.Dataset] = None,
-        length: Union[float, int] = 1000
+        length: Union[float, int] = 1000,
     ):
         """
         Initialiser for adjacency based sorter
@@ -409,9 +479,12 @@ def __init__(
         self.geology_data = geology_data
         self.structure_data = structure_data
         self.dtm_data = dtm_data
+        self.unit_name_column = unit_name_column
         self.sorter_label = "SorterObservationProjections"
         self.length = length
         self.lines = []
+        self.unitname1_column = unitname1_column
+        self.unitname2_column = unitname2_column
 
     def sort(self, units: pandas.DataFrame) -> list:
         """
@@ -426,6 +499,7 @@ def sort(self, units: pandas.DataFrame) -> list:
         import networkx as nx
         import networkx.algorithms.approximation as nx_app
         from shapely.geometry import LineString, Point
+
         if self.contacts is None:
             raise ValueError("SorterObservationProjections requires 'contacts' argument")
         if self.geology_data is None:
@@ -493,7 +567,12 @@ def sort(self, units: pandas.DataFrame) -> list:
                 # Get heights for intersection point and start of ray
                 height = value_from_raster(inv_geotransform, dtm_array, start.x, start.y)
                 first_intersect_point = Point(start.x, start.y, height)
-                height = value_from_raster(inv_geotransform, dtm_array, second_intersect_point.x, second_intersect_point.y)
+                height = value_from_raster(
+                    inv_geotransform,
+                    dtm_array,
+                    second_intersect_point.x,
+                    second_intersect_point.y,
+                )
                 second_intersect_point = Point(second_intersect_point.x, start.y, height)
 
                 # Check vertical difference between points and compare to projected dip angle
@@ -516,6 +595,7 @@ def sort(self, units: pandas.DataFrame) -> list:
         df = pandas.DataFrame(0, index=unit_names, columns=unit_names)
         for younger, older in ordered_unit_observations:
             df.loc[younger, older] += 1
+        logger.debug(f"Younger/older observation counts:\n{df}")
         max_value = max(df.max())
 
         # Using the older/younger matrix create a directed graph
@@ -543,13 +623,13 @@ def sort(self, units: pandas.DataFrame) -> list:
         g_undirected = g.to_undirected()
         for unit in unit_names:
             if len(list(g_undirected.neighbors(unit))) < 1:
-                mask1 = self.contacts["UNITNAME_1"] == unit
-                mask2 = self.contacts["UNITNAME_2"] == unit
+                mask1 = self.contacts[self.unitname1_column] == unit
+                mask2 = self.contacts[self.unitname2_column] == unit
                 for _, row in self.contacts[mask1 | mask2].iterrows():
-                    if unit == row["UNITNAME_1"]:
-                        g.add_edge(row["UNITNAME_2"], unit, weight=max_value * 10)
+                    if unit == row[self.unitname1_column]:
+                        g.add_edge(row[self.unitname2_column], unit, weight=max_value * 10)
                     else:
-                        g.add_edge(row["UNITNAME_1"], unit, weight=max_value * 10)
+                        g.add_edge(row[self.unitname1_column], unit, weight=max_value * 10)
 
         # Run travelling salesman using the observation evidence as weighting
         route = nx_app.traveling_salesman_problem(g.to_undirected())
diff --git a/map2loop/thickness_calculator.py b/map2loop/thickness_calculator.py
index 13a1b806..b0346d5a 100644
--- a/map2loop/thickness_calculator.py
+++ b/map2loop/thickness_calculator.py
@@ -23,6 +23,7 @@
 import pandas
 import geopandas
 import shapely
+from shapely.geometry import LineString
 import math
 from osgeo import gdal
 from shapely.errors import UnsupportedGEOSVersionError
@@ -107,7 +108,9 @@ def _check_thickness_percentage_calculations(self, thicknesses: pandas.DataFrame):
             f"have a calculated thickness of -1. This may indicate that {self.thickness_calculator_label} "
             f"is not suitable for this dataset."
         )
-
+    def __call__(self, **kwargs):
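+        """Allow a thickness calculator to be used as a callable.
+
+        A convenience sketch matching Sampler and Sorter: keyword arguments
+        are forwarded unchanged to ``compute()``.
+        """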
+        return self.compute(**kwargs)
+
 
 class ThicknessCalculatorAlpha(ThicknessCalculator):
     """
     ThicknessCalculator class which estimates unit thickness based on units, basal_contacts and stratigraphic order
     """
@@ -232,7 +235,6 @@ def __init__(
         super().__init__(dtm_data, bounding_box, max_line_length, is_strike)
         self.thickness_calculator_label = "InterpolatedStructure"
         self.lines = None
-
 
     @beartype.beartype
     def compute(
@@ -291,7 +293,7 @@ def compute(
             # set the crs of the contacts to the crs of the units
             contacts = contacts.set_crs(crs=basal_contacts.crs)
             if self.dtm_data is not None:
-                # get the elevation Z of the contacts
+                # get the elevation Z of the contacts
                 contacts = set_z_values_from_raster_df(self.dtm_data, contacts)
                 # update the geometry of the contact points to include the Z value
                 contacts["geometry"] = contacts.apply(
@@ -340,11 +342,11 @@ def compute(
             interpolated_orientations = interpolated_orientations[
                 ["geometry", "dip", "UNITNAME"]
             ].copy()
-
+
         _lines = []
         _dips = []
         _location_tracking = []
-
+
         for i in range(0, len(stratigraphic_order) - 1):
             if (
                 stratigraphic_order[i] in basal_unit_list
@@ -366,21 +368,21 @@ def compute(
                 dip = interpolated_orientations.loc[
                     interpolated_orientations["UNITNAME"] == stratigraphic_order[i], "dip"
                 ].to_numpy()
-
+
                 _thickness = []
-
+
                 for _, row in basal_contact.iterrows():
                     # find the shortest line between the basal contact points and top contact points
                     short_line = shapely.shortest_line(row.geometry, top_contact_geometry)
                     _lines.append(short_line[0])
-
-                # check if the short line is
+
+                    # skip the line if it exceeds the maximum allowed length
                    if self.max_line_length is not None and short_line.length > self.max_line_length:
                        continue
                    if self.dtm_data is not None:
                        inv_geotransform = gdal.InvGeoTransform(self.dtm_data.GetGeoTransform())
                        data_array = numpy.array(self.dtm_data.GetRasterBand(1).ReadAsArray().T)
-
+
                        # extract the end points of the shortest line
                        p1 = numpy.zeros(3)
                        p1[0] = numpy.asarray(short_line[0].coords[0][0])
@@ -408,7 +410,7 @@ def compute(
                    _dips.append(_dip)
                    # calculate the true thickness t = L * sin(dip)
                    thickness = line_length * numpy.sin(_dip)
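+                    # Sanity check, assuming _dip is in radians (numpy.sin expects
+                    # radians): a 1000 m shortest line across a contact dipping
+                    # 30 degrees gives t = 1000 * sin(pi / 6) = 500 m.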
-
+
                    # add location tracking
                    location_tracking = pandas.DataFrame(
                        {
@@ -419,7 +421,7 @@ def compute(
                        }
                    )
                    _location_tracking.append(location_tracking)
-
+
                # Average thickness along the shortest line
                if all(numpy.isnan(thickness)):
                    pass
@@ -444,20 +446,35 @@ def compute(
                logger.warning(
                    f"Thickness Calculator InterpolatedStructure: Cannot calculate thickness between {stratigraphic_order[i]} and {stratigraphic_order[i + 1]}\n"
                )
-
+
        # Combine all location_tracking DataFrames into a single DataFrame
-        combined_location_tracking = pandas.concat(_location_tracking, ignore_index=True)
-
+        if _location_tracking:
+            combined_location_tracking = pandas.concat(_location_tracking, ignore_index=True)
+            combined_location_tracking['geometry'] = combined_location_tracking.apply(
+                lambda row: LineString(
+                    [
+                        (row['p1_x'], row['p1_y'], row['p1_z']),
+                        (row['p2_x'], row['p2_y'], row['p2_z']),
+                    ]
+                ),
+                axis=1,
+            )
+        else:
+            combined_location_tracking = pandas.DataFrame()
+
        # Save the combined DataFrame as an attribute of the class,
-        self.location_tracking = combined_location_tracking
-
+        # converted to a GeoDataFrame with the CRS of basal_contacts
+        if not combined_location_tracking.empty:
+            self.location_tracking = geopandas.GeoDataFrame(
+                combined_location_tracking, geometry='geometry', crs=basal_contacts.crs
+            )
+        else:
+            self.location_tracking = geopandas.GeoDataFrame(
+                columns=[
+                    'p1_x', 'p1_y', 'p1_z', 'p2_x', 'p2_y', 'p2_z', 'thickness', 'unit', 'geometry'
+                ],
+                crs=basal_contacts.crs,
+            )
+
        # Create GeoDataFrame for lines
        self.lines = geopandas.GeoDataFrame(geometry=_lines, crs=basal_contacts.crs)
        self.lines['dip'] = _dips
-
+
        # Check thickness calculation
        self._check_thickness_percentage_calculations(thicknesses)
-
+
        return thicknesses
 
 
 class StructuralPoint(ThicknessCalculator):
@@ -486,7 +503,6 @@ def __init__(
        self.strike_allowance = 30
        self.lines = None
 
-
    @beartype.beartype
    def compute(
        self,
@@ -545,8 +561,12 @@ def compute(
            geometry=geopandas.points_from_xy(sampled_structures.X, sampled_structures.Y),
            crs=basal_contacts.crs,
        )
+        if 'UNITNAME' in sampled_structures.columns:
+            sampled_structures = sampled_structures.drop(columns=['UNITNAME'])
        # add unitname to the sampled structures
-        sampled_structures['unit_name'] = geopandas.sjoin(sampled_structures, geology)['UNITNAME']
+        sampled_structures['unit_name'] = geopandas.sjoin(
+            sampled_structures, geology, how='inner', predicate='within'
+        )['UNITNAME']
        # remove nans from sampled structures
        # this happens when there are strati measurements within intrusions. If intrusions are removed from the geology map, unit_name will then return a nan
@@ -605,7 +625,7 @@ def compute(
        # draw orthogonal line to the strike (default value 10Km), and clip it by the bounding box of the lithology
        if self.max_line_length is None:
            self.max_line_length = 10000
-        B = calculate_endpoints(measurement_pt, strike, self.max_line_length, bbox_poly)
+        B = calculate_endpoints(measurement_pt, float(strike), float(self.max_line_length), bbox_poly)
        b = geopandas.GeoDataFrame({'geometry': [B]}).set_crs(basal_contacts.crs)
 
        # find all intersections
@@ -677,7 +697,7 @@ def compute(
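+            # Keep this intersection pair only when both local contact strikes
+            # fall inside the b_s window (presumably measured strike plus or
+            # minus strike_allowance degrees); otherwise discard the pair.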
            if not (b_s[0] < strike1 < b_s[1] and b_s[0] < strike2 < b_s[1]):
                continue
 
-            #build the debug info
+            # build the debug info
            line = shapely.geometry.LineString([int_pt1, int_pt2])
            _lines.append(line)
            _dip.append(measurement['DIP'])
@@ -688,17 +708,17 @@ def compute(
            # if length is higher than max_line_length, skip
            if self.max_line_length is not None and L > self.max_line_length:
                continue
-
+
            # calculate thickness
            thickness = L * math.sin(math.radians(measurement['DIP']))
            thicknesses.append(thickness)
            lis.append(litho_in)
-
+
        # create the debug gdf
        self.lines = geopandas.GeoDataFrame(geometry=_lines, crs=basal_contacts.crs)
        self.lines["DIP"] = _dip
-
+
        # create a DataFrame of the thicknesses median and standard deviation by lithology
        result = pandas.DataFrame({'unit': lis, 'thickness': thicknesses})
        result = result.groupby('unit')['thickness'].agg(['median', 'mean', 'std']).reset_index()
@@ -709,7 +729,7 @@ def compute(
        output_units['ThicknessMedian'] = numpy.full(len(output_units), numpy.nan)
        output_units['ThicknessMean'] = numpy.full(len(output_units), numpy.nan)
        output_units['ThicknessStdDev'] = numpy.full(len(output_units), numpy.nan)
-
+
        # find which units have no thickness calculated
        names_not_in_result = units[~units['name'].isin(result['unit'])]['name'].to_list()
        # assign the thicknesses to each unit
@@ -718,12 +738,11 @@ def compute(
            output_units.loc[idx, 'ThicknessMedian'] = unit['median']
            output_units.loc[idx, 'ThicknessMean'] = unit['mean']
            output_units.loc[idx, 'ThicknessStdDev'] = unit['std']
-
+
        output_units["ThicknessMean"] = output_units["ThicknessMean"].fillna(-1)
        output_units["ThicknessMedian"] = output_units["ThicknessMedian"].fillna(-1)
        output_units["ThicknessStdDev"] = output_units["ThicknessStdDev"].fillna(-1)
-
-
+
        # handle the units that have no thickness
        for unit in names_not_in_result:
            # if no thickness has been calculated for the unit
@@ -754,5 +773,5 @@ def compute(
            output_units.loc[output_units["name"] == unit, "ThicknessStdDev"] = -1
 
        self._check_thickness_percentage_calculations(output_units)
-
+
        return output_units
diff --git a/map2loop/topology.py b/map2loop/topology.py
index d7a90dc4..88b4e643 100644
--- a/map2loop/topology.py
+++ b/map2loop/topology.py
@@ -36,7 +36,8 @@ class Topology:
    def __init__(
        self,
        geology_data: geopandas.GeoDataFrame,
        fault_data: Optional[geopandas.GeoDataFrame] = None,
-        verbose_level: VerboseLevel = VerboseLevel.NONE
+        verbose_level: VerboseLevel = VerboseLevel.NONE,
+        id_field: str = 'ID',
    ):
        """
        The initialiser for the map2model wrapper
@@ -55,7 +56,7 @@ def __init__(
        self.fault_data = fault_data
        self.verbose_level = verbose_level
        self.buffer_radius = 500
-
+        self.fault_id_field = id_field
    @property
    def fault_fault_relationships(self):
        if self._fault_fault_relationships is None:
diff --git a/map2loop/utils/utility_functions.py b/map2loop/utils/utility_functions.py
index 7c864fc0..529fbefe 100644
--- a/map2loop/utils/utility_functions.py
+++ b/map2loop/utils/utility_functions.py
@@ -10,6 +10,7 @@
 from osgeo import gdal
 from ..logging import getLogger
+
 logger = getLogger(__name__)
 
@@ -184,8 +185,11 @@ def find_segment_strike_from_pt(
 @beartype.beartype
 def calculate_endpoints(
-    start_point: shapely.geometry.Point, azimuth_deg: float, distance: int, bbox: pandas.DataFrame
-) -> shapely.geometry.LineString:
+    start_point: shapely.geometry.Point,
+    azimuth_deg: Union[float, int],
+    distance: Union[float, int],
+    bbox: pandas.DataFrame,
+) -> Union[shapely.geometry.LineString, shapely.geometry.GeometryCollection]:
     """
     Calculate the endpoints of a line segment given a start point, azimuth angle, distance, and bounding box.
@@ -220,13 +224,12 @@ def calculate_endpoints(
     line = shapely.geometry.LineString([left_endpoint, right_endpoint])
     new_line = shapely.ops.clip_by_rect(line, minx, miny, maxx, maxy)
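+    # Note: clip_by_rect may return an empty geometry or a GeometryCollection
+    # when the clipped segment lies outside the box or crosses it in pieces,
+    # hence the widened return annotation above.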
-
     return new_line
 
 
 @beartype.beartype
 def multiline_to_line(
-    geometry: Union[shapely.geometry.LineString, shapely.geometry.MultiLineString]
+    geometry: Union[shapely.geometry.LineString, shapely.geometry.MultiLineString],
 ) -> shapely.geometry.LineString:
     """
     Converts a multiline geometry to a single line geometry.
@@ -383,7 +386,6 @@ def hex_to_rgb(hex_color: str) -> tuple:
 def calculate_minimum_fault_length(
     bbox: dict[str, Union[int, float]], area_percentage: float
 ) -> float:
-
     """
     Calculate the minimum fault length based on the map bounding box and a given area percentage.
@@ -441,11 +443,10 @@ def read_hjson_with_json(file_path: str) -> dict:
     except json.JSONDecodeError as e:
         raise ValueError(f"Failed to decode preprocessed HJSON as JSON: {e}") from e
 
+
 @beartype.beartype
 def update_from_legacy_file(
-    filename: str,
-    json_save_path: Optional[str] = None,
-    lower: bool = False
+    filename: str, json_save_path: Optional[str] = None, lower: bool = False
 ) -> Optional[Dict[str, Dict]]:
     """
     Update the config dictionary from the provided old version dictionary
@@ -453,18 +454,19 @@ def update_from_legacy_file(
     Args:
         filename (str): the path to the legacy file
         json_save_path (str, optional): the path to save the updated json file. Defaults to None.
         lower (bool, optional): whether to convert all strings to lowercase. Defaults to False.
-
+
     Returns:
         Dict[Dict]: the updated config dictionary
-
+
     Example:
         from map2loop.utils import update_from_legacy_file
         update_from_legacy_file(filename=r"./source_data/example.hjson")
     """
     # only import config if needed
     from .config import Config
+
     file_map = Config()
-
+
     code_mapping = {
         "otype": (file_map.structure_config, "orientation_type"),
         "dd": (file_map.structure_config, "dipdir_column"),
@@ -506,7 +508,7 @@ def update_from_legacy_file(
     except Exception as e:
         logger.error(f"Error reading file {filename}: {e}")
         return
-    #map the keys
+    # map the keys
     file_map = file_map.to_dict()
     for legacy_key, new_mapping in code_mapping.items():
         if legacy_key in parsed_data:
@@ -515,21 +517,22 @@ def update_from_legacy_file(
             if lower and isinstance(value, str):
                 value = value.lower()
             section[new_key] = value
-
+
     if "o" in parsed_data:
         object_id_value = parsed_data["o"]
         if lower and isinstance(object_id_value, str):
             object_id_value = object_id_value.lower()
         file_map['structure']["objectid_column"] = object_id_value
         file_map['geology']["objectid_column"] = object_id_value
-        file_map['fold']["objectid_column"] = object_id_value
-
+        file_map['fold']["objectid_column"] = object_id_value
+
     if json_save_path is not None:
         with open(json_save_path, "w") as f:
             json.dump(parsed_data, f, indent=4)
-
+
     return file_map
 
+
 @beartype.beartype
 def value_from_raster(inv_geotransform, data, x: float, y: float):
     """
@@ -557,6 +560,7 @@ def value_from_raster(inv_geotransform, data, x: float, y: float):
     py = min(py, data.shape[1] - 1)
     return data[px][py]
 
+
 @beartype.beartype
 def set_z_values_from_raster_df(dtm_data: gdal.Dataset, df: pandas.DataFrame):
     """
@@ -574,7 +578,7 @@ def set_z_values_from_raster_df(dtm_data: gdal.Dataset, df: pandas.DataFrame):
     if len(df) <= 0:
         df["Z"] = []
         return df
-
+
     if dtm_data is None:
         logger.warning("Cannot get value from data as data is not loaded")
         return None
@@ -583,8 +587,7 @@ def set_z_values_from_raster_df(dtm_data: gdal.Dataset, df: pandas.DataFrame):
     data_array = numpy.array(dtm_data.GetRasterBand(1).ReadAsArray().T)
 
     df["Z"] = df.apply(
-        lambda row: value_from_raster(inv_geotransform, data_array, row["X"], row["Y"]),
-        axis=1,
+        lambda row: value_from_raster(inv_geotransform, data_array, row["X"], row["Y"]), axis=1
     )
-    return df
\ No newline at end of file
+    return df
diff --git a/tests/project/test_thickness_calculations.py b/tests/project/test_thickness_calculations.py
index 961e1a9c..7f601c84 100644
--- a/tests/project/test_thickness_calculations.py
+++ b/tests/project/test_thickness_calculations.py
@@ -5,7 +5,7 @@
 from map2loop.utils import load_map2loop_data
 from map2loop.thickness_calculator import InterpolatedStructure, StructuralPoint
-from map2loop import Project
+from map2loop.project import Project
 
 # 1. self.stratigraphic_column.stratigraphicUnits,
 st_units = pandas.DataFrame(