Spaces:
Running
Running
| '''Some operations. To be split into separate files when we have more.''' | |
| from . import ops | |
| import matplotlib | |
| import networkx as nx | |
| import pandas as pd | |
def import_parquet(*, filename: str):
    '''Reads the parquet file at `filename` with pandas and returns the loaded table.'''
    table = pd.read_parquet(filename)
    return table
def create_scale_free_graph(*, nodes: int = 10):
    '''Generates a scale-free graph with `nodes` nodes.

    The graph is built by `nx.scale_free_graph` with its default settings and
    returned unchanged.
    '''
    node_count = nodes
    generated = nx.scale_free_graph(node_count)
    return generated
def compute_pagerank(graph: nx.Graph, *, damping=0.85, iterations=100):
    '''Runs networkx PageRank on `graph` and returns its result unchanged.

    `damping` is forwarded to `nx.pagerank` as `alpha` and `iterations` as
    `max_iter`.
    '''
    pagerank_options = {'alpha': damping, 'max_iter': iterations}
    return nx.pagerank(graph, **pagerank_options)
# NOTE(review): a function with this exact name and body is already defined
# earlier in this file. This later definition is the one the module name ends
# up bound to; one of the two copies can probably be removed.
def create_scale_free_graph(*, nodes: int = 10):
    '''Returns a scale-free graph of `nodes` nodes built by `nx.scale_free_graph`.'''
    scale_free = nx.scale_free_graph(nodes)
    return scale_free
def _map_color(value):
    '''Maps numeric values to '#rrggbb' hex strings on the viridis colormap.

    `value` must be a 1-D pandas Series or NumPy array (it needs `.min()`,
    `.max()` and element-wise arithmetic). The values are min-max normalized to
    [0, 1] before the colormap lookup, so the smallest value gets the first
    viridis color and the largest gets the last.

    Returns a list with one hex color string per input value.
    '''
    try:
        cmap = matplotlib.colormaps['viridis']
    except AttributeError:
        # Older matplotlib releases have no `colormaps` registry; newer ones
        # have removed `cm.get_cmap`. Supporting both keeps this portable.
        cmap = matplotlib.cm.get_cmap('viridis')
    low, high = value.min(), value.max()
    span = high - low
    if span == 0:
        # All values are equal. Dividing by zero would turn every entry into
        # NaN, which the colormap renders as its "bad" color. Map them all to
        # the low end instead, like matplotlib's own Normalize does.
        span = 1
    rgba = cmap((value - low) / span)
    # Drop the alpha channel and scale each float channel to a 0-255 integer.
    return [
        '#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255))
        for r, g, b in rgba[:, :3]]
def visualize_graph(graph: ops.Bundle, *, color_nodes_by: 'node_attribute' = None):
    '''Builds a chart option dict that draws the bundle as a node-link diagram.

    Reads the 'nodes' and 'edges' dataframes of `graph`. Nodes need an 'id'
    column and edges need 'source' and 'target' columns. Node positions come
    from a networkx spring layout of `graph.to_nx()`.

    If `color_nodes_by` names a node column, each node is colored by that
    column's value through `_map_color`. The 'node_attribute' annotation is
    presumably interpreted by the ops framework -- TODO confirm.

    Returns a plain dict. Its keys look like the Apache ECharts "graph" series
    option format -- confirm against the frontend that consumes it.
    '''
    # Work on a copy so that adding the 'color' column does not modify the
    # dataframe stored in the bundle.
    nodes = graph.dfs['nodes'].copy()
    if color_nodes_by:
        nodes['color'] = _map_color(nodes[color_nodes_by])
    nodes = nodes.to_records()
    # Only one link is drawn per (source, target) pair.
    edges = graph.dfs['edges'].drop_duplicates(['source', 'target'])
    edges = edges.to_records()
    # Clamp to 1 so that an empty node table does not divide by zero below.
    node_count = max(1, len(nodes))
    # Spend fewer layout iterations on bigger graphs, but always at least one.
    pos = nx.spring_layout(graph.to_nx(), iterations=max(1, 10000 // node_count))
    # Adjust node size to cover the same area no matter how many nodes there are.
    symbol_size = 50 / node_count ** 0.5
    v = {
        'animationDuration': 500,
        'animationEasingUpdate': 'quinticInOut',
        'series': [
            {
                'type': 'graph',
                'roam': True,
                'lineStyle': {
                    'color': 'gray',
                    'curveness': 0.3,
                },
                'emphasis': {
                    'focus': 'adjacency',
                    'lineStyle': {
                        'width': 10,
                    }
                },
                'data': [
                    {
                        'id': str(n.id),
                        'x': float(pos[n.id][0]), 'y': float(pos[n.id][1]),
                        'symbolSize': symbol_size,
                        'itemStyle': {'color': n.color} if color_nodes_by else {},
                    }
                    for n in nodes],
                'links': [
                    {'source': str(r.source), 'target': str(r.target)}
                    for r in edges],
            },
        ],
    }
    return v
def view_tables(bundle: ops.Bundle):
    '''Converts a bundle into a plain dict for displaying its tables.

    Each dataframe in `bundle.dfs` becomes a dict holding its column names as
    strings and its rows as nested lists. `bundle.relations` and `bundle.other`
    are passed through unchanged.
    '''
    tables = {}
    for name, df in bundle.dfs.items():
        tables[name] = {
            'columns': list(map(str, df.columns)),
            'data': df.values.tolist(),
        }
    return {
        'dataframes': tables,
        'relations': bundle.relations,
        'other': bundle.other,
    }