Source code for dbload.query_parser

# Copyright 2020-2021 Dynatrace LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import List

from loguru import logger
from mapz import Mapz


[docs]class QueryParser: """SQL file parser. Parses annotated SQL files from provided paths. This class should not generally be used by itself. :class:`~dbload.context.Context` already invokes this parser during its initialization phase. QueryParser provides a single static method :meth:`~.QueryParser.parse` that performs the parsing. There is no need to create the QueryParser object itself. Examples: Use parser:: from dbload import QueryParser parsed = QueryParser.parse(["./queries.sql"]) """
[docs] @staticmethod def parse(sources: List[str] = []) -> Mapz: """Parse text sources with annotated SQL queries. Args: sources (List[str]): List of text strings with annotated SQL queries to parse. Parser reads each string line by line (split by ``\\n`` symbol) and looks for annotated SQL queries in it. It does not verify the validity of SQL syntax. Parser understands annotation comments in SQL file that start with ``"--"`` comment identifier and contain ``name:`` tag in them. Returns: Mapz: Dictionary of parsed queries. Raises: SqlQueriesFileEmptyError: when provided text file with annotated SQL queries is empty. """ parsed = Mapz() for source in sources: QueryParser._parse_queries(source, parsed) return parsed
@staticmethod def _parse_queries(source: str, parsed: Mapz) -> None: name_regex = re.compile(r".*name:\s*([\w]+)") option_regex = re.compile(r"option:\s*([\w]+)") # re.findall( # r"scenario:\s*([\w-]+)(?:\[([-\d]+)\])?", # "--name:disi, scenario: sample[1], scenario: teardown[-90], scenario: name", # ) # >>> [('sample', '1'), ('teardown', '-90'), ('name', '')] scenario_regex = re.compile(r"scenario:\s*([\w-]+)(?:\[([-\d]+)\])?") # After reading whole file, process the lines # one by one, assembling queries one by one current_query_name = None # current_query_kind = None current_query_content = "" collected = Mapz() lines = source.split("\n") for line in lines: line = line.strip("\n").replace("\t", " ").replace("\r", "") if "--" in line: # Detect start of the new query nm = name_regex.match(line) if nm: # If we have a current query name, then append new content to it if current_query_name: collected[ current_query_name ].text = current_query_content current_query_content = "" # Detect if there are any options specified in the query options = option_regex.findall(line) # Detect if the querly explicitly wants to be called # within a certain scenario scenarios = scenario_regex.findall(line) scenarios = [ (n, int(order) if order else 0) for n, order in scenarios ] # Start new context for tracking the new query current_query_name = nm.group(1) current_query_content = "" collected[current_query_name] = Mapz( # kind=current_query_kind, options=options, scenarios=scenarios, text="", ) else: current_query_content += line # Add last "unseparrated" query to list collected[current_query_name].text = current_query_content # Remove queries without `text`. # Happens when you comment out the whole half of the queries file # in IDE and `-- name:` comment gets parsed but the text contains # nothing due to it being fully commented out for k, v in collected.items(): if v.text: parsed[k] = v