contentdb/migrations/versions/7ff57806ffd5_.py

250 lines
7.1 KiB
Python

"""empty message
Revision ID: 7ff57806ffd5
Revises: 2f3c3597c78d
Create Date: 2019-01-29 02:57:50.279918
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '7ff57806ffd5'
down_revision = '2f3c3597c78d'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.execute("""
DROP TYPE IF EXISTS tsq_state CASCADE;
CREATE TYPE tsq_state AS (
search_query text,
parentheses_stack int,
skip_for int,
current_token text,
current_index int,
current_char text,
previous_char text,
tokens text[]
);
CREATE OR REPLACE FUNCTION tsq_append_current_token(state tsq_state)
RETURNS tsq_state AS $$
BEGIN
IF state.current_token != '' THEN
state.tokens := array_append(state.tokens, state.current_token);
state.current_token := '';
END IF;
RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION tsq_tokenize_character(state tsq_state)
RETURNS tsq_state AS $$
BEGIN
IF state.current_char = '(' THEN
state.tokens := array_append(state.tokens, '(');
state.parentheses_stack := state.parentheses_stack + 1;
state := tsq_append_current_token(state);
ELSIF state.current_char = ')' THEN
IF (state.parentheses_stack > 0 AND state.current_token != '') THEN
state := tsq_append_current_token(state);
state.tokens := array_append(state.tokens, ')');
state.parentheses_stack := state.parentheses_stack - 1;
END IF;
ELSIF state.current_char = '"' THEN
state.skip_for := position('"' IN substring(
state.search_query FROM state.current_index + 1
));
IF state.skip_for > 1 THEN
state.tokens = array_append(
state.tokens,
substring(
state.search_query
FROM state.current_index FOR state.skip_for + 1
)
);
ELSIF state.skip_for = 0 THEN
state.current_token := state.current_token || state.current_char;
END IF;
ELSIF (
state.current_char = '-' AND
(state.current_index = 1 OR state.previous_char = ' ')
) THEN
state.tokens := array_append(state.tokens, '-');
ELSIF state.current_char = ' ' THEN
state := tsq_append_current_token(state);
IF substring(
state.search_query FROM state.current_index FOR 4
) = ' or ' THEN
state.skip_for := 2;
-- remove duplicate OR tokens
IF state.tokens[array_length(state.tokens, 1)] != ' | ' THEN
state.tokens := array_append(state.tokens, ' | ');
END IF;
END IF;
ELSE
state.current_token = state.current_token || state.current_char;
END IF;
RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION tsq_tokenize(search_query text) RETURNS text[] AS $$
DECLARE
state tsq_state;
BEGIN
SELECT
search_query::text AS search_query,
0::int AS parentheses_stack,
0 AS skip_for,
''::text AS current_token,
0 AS current_index,
''::text AS current_char,
''::text AS previous_char,
'{}'::text[] AS tokens
INTO state;
state.search_query := lower(trim(
regexp_replace(search_query, '""+', '""', 'g')
));
FOR state.current_index IN (
SELECT generate_series(1, length(state.search_query))
) LOOP
state.current_char := substring(
search_query FROM state.current_index FOR 1
);
IF state.skip_for > 0 THEN
state.skip_for := state.skip_for - 1;
CONTINUE;
END IF;
state := tsq_tokenize_character(state);
state.previous_char := state.current_char;
END LOOP;
state := tsq_append_current_token(state);
state.tokens := array_nremove(state.tokens, '(', -state.parentheses_stack);
RETURN state.tokens;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Processes an array of text search tokens and returns a tsquery
CREATE OR REPLACE FUNCTION tsq_process_tokens(config regconfig, tokens text[])
RETURNS tsquery AS $$
DECLARE
result_query text;
previous_value text;
value text;
BEGIN
result_query := '';
FOREACH value IN ARRAY tokens LOOP
IF value = '"' THEN
CONTINUE;
END IF;
IF left(value, 1) = '"' AND right(value, 1) = '"' THEN
value := phraseto_tsquery(config, value);
ELSIF value NOT IN ('(', ' | ', ')', '-') THEN
value := quote_literal(value) || ':*';
END IF;
IF previous_value = '-' THEN
IF value = '(' THEN
value := '!' || value;
ELSE
value := '!(' || value || ')';
END IF;
END IF;
SELECT
CASE
WHEN result_query = '' THEN value
WHEN (
previous_value IN ('!(', '(', ' | ') OR
value IN (')', ' | ')
) THEN result_query || value
ELSE result_query || ' & ' || value
END
INTO result_query;
previous_value := value;
END LOOP;
RETURN to_tsquery(config, result_query);
END;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION tsq_process_tokens(tokens text[])
RETURNS tsquery AS $$
SELECT tsq_process_tokens(get_current_ts_config(), tokens);
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION tsq_parse(config regconfig, search_query text)
RETURNS tsquery AS $$
SELECT tsq_process_tokens(config, tsq_tokenize(search_query));
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION tsq_parse(config text, search_query text)
RETURNS tsquery AS $$
SELECT tsq_parse(config::regconfig, search_query);
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION tsq_parse(search_query text) RETURNS tsquery AS $$
SELECT tsq_parse(get_current_ts_config(), search_query);
$$ LANGUAGE SQL IMMUTABLE;
-- remove first N elements equal to the given value from the array (array
-- must be one-dimensional)
--
-- If negative value is given as the third argument the removal of elements
-- starts from the last array element.
CREATE OR REPLACE FUNCTION array_nremove(anyarray, anyelement, int)
RETURNS ANYARRAY AS $$
WITH replaced_positions AS (
SELECT UNNEST(
CASE
WHEN $2 IS NULL THEN
'{}'::int[]
WHEN $3 > 0 THEN
(array_positions($1, $2))[1:$3]
WHEN $3 < 0 THEN
(array_positions($1, $2))[
(cardinality(array_positions($1, $2)) + $3 + 1):
]
ELSE
'{}'::int[]
END
) AS position
)
SELECT COALESCE((
SELECT array_agg(value)
FROM unnest($1) WITH ORDINALITY AS t(value, index)
WHERE index NOT IN (SELECT position FROM replaced_positions)
), $1[1:0]);
$$ LANGUAGE SQL IMMUTABLE;
""")
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###