# test/test_sparql/test_agg_distinct.py

from rdflib import Graph

# Query template used by the GROUP_CONCAT and SUM tests below: ``%s`` is
# replaced with the name of the aggregate function that is applied to the
# DISTINCT values of ?z_ within each ?x group.
# The "x1" group binds ?z_ to the same value (1) in both of its rows.
query_tpl = """
SELECT ?x (MIN(?y_) as ?y) (%s(DISTINCT ?z_) as ?z) {
  VALUES (?x ?y_ ?z_) {
    ("x1" 10 1)
    ("x1" 11 1)
    ("x2" 20 2)
  }
} GROUP BY ?x ORDER BY ?x
"""


def test_group_concat_distinct():
    """Check that GROUP_CONCAT(DISTINCT ...) uses each distinct value once per group."""
    graph = Graph()
    rows = [
        [term.toPython() for term in row]
        for row in graph.query(query_tpl % "GROUP_CONCAT")
    ]

    # The critical case: the "x1" group holds the value 1 twice, and the
    # aggregated result is expected to be the single string "1".
    first_z = rows[0][2]
    assert first_z == "1", first_z

    # Compare the complete result as well, to be on the safe side.
    expected = [
        ["x1", 10, "1"],
        ["x2", 20, "2"],
    ]
    assert rows == expected, rows


def test_sum_distinct():
    """Check that SUM(DISTINCT ...) adds each distinct value once per group."""
    graph = Graph()
    rows = [
        [term.toPython() for term in row]
        for row in graph.query(query_tpl % "SUM")
    ]

    # The critical case: the "x1" group holds the value 1 twice, and the
    # aggregated result is expected to be 1 rather than 2.
    first_z = rows[0][2]
    assert first_z == 1, first_z

    # Compare the complete result as well, to be on the safe side.
    expected = [
        ["x1", 10, 1],
        ["x2", 20, 2],
    ]
    assert rows == expected, rows


def test_avg_distinct():
    """Check that AVG(DISTINCT ...) averages over the distinct values of each group."""
    avg_query = """
        SELECT ?x (MIN(?y_) as ?y) (AVG(DISTINCT ?z_) as ?z) {
          VALUES (?x ?y_ ?z_) {
            ("x1" 10 1)
            ("x1" 11 1)
            ("x1" 12 3)
            ("x2" 20 2)
          }
       } GROUP BY ?x ORDER BY ?x
    """
    graph = Graph()
    rows = [[term.toPython() for term in row] for row in graph.query(avg_query)]

    # The critical case: the "x1" group holds the values 1, 1 and 3; the
    # expected average of 2 corresponds to averaging the distinct values 1 and 3.
    first_z = rows[0][2]
    assert first_z == 2, first_z

    # Compare the complete result as well, to be on the safe side.
    expected = [
        ["x1", 10, 2],
        ["x2", 20, 2],
    ]
    assert rows == expected, rows


def test_count_distinct():
    """Check that COUNT(DISTINCT ?x) matches the row count of SELECT DISTINCT ?x."""
    turtle_data = """
    @prefix : <> .

    <#a>
      :knows <#b>, <#c> ;
      :age 42 .

    <#b>
      :knows <#a>, <#c> ;
      :age 36 .

    <#c>
      :knows <#b>, <#c> ;
      :age 20 .

    """
    graph = Graph()
    graph.parse(
        data=turtle_data,
        format="turtle",
        publicID="http://example.org/",
    )

    # Query 1: people knowing someone younger
    select_distinct = """
    PREFIX : <http://example.org/>

    SELECT DISTINCT ?x {
      ?x :age ?ax ; :knows [ :age ?ay ].
      FILTER( ?ax > ?ay )
    }
    """
    younger = graph.query(select_distinct)
    assert len(younger) == 2

    # Query 2: count people knowing someone younger
    count_distinct = """
    PREFIX : <http://example.org/>

    SELECT (COUNT(DISTINCT ?x) as ?cx) {
      ?x :age ?ax ; :knows [ :age ?ay ].
      FILTER( ?ax > ?ay )
    }
    """
    counted = graph.query(count_distinct)
    first_row = list(counted)[0]
    assert first_row[0].toPython() == 2