-- Set the job queue name to "analytics" for the statements of this session.
set mapred.job.queue.name=analytics; /*EOS*/

------------------------------------------------------
------------------------------------------------------
-- Project table/view and Project related tables/views
------------------------------------------------------
------------------------------------------------------
-- project_oids: one row per (project, original identifier) pair, produced by
-- exploding the project's `originalid` array.
-- `id` is the project id with its first three characters removed.
-- Projects flagged as deleted-by-inference or invisible are left out.
DROP TABLE IF EXISTS ${stats_db_name}.project_oids PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.project_oids
STORED AS PARQUET
AS
SELECT /*+ COALESCE(100) */
    substr(proj.id, 4) AS id,
    orig.original_id   AS oid
FROM ${openaire_db_name}.project proj
    LATERAL VIEW explode(proj.originalid) orig AS original_id
WHERE proj.datainfo.invisible = false
  AND proj.datainfo.deletedbyinference = false; /*EOS*/

ANALYZE TABLE ${stats_db_name}.project_oids COMPUTE STATISTICS; /*EOS*/

-- project_organizations: one row per 'projectOrganization' relation whose
-- source id starts with '40|'. Both ids are stored with their first three
-- characters removed; the source becomes `id`, the target `organization`.
-- Relations flagged as deleted-by-inference or invisible are left out.
DROP TABLE IF EXISTS ${stats_db_name}.project_organizations PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.project_organizations
STORED AS PARQUET
AS
SELECT /*+ COALESCE(100) */
    substr(rel.source, 4) AS id,
    substr(rel.target, 4) AS organization
FROM ${openaire_db_name}.relation rel
WHERE rel.source LIKE '40|%'
  AND rel.reltype = 'projectOrganization'
  AND rel.datainfo.invisible = false
  AND rel.datainfo.deletedbyinference = false; /*EOS*/

ANALYZE TABLE ${stats_db_name}.project_organizations COMPUTE STATISTICS; /*EOS*/

-- project_results: one row per 'resultProject' relation whose target id starts
-- with '40|'. Note the direction: the relation TARGET becomes `id` and the
-- relation SOURCE becomes `result` (both with their first three characters
-- removed). `provenance` is the class name of the relation's provenance action.
-- Relations flagged as deleted-by-inference or invisible are left out.
DROP TABLE IF EXISTS ${stats_db_name}.project_results PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.project_results
STORED AS PARQUET
AS
SELECT /*+ COALESCE(100) */
    substr(rel.target, 4)                       AS id,
    substr(rel.source, 4)                       AS result,
    rel.datainfo.provenanceaction.classname     AS provenance
FROM ${openaire_db_name}.relation rel
WHERE rel.target LIKE '40|%'
  AND rel.reltype = 'resultProject'
  AND rel.datainfo.invisible = false
  AND rel.datainfo.deletedbyinference = false; /*EOS*/

ANALYZE TABLE ${stats_db_name}.project_results COMPUTE STATISTICS; /*EOS*/

-- project_classification: one row per entry of a project's
-- `h2020classification` array that carries a non-null `h2020programme`.
-- Exposes the programme code and the three classification levels.
-- Projects flagged as deleted-by-inference or invisible are left out.
DROP TABLE IF EXISTS ${stats_db_name}.project_classification PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.project_classification
STORED AS PARQUET
AS
SELECT /*+ COALESCE(100) */
    substr(proj.id, 4)             AS id,
    h2020.h2020programme.code      AS code,
    h2020.level1                   AS level1,
    h2020.level2                   AS level2,
    h2020.level3                   AS level3
FROM ${openaire_db_name}.project proj
    LATERAL VIEW explode(proj.h2020classification) exploded AS h2020
WHERE h2020.h2020programme IS NOT NULL
  AND proj.datainfo.invisible = false
  AND proj.datainfo.deletedbyinference = false; /*EOS*/

ANALYZE TABLE ${stats_db_name}.project_classification COMPUTE STATISTICS; /*EOS*/

-- project: one row per visible, non-deleted project with descriptive fields
-- and publication statistics.
--
--   * funder / funding_lvl0..2 are read from the FIRST funding tree entry only
--     (fundingtree[0]).
--   * duration is months_between(enddate, startdate) truncated to an integer.
--   * numpubs / daysForlastPub / delayedpubs are NULL (and haspubs is 'no')
--     for projects with no linked publication, because the statistics are
--     attached with a left join.
DROP TABLE IF EXISTS ${stats_db_name}.project PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.project STORED AS PARQUET AS
WITH pr_pub AS (
    -- One row per (publication, project) 'isProducedBy' relation.
    -- daysForPub = acceptance date minus project end date, in days; a positive
    -- value means the publication was accepted after the project ended.
    SELECT
        pr.id                                             AS pr_id,
        pub.id                                            AS pub_id,
        datediff(pub.dt_dateofacceptance, pr.dt_enddate)  AS daysForPub
    FROM (
        SELECT id, to_date(dateofacceptance.value) AS dt_dateofacceptance
        FROM ${openaire_db_name}.publication
        WHERE datainfo.deletedbyinference = false
          AND datainfo.invisible = false
    ) pub
    JOIN ${openaire_db_name}.relation rel
        ON rel.source = pub.id
       AND rel.reltype = 'resultProject'
       AND rel.relclass = 'isProducedBy'
       AND rel.datainfo.deletedbyinference = false
       AND rel.datainfo.invisible = false
    JOIN (
        SELECT id, to_date(enddate.value) AS dt_enddate
        FROM ${openaire_db_name}.project
        WHERE datainfo.deletedbyinference = false
          AND datainfo.invisible = false
    ) pr
        ON pr.id = rel.target
),
project_pub_stats AS (
    -- Per-project publication counters. Both counters count DISTINCT
    -- publications so that repeated relation rows for the same
    -- (publication, project) pair cannot make delayedPubs exceed num_pubs.
    SELECT
        pr_id,
        count(DISTINCT pub_id)                                    AS num_pubs,
        max(daysForPub)                                           AS daysForlastPub,
        count(DISTINCT CASE WHEN daysForPub > 0 THEN pub_id END)  AS delayedPubs
    FROM pr_pub
    GROUP BY pr_id
)
SELECT /*+ COALESCE(100) */
    substr(p.id, 4)                                                             AS id,
    p.acronym.value                                                             AS acronym,
    p.title.value                                                               AS title,
    xpath_string(p.fundingtree[0].value, '//funder/name')                       AS funder,
    xpath_string(p.fundingtree[0].value, '//funding_level_0/name')              AS funding_lvl0,
    xpath_string(p.fundingtree[0].value, '//funding_level_1/name')              AS funding_lvl1,
    xpath_string(p.fundingtree[0].value, '//funding_level_2/name')              AS funding_lvl2,
    p.ecsc39.value                                                              AS ec39,
    p.contracttype.classname                                                    AS type,
    p.startdate.value                                                           AS startdate,
    p.enddate.value                                                             AS enddate,
    cast(year(p.startdate.value) AS int)                                        AS start_year,
    cast(year(p.enddate.value) AS int)                                          AS end_year,
    cast(months_between(p.enddate.value, p.startdate.value) AS int)             AS duration,
    CASE WHEN project_pub_stats.num_pubs > 0 THEN 'yes' ELSE 'no' END           AS haspubs,
    project_pub_stats.num_pubs                                                  AS numpubs,
    project_pub_stats.daysForlastPub                                            AS daysForlastPub,
    project_pub_stats.delayedPubs                                               AS delayedpubs,
    p.callidentifier.value                                                      AS callidentifier,
    p.code.value                                                                AS code,
    p.totalcost                                                                 AS totalcost,
    p.fundedamount                                                              AS fundedamount,
    p.currency.value                                                            AS currency
FROM ${openaire_db_name}.project p
LEFT OUTER JOIN project_pub_stats
    ON project_pub_stats.pr_id = p.id
WHERE p.datainfo.deletedbyinference = false
  AND p.datainfo.invisible = false; /*EOS*/

ANALYZE TABLE ${stats_db_name}.project COMPUTE STATISTICS; /*EOS*/

-- funder: distinct funders found in the funding trees of all projects.
-- Every funding tree value of every project is exploded and the funder
-- id / name / shortname / jurisdiction are extracted from that SAME tree, so
-- a project with several funding trees cannot pair one funder with another
-- funder's jurisdiction.
-- NOTE(review): unlike the other project tables, no deletedbyinference /
-- invisible filter is applied here - confirm this is intentional.
DROP TABLE IF EXISTS ${stats_db_name}.funder PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.funder STORED AS PARQUET AS
SELECT /*+ COALESCE(100) */ DISTINCT
    xpath_string(fund, '//funder/id')           AS id,
    xpath_string(fund, '//funder/name')         AS name,
    xpath_string(fund, '//funder/shortname')    AS shortname,
    xpath_string(fund, '//funder/jurisdiction') AS country
FROM ${openaire_db_name}.project p
    -- The lateral-view alias deliberately differs from the `fundingtree`
    -- column so that references cannot silently bind to the un-exploded array.
    LATERAL VIEW explode(p.fundingtree.value) exploded_tree AS fund; /*EOS*/

ANALYZE TABLE ${stats_db_name}.funder COMPUTE STATISTICS; /*EOS*/

-- project_organization_contribution: distinct (project, organization,
-- contribution, currency) tuples taken from 'projectOrganization' relations
-- whose source id starts with '40|'.
--
-- The contribution is read positionally: properties[0] must have the key
-- 'contribution' and a value greater than zero; properties[1].value is
-- exposed as the currency.
-- NOTE(review): the key of properties[1] is not checked - presumably it is
-- always the currency entry; verify against the relation producer.
--
-- No LATERAL VIEW explode is used: the select list and the filter only ever
-- index the `properties` array itself, so exploding it would merely multiply
-- rows that DISTINCT collapses again.
DROP TABLE IF EXISTS ${stats_db_name}.project_organization_contribution PURGE; /*EOS*/

CREATE TABLE ${stats_db_name}.project_organization_contribution STORED AS PARQUET AS
SELECT /*+ COALESCE(100) */ DISTINCT
    substr(r.source, 4)       AS project,
    substr(r.target, 4)       AS organization,
    r.properties[0].value     AS contribution,
    r.properties[1].value     AS currency
FROM ${openaire_db_name}.relation r
WHERE r.reltype = 'projectOrganization'
  AND r.source LIKE '40|%'
  AND r.properties[0].key = 'contribution'
  -- Explicit numeric cast instead of relying on implicit string coercion.
  AND cast(r.properties[0].value AS double) > 0.0
  AND r.datainfo.deletedbyinference = false
  AND r.datainfo.invisible = false; /*EOS*/

ANALYZE TABLE ${stats_db_name}.project_organization_contribution COMPUTE STATISTICS; /*EOS*/
